1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2013 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
16 | Rui Hirokawa <hirokawa@php.net> |
17 +----------------------------------------------------------------------+
18 */
19
20 /* $Id$ */
21
22 /*
23 * PHP 4 Multibyte String module "mbstring"
24 *
25 * History:
26 * 2000.5.19 Release php-4.0RC2_jstring-1.0
27 * 2001.4.1 Release php4_jstring-1.0.91
28 * 2001.4.30 Release php4_jstring-1.1 (contribute to The PHP Group)
29 * 2001.5.1 Renamed from jstring to mbstring (hirokawa@php.net)
30 */
31
32 /*
33 * PHP3 Internationalization support program.
34 *
35 * Copyright (c) 1999,2000 by the PHP3 internationalization team.
36 * All rights reserved.
37 *
38 * See README_PHP3-i18n-ja for more detail.
39 *
40 * Authors:
41 * Hironori Sato <satoh@jpnnet.com>
42 * Shigeru Kanemoto <sgk@happysize.co.jp>
43 * Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>
44 * Rui Hirokawa <rui_hirokawa@ybb.ne.jp>
45 */
46
47 /* {{{ includes */
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65
66 #include "php_variables.h"
67 #include "php_globals.h"
68 #include "rfc1867.h"
69 #include "php_content_types.h"
70 #include "SAPI.h"
71 #include "php_unicode.h"
72 #include "TSRM.h"
73
74 #include "mb_gpc.h"
75
76 #if HAVE_MBREGEX
77 #include "php_mbregex.h"
78 #endif
79
80 #ifdef ZEND_MULTIBYTE
81 #include "zend_multibyte.h"
82 #endif /* ZEND_MULTIBYTE */
83
84 #if HAVE_ONIG
85 #include "php_onig_compat.h"
86 #include <oniguruma.h>
87 #undef UChar
88 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
89 #include "ext/pcre/php_pcre.h"
90 #endif
91 /* }}} */
92
93 #if HAVE_MBSTRING
94
95 /* {{{ prototypes */
96 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
97
98 static PHP_GINIT_FUNCTION(mbstring);
99 static PHP_GSHUTDOWN_FUNCTION(mbstring);
100
101 #ifdef ZEND_MULTIBYTE
102 static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC);
103 static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC);
104 static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC);
105 static int php_mb_set_zend_encoding(TSRMLS_D);
106 #endif
107 /* }}} */
108
109 /* {{{ php_mb_default_identify_list */
/*
 * One row of the per-language default detect-order table: ties a language
 * identifier to a constant array of encoding identifiers and its length.
 */
110 typedef struct _php_mb_nls_ident_list {
/* language this row applies to */
111 enum mbfl_no_language lang;
/* array of encoding identifiers for that language (not owned by the row) */
112 const enum mbfl_no_encoding* list;
/* number of elements in `list` */
113 int list_size;
114 } php_mb_nls_ident_list;
115
/*
 * Per-language encoding lists. Each array is referenced by one row of
 * php_mb_default_identify_list[] further down, which pairs it with a
 * language identifier. Every list starts with ASCII.
 */
/* paired with mbfl_no_language_japanese */
116 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
117 mbfl_no_encoding_ascii,
118 mbfl_no_encoding_jis,
119 mbfl_no_encoding_utf8,
120 mbfl_no_encoding_euc_jp,
121 mbfl_no_encoding_sjis
122 };
123
/* paired with mbfl_no_language_simplified_chinese */
124 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
125 mbfl_no_encoding_ascii,
126 mbfl_no_encoding_utf8,
127 mbfl_no_encoding_euc_cn,
128 mbfl_no_encoding_cp936
129 };
130
/* paired with mbfl_no_language_traditional_chinese */
131 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
132 mbfl_no_encoding_ascii,
133 mbfl_no_encoding_utf8,
134 mbfl_no_encoding_euc_tw,
135 mbfl_no_encoding_big5
136 };
137
/* paired with mbfl_no_language_korean */
138 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
139 mbfl_no_encoding_ascii,
140 mbfl_no_encoding_utf8,
141 mbfl_no_encoding_euc_kr,
142 mbfl_no_encoding_uhc
143 };
144
/* paired with mbfl_no_language_russian */
145 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
146 mbfl_no_encoding_ascii,
147 mbfl_no_encoding_utf8,
148 mbfl_no_encoding_koi8r,
149 mbfl_no_encoding_cp1251,
150 mbfl_no_encoding_cp866
151 };
152
/* paired with mbfl_no_language_armenian */
153 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
154 mbfl_no_encoding_ascii,
155 mbfl_no_encoding_utf8,
156 mbfl_no_encoding_armscii8
157 };
158
/* paired with mbfl_no_language_turkish */
159 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
160 mbfl_no_encoding_ascii,
161 mbfl_no_encoding_utf8,
162 mbfl_no_encoding_cp1254,
163 mbfl_no_encoding_8859_9
164 };
165
/* paired with mbfl_no_language_ukrainian */
166 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
167 mbfl_no_encoding_ascii,
168 mbfl_no_encoding_utf8,
169 mbfl_no_encoding_koi8u
170 };
171
/*
 * paired with mbfl_no_language_neutral; also the fallback that
 * php_mb_nls_get_default_detect_order_list() stores when no row matches
 */
172 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
173 mbfl_no_encoding_ascii,
174 mbfl_no_encoding_utf8
175 };
176
177
/*
 * Language -> encoding-list lookup table, scanned linearly by
 * php_mb_nls_get_default_detect_order_list(). Each row stores the language,
 * the matching array and its element count (computed with sizeof so that it
 * follows the array definition automatically).
 */
178 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
179 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
180 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
181 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
182 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
183 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
184 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
185 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
186 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
187 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
188 };
189
190 /* }}} */
191
192 /* {{{ mb_overload_def mb_ovld[] */
/*
 * Function-overload table. Each row holds an MB_OVERLOAD_* category flag
 * followed by three function names: a standard function, its mb_* counterpart
 * and an "mb_orig_*" name.
 * NOTE(review): presumably the third name is the alias under which the
 * original function is preserved while overloaded; struct mb_overload_def is
 * declared outside this chunk -- confirm there.
 * The regex rows exist only when HAVE_MBREGEX is set. The all-zero/NULL row
 * terminates the table.
 */
193 static const struct mb_overload_def mb_ovld[] = {
194 {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
195 {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
196 {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
197 {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
198 {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
199 {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
200 {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
201 {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
202 {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
203 {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
204 {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
205 {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
206 {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
207 #if HAVE_MBREGEX
208 {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
209 {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
210 {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
211 {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
212 {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
213 #endif
214 {0, NULL, NULL, NULL}
215 };
216 /* }}} */
217
218 /* {{{ arginfo */
/*
 * Argument descriptors for the mb_* functions registered in
 * mbstring_functions[]. One ZEND_BEGIN_ARG_INFO[_EX] ... ZEND_END_ARG_INFO
 * group per function, with one ZEND_ARG_INFO line per declared parameter.
 * NOTE(review): per the Zend argument-info macros, the last argument of
 * ZEND_BEGIN_ARG_INFO_EX is the number of required parameters and the first
 * argument of ZEND_ARG_INFO is 1 for a by-reference parameter -- the macro
 * definitions are outside this file; confirm against the Zend headers in use.
 */
219 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
220 ZEND_ARG_INFO(0, language)
221 ZEND_END_ARG_INFO()
222
223 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
224 ZEND_ARG_INFO(0, encoding)
225 ZEND_END_ARG_INFO()
226
227 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
228 ZEND_ARG_INFO(0, type)
229 ZEND_END_ARG_INFO()
230
231 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
232 ZEND_ARG_INFO(0, encoding)
233 ZEND_END_ARG_INFO()
234
235 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
236 ZEND_ARG_INFO(0, encoding)
237 ZEND_END_ARG_INFO()
238
239 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
240 ZEND_ARG_INFO(0, substchar)
241 ZEND_END_ARG_INFO()
242
243 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
244 ZEND_ARG_INFO(0, encoding)
245 ZEND_END_ARG_INFO()
246
/* `result` is the only by-reference parameter in this group */
247 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
248 ZEND_ARG_INFO(0, encoded_string)
249 ZEND_ARG_INFO(1, result)
250 ZEND_END_ARG_INFO()
251
252 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
253 ZEND_ARG_INFO(0, contents)
254 ZEND_ARG_INFO(0, status)
255 ZEND_END_ARG_INFO()
256
257 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
258 ZEND_ARG_INFO(0, str)
259 ZEND_ARG_INFO(0, encoding)
260 ZEND_END_ARG_INFO()
261
262 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
263 ZEND_ARG_INFO(0, haystack)
264 ZEND_ARG_INFO(0, needle)
265 ZEND_ARG_INFO(0, offset)
266 ZEND_ARG_INFO(0, encoding)
267 ZEND_END_ARG_INFO()
268
269 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
270 ZEND_ARG_INFO(0, haystack)
271 ZEND_ARG_INFO(0, needle)
272 ZEND_ARG_INFO(0, offset)
273 ZEND_ARG_INFO(0, encoding)
274 ZEND_END_ARG_INFO()
275
276 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
277 ZEND_ARG_INFO(0, haystack)
278 ZEND_ARG_INFO(0, needle)
279 ZEND_ARG_INFO(0, offset)
280 ZEND_ARG_INFO(0, encoding)
281 ZEND_END_ARG_INFO()
282
283 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
284 ZEND_ARG_INFO(0, haystack)
285 ZEND_ARG_INFO(0, needle)
286 ZEND_ARG_INFO(0, offset)
287 ZEND_ARG_INFO(0, encoding)
288 ZEND_END_ARG_INFO()
289
290 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
291 ZEND_ARG_INFO(0, haystack)
292 ZEND_ARG_INFO(0, needle)
293 ZEND_ARG_INFO(0, part)
294 ZEND_ARG_INFO(0, encoding)
295 ZEND_END_ARG_INFO()
296
297 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
298 ZEND_ARG_INFO(0, haystack)
299 ZEND_ARG_INFO(0, needle)
300 ZEND_ARG_INFO(0, part)
301 ZEND_ARG_INFO(0, encoding)
302 ZEND_END_ARG_INFO()
303
304 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
305 ZEND_ARG_INFO(0, haystack)
306 ZEND_ARG_INFO(0, needle)
307 ZEND_ARG_INFO(0, part)
308 ZEND_ARG_INFO(0, encoding)
309 ZEND_END_ARG_INFO()
310
311 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
312 ZEND_ARG_INFO(0, haystack)
313 ZEND_ARG_INFO(0, needle)
314 ZEND_ARG_INFO(0, part)
315 ZEND_ARG_INFO(0, encoding)
316 ZEND_END_ARG_INFO()
317
318 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
319 ZEND_ARG_INFO(0, haystack)
320 ZEND_ARG_INFO(0, needle)
321 ZEND_ARG_INFO(0, encoding)
322 ZEND_END_ARG_INFO()
323
324 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
325 ZEND_ARG_INFO(0, str)
326 ZEND_ARG_INFO(0, start)
327 ZEND_ARG_INFO(0, length)
328 ZEND_ARG_INFO(0, encoding)
329 ZEND_END_ARG_INFO()
330
331 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
332 ZEND_ARG_INFO(0, str)
333 ZEND_ARG_INFO(0, start)
334 ZEND_ARG_INFO(0, length)
335 ZEND_ARG_INFO(0, encoding)
336 ZEND_END_ARG_INFO()
337
338 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
339 ZEND_ARG_INFO(0, str)
340 ZEND_ARG_INFO(0, encoding)
341 ZEND_END_ARG_INFO()
342
343 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
344 ZEND_ARG_INFO(0, str)
345 ZEND_ARG_INFO(0, start)
346 ZEND_ARG_INFO(0, width)
347 ZEND_ARG_INFO(0, trimmarker)
348 ZEND_ARG_INFO(0, encoding)
349 ZEND_END_ARG_INFO()
350
351 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
352 ZEND_ARG_INFO(0, str)
353 ZEND_ARG_INFO(0, to)
354 ZEND_ARG_INFO(0, from)
355 ZEND_END_ARG_INFO()
356
357 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
358 ZEND_ARG_INFO(0, sourcestring)
359 ZEND_ARG_INFO(0, mode)
360 ZEND_ARG_INFO(0, encoding)
361 ZEND_END_ARG_INFO()
362
363 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
364 ZEND_ARG_INFO(0, sourcestring)
365 ZEND_ARG_INFO(0, encoding)
366 ZEND_END_ARG_INFO()
367
368 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
369 ZEND_ARG_INFO(0, sourcestring)
370 ZEND_ARG_INFO(0, encoding)
371 ZEND_END_ARG_INFO()
372
373 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
374 ZEND_ARG_INFO(0, str)
375 ZEND_ARG_INFO(0, encoding_list)
376 ZEND_ARG_INFO(0, strict)
377 ZEND_END_ARG_INFO()
378
379 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
380 ZEND_END_ARG_INFO()
381
382 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
383 ZEND_ARG_INFO(0, encoding)
384 ZEND_END_ARG_INFO()
385
386 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
387 ZEND_ARG_INFO(0, str)
388 ZEND_ARG_INFO(0, charset)
389 ZEND_ARG_INFO(0, transfer)
390 ZEND_ARG_INFO(0, linefeed)
391 ZEND_ARG_INFO(0, indent)
392 ZEND_END_ARG_INFO()
393
394 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
395 ZEND_ARG_INFO(0, string)
396 ZEND_END_ARG_INFO()
397
398 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
399 ZEND_ARG_INFO(0, str)
400 ZEND_ARG_INFO(0, option)
401 ZEND_ARG_INFO(0, encoding)
402 ZEND_END_ARG_INFO()
403
/*
 * The only descriptor here whose second macro argument is 1, and whose
 * trailing variadic parameter is marked by-reference.
 */
404 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 1, 0, 3)
405 ZEND_ARG_INFO(0, to)
406 ZEND_ARG_INFO(0, from)
407 ZEND_ARG_INFO(1, ...)
408 ZEND_END_ARG_INFO()
409
410 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
411 ZEND_ARG_INFO(0, string)
412 ZEND_ARG_INFO(0, convmap)
413 ZEND_ARG_INFO(0, encoding)
414 ZEND_END_ARG_INFO()
415
416 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
417 ZEND_ARG_INFO(0, string)
418 ZEND_ARG_INFO(0, convmap)
419 ZEND_ARG_INFO(0, encoding)
420 ZEND_END_ARG_INFO()
421
422 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
423 ZEND_ARG_INFO(0, to)
424 ZEND_ARG_INFO(0, subject)
425 ZEND_ARG_INFO(0, message)
426 ZEND_ARG_INFO(0, additional_headers)
427 ZEND_ARG_INFO(0, additional_parameters)
428 ZEND_END_ARG_INFO()
429
430 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
431 ZEND_ARG_INFO(0, type)
432 ZEND_END_ARG_INFO()
433
434 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
435 ZEND_ARG_INFO(0, var)
436 ZEND_ARG_INFO(0, encoding)
437 ZEND_END_ARG_INFO()
438
/*
 * Argument descriptors for the multibyte regex functions (mb_ereg*,
 * mb_split, mb_regex_*). They are declared unconditionally here.
 * NOTE(review): presumably consumed through PHP_MBREGEX_FUNCTION_ENTRIES,
 * which mbstring_functions[] only expands when HAVE_MBREGEX is set -- that
 * macro is defined outside this file; confirm.
 */
439 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
440 ZEND_ARG_INFO(0, encoding)
441 ZEND_END_ARG_INFO()
442
/* `registers` is declared by-reference */
443 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
444 ZEND_ARG_INFO(0, pattern)
445 ZEND_ARG_INFO(0, string)
446 ZEND_ARG_INFO(1, registers)
447 ZEND_END_ARG_INFO()
448
/* `registers` is declared by-reference */
449 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
450 ZEND_ARG_INFO(0, pattern)
451 ZEND_ARG_INFO(0, string)
452 ZEND_ARG_INFO(1, registers)
453 ZEND_END_ARG_INFO()
454
455 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
456 ZEND_ARG_INFO(0, pattern)
457 ZEND_ARG_INFO(0, replacement)
458 ZEND_ARG_INFO(0, string)
459 ZEND_ARG_INFO(0, option)
460 ZEND_END_ARG_INFO()
461
462 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
463 ZEND_ARG_INFO(0, pattern)
464 ZEND_ARG_INFO(0, replacement)
465 ZEND_ARG_INFO(0, string)
466 ZEND_END_ARG_INFO()
467
468 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
469 ZEND_ARG_INFO(0, pattern)
470 ZEND_ARG_INFO(0, string)
471 ZEND_ARG_INFO(0, limit)
472 ZEND_END_ARG_INFO()
473
474 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
475 ZEND_ARG_INFO(0, pattern)
476 ZEND_ARG_INFO(0, string)
477 ZEND_ARG_INFO(0, option)
478 ZEND_END_ARG_INFO()
479
480 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
481 ZEND_ARG_INFO(0, pattern)
482 ZEND_ARG_INFO(0, option)
483 ZEND_END_ARG_INFO()
484
485 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
486 ZEND_ARG_INFO(0, pattern)
487 ZEND_ARG_INFO(0, option)
488 ZEND_END_ARG_INFO()
489
490 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
491 ZEND_ARG_INFO(0, pattern)
492 ZEND_ARG_INFO(0, option)
493 ZEND_END_ARG_INFO()
494
495 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
496 ZEND_ARG_INFO(0, string)
497 ZEND_ARG_INFO(0, pattern)
498 ZEND_ARG_INFO(0, option)
499 ZEND_END_ARG_INFO()
500
501 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
502 ZEND_END_ARG_INFO()
503
504 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
505 ZEND_END_ARG_INFO()
506
507 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
508 ZEND_ARG_INFO(0, position)
509 ZEND_END_ARG_INFO()
510
511 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
512 ZEND_ARG_INFO(0, options)
513 ZEND_END_ARG_INFO()
514 /* }}} */
515
516 /* {{{ zend_function_entry mbstring_functions[] */
/*
 * Function table handed to the engine through mbstring_module_entry.
 * Each PHP_FE row pairs a function name with its arginfo descriptor defined
 * above. The regex entries are appended only when HAVE_MBREGEX is set, and
 * PHP_FE_END terminates the table.
 */
517 const zend_function_entry mbstring_functions[] = {
518 PHP_FE(mb_convert_case, arginfo_mb_convert_case)
519 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
520 PHP_FE(mb_strtolower, arginfo_mb_strtolower)
521 PHP_FE(mb_language, arginfo_mb_language)
522 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
523 PHP_FE(mb_http_input, arginfo_mb_http_input)
524 PHP_FE(mb_http_output, arginfo_mb_http_output)
525 PHP_FE(mb_detect_order, arginfo_mb_detect_order)
526 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
527 PHP_FE(mb_parse_str, arginfo_mb_parse_str)
528 PHP_FE(mb_output_handler, arginfo_mb_output_handler)
529 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
530 PHP_FE(mb_strlen, arginfo_mb_strlen)
531 PHP_FE(mb_strpos, arginfo_mb_strpos)
532 PHP_FE(mb_strrpos, arginfo_mb_strrpos)
533 PHP_FE(mb_stripos, arginfo_mb_stripos)
534 PHP_FE(mb_strripos, arginfo_mb_strripos)
535 PHP_FE(mb_strstr, arginfo_mb_strstr)
536 PHP_FE(mb_strrchr, arginfo_mb_strrchr)
537 PHP_FE(mb_stristr, arginfo_mb_stristr)
538 PHP_FE(mb_strrichr, arginfo_mb_strrichr)
539 PHP_FE(mb_substr_count, arginfo_mb_substr_count)
540 PHP_FE(mb_substr, arginfo_mb_substr)
541 PHP_FE(mb_strcut, arginfo_mb_strcut)
542 PHP_FE(mb_strwidth, arginfo_mb_strwidth)
543 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
544 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
545 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
546 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
547 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
548 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
549 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
550 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
551 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
552 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
553 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
554 PHP_FE(mb_send_mail, arginfo_mb_send_mail)
555 PHP_FE(mb_get_info, arginfo_mb_get_info)
556 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
557 #if HAVE_MBREGEX
558 PHP_MBREGEX_FUNCTION_ENTRIES
559 #endif
560 PHP_FE_END
561 };
562 /* }}} */
563
564 /* {{{ zend_module_entry mbstring_module_entry */
/*
 * Module descriptor for the "mbstring" extension: wires up the function
 * table, the module/request startup and shutdown hooks, the info hook and
 * the per-module globals with their constructor/destructor.
 */
565 zend_module_entry mbstring_module_entry = {
566 STANDARD_MODULE_HEADER,
/* extension name */
567 "mbstring",
/* exported function table */
568 mbstring_functions,
/* module startup / shutdown hooks */
569 PHP_MINIT(mbstring),
570 PHP_MSHUTDOWN(mbstring),
/* request startup / shutdown hooks */
571 PHP_RINIT(mbstring),
572 PHP_RSHUTDOWN(mbstring),
/* phpinfo() hook */
573 PHP_MINFO(mbstring),
/* no explicit version string is declared */
574 NO_VERSION_YET,
/* module globals and their init / shutdown callbacks */
575 PHP_MODULE_GLOBALS(mbstring),
576 PHP_GINIT(mbstring),
577 PHP_GSHUTDOWN(mbstring),
/* NOTE(review): presumably the post-deactivate callback slot, unused here -- confirm against zend_module_entry */
578 NULL,
579 STANDARD_MODULE_PROPERTIES_EX
580 };
581 /* }}} */
582
583 /* {{{ static sapi_post_entry php_post_entries[] */
/*
 * POST content-type handler table using the stock handlers: URL-encoded
 * form data goes to php_std_post_handler, multipart data to
 * rfc1867_post_handler. The NULL row terminates the table.
 * NOTE(review): presumably used to restore the default handlers when the
 * mbstring-specific table (mbstr_post_entries) is not in effect; the code
 * that registers it is outside this chunk.
 */
584 static sapi_post_entry php_post_entries[] = {
585 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
586 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
587 { NULL, 0, NULL, NULL }
588 };
589 /* }}} */
590
591 #ifdef COMPILE_DL_MBSTRING
ZEND_GET_MODULE(mbstring)592 ZEND_GET_MODULE(mbstring)
593 #endif
594
595 /* {{{ allocators */
/* Allocator hook (slot 1 of _php_mb_allocators): obtains `sz` bytes through emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *chunk = emalloc(sz);

	return chunk;
}
600
/* Allocator hook (slot 2 of _php_mb_allocators): resizes `ptr` to `sz` bytes through erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
605
/* Allocator hook (slot 3 of _php_mb_allocators): obtains `nelems` elements of `szelem` bytes through ecalloc(). */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *chunk = ecalloc(nelems, szelem);

	return chunk;
}
610
/* Allocator hook (slot 4 of _php_mb_allocators): releases `ptr` through efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	void *doomed = ptr;

	efree(doomed);
}
615
/* Allocator hook (slot 5 of _php_mb_allocators): obtains `sz` bytes through pemalloc() with the persistent flag set. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *chunk = pemalloc(sz, 1);

	return chunk;
}
620
/* Allocator hook (slot 6 of _php_mb_allocators): resizes `ptr` to `sz` bytes through perealloc() with the persistent flag set. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
625
/* Allocator hook (slot 7 of _php_mb_allocators): releases `ptr` through pefree() with the persistent flag set. */
static void _php_mb_allocators_pfree(void *ptr)
{
	void *doomed = ptr;

	pefree(doomed, 1);
}
630
/*
 * Allocator table built from the wrappers above, in the order
 * malloc, realloc, calloc, free, then the persistent variants
 * pmalloc, prealloc, pfree.
 * NOTE(review): presumably installed into libmbfl during module startup;
 * the code doing so is outside this chunk.
 */
631 static mbfl_allocators _php_mb_allocators = {
632 _php_mb_allocators_malloc,
633 _php_mb_allocators_realloc,
634 _php_mb_allocators_calloc,
635 _php_mb_allocators_free,
636 _php_mb_allocators_pmalloc,
637 _php_mb_allocators_prealloc,
638 _php_mb_allocators_pfree
639 };
640 /* }}} */
641
642 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/*
 * POST content-type handler table for mbstring: identical to
 * php_post_entries except that URL-encoded form data is routed to
 * php_mb_post_handler instead of php_std_post_handler. The NULL row
 * terminates the table.
 */
643 static sapi_post_entry mbstr_post_entries[] = {
644 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
645 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
646 { NULL, 0, NULL, NULL }
647 };
648 /* }}} */
649
650 /* {{{ static int php_mb_parse_encoding_list()
651 * Return 0 if input contains any illegal encoding, otherwise 1.
652 * Even if any illegal encoding is detected the result may contain a list
653 * of parsed encodings.
654 */
655 static int
php_mb_parse_encoding_list(const char * value,int value_length,enum mbfl_no_encoding ** return_list,int * return_size,int persistent TSRMLS_DC)656 php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
657 {
658 int n, l, size, bauto, ret = 1;
659 char *p, *p1, *p2, *endp, *tmpstr;
660 enum mbfl_no_encoding no_encoding;
661 enum mbfl_no_encoding *src, *entry, *list;
662
663 list = NULL;
664 if (value == NULL || value_length <= 0) {
665 if (return_list) {
666 *return_list = NULL;
667 }
668 if (return_size) {
669 *return_size = 0;
670 }
671 return 0;
672 } else {
673 enum mbfl_no_encoding *identify_list;
674 int identify_list_size;
675
676 identify_list = MBSTRG(default_detect_order_list);
677 identify_list_size = MBSTRG(default_detect_order_list_size);
678
679 /* copy the value string for work */
680 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
681 tmpstr = (char *)estrndup(value+1, value_length-2);
682 value_length -= 2;
683 }
684 else
685 tmpstr = (char *)estrndup(value, value_length);
686 if (tmpstr == NULL) {
687 return 0;
688 }
689 /* count the number of listed encoding names */
690 endp = tmpstr + value_length;
691 n = 1;
692 p1 = tmpstr;
693 while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
694 p1 = p2 + 1;
695 n++;
696 }
697 size = n + identify_list_size;
698 /* make list */
699 list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
700 if (list != NULL) {
701 entry = list;
702 n = 0;
703 bauto = 0;
704 p1 = tmpstr;
705 do {
706 p2 = p = php_memnstr(p1, ",", 1, endp);
707 if (p == NULL) {
708 p = endp;
709 }
710 *p = '\0';
711 /* trim spaces */
712 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
713 p1++;
714 }
715 p--;
716 while (p > p1 && (*p == ' ' || *p == '\t')) {
717 *p = '\0';
718 p--;
719 }
720 /* convert to the encoding number and check encoding */
721 if (strcasecmp(p1, "auto") == 0) {
722 if (!bauto) {
723 bauto = 1;
724 l = identify_list_size;
725 src = identify_list;
726 while (l > 0) {
727 *entry++ = *src++;
728 l--;
729 n++;
730 }
731 }
732 } else {
733 no_encoding = mbfl_name2no_encoding(p1);
734 if (no_encoding != mbfl_no_encoding_invalid) {
735 *entry++ = no_encoding;
736 n++;
737 } else {
738 ret = 0;
739 }
740 }
741 p1 = p2 + 1;
742 } while (n < size && p2 != NULL);
743 if (n > 0) {
744 if (return_list) {
745 *return_list = list;
746 } else {
747 pefree(list, persistent);
748 }
749 } else {
750 pefree(list, persistent);
751 if (return_list) {
752 *return_list = NULL;
753 }
754 ret = 0;
755 }
756 if (return_size) {
757 *return_size = n;
758 }
759 } else {
760 if (return_list) {
761 *return_list = NULL;
762 }
763 if (return_size) {
764 *return_size = 0;
765 }
766 ret = 0;
767 }
768 efree(tmpstr);
769 }
770
771 return ret;
772 }
773 /* }}} */
774
775 /* {{{ MBSTRING_API php_mb_check_encoding_list */
php_mb_check_encoding_list(const char * encoding_list TSRMLS_DC)776 MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
777 return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, NULL, 0 TSRMLS_CC);
778 }
779 /* }}} */
780
781 /* {{{ static int php_mb_parse_encoding_array()
782 * Return 0 if input contains any illegal encoding, otherwise 1.
783 * Even if any illegal encoding is detected the result may contain a list
784 * of parsed encodings.
785 */
786 static int
php_mb_parse_encoding_array(zval * array,enum mbfl_no_encoding ** return_list,int * return_size,int persistent TSRMLS_DC)787 php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int *return_size, int persistent TSRMLS_DC)
788 {
789 zval **hash_entry;
790 HashTable *target_hash;
791 int i, n, l, size, bauto,ret = 1;
792 enum mbfl_no_encoding no_encoding;
793 enum mbfl_no_encoding *src, *list, *entry;
794
795 list = NULL;
796 if (Z_TYPE_P(array) == IS_ARRAY) {
797 enum mbfl_no_encoding *identify_list;
798 int identify_list_size;
799
800 identify_list = MBSTRG(default_detect_order_list);
801 identify_list_size = MBSTRG(default_detect_order_list_size);
802
803 target_hash = Z_ARRVAL_P(array);
804 zend_hash_internal_pointer_reset(target_hash);
805 i = zend_hash_num_elements(target_hash);
806 size = i + identify_list_size;
807 list = (enum mbfl_no_encoding *)pecalloc(size, sizeof(int), persistent);
808 if (list != NULL) {
809 entry = list;
810 bauto = 0;
811 n = 0;
812 while (i > 0) {
813 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
814 break;
815 }
816 convert_to_string_ex(hash_entry);
817 if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
818 if (!bauto) {
819 bauto = 1;
820 l = identify_list_size;
821 src = identify_list;
822 while (l > 0) {
823 *entry++ = *src++;
824 l--;
825 n++;
826 }
827 }
828 } else {
829 no_encoding = mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
830 if (no_encoding != mbfl_no_encoding_invalid) {
831 *entry++ = no_encoding;
832 n++;
833 } else {
834 ret = 0;
835 }
836 }
837 zend_hash_move_forward(target_hash);
838 i--;
839 }
840 if (n > 0) {
841 if (return_list) {
842 *return_list = list;
843 } else {
844 pefree(list, persistent);
845 }
846 } else {
847 pefree(list, persistent);
848 if (return_list) {
849 *return_list = NULL;
850 }
851 ret = 0;
852 }
853 if (return_size) {
854 *return_size = n;
855 }
856 } else {
857 if (return_list) {
858 *return_list = NULL;
859 }
860 if (return_size) {
861 *return_size = 0;
862 }
863 ret = 0;
864 }
865 }
866
867 return ret;
868 }
869 /* }}} */
870
871 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
872 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
873 static void _php_mb_free_regex(void *opaque);
874
875 #if HAVE_ONIG
876 /* {{{ _php_mb_compile_regex */
/*
 * Oniguruma variant: compiles `pattern` as a case-insensitive,
 * non-capturing, ASCII-encoded Perl-syntax regex.
 *
 * Returns the compiled regex as an opaque pointer (to be released with
 * _php_mb_free_regex()), or NULL after raising an E_WARNING that carries
 * the pattern and the Oniguruma error text.
 */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	php_mb_regex_t *retval = NULL;
	OnigErrorInfo err_info;
	int err_code;

	if ((err_code = onig_new(&retval,
			(const OnigUChar *)pattern,
			(const OnigUChar *)pattern + strlen(pattern),
			ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
			ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
		OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
		/*
		 * onig_error_code_to_str() is variadic and reads its optional third
		 * argument as an OnigErrorInfo pointer, so the address must be
		 * passed, not the structure itself.
		 */
		onig_error_code_to_str(err_str, err_code, &err_info);
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
		retval = NULL;
	}
	return retval;
}
895 /* }}} */
896
897 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)898 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
899 {
900 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
901 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
902 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
903 }
904 /* }}} */
905
906 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)907 static void _php_mb_free_regex(void *opaque)
908 {
909 onig_free((php_mb_regex_t *)opaque);
910 }
911 /* }}} */
912 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
913 /* {{{ _php_mb_compile_regex */
/*
 * PCRE variant: compiles `pattern` with PCRE_CASELESS.
 *
 * Returns the compiled pattern as an opaque pointer, or NULL after raising
 * an E_WARNING that carries the pattern, the error offset and the PCRE
 * error text.
 */
static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
{
	const char *err_str;
	int err_offset;
	pcre *compiled;

	compiled = pcre_compile(pattern, PCRE_CASELESS, &err_str, &err_offset, NULL);
	if (compiled == NULL) {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
	}

	return compiled;
}
926 /* }}} */
927
928 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)929 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
930 {
931 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
932 0, NULL, 0) >= 0;
933 }
934 /* }}} */
935
936 /* {{{ _php_mb_free_regex */
/* PCRE variant: releases a pattern obtained from _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *opaque)
{
	void *compiled = opaque;

	pcre_free(compiled);
}
941 /* }}} */
942 #endif
943
944 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,int * plist_size)945 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, int* plist_size)
946 {
947 size_t i;
948
949 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
950 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
951
952 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
953 if (php_mb_default_identify_list[i].lang == lang) {
954 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
955 *plist_size = php_mb_default_identify_list[i].list_size;
956 return 1;
957 }
958 }
959 return 0;
960 }
961 /* }}} */
962
963 /* {{{ php.ini directive handler */
964 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)965 static PHP_INI_MH(OnUpdate_mbstring_language)
966 {
967 enum mbfl_no_language no_language;
968
969 no_language = mbfl_name2no_language(new_value);
970 if (no_language == mbfl_no_language_invalid) {
971 MBSTRG(language) = mbfl_no_language_neutral;
972 return FAILURE;
973 }
974 MBSTRG(language) = no_language;
975 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
976 return SUCCESS;
977 }
978 /* }}} */
979
980 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)981 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
982 {
983 enum mbfl_no_encoding *list;
984 int size;
985
986 if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
987 if (MBSTRG(detect_order_list)) {
988 free(MBSTRG(detect_order_list));
989 }
990 MBSTRG(detect_order_list) = list;
991 MBSTRG(detect_order_list_size) = size;
992 } else {
993 if (MBSTRG(detect_order_list)) {
994 free(MBSTRG(detect_order_list));
995 MBSTRG(detect_order_list) = NULL;
996 }
997 return FAILURE;
998 }
999
1000 return SUCCESS;
1001 }
1002 /* }}} */
1003
1004 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)1005 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1006 {
1007 enum mbfl_no_encoding *list;
1008 int size;
1009
1010 if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1011 if (MBSTRG(http_input_list)) {
1012 free(MBSTRG(http_input_list));
1013 }
1014 MBSTRG(http_input_list) = list;
1015 MBSTRG(http_input_list_size) = size;
1016 } else {
1017 if (MBSTRG(http_input_list)) {
1018 free(MBSTRG(http_input_list));
1019 MBSTRG(http_input_list) = NULL;
1020 }
1021 MBSTRG(http_input_list_size) = 0;
1022 return FAILURE;
1023 }
1024
1025 return SUCCESS;
1026 }
1027 /* }}} */
1028
1029 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)1030 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1031 {
1032 enum mbfl_no_encoding no_encoding;
1033
1034 no_encoding = mbfl_name2no_encoding(new_value);
1035 if (no_encoding != mbfl_no_encoding_invalid) {
1036 MBSTRG(http_output_encoding) = no_encoding;
1037 MBSTRG(current_http_output_encoding) = no_encoding;
1038 } else {
1039 MBSTRG(http_output_encoding) = mbfl_no_encoding_pass;
1040 MBSTRG(current_http_output_encoding) = mbfl_no_encoding_pass;
1041 if (new_value != NULL && new_value_length > 0) {
1042 return FAILURE;
1043 }
1044 }
1045
1046 return SUCCESS;
1047 }
1048 /* }}} */
1049
1050 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
/* Applies a value for mbstring.internal_encoding.
 *
 * new_value        - encoding name, may be NULL
 * new_value_length - length of new_value (not needed by this function)
 *
 * If new_value names a known encoding it is used; otherwise a default is
 * chosen from MBSTRG(language). The result is stored as both the configured
 * and the current internal encoding. When mbregex is compiled in, its default
 * and current character types are updated as well. Always returns SUCCESS.
 *
 * Compared with the previous revision this drops the never-read
 * enc_name_len bookkeeping and the inner `enc_name` declaration that shadowed
 * the outer one; runtime behaviour is unchanged. */
int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
{
	enum mbfl_no_encoding no_encoding = mbfl_no_encoding_invalid;

	(void)new_value_length;

	if (new_value != NULL) {
		no_encoding = mbfl_name2no_encoding(new_value);
	}

	if (no_encoding == mbfl_no_encoding_invalid) {
		/* No usable name given: derive the encoding from the language. */
		const char *fallback_name;

		switch (MBSTRG(language)) {
			case mbfl_no_language_uni:
				fallback_name = "UTF-8";
				break;
			case mbfl_no_language_japanese:
				fallback_name = "EUC-JP";
				break;
			case mbfl_no_language_korean:
				fallback_name = "EUC-KR";
				break;
			case mbfl_no_language_simplified_chinese:
				fallback_name = "EUC-CN";
				break;
			case mbfl_no_language_traditional_chinese:
				fallback_name = "EUC-TW";
				break;
			case mbfl_no_language_russian:
				fallback_name = "KOI8-R";
				break;
			case mbfl_no_language_german:
				fallback_name = "ISO-8859-15";
				break;
			case mbfl_no_language_armenian:
				fallback_name = "ArmSCII-8";
				break;
			case mbfl_no_language_turkish:
				fallback_name = "ISO-8859-9";
				break;
			default:
				fallback_name = "ISO-8859-1";
				break;
		}
		no_encoding = mbfl_name2no_encoding(fallback_name);
	}

	MBSTRG(internal_encoding) = no_encoding;
	MBSTRG(current_internal_encoding) = no_encoding;

#if HAVE_MBREGEX
	/* NOTE(review): mbregex is configured from the raw ini value, not from the
	 * language-derived fallback chosen above - confirm this is intended. */
	if (FAILURE == php_mb_regex_set_default_mbctype(new_value TSRMLS_CC)) {
		/* falls back to EUC-JP if an unknown encoding name is given */
		php_mb_regex_set_default_mbctype("EUC-JP" TSRMLS_CC);
	}
	php_mb_regex_set_mbctype(new_value TSRMLS_CC);
#endif
	return SUCCESS;
}
1122 /* }}} */
1123
1124 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)1125 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1126 {
1127 if (stage == PHP_INI_STAGE_STARTUP || stage == PHP_INI_STAGE_SHUTDOWN
1128 || stage == PHP_INI_STAGE_RUNTIME) {
1129 return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1130 } else {
1131 /* the corresponding mbstring globals needs to be set according to the
1132 * ini value in the later stage because it never falls back to the
1133 * default value if 1. no value for mbstring.internal_encoding is given,
1134 * 2. mbstring.language directive is processed in per-dir or runtime
1135 * context and 3. call to the handler for mbstring.language is done
1136 * after mbstring.internal_encoding is handled. */
1137 return SUCCESS;
1138 }
1139 }
1140 /* }}} */
1141
1142 #ifdef ZEND_MULTIBYTE
1143 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_script_encoding) */
PHP_INI_MH(OnUpdate_mbstring_script_encoding)1144 static PHP_INI_MH(OnUpdate_mbstring_script_encoding)
1145 {
1146 int *list, size;
1147
1148 if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1149 if (MBSTRG(script_encoding_list) != NULL) {
1150 free(MBSTRG(script_encoding_list));
1151 }
1152 MBSTRG(script_encoding_list) = list;
1153 MBSTRG(script_encoding_list_size) = size;
1154 } else {
1155 if (MBSTRG(script_encoding_list) != NULL) {
1156 free(MBSTRG(script_encoding_list));
1157 }
1158 MBSTRG(script_encoding_list) = NULL;
1159 MBSTRG(script_encoding_list_size) = 0;
1160 return FAILURE;
1161 }
1162
1163 return SUCCESS;
1164 }
1165 /* }}} */
1166 #endif /* ZEND_MULTIBYTE */
1167
1168 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)1169 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1170 {
1171 int c;
1172 char *endptr = NULL;
1173
1174 if (new_value != NULL) {
1175 if (strcasecmp("none", new_value) == 0) {
1176 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1177 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1178 } else if (strcasecmp("long", new_value) == 0) {
1179 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1180 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1181 } else if (strcasecmp("entity", new_value) == 0) {
1182 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1183 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1184 } else {
1185 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1186 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1187 if (new_value_length >0) {
1188 c = strtol(new_value, &endptr, 0);
1189 if (*endptr == '\0') {
1190 MBSTRG(filter_illegal_substchar) = c;
1191 MBSTRG(current_filter_illegal_substchar) = c;
1192 }
1193 }
1194 }
1195 } else {
1196 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1197 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1198 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
1199 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
1200 }
1201
1202 return SUCCESS;
1203 }
1204 /* }}} */
1205
1206 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)1207 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1208 {
1209 if (new_value == NULL) {
1210 return FAILURE;
1211 }
1212
1213 OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1214
1215 if (MBSTRG(encoding_translation)) {
1216 sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1217 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1218 } else {
1219 sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1220 sapi_register_post_entries(php_post_entries TSRMLS_CC);
1221 }
1222
1223 return SUCCESS;
1224 }
1225 /* }}} */
1226
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)1228 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1229 {
1230 zval tmp;
1231 void *re = NULL;
1232
1233 if (!new_value) {
1234 new_value = entry->orig_value;
1235 new_value_length = entry->orig_value_length;
1236 }
1237 php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1238
1239 if (Z_STRLEN(tmp) > 0) {
1240 if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1241 zval_dtor(&tmp);
1242 return FAILURE;
1243 }
1244 }
1245
1246 if (MBSTRG(http_output_conv_mimetypes)) {
1247 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1248 }
1249
1250 MBSTRG(http_output_conv_mimetypes) = re;
1251
1252 zval_dtor(&tmp);
1253 return SUCCESS;
1254 }
1255 /* }}} */
1256 /* }}} */
1257
1258 /* {{{ php.ini directive registration */
/* Directive table. Each entry lists, in order: the directive name, its
 * default value (NULL where none is given), the PHP_INI_* mask passed for it,
 * and the handler invoked on update. The STD_ variants additionally name the
 * zend_mbstring_globals member that is handed to the handler. */
PHP_INI_BEGIN()
	PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
	PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
	PHP_INI_ENTRY("mbstring.http_input", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_input)
	PHP_INI_ENTRY("mbstring.http_output", "pass", PHP_INI_ALL, OnUpdate_mbstring_http_output)
	PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding)
#ifdef ZEND_MULTIBYTE
	PHP_INI_ENTRY("mbstring.script_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_script_encoding)
#endif /* ZEND_MULTIBYTE */
	PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
	/* Stored into the func_overload global by the generic long handler. */
	STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
	PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)

	/* Custom handler: besides storing the flag it re-registers POST entries. */
	STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
		PHP_INI_SYSTEM | PHP_INI_PERDIR,
		OnUpdate_mbstring_encoding_translation,
		encoding_translation, zend_mbstring_globals, mbstring_globals)
	/* Default pattern matches text/... and application/xhtml+xml types. */
	PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
		"^(text/|application/xhtml\\+xml)",
		PHP_INI_ALL,
		OnUpdate_mbstring_http_output_conv_mimetypes)

	/* NOTE(review): declared as a boolean directive but updated through
	 * OnUpdateLong - confirm the strict_detection global is a long. */
	STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
		PHP_INI_ALL,
		OnUpdateLong,
		strict_detection, zend_mbstring_globals, mbstring_globals)
PHP_INI_END()
1286 /* }}} */
1287
1288 /* {{{ module global initialize handler */
1289 static PHP_GINIT_FUNCTION(mbstring)
1290 {
1291 mbstring_globals->language = mbfl_no_language_uni;
1292 mbstring_globals->internal_encoding = mbfl_no_encoding_invalid;
1293 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1294 #ifdef ZEND_MULTIBYTE
1295 mbstring_globals->script_encoding_list = NULL;
1296 mbstring_globals->script_encoding_list_size = 0;
1297 #endif /* ZEND_MULTIBYTE */
1298 mbstring_globals->http_output_encoding = mbfl_no_encoding_pass;
1299 mbstring_globals->current_http_output_encoding = mbfl_no_encoding_pass;
1300 mbstring_globals->http_input_identify = mbfl_no_encoding_invalid;
1301 mbstring_globals->http_input_identify_get = mbfl_no_encoding_invalid;
1302 mbstring_globals->http_input_identify_post = mbfl_no_encoding_invalid;
1303 mbstring_globals->http_input_identify_cookie = mbfl_no_encoding_invalid;
1304 mbstring_globals->http_input_identify_string = mbfl_no_encoding_invalid;
1305 mbstring_globals->http_input_list = NULL;
1306 mbstring_globals->http_input_list_size = 0;
1307 mbstring_globals->detect_order_list = NULL;
1308 mbstring_globals->detect_order_list_size = 0;
1309 mbstring_globals->current_detect_order_list = NULL;
1310 mbstring_globals->current_detect_order_list_size = 0;
1311 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1312 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1313 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1314 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1315 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1316 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1317 mbstring_globals->illegalchars = 0;
1318 mbstring_globals->func_overload = 0;
1319 mbstring_globals->encoding_translation = 0;
1320 mbstring_globals->strict_detection = 0;
1321 mbstring_globals->outconv = NULL;
1322 mbstring_globals->http_output_conv_mimetypes = NULL;
1323 #if HAVE_MBREGEX
1324 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1325 #endif
1326 }
1327 /* }}} */
1328
1329 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1330 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1331 {
1332 if (mbstring_globals->http_input_list) {
1333 free(mbstring_globals->http_input_list);
1334 }
1335 #ifdef ZEND_MULTIBYTE
1336 if (mbstring_globals->script_encoding_list) {
1337 free(mbstring_globals->script_encoding_list);
1338 }
1339 #endif /* ZEND_MULTIBYTE */
1340 if (mbstring_globals->detect_order_list) {
1341 free(mbstring_globals->detect_order_list);
1342 }
1343 if (mbstring_globals->http_output_conv_mimetypes) {
1344 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1345 }
1346 #if HAVE_MBREGEX
1347 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1348 #endif
1349 }
1350 /* }}} */
1351
1352 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1353 PHP_MINIT_FUNCTION(mbstring)
1354 {
1355 __mbfl_allocators = &_php_mb_allocators;
1356
1357 REGISTER_INI_ENTRIES();
1358
1359 /* This is a global handler. Should not be set in a per-request handler. */
1360 sapi_register_treat_data(mbstr_treat_data);
1361
1362 /* Post handlers are stored in the thread-local context. */
1363 if (MBSTRG(encoding_translation)) {
1364 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1365 }
1366
1367 REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1368 REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1369 REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1370
1371 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1372 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1373 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1374
1375 #if HAVE_MBREGEX
1376 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1377 #endif
1378 return SUCCESS;
1379 }
1380 /* }}} */
1381
1382 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
/* Module shutdown: unregisters the ini entries and, when mbregex is compiled
 * in, runs its module shutdown with the same arguments. Always returns
 * SUCCESS. */
PHP_MSHUTDOWN_FUNCTION(mbstring)
{
	UNREGISTER_INI_ENTRIES();

#if HAVE_MBREGEX
	PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif

	return SUCCESS;
}
1393 /* }}} */
1394
1395 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1396 PHP_RINIT_FUNCTION(mbstring)
1397 {
1398 int n;
1399 enum mbfl_no_encoding *list=NULL, *entry;
1400 zend_function *func, *orig;
1401 const struct mb_overload_def *p;
1402
1403 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1404 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1405 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1406 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1407
1408 MBSTRG(illegalchars) = 0;
1409
1410 n = 0;
1411 if (MBSTRG(detect_order_list)) {
1412 list = MBSTRG(detect_order_list);
1413 n = MBSTRG(detect_order_list_size);
1414 }
1415 if (n <= 0) {
1416 list = MBSTRG(default_detect_order_list);
1417 n = MBSTRG(default_detect_order_list_size);
1418 }
1419 entry = (enum mbfl_no_encoding *)safe_emalloc(n, sizeof(int), 0);
1420 MBSTRG(current_detect_order_list) = entry;
1421 MBSTRG(current_detect_order_list_size) = n;
1422 while (n > 0) {
1423 *entry++ = *list++;
1424 n--;
1425 }
1426
1427 /* override original function. */
1428 if (MBSTRG(func_overload)){
1429 p = &(mb_ovld[0]);
1430
1431 while (p->type > 0) {
1432 if ((MBSTRG(func_overload) & p->type) == p->type &&
1433 zend_hash_find(EG(function_table), p->save_func,
1434 strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1435
1436 zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1437
1438 if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1439 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1440 return FAILURE;
1441 } else {
1442 zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1443
1444 if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1445 NULL) == FAILURE) {
1446 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1447 return FAILURE;
1448 }
1449 }
1450 }
1451 p++;
1452 }
1453 }
1454 #if HAVE_MBREGEX
1455 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1456 #endif
1457 #ifdef ZEND_MULTIBYTE
1458 zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding)) TSRMLS_CC);
1459 php_mb_set_zend_encoding(TSRMLS_C);
1460 #endif /* ZEND_MULTIBYTE */
1461
1462 return SUCCESS;
1463 }
1464 /* }}} */
1465
1466 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1467 PHP_RSHUTDOWN_FUNCTION(mbstring)
1468 {
1469 const struct mb_overload_def *p;
1470 zend_function *orig;
1471
1472 if (MBSTRG(current_detect_order_list) != NULL) {
1473 efree(MBSTRG(current_detect_order_list));
1474 MBSTRG(current_detect_order_list) = NULL;
1475 MBSTRG(current_detect_order_list_size) = 0;
1476 }
1477 if (MBSTRG(outconv) != NULL) {
1478 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1479 mbfl_buffer_converter_delete(MBSTRG(outconv));
1480 MBSTRG(outconv) = NULL;
1481 }
1482
1483 /* clear http input identification. */
1484 MBSTRG(http_input_identify) = mbfl_no_encoding_invalid;
1485 MBSTRG(http_input_identify_post) = mbfl_no_encoding_invalid;
1486 MBSTRG(http_input_identify_get) = mbfl_no_encoding_invalid;
1487 MBSTRG(http_input_identify_cookie) = mbfl_no_encoding_invalid;
1488 MBSTRG(http_input_identify_string) = mbfl_no_encoding_invalid;
1489
1490 /* clear overloaded function. */
1491 if (MBSTRG(func_overload)){
1492 p = &(mb_ovld[0]);
1493 while (p->type > 0) {
1494 if ((MBSTRG(func_overload) & p->type) == p->type &&
1495 zend_hash_find(EG(function_table), p->save_func,
1496 strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1497
1498 zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1499 zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1500 }
1501 p++;
1502 }
1503 }
1504
1505 #if HAVE_MBREGEX
1506 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1507 #endif
1508
1509 return SUCCESS;
1510 }
1511 /* }}} */
1512
1513 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1514 PHP_MINFO_FUNCTION(mbstring)
1515 {
1516 php_info_print_table_start();
1517 php_info_print_table_row(2, "Multibyte Support", "enabled");
1518 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1519 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1520 php_info_print_table_end();
1521
1522 php_info_print_table_start();
1523 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1524 php_info_print_table_end();
1525
1526 #if HAVE_MBREGEX
1527 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1528 #endif
1529
1530 DISPLAY_INI_ENTRIES();
1531 }
1532 /* }}} */
1533
1534 /* {{{ proto string mb_language([string language])
1535 Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1536 PHP_FUNCTION(mb_language)
1537 {
1538 char *name = NULL;
1539 int name_len = 0;
1540
1541 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1542 return;
1543 }
1544 if (name == NULL) {
1545 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1546 } else {
1547 if (FAILURE == zend_alter_ini_entry(
1548 "mbstring.language", sizeof("mbstring.language"),
1549 name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1550 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1551 RETVAL_FALSE;
1552 } else {
1553 RETVAL_TRUE;
1554 }
1555 }
1556 }
1557 /* }}} */
1558
1559 /* {{{ proto string mb_internal_encoding([string encoding])
1560 Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1561 PHP_FUNCTION(mb_internal_encoding)
1562 {
1563 char *name = NULL;
1564 int name_len;
1565 enum mbfl_no_encoding no_encoding;
1566
1567 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1568 RETURN_FALSE;
1569 }
1570 if (name == NULL) {
1571 name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
1572 if (name != NULL) {
1573 RETURN_STRING(name, 1);
1574 } else {
1575 RETURN_FALSE;
1576 }
1577 } else {
1578 no_encoding = mbfl_name2no_encoding(name);
1579 if (no_encoding == mbfl_no_encoding_invalid) {
1580 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1581 RETURN_FALSE;
1582 } else {
1583 MBSTRG(current_internal_encoding) = no_encoding;
1584 #ifdef ZEND_MULTIBYTE
1585 /* TODO: make independent from mbstring.encoding_translation? */
1586 if (MBSTRG(encoding_translation)) {
1587 zend_multibyte_set_internal_encoding(name TSRMLS_CC);
1588 }
1589 #endif /* ZEND_MULTIBYTE */
1590 RETURN_TRUE;
1591 }
1592 }
1593 }
1594 /* }}} */
1595
1596 /* {{{ proto mixed mb_http_input([string type])
1597 Returns the input encoding */
PHP_FUNCTION(mb_http_input)1598 PHP_FUNCTION(mb_http_input)
1599 {
1600 char *typ = NULL;
1601 int typ_len;
1602 int retname, n;
1603 char *name, *list, *temp;
1604 enum mbfl_no_encoding *entry;
1605 enum mbfl_no_encoding result = mbfl_no_encoding_invalid;
1606
1607 retname = 1;
1608 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1609 RETURN_FALSE;
1610 }
1611 if (typ == NULL) {
1612 result = MBSTRG(http_input_identify);
1613 } else {
1614 switch (*typ) {
1615 case 'G':
1616 case 'g':
1617 result = MBSTRG(http_input_identify_get);
1618 break;
1619 case 'P':
1620 case 'p':
1621 result = MBSTRG(http_input_identify_post);
1622 break;
1623 case 'C':
1624 case 'c':
1625 result = MBSTRG(http_input_identify_cookie);
1626 break;
1627 case 'S':
1628 case 's':
1629 result = MBSTRG(http_input_identify_string);
1630 break;
1631 case 'I':
1632 case 'i':
1633 array_init(return_value);
1634 entry = MBSTRG(http_input_list);
1635 n = MBSTRG(http_input_list_size);
1636 while (n > 0) {
1637 name = (char *)mbfl_no_encoding2name(*entry);
1638 if (name) {
1639 add_next_index_string(return_value, name, 1);
1640 }
1641 entry++;
1642 n--;
1643 }
1644 retname = 0;
1645 break;
1646 case 'L':
1647 case 'l':
1648 entry = MBSTRG(http_input_list);
1649 n = MBSTRG(http_input_list_size);
1650 list = NULL;
1651 while (n > 0) {
1652 name = (char *)mbfl_no_encoding2name(*entry);
1653 if (name) {
1654 if (list) {
1655 temp = list;
1656 spprintf(&list, 0, "%s,%s", temp, name);
1657 efree(temp);
1658 if (!list) {
1659 break;
1660 }
1661 } else {
1662 list = estrdup(name);
1663 }
1664 }
1665 entry++;
1666 n--;
1667 }
1668 if (!list) {
1669 RETURN_FALSE;
1670 }
1671 RETVAL_STRING(list, 0);
1672 retname = 0;
1673 break;
1674 default:
1675 result = MBSTRG(http_input_identify);
1676 break;
1677 }
1678 }
1679
1680 if (retname) {
1681 if (result != mbfl_no_encoding_invalid &&
1682 (name = (char *)mbfl_no_encoding2name(result)) != NULL) {
1683 RETVAL_STRING(name, 1);
1684 } else {
1685 RETVAL_FALSE;
1686 }
1687 }
1688 }
1689 /* }}} */
1690
1691 /* {{{ proto string mb_http_output([string encoding])
1692 Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1693 PHP_FUNCTION(mb_http_output)
1694 {
1695 char *name = NULL;
1696 int name_len;
1697 enum mbfl_no_encoding no_encoding;
1698
1699 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1700 RETURN_FALSE;
1701 }
1702
1703 if (name == NULL) {
1704 name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding));
1705 if (name != NULL) {
1706 RETURN_STRING(name, 1);
1707 } else {
1708 RETURN_FALSE;
1709 }
1710 } else {
1711 no_encoding = mbfl_name2no_encoding(name);
1712 if (no_encoding == mbfl_no_encoding_invalid) {
1713 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1714 RETURN_FALSE;
1715 } else {
1716 MBSTRG(current_http_output_encoding) = no_encoding;
1717 RETURN_TRUE;
1718 }
1719 }
1720 }
1721 /* }}} */
1722
/* {{{ proto bool|array mb_detect_order([mixed encoding-list])
   Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1725 PHP_FUNCTION(mb_detect_order)
1726 {
1727 zval **arg1 = NULL;
1728 int n, size;
1729 enum mbfl_no_encoding *list, *entry;
1730 char *name;
1731
1732 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1733 return;
1734 }
1735
1736 if (!arg1) {
1737 array_init(return_value);
1738 entry = MBSTRG(current_detect_order_list);
1739 n = MBSTRG(current_detect_order_list_size);
1740 while (n > 0) {
1741 name = (char *)mbfl_no_encoding2name(*entry);
1742 if (name) {
1743 add_next_index_string(return_value, name, 1);
1744 }
1745 entry++;
1746 n--;
1747 }
1748 } else {
1749 list = NULL;
1750 size = 0;
1751 switch (Z_TYPE_PP(arg1)) {
1752 case IS_ARRAY:
1753 if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1754 if (list) {
1755 efree(list);
1756 }
1757 RETURN_FALSE;
1758 }
1759 break;
1760 default:
1761 convert_to_string_ex(arg1);
1762 if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1763 if (list) {
1764 efree(list);
1765 }
1766 RETURN_FALSE;
1767 }
1768 break;
1769 }
1770
1771 if (list == NULL) {
1772 RETURN_FALSE;
1773 }
1774
1775 if (MBSTRG(current_detect_order_list)) {
1776 efree(MBSTRG(current_detect_order_list));
1777 }
1778 MBSTRG(current_detect_order_list) = list;
1779 MBSTRG(current_detect_order_list_size) = size;
1780 RETURN_TRUE;
1781 }
1782 }
1783 /* }}} */
1784
1785 /* {{{ proto mixed mb_substitute_character([mixed substchar])
1786 Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1787 PHP_FUNCTION(mb_substitute_character)
1788 {
1789 zval **arg1 = NULL;
1790
1791 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1792 return;
1793 }
1794
1795 if (!arg1) {
1796 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1797 RETURN_STRING("none", 1);
1798 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1799 RETURN_STRING("long", 1);
1800 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1801 RETURN_STRING("entity", 1);
1802 } else {
1803 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1804 }
1805 } else {
1806 RETVAL_TRUE;
1807
1808 switch (Z_TYPE_PP(arg1)) {
1809 case IS_STRING:
1810 if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1811 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1812 } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1813 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1814 } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1815 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1816 } else {
1817 convert_to_long_ex(arg1);
1818
1819 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1820 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1821 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1822 } else {
1823 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1824 RETURN_FALSE;
1825 }
1826 }
1827 break;
1828 default:
1829 convert_to_long_ex(arg1);
1830 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
1831 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1832 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
1833 } else {
1834 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
1835 RETURN_FALSE;
1836 }
1837 break;
1838 }
1839 }
1840 }
1841 /* }}} */
1842
1843 /* {{{ proto string mb_preferred_mime_name(string encoding)
1844 Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)1845 PHP_FUNCTION(mb_preferred_mime_name)
1846 {
1847 enum mbfl_no_encoding no_encoding;
1848 char *name = NULL;
1849 int name_len;
1850
1851 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
1852 return;
1853 } else {
1854 no_encoding = mbfl_name2no_encoding(name);
1855 if (no_encoding == mbfl_no_encoding_invalid) {
1856 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1857 RETVAL_FALSE;
1858 } else {
1859 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1860 if (preferred_name == NULL || *preferred_name == '\0') {
1861 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1862 RETVAL_FALSE;
1863 } else {
1864 RETVAL_STRING((char *)preferred_name, 1);
1865 }
1866 }
1867 }
1868 }
1869 /* }}} */
1870
/* Byte-range predicates (presumably Shift_JIS first/second byte, judging by
 * the names). Each evaluates to 1 when c lies in one of the listed ranges and
 * to 0 otherwise. The argument is evaluated more than once. */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
1873
1874 /* {{{ proto bool mb_parse_str(string encoded_string [, array result])
1875 Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)1876 PHP_FUNCTION(mb_parse_str)
1877 {
1878 zval *track_vars_array = NULL;
1879 char *encstr = NULL;
1880 int encstr_len;
1881 php_mb_encoding_handler_info_t info;
1882 enum mbfl_no_encoding detected;
1883
1884 track_vars_array = NULL;
1885 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
1886 return;
1887 }
1888
1889 /* Clear out the array */
1890 if (track_vars_array != NULL) {
1891 zval_dtor(track_vars_array);
1892 array_init(track_vars_array);
1893 }
1894
1895 encstr = estrndup(encstr, encstr_len);
1896
1897 info.data_type = PARSE_STRING;
1898 info.separator = PG(arg_separator).input;
1899 info.force_register_globals = (track_vars_array == NULL);
1900 info.report_errors = 1;
1901 info.to_encoding = MBSTRG(current_internal_encoding);
1902 info.to_language = MBSTRG(language);
1903 info.from_encodings = MBSTRG(http_input_list);
1904 info.num_from_encodings = MBSTRG(http_input_list_size);
1905 info.from_language = MBSTRG(language);
1906
1907 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
1908
1909 MBSTRG(http_input_identify) = detected;
1910
1911 RETVAL_BOOL(detected != mbfl_no_encoding_invalid);
1912
1913 if (encstr != NULL) efree(encstr);
1914 }
1915 /* }}} */
1916
1917 /* {{{ proto string mb_output_handler(string contents, int status)
1918 Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)1919 PHP_FUNCTION(mb_output_handler)
1920 {
1921 char *arg_string;
1922 int arg_string_len;
1923 long arg_status;
1924 mbfl_string string, result;
1925 const char *charset;
1926 char *p;
1927 enum mbfl_no_encoding encoding;
1928 int last_feed, len;
1929 unsigned char send_text_mimetype = 0;
1930 char *s, *mimetype = NULL;
1931
1932 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
1933 return;
1934 }
1935
1936 encoding = MBSTRG(current_http_output_encoding);
1937
1938 /* start phase only */
1939 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1940 /* delete the converter just in case. */
1941 if (MBSTRG(outconv)) {
1942 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1943 mbfl_buffer_converter_delete(MBSTRG(outconv));
1944 MBSTRG(outconv) = NULL;
1945 }
1946 if (encoding == mbfl_no_encoding_pass) {
1947 RETURN_STRINGL(arg_string, arg_string_len, 1);
1948 }
1949
1950 /* analyze mime type */
1951 if (SG(sapi_headers).mimetype &&
1952 _php_mb_match_regex(
1953 MBSTRG(http_output_conv_mimetypes),
1954 SG(sapi_headers).mimetype,
1955 strlen(SG(sapi_headers).mimetype))) {
1956 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
1957 mimetype = estrdup(SG(sapi_headers).mimetype);
1958 } else {
1959 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1960 }
1961 send_text_mimetype = 1;
1962 } else if (SG(sapi_headers).send_default_content_type) {
1963 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1964 }
1965
1966 /* if content-type is not yet set, set it and activate the converter */
1967 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1968 charset = mbfl_no2preferred_mime_name(encoding);
1969 if (charset) {
1970 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
1971 if (sapi_add_header(p, len, 0) != FAILURE) {
1972 SG(sapi_headers).send_default_content_type = 0;
1973 }
1974 }
1975 /* activate the converter */
1976 MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1977 if (send_text_mimetype){
1978 efree(mimetype);
1979 }
1980 }
1981 }
1982
1983 /* just return if the converter is not activated. */
1984 if (MBSTRG(outconv) == NULL) {
1985 RETURN_STRINGL(arg_string, arg_string_len, 1);
1986 }
1987
1988 /* flag */
1989 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1990 /* mode */
1991 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1992 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1993
1994 /* feed the string */
1995 mbfl_string_init(&string);
1996 string.no_language = MBSTRG(language);
1997 string.no_encoding = MBSTRG(current_internal_encoding);
1998 string.val = (unsigned char *)arg_string;
1999 string.len = arg_string_len;
2000 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2001 if (last_feed) {
2002 mbfl_buffer_converter_flush(MBSTRG(outconv));
2003 }
2004 /* get the converter output, and return it */
2005 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2006 RETVAL_STRINGL((char *)result.val, result.len, 0); /* the string is already strdup()'ed */
2007
2008 /* delete the converter if it is the last feed. */
2009 if (last_feed) {
2010 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2011 mbfl_buffer_converter_delete(MBSTRG(outconv));
2012 MBSTRG(outconv) = NULL;
2013 }
2014 }
2015 /* }}} */
2016
2017 /* {{{ proto int mb_strlen(string str [, string encoding])
2018 Get character numbers of a string */
PHP_FUNCTION(mb_strlen)2019 PHP_FUNCTION(mb_strlen)
2020 {
2021 int n;
2022 mbfl_string string;
2023 char *enc_name = NULL;
2024 int enc_name_len;
2025
2026 mbfl_string_init(&string);
2027
2028 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2029 RETURN_FALSE;
2030 }
2031
2032 string.no_language = MBSTRG(language);
2033 if (enc_name == NULL) {
2034 string.no_encoding = MBSTRG(current_internal_encoding);
2035 } else {
2036 string.no_encoding = mbfl_name2no_encoding(enc_name);
2037 if (string.no_encoding == mbfl_no_encoding_invalid) {
2038 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2039 RETURN_FALSE;
2040 }
2041 }
2042
2043 n = mbfl_strlen(&string);
2044 if (n >= 0) {
2045 RETVAL_LONG(n);
2046 } else {
2047 RETVAL_FALSE;
2048 }
2049 }
2050 /* }}} */
2051
2052 /* {{{ proto int mb_strpos(string haystack, string needle [, int offset [, string encoding]])
2053 Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)2054 PHP_FUNCTION(mb_strpos)
2055 {
2056 int n, reverse = 0;
2057 long offset;
2058 mbfl_string haystack, needle;
2059 char *enc_name = NULL;
2060 int enc_name_len;
2061
2062 mbfl_string_init(&haystack);
2063 mbfl_string_init(&needle);
2064 haystack.no_language = MBSTRG(language);
2065 haystack.no_encoding = MBSTRG(current_internal_encoding);
2066 needle.no_language = MBSTRG(language);
2067 needle.no_encoding = MBSTRG(current_internal_encoding);
2068 offset = 0;
2069
2070 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2071 RETURN_FALSE;
2072 }
2073
2074 if (enc_name != NULL) {
2075 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2076 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2077 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2078 RETURN_FALSE;
2079 }
2080 }
2081
2082 if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2083 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2084 RETURN_FALSE;
2085 }
2086 if (needle.len == 0) {
2087 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2088 RETURN_FALSE;
2089 }
2090
2091 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2092 if (n >= 0) {
2093 RETVAL_LONG(n);
2094 } else {
2095 switch (-n) {
2096 case 1:
2097 break;
2098 case 2:
2099 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2100 break;
2101 case 4:
2102 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2103 break;
2104 case 8:
2105 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2106 break;
2107 default:
2108 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2109 break;
2110 }
2111 RETVAL_FALSE;
2112 }
2113 }
2114 /* }}} */
2115
2116 /* {{{ proto int mb_strrpos(string haystack, string needle [, int offset [, string encoding]])
2117 Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)2118 PHP_FUNCTION(mb_strrpos)
2119 {
2120 int n;
2121 mbfl_string haystack, needle;
2122 char *enc_name = NULL;
2123 int enc_name_len;
2124 zval **zoffset = NULL;
2125 long offset = 0, str_flg;
2126 char *enc_name2 = NULL;
2127 int enc_name_len2;
2128
2129 mbfl_string_init(&haystack);
2130 mbfl_string_init(&needle);
2131 haystack.no_language = MBSTRG(language);
2132 haystack.no_encoding = MBSTRG(current_internal_encoding);
2133 needle.no_language = MBSTRG(language);
2134 needle.no_encoding = MBSTRG(current_internal_encoding);
2135
2136 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2137 RETURN_FALSE;
2138 }
2139
2140 if (zoffset) {
2141 if (Z_TYPE_PP(zoffset) == IS_STRING) {
2142 enc_name2 = Z_STRVAL_PP(zoffset);
2143 enc_name_len2 = Z_STRLEN_PP(zoffset);
2144 str_flg = 1;
2145
2146 if (enc_name2 != NULL) {
2147 switch (*enc_name2) {
2148 case '0':
2149 case '1':
2150 case '2':
2151 case '3':
2152 case '4':
2153 case '5':
2154 case '6':
2155 case '7':
2156 case '8':
2157 case '9':
2158 case ' ':
2159 case '-':
2160 case '.':
2161 break;
2162 default :
2163 str_flg = 0;
2164 break;
2165 }
2166 }
2167
2168 if (str_flg) {
2169 convert_to_long_ex(zoffset);
2170 offset = Z_LVAL_PP(zoffset);
2171 } else {
2172 enc_name = enc_name2;
2173 enc_name_len = enc_name_len2;
2174 }
2175 } else {
2176 convert_to_long_ex(zoffset);
2177 offset = Z_LVAL_PP(zoffset);
2178 }
2179 }
2180
2181 if (enc_name != NULL) {
2182 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2183 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2184 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2185 RETURN_FALSE;
2186 }
2187 }
2188
2189 if (haystack.len <= 0) {
2190 RETURN_FALSE;
2191 }
2192 if (needle.len <= 0) {
2193 RETURN_FALSE;
2194 }
2195
2196 {
2197 int haystack_char_len = mbfl_strlen(&haystack);
2198 if ((offset > 0 && offset > haystack_char_len) ||
2199 (offset < 0 && -offset > haystack_char_len)) {
2200 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2201 RETURN_FALSE;
2202 }
2203 }
2204
2205 n = mbfl_strpos(&haystack, &needle, offset, 1);
2206 if (n >= 0) {
2207 RETVAL_LONG(n);
2208 } else {
2209 RETVAL_FALSE;
2210 }
2211 }
2212 /* }}} */
2213
2214 /* {{{ proto int mb_stripos(string haystack, string needle [, int offset [, string encoding]])
2215 Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)2216 PHP_FUNCTION(mb_stripos)
2217 {
2218 int n;
2219 long offset;
2220 mbfl_string haystack, needle;
2221 char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2222 int from_encoding_len;
2223 n = -1;
2224 offset = 0;
2225
2226 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2227 RETURN_FALSE;
2228 }
2229 if (needle.len == 0) {
2230 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2231 RETURN_FALSE;
2232 }
2233 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2234
2235 if (n >= 0) {
2236 RETVAL_LONG(n);
2237 } else {
2238 RETVAL_FALSE;
2239 }
2240 }
2241 /* }}} */
2242
2243 /* {{{ proto int mb_strripos(string haystack, string needle [, int offset [, string encoding]])
2244 Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)2245 PHP_FUNCTION(mb_strripos)
2246 {
2247 int n;
2248 long offset;
2249 mbfl_string haystack, needle;
2250 const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2251 int from_encoding_len;
2252 n = -1;
2253 offset = 0;
2254
2255 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2256 RETURN_FALSE;
2257 }
2258
2259 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2260
2261 if (n >= 0) {
2262 RETVAL_LONG(n);
2263 } else {
2264 RETVAL_FALSE;
2265 }
2266 }
2267 /* }}} */
2268
2269 /* {{{ proto string mb_strstr(string haystack, string needle[, bool part[, string encoding]])
2270 Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2271 PHP_FUNCTION(mb_strstr)
2272 {
2273 int n, len, mblen;
2274 mbfl_string haystack, needle, result, *ret = NULL;
2275 char *enc_name = NULL;
2276 int enc_name_len;
2277 zend_bool part = 0;
2278
2279 mbfl_string_init(&haystack);
2280 mbfl_string_init(&needle);
2281 haystack.no_language = MBSTRG(language);
2282 haystack.no_encoding = MBSTRG(current_internal_encoding);
2283 needle.no_language = MBSTRG(language);
2284 needle.no_encoding = MBSTRG(current_internal_encoding);
2285
2286 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2287 RETURN_FALSE;
2288 }
2289
2290 if (enc_name != NULL) {
2291 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2292 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2293 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2294 RETURN_FALSE;
2295 }
2296 }
2297
2298 if (needle.len <= 0) {
2299 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2300 RETURN_FALSE;
2301 }
2302 n = mbfl_strpos(&haystack, &needle, 0, 0);
2303 if (n >= 0) {
2304 mblen = mbfl_strlen(&haystack);
2305 if (part) {
2306 ret = mbfl_substr(&haystack, &result, 0, n);
2307 if (ret != NULL) {
2308 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2309 } else {
2310 RETVAL_FALSE;
2311 }
2312 } else {
2313 len = (mblen - n);
2314 ret = mbfl_substr(&haystack, &result, n, len);
2315 if (ret != NULL) {
2316 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2317 } else {
2318 RETVAL_FALSE;
2319 }
2320 }
2321 } else {
2322 RETVAL_FALSE;
2323 }
2324 }
2325 /* }}} */
2326
2327 /* {{{ proto string mb_strrchr(string haystack, string needle[, bool part[, string encoding]])
2328 Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2329 PHP_FUNCTION(mb_strrchr)
2330 {
2331 int n, len, mblen;
2332 mbfl_string haystack, needle, result, *ret = NULL;
2333 char *enc_name = NULL;
2334 int enc_name_len;
2335 zend_bool part = 0;
2336
2337 mbfl_string_init(&haystack);
2338 mbfl_string_init(&needle);
2339 haystack.no_language = MBSTRG(language);
2340 haystack.no_encoding = MBSTRG(current_internal_encoding);
2341 needle.no_language = MBSTRG(language);
2342 needle.no_encoding = MBSTRG(current_internal_encoding);
2343
2344 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2345 RETURN_FALSE;
2346 }
2347
2348 if (enc_name != NULL) {
2349 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2350 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2351 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2352 RETURN_FALSE;
2353 }
2354 }
2355
2356 if (haystack.len <= 0) {
2357 RETURN_FALSE;
2358 }
2359 if (needle.len <= 0) {
2360 RETURN_FALSE;
2361 }
2362 n = mbfl_strpos(&haystack, &needle, 0, 1);
2363 if (n >= 0) {
2364 mblen = mbfl_strlen(&haystack);
2365 if (part) {
2366 ret = mbfl_substr(&haystack, &result, 0, n);
2367 if (ret != NULL) {
2368 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2369 } else {
2370 RETVAL_FALSE;
2371 }
2372 } else {
2373 len = (mblen - n);
2374 ret = mbfl_substr(&haystack, &result, n, len);
2375 if (ret != NULL) {
2376 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2377 } else {
2378 RETVAL_FALSE;
2379 }
2380 }
2381 } else {
2382 RETVAL_FALSE;
2383 }
2384 }
2385 /* }}} */
2386
2387 /* {{{ proto string mb_stristr(string haystack, string needle[, bool part[, string encoding]])
2388 Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2389 PHP_FUNCTION(mb_stristr)
2390 {
2391 zend_bool part = 0;
2392 unsigned int from_encoding_len, len, mblen;
2393 int n;
2394 mbfl_string haystack, needle, result, *ret = NULL;
2395 const char *from_encoding = mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2396 mbfl_string_init(&haystack);
2397 mbfl_string_init(&needle);
2398 haystack.no_language = MBSTRG(language);
2399 haystack.no_encoding = MBSTRG(current_internal_encoding);
2400 needle.no_language = MBSTRG(language);
2401 needle.no_encoding = MBSTRG(current_internal_encoding);
2402
2403
2404 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2405 RETURN_FALSE;
2406 }
2407
2408 if (!needle.len) {
2409 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2410 RETURN_FALSE;
2411 }
2412
2413 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2414 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2415 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2416 RETURN_FALSE;
2417 }
2418
2419 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2420
2421 if (n <0) {
2422 RETURN_FALSE;
2423 }
2424
2425 mblen = mbfl_strlen(&haystack);
2426
2427 if (part) {
2428 ret = mbfl_substr(&haystack, &result, 0, n);
2429 if (ret != NULL) {
2430 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2431 } else {
2432 RETVAL_FALSE;
2433 }
2434 } else {
2435 len = (mblen - n);
2436 ret = mbfl_substr(&haystack, &result, n, len);
2437 if (ret != NULL) {
2438 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2439 } else {
2440 RETVAL_FALSE;
2441 }
2442 }
2443 }
2444 /* }}} */
2445
2446 /* {{{ proto string mb_strrichr(string haystack, string needle[, bool part[, string encoding]])
2447 Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2448 PHP_FUNCTION(mb_strrichr)
2449 {
2450 zend_bool part = 0;
2451 int n, from_encoding_len, len, mblen;
2452 mbfl_string haystack, needle, result, *ret = NULL;
2453 char *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2454 mbfl_string_init(&haystack);
2455 mbfl_string_init(&needle);
2456 haystack.no_language = MBSTRG(language);
2457 haystack.no_encoding = MBSTRG(current_internal_encoding);
2458 needle.no_language = MBSTRG(language);
2459 needle.no_encoding = MBSTRG(current_internal_encoding);
2460
2461
2462 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2463 RETURN_FALSE;
2464 }
2465
2466 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2467 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2468 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2469 RETURN_FALSE;
2470 }
2471
2472 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2473
2474 if (n <0) {
2475 RETURN_FALSE;
2476 }
2477
2478 mblen = mbfl_strlen(&haystack);
2479
2480 if (part) {
2481 ret = mbfl_substr(&haystack, &result, 0, n);
2482 if (ret != NULL) {
2483 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2484 } else {
2485 RETVAL_FALSE;
2486 }
2487 } else {
2488 len = (mblen - n);
2489 ret = mbfl_substr(&haystack, &result, n, len);
2490 if (ret != NULL) {
2491 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2492 } else {
2493 RETVAL_FALSE;
2494 }
2495 }
2496 }
2497 /* }}} */
2498
2499 /* {{{ proto int mb_substr_count(string haystack, string needle [, string encoding])
2500 Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2501 PHP_FUNCTION(mb_substr_count)
2502 {
2503 int n;
2504 mbfl_string haystack, needle;
2505 char *enc_name = NULL;
2506 int enc_name_len;
2507
2508 mbfl_string_init(&haystack);
2509 mbfl_string_init(&needle);
2510 haystack.no_language = MBSTRG(language);
2511 haystack.no_encoding = MBSTRG(current_internal_encoding);
2512 needle.no_language = MBSTRG(language);
2513 needle.no_encoding = MBSTRG(current_internal_encoding);
2514
2515 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2516 return;
2517 }
2518
2519 if (enc_name != NULL) {
2520 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2521 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2522 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2523 RETURN_FALSE;
2524 }
2525 }
2526
2527 if (needle.len <= 0) {
2528 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2529 RETURN_FALSE;
2530 }
2531
2532 n = mbfl_substr_count(&haystack, &needle);
2533 if (n >= 0) {
2534 RETVAL_LONG(n);
2535 } else {
2536 RETVAL_FALSE;
2537 }
2538 }
2539 /* }}} */
2540
2541 /* {{{ proto string mb_substr(string str, int start [, int length [, string encoding]])
2542 Returns part of a string */
PHP_FUNCTION(mb_substr)2543 PHP_FUNCTION(mb_substr)
2544 {
2545 size_t argc = ZEND_NUM_ARGS();
2546 char *str, *encoding;
2547 long from, len;
2548 int mblen, str_len, encoding_len;
2549 mbfl_string string, result, *ret;
2550
2551 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", &str, &str_len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2552 return;
2553 }
2554
2555 mbfl_string_init(&string);
2556 string.no_language = MBSTRG(language);
2557 string.no_encoding = MBSTRG(current_internal_encoding);
2558
2559 if (argc == 4) {
2560 string.no_encoding = mbfl_name2no_encoding(encoding);
2561 if (string.no_encoding == mbfl_no_encoding_invalid) {
2562 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2563 RETURN_FALSE;
2564 }
2565 }
2566
2567 string.val = (unsigned char *)str;
2568 string.len = str_len;
2569
2570 if (argc < 3) {
2571 len = str_len;
2572 }
2573
2574 /* measures length */
2575 mblen = 0;
2576 if (from < 0 || len < 0) {
2577 mblen = mbfl_strlen(&string);
2578 }
2579
2580 /* if "from" position is negative, count start position from the end
2581 * of the string
2582 */
2583 if (from < 0) {
2584 from = mblen + from;
2585 if (from < 0) {
2586 from = 0;
2587 }
2588 }
2589
2590 /* if "length" position is negative, set it to the length
2591 * needed to stop that many chars from the end of the string
2592 */
2593 if (len < 0) {
2594 len = (mblen - from) + len;
2595 if (len < 0) {
2596 len = 0;
2597 }
2598 }
2599
2600 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2601 && (from >= mbfl_strlen(&string))) {
2602 RETURN_FALSE;
2603 }
2604
2605 ret = mbfl_substr(&string, &result, from, len);
2606 if (NULL == ret) {
2607 RETURN_FALSE;
2608 }
2609
2610 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2611 }
2612 /* }}} */
2613
2614 /* {{{ proto string mb_strcut(string str, int start [, int length [, string encoding]])
2615 Returns part of a string */
PHP_FUNCTION(mb_strcut)2616 PHP_FUNCTION(mb_strcut)
2617 {
2618 size_t argc = ZEND_NUM_ARGS();
2619 char *encoding;
2620 long from, len;
2621 int encoding_len;
2622 mbfl_string string, result, *ret;
2623
2624 mbfl_string_init(&string);
2625 string.no_language = MBSTRG(language);
2626 string.no_encoding = MBSTRG(current_internal_encoding);
2627
2628 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|ls", (char **)&string.val, (int **)&string.len, &from, &len, &encoding, &encoding_len) == FAILURE) {
2629 return;
2630 }
2631
2632 if (argc == 4) {
2633 string.no_encoding = mbfl_name2no_encoding(encoding);
2634 if (string.no_encoding == mbfl_no_encoding_invalid) {
2635 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2636 RETURN_FALSE;
2637 }
2638 }
2639
2640 if (argc < 3) {
2641 len = string.len;
2642 }
2643
2644 /* if "from" position is negative, count start position from the end
2645 * of the string
2646 */
2647 if (from < 0) {
2648 from = string.len + from;
2649 if (from < 0) {
2650 from = 0;
2651 }
2652 }
2653
2654 /* if "length" position is negative, set it to the length
2655 * needed to stop that many chars from the end of the string
2656 */
2657 if (len < 0) {
2658 len = (string.len - from) + len;
2659 if (len < 0) {
2660 len = 0;
2661 }
2662 }
2663
2664 if ((unsigned int)from > string.len) {
2665 RETURN_FALSE;
2666 }
2667
2668 ret = mbfl_strcut(&string, &result, from, len);
2669 if (ret == NULL) {
2670 RETURN_FALSE;
2671 }
2672
2673 RETURN_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2674 }
2675 /* }}} */
2676
2677 /* {{{ proto int mb_strwidth(string str [, string encoding])
2678 Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2679 PHP_FUNCTION(mb_strwidth)
2680 {
2681 int n;
2682 mbfl_string string;
2683 char *enc_name = NULL;
2684 int enc_name_len;
2685
2686 mbfl_string_init(&string);
2687
2688 string.no_language = MBSTRG(language);
2689 string.no_encoding = MBSTRG(current_internal_encoding);
2690
2691 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2692 return;
2693 }
2694
2695 if (enc_name != NULL) {
2696 string.no_encoding = mbfl_name2no_encoding(enc_name);
2697 if (string.no_encoding == mbfl_no_encoding_invalid) {
2698 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2699 RETURN_FALSE;
2700 }
2701 }
2702
2703 n = mbfl_strwidth(&string);
2704 if (n >= 0) {
2705 RETVAL_LONG(n);
2706 } else {
2707 RETVAL_FALSE;
2708 }
2709 }
2710 /* }}} */
2711
2712 /* {{{ proto string mb_strimwidth(string str, int start, int width [, string trimmarker [, string encoding]])
2713 Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2714 PHP_FUNCTION(mb_strimwidth)
2715 {
2716 char *str, *trimmarker, *encoding;
2717 long from, width;
2718 int str_len, trimmarker_len, encoding_len;
2719 mbfl_string string, result, marker, *ret;
2720
2721 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2722 return;
2723 }
2724
2725 mbfl_string_init(&string);
2726 mbfl_string_init(&marker);
2727 string.no_language = MBSTRG(language);
2728 string.no_encoding = MBSTRG(current_internal_encoding);
2729 marker.no_language = MBSTRG(language);
2730 marker.no_encoding = MBSTRG(current_internal_encoding);
2731 marker.val = NULL;
2732 marker.len = 0;
2733
2734 if (ZEND_NUM_ARGS() == 5) {
2735 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2736 if (string.no_encoding == mbfl_no_encoding_invalid) {
2737 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2738 RETURN_FALSE;
2739 }
2740 }
2741
2742 string.val = (unsigned char *)str;
2743 string.len = str_len;
2744
2745 if (from < 0 || from > str_len) {
2746 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2747 RETURN_FALSE;
2748 }
2749
2750 if (width < 0) {
2751 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2752 RETURN_FALSE;
2753 }
2754
2755 if (ZEND_NUM_ARGS() >= 4) {
2756 marker.val = (unsigned char *)trimmarker;
2757 marker.len = trimmarker_len;
2758 }
2759
2760 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2761
2762 if (ret == NULL) {
2763 RETURN_FALSE;
2764 }
2765
2766 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
2767 }
2768 /* }}} */
2769
2770 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const char * _to_encoding,const char * _from_encodings,size_t * output_len TSRMLS_DC)2771 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2772 {
2773 mbfl_string string, result, *ret;
2774 enum mbfl_no_encoding from_encoding, to_encoding;
2775 mbfl_buffer_converter *convd;
2776 int size, *list;
2777 char *output=NULL;
2778
2779 if (output_len) {
2780 *output_len = 0;
2781 }
2782 if (!input) {
2783 return NULL;
2784 }
2785 /* new encoding */
2786 if (_to_encoding && strlen(_to_encoding)) {
2787 to_encoding = mbfl_name2no_encoding(_to_encoding);
2788 if (to_encoding == mbfl_no_encoding_invalid) {
2789 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2790 return NULL;
2791 }
2792 } else {
2793 to_encoding = MBSTRG(current_internal_encoding);
2794 }
2795
2796 /* initialize string */
2797 mbfl_string_init(&string);
2798 mbfl_string_init(&result);
2799 from_encoding = MBSTRG(current_internal_encoding);
2800 string.no_encoding = from_encoding;
2801 string.no_language = MBSTRG(language);
2802 string.val = (unsigned char *)input;
2803 string.len = length;
2804
2805 /* pre-conversion encoding */
2806 if (_from_encodings) {
2807 list = NULL;
2808 size = 0;
2809 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
2810 if (size == 1) {
2811 from_encoding = *list;
2812 string.no_encoding = from_encoding;
2813 } else if (size > 1) {
2814 /* auto detect */
2815 from_encoding = mbfl_identify_encoding_no(&string, list, size, MBSTRG(strict_detection));
2816 if (from_encoding != mbfl_no_encoding_invalid) {
2817 string.no_encoding = from_encoding;
2818 } else {
2819 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
2820 from_encoding = mbfl_no_encoding_pass;
2821 to_encoding = from_encoding;
2822 string.no_encoding = from_encoding;
2823 }
2824 } else {
2825 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
2826 }
2827 if (list != NULL) {
2828 efree((void *)list);
2829 }
2830 }
2831
2832 /* initialize converter */
2833 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2834 if (convd == NULL) {
2835 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
2836 return NULL;
2837 }
2838 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2839 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2840
2841 /* do it */
2842 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2843 if (ret) {
2844 if (output_len) {
2845 *output_len = ret->len;
2846 }
2847 output = (char *)ret->val;
2848 }
2849
2850 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2851 mbfl_buffer_converter_delete(convd);
2852 return output;
2853 }
2854 /* }}} */
2855
2856 /* {{{ proto string mb_convert_encoding(string str, string to-encoding [, mixed from-encoding])
2857 Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)2858 PHP_FUNCTION(mb_convert_encoding)
2859 {
2860 char *arg_str, *arg_new;
2861 int str_len, new_len;
2862 zval *arg_old;
2863 int i;
2864 size_t size, l, n;
2865 char *_from_encodings = NULL, *ret, *s_free = NULL;
2866
2867 zval **hash_entry;
2868 HashTable *target_hash;
2869
2870 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
2871 return;
2872 }
2873
2874 if (ZEND_NUM_ARGS() == 3) {
2875 switch (Z_TYPE_P(arg_old)) {
2876 case IS_ARRAY:
2877 target_hash = Z_ARRVAL_P(arg_old);
2878 zend_hash_internal_pointer_reset(target_hash);
2879 i = zend_hash_num_elements(target_hash);
2880 _from_encodings = NULL;
2881
2882 while (i > 0) {
2883 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
2884 break;
2885 }
2886
2887 convert_to_string_ex(hash_entry);
2888
2889 if ( _from_encodings) {
2890 l = strlen(_from_encodings);
2891 n = strlen(Z_STRVAL_PP(hash_entry));
2892 _from_encodings = erealloc(_from_encodings, l+n+2);
2893 strcpy(_from_encodings+l, ",");
2894 strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
2895 } else {
2896 _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
2897 }
2898
2899 zend_hash_move_forward(target_hash);
2900 i--;
2901 }
2902
2903 if (_from_encodings != NULL && !strlen(_from_encodings)) {
2904 efree(_from_encodings);
2905 _from_encodings = NULL;
2906 }
2907 s_free = _from_encodings;
2908 break;
2909 default:
2910 convert_to_string(arg_old);
2911 _from_encodings = Z_STRVAL_P(arg_old);
2912 break;
2913 }
2914 }
2915
2916 /* new encoding */
2917 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
2918 if (ret != NULL) {
2919 RETVAL_STRINGL(ret, size, 0); /* the string is already strdup()'ed */
2920 } else {
2921 RETVAL_FALSE;
2922 }
2923
2924 if ( s_free) {
2925 efree(s_free);
2926 }
2927 }
2928 /* }}} */
2929
2930 /* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
2931 Returns a case-folded version of sourcestring */
PHP_FUNCTION(mb_convert_case)2932 PHP_FUNCTION(mb_convert_case)
2933 {
2934 char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2935 int str_len, from_encoding_len;
2936 long case_mode = 0;
2937 char *newstr;
2938 size_t ret_len;
2939
2940 RETVAL_FALSE;
2941 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
2942 &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
2943 RETURN_FALSE;
2944
2945 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2946
2947 if (newstr) {
2948 RETVAL_STRINGL(newstr, ret_len, 0);
2949 }
2950 }
2951 /* }}} */
2952
2953 /* {{{ proto string mb_strtoupper(string sourcestring [, string encoding])
2954 * Returns a uppercased version of sourcestring
2955 */
PHP_FUNCTION(mb_strtoupper)2956 PHP_FUNCTION(mb_strtoupper)
2957 {
2958 char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2959 int str_len, from_encoding_len;
2960 char *newstr;
2961 size_t ret_len;
2962
2963 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2964 &from_encoding, &from_encoding_len) == FAILURE) {
2965 return;
2966 }
2967 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2968
2969 if (newstr) {
2970 RETURN_STRINGL(newstr, ret_len, 0);
2971 }
2972 RETURN_FALSE;
2973 }
2974 /* }}} */
2975
2976 /* {{{ proto string mb_strtolower(string sourcestring [, string encoding])
2977 * Returns a lowercased version of sourcestring
2978 */
PHP_FUNCTION(mb_strtolower)2979 PHP_FUNCTION(mb_strtolower)
2980 {
2981 char *str, *from_encoding = (char*)mbfl_no2preferred_mime_name(MBSTRG(current_internal_encoding));
2982 int str_len, from_encoding_len;
2983 char *newstr;
2984 size_t ret_len;
2985
2986 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
2987 &from_encoding, &from_encoding_len) == FAILURE) {
2988 return;
2989 }
2990 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
2991
2992 if (newstr) {
2993 RETURN_STRINGL(newstr, ret_len, 0);
2994 }
2995 RETURN_FALSE;
2996 }
2997 /* }}} */
2998
2999 /* {{{ proto string mb_detect_encoding(string str [, mixed encoding_list [, bool strict]])
3000 Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)3001 PHP_FUNCTION(mb_detect_encoding)
3002 {
3003 char *str;
3004 int str_len;
3005 zend_bool strict=0;
3006 zval *encoding_list;
3007
3008 mbfl_string string;
3009 const char *ret;
3010 enum mbfl_no_encoding *elist;
3011 int size, *list;
3012
3013 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3014 return;
3015 }
3016
3017 /* make encoding list */
3018 list = NULL;
3019 size = 0;
3020 if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3021 switch (Z_TYPE_P(encoding_list)) {
3022 case IS_ARRAY:
3023 if (!php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3024 if (list) {
3025 efree(list);
3026 list = NULL;
3027 size = 0;
3028 }
3029 }
3030 break;
3031 default:
3032 convert_to_string(encoding_list);
3033 if (!php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3034 if (list) {
3035 efree(list);
3036 list = NULL;
3037 size = 0;
3038 }
3039 }
3040 break;
3041 }
3042 if (size <= 0) {
3043 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3044 }
3045 }
3046
3047 if (ZEND_NUM_ARGS() < 3) {
3048 strict = (zend_bool)MBSTRG(strict_detection);
3049 }
3050
3051 if (size > 0 && list != NULL) {
3052 elist = list;
3053 } else {
3054 elist = MBSTRG(current_detect_order_list);
3055 size = MBSTRG(current_detect_order_list_size);
3056 }
3057
3058 mbfl_string_init(&string);
3059 string.no_language = MBSTRG(language);
3060 string.val = (unsigned char *)str;
3061 string.len = str_len;
3062 ret = mbfl_identify_encoding_name(&string, elist, size, strict);
3063
3064 if (list != NULL) {
3065 efree((void *)list);
3066 }
3067
3068 if (ret == NULL) {
3069 RETURN_FALSE;
3070 }
3071
3072 RETVAL_STRING((char *)ret, 1);
3073 }
3074 /* }}} */
3075
3076 /* {{{ proto mixed mb_list_encodings()
3077 Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)3078 PHP_FUNCTION(mb_list_encodings)
3079 {
3080 const mbfl_encoding **encodings;
3081 const mbfl_encoding *encoding;
3082 int i;
3083
3084 array_init(return_value);
3085 i = 0;
3086 encodings = mbfl_get_supported_encodings();
3087 while ((encoding = encodings[i++]) != NULL) {
3088 add_next_index_string(return_value, (char *) encoding->name, 1);
3089 }
3090 }
3091 /* }}} */
3092
3093 /* {{{ proto array mb_encoding_aliases(string encoding)
3094 Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)3095 PHP_FUNCTION(mb_encoding_aliases)
3096 {
3097 const mbfl_encoding *encoding;
3098 char *name = NULL;
3099 int name_len;
3100
3101 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3102 RETURN_FALSE;
3103 }
3104
3105 encoding = mbfl_name2encoding(name);
3106 if (!encoding) {
3107 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3108 RETURN_FALSE;
3109 }
3110
3111 array_init(return_value);
3112 if (encoding->aliases != NULL) {
3113 const char **alias;
3114 for (alias = *encoding->aliases; *alias; ++alias) {
3115 add_next_index_string(return_value, (char *)*alias, 1);
3116 }
3117 }
3118 }
3119 /* }}} */
3120
3121 /* {{{ proto string mb_encode_mimeheader(string str [, string charset [, string transfer-encoding [, string linefeed [, int indent]]]])
3122 Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)3123 PHP_FUNCTION(mb_encode_mimeheader)
3124 {
3125 enum mbfl_no_encoding charset, transenc;
3126 mbfl_string string, result, *ret;
3127 char *charset_name = NULL;
3128 int charset_name_len;
3129 char *trans_enc_name = NULL;
3130 int trans_enc_name_len;
3131 char *linefeed = "\r\n";
3132 int linefeed_len;
3133 long indent = 0;
3134
3135 mbfl_string_init(&string);
3136 string.no_language = MBSTRG(language);
3137 string.no_encoding = MBSTRG(current_internal_encoding);
3138
3139 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3140 return;
3141 }
3142
3143 charset = mbfl_no_encoding_pass;
3144 transenc = mbfl_no_encoding_base64;
3145
3146 if (charset_name != NULL) {
3147 charset = mbfl_name2no_encoding(charset_name);
3148 if (charset == mbfl_no_encoding_invalid) {
3149 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3150 RETURN_FALSE;
3151 }
3152 } else {
3153 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3154 if (lang != NULL) {
3155 charset = lang->mail_charset;
3156 transenc = lang->mail_header_encoding;
3157 }
3158 }
3159
3160 if (trans_enc_name != NULL) {
3161 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3162 transenc = mbfl_no_encoding_base64;
3163 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3164 transenc = mbfl_no_encoding_qprint;
3165 }
3166 }
3167
3168 mbfl_string_init(&result);
3169 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3170 if (ret != NULL) {
3171 RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
3172 } else {
3173 RETVAL_FALSE;
3174 }
3175 }
3176 /* }}} */
3177
3178 /* {{{ proto string mb_decode_mimeheader(string string)
3179 Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)3180 PHP_FUNCTION(mb_decode_mimeheader)
3181 {
3182 mbfl_string string, result, *ret;
3183
3184 mbfl_string_init(&string);
3185 string.no_language = MBSTRG(language);
3186 string.no_encoding = MBSTRG(current_internal_encoding);
3187
3188 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3189 return;
3190 }
3191
3192 mbfl_string_init(&result);
3193 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
3194 if (ret != NULL) {
3195 RETVAL_STRINGL((char *)ret->val, ret->len, 0) /* the string is already strdup()'ed */
3196 } else {
3197 RETVAL_FALSE;
3198 }
3199 }
3200 /* }}} */
3201
3202 /* {{{ proto string mb_convert_kana(string str [, string option] [, string encoding])
3203 Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)3204 PHP_FUNCTION(mb_convert_kana)
3205 {
3206 int opt, i;
3207 mbfl_string string, result, *ret;
3208 char *optstr = NULL;
3209 int optstr_len;
3210 char *encname = NULL;
3211 int encname_len;
3212
3213 mbfl_string_init(&string);
3214 string.no_language = MBSTRG(language);
3215 string.no_encoding = MBSTRG(current_internal_encoding);
3216
3217 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3218 return;
3219 }
3220
3221 /* option */
3222 if (optstr != NULL) {
3223 char *p = optstr;
3224 int n = optstr_len;
3225 i = 0;
3226 opt = 0;
3227 while (i < n) {
3228 i++;
3229 switch (*p++) {
3230 case 'A':
3231 opt |= 0x1;
3232 break;
3233 case 'a':
3234 opt |= 0x10;
3235 break;
3236 case 'R':
3237 opt |= 0x2;
3238 break;
3239 case 'r':
3240 opt |= 0x20;
3241 break;
3242 case 'N':
3243 opt |= 0x4;
3244 break;
3245 case 'n':
3246 opt |= 0x40;
3247 break;
3248 case 'S':
3249 opt |= 0x8;
3250 break;
3251 case 's':
3252 opt |= 0x80;
3253 break;
3254 case 'K':
3255 opt |= 0x100;
3256 break;
3257 case 'k':
3258 opt |= 0x1000;
3259 break;
3260 case 'H':
3261 opt |= 0x200;
3262 break;
3263 case 'h':
3264 opt |= 0x2000;
3265 break;
3266 case 'V':
3267 opt |= 0x800;
3268 break;
3269 case 'C':
3270 opt |= 0x10000;
3271 break;
3272 case 'c':
3273 opt |= 0x20000;
3274 break;
3275 case 'M':
3276 opt |= 0x100000;
3277 break;
3278 case 'm':
3279 opt |= 0x200000;
3280 break;
3281 }
3282 }
3283 } else {
3284 opt = 0x900;
3285 }
3286
3287 /* encoding */
3288 if (encname != NULL) {
3289 string.no_encoding = mbfl_name2no_encoding(encname);
3290 if (string.no_encoding == mbfl_no_encoding_invalid) {
3291 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3292 RETURN_FALSE;
3293 }
3294 }
3295
3296 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3297 if (ret != NULL) {
3298 RETVAL_STRINGL((char *)ret->val, ret->len, 0); /* the string is already strdup()'ed */
3299 } else {
3300 RETVAL_FALSE;
3301 }
3302 }
3303 /* }}} */
3304
3305 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3306
3307 /* {{{ proto string mb_convert_variables(string to-encoding, mixed from-encoding, mixed vars [, ...])
3308 Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3309 PHP_FUNCTION(mb_convert_variables)
3310 {
3311 zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3312 HashTable *target_hash;
3313 mbfl_string string, result, *ret;
3314 enum mbfl_no_encoding from_encoding, to_encoding;
3315 mbfl_encoding_detector *identd;
3316 mbfl_buffer_converter *convd;
3317 int n, to_enc_len, argc, stack_level, stack_max, elistsz;
3318 enum mbfl_no_encoding *elist;
3319 char *name, *to_enc;
3320 void *ptmp;
3321
3322 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3323 return;
3324 }
3325
3326 /* new encoding */
3327 to_encoding = mbfl_name2no_encoding(to_enc);
3328 if (to_encoding == mbfl_no_encoding_invalid) {
3329 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3330 efree(args);
3331 RETURN_FALSE;
3332 }
3333
3334 /* initialize string */
3335 mbfl_string_init(&string);
3336 mbfl_string_init(&result);
3337 from_encoding = MBSTRG(current_internal_encoding);
3338 string.no_encoding = from_encoding;
3339 string.no_language = MBSTRG(language);
3340
3341 /* pre-conversion encoding */
3342 elist = NULL;
3343 elistsz = 0;
3344 switch (Z_TYPE_PP(zfrom_enc)) {
3345 case IS_ARRAY:
3346 php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3347 break;
3348 default:
3349 convert_to_string_ex(zfrom_enc);
3350 php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3351 break;
3352 }
3353 if (elistsz <= 0) {
3354 from_encoding = mbfl_no_encoding_pass;
3355 } else if (elistsz == 1) {
3356 from_encoding = *elist;
3357 } else {
3358 /* auto detect */
3359 from_encoding = mbfl_no_encoding_invalid;
3360 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3361 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3362 stack_level = 0;
3363 identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3364 if (identd != NULL) {
3365 n = 0;
3366 while (n < argc || stack_level > 0) {
3367 if (stack_level <= 0) {
3368 var = args[n++];
3369 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3370 target_hash = HASH_OF(*var);
3371 if (target_hash != NULL) {
3372 zend_hash_internal_pointer_reset(target_hash);
3373 }
3374 }
3375 } else {
3376 stack_level--;
3377 var = stack[stack_level];
3378 }
3379 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3380 target_hash = HASH_OF(*var);
3381 if (target_hash != NULL) {
3382 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3383 zend_hash_move_forward(target_hash);
3384 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3385 if (stack_level >= stack_max) {
3386 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3387 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3388 stack = (zval ***)ptmp;
3389 }
3390 stack[stack_level] = var;
3391 stack_level++;
3392 var = hash_entry;
3393 target_hash = HASH_OF(*var);
3394 if (target_hash != NULL) {
3395 zend_hash_internal_pointer_reset(target_hash);
3396 continue;
3397 }
3398 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3399 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3400 string.len = Z_STRLEN_PP(hash_entry);
3401 if (mbfl_encoding_detector_feed(identd, &string)) {
3402 goto detect_end; /* complete detecting */
3403 }
3404 }
3405 }
3406 }
3407 } else if (Z_TYPE_PP(var) == IS_STRING) {
3408 string.val = (unsigned char *)Z_STRVAL_PP(var);
3409 string.len = Z_STRLEN_PP(var);
3410 if (mbfl_encoding_detector_feed(identd, &string)) {
3411 goto detect_end; /* complete detecting */
3412 }
3413 }
3414 }
3415 detect_end:
3416 from_encoding = mbfl_encoding_detector_judge(identd);
3417 mbfl_encoding_detector_delete(identd);
3418 }
3419 efree(stack);
3420
3421 if (from_encoding == mbfl_no_encoding_invalid) {
3422 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3423 from_encoding = mbfl_no_encoding_pass;
3424 }
3425 }
3426 if (elist != NULL) {
3427 efree((void *)elist);
3428 }
3429 /* create converter */
3430 convd = NULL;
3431 if (from_encoding != mbfl_no_encoding_pass) {
3432 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3433 if (convd == NULL) {
3434 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3435 RETURN_FALSE;
3436 }
3437 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3438 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3439 }
3440
3441 /* convert */
3442 if (convd != NULL) {
3443 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3444 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3445 stack_level = 0;
3446 n = 0;
3447 while (n < argc || stack_level > 0) {
3448 if (stack_level <= 0) {
3449 var = args[n++];
3450 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3451 target_hash = HASH_OF(*var);
3452 if (target_hash != NULL) {
3453 zend_hash_internal_pointer_reset(target_hash);
3454 }
3455 }
3456 } else {
3457 stack_level--;
3458 var = stack[stack_level];
3459 }
3460 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3461 target_hash = HASH_OF(*var);
3462 if (target_hash != NULL) {
3463 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3464 zend_hash_move_forward(target_hash);
3465 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3466 if (stack_level >= stack_max) {
3467 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3468 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3469 stack = (zval ***)ptmp;
3470 }
3471 stack[stack_level] = var;
3472 stack_level++;
3473 var = hash_entry;
3474 SEPARATE_ZVAL(hash_entry);
3475 target_hash = HASH_OF(*var);
3476 if (target_hash != NULL) {
3477 zend_hash_internal_pointer_reset(target_hash);
3478 continue;
3479 }
3480 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3481 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3482 string.len = Z_STRLEN_PP(hash_entry);
3483 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3484 if (ret != NULL) {
3485 if (Z_REFCOUNT_PP(hash_entry) > 1) {
3486 Z_DELREF_PP(hash_entry);
3487 MAKE_STD_ZVAL(*hash_entry);
3488 } else {
3489 zval_dtor(*hash_entry);
3490 }
3491 ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3492 }
3493 }
3494 }
3495 }
3496 } else if (Z_TYPE_PP(var) == IS_STRING) {
3497 string.val = (unsigned char *)Z_STRVAL_PP(var);
3498 string.len = Z_STRLEN_PP(var);
3499 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3500 if (ret != NULL) {
3501 zval_dtor(*var);
3502 ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3503 }
3504 }
3505 }
3506 efree(stack);
3507
3508 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3509 mbfl_buffer_converter_delete(convd);
3510 }
3511
3512 efree(args);
3513
3514 name = (char *)mbfl_no_encoding2name(from_encoding);
3515 if (name != NULL) {
3516 RETURN_STRING(name, 1);
3517 } else {
3518 RETURN_FALSE;
3519 }
3520 }
3521 /* }}} */
3522
3523 /* {{{ HTML numeric entity */
3524 /* {{{ static void php_mb_numericentity_exec() */
3525 static void
php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS,int type)3526 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3527 {
3528 char *str, *encoding;
3529 int str_len, encoding_len;
3530 zval *zconvmap, **hash_entry;
3531 HashTable *target_hash;
3532 size_t argc = ZEND_NUM_ARGS();
3533 int i, *convmap, *mapelm, mapsize=0;
3534 mbfl_string string, result, *ret;
3535 enum mbfl_no_encoding no_encoding;
3536
3537 if (zend_parse_parameters(argc TSRMLS_CC, "szs", &str, &str_len, &zconvmap, &encoding, &encoding_len) == FAILURE) {
3538 return;
3539 }
3540
3541 mbfl_string_init(&string);
3542 string.no_language = MBSTRG(language);
3543 string.no_encoding = MBSTRG(current_internal_encoding);
3544 string.val = (unsigned char *)str;
3545 string.len = str_len;
3546
3547 /* encoding */
3548 if (argc == 3) {
3549 no_encoding = mbfl_name2no_encoding(encoding);
3550 if (no_encoding == mbfl_no_encoding_invalid) {
3551 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3552 RETURN_FALSE;
3553 } else {
3554 string.no_encoding = no_encoding;
3555 }
3556 }
3557
3558 /* conversion map */
3559 convmap = NULL;
3560 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3561 target_hash = Z_ARRVAL_P(zconvmap);
3562 zend_hash_internal_pointer_reset(target_hash);
3563 i = zend_hash_num_elements(target_hash);
3564 if (i > 0) {
3565 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3566 mapelm = convmap;
3567 mapsize = 0;
3568 while (i > 0) {
3569 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3570 break;
3571 }
3572 convert_to_long_ex(hash_entry);
3573 *mapelm++ = Z_LVAL_PP(hash_entry);
3574 mapsize++;
3575 i--;
3576 zend_hash_move_forward(target_hash);
3577 }
3578 }
3579 }
3580 if (convmap == NULL) {
3581 RETURN_FALSE;
3582 }
3583 mapsize /= 4;
3584
3585 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3586 if (ret != NULL) {
3587 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3588 } else {
3589 RETVAL_FALSE;
3590 }
3591 efree((void *)convmap);
3592 }
3593 /* }}} */
3594
/* {{{ proto string mb_encode_numericentity(string string, array convmap [, string encoding])
   Converts specified characters to HTML numeric entities.
   Thin wrapper: forwards all arguments to php_mb_numericentity_exec() with
   type 0. */
PHP_FUNCTION(mb_encode_numericentity)
{
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
3601 /* }}} */
3602
/* {{{ proto string mb_decode_numericentity(string string, array convmap [, string encoding])
   Converts HTML numeric entities to character code.
   Thin wrapper: forwards all arguments to php_mb_numericentity_exec() with
   type 1. */
PHP_FUNCTION(mb_decode_numericentity)
{
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
3609 /* }}} */
3610 /* }}} */
3611
3612 /* {{{ proto int mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
3613 * Sends an email message with MIME scheme
3614 */
3615
/*
 * If str[pos] starts a folded-header separator (CR LF followed by a space or
 * tab), advance pos past the CR LF and all but the last whitespace character
 * of the run, then `continue` the enclosing loop.
 *
 * This deliberately expands to a bare `if`: the `continue` has to bind to the
 * caller's loop, so the macro cannot be wrapped in do { } while (0) and must
 * be used as a statement directly inside that loop (within braces).
 */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos)										\
	if ((str)[(pos)] == '\r' && (str)[(pos) + 1] == '\n' && ((str)[(pos) + 2] == ' ' || (str)[(pos) + 2] == '\t')) {	\
		(pos) += 2;											\
		while ((str)[(pos) + 1] == ' ' || (str)[(pos) + 1] == '\t') {					\
			(pos)++;										\
		}												\
		continue;											\
	}

/*
 * Replace every NUL byte in the first len bytes of str with a space.
 * Uses the caller's `char *pp, *ee` variables as scratch; pp is NULL afterwards.
 * Wrapped in do { } while (0) so it behaves as a single statement.
 */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len)			\
	do {							\
		pp = (str);					\
		ee = pp + (len);				\
		while ((pp = memchr(pp, '\0', (ee - pp)))) {	\
			*pp = ' ';				\
		}						\
	} while (0)

/*
 * Append one character to the caller's `token` smart_str.
 * When token owns a buffer (a > 0) the character is really appended; when it
 * is only a view into the input (a == 0) the view is widened by one byte and
 * ch itself is not stored.
 */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)

/*
 * Turn a non-owning smart_str view (a == 0) into an owning one: allocate a
 * buffer whose capacity is the smallest power of two >= len (at least 1),
 * plus one spare byte, and copy the viewed bytes into it. Does nothing when
 * the smart_str already owns its buffer.
 */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3652
/*
 * Destructor for smart_str values: releases the buffer only when the
 * smart_str owns one (a > 0); a non-owning view is left untouched.
 */
static void my_smart_str_dtor(smart_str *s)
{
	if (!(s->a > 0)) {
		return;
	}
	smart_str_free(s);
}
3659
/*
 * Parse the mail header text str (str_len bytes) into the hash table ht.
 *
 * Each completed "Name: value" field is stored under the upper-cased field
 * name (key length is the name length, without a terminating NUL); the stored
 * data is a smart_str copied by value. A stored smart_str with a == 0 is a
 * view into str, one with a > 0 owns a heap buffer.
 *
 * Scanning stops at the end of the input or at a '\n' seen while crlf_state
 * is -1 (a line break right after a line break, or at the very start).
 *
 * Returns the value of the state variable when scanning stopped.
 *
 * NOTE(review): when scanning stops in state 4 or 5 nothing is stored, and a
 * token made owning by SEPARATE_SMART_STR is not released here; confirm
 * whether that loses a header and leaks the buffer.
 */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
	const char *ps;
	size_t icnt;
	int state = 0;
	int crlf_state = -1;

	/* token is the piece currently being collected; it starts as a view into str */
	smart_str token = { 0, 0, 0 };
	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };

	ps = str;
	icnt = str_len;

	/*
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
	 *      state  0           1            2          3
	 *
	 *             C o n t e n t - T y p e :   t e x t / h t m l \r\n
	 *             ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
	 * crlf_state -1                      0                      1 -1
	 *
	 * Additional states set below: 4 = a continuation line was started while
	 * in state 3; 5 = a line starting with whitespace in any other state.
	 */

	while (icnt > 0) {
		switch (*ps) {
			case ':':
				/* a '\r' not followed by '\n' is kept as data */
				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				if (state == 0 || state == 1) {
					/* first ':' on the line ends the field name */
					fld_name = token;

					state = 2;
				} else {
					APPEND_ONE_CHAR(*ps);
				}

				crlf_state = 0;
				break;

			case '\n':
				/* second line break in a row (or at the very start): stop parsing */
				if (crlf_state == -1) {
					goto out;
				}
				crlf_state = -1;
				break;

			case '\r':
				if (crlf_state == 1) {
					/* previous '\r' was not part of a line break: keep it */
					APPEND_ONE_CHAR('\r');
				} else {
					crlf_state = 1;
				}
				break;

			case ' ': case '\t':
				if (crlf_state == -1) {
					if (state == 3) {
						/* continuing from the previous line */
						/* take ownership so the value can be extended past the line break */
						SEPARATE_SMART_STR(&token);
						state = 4;
					} else {
						/* simply skipping this new line */
						state = 5;
					}
				} else {
					if (crlf_state == 1) {
						APPEND_ONE_CHAR('\r');
					}
					/* whitespace is kept only inside a field name or value */
					if (state == 1 || state == 3) {
						APPEND_ONE_CHAR(*ps);
					}
				}
				crlf_state = 0;
				break;

			default:
				switch (state) {
					case 0:
						/* first character of a field name: start a new view here */
						token.c = (char *)ps;
						token.len = 0;
						token.a = 0;
						state = 1;
						break;

					case 2:
						if (crlf_state != -1) {
							/* first character of the field value on the same line */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 3;
							break;
						}
						/* break is missing intentionally */

					case 3:
						if (crlf_state == -1) {
							/* a new line starts with a non-blank character: the previous field is complete */
							fld_val = token;

							if (fld_name.c != NULL && fld_val.c != NULL) {
								char *dummy;

								/* FIXME: some locale free implementation is
								 * really required here,,, */
								/* copy the name before upper-casing so str itself is not modified */
								SEPARATE_SMART_STR(&fld_name);
								php_strtoupper(fld_name.c, fld_name.len);

								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

								my_smart_str_dtor(&fld_name);
							}

							memset(&fld_name, 0, sizeof(smart_str));
							memset(&fld_val, 0, sizeof(smart_str));

							/* the current character begins the next field name */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 1;
						}
						break;

					case 4:
						/* the leading whitespace of a continuation line becomes one space */
						APPEND_ONE_CHAR(' ');
						state = 3;
						break;
				}

				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				APPEND_ONE_CHAR(*ps);

				crlf_state = 0;
				break;
		}
		ps++, icnt--;
	}
out:
	if (state == 2) {
		/* field name followed by ':' but no value: store an empty value */
		token.c = "";
		token.len = 0;
		token.a = 0;

		state = 3;
	}
	if (state == 3) {
		/* flush the last field, which has no following line to trigger the store above */
		fld_val = token;

		if (fld_name.c != NULL && fld_val.c != NULL) {
			void *dummy;

			/* FIXME: some locale free implementation is
			 * really required here,,, */
			SEPARATE_SMART_STR(&fld_name);
			php_strtoupper(fld_name.c, fld_name.len);

			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

			my_smart_str_dtor(&fld_name);
		}
	}
	return state;
}
3829
PHP_FUNCTION(mb_send_mail)3830 PHP_FUNCTION(mb_send_mail)
3831 {
3832 int n;
3833 char *to = NULL;
3834 int to_len;
3835 char *message = NULL;
3836 int message_len;
3837 char *headers = NULL;
3838 int headers_len;
3839 char *subject = NULL;
3840 int subject_len;
3841 char *extra_cmd = NULL;
3842 int extra_cmd_len;
3843 int i;
3844 char *to_r = NULL;
3845 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3846 struct {
3847 int cnt_type:1;
3848 int cnt_trans_enc:1;
3849 } suppressed_hdrs = { 0, 0 };
3850
3851 char *message_buf = NULL, *subject_buf = NULL, *p;
3852 mbfl_string orig_str, conv_str;
3853 mbfl_string *pstr; /* pointer to mbfl string for return value */
3854 enum mbfl_no_encoding
3855 tran_cs, /* transfar text charset */
3856 head_enc, /* header transfar encoding */
3857 body_enc; /* body transfar encoding */
3858 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
3859 const mbfl_language *lang;
3860 int err = 0;
3861 HashTable ht_headers;
3862 smart_str *s;
3863 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3864 char *pp, *ee;
3865
3866 if (PG(safe_mode) && (ZEND_NUM_ARGS() == 5)) {
3867 php_error_docref(NULL TSRMLS_CC, E_WARNING, "SAFE MODE Restriction in effect. The fifth parameter is disabled in SAFE MODE.");
3868 RETURN_FALSE;
3869 }
3870
3871 /* initialize */
3872 mbfl_memory_device_init(&device, 0, 0);
3873 mbfl_string_init(&orig_str);
3874 mbfl_string_init(&conv_str);
3875
3876 /* character-set, transfer-encoding */
3877 tran_cs = mbfl_no_encoding_utf8;
3878 head_enc = mbfl_no_encoding_base64;
3879 body_enc = mbfl_no_encoding_base64;
3880 lang = mbfl_no2language(MBSTRG(language));
3881 if (lang != NULL) {
3882 tran_cs = lang->mail_charset;
3883 head_enc = lang->mail_header_encoding;
3884 body_enc = lang->mail_body_encoding;
3885 }
3886
3887 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
3888 return;
3889 }
3890
3891 /* ASCIIZ check */
3892 MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
3893 MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
3894 MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
3895 if (headers) {
3896 MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
3897 }
3898 if (extra_cmd) {
3899 MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
3900 }
3901
3902 zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);
3903
3904 if (headers != NULL) {
3905 _php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
3906 }
3907
3908 if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
3909 char *tmp;
3910 char *param_name;
3911 char *charset = NULL;
3912
3913 SEPARATE_SMART_STR(s);
3914 smart_str_0(s);
3915
3916 p = strchr(s->c, ';');
3917
3918 if (p != NULL) {
3919 /* skipping the padded spaces */
3920 do {
3921 ++p;
3922 } while (*p == ' ' || *p == '\t');
3923
3924 if (*p != '\0') {
3925 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3926 if (strcasecmp(param_name, "charset") == 0) {
3927 enum mbfl_no_encoding _tran_cs = tran_cs;
3928
3929 charset = php_strtok_r(NULL, "= \"", &tmp);
3930 if (charset != NULL) {
3931 _tran_cs = mbfl_name2no_encoding(charset);
3932 }
3933
3934 if (_tran_cs == mbfl_no_encoding_invalid) {
3935 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3936 _tran_cs = mbfl_no_encoding_ascii;
3937 }
3938 tran_cs = _tran_cs;
3939 }
3940 }
3941 }
3942 }
3943 suppressed_hdrs.cnt_type = 1;
3944 }
3945
3946 if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
3947 enum mbfl_no_encoding _body_enc;
3948 SEPARATE_SMART_STR(s);
3949 smart_str_0(s);
3950
3951 _body_enc = mbfl_name2no_encoding(s->c);
3952 switch (_body_enc) {
3953 case mbfl_no_encoding_base64:
3954 case mbfl_no_encoding_7bit:
3955 case mbfl_no_encoding_8bit:
3956 body_enc = _body_enc;
3957 break;
3958
3959 default:
3960 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
3961 body_enc = mbfl_no_encoding_8bit;
3962 break;
3963 }
3964 suppressed_hdrs.cnt_trans_enc = 1;
3965 }
3966
3967 /* To: */
3968 if (to != NULL) {
3969 if (to_len > 0) {
3970 to_r = estrndup(to, to_len);
3971 for (; to_len; to_len--) {
3972 if (!isspace((unsigned char) to_r[to_len - 1])) {
3973 break;
3974 }
3975 to_r[to_len - 1] = '\0';
3976 }
3977 for (i = 0; to_r[i]; i++) {
3978 if (iscntrl((unsigned char) to_r[i])) {
3979 /* According to RFC 822, section 3.1.1 long headers may be separated into
3980 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3981 * To prevent these separators from being replaced with a space, we use the
3982 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3983 */
3984 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3985 to_r[i] = ' ';
3986 }
3987 }
3988 } else {
3989 to_r = to;
3990 }
3991 } else {
3992 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
3993 err = 1;
3994 }
3995
3996 /* Subject: */
3997 if (subject != NULL && subject_len >= 0) {
3998 orig_str.no_language = MBSTRG(language);
3999 orig_str.val = (unsigned char *)subject;
4000 orig_str.len = subject_len;
4001 orig_str.no_encoding = MBSTRG(current_internal_encoding);
4002 if (orig_str.no_encoding == mbfl_no_encoding_invalid
4003 || orig_str.no_encoding == mbfl_no_encoding_pass) {
4004 orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4005 }
4006 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
4007 if (pstr != NULL) {
4008 subject_buf = subject = (char *)pstr->val;
4009 }
4010 } else {
4011 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
4012 err = 1;
4013 }
4014
4015 /* message body */
4016 if (message != NULL) {
4017 orig_str.no_language = MBSTRG(language);
4018 orig_str.val = (unsigned char *)message;
4019 orig_str.len = (unsigned int)message_len;
4020 orig_str.no_encoding = MBSTRG(current_internal_encoding);
4021
4022 if (orig_str.no_encoding == mbfl_no_encoding_invalid
4023 || orig_str.no_encoding == mbfl_no_encoding_pass) {
4024 orig_str.no_encoding = mbfl_identify_encoding_no(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
4025 }
4026
4027 pstr = NULL;
4028 {
4029 mbfl_string tmpstr;
4030
4031 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
4032 tmpstr.no_encoding=mbfl_no_encoding_8bit;
4033 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
4034 efree(tmpstr.val);
4035 }
4036 }
4037 if (pstr != NULL) {
4038 message_buf = message = (char *)pstr->val;
4039 }
4040 } else {
4041 /* this is not really an error, so it is allowed. */
4042 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
4043 message = NULL;
4044 }
4045
4046 /* other headers */
4047 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
4048 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
4049 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
4050 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
4051 if (headers != NULL) {
4052 p = headers;
4053 n = headers_len;
4054 mbfl_memory_device_strncat(&device, p, n);
4055 if (n > 0 && p[n - 1] != '\n') {
4056 mbfl_memory_device_strncat(&device, "\n", 1);
4057 }
4058 }
4059
4060 if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
4061 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
4062 mbfl_memory_device_strncat(&device, "\n", 1);
4063 }
4064
4065 if (!suppressed_hdrs.cnt_type) {
4066 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
4067
4068 p = (char *)mbfl_no2preferred_mime_name(tran_cs);
4069 if (p != NULL) {
4070 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
4071 mbfl_memory_device_strcat(&device, p);
4072 }
4073 mbfl_memory_device_strncat(&device, "\n", 1);
4074 }
4075 if (!suppressed_hdrs.cnt_trans_enc) {
4076 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
4077 p = (char *)mbfl_no2preferred_mime_name(body_enc);
4078 if (p == NULL) {
4079 p = "7bit";
4080 }
4081 mbfl_memory_device_strcat(&device, p);
4082 mbfl_memory_device_strncat(&device, "\n", 1);
4083 }
4084
4085 mbfl_memory_device_unput(&device);
4086 mbfl_memory_device_output('\0', &device);
4087 headers = (char *)device.buffer;
4088
4089 if (force_extra_parameters) {
4090 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
4091 } else if (extra_cmd) {
4092 extra_cmd = php_escape_shell_cmd(extra_cmd);
4093 }
4094
4095 if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
4096 RETVAL_TRUE;
4097 } else {
4098 RETVAL_FALSE;
4099 }
4100
4101 if (extra_cmd) {
4102 efree(extra_cmd);
4103 }
4104 if (to_r != to) {
4105 efree(to_r);
4106 }
4107 if (subject_buf) {
4108 efree((void *)subject_buf);
4109 }
4110 if (message_buf) {
4111 efree((void *)message_buf);
4112 }
4113 mbfl_memory_device_clear(&device);
4114 zend_hash_destroy(&ht_headers);
4115 }
4116
4117 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4118 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4119 #undef APPEND_ONE_CHAR
4120 #undef SEPARATE_SMART_STR
4121 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4122 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4123 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4124 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4125 /* }}} */
4126
4127 /* {{{ proto mixed mb_get_info([string type])
4128 Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)4129 PHP_FUNCTION(mb_get_info)
4130 {
4131 char *typ = NULL;
4132 int typ_len, n;
4133 char *name;
4134 const struct mb_overload_def *over_func;
4135 zval *row1, *row2;
4136 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4137 enum mbfl_no_encoding *entry;
4138 #ifdef ZEND_MULTIBYTE
4139 zval *row3;
4140 #endif /* ZEND_MULTIBYTE */
4141
4142 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4143 RETURN_FALSE;
4144 }
4145
4146 if (!typ || !strcasecmp("all", typ)) {
4147 array_init(return_value);
4148 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4149 add_assoc_string(return_value, "internal_encoding", name, 1);
4150 }
4151 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4152 add_assoc_string(return_value, "http_input", name, 1);
4153 }
4154 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4155 add_assoc_string(return_value, "http_output", name, 1);
4156 }
4157 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4158 add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4159 }
4160 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4161 if (MBSTRG(func_overload)){
4162 over_func = &(mb_ovld[0]);
4163 MAKE_STD_ZVAL(row1);
4164 array_init(row1);
4165 while (over_func->type > 0) {
4166 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4167 add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4168 }
4169 over_func++;
4170 }
4171 add_assoc_zval(return_value, "func_overload_list", row1);
4172 } else {
4173 add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4174 }
4175 if (lang != NULL) {
4176 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4177 add_assoc_string(return_value, "mail_charset", name, 1);
4178 }
4179 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4180 add_assoc_string(return_value, "mail_header_encoding", name, 1);
4181 }
4182 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4183 add_assoc_string(return_value, "mail_body_encoding", name, 1);
4184 }
4185 }
4186 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4187 if (MBSTRG(encoding_translation)) {
4188 add_assoc_string(return_value, "encoding_translation", "On", 1);
4189 } else {
4190 add_assoc_string(return_value, "encoding_translation", "Off", 1);
4191 }
4192 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4193 add_assoc_string(return_value, "language", name, 1);
4194 }
4195 n = MBSTRG(current_detect_order_list_size);
4196 entry = MBSTRG(current_detect_order_list);
4197 if(n > 0) {
4198 MAKE_STD_ZVAL(row2);
4199 array_init(row2);
4200 while (n > 0) {
4201 if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4202 add_next_index_string(row2, name, 1);
4203 }
4204 entry++;
4205 n--;
4206 }
4207 add_assoc_zval(return_value, "detect_order", row2);
4208 }
4209 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4210 add_assoc_string(return_value, "substitute_character", "none", 1);
4211 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4212 add_assoc_string(return_value, "substitute_character", "long", 1);
4213 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4214 add_assoc_string(return_value, "substitute_character", "entity", 1);
4215 } else {
4216 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4217 }
4218 if (MBSTRG(strict_detection)) {
4219 add_assoc_string(return_value, "strict_detection", "On", 1);
4220 } else {
4221 add_assoc_string(return_value, "strict_detection", "Off", 1);
4222 }
4223 #ifdef ZEND_MULTIBYTE
4224 entry = MBSTRG(script_encoding_list);
4225 n = MBSTRG(script_encoding_list_size);
4226 if(n > 0) {
4227 MAKE_STD_ZVAL(row3);
4228 array_init(row3);
4229 while (n > 0) {
4230 if ((name = (char *)mbfl_no_encoding2name(*entry)) != NULL) {
4231 add_next_index_string(row3, name, 1);
4232 }
4233 entry++;
4234 n--;
4235 }
4236 add_assoc_zval(return_value, "script_encoding", row3);
4237 }
4238 #endif /* ZEND_MULTIBYTE */
4239 } else if (!strcasecmp("internal_encoding", typ)) {
4240 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_internal_encoding))) != NULL) {
4241 RETVAL_STRING(name, 1);
4242 }
4243 } else if (!strcasecmp("http_input", typ)) {
4244 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(http_input_identify))) != NULL) {
4245 RETVAL_STRING(name, 1);
4246 }
4247 } else if (!strcasecmp("http_output", typ)) {
4248 if ((name = (char *)mbfl_no_encoding2name(MBSTRG(current_http_output_encoding))) != NULL) {
4249 RETVAL_STRING(name, 1);
4250 }
4251 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4252 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4253 RETVAL_STRING(name, 1);
4254 }
4255 } else if (!strcasecmp("func_overload", typ)) {
4256 RETVAL_LONG(MBSTRG(func_overload));
4257 } else if (!strcasecmp("func_overload_list", typ)) {
4258 if (MBSTRG(func_overload)){
4259 over_func = &(mb_ovld[0]);
4260 array_init(return_value);
4261 while (over_func->type > 0) {
4262 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4263 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4264 }
4265 over_func++;
4266 }
4267 } else {
4268 RETVAL_STRING("no overload", 1);
4269 }
4270 } else if (!strcasecmp("mail_charset", typ)) {
4271 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4272 RETVAL_STRING(name, 1);
4273 }
4274 } else if (!strcasecmp("mail_header_encoding", typ)) {
4275 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4276 RETVAL_STRING(name, 1);
4277 }
4278 } else if (!strcasecmp("mail_body_encoding", typ)) {
4279 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4280 RETVAL_STRING(name, 1);
4281 }
4282 } else if (!strcasecmp("illegal_chars", typ)) {
4283 RETVAL_LONG(MBSTRG(illegalchars));
4284 } else if (!strcasecmp("encoding_translation", typ)) {
4285 if (MBSTRG(encoding_translation)) {
4286 RETVAL_STRING("On", 1);
4287 } else {
4288 RETVAL_STRING("Off", 1);
4289 }
4290 } else if (!strcasecmp("language", typ)) {
4291 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4292 RETVAL_STRING(name, 1);
4293 }
4294 } else if (!strcasecmp("detect_order", typ)) {
4295 n = MBSTRG(current_detect_order_list_size);
4296 entry = MBSTRG(current_detect_order_list);
4297 if(n > 0) {
4298 array_init(return_value);
4299 while (n > 0) {
4300 name = (char *)mbfl_no_encoding2name(*entry);
4301 if (name) {
4302 add_next_index_string(return_value, name, 1);
4303 }
4304 entry++;
4305 n--;
4306 }
4307 }
4308 } else if (!strcasecmp("substitute_character", typ)) {
4309 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4310 RETVAL_STRING("none", 1);
4311 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4312 RETVAL_STRING("long", 1);
4313 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4314 RETVAL_STRING("entity", 1);
4315 } else {
4316 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4317 }
4318 } else if (!strcasecmp("strict_detection", typ)) {
4319 if (MBSTRG(strict_detection)) {
4320 RETVAL_STRING("On", 1);
4321 } else {
4322 RETVAL_STRING("Off", 1);
4323 }
4324 } else {
4325 #ifdef ZEND_MULTIBYTE
4326 if (!strcasecmp("script_encoding", typ)) {
4327 entry = MBSTRG(script_encoding_list);
4328 n = MBSTRG(script_encoding_list_size);
4329 if(n > 0) {
4330 array_init(return_value);
4331 while (n > 0) {
4332 name = (char *)mbfl_no_encoding2name(*entry);
4333 if (name) {
4334 add_next_index_string(return_value, name, 1);
4335 }
4336 entry++;
4337 n--;
4338 }
4339 }
4340 return;
4341 }
4342 #endif /* ZEND_MULTIBYTE */
4343 RETURN_FALSE;
4344 }
4345 }
4346 /* }}} */
4347
4348 /* {{{ proto bool mb_check_encoding([string var[, string encoding]])
4349 Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)4350 PHP_FUNCTION(mb_check_encoding)
4351 {
4352 char *var = NULL;
4353 int var_len;
4354 char *enc = NULL;
4355 int enc_len;
4356 mbfl_buffer_converter *convd;
4357 enum mbfl_no_encoding no_encoding = MBSTRG(current_internal_encoding);
4358 mbfl_string string, result, *ret = NULL;
4359 long illegalchars = 0;
4360
4361 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4362 RETURN_FALSE;
4363 }
4364
4365 if (var == NULL) {
4366 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4367 }
4368
4369 if (enc != NULL) {
4370 no_encoding = mbfl_name2no_encoding(enc);
4371 if (no_encoding == mbfl_no_encoding_invalid || no_encoding == mbfl_no_encoding_pass) {
4372 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4373 RETURN_FALSE;
4374 }
4375 }
4376
4377 convd = mbfl_buffer_converter_new(no_encoding, no_encoding, 0);
4378 if (convd == NULL) {
4379 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4380 RETURN_FALSE;
4381 }
4382 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4383 mbfl_buffer_converter_illegal_substchar(convd, 0);
4384
4385 /* initialize string */
4386 mbfl_string_init_set(&string, mbfl_no_language_neutral, no_encoding);
4387 mbfl_string_init(&result);
4388
4389 string.val = (unsigned char *)var;
4390 string.len = var_len;
4391 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4392 illegalchars = mbfl_buffer_illegalchars(convd);
4393 mbfl_buffer_converter_delete(convd);
4394
4395 RETVAL_FALSE;
4396 if (ret != NULL) {
4397 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4398 RETVAL_TRUE;
4399 }
4400 mbfl_string_clear(&result);
4401 }
4402 }
4403 /* }}} */
4404
4405 /* {{{ MBSTRING_API int php_mb_encoding_translation() */
php_mb_encoding_translation(TSRMLS_D)4406 MBSTRING_API int php_mb_encoding_translation(TSRMLS_D)
4407 {
4408 return MBSTRG(encoding_translation);
4409 }
4410 /* }}} */
4411
4412 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4413 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4414 {
4415 if (enc != NULL) {
4416 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4417 if (enc->mblen_table != NULL) {
4418 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4419 }
4420 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4421 return 2;
4422 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4423 return 4;
4424 }
4425 }
4426 return 1;
4427 }
4428 /* }}} */
4429
4430 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s TSRMLS_DC)4431 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4432 {
4433 return php_mb_mbchar_bytes_ex(s,
4434 mbfl_no2encoding(MBSTRG(internal_encoding)));
4435 }
4436 /* }}} */
4437
4438 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4439 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4440 {
4441 register const char *p = s;
4442 char *last=NULL;
4443
4444 if (nbytes == (size_t)-1) {
4445 size_t nb = 0;
4446
4447 while (*p != '\0') {
4448 if (nb == 0) {
4449 if ((unsigned char)*p == (unsigned char)c) {
4450 last = (char *)p;
4451 }
4452 nb = php_mb_mbchar_bytes_ex(p, enc);
4453 if (nb == 0) {
4454 return NULL; /* something is going wrong! */
4455 }
4456 }
4457 --nb;
4458 ++p;
4459 }
4460 } else {
4461 register size_t bcnt = nbytes;
4462 register size_t nbytes_char;
4463 while (bcnt > 0) {
4464 if ((unsigned char)*p == (unsigned char)c) {
4465 last = (char *)p;
4466 }
4467 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4468 if (bcnt < nbytes_char) {
4469 return NULL;
4470 }
4471 p += nbytes_char;
4472 bcnt -= nbytes_char;
4473 }
4474 }
4475 return last;
4476 }
4477 /* }}} */
4478
4479 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes TSRMLS_DC)4480 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4481 {
4482 return php_mb_safe_strrchr_ex(s, c, nbytes,
4483 mbfl_no2encoding(MBSTRG(internal_encoding)));
4484 }
4485 /* }}} */
4486
4487 /* {{{ MBSTRING_API char *php_mb_strrchr() */
php_mb_strrchr(const char * s,char c TSRMLS_DC)4488 MBSTRING_API char *php_mb_strrchr(const char *s, char c TSRMLS_DC)
4489 {
4490 return php_mb_safe_strrchr(s, c, -1 TSRMLS_CC);
4491 }
4492 /* }}} */
4493
4494 /* {{{ MBSTRING_API size_t php_mb_gpc_mbchar_bytes() */
php_mb_gpc_mbchar_bytes(const char * s TSRMLS_DC)4495 MBSTRING_API size_t php_mb_gpc_mbchar_bytes(const char *s TSRMLS_DC)
4496 {
4497
4498 if (MBSTRG(http_input_identify) != mbfl_no_encoding_invalid){
4499 return php_mb_mbchar_bytes_ex(s,
4500 mbfl_no2encoding(MBSTRG(http_input_identify)));
4501 } else {
4502 return php_mb_mbchar_bytes_ex(s,
4503 mbfl_no2encoding(MBSTRG(internal_encoding)));
4504 }
4505 }
4506 /* }}} */
4507
4508 /* {{{ MBSTRING_API int php_mb_gpc_encoding_converter() */
php_mb_gpc_encoding_converter(char ** str,int * len,int num,const char * encoding_to,const char * encoding_from TSRMLS_DC)4509 MBSTRING_API int php_mb_gpc_encoding_converter(char **str, int *len, int num, const char *encoding_to, const char *encoding_from TSRMLS_DC)
4510 {
4511 int i;
4512 mbfl_string string, result, *ret = NULL;
4513 enum mbfl_no_encoding from_encoding, to_encoding;
4514 mbfl_buffer_converter *convd;
4515
4516 if (encoding_to) {
4517 /* new encoding */
4518 to_encoding = mbfl_name2no_encoding(encoding_to);
4519 if (to_encoding == mbfl_no_encoding_invalid) {
4520 return -1;
4521 }
4522 } else {
4523 to_encoding = MBSTRG(current_internal_encoding);
4524 }
4525 if (encoding_from) {
4526 /* old encoding */
4527 from_encoding = mbfl_name2no_encoding(encoding_from);
4528 if (from_encoding == mbfl_no_encoding_invalid) {
4529 return -1;
4530 }
4531 } else {
4532 from_encoding = MBSTRG(http_input_identify);
4533 }
4534
4535 if (from_encoding == mbfl_no_encoding_pass) {
4536 return 0;
4537 }
4538
4539 /* initialize string */
4540 mbfl_string_init(&string);
4541 mbfl_string_init(&result);
4542 string.no_encoding = from_encoding;
4543 string.no_language = MBSTRG(language);
4544
4545 for (i=0; i<num; i++){
4546 string.val = (unsigned char *)str[i];
4547 string.len = len[i];
4548
4549 /* initialize converter */
4550 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
4551 if (convd == NULL) {
4552 return -1;
4553 }
4554 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
4555 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
4556
4557 /* do it */
4558 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4559 if (ret != NULL) {
4560 efree(str[i]);
4561 str[i] = (char *)ret->val;
4562 len[i] = (int)ret->len;
4563 }
4564
4565 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
4566 mbfl_buffer_converter_delete(convd);
4567 }
4568
4569 return ret ? 0 : -1;
4570 }
4571 /* }}} */
4572
4573 /* {{{ MBSTRING_API int php_mb_gpc_encoding_detector()
4574 */
php_mb_gpc_encoding_detector(char ** arg_string,int * arg_length,int num,char * arg_list TSRMLS_DC)4575 MBSTRING_API int php_mb_gpc_encoding_detector(char **arg_string, int *arg_length, int num, char *arg_list TSRMLS_DC)
4576 {
4577 mbfl_string string;
4578 enum mbfl_no_encoding *elist;
4579 enum mbfl_no_encoding encoding = mbfl_no_encoding_invalid;
4580 mbfl_encoding_detector *identd = NULL;
4581
4582 int size;
4583 enum mbfl_no_encoding *list;
4584
4585 if (MBSTRG(http_input_list_size) == 1 &&
4586 MBSTRG(http_input_list)[0] == mbfl_no_encoding_pass) {
4587 MBSTRG(http_input_identify) = mbfl_no_encoding_pass;
4588 return SUCCESS;
4589 }
4590
4591 if (MBSTRG(http_input_list_size) == 1 &&
4592 MBSTRG(http_input_list)[0] != mbfl_no_encoding_auto &&
4593 mbfl_no_encoding2name(MBSTRG(http_input_list)[0]) != NULL) {
4594 MBSTRG(http_input_identify) = MBSTRG(http_input_list)[0];
4595 return SUCCESS;
4596 }
4597
4598 if (arg_list && strlen(arg_list)>0) {
4599 /* make encoding list */
4600 list = NULL;
4601 size = 0;
4602 php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 TSRMLS_CC);
4603
4604 if (size > 0 && list != NULL) {
4605 elist = list;
4606 } else {
4607 elist = MBSTRG(current_detect_order_list);
4608 size = MBSTRG(current_detect_order_list_size);
4609 if (size <= 0){
4610 elist = MBSTRG(default_detect_order_list);
4611 size = MBSTRG(default_detect_order_list_size);
4612 }
4613 }
4614 } else {
4615 elist = MBSTRG(current_detect_order_list);
4616 size = MBSTRG(current_detect_order_list_size);
4617 if (size <= 0){
4618 elist = MBSTRG(default_detect_order_list);
4619 size = MBSTRG(default_detect_order_list_size);
4620 }
4621 }
4622
4623 mbfl_string_init(&string);
4624 string.no_language = MBSTRG(language);
4625
4626 identd = mbfl_encoding_detector_new(elist, size, MBSTRG(strict_detection));
4627
4628 if (identd) {
4629 int n = 0;
4630 while(n < num){
4631 string.val = (unsigned char *)arg_string[n];
4632 string.len = arg_length[n];
4633 if (mbfl_encoding_detector_feed(identd, &string)) {
4634 break;
4635 }
4636 n++;
4637 }
4638 encoding = mbfl_encoding_detector_judge(identd);
4639 mbfl_encoding_detector_delete(identd);
4640 }
4641
4642 if (encoding != mbfl_no_encoding_invalid) {
4643 MBSTRG(http_input_identify) = encoding;
4644 return SUCCESS;
4645 } else {
4646 return FAILURE;
4647 }
4648 }
4649 /* }}} */
4650
4651 /* {{{ MBSTRING_API int php_mb_stripos()
4652 */
php_mb_stripos(int mode,const char * old_haystack,unsigned int old_haystack_len,const char * old_needle,unsigned int old_needle_len,long offset,const char * from_encoding TSRMLS_DC)4653 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4654 {
4655 int n;
4656 mbfl_string haystack, needle;
4657 n = -1;
4658
4659 mbfl_string_init(&haystack);
4660 mbfl_string_init(&needle);
4661 haystack.no_language = MBSTRG(language);
4662 haystack.no_encoding = MBSTRG(current_internal_encoding);
4663 needle.no_language = MBSTRG(language);
4664 needle.no_encoding = MBSTRG(current_internal_encoding);
4665
4666 do {
4667 size_t len = 0;
4668 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4669 haystack.len = len;
4670
4671 if (!haystack.val) {
4672 break;
4673 }
4674
4675 if (haystack.len <= 0) {
4676 break;
4677 }
4678
4679 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4680 needle.len = len;
4681
4682 if (!needle.val) {
4683 break;
4684 }
4685
4686 if (needle.len <= 0) {
4687 break;
4688 }
4689
4690 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4691 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4692 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4693 break;
4694 }
4695
4696 {
4697 int haystack_char_len = mbfl_strlen(&haystack);
4698
4699 if (mode) {
4700 if ((offset > 0 && offset > haystack_char_len) ||
4701 (offset < 0 && -offset > haystack_char_len)) {
4702 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4703 break;
4704 }
4705 } else {
4706 if (offset < 0 || offset > haystack_char_len) {
4707 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4708 break;
4709 }
4710 }
4711 }
4712
4713 n = mbfl_strpos(&haystack, &needle, offset, mode);
4714 } while(0);
4715
4716 if (haystack.val) {
4717 efree(haystack.val);
4718 }
4719
4720 if (needle.val) {
4721 efree(needle.val);
4722 }
4723
4724 return n;
4725 }
4726 /* }}} */
4727
4728 #ifdef ZEND_MULTIBYTE
4729 /* {{{ php_mb_set_zend_encoding() */
php_mb_set_zend_encoding(TSRMLS_D)4730 static int php_mb_set_zend_encoding(TSRMLS_D)
4731 {
4732 /* 'd better use mbfl_memory_device? */
4733 char *name, *list = NULL;
4734 int n, *entry, list_size = 0;
4735 zend_encoding_detector encoding_detector;
4736 zend_encoding_converter encoding_converter;
4737 zend_encoding_oddlen encoding_oddlen;
4738
4739 /* notify script encoding to Zend Engine */
4740 entry = MBSTRG(script_encoding_list);
4741 n = MBSTRG(script_encoding_list_size);
4742 while (n > 0) {
4743 name = (char *)mbfl_no_encoding2name(*entry);
4744 if (name) {
4745 list_size += strlen(name) + 1;
4746 if (!list) {
4747 list = (char*)emalloc(list_size);
4748 *list = '\0';
4749 } else {
4750 list = (char*)erealloc(list, list_size);
4751 strcat(list, ",");
4752 }
4753 strcat(list, name);
4754 }
4755 entry++;
4756 n--;
4757 }
4758 zend_multibyte_set_script_encoding(list, (list ? strlen(list) : 0) TSRMLS_CC);
4759 if (list) {
4760 efree(list);
4761 }
4762 encoding_detector = php_mb_encoding_detector;
4763 encoding_converter = php_mb_encoding_converter;
4764 encoding_oddlen = php_mb_oddlen;
4765
4766 /* TODO: make independent from mbstring.encoding_translation? */
4767 if (MBSTRG(encoding_translation)) {
4768 /* notify internal encoding to Zend Engine */
4769 name = (char*)mbfl_no_encoding2name(MBSTRG(current_internal_encoding));
4770 zend_multibyte_set_internal_encoding(name TSRMLS_CC);
4771 }
4772
4773 zend_multibyte_set_functions(encoding_detector, encoding_converter, encoding_oddlen TSRMLS_CC);
4774
4775 return 0;
4776 }
4777 /* }}} */
4778
4779 /* {{{ char *php_mb_encoding_detector()
4780 * Interface for Zend Engine
4781 */
/*
 * Zend Engine callback: guesses the encoding of arg_string from the
 * comma-separated candidate list arg_list.
 *
 * Returns an estrdup'ed encoding name, or NULL when the list parses to no
 * entries or no candidate is identified.
 */
static char* php_mb_encoding_detector(const unsigned char *arg_string, size_t arg_length, char *arg_list TSRMLS_DC)
{
	mbfl_string subject;
	const char *detected;
	enum mbfl_no_encoding *candidates;
	int count = 0, *parsed = NULL;

	/* make encoding list */
	php_mb_parse_encoding_list(arg_list, strlen(arg_list), &parsed, &count, 0 TSRMLS_CC);
	if (count <= 0) {
		return NULL;
	}

	if (parsed != NULL) {
		candidates = parsed;
	} else {
		candidates = MBSTRG(current_detect_order_list);
		count = MBSTRG(current_detect_order_list_size);
	}

	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.val = (unsigned char *)arg_string;
	subject.len = arg_length;

	detected = mbfl_identify_encoding_name(&subject, candidates, count, 0);

	/* The parsed list is only needed for the identification call. */
	if (parsed != NULL) {
		efree((void *)parsed);
	}

	return (detected != NULL) ? estrdup(detected) : NULL;
}
4817 /* }}} */
4818
4819 /* {{{ int php_mb_encoding_converter() */
/*
 * Zend Engine callback: converts from[0..from_length) from encoding_from
 * to encoding_to.
 *
 * On success *to and *to_length receive the converted buffer and its
 * length and 0 is returned. Returns -1 (leaving the outputs untouched) on
 * an unknown encoding name, converter allocation failure, or conversion
 * failure. Illegal characters seen are added to MBSTRG(illegalchars).
 */
static int php_mb_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const char *encoding_to, const char *encoding_from TSRMLS_DC)
{
	mbfl_string src, dst, *converted;
	enum mbfl_no_encoding src_enc, dst_enc;
	mbfl_buffer_converter *conv;
	int status = -1;

	/* new encoding */
	dst_enc = mbfl_name2no_encoding(encoding_to);
	if (dst_enc == mbfl_no_encoding_invalid) {
		return -1;
	}

	/* old encoding */
	src_enc = mbfl_name2no_encoding(encoding_from);
	if (src_enc == mbfl_no_encoding_invalid) {
		return -1;
	}

	/* describe the input */
	mbfl_string_init(&src);
	mbfl_string_init(&dst);
	src.no_encoding = src_enc;
	src.no_language = MBSTRG(language);
	src.val = (unsigned char*)from;
	src.len = from_length;

	/* set up the converter with the current illegal-character policy */
	conv = mbfl_buffer_converter_new(src_enc, dst_enc, src.len);
	if (conv == NULL) {
		return -1;
	}
	mbfl_buffer_converter_illegal_mode(conv, MBSTRG(current_filter_illegal_mode));
	mbfl_buffer_converter_illegal_substchar(conv, MBSTRG(current_filter_illegal_substchar));

	/* convert and hand the result to the caller */
	converted = mbfl_buffer_converter_feed_result(conv, &src, &dst);
	if (converted != NULL) {
		*to = converted->val;
		*to_length = converted->len;
		status = 0;
	}

	MBSTRG(illegalchars) += mbfl_buffer_illegalchars(conv);
	mbfl_buffer_converter_delete(conv);

	return status;
}
4864 /* }}} */
4865
4866 /* {{{ int php_mb_oddlen()
4867 * returns number of odd (e.g. appears only first byte of multibyte
4868 * character) chars
4869 */
/*
 * Zend Engine callback: wraps mbfl_oddlen() for a raw buffer and an
 * encoding name. Returns 0 when the encoding name is not recognised,
 * otherwise whatever mbfl_oddlen() reports for the buffer.
 */
static size_t php_mb_oddlen(const unsigned char *string, size_t length, const char *encoding TSRMLS_DC)
{
	mbfl_string subject;
	enum mbfl_no_encoding enc_no = mbfl_name2no_encoding(encoding);

	if (enc_no == mbfl_no_encoding_invalid) {
		return 0;
	}

	mbfl_string_init(&subject);
	subject.no_language = MBSTRG(language);
	subject.no_encoding = enc_no;
	subject.val = (unsigned char *)string;
	subject.len = length;

	return mbfl_oddlen(&subject);
}
4885 /* }}} */
4886 #endif /* ZEND_MULTIBYTE */
4887
4888 #endif /* HAVE_MBSTRING */
4889
4890 /*
4891 * Local variables:
4892 * tab-width: 4
4893 * c-basic-offset: 4
4894 * End:
4895 * vim600: fdm=marker
4896 * vim: noet sw=4 ts=4
4897 */
4898