1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp> |
14 | Rui Hirokawa <hirokawa@php.net> |
15 | Hironori Sato <satoh@jpnnet.com> |
16 | Shigeru Kanemoto <sgk@happysize.co.jp> |
17 +----------------------------------------------------------------------+
18 */
19
20 /* {{{ includes */
21 #include "libmbfl/config.h"
22 #include "php.h"
23 #include "php_ini.h"
24 #include "php_variables.h"
25 #include "mbstring.h"
26 #include "ext/standard/php_string.h"
27 #include "ext/standard/php_mail.h"
28 #include "ext/standard/exec.h"
29 #include "ext/standard/url.h"
30 #include "main/php_output.h"
31 #include "ext/standard/info.h"
32 #include "ext/pcre/php_pcre.h"
33
34 #include "libmbfl/mbfl/mbfilter_8bit.h"
35 #include "libmbfl/mbfl/mbfilter_pass.h"
36 #include "libmbfl/mbfl/mbfilter_wchar.h"
37 #include "libmbfl/filters/mbfilter_ascii.h"
38 #include "libmbfl/filters/mbfilter_base64.h"
39 #include "libmbfl/filters/mbfilter_qprint.h"
40 #include "libmbfl/filters/mbfilter_ucs4.h"
41 #include "libmbfl/filters/mbfilter_utf8.h"
42 #include "libmbfl/filters/mbfilter_tl_jisx0201_jisx0208.h"
43
44 #include "php_variables.h"
45 #include "php_globals.h"
46 #include "rfc1867.h"
47 #include "php_content_types.h"
48 #include "SAPI.h"
49 #include "php_unicode.h"
50 #include "TSRM.h"
51
52 #include "mb_gpc.h"
53
54 #ifdef HAVE_MBREGEX
55 # include "php_mbregex.h"
56 #endif
57
58 #include "zend_multibyte.h"
59 #include "mbstring_arginfo.h"
60 /* }}} */
61
62 /* {{{ prototypes */
/* Storage for this module's globals; the rest of the file reads and writes it through MBSTRG(). */
ZEND_DECLARE_MODULE_GLOBALS(mbstring)

/* Globals init/shutdown hooks; both are referenced from mbstring_module_entry. */
static PHP_GINIT_FUNCTION(mbstring);
static PHP_GSHUTDOWN_FUNCTION(mbstring);

/* Forward declarations of file-local helpers (static, so they are defined in this translation unit). */
static void php_mb_populate_current_detect_order_list(void);

static int php_mb_encoding_translation(void);

static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size);

static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding);

static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc);

static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc);
79 /* }}} */
80
81 /* {{{ php_mb_default_identify_list */
/* One row of the language -> default detect order table (php_mb_default_identify_list). */
typedef struct _php_mb_nls_ident_list {
	enum mbfl_no_language lang; /* language this row applies to */
	const enum mbfl_no_encoding *list; /* encoding ids for that language, in list order */
	size_t list_size; /* number of elements in list */
} php_mb_nls_ident_list;
87
/* Default detect order for mbfl_no_language_japanese. */
static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_jis,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_jp,
	mbfl_no_encoding_sjis
};

/* Default detect order for mbfl_no_language_simplified_chinese. */
static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_cn,
	mbfl_no_encoding_cp936
};

/* Default detect order for mbfl_no_language_traditional_chinese. */
static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_tw,
	mbfl_no_encoding_big5
};

/* Default detect order for mbfl_no_language_korean. */
static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_euc_kr,
	mbfl_no_encoding_uhc
};

/* Default detect order for mbfl_no_language_russian. */
static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8r,
	mbfl_no_encoding_cp1251,
	mbfl_no_encoding_cp866
};

/* Default detect order for mbfl_no_language_armenian. */
static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_armscii8
};

/* Default detect order for mbfl_no_language_turkish. */
static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_cp1254,
	mbfl_no_encoding_8859_9
};

/* Default detect order for mbfl_no_language_ukrainian. */
static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8,
	mbfl_no_encoding_koi8u
};

/* Default detect order for mbfl_no_language_neutral; also the fallback used by
 * php_mb_nls_get_default_detect_order_list() when a language has no row. */
static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
	mbfl_no_encoding_ascii,
	mbfl_no_encoding_utf8
};
148
149
150 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
151 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
152 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
153 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
154 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
155 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
156 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
157 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
158 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
159 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
160 };
161
162 /* }}} */
163
164 /* {{{ mbstring_deps[] */
/* Dependency table referenced by mbstring_module_entry: "pcre" is declared as required. */
static const zend_module_dep mbstring_deps[] = {
	ZEND_MOD_REQUIRED("pcre")
	ZEND_MOD_END
};
169 /* }}} */
170
171 /* {{{ zend_module_entry mbstring_module_entry */
/* Module descriptor for "mbstring": ties together the dependency table, the
 * function table (ext_functions), the module/request startup and shutdown
 * callbacks, the info callback, the version string and the globals hooks. */
zend_module_entry mbstring_module_entry = {
	STANDARD_MODULE_HEADER_EX,
	NULL,
	mbstring_deps,
	"mbstring",
	ext_functions,
	PHP_MINIT(mbstring),
	PHP_MSHUTDOWN(mbstring),
	PHP_RINIT(mbstring),
	PHP_RSHUTDOWN(mbstring),
	PHP_MINFO(mbstring),
	PHP_MBSTRING_VERSION,
	PHP_MODULE_GLOBALS(mbstring),
	PHP_GINIT(mbstring),
	PHP_GSHUTDOWN(mbstring),
	NULL,
	STANDARD_MODULE_PROPERTIES_EX
};
190 /* }}} */
191
192 /* {{{ static sapi_post_entry php_post_entries[] */
/* POST handler table that uses php_std_post_handler for form data.
 * OnUpdate_mbstring_encoding_translation registers it when encoding
 * translation is off and unregisters it when translation is on.
 * The all-NULL row terminates the table. */
static const sapi_post_entry php_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
198 /* }}} */
199
/* Emitted only when COMPILE_DL_MBSTRING is defined (presumably the
 * dynamically-loadable build - confirm against the build system). */
#ifdef COMPILE_DL_MBSTRING
#ifdef ZTS
ZEND_TSRMLS_CACHE_DEFINE()
#endif
ZEND_GET_MODULE(mbstring)
#endif
206
207 /* {{{ static sapi_post_entry mbstr_post_entries[] */
/* POST handler table that routes form data through php_mb_post_handler.
 * OnUpdate_mbstring_encoding_translation registers it when encoding
 * translation is on and unregisters it when translation is off.
 * The all-NULL row terminates the table. */
static const sapi_post_entry mbstr_post_entries[] = {
	{ DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
	{ MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
	{ NULL, 0, NULL, NULL }
};
213 /* }}} */
214
php_mb_get_encoding(zend_string * encoding_name,uint32_t arg_num)215 static const mbfl_encoding *php_mb_get_encoding(zend_string *encoding_name, uint32_t arg_num) {
216 if (encoding_name) {
217 const mbfl_encoding *encoding;
218 zend_string *last_encoding_name = MBSTRG(last_used_encoding_name);
219 if (last_encoding_name && (last_encoding_name == encoding_name
220 || !strcasecmp(ZSTR_VAL(encoding_name), ZSTR_VAL(last_encoding_name)))) {
221 return MBSTRG(last_used_encoding);
222 }
223
224 encoding = mbfl_name2encoding(ZSTR_VAL(encoding_name));
225 if (!encoding) {
226 zend_argument_value_error(arg_num, "must be a valid encoding, \"%s\" given", ZSTR_VAL(encoding_name));
227 return NULL;
228 }
229
230 if (last_encoding_name) {
231 zend_string_release(last_encoding_name);
232 }
233 MBSTRG(last_used_encoding_name) = zend_string_copy(encoding_name);
234 MBSTRG(last_used_encoding) = encoding;
235 return encoding;
236 } else {
237 return MBSTRG(current_internal_encoding);
238 }
239 }
240
php_mb_get_encoding_or_pass(const char * encoding_name)241 static const mbfl_encoding *php_mb_get_encoding_or_pass(const char *encoding_name) {
242 if (strcmp(encoding_name, "pass") == 0) {
243 return &mbfl_encoding_pass;
244 }
245
246 return mbfl_name2encoding(encoding_name);
247 }
248
/* Count the ',' bytes in the half-open range [p, end). */
static size_t count_commas(const char *p, const char *end) {
	size_t total = 0;
	for (const char *cur = p; cur < end; cur++) {
		if (*cur == ',') {
			total++;
		}
	}
	return total;
}
257
258 /* {{{ static zend_result php_mb_parse_encoding_list()
259 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
260 * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
261 */
php_mb_parse_encoding_list(const char * value,size_t value_length,const mbfl_encoding *** return_list,size_t * return_size,bool persistent,uint32_t arg_num,zend_bool allow_pass_encoding)262 static zend_result php_mb_parse_encoding_list(const char *value, size_t value_length,
263 const mbfl_encoding ***return_list, size_t *return_size, bool persistent, uint32_t arg_num,
264 zend_bool allow_pass_encoding)
265 {
266 if (value == NULL || value_length == 0) {
267 *return_list = NULL;
268 *return_size = 0;
269 return SUCCESS;
270 } else {
271 zend_bool included_auto;
272 size_t n, size;
273 char *p1, *endp, *tmpstr;
274 const mbfl_encoding **entry, **list;
275
276 /* copy the value string for work */
277 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
278 tmpstr = (char *)estrndup(value+1, value_length-2);
279 value_length -= 2;
280 } else {
281 tmpstr = (char *)estrndup(value, value_length);
282 }
283
284 endp = tmpstr + value_length;
285 size = 1 + count_commas(tmpstr, endp) + MBSTRG(default_detect_order_list_size);
286 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
287 entry = list;
288 n = 0;
289 included_auto = 0;
290 p1 = tmpstr;
291 while (1) {
292 char *comma = (char *) php_memnstr(p1, ",", 1, endp);
293 char *p = comma ? comma : endp;
294 *p = '\0';
295 /* trim spaces */
296 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
297 p1++;
298 }
299 p--;
300 while (p > p1 && (*p == ' ' || *p == '\t')) {
301 *p = '\0';
302 p--;
303 }
304 /* convert to the encoding number and check encoding */
305 if (strcasecmp(p1, "auto") == 0) {
306 if (!included_auto) {
307 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
308 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
309 size_t i;
310 included_auto = 1;
311 for (i = 0; i < identify_list_size; i++) {
312 *entry++ = mbfl_no2encoding(*src++);
313 n++;
314 }
315 }
316 } else {
317 const mbfl_encoding *encoding =
318 allow_pass_encoding ? php_mb_get_encoding_or_pass(p1) : mbfl_name2encoding(p1);
319 if (!encoding) {
320 /* Called from an INI setting modification */
321 if (arg_num == 0) {
322 php_error_docref("ref.mbstring", E_WARNING, "INI setting contains invalid encoding \"%s\"", p1);
323 } else {
324 zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", p1);
325 }
326 efree(tmpstr);
327 pefree(ZEND_VOIDP(list), persistent);
328 return FAILURE;
329 }
330
331 *entry++ = encoding;
332 n++;
333 }
334 if (n >= size || comma == NULL) {
335 break;
336 }
337 p1 = comma + 1;
338 }
339 *return_list = list;
340 *return_size = n;
341 efree(tmpstr);
342 }
343
344 return SUCCESS;
345 }
346 /* }}} */
347
348 /* {{{ static int php_mb_parse_encoding_array()
349 * Return FAILURE if input contains any illegal encoding, otherwise SUCCESS.
350 * Emits a ValueError in function context and a warning in INI context, in INI context arg_num must be 0.
351 */
php_mb_parse_encoding_array(HashTable * target_hash,const mbfl_encoding *** return_list,size_t * return_size,uint32_t arg_num)352 static int php_mb_parse_encoding_array(HashTable *target_hash, const mbfl_encoding ***return_list,
353 size_t *return_size, uint32_t arg_num)
354 {
355 /* Allocate enough space to include the default detect order if "auto" is used. */
356 size_t size = zend_hash_num_elements(target_hash) + MBSTRG(default_detect_order_list_size);
357 const mbfl_encoding **list = ecalloc(size, sizeof(mbfl_encoding*));
358 const mbfl_encoding **entry = list;
359 zend_bool included_auto = 0;
360 size_t n = 0;
361 zval *hash_entry;
362 ZEND_HASH_FOREACH_VAL(target_hash, hash_entry) {
363 zend_string *encoding_str = zval_try_get_string(hash_entry);
364 if (UNEXPECTED(!encoding_str)) {
365 efree(ZEND_VOIDP(list));
366 return FAILURE;
367 }
368
369 if (strcasecmp(ZSTR_VAL(encoding_str), "auto") == 0) {
370 if (!included_auto) {
371 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
372 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
373 size_t j;
374
375 included_auto = 1;
376 for (j = 0; j < identify_list_size; j++) {
377 *entry++ = mbfl_no2encoding(*src++);
378 n++;
379 }
380 }
381 } else {
382 const mbfl_encoding *encoding = mbfl_name2encoding(ZSTR_VAL(encoding_str));
383 if (encoding) {
384 *entry++ = encoding;
385 n++;
386 } else {
387 zend_argument_value_error(arg_num, "contains invalid encoding \"%s\"", ZSTR_VAL(encoding_str));
388 zend_string_release(encoding_str);
389 efree(ZEND_VOIDP(list));
390 return FAILURE;
391 }
392 }
393 zend_string_release(encoding_str);
394 } ZEND_HASH_FOREACH_END();
395 *return_list = list;
396 *return_size = n;
397 return SUCCESS;
398 }
399 /* }}} */
400
401 /* {{{ zend_multibyte interface */
php_mb_zend_encoding_fetcher(const char * encoding_name)402 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name)
403 {
404 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
405 }
406
php_mb_zend_encoding_name_getter(const zend_encoding * encoding)407 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
408 {
409 return ((const mbfl_encoding *)encoding)->name;
410 }
411
php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding * _encoding)412 static bool php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
413 {
414 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
415 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
416 return 1;
417 }
418 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
419 return 1;
420 }
421 return 0;
422 }
423
php_mb_zend_encoding_detector(const unsigned char * arg_string,size_t arg_length,const zend_encoding ** list,size_t list_size)424 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size)
425 {
426 mbfl_string string;
427
428 if (!list) {
429 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
430 list_size = MBSTRG(current_detect_order_list_size);
431 }
432
433 mbfl_string_init(&string);
434 string.val = (unsigned char *)arg_string;
435 string.len = arg_length;
436 return (const zend_encoding *) mbfl_identify_encoding(&string, (const mbfl_encoding **)list, list_size, 0);
437 }
438
php_mb_zend_encoding_converter(unsigned char ** to,size_t * to_length,const unsigned char * from,size_t from_length,const zend_encoding * encoding_to,const zend_encoding * encoding_from)439 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from)
440 {
441 mbfl_string string, result;
442 mbfl_buffer_converter *convd;
443
444 /* new encoding */
445 /* initialize string */
446 string.encoding = (const mbfl_encoding*)encoding_from;
447 string.val = (unsigned char*)from;
448 string.len = from_length;
449
450 /* initialize converter */
451 convd = mbfl_buffer_converter_new((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
452 if (convd == NULL) {
453 return (size_t) -1;
454 }
455
456 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
457 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
458
459 /* do it */
460 size_t loc = mbfl_buffer_converter_feed(convd, &string);
461
462 mbfl_buffer_converter_flush(convd);
463 mbfl_string_init(&result);
464 if (!mbfl_buffer_converter_result(convd, &result)) {
465 mbfl_buffer_converter_delete(convd);
466 return (size_t)-1;
467 }
468
469 *to = result.val;
470 *to_length = result.len;
471
472 mbfl_buffer_converter_delete(convd);
473
474 return loc;
475 }
476
php_mb_zend_encoding_list_parser(const char * encoding_list,size_t encoding_list_len,const zend_encoding *** return_list,size_t * return_size,bool persistent)477 static zend_result php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, bool persistent)
478 {
479 return php_mb_parse_encoding_list(
480 encoding_list, encoding_list_len,
481 (const mbfl_encoding ***)return_list, return_size,
482 persistent, /* arg_num */ 0, /* allow_pass_encoding */ 1);
483 }
484
php_mb_zend_internal_encoding_getter(void)485 static const zend_encoding *php_mb_zend_internal_encoding_getter(void)
486 {
487 return (const zend_encoding *)MBSTRG(internal_encoding);
488 }
489
php_mb_zend_internal_encoding_setter(const zend_encoding * encoding)490 static zend_result php_mb_zend_internal_encoding_setter(const zend_encoding *encoding)
491 {
492 MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
493 return SUCCESS;
494 }
495
/* Callback table for the zend_multibyte interface: the provider name
 * "mbstring" followed by the hook functions defined above. */
static zend_multibyte_functions php_mb_zend_multibyte_functions = {
	"mbstring",
	php_mb_zend_encoding_fetcher,
	php_mb_zend_encoding_name_getter,
	php_mb_zend_encoding_lexer_compatibility_checker,
	php_mb_zend_encoding_detector,
	php_mb_zend_encoding_converter,
	php_mb_zend_encoding_list_parser,
	php_mb_zend_internal_encoding_getter,
	php_mb_zend_internal_encoding_setter
};
507 /* }}} */
508
509 /* {{{ _php_mb_compile_regex */
_php_mb_compile_regex(const char * pattern)510 static void *_php_mb_compile_regex(const char *pattern)
511 {
512 pcre2_code *retval;
513 PCRE2_SIZE err_offset;
514 int errnum;
515
516 if (!(retval = pcre2_compile((PCRE2_SPTR)pattern, PCRE2_ZERO_TERMINATED,
517 PCRE2_CASELESS, &errnum, &err_offset, php_pcre_cctx()))) {
518 PCRE2_UCHAR err_str[128];
519 pcre2_get_error_message(errnum, err_str, sizeof(err_str));
520 php_error_docref(NULL, E_WARNING, "%s (offset=%zu): %s", pattern, err_offset, err_str);
521 }
522 return retval;
523 }
524 /* }}} */
525
526 /* {{{ _php_mb_match_regex */
_php_mb_match_regex(void * opaque,const char * str,size_t str_len)527 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
528 {
529 int res;
530
531 pcre2_match_data *match_data = php_pcre_create_match_data(0, opaque);
532 if (NULL == match_data) {
533 pcre2_code_free(opaque);
534 php_error_docref(NULL, E_WARNING, "Cannot allocate match data");
535 return FAILURE;
536 }
537 res = pcre2_match(opaque, (PCRE2_SPTR)str, str_len, 0, 0, match_data, php_pcre_mctx()) >= 0;
538 php_pcre_free_match_data(match_data);
539
540 return res;
541 }
542 /* }}} */
543
544 /* {{{ _php_mb_free_regex */
_php_mb_free_regex(void * opaque)545 static void _php_mb_free_regex(void *opaque)
546 {
547 pcre2_code_free(opaque);
548 }
549 /* }}} */
550
551 /* {{{ php_mb_nls_get_default_detect_order_list */
php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang,enum mbfl_no_encoding ** plist,size_t * plist_size)552 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
553 {
554 size_t i;
555
556 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
557 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
558
559 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
560 if (php_mb_default_identify_list[i].lang == lang) {
561 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
562 *plist_size = php_mb_default_identify_list[i].list_size;
563 return 1;
564 }
565 }
566 return 0;
567 }
568 /* }}} */
569
php_mb_rfc1867_substring_conf(const zend_encoding * encoding,char * start,size_t len,char quote)570 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, size_t len, char quote)
571 {
572 char *result = emalloc(len + 2);
573 char *resp = result;
574 size_t i;
575
576 for (i = 0; i < len && start[i] != quote; ++i) {
577 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
578 *resp++ = start[++i];
579 } else {
580 size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
581
582 while (j-- > 0 && i < len) {
583 *resp++ = start[i++];
584 }
585 --i;
586 }
587 }
588
589 *resp = '\0';
590 return result;
591 }
592
php_mb_rfc1867_getword(const zend_encoding * encoding,char ** line,char stop)593 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop) /* {{{ */
594 {
595 char *pos = *line, quote;
596 char *res;
597
598 while (*pos && *pos != stop) {
599 if ((quote = *pos) == '"' || quote == '\'') {
600 ++pos;
601 while (*pos && *pos != quote) {
602 if (*pos == '\\' && pos[1] && pos[1] == quote) {
603 pos += 2;
604 } else {
605 ++pos;
606 }
607 }
608 if (*pos) {
609 ++pos;
610 }
611 } else {
612 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
613
614 }
615 }
616 if (*pos == '\0') {
617 res = estrdup(*line);
618 *line += strlen(*line);
619 return res;
620 }
621
622 res = estrndup(*line, pos - *line);
623
624 while (*pos == stop) {
625 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
626 }
627
628 *line = pos;
629 return res;
630 }
631 /* }}} */
632
/* Extract one configuration word from str after skipping leading whitespace.
 *
 * A word starting with ' or " runs to the matching unescaped quote; any other
 * word runs to the next whitespace byte. The copy is produced by
 * php_mb_rfc1867_substring_conf(). Returns an emalloc()ed string, which is
 * empty when str holds only whitespace.
 */
static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str) /* {{{ */
{
	for (; *str && isspace(*(unsigned char *)str); ++str) {
		/* skip leading whitespace */
	}

	if (*str == '\0') {
		return estrdup("");
	}

	if (*str != '"' && *str != '\'') {
		size_t word_len = 0;

		while (str[word_len] && !isspace(*(unsigned char *)(str + word_len))) {
			++word_len;
		}
		return php_mb_rfc1867_substring_conf(encoding, str, word_len, 0);
	}

	char quote = *str;
	char *body = str + 1;
	return php_mb_rfc1867_substring_conf(encoding, body, strlen(body), quote);
}
657 /* }}} */
658
php_mb_rfc1867_basename(const zend_encoding * encoding,char * filename)659 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename) /* {{{ */
660 {
661 char *s, *s2;
662 const size_t filename_len = strlen(filename);
663
664 /* The \ check should technically be needed for win32 systems only where
665 * it is a valid path separator. However, IE in all it's wisdom always sends
666 * the full path of the file on the user's filesystem, which means that unless
667 * the user does basename() they get a bogus file name. Until IE's user base drops
668 * to nill or problem is fixed this code must remain enabled for all systems. */
669 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
670 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
671
672 if (s && s2) {
673 if (s > s2) {
674 return ++s;
675 } else {
676 return ++s2;
677 }
678 } else if (s) {
679 return ++s;
680 } else if (s2) {
681 return ++s2;
682 } else {
683 return filename;
684 }
685 }
686 /* }}} */
687
688 /* {{{ php.ini directive handler */
689 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_language) */
PHP_INI_MH(OnUpdate_mbstring_language)690 static PHP_INI_MH(OnUpdate_mbstring_language)
691 {
692 enum mbfl_no_language no_language;
693
694 no_language = mbfl_name2no_language(ZSTR_VAL(new_value));
695 if (no_language == mbfl_no_language_invalid) {
696 MBSTRG(language) = mbfl_no_language_neutral;
697 return FAILURE;
698 }
699 MBSTRG(language) = no_language;
700 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
701 return SUCCESS;
702 }
703 /* }}} */
704
705 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_detect_order) */
PHP_INI_MH(OnUpdate_mbstring_detect_order)706 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
707 {
708 const mbfl_encoding **list;
709 size_t size;
710
711 if (!new_value) {
712 if (MBSTRG(detect_order_list)) {
713 pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
714 }
715 MBSTRG(detect_order_list) = NULL;
716 MBSTRG(detect_order_list_size) = 0;
717 return SUCCESS;
718 }
719
720 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(new_value), ZSTR_LEN(new_value), &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 0) || size == 0) {
721 return FAILURE;
722 }
723
724 if (MBSTRG(detect_order_list)) {
725 pefree(ZEND_VOIDP(MBSTRG(detect_order_list)), 1);
726 }
727 MBSTRG(detect_order_list) = list;
728 MBSTRG(detect_order_list_size) = size;
729 return SUCCESS;
730 }
731 /* }}} */
732
_php_mb_ini_mbstring_http_input_set(const char * new_value,size_t new_value_length)733 static int _php_mb_ini_mbstring_http_input_set(const char *new_value, size_t new_value_length) {
734 const mbfl_encoding **list;
735 size_t size;
736 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, /* persistent */ 1, /* arg_num */ 0, /* allow_pass_encoding */ 1) || size == 0) {
737 return FAILURE;
738 }
739 if (MBSTRG(http_input_list)) {
740 pefree(ZEND_VOIDP(MBSTRG(http_input_list)), 1);
741 }
742 MBSTRG(http_input_list) = list;
743 MBSTRG(http_input_list_size) = size;
744 return SUCCESS;
745 }
746
747 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_input) */
PHP_INI_MH(OnUpdate_mbstring_http_input)748 static PHP_INI_MH(OnUpdate_mbstring_http_input)
749 {
750 if (new_value) {
751 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_input is deprecated");
752 }
753
754 if (!new_value || !ZSTR_VAL(new_value)) {
755 const char *encoding = php_get_input_encoding();
756 MBSTRG(http_input_set) = 0;
757 _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
758 return SUCCESS;
759 }
760
761 MBSTRG(http_input_set) = 1;
762 return _php_mb_ini_mbstring_http_input_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
763 }
764 /* }}} */
765
_php_mb_ini_mbstring_http_output_set(const char * new_value)766 static int _php_mb_ini_mbstring_http_output_set(const char *new_value) {
767 const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(new_value);
768 if (!encoding) {
769 return FAILURE;
770 }
771
772 MBSTRG(http_output_encoding) = encoding;
773 MBSTRG(current_http_output_encoding) = encoding;
774 return SUCCESS;
775 }
776
777 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output) */
PHP_INI_MH(OnUpdate_mbstring_http_output)778 static PHP_INI_MH(OnUpdate_mbstring_http_output)
779 {
780 if (new_value) {
781 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.http_output is deprecated");
782 }
783
784 if (new_value == NULL || ZSTR_LEN(new_value) == 0) {
785 MBSTRG(http_output_set) = 0;
786 _php_mb_ini_mbstring_http_output_set(php_get_output_encoding());
787 return SUCCESS;
788 }
789
790 MBSTRG(http_output_set) = 1;
791 return _php_mb_ini_mbstring_http_output_set(ZSTR_VAL(new_value));
792 }
793 /* }}} */
794
795 /* {{{ static _php_mb_ini_mbstring_internal_encoding_set */
_php_mb_ini_mbstring_internal_encoding_set(const char * new_value,size_t new_value_length)796 static int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, size_t new_value_length)
797 {
798 const mbfl_encoding *encoding;
799
800 if (!new_value || !new_value_length || !(encoding = mbfl_name2encoding(new_value))) {
801 /* falls back to UTF-8 if an unknown encoding name is given */
802 if (new_value) {
803 php_error_docref("ref.mbstring", E_WARNING, "Unknown encoding \"%s\" in ini setting", new_value);
804 }
805 encoding = &mbfl_encoding_utf8;
806 }
807 MBSTRG(internal_encoding) = encoding;
808 MBSTRG(current_internal_encoding) = encoding;
809 #ifdef HAVE_MBREGEX
810 {
811 const char *enc_name = new_value;
812 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name)) {
813 /* falls back to UTF-8 if an unknown encoding name is given */
814 enc_name = "UTF-8";
815 php_mb_regex_set_default_mbctype(enc_name);
816 }
817 php_mb_regex_set_mbctype(new_value);
818 }
819 #endif
820 return SUCCESS;
821 }
822 /* }}} */
823
824 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_internal_encoding) */
PHP_INI_MH(OnUpdate_mbstring_internal_encoding)825 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
826 {
827 if (new_value) {
828 php_error_docref("ref.mbstring", E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
829 }
830
831 if (OnUpdateString(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage) == FAILURE) {
832 return FAILURE;
833 }
834
835 if (new_value && ZSTR_LEN(new_value)) {
836 MBSTRG(internal_encoding_set) = 1;
837 return _php_mb_ini_mbstring_internal_encoding_set(ZSTR_VAL(new_value), ZSTR_LEN(new_value));
838 } else {
839 const char *encoding = php_get_internal_encoding();
840 MBSTRG(internal_encoding_set) = 0;
841 return _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
842 }
843 }
844 /* }}} */
845
846 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_substitute_character) */
PHP_INI_MH(OnUpdate_mbstring_substitute_character)847 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
848 {
849 int c;
850 char *endptr = NULL;
851
852 if (new_value != NULL) {
853 if (strcasecmp("none", ZSTR_VAL(new_value)) == 0) {
854 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
855 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
856 } else if (strcasecmp("long", ZSTR_VAL(new_value)) == 0) {
857 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
858 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
859 } else if (strcasecmp("entity", ZSTR_VAL(new_value)) == 0) {
860 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
861 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
862 } else {
863 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
864 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
865 if (ZSTR_LEN(new_value) > 0) {
866 c = strtol(ZSTR_VAL(new_value), &endptr, 0);
867 if (*endptr == '\0') {
868 MBSTRG(filter_illegal_substchar) = c;
869 MBSTRG(current_filter_illegal_substchar) = c;
870 }
871 }
872 }
873 } else {
874 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
875 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
876 MBSTRG(filter_illegal_substchar) = 0x3f; /* '?' */
877 MBSTRG(current_filter_illegal_substchar) = 0x3f; /* '?' */
878 }
879
880 return SUCCESS;
881 }
882 /* }}} */
883
884 /* {{{ static PHP_INI_MH(OnUpdate_mbstring_encoding_translation) */
PHP_INI_MH(OnUpdate_mbstring_encoding_translation)885 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
886 {
887 if (new_value == NULL) {
888 return FAILURE;
889 }
890
891 OnUpdateBool(entry, new_value, mh_arg1, mh_arg2, mh_arg3, stage);
892
893 if (MBSTRG(encoding_translation)) {
894 sapi_unregister_post_entry(php_post_entries);
895 sapi_register_post_entries(mbstr_post_entries);
896 } else {
897 sapi_unregister_post_entry(mbstr_post_entries);
898 sapi_register_post_entries(php_post_entries);
899 }
900
901 return SUCCESS;
902 }
903 /* }}} */
904
/* {{{ static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes) */
PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)906 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
907 {
908 zend_string *tmp;
909 void *re = NULL;
910
911 if (!new_value) {
912 new_value = entry->orig_value;
913 }
914 tmp = php_trim(new_value, NULL, 0, 3);
915
916 if (ZSTR_LEN(tmp) > 0) {
917 if (!(re = _php_mb_compile_regex(ZSTR_VAL(tmp)))) {
918 zend_string_release_ex(tmp, 0);
919 return FAILURE;
920 }
921 }
922
923 if (MBSTRG(http_output_conv_mimetypes)) {
924 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
925 }
926
927 MBSTRG(http_output_conv_mimetypes) = re;
928
929 zend_string_release_ex(tmp, 0);
930 return SUCCESS;
931 }
932 /* }}} */
933 /* }}} */
934
935 /* {{{ php.ini directive registration */
936 PHP_INI_BEGIN()
937 PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
938 PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
939 PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
940 PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
941 STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
942 PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
943
944 STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
945 PHP_INI_SYSTEM | PHP_INI_PERDIR,
946 OnUpdate_mbstring_encoding_translation,
947 encoding_translation, zend_mbstring_globals, mbstring_globals)
948 PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
949 "^(text/|application/xhtml\\+xml)",
950 PHP_INI_ALL,
951 OnUpdate_mbstring_http_output_conv_mimetypes)
952
953 STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
954 PHP_INI_ALL,
955 OnUpdateBool,
956 strict_detection, zend_mbstring_globals, mbstring_globals)
957 #ifdef HAVE_MBREGEX
958 STD_PHP_INI_ENTRY("mbstring.regex_stack_limit", "100000",PHP_INI_ALL, OnUpdateLong, regex_stack_limit, zend_mbstring_globals, mbstring_globals)
959 STD_PHP_INI_ENTRY("mbstring.regex_retry_limit", "1000000",PHP_INI_ALL, OnUpdateLong, regex_retry_limit, zend_mbstring_globals, mbstring_globals)
960 #endif
PHP_INI_END()961 PHP_INI_END()
962 /* }}} */
963
964 static void mbstring_internal_encoding_changed_hook(void) {
965 /* One of the internal_encoding / input_encoding / output_encoding ini settings changed. */
966 if (!MBSTRG(internal_encoding_set)) {
967 const char *encoding = php_get_internal_encoding();
968 _php_mb_ini_mbstring_internal_encoding_set(encoding, strlen(encoding));
969 }
970
971 if (!MBSTRG(http_output_set)) {
972 const char *encoding = php_get_output_encoding();
973 _php_mb_ini_mbstring_http_output_set(encoding);
974 }
975
976 if (!MBSTRG(http_input_set)) {
977 const char *encoding = php_get_input_encoding();
978 _php_mb_ini_mbstring_http_input_set(encoding, strlen(encoding));
979 }
980 }
981
982 /* {{{ module global initialize handler */
PHP_GINIT_FUNCTION(mbstring)983 static PHP_GINIT_FUNCTION(mbstring)
984 {
985 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
986 ZEND_TSRMLS_CACHE_UPDATE();
987 #endif
988
989 mbstring_globals->language = mbfl_no_language_uni;
990 mbstring_globals->internal_encoding = NULL;
991 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
992 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
993 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
994 mbstring_globals->http_input_identify = NULL;
995 mbstring_globals->http_input_identify_get = NULL;
996 mbstring_globals->http_input_identify_post = NULL;
997 mbstring_globals->http_input_identify_cookie = NULL;
998 mbstring_globals->http_input_identify_string = NULL;
999 mbstring_globals->http_input_list = NULL;
1000 mbstring_globals->http_input_list_size = 0;
1001 mbstring_globals->detect_order_list = NULL;
1002 mbstring_globals->detect_order_list_size = 0;
1003 mbstring_globals->current_detect_order_list = NULL;
1004 mbstring_globals->current_detect_order_list_size = 0;
1005 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1006 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1007 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1008 mbstring_globals->filter_illegal_substchar = 0x3f; /* '?' */
1009 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1010 mbstring_globals->current_filter_illegal_substchar = 0x3f; /* '?' */
1011 mbstring_globals->illegalchars = 0;
1012 mbstring_globals->encoding_translation = 0;
1013 mbstring_globals->strict_detection = 0;
1014 mbstring_globals->outconv = NULL;
1015 mbstring_globals->http_output_conv_mimetypes = NULL;
1016 #ifdef HAVE_MBREGEX
1017 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc();
1018 #endif
1019 mbstring_globals->last_used_encoding_name = NULL;
1020 mbstring_globals->last_used_encoding = NULL;
1021 mbstring_globals->internal_encoding_set = 0;
1022 mbstring_globals->http_output_set = 0;
1023 mbstring_globals->http_input_set = 0;
1024 }
1025 /* }}} */
1026
1027 /* {{{ PHP_GSHUTDOWN_FUNCTION */
PHP_GSHUTDOWN_FUNCTION(mbstring)1028 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1029 {
1030 if (mbstring_globals->http_input_list) {
1031 free(ZEND_VOIDP(mbstring_globals->http_input_list));
1032 }
1033 if (mbstring_globals->detect_order_list) {
1034 free(ZEND_VOIDP(mbstring_globals->detect_order_list));
1035 }
1036 if (mbstring_globals->http_output_conv_mimetypes) {
1037 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1038 }
1039 #ifdef HAVE_MBREGEX
1040 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals);
1041 #endif
1042 }
1043 /* }}} */
1044
1045 /* {{{ PHP_MINIT_FUNCTION(mbstring) */
PHP_MINIT_FUNCTION(mbstring)1046 PHP_MINIT_FUNCTION(mbstring)
1047 {
1048 #if defined(COMPILE_DL_MBSTRING) && defined(ZTS)
1049 ZEND_TSRMLS_CACHE_UPDATE();
1050 #endif
1051
1052 REGISTER_INI_ENTRIES();
1053
1054 /* We assume that we're the only user of the hook. */
1055 ZEND_ASSERT(php_internal_encoding_changed == NULL);
1056 php_internal_encoding_changed = mbstring_internal_encoding_changed_hook;
1057 mbstring_internal_encoding_changed_hook();
1058
1059 /* This is a global handler. Should not be set in a per-request handler. */
1060 sapi_register_treat_data(mbstr_treat_data);
1061
1062 /* Post handlers are stored in the thread-local context. */
1063 if (MBSTRG(encoding_translation)) {
1064 sapi_register_post_entries(mbstr_post_entries);
1065 }
1066
1067 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1068 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1069 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1070 REGISTER_LONG_CONSTANT("MB_CASE_FOLD", PHP_UNICODE_CASE_FOLD, CONST_CS | CONST_PERSISTENT);
1071 REGISTER_LONG_CONSTANT("MB_CASE_UPPER_SIMPLE", PHP_UNICODE_CASE_UPPER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1072 REGISTER_LONG_CONSTANT("MB_CASE_LOWER_SIMPLE", PHP_UNICODE_CASE_LOWER_SIMPLE, CONST_CS | CONST_PERSISTENT);
1073 REGISTER_LONG_CONSTANT("MB_CASE_TITLE_SIMPLE", PHP_UNICODE_CASE_TITLE_SIMPLE, CONST_CS | CONST_PERSISTENT);
1074 REGISTER_LONG_CONSTANT("MB_CASE_FOLD_SIMPLE", PHP_UNICODE_CASE_FOLD_SIMPLE, CONST_CS | CONST_PERSISTENT);
1075
1076 #ifdef HAVE_MBREGEX
1077 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1078 #endif
1079
1080 if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions)) {
1081 return FAILURE;
1082 }
1083
1084 php_rfc1867_set_multibyte_callbacks(
1085 php_mb_encoding_translation,
1086 php_mb_gpc_get_detect_order,
1087 php_mb_gpc_set_input_encoding,
1088 php_mb_rfc1867_getword,
1089 php_mb_rfc1867_getword_conf,
1090 php_mb_rfc1867_basename);
1091
1092 return SUCCESS;
1093 }
1094 /* }}} */
1095
1096 /* {{{ PHP_MSHUTDOWN_FUNCTION(mbstring) */
PHP_MSHUTDOWN_FUNCTION(mbstring)1097 PHP_MSHUTDOWN_FUNCTION(mbstring)
1098 {
1099 UNREGISTER_INI_ENTRIES();
1100
1101 zend_multibyte_restore_functions();
1102
1103 #ifdef HAVE_MBREGEX
1104 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1105 #endif
1106
1107 php_internal_encoding_changed = NULL;
1108
1109 return SUCCESS;
1110 }
1111 /* }}} */
1112
1113 /* {{{ PHP_RINIT_FUNCTION(mbstring) */
PHP_RINIT_FUNCTION(mbstring)1114 PHP_RINIT_FUNCTION(mbstring)
1115 {
1116 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1117 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1118 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1119 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1120
1121 MBSTRG(illegalchars) = 0;
1122
1123 php_mb_populate_current_detect_order_list();
1124
1125 #ifdef HAVE_MBREGEX
1126 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1127 #endif
1128 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding));
1129
1130 return SUCCESS;
1131 }
1132 /* }}} */
1133
1134 /* {{{ PHP_RSHUTDOWN_FUNCTION(mbstring) */
PHP_RSHUTDOWN_FUNCTION(mbstring)1135 PHP_RSHUTDOWN_FUNCTION(mbstring)
1136 {
1137 if (MBSTRG(current_detect_order_list) != NULL) {
1138 efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1139 MBSTRG(current_detect_order_list) = NULL;
1140 MBSTRG(current_detect_order_list_size) = 0;
1141 }
1142 if (MBSTRG(outconv) != NULL) {
1143 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1144 mbfl_buffer_converter_delete(MBSTRG(outconv));
1145 MBSTRG(outconv) = NULL;
1146 }
1147
1148 /* clear http input identification. */
1149 MBSTRG(http_input_identify) = NULL;
1150 MBSTRG(http_input_identify_post) = NULL;
1151 MBSTRG(http_input_identify_get) = NULL;
1152 MBSTRG(http_input_identify_cookie) = NULL;
1153 MBSTRG(http_input_identify_string) = NULL;
1154
1155 if (MBSTRG(last_used_encoding_name)) {
1156 zend_string_release(MBSTRG(last_used_encoding_name));
1157 MBSTRG(last_used_encoding_name) = NULL;
1158 }
1159
1160 MBSTRG(internal_encoding_set) = 0;
1161 MBSTRG(http_output_set) = 0;
1162 MBSTRG(http_input_set) = 0;
1163
1164 #ifdef HAVE_MBREGEX
1165 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1166 #endif
1167
1168 return SUCCESS;
1169 }
1170 /* }}} */
1171
1172 /* {{{ PHP_MINFO_FUNCTION(mbstring) */
PHP_MINFO_FUNCTION(mbstring)1173 PHP_MINFO_FUNCTION(mbstring)
1174 {
1175 php_info_print_table_start();
1176 php_info_print_table_row(2, "Multibyte Support", "enabled");
1177 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1178 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1179 {
1180 char tmp[256];
1181 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1182 php_info_print_table_row(2, "libmbfl version", tmp);
1183 }
1184 php_info_print_table_end();
1185
1186 php_info_print_table_start();
1187 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1188 php_info_print_table_end();
1189
1190 #ifdef HAVE_MBREGEX
1191 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1192 #endif
1193
1194 DISPLAY_INI_ENTRIES();
1195 }
1196 /* }}} */
1197
1198 /* {{{ Sets the current language or Returns the current language as a string */
PHP_FUNCTION(mb_language)1199 PHP_FUNCTION(mb_language)
1200 {
1201 zend_string *name = NULL;
1202
1203 ZEND_PARSE_PARAMETERS_START(0, 1)
1204 Z_PARAM_OPTIONAL
1205 Z_PARAM_STR_OR_NULL(name)
1206 ZEND_PARSE_PARAMETERS_END();
1207
1208 if (name == NULL) {
1209 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)));
1210 } else {
1211 zend_string *ini_name = zend_string_init("mbstring.language", sizeof("mbstring.language") - 1, 0);
1212 if (FAILURE == zend_alter_ini_entry(ini_name, name, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1213 zend_argument_value_error(1, "must be a valid language, \"%s\" given", ZSTR_VAL(name));
1214 zend_string_release_ex(ini_name, 0);
1215 RETURN_THROWS();
1216 }
1217 // TODO Make return void
1218 RETVAL_TRUE;
1219 zend_string_release_ex(ini_name, 0);
1220 }
1221 }
1222 /* }}} */
1223
1224 /* {{{ Sets the current internal encoding or Returns the current internal encoding as a string */
PHP_FUNCTION(mb_internal_encoding)1225 PHP_FUNCTION(mb_internal_encoding)
1226 {
1227 char *name = NULL;
1228 size_t name_len;
1229 const mbfl_encoding *encoding;
1230
1231 ZEND_PARSE_PARAMETERS_START(0, 1)
1232 Z_PARAM_OPTIONAL
1233 Z_PARAM_STRING_OR_NULL(name, name_len)
1234 ZEND_PARSE_PARAMETERS_END();
1235
1236 if (name == NULL) {
1237 ZEND_ASSERT(MBSTRG(current_internal_encoding));
1238 RETURN_STRING(MBSTRG(current_internal_encoding)->name);
1239 } else {
1240 encoding = mbfl_name2encoding(name);
1241 if (!encoding) {
1242 zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1243 RETURN_THROWS();
1244 } else {
1245 MBSTRG(current_internal_encoding) = encoding;
1246 MBSTRG(internal_encoding_set) = 1;
1247 /* TODO Return old encoding */
1248 RETURN_TRUE;
1249 }
1250 }
1251 }
1252 /* }}} */
1253
1254 /* {{{ Returns the input encoding */
PHP_FUNCTION(mb_http_input)1255 PHP_FUNCTION(mb_http_input)
1256 {
1257 char *type = NULL;
1258 size_t type_len = 0, n;
1259 const mbfl_encoding **entry;
1260 const mbfl_encoding *encoding;
1261
1262 ZEND_PARSE_PARAMETERS_START(0, 1)
1263 Z_PARAM_OPTIONAL
1264 Z_PARAM_STRING_OR_NULL(type, type_len)
1265 ZEND_PARSE_PARAMETERS_END();
1266
1267 if (type == NULL) {
1268 encoding = MBSTRG(http_input_identify);
1269 } else {
1270 switch (*type) {
1271 case 'G':
1272 case 'g':
1273 encoding = MBSTRG(http_input_identify_get);
1274 break;
1275 case 'P':
1276 case 'p':
1277 encoding = MBSTRG(http_input_identify_post);
1278 break;
1279 case 'C':
1280 case 'c':
1281 encoding = MBSTRG(http_input_identify_cookie);
1282 break;
1283 case 'S':
1284 case 's':
1285 encoding = MBSTRG(http_input_identify_string);
1286 break;
1287 case 'I':
1288 case 'i':
1289 entry = MBSTRG(http_input_list);
1290 n = MBSTRG(http_input_list_size);
1291 array_init(return_value);
1292 for (size_t i = 0; i < n; i++, entry++) {
1293 add_next_index_string(return_value, (*entry)->name);
1294 }
1295 return;
1296 case 'L':
1297 case 'l':
1298 entry = MBSTRG(http_input_list);
1299 n = MBSTRG(http_input_list_size);
1300 if (n == 0) {
1301 // TODO should return empty string?
1302 RETURN_FALSE;
1303 }
1304 // TODO Use smart_str instead.
1305 mbfl_string result;
1306 mbfl_memory_device device;
1307 mbfl_memory_device_init(&device, n * 12, 0);
1308 for (size_t i = 0; i < n; i++, entry++) {
1309 mbfl_memory_device_strcat(&device, (*entry)->name);
1310 mbfl_memory_device_output(',', &device);
1311 }
1312 mbfl_memory_device_unput(&device); /* Remove trailing comma */
1313 mbfl_memory_device_result(&device, &result);
1314 RETVAL_STRINGL((const char*)result.val, result.len);
1315 mbfl_string_clear(&result);
1316 return;
1317 default:
1318 zend_argument_value_error(1,
1319 "must be one of \"G\", \"P\", \"C\", \"S\", \"I\", or \"L\"");
1320 RETURN_THROWS();
1321 }
1322 }
1323
1324 if (encoding) {
1325 RETURN_STRING(encoding->name);
1326 } else {
1327 RETURN_FALSE;
1328 }
1329 }
1330 /* }}} */
1331
1332 /* {{{ Sets the current output_encoding or returns the current output_encoding as a string */
PHP_FUNCTION(mb_http_output)1333 PHP_FUNCTION(mb_http_output)
1334 {
1335 char *name = NULL;
1336 size_t name_len;
1337
1338 ZEND_PARSE_PARAMETERS_START(0, 1)
1339 Z_PARAM_OPTIONAL
1340 Z_PARAM_STRING_OR_NULL(name, name_len)
1341 ZEND_PARSE_PARAMETERS_END();
1342
1343 if (name == NULL) {
1344 ZEND_ASSERT(MBSTRG(current_http_output_encoding));
1345 RETURN_STRING(MBSTRG(current_http_output_encoding)->name);
1346 } else {
1347 const mbfl_encoding *encoding = php_mb_get_encoding_or_pass(name);
1348 if (!encoding) {
1349 zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1350 RETURN_THROWS();
1351 } else {
1352 MBSTRG(http_output_set) = 1;
1353 MBSTRG(current_http_output_encoding) = encoding;
1354 /* TODO Return previous encoding? */
1355 RETURN_TRUE;
1356 }
1357 }
1358 }
1359 /* }}} */
1360
/* {{{ Sets the current detect_order or returns the current detect_order as an array */
PHP_FUNCTION(mb_detect_order)1362 PHP_FUNCTION(mb_detect_order)
1363 {
1364 zend_string *order_str = NULL;
1365 HashTable *order_ht = NULL;
1366
1367 ZEND_PARSE_PARAMETERS_START(0, 1)
1368 Z_PARAM_OPTIONAL
1369 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(order_ht, order_str)
1370 ZEND_PARSE_PARAMETERS_END();
1371
1372 if (!order_str && !order_ht) {
1373 size_t n = MBSTRG(current_detect_order_list_size);
1374 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1375 array_init(return_value);
1376 for (size_t i = 0; i < n; i++) {
1377 add_next_index_string(return_value, (*entry)->name);
1378 entry++;
1379 }
1380 } else {
1381 const mbfl_encoding **list;
1382 size_t size;
1383 if (order_ht) {
1384 if (FAILURE == php_mb_parse_encoding_array(order_ht, &list, &size, 1)) {
1385 RETURN_THROWS();
1386 }
1387 } else {
1388 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(order_str), ZSTR_LEN(order_str), &list, &size, /* persistent */ 0, /* arg_num */ 1, /* allow_pass_encoding */ 0)) {
1389 RETURN_THROWS();
1390 }
1391 }
1392
1393 if (size == 0) {
1394 efree(ZEND_VOIDP(list));
1395 zend_argument_value_error(1, "must specify at least one encoding");
1396 RETURN_THROWS();
1397 }
1398
1399 if (MBSTRG(current_detect_order_list)) {
1400 efree(ZEND_VOIDP(MBSTRG(current_detect_order_list)));
1401 }
1402 MBSTRG(current_detect_order_list) = list;
1403 MBSTRG(current_detect_order_list_size) = size;
1404 RETURN_TRUE;
1405 }
1406 }
1407 /* }}} */
1408
/*
 * Returns 1 when cp is acceptable as a substitute character and 0 otherwise.
 *
 * A code point is rejected when it lies outside the Unicode range
 * (below 0 or at/above 0x110000) or when it is a surrogate (0xd800..0xdfff):
 * surrogates are never valid on their own and only a single substitute
 * character is allowed.  Everything else is accepted, because the target
 * encoding of the conversion that will use the character is not known here,
 * so it cannot be checked whether the code point is actually mapped in it.
 */
static inline int php_mb_check_code_point(zend_long cp)
{
	const int in_unicode_range = (cp >= 0 && cp < 0x110000);
	const int is_surrogate = (cp >= 0xd800 && cp <= 0xdfff);

	return in_unicode_range && !is_surrogate;
}
1427
1428 /* {{{ Sets the current substitute_character or returns the current substitute_character */
PHP_FUNCTION(mb_substitute_character)1429 PHP_FUNCTION(mb_substitute_character)
1430 {
1431 zend_string *substitute_character = NULL;
1432 zend_long substitute_codepoint;
1433 zend_bool substitute_is_null = 1;
1434
1435 ZEND_PARSE_PARAMETERS_START(0, 1)
1436 Z_PARAM_OPTIONAL
1437 Z_PARAM_STR_OR_LONG_OR_NULL(substitute_character, substitute_codepoint, substitute_is_null)
1438 ZEND_PARSE_PARAMETERS_END();
1439
1440 if (substitute_is_null) {
1441 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1442 RETURN_STRING("none");
1443 }
1444 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1445 RETURN_STRING("long");
1446 }
1447 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1448 RETURN_STRING("entity");
1449 }
1450 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1451 }
1452
1453 if (substitute_character != NULL) {
1454 if (zend_string_equals_literal_ci(substitute_character, "none")) {
1455 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1456 RETURN_TRUE;
1457 }
1458 if (zend_string_equals_literal_ci(substitute_character, "long")) {
1459 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1460 RETURN_TRUE;
1461 }
1462 if (zend_string_equals_literal_ci(substitute_character, "entity")) {
1463 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1464 RETURN_TRUE;
1465 }
1466 /* Invalid string value */
1467 zend_argument_value_error(1, "must be \"none\", \"long\", \"entity\" or a valid codepoint");
1468 RETURN_THROWS();
1469 }
1470 /* Integer codepoint passed */
1471 if (!php_mb_check_code_point(substitute_codepoint)) {
1472 zend_argument_value_error(1, "is not a valid codepoint");
1473 RETURN_THROWS();
1474 }
1475
1476 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1477 MBSTRG(current_filter_illegal_substchar) = substitute_codepoint;
1478 RETURN_TRUE;
1479 }
1480 /* }}} */
1481
1482 /* {{{ Return the preferred MIME name (charset) as a string */
PHP_FUNCTION(mb_preferred_mime_name)1483 PHP_FUNCTION(mb_preferred_mime_name)
1484 {
1485 enum mbfl_no_encoding no_encoding;
1486 char *name = NULL;
1487 size_t name_len;
1488
1489 ZEND_PARSE_PARAMETERS_START(1, 1)
1490 Z_PARAM_STRING(name, name_len)
1491 ZEND_PARSE_PARAMETERS_END();
1492
1493 no_encoding = mbfl_name2no_encoding(name);
1494 if (no_encoding == mbfl_no_encoding_invalid) {
1495 zend_argument_value_error(1, "must be a valid encoding, \"%s\" given", name);
1496 RETURN_THROWS();
1497 }
1498
1499 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
1500 if (preferred_name == NULL || *preferred_name == '\0') {
1501 php_error_docref(NULL, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
1502 RETVAL_FALSE;
1503 } else {
1504 RETVAL_STRING((char *)preferred_name);
1505 }
1506 }
1507 /* }}} */
1508
/* Byte range tests (presumably Shift_JIS lead and trail bytes, judging by the
 * names).  Each evaluates to 1 when c lies in one of the listed ranges and to
 * 0 otherwise.  The argument is evaluated more than once.
 *   IS_SJIS1: 0x81..0x9f or 0xe0..0xf5
 *   IS_SJIS2: 0x40..0x7e or 0x80..0xfc */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
1511
1512 /* {{{ Parses GET/POST/COOKIE data and sets global variables */
PHP_FUNCTION(mb_parse_str)1513 PHP_FUNCTION(mb_parse_str)
1514 {
1515 zval *track_vars_array = NULL;
1516 char *encstr;
1517 size_t encstr_len;
1518 php_mb_encoding_handler_info_t info;
1519 const mbfl_encoding *detected;
1520
1521 ZEND_PARSE_PARAMETERS_START(2, 2)
1522 Z_PARAM_STRING(encstr, encstr_len)
1523 Z_PARAM_ZVAL(track_vars_array)
1524 ZEND_PARSE_PARAMETERS_END();
1525
1526 track_vars_array = zend_try_array_init(track_vars_array);
1527 if (!track_vars_array) {
1528 RETURN_THROWS();
1529 }
1530
1531 encstr = estrndup(encstr, encstr_len);
1532
1533 info.data_type = PARSE_STRING;
1534 info.separator = PG(arg_separator).input;
1535 info.report_errors = 1;
1536 info.to_encoding = MBSTRG(current_internal_encoding);
1537 info.to_language = MBSTRG(language);
1538 info.from_encodings = MBSTRG(http_input_list);
1539 info.num_from_encodings = MBSTRG(http_input_list_size);
1540 info.from_language = MBSTRG(language);
1541
1542 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr);
1543
1544 MBSTRG(http_input_identify) = detected;
1545
1546 RETVAL_BOOL(detected);
1547
1548 if (encstr != NULL) efree(encstr);
1549 }
1550 /* }}} */
1551
1552 /* {{{ Returns string in output buffer converted to the http_output encoding */
PHP_FUNCTION(mb_output_handler)1553 PHP_FUNCTION(mb_output_handler)
1554 {
1555 char *arg_string;
1556 size_t arg_string_len;
1557 zend_long arg_status;
1558 mbfl_string string, result;
1559 const char *charset;
1560 char *p;
1561 const mbfl_encoding *encoding;
1562 int last_feed;
1563 size_t len;
1564 unsigned char send_text_mimetype = 0;
1565 char *s, *mimetype = NULL;
1566
1567 ZEND_PARSE_PARAMETERS_START(2, 2)
1568 Z_PARAM_STRING(arg_string, arg_string_len)
1569 Z_PARAM_LONG(arg_status)
1570 ZEND_PARSE_PARAMETERS_END();
1571
1572 encoding = MBSTRG(current_http_output_encoding);
1573
1574 /* start phase only */
1575 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
1576 /* delete the converter just in case. */
1577 if (MBSTRG(outconv)) {
1578 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1579 mbfl_buffer_converter_delete(MBSTRG(outconv));
1580 MBSTRG(outconv) = NULL;
1581 }
1582
1583 if (encoding == &mbfl_encoding_pass) {
1584 RETURN_STRINGL(arg_string, arg_string_len);
1585 }
1586
1587 /* analyze mime type */
1588 if (SG(sapi_headers).mimetype &&
1589 _php_mb_match_regex(
1590 MBSTRG(http_output_conv_mimetypes),
1591 SG(sapi_headers).mimetype,
1592 strlen(SG(sapi_headers).mimetype))) {
1593 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL) {
1594 mimetype = estrdup(SG(sapi_headers).mimetype);
1595 } else {
1596 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
1597 }
1598 send_text_mimetype = 1;
1599 } else if (SG(sapi_headers).send_default_content_type) {
1600 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
1601 }
1602
1603 /* if content-type is not yet set, set it and activate the converter */
1604 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
1605 charset = encoding->mime_name;
1606 if (charset) {
1607 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
1608 if (sapi_add_header(p, len, 0) != FAILURE) {
1609 SG(sapi_headers).send_default_content_type = 0;
1610 }
1611 }
1612 /* activate the converter */
1613 MBSTRG(outconv) = mbfl_buffer_converter_new(MBSTRG(current_internal_encoding), encoding, 0);
1614 if (send_text_mimetype){
1615 efree(mimetype);
1616 }
1617 }
1618 }
1619
1620 /* just return if the converter is not activated. */
1621 if (MBSTRG(outconv) == NULL) {
1622 RETURN_STRINGL(arg_string, arg_string_len);
1623 }
1624
1625 /* flag */
1626 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
1627 /* mode */
1628 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
1629 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
1630
1631 /* feed the string */
1632 mbfl_string_init(&string);
1633 /* these are not needed. convd has encoding info.
1634 string.encoding = MBSTRG(current_internal_encoding);
1635 */
1636 string.val = (unsigned char *)arg_string;
1637 string.len = arg_string_len;
1638
1639 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
1640 if (last_feed) {
1641 mbfl_buffer_converter_flush(MBSTRG(outconv));
1642 }
1643 /* get the converter output, and return it */
1644 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
1645
1646 // TODO: avoid reallocation ???
1647 RETVAL_STRINGL((char *)result.val, result.len); /* the string is already strdup()'ed */
1648 efree(result.val);
1649
1650 /* delete the converter if it is the last feed. */
1651 if (last_feed) {
1652 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1653 mbfl_buffer_converter_delete(MBSTRG(outconv));
1654 MBSTRG(outconv) = NULL;
1655 }
1656 }
1657 /* }}} */
1658
1659 /* {{{ Convert a multibyte string to an array. If split_length is specified,
1660 break the string down into chunks each split_length characters long. */
1661
1662 /* structure to pass split params to the callback */
1663 struct mbfl_split_params {
1664 zval *return_value; /* php function return value structure pointer */
1665 mbfl_string *result_string; /* string to store result chunk */
1666 size_t mb_chunk_length; /* actual chunk length in chars */
1667 size_t split_length; /* split length in chars */
1668 mbfl_convert_filter *next_filter; /* widechar to encoding converter */
1669 };
1670
1671 /* callback function to fill split array */
mbfl_split_output(int c,void * data)1672 static int mbfl_split_output(int c, void *data)
1673 {
1674 struct mbfl_split_params *params = (struct mbfl_split_params *)data; /* cast passed data */
1675
1676 (*params->next_filter->filter_function)(c, params->next_filter); /* decoder filter */
1677
1678 if (params->split_length == ++params->mb_chunk_length) { /* if current chunk size reached defined chunk size or last char reached */
1679 mbfl_convert_filter_flush(params->next_filter);/* concatenate separate decoded chars to the solid string */
1680 mbfl_memory_device *device = (mbfl_memory_device *)params->next_filter->data; /* chars container */
1681 mbfl_string *chunk = params->result_string;
1682 mbfl_memory_device_result(device, chunk); /* make chunk */
1683 add_next_index_stringl(params->return_value, (const char *)chunk->val, chunk->len); /* add chunk to the array */
1684 efree(chunk->val);
1685 params->mb_chunk_length = 0; /* reset mb_chunk size */
1686 }
1687
1688 return 0;
1689 }
1690
PHP_FUNCTION(mb_str_split)1691 PHP_FUNCTION(mb_str_split)
1692 {
1693 zend_string *str, *encoding = NULL;
1694 size_t mb_len, chunks, chunk_len;
1695 const char *p, *last; /* pointer for the string cursor and last string char */
1696 mbfl_string string, result_string;
1697 const mbfl_encoding *mbfl_encoding;
1698 zend_long split_length = 1;
1699
1700 ZEND_PARSE_PARAMETERS_START(1, 3)
1701 Z_PARAM_STR(str)
1702 Z_PARAM_OPTIONAL
1703 Z_PARAM_LONG(split_length)
1704 Z_PARAM_STR_OR_NULL(encoding)
1705 ZEND_PARSE_PARAMETERS_END();
1706
1707 if (split_length <= 0) {
1708 zend_argument_value_error(2, "must be greater than 0");
1709 RETURN_THROWS();
1710 }
1711
1712 /* fill mbfl_string structure */
1713 string.val = (unsigned char *) ZSTR_VAL(str);
1714 string.len = ZSTR_LEN(str);
1715 string.encoding = php_mb_get_encoding(encoding, 3);
1716 if (!string.encoding) {
1717 RETURN_THROWS();
1718 }
1719
1720 p = ZSTR_VAL(str); /* string cursor pointer */
1721 last = ZSTR_VAL(str) + ZSTR_LEN(str); /* last string char pointer */
1722
1723 mbfl_encoding = string.encoding;
1724
1725 /* first scenario: 1,2,4-bytes fixed width encodings (head part) */
1726 if (mbfl_encoding->flag & MBFL_ENCTYPE_SBCS) { /* 1 byte */
1727 mb_len = string.len;
1728 chunk_len = (size_t)split_length; /* chunk length in bytes */
1729 } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) { /* 2 bytes */
1730 mb_len = string.len / 2;
1731 chunk_len = split_length * 2;
1732 } else if (mbfl_encoding->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) { /* 4 bytes */
1733 mb_len = string.len / 4;
1734 chunk_len = split_length * 4;
1735 } else if (mbfl_encoding->mblen_table != NULL) {
1736 /* second scenario: variable width encodings with length table */
1737 char unsigned const *mbtab = mbfl_encoding->mblen_table;
1738
1739 /* assume that we have 1-bytes characters */
1740 array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1741
1742 while (p < last) { /* split cycle work until the cursor has reached the last byte */
1743 char const *chunk_p = p; /* chunk first byte pointer */
1744 chunk_len = 0; /* chunk length in bytes */
1745 zend_long char_count;
1746
1747 for (char_count = 0; char_count < split_length && p < last; ++char_count) {
1748 char unsigned const m = mbtab[*(const unsigned char *)p]; /* single character length table */
1749 chunk_len += m;
1750 p += m;
1751 }
1752 if (p >= last) chunk_len -= p - last; /* check if chunk is in bounds */
1753 add_next_index_stringl(return_value, chunk_p, chunk_len);
1754 }
1755 return;
1756 } else {
1757 /* third scenario: other multibyte encodings */
1758 mbfl_convert_filter *filter, *decoder;
1759
1760 /* assume that we have 1-bytes characters */
1761 array_init_size(return_value, (string.len + split_length) / split_length); /* round up */
1762
1763 /* decoder filter to decode wchar to encoding */
1764 mbfl_memory_device device;
1765 mbfl_memory_device_init(&device, split_length + 1, 0);
1766
1767 decoder = mbfl_convert_filter_new(
1768 &mbfl_encoding_wchar,
1769 string.encoding,
1770 mbfl_memory_device_output,
1771 NULL,
1772 &device);
1773 /* assert that nothing is wrong with the decoder */
1774 ZEND_ASSERT(decoder != NULL);
1775
1776 /* wchar filter */
1777 mbfl_string_init(&result_string); /* mbfl_string to store chunk in the callback */
1778 struct mbfl_split_params params = { /* init callback function params structure */
1779 .return_value = return_value,
1780 .result_string = &result_string,
1781 .mb_chunk_length = 0,
1782 .split_length = (size_t)split_length,
1783 .next_filter = decoder,
1784 };
1785
1786 filter = mbfl_convert_filter_new(
1787 string.encoding,
1788 &mbfl_encoding_wchar,
1789 mbfl_split_output,
1790 NULL,
1791 ¶ms);
1792 /* assert that nothing is wrong with the filter */
1793 ZEND_ASSERT(filter != NULL);
1794
1795 while (p < last - 1) { /* cycle each byte except last with callback function */
1796 (*filter->filter_function)(*p++, filter);
1797 }
1798 params.mb_chunk_length = split_length - 1; /* force to finish current chunk */
1799 (*filter->filter_function)(*p++, filter); /* process last char */
1800
1801 mbfl_convert_filter_delete(decoder);
1802 mbfl_convert_filter_delete(filter);
1803 mbfl_memory_device_clear(&device);
1804 return;
1805 }
1806
1807 /* first scenario: 1,2,4-bytes fixed width encodings (tail part) */
1808 chunks = (mb_len + split_length - 1) / split_length; /* (round up idiom) */
1809 array_init_size(return_value, chunks);
1810 if (chunks != 0) {
1811 zend_long i;
1812
1813 for (i = 0; i < chunks - 1; p += chunk_len, ++i) {
1814 add_next_index_stringl(return_value, p, chunk_len);
1815 }
1816 add_next_index_stringl(return_value, p, last - p);
1817 }
1818 }
1819 /* }}} */
1820
1821 /* {{{ Get character numbers of a string */
PHP_FUNCTION(mb_strlen)1822 PHP_FUNCTION(mb_strlen)
1823 {
1824 mbfl_string string;
1825 char *str;
1826 zend_string *enc_name = NULL;
1827
1828 ZEND_PARSE_PARAMETERS_START(1, 2)
1829 Z_PARAM_STRING(str, string.len)
1830 Z_PARAM_OPTIONAL
1831 Z_PARAM_STR_OR_NULL(enc_name)
1832 ZEND_PARSE_PARAMETERS_END();
1833
1834 string.val = (unsigned char*)str;
1835 string.encoding = php_mb_get_encoding(enc_name, 2);
1836 if (!string.encoding) {
1837 RETURN_THROWS();
1838 }
1839
1840 size_t n = mbfl_strlen(&string);
1841 /* Only way this can fail is if the conversion creation fails
1842 * this would imply some sort of memory allocation failure which is a bug */
1843 ZEND_ASSERT(!mbfl_is_error(n));
1844 RETVAL_LONG(n);
1845 }
1846 /* }}} */
1847
handle_strpos_error(size_t error)1848 static void handle_strpos_error(size_t error) {
1849 switch (error) {
1850 case MBFL_ERROR_NOT_FOUND:
1851 break;
1852 case MBFL_ERROR_ENCODING:
1853 php_error_docref(NULL, E_WARNING, "Conversion error");
1854 break;
1855 case MBFL_ERROR_OFFSET:
1856 zend_argument_value_error(3, "must be contained in argument #1 ($haystack)");
1857 break;
1858 default:
1859 zend_value_error("mb_strpos(): Unknown error");
1860 break;
1861 }
1862 }
1863
1864 /* {{{ Find position of first occurrence of a string within another */
PHP_FUNCTION(mb_strpos)1865 PHP_FUNCTION(mb_strpos)
1866 {
1867 int reverse = 0;
1868 zend_long offset = 0;
1869 char *haystack_val, *needle_val;
1870 mbfl_string haystack, needle;
1871 zend_string *enc_name = NULL;
1872
1873 ZEND_PARSE_PARAMETERS_START(2, 4)
1874 Z_PARAM_STRING(haystack_val, haystack.len)
1875 Z_PARAM_STRING(needle_val, needle.len)
1876 Z_PARAM_OPTIONAL
1877 Z_PARAM_LONG(offset)
1878 Z_PARAM_STR_OR_NULL(enc_name)
1879 ZEND_PARSE_PARAMETERS_END();
1880
1881 haystack.val = (unsigned char*)haystack_val;
1882 needle.val = (unsigned char*)needle_val;
1883
1884 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1885 if (!haystack.encoding) {
1886 RETURN_THROWS();
1887 }
1888
1889 size_t n = mbfl_strpos(&haystack, &needle, offset, reverse);
1890 if (!mbfl_is_error(n)) {
1891 RETVAL_LONG(n);
1892 } else {
1893 handle_strpos_error(n);
1894 RETVAL_FALSE;
1895 }
1896 }
1897 /* }}} */
1898
1899 /* {{{ Find position of last occurrence of a string within another */
PHP_FUNCTION(mb_strrpos)1900 PHP_FUNCTION(mb_strrpos)
1901 {
1902 mbfl_string haystack, needle;
1903 char *haystack_val, *needle_val;
1904 zend_string *enc_name = NULL;
1905 zend_long offset = 0;
1906
1907 ZEND_PARSE_PARAMETERS_START(2, 4)
1908 Z_PARAM_STRING(haystack_val, haystack.len)
1909 Z_PARAM_STRING(needle_val, needle.len)
1910 Z_PARAM_OPTIONAL
1911 Z_PARAM_LONG(offset)
1912 Z_PARAM_STR_OR_NULL(enc_name)
1913 ZEND_PARSE_PARAMETERS_END();
1914
1915 haystack.val = (unsigned char*)haystack_val;
1916 needle.val = (unsigned char*)needle_val;
1917
1918 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 4);
1919 if (!haystack.encoding) {
1920 RETURN_THROWS();
1921 }
1922
1923 size_t n = mbfl_strpos(&haystack, &needle, offset, 1);
1924 if (!mbfl_is_error(n)) {
1925 RETVAL_LONG(n);
1926 } else {
1927 handle_strpos_error(n);
1928 RETVAL_FALSE;
1929 }
1930 }
1931 /* }}} */
1932
1933 /* {{{ Finds position of first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stripos)1934 PHP_FUNCTION(mb_stripos)
1935 {
1936 zend_long offset = 0;
1937 mbfl_string haystack, needle;
1938 char *haystack_val, *needle_val;
1939 zend_string *from_encoding = NULL;
1940
1941 ZEND_PARSE_PARAMETERS_START(2, 4)
1942 Z_PARAM_STRING(haystack_val, haystack.len)
1943 Z_PARAM_STRING(needle_val, needle.len)
1944 Z_PARAM_OPTIONAL
1945 Z_PARAM_LONG(offset)
1946 Z_PARAM_STR_OR_NULL(from_encoding)
1947 ZEND_PARSE_PARAMETERS_END();
1948
1949 haystack.val = (unsigned char*)haystack_val;
1950 needle.val = (unsigned char*)needle_val;
1951
1952 const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
1953 if (!enc) {
1954 RETURN_THROWS();
1955 }
1956
1957 size_t n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
1958
1959 if (!mbfl_is_error(n)) {
1960 RETVAL_LONG(n);
1961 } else {
1962 handle_strpos_error(n);
1963 RETVAL_FALSE;
1964 }
1965 }
1966 /* }}} */
1967
1968 /* {{{ Finds position of last occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_strripos)1969 PHP_FUNCTION(mb_strripos)
1970 {
1971 zend_long offset = 0;
1972 mbfl_string haystack, needle;
1973 char *haystack_val, *needle_val;
1974 zend_string *from_encoding = NULL;
1975
1976 ZEND_PARSE_PARAMETERS_START(2, 4)
1977 Z_PARAM_STRING(haystack_val, haystack.len)
1978 Z_PARAM_STRING(needle_val, needle.len)
1979 Z_PARAM_OPTIONAL
1980 Z_PARAM_LONG(offset)
1981 Z_PARAM_STR_OR_NULL(from_encoding)
1982 ZEND_PARSE_PARAMETERS_END();
1983
1984 haystack.val = (unsigned char*)haystack_val;
1985 needle.val = (unsigned char*)needle_val;
1986
1987 const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 4);
1988 if (!enc) {
1989 RETURN_THROWS();
1990 }
1991
1992 size_t n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, enc);
1993
1994 if (!mbfl_is_error(n)) {
1995 RETVAL_LONG(n);
1996 } else {
1997 handle_strpos_error(n);
1998 RETVAL_FALSE;
1999 }
2000 }
2001 /* }}} */
2002
2003 #define MB_STRSTR 1
2004 #define MB_STRRCHR 2
2005 #define MB_STRISTR 3
2006 #define MB_STRRICHR 4
2007 /* {{{ php_mb_strstr_variants */
php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS,unsigned int variant)2008 static void php_mb_strstr_variants(INTERNAL_FUNCTION_PARAMETERS, unsigned int variant)
2009 {
2010 int reverse_mode = 0;
2011 size_t n;
2012 char *haystack_val, *needle_val;
2013 mbfl_string haystack, needle, result, *ret = NULL;
2014 zend_string *encoding_name = NULL;
2015 zend_bool part = 0;
2016
2017 ZEND_PARSE_PARAMETERS_START(2, 4)
2018 Z_PARAM_STRING(haystack_val, haystack.len)
2019 Z_PARAM_STRING(needle_val, needle.len)
2020 Z_PARAM_OPTIONAL
2021 Z_PARAM_BOOL(part)
2022 Z_PARAM_STR_OR_NULL(encoding_name)
2023 ZEND_PARSE_PARAMETERS_END();
2024
2025 haystack.val = (unsigned char*)haystack_val;
2026 needle.val = (unsigned char*)needle_val;
2027 haystack.encoding = needle.encoding = php_mb_get_encoding(encoding_name, 4);
2028 if (!haystack.encoding) {
2029 RETURN_THROWS();
2030 }
2031
2032 if (variant == MB_STRRCHR || variant == MB_STRRICHR) { reverse_mode = 1; }
2033
2034 if (variant == MB_STRISTR || variant == MB_STRRICHR) {
2035 n = php_mb_stripos(reverse_mode, (char *)haystack.val, haystack.len, (char *)needle.val,
2036 needle.len, 0, needle.encoding);
2037 } else {
2038 n = mbfl_strpos(&haystack, &needle, 0, reverse_mode);
2039 }
2040
2041 if (!mbfl_is_error(n)) {
2042 if (part) {
2043 ret = mbfl_substr(&haystack, &result, 0, n);
2044 ZEND_ASSERT(ret != NULL);
2045 // TODO: avoid reallocation ???
2046 RETVAL_STRINGL((char *)ret->val, ret->len);
2047 efree(ret->val);
2048 } else {
2049 ret = mbfl_substr(&haystack, &result, n, MBFL_SUBSTR_UNTIL_END);
2050 ZEND_ASSERT(ret != NULL);
2051 // TODO: avoid reallocation ???
2052 RETVAL_STRINGL((char *)ret->val, ret->len);
2053 efree(ret->val);
2054 }
2055 } else {
2056 // FIXME use handle_strpos_error(n)
2057 RETVAL_FALSE;
2058 }
2059 }
2060
2061 /* {{{ Finds first occurrence of a string within another */
PHP_FUNCTION(mb_strstr)2062 PHP_FUNCTION(mb_strstr)
2063 {
2064 php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRSTR);
2065 }
2066 /* }}} */
2067
2068 /* {{{ Finds the last occurrence of a character in a string within another */
PHP_FUNCTION(mb_strrchr)2069 PHP_FUNCTION(mb_strrchr)
2070 {
2071 php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRCHR);
2072 }
2073 /* }}} */
2074
2075 /* {{{ Finds first occurrence of a string within another, case insensitive */
PHP_FUNCTION(mb_stristr)2076 PHP_FUNCTION(mb_stristr)
2077 {
2078 php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRISTR);
2079 }
2080 /* }}} */
2081
2082 /* {{{ Finds the last occurrence of a character in a string within another, case insensitive */
PHP_FUNCTION(mb_strrichr)2083 PHP_FUNCTION(mb_strrichr)
2084 {
2085 php_mb_strstr_variants(INTERNAL_FUNCTION_PARAM_PASSTHRU, MB_STRRICHR);
2086 }
2087 /* }}} */
2088
2089 #undef MB_STRSTR
2090 #undef MB_STRRCHR
2091 #undef MB_STRISTR
2092 #undef MB_STRRICHR
2093
2094 /* {{{ Count the number of substring occurrences */
PHP_FUNCTION(mb_substr_count)2095 PHP_FUNCTION(mb_substr_count)
2096 {
2097 mbfl_string haystack, needle;
2098 char *haystack_val, *needle_val;
2099 zend_string *enc_name = NULL;
2100
2101 ZEND_PARSE_PARAMETERS_START(2, 3)
2102 Z_PARAM_STRING(haystack_val, haystack.len)
2103 Z_PARAM_STRING(needle_val, needle.len)
2104 Z_PARAM_OPTIONAL
2105 Z_PARAM_STR_OR_NULL(enc_name)
2106 ZEND_PARSE_PARAMETERS_END();
2107
2108 haystack.val = (unsigned char*)haystack_val;
2109 needle.val = (unsigned char*)needle_val;
2110
2111 if (needle.len == 0) {
2112 zend_argument_value_error(2, "must not be empty");
2113 RETURN_THROWS();
2114 }
2115
2116 haystack.encoding = needle.encoding = php_mb_get_encoding(enc_name, 3);
2117 if (!haystack.encoding) {
2118 RETURN_THROWS();
2119 }
2120
2121 size_t n = mbfl_substr_count(&haystack, &needle);
2122 /* An error can only occur if needle is empty,
2123 * an encoding error happens (which should not happen at this stage and is a bug)
2124 * or the haystack is more than sizeof(size_t) bytes
2125 * If one of these things occur this is a bug and should be flagged as such */
2126 ZEND_ASSERT(!mbfl_is_error(n));
2127 RETVAL_LONG(n);
2128 }
2129 /* }}} */
2130
2131 /* {{{ Returns part of a string */
PHP_FUNCTION(mb_substr)2132 PHP_FUNCTION(mb_substr)
2133 {
2134 char *str;
2135 zend_string *encoding = NULL;
2136 zend_long from, len;
2137 size_t real_from, real_len;
2138 size_t str_len;
2139 zend_bool len_is_null = 1;
2140 mbfl_string string, result, *ret;
2141
2142 ZEND_PARSE_PARAMETERS_START(2, 4)
2143 Z_PARAM_STRING(str, str_len)
2144 Z_PARAM_LONG(from)
2145 Z_PARAM_OPTIONAL
2146 Z_PARAM_LONG_OR_NULL(len, len_is_null)
2147 Z_PARAM_STR_OR_NULL(encoding)
2148 ZEND_PARSE_PARAMETERS_END();
2149
2150 string.encoding = php_mb_get_encoding(encoding, 4);
2151 if (!string.encoding) {
2152 RETURN_THROWS();
2153 }
2154
2155 string.val = (unsigned char *)str;
2156 string.len = str_len;
2157
2158 /* measures length */
2159 size_t mblen = 0;
2160 if (from < 0 || (!len_is_null && len < 0)) {
2161 mblen = mbfl_strlen(&string);
2162 }
2163
2164 /* if "from" position is negative, count start position from the end
2165 * of the string
2166 */
2167 if (from >= 0) {
2168 real_from = (size_t) from;
2169 } else if (-from < mblen) {
2170 real_from = mblen + from;
2171 } else {
2172 real_from = 0;
2173 }
2174
2175 /* if "length" position is negative, set it to the length
2176 * needed to stop that many chars from the end of the string
2177 */
2178 if (len_is_null) {
2179 real_len = MBFL_SUBSTR_UNTIL_END;
2180 } else if (len >= 0) {
2181 real_len = (size_t) len;
2182 } else if (real_from < mblen && -len < mblen - real_from) {
2183 real_len = (mblen - real_from) + len;
2184 } else {
2185 real_len = 0;
2186 }
2187
2188 ret = mbfl_substr(&string, &result, real_from, real_len);
2189 ZEND_ASSERT(ret != NULL);
2190
2191 // TODO: avoid reallocation ???
2192 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2193 efree(ret->val);
2194 }
2195 /* }}} */
2196
2197 /* {{{ Returns part of a string */
PHP_FUNCTION(mb_strcut)2198 PHP_FUNCTION(mb_strcut)
2199 {
2200 zend_string *encoding = NULL;
2201 char *string_val;
2202 zend_long from, len;
2203 zend_bool len_is_null = 1;
2204 mbfl_string string, result, *ret;
2205
2206 ZEND_PARSE_PARAMETERS_START(2, 4)
2207 Z_PARAM_STRING(string_val, string.len)
2208 Z_PARAM_LONG(from)
2209 Z_PARAM_OPTIONAL
2210 Z_PARAM_LONG_OR_NULL(len, len_is_null)
2211 Z_PARAM_STR_OR_NULL(encoding)
2212 ZEND_PARSE_PARAMETERS_END();
2213
2214 string.val = (unsigned char*)string_val;
2215 string.encoding = php_mb_get_encoding(encoding, 4);
2216 if (!string.encoding) {
2217 RETURN_THROWS();
2218 }
2219
2220 if (len_is_null) {
2221 len = string.len;
2222 }
2223
2224 /* if "from" position is negative, count start position from the end
2225 * of the string
2226 */
2227 if (from < 0) {
2228 from = string.len + from;
2229 if (from < 0) {
2230 from = 0;
2231 }
2232 }
2233
2234 /* if "length" position is negative, set it to the length
2235 * needed to stop that many chars from the end of the string
2236 */
2237 if (len < 0) {
2238 len = (string.len - from) + len;
2239 if (len < 0) {
2240 len = 0;
2241 }
2242 }
2243
2244 if (from > string.len) {
2245 RETURN_EMPTY_STRING();
2246 }
2247
2248 ret = mbfl_strcut(&string, &result, from, len);
2249 ZEND_ASSERT(ret != NULL);
2250
2251 // TODO: avoid reallocation ???
2252 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2253 efree(ret->val);
2254 }
2255 /* }}} */
2256
2257 /* {{{ Gets terminal width of a string */
PHP_FUNCTION(mb_strwidth)2258 PHP_FUNCTION(mb_strwidth)
2259 {
2260 char *string_val;
2261 mbfl_string string;
2262 zend_string *enc_name = NULL;
2263
2264 ZEND_PARSE_PARAMETERS_START(1, 2)
2265 Z_PARAM_STRING(string_val, string.len)
2266 Z_PARAM_OPTIONAL
2267 Z_PARAM_STR_OR_NULL(enc_name)
2268 ZEND_PARSE_PARAMETERS_END();
2269
2270 string.val = (unsigned char*)string_val;
2271 string.encoding = php_mb_get_encoding(enc_name, 2);
2272 if (!string.encoding) {
2273 RETURN_THROWS();
2274 }
2275
2276 size_t n = mbfl_strwidth(&string);
2277 ZEND_ASSERT(n != (size_t) -1);
2278 RETVAL_LONG(n);
2279 }
2280 /* }}} */
2281
2282 /* {{{ Trim the string in terminal width */
PHP_FUNCTION(mb_strimwidth)2283 PHP_FUNCTION(mb_strimwidth)
2284 {
2285 char *str, *trimmarker = NULL;
2286 zend_string *encoding = NULL;
2287 zend_long from, width, swidth = 0;
2288 size_t str_len, trimmarker_len;
2289 mbfl_string string, result, marker, *ret;
2290
2291 ZEND_PARSE_PARAMETERS_START(3, 5)
2292 Z_PARAM_STRING(str, str_len)
2293 Z_PARAM_LONG(from)
2294 Z_PARAM_LONG(width)
2295 Z_PARAM_OPTIONAL
2296 Z_PARAM_STRING(trimmarker, trimmarker_len)
2297 Z_PARAM_STR_OR_NULL(encoding)
2298 ZEND_PARSE_PARAMETERS_END();
2299
2300 string.encoding = marker.encoding = php_mb_get_encoding(encoding, 5);
2301 if (!string.encoding) {
2302 RETURN_THROWS();
2303 }
2304
2305 string.val = (unsigned char *)str;
2306 string.len = str_len;
2307 marker.val = NULL;
2308 marker.len = 0;
2309
2310 if ((from < 0) || (width < 0)) {
2311 swidth = mbfl_strwidth(&string);
2312 }
2313
2314 if (from < 0) {
2315 from += swidth;
2316 }
2317
2318 if (from < 0 || (size_t)from > str_len) {
2319 zend_argument_value_error(2, "is out of range");
2320 RETURN_THROWS();
2321 }
2322
2323 if (width < 0) {
2324 width = swidth + width - from;
2325 }
2326
2327 if (width < 0) {
2328 zend_argument_value_error(3, "is out of range");
2329 RETURN_THROWS();
2330 }
2331
2332 if (trimmarker) {
2333 marker.val = (unsigned char *)trimmarker;
2334 marker.len = trimmarker_len;
2335 }
2336
2337 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2338 ZEND_ASSERT(ret != NULL);
2339 // TODO: avoid reallocation ???
2340 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2341 efree(ret->val);
2342 }
2343 /* }}} */
2344
2345
2346 /* See mbfl_no_encoding definition for list of unsupported encodings */
php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)2347 static inline zend_bool php_mb_is_unsupported_no_encoding(enum mbfl_no_encoding no_enc)
2348 {
2349 return ((no_enc >= mbfl_no_encoding_invalid && no_enc <= mbfl_no_encoding_qprint)
2350 || (no_enc >= mbfl_no_encoding_utf7 && no_enc <= mbfl_no_encoding_utf7imap)
2351 || (no_enc >= mbfl_no_encoding_jis && no_enc <= mbfl_no_encoding_2022jpms)
2352 || (no_enc >= mbfl_no_encoding_cp50220 && no_enc <= mbfl_no_encoding_cp50222));
2353 }
2354
2355
2356 /* See mbfl_no_encoding definition for list of UTF-8 encodings */
php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)2357 static inline zend_bool php_mb_is_no_encoding_utf8(enum mbfl_no_encoding no_enc)
2358 {
2359 return (no_enc >= mbfl_no_encoding_utf8 && no_enc <= mbfl_no_encoding_utf8_sb);
2360 }
2361
php_mb_convert_encoding_ex(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding * from_encoding,size_t * output_len)2362 MBSTRING_API char *php_mb_convert_encoding_ex(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding *from_encoding, size_t *output_len)
2363 {
2364 mbfl_string string, result, *ret;
2365 mbfl_buffer_converter *convd;
2366 char *output = NULL;
2367
2368 if (output_len) {
2369 *output_len = 0;
2370 }
2371
2372 /* initialize string */
2373 string.encoding = from_encoding;
2374 string.val = (unsigned char *)input;
2375 string.len = length;
2376
2377 /* initialize converter */
2378 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, string.len);
2379 /* If this assertion fails this means some memory allocation failure which is a bug */
2380 ZEND_ASSERT(convd != NULL);
2381
2382 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
2383 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
2384
2385 /* do it */
2386 mbfl_string_init(&result);
2387 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
2388 if (ret) {
2389 if (output_len) {
2390 *output_len = ret->len;
2391 }
2392 output = (char *)ret->val;
2393 }
2394
2395 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
2396 mbfl_buffer_converter_delete(convd);
2397 return output;
2398 }
2399 /* }}} */
2400
2401 /* {{{ MBSTRING_API char *php_mb_convert_encoding() */
php_mb_convert_encoding(const char * input,size_t length,const mbfl_encoding * to_encoding,const mbfl_encoding ** from_encodings,size_t num_from_encodings,size_t * output_len)2402 MBSTRING_API char *php_mb_convert_encoding(const char *input, size_t length, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings, size_t *output_len)
2403 {
2404 const mbfl_encoding *from_encoding;
2405
2406 if (output_len) {
2407 *output_len = 0;
2408 }
2409
2410 /* pre-conversion encoding */
2411 ZEND_ASSERT(num_from_encodings >= 1);
2412 if (num_from_encodings == 1) {
2413 from_encoding = *from_encodings;
2414 } else {
2415 /* auto detect */
2416 mbfl_string string;
2417 mbfl_string_init(&string);
2418 string.val = (unsigned char *)input;
2419 string.len = length;
2420 from_encoding = mbfl_identify_encoding(
2421 &string, from_encodings, num_from_encodings, MBSTRG(strict_detection));
2422 if (!from_encoding) {
2423 php_error_docref(NULL, E_WARNING, "Unable to detect character encoding");
2424 return NULL;
2425 }
2426 }
2427
2428 return php_mb_convert_encoding_ex(input, length, to_encoding, from_encoding, output_len);
2429 }
2430 /* }}} */
2431
php_mb_convert_encoding_recursive(HashTable * input,const mbfl_encoding * to_encoding,const mbfl_encoding ** from_encodings,size_t num_from_encodings)2432 MBSTRING_API HashTable *php_mb_convert_encoding_recursive(HashTable *input, const mbfl_encoding *to_encoding, const mbfl_encoding **from_encodings, size_t num_from_encodings)
2433 {
2434 HashTable *output, *chash;
2435 zend_long idx;
2436 zend_string *key;
2437 zval *entry, entry_tmp;
2438 size_t ckey_len, cval_len;
2439 char *ckey, *cval;
2440
2441 if (!input) {
2442 return NULL;
2443 }
2444
2445 if (GC_IS_RECURSIVE(input)) {
2446 GC_UNPROTECT_RECURSION(input);
2447 php_error_docref(NULL, E_WARNING, "Cannot convert recursively referenced values");
2448 return NULL;
2449 }
2450 GC_TRY_PROTECT_RECURSION(input);
2451 output = zend_new_array(zend_hash_num_elements(input));
2452 ZEND_HASH_FOREACH_KEY_VAL(input, idx, key, entry) {
2453 /* convert key */
2454 if (key) {
2455 ckey = php_mb_convert_encoding(
2456 ZSTR_VAL(key), ZSTR_LEN(key),
2457 to_encoding, from_encodings, num_from_encodings, &ckey_len);
2458 key = zend_string_init(ckey, ckey_len, 0);
2459 efree(ckey);
2460 }
2461 /* convert value */
2462 ZEND_ASSERT(entry);
2463 try_again:
2464 switch(Z_TYPE_P(entry)) {
2465 case IS_STRING:
2466 cval = php_mb_convert_encoding(
2467 Z_STRVAL_P(entry), Z_STRLEN_P(entry),
2468 to_encoding, from_encodings, num_from_encodings, &cval_len);
2469 ZVAL_STRINGL(&entry_tmp, cval, cval_len);
2470 efree(cval);
2471 break;
2472 case IS_NULL:
2473 case IS_TRUE:
2474 case IS_FALSE:
2475 case IS_LONG:
2476 case IS_DOUBLE:
2477 ZVAL_COPY(&entry_tmp, entry);
2478 break;
2479 case IS_ARRAY:
2480 chash = php_mb_convert_encoding_recursive(
2481 Z_ARRVAL_P(entry), to_encoding, from_encodings, num_from_encodings);
2482 if (chash) {
2483 ZVAL_ARR(&entry_tmp, chash);
2484 } else {
2485 ZVAL_EMPTY_ARRAY(&entry_tmp);
2486 }
2487 break;
2488 case IS_REFERENCE:
2489 entry = Z_REFVAL_P(entry);
2490 goto try_again;
2491 case IS_OBJECT:
2492 default:
2493 if (key) {
2494 zend_string_release(key);
2495 }
2496 php_error_docref(NULL, E_WARNING, "Object is not supported");
2497 continue;
2498 }
2499 if (key) {
2500 zend_hash_add(output, key, &entry_tmp);
2501 zend_string_release(key);
2502 } else {
2503 zend_hash_index_add(output, idx, &entry_tmp);
2504 }
2505 } ZEND_HASH_FOREACH_END();
2506 GC_TRY_UNPROTECT_RECURSION(input);
2507
2508 return output;
2509 }
2510 /* }}} */
2511
2512 /* {{{ Returns converted string in desired encoding */
PHP_FUNCTION(mb_convert_encoding)2513 PHP_FUNCTION(mb_convert_encoding)
2514 {
2515 zend_string *to_encoding_name;
2516 zend_string *input_str, *from_encodings_str = NULL;
2517 HashTable *input_ht, *from_encodings_ht = NULL;
2518 const mbfl_encoding **from_encodings;
2519 size_t num_from_encodings;
2520 zend_bool free_from_encodings;
2521
2522 ZEND_PARSE_PARAMETERS_START(2, 3)
2523 Z_PARAM_ARRAY_HT_OR_STR(input_ht, input_str)
2524 Z_PARAM_STR(to_encoding_name)
2525 Z_PARAM_OPTIONAL
2526 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(from_encodings_ht, from_encodings_str)
2527 ZEND_PARSE_PARAMETERS_END();
2528
2529 const mbfl_encoding *to_encoding = php_mb_get_encoding(to_encoding_name, 2);
2530 if (!to_encoding) {
2531 RETURN_THROWS();
2532 }
2533
2534 if (from_encodings_ht) {
2535 if (php_mb_parse_encoding_array(from_encodings_ht, &from_encodings, &num_from_encodings, 3) == FAILURE) {
2536 RETURN_THROWS();
2537 }
2538 free_from_encodings = 1;
2539 } else if (from_encodings_str) {
2540 if (php_mb_parse_encoding_list(ZSTR_VAL(from_encodings_str), ZSTR_LEN(from_encodings_str),
2541 &from_encodings, &num_from_encodings,
2542 /* persistent */ 0, /* arg_num */ 3, /* allow_pass_encoding */ 0) == FAILURE) {
2543 RETURN_THROWS();
2544 }
2545 free_from_encodings = 1;
2546 } else {
2547 from_encodings = &MBSTRG(current_internal_encoding);
2548 num_from_encodings = 1;
2549 free_from_encodings = 0;
2550 }
2551
2552 if (!num_from_encodings) {
2553 efree(ZEND_VOIDP(from_encodings));
2554 zend_argument_value_error(3, "must specify at least one encoding");
2555 RETURN_THROWS();
2556 }
2557
2558 if (input_str) {
2559 /* new encoding */
2560 size_t size;
2561 char *ret = php_mb_convert_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str),
2562 to_encoding, from_encodings, num_from_encodings, &size);
2563 if (ret != NULL) {
2564 // TODO: avoid reallocation ???
2565 RETVAL_STRINGL(ret, size); /* the string is already strdup()'ed */
2566 efree(ret);
2567 } else {
2568 RETVAL_FALSE;
2569 }
2570 } else {
2571 HashTable *tmp;
2572 tmp = php_mb_convert_encoding_recursive(
2573 input_ht, to_encoding, from_encodings, num_from_encodings);
2574 RETVAL_ARR(tmp);
2575 }
2576
2577 if (free_from_encodings) {
2578 efree(ZEND_VOIDP(from_encodings));
2579 }
2580 }
2581 /* }}} */
2582
/* Convenience wrapper around php_unicode_convert_case() that supplies the
 * current illegal-character mode and substitution character from the module
 * globals. All other arguments and the return value are passed through. */
static char *mbstring_convert_case(
		int case_mode, const char *str, size_t str_len, size_t *ret_len,
		const mbfl_encoding *enc) {
	return php_unicode_convert_case(
		case_mode,
		str, str_len, ret_len,
		enc,
		MBSTRG(current_filter_illegal_mode),
		MBSTRG(current_filter_illegal_substchar));
}
2590
2591 /* {{{ Returns a case-folded version of source_string */
PHP_FUNCTION(mb_convert_case)2592 PHP_FUNCTION(mb_convert_case)
2593 {
2594 zend_string *from_encoding = NULL;
2595 char *str;
2596 size_t str_len, ret_len;
2597 zend_long case_mode = 0;
2598
2599 ZEND_PARSE_PARAMETERS_START(2, 3)
2600 Z_PARAM_STRING(str, str_len)
2601 Z_PARAM_LONG(case_mode)
2602 Z_PARAM_OPTIONAL
2603 Z_PARAM_STR_OR_NULL(from_encoding)
2604 ZEND_PARSE_PARAMETERS_END();
2605
2606 const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 3);
2607 if (!enc) {
2608 RETURN_THROWS();
2609 }
2610
2611 if (case_mode < 0 || case_mode > PHP_UNICODE_CASE_MODE_MAX) {
2612 zend_argument_value_error(2, "must be one of the MB_CASE_* constants");
2613 RETURN_THROWS();
2614 }
2615
2616 char *newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
2617 /* If newstr is NULL something went wrong in mbfl and this is a bug */
2618 ZEND_ASSERT(newstr != NULL);
2619
2620 // TODO: avoid reallocation ???
2621 RETVAL_STRINGL(newstr, ret_len);
2622 efree(newstr);
2623 }
2624 /* }}} */
2625
2626 /* {{{ Returns a upper cased version of source_string */
PHP_FUNCTION(mb_strtoupper)2627 PHP_FUNCTION(mb_strtoupper)
2628 {
2629 zend_string *from_encoding = NULL;
2630 char *str;
2631 size_t str_len, ret_len;
2632
2633 ZEND_PARSE_PARAMETERS_START(1, 2)
2634 Z_PARAM_STRING(str, str_len)
2635 Z_PARAM_OPTIONAL
2636 Z_PARAM_STR_OR_NULL(from_encoding)
2637 ZEND_PARSE_PARAMETERS_END();
2638
2639 const mbfl_encoding *enc = php_mb_get_encoding(from_encoding, 2);
2640 if (!enc) {
2641 RETURN_THROWS();
2642 }
2643
2644 char *newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
2645 /* If newstr is NULL something went wrong in mbfl and this is a bug */
2646 ZEND_ASSERT(newstr != NULL);
2647
2648 // TODO: avoid reallocation ???
2649 RETVAL_STRINGL(newstr, ret_len);
2650 efree(newstr);
2651 }
2652 /* }}} */
2653
2654 /* {{{ Returns a lower cased version of source_string */
PHP_FUNCTION(mb_strtolower)2655 PHP_FUNCTION(mb_strtolower)
2656 {
2657 zend_string *from_encoding = NULL;
2658 char *str;
2659 size_t str_len;
2660 char *newstr;
2661 size_t ret_len;
2662 const mbfl_encoding *enc;
2663
2664 ZEND_PARSE_PARAMETERS_START(1, 2)
2665 Z_PARAM_STRING(str, str_len)
2666 Z_PARAM_OPTIONAL
2667 Z_PARAM_STR_OR_NULL(from_encoding)
2668 ZEND_PARSE_PARAMETERS_END();
2669
2670 enc = php_mb_get_encoding(from_encoding, 2);
2671 if (!enc) {
2672 RETURN_THROWS();
2673 }
2674
2675 newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
2676 /* If newstr is NULL something went wrong in mbfl and this is a bug */
2677 ZEND_ASSERT(newstr != NULL);
2678
2679 // TODO: avoid reallocation ???
2680 RETVAL_STRINGL(newstr, ret_len);
2681 efree(newstr);
2682 }
2683 /* }}} */
2684
2685 /* {{{ Encodings of the given string is returned (as a string) */
PHP_FUNCTION(mb_detect_encoding)2686 PHP_FUNCTION(mb_detect_encoding)
2687 {
2688 char *str;
2689 size_t str_len;
2690 zend_string *encoding_str = NULL;
2691 HashTable *encoding_ht = NULL;
2692 zend_bool strict = 0;
2693
2694 mbfl_string string;
2695 const mbfl_encoding *ret;
2696 const mbfl_encoding **elist;
2697 size_t size;
2698 zend_bool free_elist;
2699
2700 ZEND_PARSE_PARAMETERS_START(1, 3)
2701 Z_PARAM_STRING(str, str_len)
2702 Z_PARAM_OPTIONAL
2703 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(encoding_ht, encoding_str)
2704 Z_PARAM_BOOL(strict)
2705 ZEND_PARSE_PARAMETERS_END();
2706
2707 /* make encoding list */
2708 if (encoding_ht) {
2709 if (FAILURE == php_mb_parse_encoding_array(encoding_ht, &elist, &size, 2)) {
2710 RETURN_THROWS();
2711 }
2712 free_elist = 1;
2713 } else if (encoding_str) {
2714 if (FAILURE == php_mb_parse_encoding_list(ZSTR_VAL(encoding_str), ZSTR_LEN(encoding_str), &elist, &size, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0)) {
2715 RETURN_THROWS();
2716 }
2717 free_elist = 1;
2718 } else {
2719 elist = MBSTRG(current_detect_order_list);
2720 size = MBSTRG(current_detect_order_list_size);
2721 free_elist = 0;
2722 }
2723
2724 if (size == 0) {
2725 efree(ZEND_VOIDP(elist));
2726 zend_argument_value_error(2, "must specify at least one encoding");
2727 RETURN_THROWS();
2728 }
2729
2730 if (ZEND_NUM_ARGS() < 3) {
2731 strict = MBSTRG(strict_detection);
2732 }
2733
2734 mbfl_string_init(&string);
2735 string.val = (unsigned char *)str;
2736 string.len = str_len;
2737 ret = mbfl_identify_encoding(&string, elist, size, strict);
2738
2739 if (free_elist) {
2740 efree(ZEND_VOIDP(elist));
2741 }
2742
2743 if (ret == NULL) {
2744 RETURN_FALSE;
2745 }
2746
2747 RETVAL_STRING((char *)ret->name);
2748 }
2749 /* }}} */
2750
2751 /* {{{ Returns an array of all supported entity encodings */
PHP_FUNCTION(mb_list_encodings)2752 PHP_FUNCTION(mb_list_encodings)
2753 {
2754 ZEND_PARSE_PARAMETERS_NONE();
2755
2756 array_init(return_value);
2757 for (const mbfl_encoding **encodings = mbfl_get_supported_encodings(); *encodings; encodings++) {
2758 add_next_index_string(return_value, (*encodings)->name);
2759 }
2760 }
2761 /* }}} */
2762
2763 /* {{{ Returns an array of the aliases of a given encoding name */
PHP_FUNCTION(mb_encoding_aliases)2764 PHP_FUNCTION(mb_encoding_aliases)
2765 {
2766 const mbfl_encoding *encoding;
2767 zend_string *encoding_name = NULL;
2768
2769 ZEND_PARSE_PARAMETERS_START(1, 1)
2770 Z_PARAM_STR(encoding_name)
2771 ZEND_PARSE_PARAMETERS_END();
2772
2773 encoding = php_mb_get_encoding(encoding_name, 1);
2774 if (!encoding) {
2775 RETURN_THROWS();
2776 }
2777
2778 array_init(return_value);
2779 if (encoding->aliases != NULL) {
2780 const char **alias;
2781 for (alias = *encoding->aliases; *alias; ++alias) {
2782 add_next_index_string(return_value, (char *)*alias);
2783 }
2784 }
2785 }
2786 /* }}} */
2787
2788 /* {{{ Converts the string to MIME "encoded-word" in the format of =?charset?(B|Q)?encoded_string?= */
PHP_FUNCTION(mb_encode_mimeheader)2789 PHP_FUNCTION(mb_encode_mimeheader)
2790 {
2791 const mbfl_encoding *charset, *transenc;
2792 mbfl_string string, result, *ret;
2793 zend_string *charset_name = NULL;
2794 char *trans_enc_name = NULL, *string_val;
2795 size_t trans_enc_name_len;
2796 char *linefeed = "\r\n";
2797 size_t linefeed_len;
2798 zend_long indent = 0;
2799
2800 string.encoding = MBSTRG(current_internal_encoding);
2801
2802 ZEND_PARSE_PARAMETERS_START(1, 5)
2803 Z_PARAM_STRING(string_val, string.len)
2804 Z_PARAM_OPTIONAL
2805 Z_PARAM_STR(charset_name)
2806 Z_PARAM_STRING(trans_enc_name, trans_enc_name_len)
2807 Z_PARAM_STRING(linefeed, linefeed_len)
2808 Z_PARAM_LONG(indent)
2809 ZEND_PARSE_PARAMETERS_END();
2810
2811 string.val = (unsigned char*)string_val;
2812 charset = &mbfl_encoding_pass;
2813 transenc = &mbfl_encoding_base64;
2814
2815 if (charset_name != NULL) {
2816 charset = php_mb_get_encoding(charset_name, 2);
2817 if (!charset) {
2818 RETURN_THROWS();
2819 }
2820 } else {
2821 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
2822 if (lang != NULL) {
2823 charset = mbfl_no2encoding(lang->mail_charset);
2824 transenc = mbfl_no2encoding(lang->mail_header_encoding);
2825 }
2826 }
2827
2828 if (trans_enc_name != NULL) {
2829 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
2830 transenc = &mbfl_encoding_base64;
2831 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
2832 transenc = &mbfl_encoding_qprint;
2833 }
2834 }
2835
2836 mbfl_string_init(&result);
2837 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
2838 ZEND_ASSERT(ret != NULL);
2839 // TODO: avoid reallocation ???
2840 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2841 efree(ret->val);
2842 }
2843 /* }}} */
2844
2845 /* {{{ Decodes the MIME "encoded-word" in the string */
PHP_FUNCTION(mb_decode_mimeheader)2846 PHP_FUNCTION(mb_decode_mimeheader)
2847 {
2848 char *string_val;
2849 mbfl_string string, result, *ret;
2850
2851 string.encoding = MBSTRG(current_internal_encoding);
2852
2853 ZEND_PARSE_PARAMETERS_START(1, 1)
2854 Z_PARAM_STRING(string_val, string.len)
2855 ZEND_PARSE_PARAMETERS_END();
2856
2857 string.val = (unsigned char*)string_val;
2858 mbfl_string_init(&result);
2859 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding));
2860 ZEND_ASSERT(ret != NULL);
2861 // TODO: avoid reallocation ???
2862 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2863 efree(ret->val);
2864 }
2865 /* }}} */
2866
2867 /* {{{ Conversion between full-width character and half-width character (Japanese) */
PHP_FUNCTION(mb_convert_kana)2868 PHP_FUNCTION(mb_convert_kana)
2869 {
2870 int opt;
2871 mbfl_string string, result, *ret;
2872 char *optstr = NULL, *string_val;
2873 size_t optstr_len;
2874 zend_string *encname = NULL;
2875
2876 ZEND_PARSE_PARAMETERS_START(1, 3)
2877 Z_PARAM_STRING(string_val, string.len)
2878 Z_PARAM_OPTIONAL
2879 Z_PARAM_STRING(optstr, optstr_len)
2880 Z_PARAM_STR_OR_NULL(encname)
2881 ZEND_PARSE_PARAMETERS_END();
2882
2883 string.val = (unsigned char*)string_val;
2884
2885 /* "Zen" is 全, or "full"; "Han" is 半, or "half"
2886 * This refers to "fullwidth" or "halfwidth" variants of characters used for writing Japanese */
2887 if (optstr != NULL) {
2888 char *p = optstr, *e = p + optstr_len;
2889 opt = 0;
2890 while (p < e) {
2891 switch (*p++) {
2892 case 'A':
2893 opt |= MBFL_FILT_TL_HAN2ZEN_ALL;
2894 break;
2895 case 'a':
2896 opt |= MBFL_FILT_TL_ZEN2HAN_ALL;
2897 break;
2898 case 'R':
2899 opt |= MBFL_FILT_TL_HAN2ZEN_ALPHA;
2900 break;
2901 case 'r':
2902 opt |= MBFL_FILT_TL_ZEN2HAN_ALPHA;
2903 break;
2904 case 'N':
2905 opt |= MBFL_FILT_TL_HAN2ZEN_NUMERIC;
2906 break;
2907 case 'n':
2908 opt |= MBFL_FILT_TL_ZEN2HAN_NUMERIC;
2909 break;
2910 case 'S':
2911 opt |= MBFL_FILT_TL_HAN2ZEN_SPACE;
2912 break;
2913 case 's':
2914 opt |= MBFL_FILT_TL_ZEN2HAN_SPACE;
2915 break;
2916 case 'K':
2917 opt |= MBFL_FILT_TL_HAN2ZEN_KATAKANA;
2918 break;
2919 case 'k':
2920 opt |= MBFL_FILT_TL_ZEN2HAN_KATAKANA;
2921 break;
2922 case 'H':
2923 opt |= MBFL_FILT_TL_HAN2ZEN_HIRAGANA;
2924 break;
2925 case 'h':
2926 opt |= MBFL_FILT_TL_ZEN2HAN_HIRAGANA;
2927 break;
2928 case 'V':
2929 opt |= MBFL_FILT_TL_HAN2ZEN_GLUE;
2930 break;
2931 case 'C':
2932 opt |= MBFL_FILT_TL_ZEN2HAN_HIRA2KANA;
2933 break;
2934 case 'c':
2935 opt |= MBFL_FILT_TL_ZEN2HAN_KANA2HIRA;
2936 break;
2937 case 'M':
2938 /* TODO: figure out what 'M' and 'm' are for, and rename the constant
2939 * to something meaningful */
2940 opt |= MBFL_FILT_TL_HAN2ZEN_COMPAT1;
2941 break;
2942 case 'm':
2943 opt |= MBFL_FILT_TL_ZEN2HAN_COMPAT1;
2944 break;
2945 }
2946 }
2947 } else {
2948 opt = MBFL_FILT_TL_HAN2ZEN_KATAKANA | MBFL_FILT_TL_HAN2ZEN_GLUE;
2949 }
2950
2951 /* encoding */
2952 string.encoding = php_mb_get_encoding(encname, 3);
2953 if (!string.encoding) {
2954 RETURN_THROWS();
2955 }
2956
2957 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
2958 ZEND_ASSERT(ret != NULL);
2959 // TODO: avoid reallocation ???
2960 RETVAL_STRINGL((char *)ret->val, ret->len); /* the string is already strdup()'ed */
2961 efree(ret->val);
2962 }
2963 /* }}} */
2964
/* {{{ Feeds every string reachable from `var` into the encoding detector.
 *
 * Strings are passed to mbfl_encoding_detector_feed(); arrays and objects are
 * walked recursively through their HashTable (HASH_OF). Other types are
 * ignored.
 *
 * Returns 1 as soon as the detector reports that detection is complete,
 * 0 otherwise. When a container that is already being visited is reached
 * again, *recursion_error is set to 1 and 0 is returned; the walk of every
 * enclosing container stops as well. */
static int mb_recursive_encoder_detector_feed(mbfl_encoding_detector *identd, zval *var, int *recursion_error)
{
	HashTable *ht;
	zval *entry;
	int finished = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string string;

		string.val = (unsigned char *)Z_STRVAL_P(var);
		string.len = Z_STRLEN_P(var);
		/* non-zero from the detector means: complete detecting */
		return mbfl_encoding_detector_feed(identd, &string) ? 1 : 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			*recursion_error = 1;
			return 0;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	ht = HASH_OF(var);
	if (ht != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_encoder_detector_feed(identd, entry, recursion_error)) {
				finished = 1;
				break;
			}
			if (*recursion_error) {
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Single exit for the container case: always drop the recursion guard. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return finished;
} /* }}} */
3010
/* {{{ Converts, in place, every string reachable from `var` using `convd`.
 *
 * A string is fed through mbfl_buffer_converter_feed_result(); when that
 * yields a result, the zval originally passed in (before dereferencing) is
 * destroyed and overwritten with a copy of the converted bytes. Arrays are
 * separated before being walked; arrays and objects are traversed
 * recursively through HASH_OF(). Other types are left untouched.
 *
 * Returns 1 when a container that is already being visited is reached again
 * (recursive structure), 0 otherwise. */
static int mb_recursive_convert_variable(mbfl_buffer_converter *convd, zval *var)
{
	zval *target = var; /* the slot as passed in, before dereferencing */
	HashTable *ht;
	zval *entry;
	int recursive = 0;

	ZVAL_DEREF(var);

	if (Z_TYPE_P(var) == IS_STRING) {
		mbfl_string string, result, *ret;

		string.val = (unsigned char *)Z_STRVAL_P(var);
		string.len = Z_STRLEN_P(var);
		ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
		if (ret != NULL) {
			zval_ptr_dtor(target);
			// TODO: avoid reallocation ???
			ZVAL_STRINGL(target, (char *)ret->val, ret->len);
			efree(ret->val);
		}
		return 0;
	}

	if (Z_TYPE_P(var) != IS_ARRAY && Z_TYPE_P(var) != IS_OBJECT) {
		return 0;
	}

	if (Z_TYPE_P(var) == IS_ARRAY) {
		SEPARATE_ARRAY(var);
	}
	if (Z_REFCOUNTED_P(var)) {
		if (Z_IS_RECURSIVE_P(var)) {
			return 1;
		}
		Z_PROTECT_RECURSION_P(var);
	}

	ht = HASH_OF(var);
	if (ht != NULL) {
		ZEND_HASH_FOREACH_VAL_IND(ht, entry) {
			if (mb_recursive_convert_variable(convd, entry)) {
				recursive = 1;
				break;
			}
		} ZEND_HASH_FOREACH_END();
	}

	/* Single exit for the container case: always drop the recursion guard. */
	if (Z_REFCOUNTED_P(var)) {
		Z_UNPROTECT_RECURSION_P(var);
	}
	return recursive;
} /* }}} */
3058
3059 /* {{{ Converts the string resource in variables to desired encoding */
PHP_FUNCTION(mb_convert_variables)3060 PHP_FUNCTION(mb_convert_variables)
3061 {
3062 zval *args;
3063 zend_string *to_enc_str;
3064 zend_string *from_enc_str;
3065 HashTable *from_enc_ht;
3066 mbfl_string string, result;
3067 const mbfl_encoding *from_encoding, *to_encoding;
3068 mbfl_encoding_detector *identd;
3069 mbfl_buffer_converter *convd;
3070 int n, argc;
3071 size_t elistsz;
3072 const mbfl_encoding **elist;
3073 int recursion_error = 0;
3074
3075 ZEND_PARSE_PARAMETERS_START(3, -1)
3076 Z_PARAM_STR(to_enc_str)
3077 Z_PARAM_ARRAY_HT_OR_STR(from_enc_ht, from_enc_str)
3078 Z_PARAM_VARIADIC('+', args, argc)
3079 ZEND_PARSE_PARAMETERS_END();
3080
3081 /* new encoding */
3082 to_encoding = php_mb_get_encoding(to_enc_str, 1);
3083 if (!to_encoding) {
3084 RETURN_THROWS();
3085 }
3086
3087 /* initialize string */
3088 from_encoding = MBSTRG(current_internal_encoding);
3089 mbfl_string_init_set(&string, from_encoding);
3090 mbfl_string_init(&result);
3091
3092 /* pre-conversion encoding */
3093 if (from_enc_ht) {
3094 if (php_mb_parse_encoding_array(from_enc_ht, &elist, &elistsz, 2) == FAILURE) {
3095 RETURN_THROWS();
3096 }
3097 } else {
3098 if (php_mb_parse_encoding_list(ZSTR_VAL(from_enc_str), ZSTR_LEN(from_enc_str), &elist, &elistsz, /* persistent */ 0, /* arg_num */ 2, /* allow_pass_encoding */ 0) == FAILURE) {
3099 RETURN_THROWS();
3100 }
3101 }
3102
3103 if (elistsz == 0) {
3104 efree(ZEND_VOIDP(elist));
3105 zend_argument_value_error(2, "must specify at least one encoding");
3106 RETURN_THROWS();
3107 }
3108
3109 if (elistsz == 1) {
3110 from_encoding = *elist;
3111 } else {
3112 /* auto detect */
3113 from_encoding = NULL;
3114 identd = mbfl_encoding_detector_new(elist, elistsz, MBSTRG(strict_detection));
3115 if (identd != NULL) {
3116 n = 0;
3117 while (n < argc) {
3118 if (mb_recursive_encoder_detector_feed(identd, &args[n], &recursion_error)) {
3119 break;
3120 }
3121 n++;
3122 }
3123 from_encoding = mbfl_encoding_detector_judge(identd);
3124 mbfl_encoding_detector_delete(identd);
3125 if (recursion_error) {
3126 efree(ZEND_VOIDP(elist));
3127 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3128 RETURN_FALSE;
3129 }
3130 }
3131
3132 if (!from_encoding) {
3133 php_error_docref(NULL, E_WARNING, "Unable to detect encoding");
3134 efree(ZEND_VOIDP(elist));
3135 RETURN_FALSE;
3136 }
3137 }
3138
3139 efree(ZEND_VOIDP(elist));
3140
3141 convd = mbfl_buffer_converter_new(from_encoding, to_encoding, 0);
3142 /* If this assertion fails this means some memory allocation failure which is a bug */
3143 ZEND_ASSERT(convd != NULL);
3144
3145 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3146 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3147
3148 /* convert */
3149 n = 0;
3150 while (n < argc) {
3151 zval *zv = &args[n];
3152
3153 ZVAL_DEREF(zv);
3154 recursion_error = mb_recursive_convert_variable(convd, zv);
3155 if (recursion_error) {
3156 break;
3157 }
3158 n++;
3159 }
3160
3161 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3162 mbfl_buffer_converter_delete(convd);
3163
3164 if (recursion_error) {
3165 php_error_docref(NULL, E_WARNING, "Cannot handle recursive references");
3166 RETURN_FALSE;
3167 }
3168
3169 RETURN_STRING(from_encoding->name);
3170 }
3171 /* }}} */
3172
3173 /* HTML numeric entities */
3174
3175 /* Convert PHP array to data structure required by mbfl_html_numeric_entity */
/* Flattens a PHP array into a freshly allocated int array, converting each
 * element with zval_get_long().
 *
 * On success returns the array (the caller frees it with efree()) and stores
 * the element count divided by 4 in *convmap_size. If the element count is
 * not a multiple of 4, a value error for argument 2 is raised and NULL is
 * returned; *convmap_size is left untouched in that case. */
static int *make_conversion_map(HashTable *target_hash, int *convmap_size)
{
	int count = zend_hash_num_elements(target_hash);

	if ((count % 4) != 0) {
		zend_argument_value_error(2, "must have a multiple of 4 elements");
		return NULL;
	}

	int *map = (int *)safe_emalloc(count, sizeof(int), 0);
	int idx = 0;
	zval *elem;

	ZEND_HASH_FOREACH_VAL(target_hash, elem) {
		map[idx++] = zval_get_long(elem);
	} ZEND_HASH_FOREACH_END();

	*convmap_size = count / 4;
	return map;
}
3196
3197 /* {{{ Converts specified characters to HTML numeric entities */
PHP_FUNCTION(mb_encode_numericentity)3198 PHP_FUNCTION(mb_encode_numericentity)
3199 {
3200 char *str = NULL;
3201 zend_string *encoding = NULL;
3202 int mapsize;
3203 HashTable *target_hash;
3204 zend_bool is_hex = 0;
3205 mbfl_string string, result, *ret;
3206
3207 ZEND_PARSE_PARAMETERS_START(2, 4)
3208 Z_PARAM_STRING(str, string.len)
3209 Z_PARAM_ARRAY_HT(target_hash)
3210 Z_PARAM_OPTIONAL
3211 Z_PARAM_STR_OR_NULL(encoding)
3212 Z_PARAM_BOOL(is_hex)
3213 ZEND_PARSE_PARAMETERS_END();
3214
3215 string.val = (unsigned char *)str;
3216 string.encoding = php_mb_get_encoding(encoding, 3);
3217 if (!string.encoding) {
3218 RETURN_THROWS();
3219 }
3220
3221 int *convmap = make_conversion_map(target_hash, &mapsize);
3222 if (convmap == NULL) {
3223 RETURN_THROWS();
3224 }
3225
3226 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, is_hex ? 2 : 0);
3227 ZEND_ASSERT(ret != NULL);
3228 // TODO: avoid reallocation ???
3229 RETVAL_STRINGL((char *)ret->val, ret->len);
3230 efree(ret->val);
3231 efree(convmap);
3232 }
3233 /* }}} */
3234
3235 /* {{{ Converts HTML numeric entities to character code */
PHP_FUNCTION(mb_decode_numericentity)3236 PHP_FUNCTION(mb_decode_numericentity)
3237 {
3238 char *str = NULL;
3239 zend_string *encoding = NULL;
3240 int mapsize;
3241 HashTable *target_hash;
3242 mbfl_string string, result, *ret;
3243
3244 ZEND_PARSE_PARAMETERS_START(2, 3)
3245 Z_PARAM_STRING(str, string.len)
3246 Z_PARAM_ARRAY_HT(target_hash)
3247 Z_PARAM_OPTIONAL
3248 Z_PARAM_STR_OR_NULL(encoding)
3249 ZEND_PARSE_PARAMETERS_END();
3250
3251 string.val = (unsigned char *)str;
3252 string.encoding = php_mb_get_encoding(encoding, 3);
3253 if (!string.encoding) {
3254 RETURN_THROWS();
3255 }
3256
3257 int *convmap = make_conversion_map(target_hash, &mapsize);
3258 if (convmap == NULL) {
3259 RETURN_THROWS();
3260 }
3261
3262 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, 1);
3263 ZEND_ASSERT(ret != NULL);
3264 // TODO: avoid reallocation ???
3265 RETVAL_STRINGL((char *)ret->val, ret->len);
3266 efree(ret->val);
3267 efree((void *)convmap);
3268 }
3269 /* }}} */
3270
3271 /* {{{ Sends an email message with MIME scheme */
3272
/* Statement macro for use inside a loop that scans `str` with index `pos`.
 * If str[pos] starts a folded-header separator (CR LF followed by a space or
 * tab), it advances `pos` by 2 (onto that whitespace character), keeps
 * advancing while the NEXT character is also a space or tab, and then
 * executes `continue` on the enclosing loop. If there is no such separator
 * at str[pos], it does nothing.
 * Note: it expands to a bare `if` block (not do { } while (0)), evaluates its
 * arguments several times and modifies `pos`, so both must be plain lvalues
 * without side effects. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
		pos += 2; \
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
			pos++; \
		} \
		continue; \
	}
3281
3282 #define CRLF "\r\n"
3283
_php_mbstr_parse_mail_headers(HashTable * ht,const char * str,size_t str_len)3284 static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
3285 {
3286 const char *ps;
3287 size_t icnt;
3288 int state = 0;
3289 int crlf_state = -1;
3290 char *token = NULL;
3291 size_t token_pos = 0;
3292 zend_string *fld_name, *fld_val;
3293
3294 ps = str;
3295 icnt = str_len;
3296 fld_name = fld_val = NULL;
3297
3298 /*
3299 * C o n t e n t - T y p e : t e x t / h t m l \r\n
3300 * ^ ^^^^^^^^^^^^^^^^^^^^^ ^^^ ^^^^^^^^^^^^^^^^^ ^^^^
3301 * state 0 1 2 3
3302 *
3303 * C o n t e n t - T y p e : t e x t / h t m l \r\n
3304 * ^ ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ ^^^^
3305 * crlf_state -1 0 1 -1
3306 *
3307 */
3308
3309 while (icnt > 0) {
3310 switch (*ps) {
3311 case ':':
3312 if (crlf_state == 1) {
3313 token_pos++;
3314 }
3315
3316 if (state == 0 || state == 1) {
3317 if(token && token_pos > 0) {
3318 fld_name = zend_string_init(token, token_pos, 0);
3319 }
3320 state = 2;
3321 } else {
3322 token_pos++;
3323 }
3324
3325 crlf_state = 0;
3326 break;
3327
3328 case '\n':
3329 if (crlf_state == -1) {
3330 goto out;
3331 }
3332 crlf_state = -1;
3333 break;
3334
3335 case '\r':
3336 if (crlf_state == 1) {
3337 token_pos++;
3338 } else {
3339 crlf_state = 1;
3340 }
3341 break;
3342
3343 case ' ': case '\t':
3344 if (crlf_state == -1) {
3345 if (state == 3) {
3346 /* continuing from the previous line */
3347 state = 4;
3348 } else {
3349 /* simply skipping this new line */
3350 state = 5;
3351 }
3352 } else {
3353 if (crlf_state == 1) {
3354 token_pos++;
3355 }
3356 if (state == 1 || state == 3) {
3357 token_pos++;
3358 }
3359 }
3360 crlf_state = 0;
3361 break;
3362
3363 default:
3364 switch (state) {
3365 case 0:
3366 token = (char*)ps;
3367 token_pos = 0;
3368 state = 1;
3369 break;
3370
3371 case 2:
3372 if (crlf_state != -1) {
3373 token = (char*)ps;
3374 token_pos = 0;
3375
3376 state = 3;
3377 break;
3378 }
3379 /* break is missing intentionally */
3380
3381 case 3:
3382 if (crlf_state == -1) {
3383 if(token && token_pos > 0) {
3384 fld_val = zend_string_init(token, token_pos, 0);
3385 }
3386
3387 if (fld_name != NULL && fld_val != NULL) {
3388 zval val;
3389 /* FIXME: some locale free implementation is
3390 * really required here,,, */
3391 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3392 ZVAL_STR(&val, fld_val);
3393
3394 zend_hash_update(ht, fld_name, &val);
3395
3396 zend_string_release_ex(fld_name, 0);
3397 }
3398
3399 fld_name = fld_val = NULL;
3400 token = (char*)ps;
3401 token_pos = 0;
3402
3403 state = 1;
3404 }
3405 break;
3406
3407 case 4:
3408 token_pos++;
3409 state = 3;
3410 break;
3411 }
3412
3413 if (crlf_state == 1) {
3414 token_pos++;
3415 }
3416
3417 token_pos++;
3418
3419 crlf_state = 0;
3420 break;
3421 }
3422 ps++, icnt--;
3423 }
3424 out:
3425 if (state == 2) {
3426 token = "";
3427 token_pos = 0;
3428
3429 state = 3;
3430 }
3431 if (state == 3) {
3432 if(token && token_pos > 0) {
3433 fld_val = zend_string_init(token, token_pos, 0);
3434 }
3435 if (fld_name != NULL && fld_val != NULL) {
3436 zval val;
3437 /* FIXME: some locale free implementation is
3438 * really required here,,, */
3439 php_strtoupper(ZSTR_VAL(fld_name), ZSTR_LEN(fld_name));
3440 ZVAL_STR(&val, fld_val);
3441
3442 zend_hash_update(ht, fld_name, &val);
3443
3444 zend_string_release_ex(fld_name, 0);
3445 }
3446 }
3447 return state;
3448 }
3449
PHP_FUNCTION(mb_send_mail)3450 PHP_FUNCTION(mb_send_mail)
3451 {
3452 char *to;
3453 size_t to_len;
3454 char *message;
3455 size_t message_len;
3456 char *subject;
3457 size_t subject_len;
3458 zend_string *extra_cmd = NULL;
3459 HashTable *headers_ht = NULL;
3460 zend_string *str_headers = NULL;
3461 size_t n, i;
3462 char *to_r = NULL;
3463 char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
3464 struct {
3465 int cnt_type:1;
3466 int cnt_trans_enc:1;
3467 } suppressed_hdrs = { 0, 0 };
3468
3469 char *message_buf = NULL, *subject_buf = NULL, *p;
3470 mbfl_string orig_str, conv_str;
3471 mbfl_string *pstr; /* pointer to mbfl string for return value */
3472 enum mbfl_no_encoding;
3473 const mbfl_encoding *tran_cs, /* transfer text charset */
3474 *head_enc, /* header transfer encoding */
3475 *body_enc; /* body transfer encoding */
3476 mbfl_memory_device device; /* automatic allocateable buffer for additional header */
3477 const mbfl_language *lang;
3478 int err = 0;
3479 HashTable ht_headers;
3480 zval *s;
3481 extern void mbfl_memory_device_unput(mbfl_memory_device *device);
3482
3483 /* initialize */
3484 mbfl_memory_device_init(&device, 0, 0);
3485 mbfl_string_init(&orig_str);
3486 mbfl_string_init(&conv_str);
3487
3488 /* character-set, transfer-encoding */
3489 tran_cs = &mbfl_encoding_utf8;
3490 head_enc = &mbfl_encoding_base64;
3491 body_enc = &mbfl_encoding_base64;
3492 lang = mbfl_no2language(MBSTRG(language));
3493 if (lang != NULL) {
3494 tran_cs = mbfl_no2encoding(lang->mail_charset);
3495 head_enc = mbfl_no2encoding(lang->mail_header_encoding);
3496 body_enc = mbfl_no2encoding(lang->mail_body_encoding);
3497 }
3498
3499 ZEND_PARSE_PARAMETERS_START(3, 5)
3500 Z_PARAM_PATH(to, to_len)
3501 Z_PARAM_PATH(subject, subject_len)
3502 Z_PARAM_PATH(message, message_len)
3503 Z_PARAM_OPTIONAL
3504 Z_PARAM_ARRAY_HT_OR_STR(headers_ht, str_headers)
3505 Z_PARAM_PATH_STR_OR_NULL(extra_cmd)
3506 ZEND_PARSE_PARAMETERS_END();
3507
3508 if (str_headers) {
3509 if (strlen(ZSTR_VAL(str_headers)) != ZSTR_LEN(str_headers)) {
3510 zend_argument_value_error(4, "must not contain any null bytes");
3511 RETURN_THROWS();
3512 }
3513 str_headers = php_trim(str_headers, NULL, 0, 2);
3514 } else if (headers_ht) {
3515 str_headers = php_mail_build_headers(headers_ht);
3516 if (EG(exception)) {
3517 RETURN_THROWS();
3518 }
3519 }
3520
3521 zend_hash_init(&ht_headers, 0, NULL, ZVAL_PTR_DTOR, 0);
3522
3523 if (str_headers != NULL) {
3524 _php_mbstr_parse_mail_headers(&ht_headers, ZSTR_VAL(str_headers), ZSTR_LEN(str_headers));
3525 }
3526
3527 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1))) {
3528 char *tmp;
3529 char *param_name;
3530 char *charset = NULL;
3531
3532 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3533 p = strchr(Z_STRVAL_P(s), ';');
3534
3535 if (p != NULL) {
3536 /* skipping the padded spaces */
3537 do {
3538 ++p;
3539 } while (*p == ' ' || *p == '\t');
3540
3541 if (*p != '\0') {
3542 if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
3543 if (strcasecmp(param_name, "charset") == 0) {
3544 const mbfl_encoding *_tran_cs = tran_cs;
3545
3546 charset = php_strtok_r(NULL, "= \"", &tmp);
3547 if (charset != NULL) {
3548 _tran_cs = mbfl_name2encoding(charset);
3549 }
3550
3551 if (!_tran_cs) {
3552 php_error_docref(NULL, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
3553 _tran_cs = &mbfl_encoding_ascii;
3554 }
3555 tran_cs = _tran_cs;
3556 }
3557 }
3558 }
3559 }
3560 suppressed_hdrs.cnt_type = 1;
3561 }
3562
3563 if ((s = zend_hash_str_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1))) {
3564 const mbfl_encoding *_body_enc;
3565
3566 ZEND_ASSERT(Z_TYPE_P(s) == IS_STRING);
3567 _body_enc = mbfl_name2encoding(Z_STRVAL_P(s));
3568 switch (_body_enc ? _body_enc->no_encoding : mbfl_no_encoding_invalid) {
3569 case mbfl_no_encoding_base64:
3570 case mbfl_no_encoding_7bit:
3571 case mbfl_no_encoding_8bit:
3572 body_enc = _body_enc;
3573 break;
3574
3575 default:
3576 php_error_docref(NULL, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", Z_STRVAL_P(s));
3577 body_enc = &mbfl_encoding_8bit;
3578 break;
3579 }
3580 suppressed_hdrs.cnt_trans_enc = 1;
3581 }
3582
3583 /* To: */
3584 if (to_len > 0) {
3585 to_r = estrndup(to, to_len);
3586 for (; to_len; to_len--) {
3587 if (!isspace((unsigned char) to_r[to_len - 1])) {
3588 break;
3589 }
3590 to_r[to_len - 1] = '\0';
3591 }
3592 for (i = 0; to_r[i]; i++) {
3593 if (iscntrl((unsigned char) to_r[i])) {
3594 /* According to RFC 822, section 3.1.1 long headers may be separated into
3595 * parts using CRLF followed at least one linear-white-space character ('\t' or ' ').
3596 * To prevent these separators from being replaced with a space, we use the
3597 * SKIP_LONG_HEADER_SEP_MBSTRING to skip over them.
3598 */
3599 SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
3600 to_r[i] = ' ';
3601 }
3602 }
3603 } else {
3604 to_r = to;
3605 }
3606
3607 /* Subject: */
3608 orig_str.val = (unsigned char *)subject;
3609 orig_str.len = subject_len;
3610 orig_str.encoding = MBSTRG(current_internal_encoding);
3611 if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3612 || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3613 orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3614 }
3615 pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, CRLF, sizeof("Subject: [PHP-jp nnnnnnnn]" CRLF) - 1);
3616 if (pstr != NULL) {
3617 subject_buf = subject = (char *)pstr->val;
3618 }
3619
3620 /* message body */
3621 orig_str.val = (unsigned char *)message;
3622 orig_str.len = message_len;
3623 orig_str.encoding = MBSTRG(current_internal_encoding);
3624
3625 if (orig_str.encoding->no_encoding == mbfl_no_encoding_invalid
3626 || orig_str.encoding->no_encoding == mbfl_no_encoding_pass) {
3627 orig_str.encoding = mbfl_identify_encoding(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
3628 }
3629
3630 pstr = NULL;
3631 {
3632 mbfl_string tmpstr;
3633
3634 if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
3635 tmpstr.encoding = &mbfl_encoding_8bit;
3636 pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
3637 efree(tmpstr.val);
3638 }
3639 }
3640 if (pstr != NULL) {
3641 message_buf = message = (char *)pstr->val;
3642 }
3643
3644 /* other headers */
3645 #define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
3646 #define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
3647 #define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
3648 #define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
3649 if (str_headers != NULL) {
3650 p = ZSTR_VAL(str_headers);
3651 n = ZSTR_LEN(str_headers);
3652 mbfl_memory_device_strncat(&device, p, n);
3653 if (n > 0 && p[n - 1] != '\n') {
3654 mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3655 }
3656 zend_string_release_ex(str_headers, 0);
3657 }
3658
3659 if (!zend_hash_str_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
3660 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
3661 mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3662 }
3663
3664 if (!suppressed_hdrs.cnt_type) {
3665 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);
3666
3667 p = (char *)mbfl_no2preferred_mime_name(tran_cs->no_encoding);
3668 if (p != NULL) {
3669 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
3670 mbfl_memory_device_strcat(&device, p);
3671 }
3672 mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3673 }
3674 if (!suppressed_hdrs.cnt_trans_enc) {
3675 mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
3676 p = (char *)mbfl_no2preferred_mime_name(body_enc->no_encoding);
3677 if (p == NULL) {
3678 p = "7bit";
3679 }
3680 mbfl_memory_device_strcat(&device, p);
3681 mbfl_memory_device_strncat(&device, CRLF, sizeof(CRLF)-1);
3682 }
3683
3684 mbfl_memory_device_unput(&device);
3685 mbfl_memory_device_unput(&device);
3686 mbfl_memory_device_output('\0', &device);
3687 str_headers = zend_string_init((char *)device.buffer, strlen((char *)device.buffer), 0);
3688
3689 if (force_extra_parameters) {
3690 extra_cmd = php_escape_shell_cmd(force_extra_parameters);
3691 } else if (extra_cmd) {
3692 extra_cmd = php_escape_shell_cmd(ZSTR_VAL(extra_cmd));
3693 }
3694
3695 if (!err && php_mail(to_r, subject, message, ZSTR_VAL(str_headers), extra_cmd ? ZSTR_VAL(extra_cmd) : NULL)) {
3696 RETVAL_TRUE;
3697 } else {
3698 RETVAL_FALSE;
3699 }
3700
3701 if (extra_cmd) {
3702 zend_string_release_ex(extra_cmd, 0);
3703 }
3704
3705 if (to_r != to) {
3706 efree(to_r);
3707 }
3708 if (subject_buf) {
3709 efree((void *)subject_buf);
3710 }
3711 if (message_buf) {
3712 efree((void *)message_buf);
3713 }
3714 mbfl_memory_device_clear(&device);
3715 zend_hash_destroy(&ht_headers);
3716 if (str_headers) {
3717 zend_string_release_ex(str_headers, 0);
3718 }
3719 }
3720
3721 #undef SKIP_LONG_HEADER_SEP_MBSTRING
3722 #undef CRLF
3723 #undef MAIL_ASCIIZ_CHECK_MBSTRING
3724 #undef PHP_MBSTR_MAIL_MIME_HEADER1
3725 #undef PHP_MBSTR_MAIL_MIME_HEADER2
3726 #undef PHP_MBSTR_MAIL_MIME_HEADER3
3727 #undef PHP_MBSTR_MAIL_MIME_HEADER4
3728 /* }}} */
3729
3730 /* {{{ Returns the current settings of mbstring */
PHP_FUNCTION(mb_get_info)3731 PHP_FUNCTION(mb_get_info)
3732 {
3733 char *typ = NULL;
3734 size_t typ_len;
3735 size_t n;
3736 char *name;
3737 zval row;
3738 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3739 const mbfl_encoding **entry;
3740
3741 ZEND_PARSE_PARAMETERS_START(0, 1)
3742 Z_PARAM_OPTIONAL
3743 Z_PARAM_STRING(typ, typ_len)
3744 ZEND_PARSE_PARAMETERS_END();
3745
3746 if (!typ || !strcasecmp("all", typ)) {
3747 array_init(return_value);
3748 if (MBSTRG(current_internal_encoding)) {
3749 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name);
3750 }
3751 if (MBSTRG(http_input_identify)) {
3752 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name);
3753 }
3754 if (MBSTRG(current_http_output_encoding)) {
3755 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name);
3756 }
3757 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3758 add_assoc_string(return_value, "http_output_conv_mimetypes", name);
3759 }
3760 if (lang != NULL) {
3761 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3762 add_assoc_string(return_value, "mail_charset", name);
3763 }
3764 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3765 add_assoc_string(return_value, "mail_header_encoding", name);
3766 }
3767 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3768 add_assoc_string(return_value, "mail_body_encoding", name);
3769 }
3770 }
3771 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
3772 if (MBSTRG(encoding_translation)) {
3773 add_assoc_string(return_value, "encoding_translation", "On");
3774 } else {
3775 add_assoc_string(return_value, "encoding_translation", "Off");
3776 }
3777 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3778 add_assoc_string(return_value, "language", name);
3779 }
3780 n = MBSTRG(current_detect_order_list_size);
3781 entry = MBSTRG(current_detect_order_list);
3782 if (n > 0) {
3783 size_t i;
3784 array_init(&row);
3785 for (i = 0; i < n; i++) {
3786 add_next_index_string(&row, (*entry)->name);
3787 entry++;
3788 }
3789 add_assoc_zval(return_value, "detect_order", &row);
3790 }
3791 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3792 add_assoc_string(return_value, "substitute_character", "none");
3793 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3794 add_assoc_string(return_value, "substitute_character", "long");
3795 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3796 add_assoc_string(return_value, "substitute_character", "entity");
3797 } else {
3798 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
3799 }
3800 if (MBSTRG(strict_detection)) {
3801 add_assoc_string(return_value, "strict_detection", "On");
3802 } else {
3803 add_assoc_string(return_value, "strict_detection", "Off");
3804 }
3805 } else if (!strcasecmp("internal_encoding", typ)) {
3806 if (MBSTRG(current_internal_encoding)) {
3807 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name);
3808 }
3809 } else if (!strcasecmp("http_input", typ)) {
3810 if (MBSTRG(http_input_identify)) {
3811 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name);
3812 }
3813 } else if (!strcasecmp("http_output", typ)) {
3814 if (MBSTRG(current_http_output_encoding)) {
3815 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name);
3816 }
3817 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
3818 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes") - 1, 0)) != NULL) {
3819 RETVAL_STRING(name);
3820 }
3821 } else if (!strcasecmp("mail_charset", typ)) {
3822 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
3823 RETVAL_STRING(name);
3824 }
3825 } else if (!strcasecmp("mail_header_encoding", typ)) {
3826 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
3827 RETVAL_STRING(name);
3828 }
3829 } else if (!strcasecmp("mail_body_encoding", typ)) {
3830 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
3831 RETVAL_STRING(name);
3832 }
3833 } else if (!strcasecmp("illegal_chars", typ)) {
3834 RETVAL_LONG(MBSTRG(illegalchars));
3835 } else if (!strcasecmp("encoding_translation", typ)) {
3836 if (MBSTRG(encoding_translation)) {
3837 RETVAL_STRING("On");
3838 } else {
3839 RETVAL_STRING("Off");
3840 }
3841 } else if (!strcasecmp("language", typ)) {
3842 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
3843 RETVAL_STRING(name);
3844 }
3845 } else if (!strcasecmp("detect_order", typ)) {
3846 n = MBSTRG(current_detect_order_list_size);
3847 entry = MBSTRG(current_detect_order_list);
3848 if (n > 0) {
3849 size_t i;
3850 array_init(return_value);
3851 for (i = 0; i < n; i++) {
3852 add_next_index_string(return_value, (*entry)->name);
3853 entry++;
3854 }
3855 }
3856 } else if (!strcasecmp("substitute_character", typ)) {
3857 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
3858 RETVAL_STRING("none");
3859 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
3860 RETVAL_STRING("long");
3861 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
3862 RETVAL_STRING("entity");
3863 } else {
3864 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
3865 }
3866 } else if (!strcasecmp("strict_detection", typ)) {
3867 if (MBSTRG(strict_detection)) {
3868 RETVAL_STRING("On");
3869 } else {
3870 RETVAL_STRING("Off");
3871 }
3872 } else {
3873 // TODO Convert to ValueError
3874 RETURN_FALSE;
3875 }
3876 }
3877 /* }}} */
3878
3879
php_mb_init_convd(const mbfl_encoding * encoding)3880 static inline mbfl_buffer_converter *php_mb_init_convd(const mbfl_encoding *encoding)
3881 {
3882 mbfl_buffer_converter *convd;
3883
3884 convd = mbfl_buffer_converter_new(encoding, encoding, 0);
3885 if (convd == NULL) {
3886 return NULL;
3887 }
3888 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
3889 mbfl_buffer_converter_illegal_substchar(convd, 0);
3890 return convd;
3891 }
3892
3893
/* Feed `input` (`length` bytes) through `convd` and report whether it came
 * out unchanged. Returns 1 only when the converter produced a result, counted
 * no illegal characters, and the output is byte-for-byte equal to the input;
 * returns 0 in every other case. */
static inline int php_mb_check_encoding_impl(mbfl_buffer_converter *convd, const char *input, size_t length, const mbfl_encoding *encoding) {
	mbfl_string src, dst;

	mbfl_string_init_set(&src, encoding);
	mbfl_string_init(&dst);

	src.val = (unsigned char *) input;
	src.len = length;

	mbfl_string *converted = mbfl_buffer_converter_feed_result(convd, &src, &dst);
	size_t num_illegal = mbfl_buffer_illegalchars(convd);

	if (converted == NULL) {
		return 0;
	}

	int is_valid = num_illegal == 0
		&& src.len == dst.len
		&& memcmp(src.val, dst.val, src.len) == 0;

	/* The comparison is done, so the converted copy can be released */
	mbfl_string_clear(&dst);
	return is_valid;
}
3915
php_mb_check_encoding(const char * input,size_t length,const mbfl_encoding * encoding)3916 MBSTRING_API int php_mb_check_encoding(const char *input, size_t length, const mbfl_encoding *encoding)
3917 {
3918 mbfl_buffer_converter *convd = php_mb_init_convd(encoding);
3919 /* If this assertion fails this means some memory allocation failure which is a bug */
3920 ZEND_ASSERT(convd != NULL);
3921
3922 int result = php_mb_check_encoding_impl(convd, input, length, encoding);
3923 mbfl_buffer_converter_delete(convd);
3924 return result;
3925 }
3926
/* Recursively check that every string key and every string value in `vars`
 * (nested arrays included) is valid in `encoding`.
 *
 * Values of type IS_LONG, IS_DOUBLE, IS_NULL, IS_TRUE and IS_FALSE are
 * accepted without inspection; any other non-string, non-array value makes
 * the array invalid. An array that is already being visited (circular
 * reference) raises an E_WARNING and is treated as invalid.
 *
 * The scan stops at the first invalid key or value.
 *
 * Returns 1 when everything is valid, 0 otherwise. */
static int php_mb_check_encoding_recursive(HashTable *vars, const mbfl_encoding *encoding)
{
	mbfl_buffer_converter *convd;
	zend_long idx;
	zend_string *key;
	zval *entry;
	int valid = 1;

	/* idx is written by the iteration macro but never read here */
	(void)(idx);

	/* Detect cycles before allocating the converter so this path has nothing to release */
	if (GC_IS_RECURSIVE(vars)) {
		php_error_docref(NULL, E_WARNING, "Cannot not handle circular references");
		return 0;
	}

	convd = php_mb_init_convd(encoding);
	/* If this assertion fails this means some memory allocation failure which is a bug */
	ZEND_ASSERT(convd != NULL);

	GC_TRY_PROTECT_RECURSION(vars);
	ZEND_HASH_FOREACH_KEY_VAL(vars, idx, key, entry) {
		ZVAL_DEREF(entry);
		if (key && !php_mb_check_encoding_impl(convd, ZSTR_VAL(key), ZSTR_LEN(key), encoding)) {
			valid = 0;
			break;
		}
		switch (Z_TYPE_P(entry)) {
			case IS_STRING:
				if (!php_mb_check_encoding_impl(convd, Z_STRVAL_P(entry), Z_STRLEN_P(entry), encoding)) {
					valid = 0;
				}
				break;
			case IS_ARRAY:
				if (!php_mb_check_encoding_recursive(Z_ARRVAL_P(entry), encoding)) {
					valid = 0;
				}
				break;
			case IS_LONG:
			case IS_DOUBLE:
			case IS_NULL:
			case IS_TRUE:
			case IS_FALSE:
				break;
			default:
				/* Other types are error. */
				valid = 0;
				break;
		}
		if (!valid) {
			/* A `break` inside the switch only leaves the switch; leave the loop here */
			break;
		}
	} ZEND_HASH_FOREACH_END();
	GC_TRY_UNPROTECT_RECURSION(vars);
	mbfl_buffer_converter_delete(convd);
	return valid;
}
3984
3985
3986 /* {{{ Check if the string is valid for the specified encoding */
PHP_FUNCTION(mb_check_encoding)3987 PHP_FUNCTION(mb_check_encoding)
3988 {
3989 zend_string *input_str = NULL, *enc = NULL;
3990 HashTable *input_ht = NULL;
3991 const mbfl_encoding *encoding;
3992
3993 ZEND_PARSE_PARAMETERS_START(0, 2)
3994 Z_PARAM_OPTIONAL
3995 Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(input_ht, input_str)
3996 Z_PARAM_STR_OR_NULL(enc)
3997 ZEND_PARSE_PARAMETERS_END();
3998
3999 encoding = php_mb_get_encoding(enc, 2);
4000 if (!encoding) {
4001 RETURN_THROWS();
4002 }
4003
4004 if (input_ht) {
4005 RETURN_BOOL(php_mb_check_encoding_recursive(input_ht, encoding));
4006 } else if (input_str) {
4007 RETURN_BOOL(php_mb_check_encoding(ZSTR_VAL(input_str), ZSTR_LEN(input_str), encoding));
4008 } else {
4009 /* FIXME: Actually check all inputs, except $_FILES file content. */
4010 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4011 }
4012 }
4013 /* }}} */
4014
4015
php_mb_ord(const char * str,size_t str_len,zend_string * enc_name,const uint32_t enc_name_arg_num)4016 static inline zend_long php_mb_ord(const char *str, size_t str_len, zend_string *enc_name,
4017 const uint32_t enc_name_arg_num)
4018 {
4019 const mbfl_encoding *enc;
4020 enum mbfl_no_encoding no_enc;
4021
4022 ZEND_ASSERT(str_len > 0);
4023
4024 enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
4025 if (!enc) {
4026 return -2;
4027 }
4028
4029 no_enc = enc->no_encoding;
4030 if (php_mb_is_unsupported_no_encoding(no_enc)) {
4031 zend_value_error("mb_ord() does not support the \"%s\" encoding", enc->name);
4032 return -2;
4033 }
4034
4035 {
4036 mbfl_wchar_device dev;
4037 mbfl_convert_filter *filter;
4038 zend_long cp;
4039
4040 mbfl_wchar_device_init(&dev);
4041 filter = mbfl_convert_filter_new(enc, &mbfl_encoding_wchar, mbfl_wchar_device_output, 0, &dev);
4042 /* If this assertion fails this means some memory allocation failure which is a bug */
4043 ZEND_ASSERT(filter != NULL);
4044
4045 mbfl_convert_filter_feed_string(filter, (unsigned char*)str, str_len);
4046 mbfl_convert_filter_flush(filter);
4047
4048 if (dev.pos < 1 || filter->num_illegalchar || dev.buffer[0] >= MBFL_WCSGROUP_UCS4MAX) {
4049 mbfl_convert_filter_delete(filter);
4050 mbfl_wchar_device_clear(&dev);
4051 return -1;
4052 }
4053
4054 cp = dev.buffer[0];
4055 mbfl_convert_filter_delete(filter);
4056 mbfl_wchar_device_clear(&dev);
4057 return cp;
4058 }
4059 }
4060
4061
4062 /* {{{ */
PHP_FUNCTION(mb_ord)4063 PHP_FUNCTION(mb_ord)
4064 {
4065 char *str;
4066 size_t str_len;
4067 zend_string *enc = NULL;
4068 zend_long cp;
4069
4070 ZEND_PARSE_PARAMETERS_START(1, 2)
4071 Z_PARAM_STRING(str, str_len)
4072 Z_PARAM_OPTIONAL
4073 Z_PARAM_STR_OR_NULL(enc)
4074 ZEND_PARSE_PARAMETERS_END();
4075
4076 if (str_len == 0) {
4077 zend_argument_value_error(1, "must not be empty");
4078 RETURN_THROWS();
4079 }
4080
4081 cp = php_mb_ord(str, str_len, enc, 2);
4082
4083 if (0 > cp) {
4084 if (cp == -2) {
4085 RETURN_THROWS();
4086 }
4087 RETURN_FALSE;
4088 }
4089
4090 RETURN_LONG(cp);
4091 }
4092 /* }}} */
4093
4094
/* Build the string that encodes code point `cp` in the encoding named by
 * `enc_name`.
 *
 * Returns NULL when php_mb_get_encoding() yields no encoding, when the
 * encoding is rejected by php_mb_is_unsupported_no_encoding() (raising
 * zend_value_error()), when `cp` lies outside 0..0x10ffff, when `cp` is in
 * 0xd800..0xdfff and the encoding is UTF-8, or when the conversion counted
 * illegal characters. Otherwise returns the encoded string.
 *
 * NOTE(review): the conversion result is used and freed without a NULL check;
 * confirm php_mb_convert_encoding_ex() cannot return NULL here. */
static inline zend_string *php_mb_chr(zend_long cp, zend_string *enc_name, uint32_t enc_name_arg_num)
{
	const mbfl_encoding *enc = php_mb_get_encoding(enc_name, enc_name_arg_num);
	if (!enc) {
		return NULL;
	}

	enum mbfl_no_encoding no_enc = enc->no_encoding;
	if (php_mb_is_unsupported_no_encoding(no_enc)) {
		zend_value_error("mb_chr() does not support the \"%s\" encoding", enc->name);
		return NULL;
	}

	if (cp < 0 || cp > 0x10ffff) {
		return NULL;
	}

	if (php_mb_is_no_encoding_utf8(no_enc)) {
		/* 0xd800..0xdfff is rejected for UTF-8 */
		if (cp >= 0xd800 && cp <= 0xdfff) {
			return NULL;
		}

		if (cp < 0x80) {
			return ZSTR_CHAR(cp);
		}

		/* Sequence length and the matching lead-byte marker */
		size_t seq_len = (cp < 0x800) ? 2 : ((cp < 0x10000) ? 3 : 4);
		int lead_marker = (seq_len == 2) ? 0xc0 : ((seq_len == 3) ? 0xe0 : 0xf0);

		zend_string *utf8 = zend_string_alloc(seq_len, 0);
		char *out = ZSTR_VAL(utf8);

		/* Lead byte carries the top bits; each following byte carries the next 6 bits */
		out[0] = lead_marker | (cp >> (6 * (seq_len - 1)));
		for (size_t i = 1; i < seq_len; i++) {
			out[i] = 0x80 | ((cp >> (6 * (seq_len - 1 - i))) & 0x3f);
		}
		out[seq_len] = 0;

		return utf8;
	}

	/* Any other encoding: serialise cp as 4 big-endian bytes and convert from UCS-4BE */
	size_t ucs4_len = 4;
	char *ucs4 = (char *) emalloc(ucs4_len + 1);
	for (size_t i = 0; i < ucs4_len; i++) {
		ucs4[i] = (cp >> (8 * (ucs4_len - 1 - i))) & 0xff;
	}
	ucs4[ucs4_len] = 0;

	/* Zero the module-wide counter so that only this conversion is measured, then restore it */
	long saved_illegalchars = MBSTRG(illegalchars);
	MBSTRG(illegalchars) = 0;

	size_t converted_len;
	char *converted = php_mb_convert_encoding_ex(ucs4, ucs4_len, enc, &mbfl_encoding_ucs4be, &converted_len);

	zend_string *encoded = NULL;
	if (MBSTRG(illegalchars) == 0) {
		encoded = zend_string_init(converted, converted_len, 0);
	}

	efree(converted);
	efree(ucs4);
	MBSTRG(illegalchars) = saved_illegalchars;

	return encoded;
}
4175
4176
4177 /* {{{ */
PHP_FUNCTION(mb_chr)4178 PHP_FUNCTION(mb_chr)
4179 {
4180 zend_long cp;
4181 zend_string *enc = NULL;
4182
4183 ZEND_PARSE_PARAMETERS_START(1, 2)
4184 Z_PARAM_LONG(cp)
4185 Z_PARAM_OPTIONAL
4186 Z_PARAM_STR_OR_NULL(enc)
4187 ZEND_PARSE_PARAMETERS_END();
4188
4189 zend_string* ret = php_mb_chr(cp, enc, 2);
4190 if (ret == NULL) {
4191 RETURN_FALSE;
4192 }
4193
4194 RETURN_STR(ret);
4195 }
4196 /* }}} */
4197
4198 /* {{{ */
PHP_FUNCTION(mb_scrub)4199 PHP_FUNCTION(mb_scrub)
4200 {
4201 char* str;
4202 size_t str_len;
4203 zend_string *enc_name = NULL;
4204
4205 ZEND_PARSE_PARAMETERS_START(1, 2)
4206 Z_PARAM_STRING(str, str_len)
4207 Z_PARAM_OPTIONAL
4208 Z_PARAM_STR_OR_NULL(enc_name)
4209 ZEND_PARSE_PARAMETERS_END();
4210
4211 const mbfl_encoding *enc = php_mb_get_encoding(enc_name, 2);
4212 if (!enc) {
4213 RETURN_THROWS();
4214 }
4215
4216 size_t ret_len;
4217 char *ret = php_mb_convert_encoding_ex(str, str_len, enc, enc, &ret_len);
4218
4219 RETVAL_STRINGL(ret, ret_len);
4220 efree(ret);
4221 }
4222 /* }}} */
4223
4224
4225 /* {{{ php_mb_populate_current_detect_order_list */
php_mb_populate_current_detect_order_list(void)4226 static void php_mb_populate_current_detect_order_list(void)
4227 {
4228 const mbfl_encoding **entry = 0;
4229 size_t nentries;
4230
4231 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4232 nentries = MBSTRG(detect_order_list_size);
4233 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4234 memcpy(ZEND_VOIDP(entry), MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4235 } else {
4236 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4237 size_t i;
4238 nentries = MBSTRG(default_detect_order_list_size);
4239 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4240 for (i = 0; i < nentries; i++) {
4241 entry[i] = mbfl_no2encoding(src[i]);
4242 }
4243 }
4244 MBSTRG(current_detect_order_list) = entry;
4245 MBSTRG(current_detect_order_list_size) = nentries;
4246 }
4247 /* }}} */
4248
4249 /* {{{ static int php_mb_encoding_translation() */
php_mb_encoding_translation(void)4250 static int php_mb_encoding_translation(void)
4251 {
4252 return MBSTRG(encoding_translation);
4253 }
4254 /* }}} */
4255
4256 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes_ex() */
php_mb_mbchar_bytes_ex(const char * s,const mbfl_encoding * enc)4257 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4258 {
4259 if (enc != NULL) {
4260 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4261 if (enc->mblen_table != NULL) {
4262 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4263 }
4264 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4265 return 2;
4266 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4267 return 4;
4268 }
4269 }
4270 return 1;
4271 }
4272 /* }}} */
4273
4274 /* {{{ MBSTRING_API size_t php_mb_mbchar_bytes() */
php_mb_mbchar_bytes(const char * s)4275 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s)
4276 {
4277 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4278 }
4279 /* }}} */
4280
4281 /* {{{ MBSTRING_API char *php_mb_safe_strrchr_ex() */
php_mb_safe_strrchr_ex(const char * s,unsigned int c,size_t nbytes,const mbfl_encoding * enc)4282 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4283 {
4284 register const char *p = s;
4285 char *last=NULL;
4286
4287 if (nbytes == (size_t)-1) {
4288 size_t nb = 0;
4289
4290 while (*p != '\0') {
4291 if (nb == 0) {
4292 if ((unsigned char)*p == (unsigned char)c) {
4293 last = (char *)p;
4294 }
4295 nb = php_mb_mbchar_bytes_ex(p, enc);
4296 if (nb == 0) {
4297 return NULL; /* something is going wrong! */
4298 }
4299 }
4300 --nb;
4301 ++p;
4302 }
4303 } else {
4304 register size_t bcnt = nbytes;
4305 register size_t nbytes_char;
4306 while (bcnt > 0) {
4307 if ((unsigned char)*p == (unsigned char)c) {
4308 last = (char *)p;
4309 }
4310 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4311 if (bcnt < nbytes_char) {
4312 return NULL;
4313 }
4314 p += nbytes_char;
4315 bcnt -= nbytes_char;
4316 }
4317 }
4318 return last;
4319 }
4320 /* }}} */
4321
4322 /* {{{ MBSTRING_API char *php_mb_safe_strrchr() */
php_mb_safe_strrchr(const char * s,unsigned int c,size_t nbytes)4323 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes)
4324 {
4325 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4326 }
4327 /* }}} */
4328
/* {{{ MBSTRING_API size_t php_mb_stripos() */
php_mb_stripos(int mode,const char * old_haystack,size_t old_haystack_len,const char * old_needle,size_t old_needle_len,zend_long offset,const mbfl_encoding * enc)4330 MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t old_haystack_len, const char *old_needle, size_t old_needle_len, zend_long offset, const mbfl_encoding *enc)
4331 {
4332 size_t n = (size_t) -1;
4333 mbfl_string haystack, needle;
4334
4335 mbfl_string_init_set(&haystack, enc);
4336 mbfl_string_init_set(&needle, enc);
4337
4338 do {
4339 /* We're using simple case-folding here, because we'd have to deal with remapping of
4340 * offsets otherwise. */
4341
4342 size_t len = 0;
4343 haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
4344 haystack.len = len;
4345
4346 if (!haystack.val) {
4347 break;
4348 }
4349
4350 if (haystack.len == 0) {
4351 break;
4352 }
4353
4354 needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
4355 needle.len = len;
4356
4357 if (!needle.val) {
4358 break;
4359 }
4360
4361 n = mbfl_strpos(&haystack, &needle, offset, mode);
4362 } while(0);
4363
4364 if (haystack.val) {
4365 efree(haystack.val);
4366 }
4367
4368 if (needle.val) {
4369 efree(needle.val);
4370 }
4371
4372 return n;
4373 }
4374 /* }}} */
4375
php_mb_gpc_get_detect_order(const zend_encoding *** list,size_t * list_size)4376 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size) /* {{{ */
4377 {
4378 *list = (const zend_encoding **)MBSTRG(http_input_list);
4379 *list_size = MBSTRG(http_input_list_size);
4380 }
4381 /* }}} */
4382
php_mb_gpc_set_input_encoding(const zend_encoding * encoding)4383 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding) /* {{{ */
4384 {
4385 MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4386 }
4387 /* }}} */
4388