1--TEST--
2mb_str_split() tests UTF-8 illegal chars & UTF-16 surrogate pairs
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
<?php
ini_set('include_path', '.');
include_once 'common.inc';

/*
 * Prints " l:<byte length> v:<hex bytes>" for every chunk in $chunks and then
 * terminates the current output line. The caller echoes the label beforehand.
 */
$dumpChunks = static function (array $chunks): void {
    foreach ($chunks as $piece) {
        echo sprintf(" l:%d v:%s", strlen($piece), bin2hex($piece));
    }
    echo PHP_EOL;
};

/* "123" followed by the complete 4-byte UTF-8 sequence f0 92 80 a9 */
$utf8 = hex2bin("313233f09280a9");

/* "123" followed by only the first two bytes (f0 92) of that 4-byte sequence */
$utf8_bad = hex2bin("313233f092");

/* The very first UTF-16 surrogate pair, big- and little-endian byte order */
$utf16_first_be = hex2bin("d800dc00");
$utf16_first_le = hex2bin("00d800dc");

/* The very last UTF-16 surrogate pair, big- and little-endian byte order */
$utf16_last_be = hex2bin("dbffdfff");
$utf16_last_le = hex2bin("ffdbffdf");

/* Illegal units: each one begins with a low surrogate (dc00) */
$utf16be_char_bad = hex2bin("dc00dc00");
$utf16le_char_bad = hex2bin("00dc00dc");

/* Well-formed input: first pair followed by last pair */
$utf16be = $utf16_first_be . $utf16_last_be;
$utf16le = $utf16_first_le . $utf16_last_le;

/* Malformed input: a valid pair followed by the illegal low-surrogate units */
$utf16be_bad = $utf16_first_be . $utf16be_char_bad;
$utf16le_bad = $utf16_first_le . $utf16le_char_bad;

/* Dump every chunk as a hex string; the UTF-8 cases rely on the default encoding */
echo "UTF-8:";
$dumpChunks(mb_str_split($utf8, 2));

echo "BAD UTF-8:";
$dumpChunks(mb_str_split($utf8_bad, 2));

echo "UTF-16BE:";
$dumpChunks(mb_str_split($utf16be, 1, "UTF-16BE"));

echo "UTF-16LE:";
$dumpChunks(mb_str_split($utf16le, 1, "UTF-16LE"));

echo "BAD UTF-16BE:";
$dumpChunks(mb_str_split($utf16be_bad, 1, "UTF-16BE"));

echo "BAD UTF-16LE:";
$dumpChunks(mb_str_split($utf16le_bad, 1, "UTF-16LE"));

/* Splitting an empty string is dumped for a few different encodings */
foreach (["ASCII", "UTF-8", "UTF-16LE"] as $encoding) {
    var_dump(mb_str_split("", 1, $encoding));
}

?>
76--EXPECT--
77UTF-8: l:2 v:3132 l:5 v:33f09280a9
78BAD UTF-8: l:2 v:3132 l:3 v:33f092
79UTF-16BE: l:4 v:d800dc00 l:4 v:dbffdfff
80UTF-16LE: l:4 v:00d800dc l:4 v:ffdbffdf
81BAD UTF-16BE: l:4 v:d800dc00 l:2 v:003f l:2 v:003f
82BAD UTF-16LE: l:4 v:00d800dc l:2 v:3f00 l:2 v:3f00
83array(0) {
84}
85array(0) {
86}
87array(0) {
88}
89