1--TEST--
2mb_str_split() tests UTF-8 illegal chars & UTF-16 surrogate pairs
3--SKIPIF--
4<?php extension_loaded('mbstring') or die('skip mbstring not available'); ?>
5--INI--
6output_handler=
7mbstring.func_overload=0
8--FILE--
<?php
ini_set('include_path', '.');
include_once 'common.inc';

/*
 * Fixtures: every input is spelled as hex and decoded to raw bytes.
 */

/* "123" followed by a single 4-byte UTF-8 character (bytes f0 92 80 a9). */
$utf8Valid = hex2bin('313233f09280a9');

/* Same "123" prefix, but the 4-byte character is cut off after its first two bytes. */
$utf8Truncated = hex2bin('313233f092');

/* First (d800 dc00) and last (dbff dfff) UTF-16 surrogate pairs, in both byte orders. */
$firstPair = ['BE' => hex2bin('d800dc00'), 'LE' => hex2bin('00d800dc')];
$lastPair  = ['BE' => hex2bin('dbffdfff'), 'LE' => hex2bin('ffdbffdf')];

/* Illegal UTF-16 unit sequence: it begins with a low surrogate (dc00). */
$lowSurrogateFirst = ['BE' => hex2bin('dc00dc00'), 'LE' => hex2bin('00dc00dc')];

/*
 * Prints one report line: the label, then " l:<byte length> v:<hex bytes>" for
 * every chunk returned by mb_str_split(), then a newline.
 *
 * The remaining arguments are forwarded to mb_str_split() unchanged, so a case
 * that omits the encoding keeps relying on mb_str_split()'s default.
 * The label is written before mb_str_split() is invoked.
 */
$report = static function (string $label, ...$splitArgs): void {
    echo $label, ':';
    foreach (mb_str_split(...$splitArgs) as $piece) {
        printf(' l:%d v:%s', strlen($piece), bin2hex($piece));
    }
    echo PHP_EOL;
};

/* Each row: label, input string, chunk length in characters, optional encoding. */
$cases = [
    ['UTF-8',        $utf8Valid,                                    2],
    ['BAD UTF-8',    $utf8Truncated,                                2],
    ['UTF-16BE',     $firstPair['BE'] . $lastPair['BE'],            1, 'UTF-16BE'],
    ['UTF-16LE',     $firstPair['LE'] . $lastPair['LE'],            1, 'UTF-16LE'],
    ['BAD UTF-16BE', $firstPair['BE'] . $lowSurrogateFirst['BE'],   1, 'UTF-16BE'],
    ['BAD UTF-16LE', $firstPair['LE'] . $lowSurrogateFirst['LE'],   1, 'UTF-16LE'],
];

foreach ($cases as $case) {
    $report(...$case);
}

/* Dump the result of splitting an empty string for several encodings. */
foreach (['ASCII', 'UTF-8', 'UTF-16LE'] as $encoding) {
    var_dump(mb_str_split('', 1, $encoding));
}

?>
77--EXPECT--
78UTF-8: l:2 v:3132 l:5 v:33f09280a9
79BAD UTF-8: l:2 v:3132 l:3 v:33f092
80UTF-16BE: l:4 v:d800dc00 l:4 v:dbffdfff
81UTF-16LE: l:4 v:00d800dc l:4 v:ffdbffdf
82BAD UTF-16BE: l:4 v:d800dc00 l:2 v:003f l:2 v:003f
83BAD UTF-16LE: l:4 v:00d800dc l:2 v:3f00 l:2 v:3f00
84array(0) {
85}
86array(0) {
87}
88array(0) {
89}
90