xref: /php-src/ext/mbstring/tests/mb_strcut.phpt (revision cffdeb81)
1--TEST--
2mb_strcut()
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
8<?php
9function MBStringChars($string, $encoding) {
10  $chars = mb_str_split($string, 1, $encoding);
11  return '[' . implode(' ', array_map(function($char) {
12    return join(unpack('H*', $char));
13  }, $chars)) . ']';
14}
15
16ini_set('include_path', __DIR__);
17include_once('common.inc');
18
19$euc_jp = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
20$utf8 = pack('H*', 'e288ae2045e28b856461203d2051'); // has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
21$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
22$utf32be = mb_convert_encoding($utf8, 'UTF-32BE', 'UTF-8');
23$iso2022jp = mb_convert_encoding("漢字 abc カナ", 'ISO-2022-JP', 'UTF-8'); // [<escape sequence>1b2442 3441 3b7a <escape sequence>1b2842 20 61 62 63 20 <escape sequence>1b2442 252b 254a <escape sequence>1b2842]
24$jis = mb_convert_encoding("漢字 abc カナ", 'JIS', 'UTF-8');
25// For testing ISO-2022-JP-2004, add a Kanji character which is in JISX 0213
26$iso2022jp2004 = mb_convert_encoding("漢字 abc カナ凜", 'ISO-2022-JP-2004', 'UTF-8'); // [1b242851 3441 3b7a 1b2842 20 61 62 63 20 1b242851 252b 254a 7425 1b2842]
27$iso2022jpms = mb_convert_encoding("漢字 abc カナ", 'ISO-2022-JP-MS', 'UTF-8'); // [1b2442 3441 3b7a 1b2842 20 61 62 63 20 1b2442 252b 254a 1b2842]
28$iso2022jp_kddi = mb_convert_encoding("漢字 abc カナ", 'ISO-2022-JP-KDDI', 'UTF-8');
29$gb18030 = mb_convert_encoding("漢字 abc カナ", 'GB18030', 'UTF-8');
30
31print "== EUC-JP ==\n";
32print MBStringChars(mb_strcut($euc_jp,  6,   5, 'EUC-JP'), 'EUC-JP') . "\n";
33print MBStringChars(mb_strcut($euc_jp,  5,   5, 'EUC-JP'), 'EUC-JP') . "\n";
34print MBStringChars(mb_strcut($euc_jp,  0, 100, 'EUC-JP'), 'EUC-JP') . "\n";
35
36$str = mb_strcut($euc_jp, 100, 10, 'EUC-JP');
37($str === "") ? print "OK\n" : print "No good\n";
38
39$str = mb_strcut($euc_jp, -100, 10, 'EUC-JP');
40($str !== "") ?	print "OK\n" : print "No good\n";
41
42print "== UTF-8 ==\n";
43print MBStringChars(mb_strcut($utf8, 0, 0, 'UTF-8'), 'UTF-8') . "\n";
44print MBStringChars(mb_strcut($utf8, 0, 1, 'UTF-8'), 'UTF-8') . "\n";
45print MBStringChars(mb_strcut($utf8, 0, 2, 'UTF-8'), 'UTF-8') . "\n";
46print MBStringChars(mb_strcut($utf8, 0, 3, 'UTF-8'), 'UTF-8') . "\n";
47print MBStringChars(mb_strcut($utf8, 0, 4, 'UTF-8'), 'UTF-8') . "\n";
48print MBStringChars(mb_strcut($utf8, 0, 5, 'UTF-8'), 'UTF-8') . "\n";
49print MBStringChars(mb_strcut($utf8, 1, 2, 'UTF-8'), 'UTF-8') . "\n";
50print MBStringChars(mb_strcut($utf8, 1, 3, 'UTF-8'), 'UTF-8') . "\n";
51print MBStringChars(mb_strcut($utf8, 1, 4, 'UTF-8'), 'UTF-8') . "\n";
52
53print MBStringChars(mb_strcut('AåBäCöDü', 2, 100, 'UTF-8'), 'UTF-8') . "\n";
54
55print "== UTF-16 ==\n";
56print "Single byte: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'UTF-16')) . "]\n";
57print "With from=1: [" . bin2hex(mb_strcut("\xff\x01", 1, 100, "UTF-16")) . "]\n";
58print "Bad surrogate: [" . bin2hex(mb_strcut("\xD9\xFF", 0, 100, "UTF-16")) . "]\n";
59print "Bad surrogate followed by other bytes: [" . bin2hex(mb_strcut("\xd9\x00\x12C", 0, 100, "UTF-16")) . "]\n";
60print "BE byte order mark: [" . bin2hex(mb_strcut("\xFE\xFF", 0, 100, "UTF-16")) . "]\n";
61print "LE byte order mark: [" . bin2hex(mb_strcut("\xFF\xFE", 0, 100, "UTF-16")) . "]\n";
62print "Length=0: [" . bin2hex(mb_strcut("\x00\x01\x00\x00", 1, -512, "UTF-16")) . "]\n";
63
64print "== UTF-16LE ==\n";
65print MBStringChars(mb_strcut($utf16le, 0, 0, 'UTF-16LE'), 'UTF-16LE') . "\n";
66print MBStringChars(mb_strcut($utf16le, 0, 1, 'UTF-16LE'), 'UTF-16LE') . "\n";
67print MBStringChars(mb_strcut($utf16le, 0, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
68print MBStringChars(mb_strcut($utf16le, 0, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
69print MBStringChars(mb_strcut($utf16le, 1, 2, 'UTF-16LE'), 'UTF-16LE') . "\n";
70print MBStringChars(mb_strcut($utf16le, 1, 3, 'UTF-16LE'), 'UTF-16LE') . "\n";
71print MBStringChars(mb_strcut($utf16le, 1, 4, 'UTF-16LE'), 'UTF-16LE') . "\n";
72
73print "Single byte: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'UTF-16LE')) . "]\n";
74
75print "== UTF-32BE ==\n";
76print MBStringChars(mb_strcut($utf32be, 0, 3, 'UTF-32BE'), 'UTF-32BE') . "\n";
77print MBStringChars(mb_strcut($utf32be, 0, 4, 'UTF-32BE'), 'UTF-32BE') . "\n";
78print MBStringChars(mb_strcut($utf32be, 0, 5, 'UTF-32BE'), 'UTF-32BE') . "\n";
79print MBStringChars(mb_strcut($utf32be, 1, 8, 'UTF-32BE'), 'UTF-32BE') . "\n";
80print MBStringChars(mb_strcut($utf32be, 3, 9, 'UTF-32BE'), 'UTF-32BE') . "\n";
81
82print "== ISO-2022-JP ==\n";
83print MBStringChars(mb_strcut($iso2022jp, 0, 3, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
84print MBStringChars(mb_strcut($iso2022jp, 0, 4, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
85print MBStringChars(mb_strcut($iso2022jp, 0, 5, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
86print MBStringChars(mb_strcut($iso2022jp, 0, 6, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
87print MBStringChars(mb_strcut($iso2022jp, 0, 7, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
88print MBStringChars(mb_strcut($iso2022jp, 0, 8, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
89
90print MBStringChars(mb_strcut($iso2022jp, 1, 3, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
91print MBStringChars(mb_strcut($iso2022jp, 1, 6, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
92print MBStringChars(mb_strcut($iso2022jp, 1, 8, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
93
94print MBStringChars(mb_strcut($iso2022jp, 2, 5, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
95print MBStringChars(mb_strcut($iso2022jp, 5, 9, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
96print MBStringChars(mb_strcut($iso2022jp, 5, 11, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
97print MBStringChars(mb_strcut($iso2022jp, 6, 13, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
98print MBStringChars(mb_strcut($iso2022jp, 7, 13, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
99
100print MBStringChars(mb_strcut($iso2022jp, 1, 100, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
101print MBStringChars(mb_strcut($iso2022jp, 50, 100, 'ISO-2022-JP'), 'ISO-2022-JP') . "\n";
102
103print "Error followed by ASCII char: [" . bin2hex(mb_strcut("\xdaK", 0, 100, "ISO-2022-JP")) . "]\n";
104
105print "== ISO-2022-JP-2004 ==\n";
106print MBStringChars(mb_strcut($iso2022jp2004, 0, 3, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
107print MBStringChars(mb_strcut($iso2022jp2004, 0, 4, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
108print MBStringChars(mb_strcut($iso2022jp2004, 0, 5, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
109print MBStringChars(mb_strcut($iso2022jp2004, 0, 6, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
110print MBStringChars(mb_strcut($iso2022jp2004, 0, 7, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
111print MBStringChars(mb_strcut($iso2022jp2004, 0, 8, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
112print MBStringChars(mb_strcut($iso2022jp2004, 0, 9, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
113
114print MBStringChars(mb_strcut($iso2022jp2004, 1, 3, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
115print MBStringChars(mb_strcut($iso2022jp2004, 1, 6, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
116print MBStringChars(mb_strcut($iso2022jp2004, 1, 8, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
117print MBStringChars(mb_strcut($iso2022jp2004, 1, 9, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
118
119print MBStringChars(mb_strcut($iso2022jp2004, 2, 5, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
120print MBStringChars(mb_strcut($iso2022jp2004, 5, 9, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
121print MBStringChars(mb_strcut($iso2022jp2004, 5, 11, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
122print MBStringChars(mb_strcut($iso2022jp2004, 6, 13, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
123print MBStringChars(mb_strcut($iso2022jp2004, 7, 13, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
124
125print MBStringChars(mb_strcut($iso2022jp2004, 1, 100, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
126print MBStringChars(mb_strcut($iso2022jp2004, 50, 100, 'ISO-2022-JP-2004'), 'ISO-2022-JP-2004') . "\n";
127
128print "== ISO-2022-JP-MS ==\n";
129print MBStringChars(mb_strcut($iso2022jpms, 0, 3, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
130print MBStringChars(mb_strcut($iso2022jpms, 0, 4, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
131print MBStringChars(mb_strcut($iso2022jpms, 0, 5, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
132print MBStringChars(mb_strcut($iso2022jpms, 0, 6, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
133print MBStringChars(mb_strcut($iso2022jpms, 0, 7, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
134print MBStringChars(mb_strcut($iso2022jpms, 0, 8, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
135print MBStringChars(mb_strcut($iso2022jpms, 0, 9, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
136
137print MBStringChars(mb_strcut($iso2022jpms, 1, 3, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
138print MBStringChars(mb_strcut($iso2022jpms, 1, 6, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
139print MBStringChars(mb_strcut($iso2022jpms, 1, 8, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
140print MBStringChars(mb_strcut($iso2022jpms, 1, 9, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
141
142print MBStringChars(mb_strcut($iso2022jpms, 2, 5, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
143print MBStringChars(mb_strcut($iso2022jpms, 5, 9, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
144print MBStringChars(mb_strcut($iso2022jpms, 5, 11, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
145print MBStringChars(mb_strcut($iso2022jpms, 6, 13, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
146print MBStringChars(mb_strcut($iso2022jpms, 7, 13, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
147
148print MBStringChars(mb_strcut($iso2022jpms, 1, 100, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
149print MBStringChars(mb_strcut($iso2022jpms, 50, 100, 'ISO-2022-JP-MS'), 'ISO-2022-JP-MS') . "\n";
150
151print "== JIS ==\n";
152print MBStringChars(mb_strcut($jis, 0, 3, 'JIS'), 'JIS') . "\n";
153print MBStringChars(mb_strcut($jis, 0, 4, 'JIS'), 'JIS') . "\n";
154print MBStringChars(mb_strcut($jis, 0, 5, 'JIS'), 'JIS') . "\n";
155print MBStringChars(mb_strcut($jis, 0, 6, 'JIS'), 'JIS') . "\n";
156print MBStringChars(mb_strcut($jis, 0, 7, 'JIS'), 'JIS') . "\n";
157print MBStringChars(mb_strcut($jis, 0, 8, 'JIS'), 'JIS') . "\n";
158
159print MBStringChars(mb_strcut($jis, 1, 3, 'JIS'), 'JIS') . "\n";
160print MBStringChars(mb_strcut($jis, 1, 6, 'JIS'), 'JIS') . "\n";
161print MBStringChars(mb_strcut($jis, 1, 8, 'JIS'), 'JIS') . "\n";
162
163print MBStringChars(mb_strcut($jis, 2, 5, 'JIS'), 'JIS') . "\n";
164print MBStringChars(mb_strcut($jis, 5, 9, 'JIS'), 'JIS') . "\n";
165print MBStringChars(mb_strcut($jis, 5, 11, 'JIS'), 'JIS') . "\n";
166print MBStringChars(mb_strcut($jis, 6, 13, 'JIS'), 'JIS') . "\n";
167print MBStringChars(mb_strcut($jis, 7, 13, 'JIS'), 'JIS') . "\n";
168
169print MBStringChars(mb_strcut($jis, 1, 100, 'JIS'), 'JIS') . "\n";
170print MBStringChars(mb_strcut($jis, 50, 100, 'JIS'), 'JIS') . "\n";
171
172print "0xA3: [" . bin2hex(mb_strcut("\xA3aaaaaa", 0, 100, 'JIS')) . "]\n";
173print "Bad escape sequence followed by null byte: [" . bin2hex(mb_strcut("\x1b\x00", 1, 100, "JIS")) . "]\n";
174
175print "== ISO-2022-JP-KDDI ==\n";
176print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 3, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
177print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 4, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
178print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 5, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
179print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 6, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
180print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 7, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
181print MBStringChars(mb_strcut($iso2022jp_kddi, 0, 8, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
182
183print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 3, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
184print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 6, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
185print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 8, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
186
187print MBStringChars(mb_strcut($iso2022jp_kddi, 2, 5, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
188print MBStringChars(mb_strcut($iso2022jp_kddi, 5, 9, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
189print MBStringChars(mb_strcut($iso2022jp_kddi, 5, 11, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
190print MBStringChars(mb_strcut($iso2022jp_kddi, 6, 13, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
191print MBStringChars(mb_strcut($iso2022jp_kddi, 7, 13, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
192
193print MBStringChars(mb_strcut($iso2022jp_kddi, 1, 100, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
194print MBStringChars(mb_strcut($iso2022jp_kddi, 50, 100, 'ISO-2022-JP-KDDI'), 'ISO-2022-JP-KDDI') . "\n";
195
196print "== CP50220 ==\n";
197
198print "Single byte 0xFF: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'CP50220')) . "]\n";
199print "Double byte 0xFF: [" . bin2hex(mb_strcut("\xFF\xFF", 0, 100, 'CP50220')) . "]\n";
200print "Sample string with multiple null bytes: [" . bin2hex(mb_strcut("\xCF\x00\x00\x00\x00\x00d\x00\x00", 0, 100, 'CP50220')) . "]\n";
201print "Bad escape sequence preceded by bad bytes: [" . bin2hex(mb_strcut("\xFF\xFF\x1B\x00", 0, 100, 'CP50220')) . "]\n";
202print "Good JISX 0208 sequence, but it won't fit in max number of bytes: [" . bin2hex(mb_strcut("\x1B\$BGV\x17", 0, 100, 'CP50220')) . "]\n";
203print "Bad escape sequence followed by GR kana: [" . bin2hex(mb_strcut("\x1B\$\xAC\x13", 0, 100, 'CP50220')) . "]\n";
204
205print "== UTF-7 ==\n";
206
207print "Single byte 0x01: [" . mb_strcut("\x01", 0, 100, 'UTF-7') . "]\n";
208print "UTF-16 section ends abruptly: [" . mb_strcut("+Q", 1, 100, 'UTF-7') . "]\n";
209print "UTF-16 section ends abruptly in middle of 2nd codepoint: [" . mb_strcut("+QxxC", 0, 100, 'UTF-7') . "]\n";
210print "Cutting in middle of UTF-16 section: [" . mb_strcut("+UUU", -1, 255, "UTF-7") . "]\n";
211print "Cutting in middle of UTF-16 section (2): [" . mb_strcut("+UUUU", -2, 255, "UTF-7") . "]\n";
212
213print "== UTF7-IMAP ==\n";
214
215print "Single byte 0x01: [" . mb_strcut("\x01", 0, 100, 'UTF7-IMAP') . "]\n";
216print "UTF-16 section ends abruptly: [" . mb_strcut("&Q", 1, 100, 'UTF7-IMAP') . "]\n";
217print "UTF-16 section ends abruptly in middle of 2nd codepoint: [" . mb_strcut("&QxxC", 0, 100, 'UTF7-IMAP') . "]\n";
218print "UTF-16 section is terminated improperly: [" . mb_strcut("&i6o\x83", 0, 100, 'UTF7-IMAP') . "]\n";
219
220print "== GB18030 ==\n";
221
222print "Empty string: [" . bin2hex(mb_strcut("", 0, 5, 'GB18030')) . "]\n";
223print "Empty string 2: [" . bin2hex(mb_strcut("", -2, 1, 'GB18030')) . "]\n";
224print "Empty string 3: [" . bin2hex(mb_strcut("", 0, -1, 'GB18030')) . "]\n";
225print "Invalid byte 0xF5: [" . bin2hex(mb_strcut("\xF5a", 1, 100, 'GB18030')) . "]\n";
226print "Double-byte char: [" . bin2hex(mb_strcut("\xAFw", -1, 100, "GB18030")) . "]\n";
227
228print MBStringChars(mb_strcut($gb18030, 0, 0, 'GB18030'), 'GB18030') . "\n";
229print MBStringChars(mb_strcut($gb18030, 0, 1, 'GB18030'), 'GB18030') . "\n";
230print MBStringChars(mb_strcut($gb18030, 0, 2, 'GB18030'), 'GB18030') . "\n";
231print MBStringChars(mb_strcut($gb18030, 0, 3, 'GB18030'), 'GB18030') . "\n";
232print MBStringChars(mb_strcut($gb18030, 0, 4, 'GB18030'), 'GB18030') . "\n";
233print MBStringChars(mb_strcut($gb18030, 0, 5, 'GB18030'), 'GB18030') . "\n";
234print MBStringChars(mb_strcut($gb18030, 1, 2, 'GB18030'), 'GB18030') . "\n";
235print MBStringChars(mb_strcut($gb18030, 1, 3, 'GB18030'), 'GB18030') . "\n";
236print MBStringChars(mb_strcut($gb18030, 1, 4, 'GB18030'), 'GB18030') . "\n";
237
238// U+210A is encoded using 4 bytes in GB18030
239print "Operating on 4-byte GB18030 character:\n";
240$fourbyte = mb_convert_encoding("\x21\x0A", 'GB18030', 'UTF-16BE');
241print MBStringChars(mb_strcut($fourbyte, 0, 4, 'GB18030'), 'GB18030') . "\n";
242print MBStringChars(mb_strcut($fourbyte, 1, 4, 'GB18030'), 'GB18030') . "\n";
243print MBStringChars(mb_strcut($fourbyte, 2, 4, 'GB18030'), 'GB18030') . "\n";
244print MBStringChars(mb_strcut($fourbyte, 3, 4, 'GB18030'), 'GB18030') . "\n";
245print MBStringChars(mb_strcut($fourbyte, 4, 4, 'GB18030'), 'GB18030') . "\n";
246print MBStringChars(mb_strcut($fourbyte, 1, 3, 'GB18030'), 'GB18030') . "\n";
247print MBStringChars(mb_strcut($fourbyte, 2, 3, 'GB18030'), 'GB18030') . "\n";
248print MBStringChars(mb_strcut($fourbyte, 2, 4, 'GB18030'), 'GB18030') . "\n";
249print MBStringChars(mb_strcut($fourbyte, 0, -1, 'GB18030'), 'GB18030') . "\n";
250
251print "[" . bin2hex(mb_strcut(hex2bin("84308130"), 2, null, "GB18030")) . "]\n";
252
253print "== UHC ==\n";
254
255print "Single byte 0x96: [" . bin2hex(mb_strcut("\x96", 1, 1280, "UHC")) . "]\n";
256
257print "== ASCII ==\n";
258
259print "Empty: [" . bin2hex(mb_strcut("ABC", 0, 0, "ASCII")) . "]\n";
260print "Empty: [" . bin2hex(mb_strcut("ABC", 1, 0, "ASCII")) . "]\n";
261print "Empty: [" . bin2hex(mb_strcut("ABC", 2, 0, "ASCII")) . "]\n";
262
263print "One char: [" . bin2hex(mb_strcut("ABC", 2, 1, "ASCII")) . "]\n";
264print "Two chars: [" . bin2hex(mb_strcut("ABC", 1, 2, "ASCII")) . "]\n";
265print "Two chars: [" . bin2hex(mb_strcut("ABC", 1, 3, "ASCII")) . "]\n";
266
267print "== UCS-2BE ==\n";
268
269print "Empty: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 0, 0, "UCS-2BE")) . "]\n";
270print "Empty: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 1, 0, "UCS-2BE")) . "]\n";
271print "Empty: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 2, 0, "UCS-2BE")) . "]\n";
272
273print "Empty: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 2, 1, "UCS-2BE")) . "]\n";
274print "One char: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 1, 2, "UCS-2BE")) . "]\n";
275print "Cut in middle of following char: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 1, 3, "UCS-2BE")) . "]\n";
276print "Two chars: [" . bin2hex(mb_strcut("\x00A\x00B\x00C", 1, 4, "UCS-2BE")) . "]\n";
277
278print "== UCS-4BE ==\n";
279
280print "From 1, Length 5: [" . bin2hex(mb_strcut("\x00\x00\x00\x41\x00\x00\x00\x42", 1, 5, "UCS-4BE")) . "]\n";
281print "From 1, Length 6: [" . bin2hex(mb_strcut("\x00\x00\x00\x41\x00\x00\x00\x42", 1, 6, "UCS-4BE")) . "]\n";
282print "From 1, Length 8: [" . bin2hex(mb_strcut("\x00\x00\x00\x41\x00\x00\x00\x42", 1, 8, "UCS-4BE")) . "]\n";
283
284?>
285--EXPECT--
286== EUC-JP ==
287[a4ce cab8]
288[a4b3 a4ce]
289[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3]
290OK
291OK
292== UTF-8 ==
293[]
294[]
295[]
296[e288ae]
297[e288ae 20]
298[e288ae 20 45]
299[]
300[e288ae]
301[e288ae 20]
302[c3a5 42 c3a4 43 c3b6 44 c3bc]
303== UTF-16 ==
304Single byte: []
305With from=1: []
306Bad surrogate: []
307Bad surrogate followed by other bytes: [d9001243]
308BE byte order mark: []
309LE byte order mark: []
310Length=0: []
311== UTF-16LE ==
312[]
313[]
314[1a04]
315[1a04]
316[1a04]
317[1a04]
318[1a04 3804]
319Single byte: []
320== UTF-32BE ==
321[]
322[0000222e]
323[0000222e]
324[0000222e 00000020]
325[0000222e 00000020]
326== ISO-2022-JP ==
327[]
328[]
329[]
330[]
331[]
332[1b244234411b2842]
333[]
334[]
335[1b244234411b2842]
336[]
337[1b24423b7a1b2842 20]
338[1b24423b7a1b2842 20 61 62]
339[1b24423b7a1b2842 20 61 62 63 20]
340[20 61 62 63 20 1b2442252b1b2842]
341[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
342[]
343Error followed by ASCII char: [4b]
344== ISO-2022-JP-2004 ==
345[]
346[]
347[]
348[]
349[]
350[]
351[1b24285134411b2842]
352[]
353[]
354[]
355[1b24285134411b2842]
356[]
357[1b24285134411b2842]
358[1b24285134411b2842 1b2428513b7a1b2842]
359[1b2428513b7a1b2842 20 61 62 63]
360[1b2428513b7a1b2842 20 61 62 63]
361[1b24285134411b2842 1b2428513b7a1b2842 20 61 62 63 20 1b242851252b1b2842 1b242851254a1b2842]
362[]
363== ISO-2022-JP-MS ==
364[]
365[]
366[]
367[]
368[]
369[1b244234411b2842]
370[1b244234411b2842]
371[]
372[]
373[1b244234411b2842]
374[1b244234411b2842]
375[]
376[1b24423b7a1b2842 20]
377[1b24423b7a1b2842 20 61 62]
378[1b24423b7a1b2842 20 61 62 63 20]
379[20 61 62 63 20 1b2442252b1b2842]
380[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
381[]
382== JIS ==
383[]
384[]
385[]
386[]
387[]
388[1b244234411b2842]
389[]
390[]
391[1b244234411b2842]
392[]
393[1b24423b7a1b2842 20]
394[1b24423b7a1b2842 20 61 62]
395[1b24423b7a1b2842 20 61 62 63 20]
396[20 61 62 63 20 1b2442252b1b2842]
397[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
398[]
3990xA3: []
400Bad escape sequence followed by null byte: []
401== ISO-2022-JP-KDDI ==
402[]
403[]
404[]
405[]
406[]
407[1b244234411b2842]
408[]
409[]
410[1b244234411b2842]
411[]
412[1b24423b7a1b2842 20]
413[1b24423b7a1b2842 20 61 62]
414[1b24423b7a1b2842 20 61 62 63 20]
415[20 61 62 63 20 1b2442252b1b2842]
416[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
417[]
418== CP50220 ==
419Single byte 0xFF: []
420Double byte 0xFF: [3f]
421Sample string with multiple null bytes: [1b2442255e001b2842]
422Bad escape sequence preceded by bad bytes: [3f3f3f00]
423Good JISX 0208 sequence, but it won't fit in max number of bytes: []
424Bad escape sequence followed by GR kana: []
425== UTF-7 ==
426Single byte 0x01: []
427UTF-16 section ends abruptly: []
428UTF-16 section ends abruptly in middle of 2nd codepoint: [+Qxw-]
429Cutting in middle of UTF-16 section: []
430Cutting in middle of UTF-16 section (2): []
431== UTF7-IMAP ==
432Single byte 0x01: [?]
433UTF-16 section ends abruptly: []
434UTF-16 section ends abruptly in middle of 2nd codepoint: []
435UTF-16 section is terminated improperly: []
436== GB18030 ==
437Empty string: []
438Empty string 2: []
439Empty string 3: []
440Invalid byte 0xF5: []
441Double-byte char: []
442[]
443[]
444[9d68]
445[9d68]
446[9d68 d7d6]
447[9d68 d7d6 20]
448[9d68]
449[9d68]
450[9d68 d7d6]
451Operating on 4-byte GB18030 character:
452[8136bc32]
453[]
454[]
455[]
456[]
457[]
458[]
459[]
460[]
461[]
462== UHC ==
463Single byte 0x96: [96]
464== ASCII ==
465Empty: []
466Empty: []
467Empty: []
468One char: [43]
469Two chars: [4243]
470Two chars: [4243]
471== UCS-2BE ==
472Empty: []
473Empty: []
474Empty: []
475Empty: []
476One char: [0041]
477Cut in middle of following char: [0041]
478Two chars: [00410042]
479== UCS-4BE ==
480From 1, Length 5: [00000041]
481From 1, Length 6: [00000041]
482From 1, Length 8: [0000004100000042]
483