--TEST--
mb_strcut()
--EXTENSIONS--
mbstring
--INI--
output_handler=
--FILE--
<?php
/**
 * Render $string as a bracketed, space-separated list of its characters,
 * each character shown as the lowercase hex of its bytes, e.g. "[e288ae 20 45]".
 * The string is split into single characters according to $encoding.
 */
function MBStringChars($string, $encoding) {
    $hexChars = [];
    foreach (mb_str_split($string, 1, $encoding) as $char) {
        $hexChars[] = bin2hex($char);
    }
    return '[' . implode(' ', $hexChars) . ']';
}

ini_set('include_path', __DIR__);
include_once('common.inc');

// Cut $input with mb_strcut() at every [from, length] pair, in order, and
// print one MBStringChars() dump line per cut.
$printCuts = function ($input, $encoding, $ranges) {
    foreach ($ranges as [$from, $length]) {
        echo MBStringChars(mb_strcut($input, $from, $length, $encoding), $encoding) . "\n";
    }
};

// [from, length] pairs used for the ISO-2022-JP, JIS and ISO-2022-JP-KDDI sections
$jisRanges = [
    [0, 3], [0, 4], [0, 5], [0, 6], [0, 7], [0, 8],
    [1, 3], [1, 6], [1, 8],
    [2, 5], [5, 9], [5, 11], [6, 13], [7, 13],
    [1, 100], [50, 100],
];

// [from, length] pairs used for ISO-2022-JP-2004 and ISO-2022-JP-MS; same as
// above plus cuts of length 9 from offsets 0 and 1
$jisRangesUpTo9 = [
    [0, 3], [0, 4], [0, 5], [0, 6], [0, 7], [0, 8], [0, 9],
    [1, 3], [1, 6], [1, 8], [1, 9],
    [2, 5], [5, 9], [5, 11], [6, 13], [7, 13],
    [1, 100], [50, 100],
];

$sampleText = "漢字 abc カナ";

$euc_jp = pack('H*', '30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3');
// has 2 multi-byte characters: [e288ae 20 45 e28b85 64 61 20 3d 20 51]
$utf8 = pack('H*', 'e288ae2045e28b856461203d2051');
$utf16le = pack('H*', '1a043804400438043b043b04380446043004200069007300200043007900720069006c006c0069006300');
$utf32be = mb_convert_encoding($utf8, 'UTF-32BE', 'UTF-8');
// [<escape sequence>1b2442 3441 3b7a <escape sequence>1b2842 20 61 62 63 20 <escape sequence>1b2442 252b 254a <escape sequence>1b2842]
$iso2022jp = mb_convert_encoding($sampleText, 'ISO-2022-JP', 'UTF-8');
$jis = mb_convert_encoding($sampleText, 'JIS', 'UTF-8');
// For testing ISO-2022-JP-2004, add a Kanji character which is in JISX 0213
// [1b242851 3441 3b7a 1b2842 20 61 62 63 20 1b242851 252b 254a 7425 1b2842]
$iso2022jp2004 = mb_convert_encoding("漢字 abc カナ凜", 'ISO-2022-JP-2004', 'UTF-8');
// [1b2442 3441 3b7a 1b2842 20 61 62 63 20 1b2442 252b 254a 1b2842]
$iso2022jpms = mb_convert_encoding($sampleText, 'ISO-2022-JP-MS', 'UTF-8');
$iso2022jp_kddi = mb_convert_encoding($sampleText, 'ISO-2022-JP-KDDI', 'UTF-8');
$gb18030 = mb_convert_encoding($sampleText, 'GB18030', 'UTF-8');

echo "== EUC-JP ==\n";
$printCuts($euc_jp, 'EUC-JP', [[6, 5], [5, 5], [0, 100]]);

// Starting past the end of the string must yield an empty string
$str = mb_strcut($euc_jp, 100, 10, 'EUC-JP');
echo ($str === "") ? "OK\n" : "No good\n";

// A negative start offset must still yield a non-empty cut
$str = mb_strcut($euc_jp, -100, 10, 'EUC-JP');
echo ($str !== "") ? "OK\n" : "No good\n";

echo "== UTF-8 ==\n";
$printCuts($utf8, 'UTF-8', [
    [0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5],
    [1, 2], [1, 3], [1, 4],
]);

$printCuts('AåBäCöDü', 'UTF-8', [[2, 100]]);

echo "== UTF-16 ==\n";
echo "Single byte: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'UTF-16')) . "]\n";
echo "With from=1: [" . bin2hex(mb_strcut("\xff\x01", 1, 100, "UTF-16")) . "]\n";
echo "Bad surrogate: [" . bin2hex(mb_strcut("\xD9\xFF", 0, 100, "UTF-16")) . "]\n";
echo "Bad surrogate followed by other bytes: [" . bin2hex(mb_strcut("\xd9\x00\x12C", 0, 100, "UTF-16")) . "]\n";
echo "BE byte order mark: [" . bin2hex(mb_strcut("\xFE\xFF", 0, 100, "UTF-16")) . "]\n";
echo "LE byte order mark: [" . bin2hex(mb_strcut("\xFF\xFE", 0, 100, "UTF-16")) . "]\n";
echo "Length=0: [" . bin2hex(mb_strcut("\x00\x01\x00\x00", 1, -512, "UTF-16")) . "]\n";

echo "== UTF-16LE ==\n";
$printCuts($utf16le, 'UTF-16LE', [
    [0, 0], [0, 1], [0, 2], [0, 3],
    [1, 2], [1, 3], [1, 4],
]);

echo "Single byte: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'UTF-16LE')) . "]\n";

echo "== UTF-32BE ==\n";
$printCuts($utf32be, 'UTF-32BE', [[0, 3], [0, 4], [0, 5], [1, 8], [3, 9]]);

echo "== ISO-2022-JP ==\n";
$printCuts($iso2022jp, 'ISO-2022-JP', $jisRanges);

echo "Error followed by ASCII char: [" . bin2hex(mb_strcut("\xdaK", 0, 100, "ISO-2022-JP")) . "]\n";

echo "== ISO-2022-JP-2004 ==\n";
$printCuts($iso2022jp2004, 'ISO-2022-JP-2004', $jisRangesUpTo9);

echo "== ISO-2022-JP-MS ==\n";
$printCuts($iso2022jpms, 'ISO-2022-JP-MS', $jisRangesUpTo9);

echo "== JIS ==\n";
$printCuts($jis, 'JIS', $jisRanges);

echo "0xA3: [" . bin2hex(mb_strcut("\xA3aaaaaa", 0, 100, 'JIS')) . "]\n";
echo "Bad escape sequence followed by null byte: [" . bin2hex(mb_strcut("\x1b\x00", 1, 100, "JIS")) . "]\n";

echo "== ISO-2022-JP-KDDI ==\n";
$printCuts($iso2022jp_kddi, 'ISO-2022-JP-KDDI', $jisRanges);

echo "== CP50220 ==\n";

echo "Single byte 0xFF: [" . bin2hex(mb_strcut("\xFF", 0, 100, 'CP50220')) . "]\n";
echo "Double byte 0xFF: [" . bin2hex(mb_strcut("\xFF\xFF", 0, 100, 'CP50220')) . "]\n";
echo "Sample string with multiple null bytes: [" . bin2hex(mb_strcut("\xCF\x00\x00\x00\x00\x00d\x00\x00", 0, 100, 'CP50220')) . "]\n";
echo "Bad escape sequence preceded by bad bytes: [" . bin2hex(mb_strcut("\xFF\xFF\x1B\x00", 0, 100, 'CP50220')) . "]\n";
echo "Good JISX 0208 sequence, but it won't fit in max number of bytes: [" . bin2hex(mb_strcut("\x1B\$BGV\x17", 0, 100, 'CP50220')) . "]\n";
echo "Bad escape sequence followed by GR kana: [" . bin2hex(mb_strcut("\x1B\$\xAC\x13", 0, 100, 'CP50220')) . "]\n";

echo "== UTF-7 ==\n";

// These results are printed raw (not hex-encoded)
echo "Single byte 0x01: [" . mb_strcut("\x01", 0, 100, 'UTF-7') . "]\n";
echo "UTF-16 section ends abruptly: [" . mb_strcut("+Q", 1, 100, 'UTF-7') . "]\n";
echo "UTF-16 section ends abruptly in middle of 2nd codepoint: [" . mb_strcut("+QxxC", 0, 100, 'UTF-7') . "]\n";
echo "Cutting in middle of UTF-16 section: [" . mb_strcut("+UUU", -1, 255, "UTF-7") . "]\n";
echo "Cutting in middle of UTF-16 section (2): [" . mb_strcut("+UUUU", -2, 255, "UTF-7") . "]\n";

echo "== UTF7-IMAP ==\n";

echo "Single byte 0x01: [" . mb_strcut("\x01", 0, 100, 'UTF7-IMAP') . "]\n";
echo "UTF-16 section ends abruptly: [" . mb_strcut("&Q", 1, 100, 'UTF7-IMAP') . "]\n";
echo "UTF-16 section ends abruptly in middle of 2nd codepoint: [" . mb_strcut("&QxxC", 0, 100, 'UTF7-IMAP') . "]\n";
echo "UTF-16 section is terminated improperly: [" . mb_strcut("&i6o\x83", 0, 100, 'UTF7-IMAP') . "]\n";

echo "== GB18030 ==\n";

echo "Empty string: [" . bin2hex(mb_strcut("", 0, 5, 'GB18030')) . "]\n";
echo "Empty string 2: [" . bin2hex(mb_strcut("", -2, 1, 'GB18030')) . "]\n";
echo "Empty string 3: [" . bin2hex(mb_strcut("", 0, -1, 'GB18030')) . "]\n";
echo "Invalid byte 0xF5: [" . bin2hex(mb_strcut("\xF5a", 1, 100, 'GB18030')) . "]\n";
echo "Double-byte char: [" . bin2hex(mb_strcut("\xAFw", -1, 100, "GB18030")) . "]\n";

$printCuts($gb18030, 'GB18030', [
    [0, 0], [0, 1], [0, 2], [0, 3], [0, 4], [0, 5],
    [1, 2], [1, 3], [1, 4],
]);

// U+210A is encoded using 4 bytes in GB18030
echo "Operating on 4-byte GB18030 character:\n";
$fourbyte = mb_convert_encoding("\x21\x0A", 'GB18030', 'UTF-16BE');
$printCuts($fourbyte, 'GB18030', [
    [0, 4], [1, 4], [2, 4], [3, 4], [4, 4],
    [1, 3], [2, 3], [2, 4],
    [0, -1],
]);

echo "[" . bin2hex(mb_strcut(hex2bin("84308130"), 2, null, "GB18030")) . "]\n";

echo "== UHC ==\n";

echo "Single byte 0x96: [" . bin2hex(mb_strcut("\x96", 1, 1280, "UHC")) . "]\n";

echo "== ASCII ==\n";

echo "Empty: [" . bin2hex(mb_strcut("ABC", 0, 0, "ASCII")) . "]\n";
echo "Empty: [" . bin2hex(mb_strcut("ABC", 1, 0, "ASCII")) . "]\n";
echo "Empty: [" . bin2hex(mb_strcut("ABC", 2, 0, "ASCII")) . "]\n";

echo "One char: [" . bin2hex(mb_strcut("ABC", 2, 1, "ASCII")) . "]\n";
echo "Two chars: [" . bin2hex(mb_strcut("ABC", 1, 2, "ASCII")) . "]\n";
echo "Two chars: [" . bin2hex(mb_strcut("ABC", 1, 3, "ASCII")) . "]\n";

echo "== UCS-2BE ==\n";

// "ABC" as three 2-byte big-endian code units
$ucs2 = "\x00A\x00B\x00C";

echo "Empty: [" . bin2hex(mb_strcut($ucs2, 0, 0, "UCS-2BE")) . "]\n";
echo "Empty: [" . bin2hex(mb_strcut($ucs2, 1, 0, "UCS-2BE")) . "]\n";
echo "Empty: [" . bin2hex(mb_strcut($ucs2, 2, 0, "UCS-2BE")) . "]\n";

echo "Empty: [" . bin2hex(mb_strcut($ucs2, 2, 1, "UCS-2BE")) . "]\n";
echo "One char: [" . bin2hex(mb_strcut($ucs2, 1, 2, "UCS-2BE")) . "]\n";
echo "Cut in middle of following char: [" . bin2hex(mb_strcut($ucs2, 1, 3, "UCS-2BE")) . "]\n";
echo "Two chars: [" . bin2hex(mb_strcut($ucs2, 1, 4, "UCS-2BE")) . "]\n";

echo "== UCS-4BE ==\n";

// "AB" as two 4-byte big-endian code units
$ucs4 = "\x00\x00\x00\x41\x00\x00\x00\x42";

echo "From 1, Length 5: [" . bin2hex(mb_strcut($ucs4, 1, 5, "UCS-4BE")) . "]\n";
echo "From 1, Length 6: [" . bin2hex(mb_strcut($ucs4, 1, 6, "UCS-4BE")) . "]\n";
echo "From 1, Length 8: [" . bin2hex(mb_strcut($ucs4, 1, 8, "UCS-4BE")) . "]\n";

?>
--EXPECT--
== EUC-JP ==
[a4ce cab8]
[a4b3 a4ce]
[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3]
OK
OK
== UTF-8 ==
[]
[]
[]
[e288ae]
[e288ae 20]
[e288ae 20 45]
[]
[e288ae]
[e288ae 20]
[c3a5 42 c3a4 43 c3b6 44 c3bc]
== UTF-16 ==
Single byte: []
With from=1: []
Bad surrogate: []
Bad surrogate followed by other bytes: [d9001243]
BE byte order mark: []
LE byte order mark: []
Length=0: []
== UTF-16LE ==
[]
[]
[1a04]
[1a04]
[1a04]
[1a04]
[1a04 3804]
Single byte: []
== UTF-32BE ==
[]
[0000222e]
[0000222e]
[0000222e 00000020]
[0000222e 00000020]
== ISO-2022-JP ==
[]
[]
[]
[]
[]
[1b244234411b2842]
[]
[]
[1b244234411b2842]
[]
[1b24423b7a1b2842 20]
[1b24423b7a1b2842 20 61 62]
[1b24423b7a1b2842 20 61 62 63 20]
[20 61 62 63 20 1b2442252b1b2842]
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
[]
Error followed by ASCII char: [4b]
== ISO-2022-JP-2004 ==
[]
[]
[]
[]
[]
[]
[1b24285134411b2842]
[]
[]
[]
[1b24285134411b2842]
[]
[1b24285134411b2842]
[1b24285134411b2842 1b2428513b7a1b2842]
[1b2428513b7a1b2842 20 61 62 63]
[1b2428513b7a1b2842 20 61 62 63]
[1b24285134411b2842 1b2428513b7a1b2842 20 61 62 63 20 1b242851252b1b2842 1b242851254a1b2842]
[]
== ISO-2022-JP-MS ==
[]
[]
[]
[]
[]
[1b244234411b2842]
[1b244234411b2842]
[]
[]
[1b244234411b2842]
[1b244234411b2842]
[]
[1b24423b7a1b2842 20]
[1b24423b7a1b2842 20 61 62]
[1b24423b7a1b2842 20 61 62 63 20]
[20 61 62 63 20 1b2442252b1b2842]
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
[]
== JIS ==
[]
[]
[]
[]
[]
[1b244234411b2842]
[]
[]
[1b244234411b2842]
[]
[1b24423b7a1b2842 20]
[1b24423b7a1b2842 20 61 62]
[1b24423b7a1b2842 20 61 62 63 20]
[20 61 62 63 20 1b2442252b1b2842]
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
[]
0xA3: []
Bad escape sequence followed by null byte: []
== ISO-2022-JP-KDDI ==
[]
[]
[]
[]
[]
[1b244234411b2842]
[]
[]
[1b244234411b2842]
[]
[1b24423b7a1b2842 20]
[1b24423b7a1b2842 20 61 62]
[1b24423b7a1b2842 20 61 62 63 20]
[20 61 62 63 20 1b2442252b1b2842]
[1b244234411b2842 1b24423b7a1b2842 20 61 62 63 20 1b2442252b1b2842]
[]
== CP50220 ==
Single byte 0xFF: []
Double byte 0xFF: [3f]
Sample string with multiple null bytes: [1b2442255e001b2842]
Bad escape sequence preceded by bad bytes: [3f3f3f00]
Good JISX 0208 sequence, but it won't fit in max number of bytes: []
Bad escape sequence followed by GR kana: []
== UTF-7 ==
Single byte 0x01: []
UTF-16 section ends abruptly: []
UTF-16 section ends abruptly in middle of 2nd codepoint: [+Qxw-]
Cutting in middle of UTF-16 section: []
Cutting in middle of UTF-16 section (2): []
== UTF7-IMAP ==
Single byte 0x01: [?]
UTF-16 section ends abruptly: []
UTF-16 section ends abruptly in middle of 2nd codepoint: []
UTF-16 section is terminated improperly: []
== GB18030 ==
Empty string: []
Empty string 2: []
Empty string 3: []
Invalid byte 0xF5: []
Double-byte char: []
[]
[]
[9d68]
[9d68]
[9d68 d7d6]
[9d68 d7d6 20]
[9d68]
[9d68]
[9d68 d7d6]
Operating on 4-byte GB18030 character:
[8136bc32]
[]
[]
[]
[]
[]
[]
[]
[]
[]
== UHC ==
Single byte 0x96: [96]
== ASCII ==
Empty: []
Empty: []
Empty: []
One char: [43]
Two chars: [4243]
Two chars: [4243]
== UCS-2BE ==
Empty: []
Empty: []
Empty: []
Empty: []
One char: [0041]
Cut in middle of following char: [0041]
Two chars: [00410042]
== UCS-4BE ==
From 1, Length 5: [00000041]
From 1, Length 6: [00000041]
From 1, Length 8: [0000004100000042]