--TEST--
mb_substr()
--EXTENSIONS--
mbstring
--INI--
output_handler=
--FILE--
<?php
// Exercises mb_substr() across several multibyte encodings. For each encoding
// the script prints either the raw result or its hex dump, and the run-tests
// harness compares that output with the EXPECT section below.
// TODO: Add more encodings
ini_set('include_path','.');
include_once('common.inc');

// ---------------------------------------------------------------------------
// Fixtures: one sample string per encoding under test.
// ---------------------------------------------------------------------------

// EUC-JP
$euc_jp = mb_convert_encoding('0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。', 'EUC-JP', 'UTF-8');
// SJIS
// Digits 5-9 are deliberately FULL-WIDTH (SJIS 0x8254-0x8258) so the tail of
// the string mixes single-byte and double-byte characters; the EXPECT section
// depends on these exact bytes.
$sjis = mb_convert_encoding('日本語テキストです。01234５６７８９。', 'SJIS', 'UTF-8');
// ISO-2022-JP (written as raw bytes: ESC $ B <two JIS X 0208 chars> ESC ( B "ABC")
$iso2022jp = "\x1B\$B\x21\x21!r\x1B(BABC";
// GB-18030
$gb18030 = mb_convert_encoding('密码用户名密码名称名称', 'GB18030', 'UTF-8');
// HZ (GB text is enclosed in ~{ ... ~})
$hz = "The next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.";
// UTF-8
$utf8 = "Greek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь";
// UTF-32
$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
// UTF-7
$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');

// ---------------------------------------------------------------------------
// EUC-JP (no section header is printed for this group)
// ---------------------------------------------------------------------------
print "1: ". bin2hex(mb_substr($euc_jp, 10, 10,'EUC-JP')) . "\n";
// A length larger than the string returns everything from the start offset.
print "2: ". bin2hex(mb_substr($euc_jp, 0, 100,'EUC-JP')) . "\n";

$str = mb_substr($euc_jp, 100, 10,'EUC-JP');
// Note: a start offset past the end of the string must yield an empty string
print ($str === "") ? "3 OK\n" : "NG: " . bin2hex($str) . "\n";

// A negative start reaching before the beginning is clamped to offset 0, so
// this returns the first 10 characters (see EXPECT).
$str = mb_substr($euc_jp, -100, 10, 'EUC-JP');
print ($str !== "") ? "4 OK: " . bin2hex($str) . "\n" : "BAD: " . bin2hex($str) . "\n";

// ---------------------------------------------------------------------------
// Shift-JIS and its variants
// ---------------------------------------------------------------------------
echo "SJIS:\n";
print "1: " . bin2hex(mb_substr($sjis, 0, 3, 'SJIS')) . "\n";
print "2: " . bin2hex(mb_substr($sjis, -1, null, 'SJIS')) . "\n";
print "3: " . bin2hex(mb_substr($sjis, -5, 3, 'SJIS')) . "\n";
print "4: " . bin2hex(mb_substr($sjis, 1, null, 'SJIS')) . "\n";
// Length 0 => empty result, so only the bare label is printed.
print "5:" . bin2hex(mb_substr($sjis, 10, 0, 'SJIS')) . "\n";
// Per EXPECT, the invalid byte 0x80 is counted as one character and copied
// through unchanged; the same two probes are repeated for each SJIS variant.
echo "-- Testing illegal SJIS byte 0x80 --\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS')) . "\n";

echo "SJIS-2004:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-2004')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-2004')) . "\n";

echo "MacJapanese:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'MacJapanese')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'MacJapanese')) . "\n";

echo "SJIS-Mobile#DOCOMO:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#DOCOMO')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#DOCOMO')) . "\n";

echo "SJIS-Mobile#KDDI:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#KDDI')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#KDDI')) . "\n";

echo "SJIS-Mobile#SoftBank:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#SoftBank')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#SoftBank')) . "\n";

// ---------------------------------------------------------------------------
// ISO-2022-JP: per EXPECT, results containing JIS X 0208 characters carry
// their own escape sequences (1b2442 ... 1b2842).
// ---------------------------------------------------------------------------
echo "ISO-2022-JP:\n";
print "1: " . bin2hex(mb_substr($iso2022jp, 0, 3, 'ISO-2022-JP')) . "\n";
print "2: " . bin2hex(mb_substr($iso2022jp, -1, null, 'ISO-2022-JP')) . "\n";
print "3: " . bin2hex(mb_substr($iso2022jp, -6, 3, 'ISO-2022-JP')) . "\n";
print "4: " . bin2hex(mb_substr($iso2022jp, -3, 2, 'ISO-2022-JP')) . "\n";
print "5: " . bin2hex(mb_substr($iso2022jp, 1, null, 'ISO-2022-JP')) . "\n";
// Both of the following produce an empty result (zero length / start past end).
print "6:" . bin2hex(mb_substr($iso2022jp, 10, 0, 'ISO-2022-JP')) . "\n";
print "7:" . bin2hex(mb_substr($iso2022jp, 100, 10, 'ISO-2022-JP')) . "\n";

// ---------------------------------------------------------------------------
// GB-18030
// ---------------------------------------------------------------------------
echo "GB-18030:\n";
print "1: " . bin2hex(mb_substr($gb18030, 0, 3, 'GB-18030')) . "\n";
print "2: " . bin2hex(mb_substr($gb18030, -1, null, 'GB-18030')) . "\n";
print "3: " . bin2hex(mb_substr($gb18030, -5, 3, 'GB-18030')) . "\n";
print "4: " . bin2hex(mb_substr($gb18030, 1, null, 'GB-18030')) . "\n";
print "5:" . bin2hex(mb_substr($gb18030, 10, 0, 'GB-18030')) . "\n";

// ---------------------------------------------------------------------------
// HZ: output is 7-bit text, so it is printed raw rather than hex-dumped.
// ---------------------------------------------------------------------------
echo "HZ:\n";
print "1: " . mb_substr($hz, 0, 3, 'HZ') . "\n";
print "2: " . mb_substr($hz, -1, null, 'HZ') . "\n";
print "3: " . mb_substr($hz, -5, 3, 'HZ') . "\n";
print "4: " . mb_substr($hz, 1, null, 'HZ') . "\n";
print "5:" . mb_substr($hz, 10, 0, 'HZ') . "\n";

// ---------------------------------------------------------------------------
// UTF-8
// ---------------------------------------------------------------------------
echo "UTF-8:\n";
print "1: " . mb_substr($utf8, 0, 3, 'UTF-8') . "\n";
print "2: " . mb_substr($utf8, -1, null, 'UTF-8') . "\n";
print "3: " . mb_substr($utf8, -5, 3, 'UTF-8') . "\n";
print "4: " . mb_substr($utf8, 1, null, 'UTF-8') . "\n";
print "5:" . mb_substr($utf8, 10, 0, 'UTF-8') . "\n";

// ---------------------------------------------------------------------------
// UTF-32 and UTF-7: results are converted back to UTF-8 before printing so
// the expected output is the same readable text as the UTF-8 group.
// ---------------------------------------------------------------------------
echo "UTF-32:\n";
print "1: " . mb_convert_encoding(mb_substr($utf32, 0, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "2: " . mb_convert_encoding(mb_substr($utf32, -1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "3: " . mb_convert_encoding(mb_substr($utf32, -5, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "4: " . mb_convert_encoding(mb_substr($utf32, 1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "5:" . mb_convert_encoding(mb_substr($utf32, 10, 0, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";

echo "UTF-7:\n";
print "1: " . mb_convert_encoding(mb_substr($utf7, 0, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "2: " . mb_convert_encoding(mb_substr($utf7, -1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "3: " . mb_convert_encoding(mb_substr($utf7, -5, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "4: " . mb_convert_encoding(mb_substr($utf7, 1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "5:" . mb_convert_encoding(mb_substr($utf7, 10, 0, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";

?>
--EXPECT--
1: c6fccbdcb8eca4c7a4b9a1a34555432d
2: 30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3
3 OK
4 OK: 30313233a4b3a4cecab8bbfacef3a4cf
SJIS:
1: 93fa967b8cea
2: 8142
3: 825582568257
4: 967b8cea8365834c8358836782c582b781423031323334825482558256825782588142
5:
-- Testing illegal SJIS byte 0x80 --
6380
806162
SJIS-2004:
6380
806162
MacJapanese:
6380
806162
SJIS-Mobile#DOCOMO:
6380
806162
SJIS-Mobile#KDDI:
6380
806162
SJIS-Mobile#SoftBank:
6380
806162
ISO-2022-JP:
1: 1b2442212121721b284241
2: 43
3: 1b2442212121721b284241
4: 4142
5: 1b244221721b2842414243
6:
7:
GB-18030:
1: c3dcc2ebd3c3
2: b3c6
3: c2ebc3fbb3c6
4: c2ebd3c3bba7c3fbc3dcc2ebc3fbb3c6c3fbb3c6
5:
HZ:
1: The
2: .
3: ~{!#~}By
4: he next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.
5:
UTF-8:
1: Gre
2: ь
3: йте
4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
5:
UTF-32:
1: Gre
2: ь
3: йте
4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
5:
UTF-7:
1: Gre
2: ь
3: йте
4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
5: