xref: /PHP-8.1/ext/mbstring/tests/mb_substr.phpt (revision d104481a)
1--TEST--
2mb_substr()
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
8<?php
9// Checks mb_substr() with assorted start/length arguments over several encodings. TODO: Add more encodings
10ini_set('include_path','.'); // resolve the include below relative to the current directory
11include_once('common.inc');
12
13// EUC-JP fixture: converted from a UTF-8 literal mixing ASCII and Japanese text
14$euc_jp = mb_convert_encoding('0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。', 'EUC-JP', 'UTF-8');
15// SJIS fixture: converted from a UTF-8 literal
16$sjis = mb_convert_encoding('日本語テキストです。0123456789。', 'SJIS', 'UTF-8');
17// ISO-2022-JP fixture: hand-written bytes; escape sequences "\x1B$B" and "\x1B(B" surround four bytes, followed by "ABC"
18$iso2022jp = "\x1B\$B\x21\x21!r\x1B(BABC";
19// GB-18030 fixture: converted from a UTF-8 literal
20$gb18030 = mb_convert_encoding('密码用户名密码名称名称', 'GB18030', 'UTF-8');
21// HZ fixture: hand-written literal with a "~{" ... "~}" delimited span between two runs of ASCII text
22$hz = "The next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.";
23// UTF-8 fixture: Greek and Russian sample text; also the source text for the UTF-32 and UTF-7 fixtures below
24$utf8 = "Greek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь";
25// UTF-32 fixture: $utf8 converted to UTF-32
26$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
27// UTF-7 fixture: $utf8 converted to UTF-7
28$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');
29
30print  "1: ". bin2hex(mb_substr($euc_jp,  10,  10,'EUC-JP')) . "\n"; // 10 characters starting at offset 10, printed as hex
31print  "2: ". bin2hex(mb_substr($euc_jp,   0, 100,'EUC-JP')) . "\n"; // requested length is larger than the string
32
33$str = mb_substr($euc_jp, 100, 10,'EUC-JP');
34// Note: start offset 100 is past the end of the string, so an empty string is the expected result
35($str === "") ? print "3 OK\n" : print "NG: ".bin2hex($str)."\n";
36
37$str = mb_substr($euc_jp, -100, 10, 'EUC-JP'); // negative start reaching back further than the string is long
38print ($str !== "") ? "4 OK: " . bin2hex($str) . "\n" : "BAD: " . bin2hex($str) . "\n"; // --EXPECT-- shows the first 10 characters here
39
40echo "SJIS:\n";
41print "1: " . bin2hex(mb_substr($sjis, 0, 3, 'SJIS')) . "\n"; // first 3 characters
42print "2: " . bin2hex(mb_substr($sjis, -1, null, 'SJIS')) . "\n"; // negative start with null length: last character only
43print "3: " . bin2hex(mb_substr($sjis, -5, 3, 'SJIS')) . "\n"; // 3 characters starting 5 from the end
44print "4: " . bin2hex(mb_substr($sjis, 1, null, 'SJIS')) . "\n"; // null length: everything from offset 1 onwards
45print "5:" . bin2hex(mb_substr($sjis, 10, 0, 'SJIS')) . "\n"; // zero length; label has no trailing space, matching the bare "5:" line in --EXPECT--
46echo "-- Testing illegal SJIS byte 0x80 --\n";
47print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS')) . "\n"; // --EXPECT-- is 6380, i.e. each 0x80 byte counts as one character
48print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS')) . "\n"; // --EXPECT-- is 806162: the leading 0x80 is kept as-is
49
50echo "SJIS-2004:\n"; // this and the variants below repeat the same two 0x80 checks; --EXPECT-- lists identical output for each
51print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-2004')) . "\n";
52print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-2004')) . "\n";
53
54echo "MacJapanese:\n";
55print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'MacJapanese')) . "\n";
56print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'MacJapanese')) . "\n";
57
58echo "SJIS-Mobile#DOCOMO:\n";
59print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#DOCOMO')) . "\n";
60print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#DOCOMO')) . "\n";
61
62echo "SJIS-Mobile#KDDI:\n";
63print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#KDDI')) . "\n";
64print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#KDDI')) . "\n";
65
66echo "SJIS-Mobile#SoftBank:\n";
67print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#SoftBank')) . "\n";
68print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#SoftBank')) . "\n";
69
70echo "ISO-2022-JP:\n";
71print "1: " . bin2hex(mb_substr($iso2022jp, 0, 3, 'ISO-2022-JP')) . "\n"; // first 3 characters; --EXPECT-- includes both escape sequences
72print "2: " . bin2hex(mb_substr($iso2022jp, -1, null, 'ISO-2022-JP')) . "\n"; // last character only
73print "3: " . bin2hex(mb_substr($iso2022jp, -6, 3, 'ISO-2022-JP')) . "\n"; // negative start beyond the beginning; --EXPECT-- matches case 1
74print "4: " . bin2hex(mb_substr($iso2022jp, -3, 2, 'ISO-2022-JP')) . "\n"; // 2 characters starting 3 from the end
75print "5: " . bin2hex(mb_substr($iso2022jp, 1, null, 'ISO-2022-JP')) . "\n"; // from offset 1 to the end; --EXPECT-- begins with an escape sequence
76print "6:" . bin2hex(mb_substr($iso2022jp, 10, 0, 'ISO-2022-JP')) . "\n"; // zero length: empty result expected
77print "7:" . bin2hex(mb_substr($iso2022jp, 100, 10, 'ISO-2022-JP')) . "\n"; // start offset past the end: empty result expected
78
79echo "GB-18030:\n"; // results are hex-dumped, same start/length combinations as the SJIS cases
80print "1: " . bin2hex(mb_substr($gb18030, 0, 3, 'GB-18030')) . "\n";
81print "2: " . bin2hex(mb_substr($gb18030, -1, null, 'GB-18030')) . "\n";
82print "3: " . bin2hex(mb_substr($gb18030, -5, 3, 'GB-18030')) . "\n";
83print "4: " . bin2hex(mb_substr($gb18030, 1, null, 'GB-18030')) . "\n";
84print "5:" . bin2hex(mb_substr($gb18030, 10, 0, 'GB-18030')) . "\n"; // zero length: empty result expected
85
86echo "HZ:\n"; // results are printed raw rather than through bin2hex()
87print "1: " . mb_substr($hz, 0, 3, 'HZ') . "\n";
88print "2: " . mb_substr($hz, -1, null, 'HZ') . "\n";
89print "3: " . mb_substr($hz, -5, 3, 'HZ') . "\n"; // --EXPECT-- shows the result wrapped in its own "~{" ... "~}" pair
90print "4: " . mb_substr($hz, 1, null, 'HZ') . "\n";
91print "5:" . mb_substr($hz, 10, 0, 'HZ') . "\n"; // zero length: empty result expected
92
93echo "UTF-8:\n"; // results are printed directly
94print "1: " . mb_substr($utf8, 0, 3, 'UTF-8') . "\n";
95print "2: " . mb_substr($utf8, -1, null, 'UTF-8') . "\n";
96print "3: " . mb_substr($utf8, -5, 3, 'UTF-8') . "\n";
97print "4: " . mb_substr($utf8, 1, null, 'UTF-8') . "\n";
98print "5:" . mb_substr($utf8, 10, 0, 'UTF-8') . "\n"; // zero length: empty result expected
99
100echo "UTF-32:\n"; // each result is converted back to UTF-8 before printing; --EXPECT-- matches the UTF-8 section
101print "1: " . mb_convert_encoding(mb_substr($utf32, 0, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
102print "2: " . mb_convert_encoding(mb_substr($utf32, -1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
103print "3: " . mb_convert_encoding(mb_substr($utf32, -5, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
104print "4: " . mb_convert_encoding(mb_substr($utf32, 1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
105print "5:" . mb_convert_encoding(mb_substr($utf32, 10, 0, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
106
107echo "UTF-7:\n"; // each result is converted back to UTF-8 before printing; --EXPECT-- matches the UTF-8 section
108print "1: " . mb_convert_encoding(mb_substr($utf7, 0, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
109print "2: " . mb_convert_encoding(mb_substr($utf7, -1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
110print "3: " . mb_convert_encoding(mb_substr($utf7, -5, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
111print "4: " . mb_convert_encoding(mb_substr($utf7, 1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
112print "5:" . mb_convert_encoding(mb_substr($utf7, 10, 0, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
113
114?>
115--EXPECT--
1161: c6fccbdcb8eca4c7a4b9a1a34555432d
1172: 30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3
1183 OK
1194 OK: 30313233a4b3a4cecab8bbfacef3a4cf
120SJIS:
1211: 93fa967b8cea
1222: 8142
1233: 825582568257
1244: 967b8cea8365834c8358836782c582b781423031323334825482558256825782588142
1255:
126-- Testing illegal SJIS byte 0x80 --
1276380
128806162
129SJIS-2004:
1306380
131806162
132MacJapanese:
1336380
134806162
135SJIS-Mobile#DOCOMO:
1366380
137806162
138SJIS-Mobile#KDDI:
1396380
140806162
141SJIS-Mobile#SoftBank:
1426380
143806162
144ISO-2022-JP:
1451: 1b2442212121721b284241
1462: 43
1473: 1b2442212121721b284241
1484: 4142
1495: 1b244221721b2842414243
1506:
1517:
152GB-18030:
1531: c3dcc2ebd3c3
1542: b3c6
1553: c2ebc3fbb3c6
1564: c2ebd3c3bba7c3fbc3dcc2ebc3fbb3c6c3fbb3c6
1575:
158HZ:
1591: The
1602: .
1613: ~{!#~}By
1624: he next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.
1635:
164UTF-8:
1651: Gre
1662: ь
1673: йте
1684: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
1695:
170UTF-32:
1711: Gre
1722: ь
1733: йте
1744: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
1755:
176UTF-7:
1771: Gre
1782: ь
1793: йте
1804: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
1815:
182