xref: /PHP-8.2/ext/mbstring/tests/mb_substr.phpt (revision 1751f34c)
--TEST--
mb_substr()
--EXTENSIONS--
mbstring
--FILE--
<?php
/*
 * Exercises mb_substr() against a range of multibyte encodings.
 *
 * Each section prints a label followed by the extracted substring. For the
 * legacy Japanese/Chinese encodings the result is passed through bin2hex()
 * and printed as hex; HZ and UTF-8 results are printed raw; UTF-32 and UTF-7
 * results are converted back to UTF-8 before printing.
 *
 * The statement order and every printed label are significant: the output is
 * compared verbatim against the expected-output section of this test.
 */

// Look for the shared helper in the current directory only.
// NOTE(review): the contents of common.inc are not visible from this file.
ini_set('include_path','.');
include_once('common.inc');

// ---------------------------------------------------------------------------
// Fixtures
// ---------------------------------------------------------------------------

// EUC-JP
$euc_jp = mb_convert_encoding('0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。', 'EUC-JP', 'UTF-8');
// SJIS
// The digits 5-9 are FULLWIDTH DIGIT FIVE..NINE (U+FF15..U+FF19), not ASCII:
// the expected output for SJIS cases 3 and 4 contains the two-byte sequences
// 8254..8258. They are written as \u{...} escapes so that a tool applying
// Unicode compatibility normalization cannot silently fold them to ASCII.
$sjis = mb_convert_encoding("日本語テキストです。01234\u{FF15}\u{FF16}\u{FF17}\u{FF18}\u{FF19}。", 'SJIS', 'UTF-8');
// ISO-2022-JP
// Raw bytes: ESC $ B, two double-byte characters (0x2121, 0x2172), ESC ( B, then "ABC".
$iso2022jp = "\x1B\$B\x21\x21!r\x1B(BABC";
// GB-18030
$gb18030 = mb_convert_encoding('密码用户名密码名称名称', 'GB18030', 'UTF-8');
// HZ
// ASCII text with one "~{ ... ~}" delimited double-byte run in the middle.
$hz = "The next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.";
// UTF-8
$utf8 = "Greek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь";
// UTF-32
$utf32 = mb_convert_encoding($utf8, 'UTF-32', 'UTF-8');
// UTF-7
$utf7 = mb_convert_encoding($utf8, 'UTF-7', 'UTF-8');

// ---------------------------------------------------------------------------
// EUC-JP
// ---------------------------------------------------------------------------
echo "EUC-JP:\n";
print "1: ". bin2hex(mb_substr($euc_jp, 10,  10, 'EUC-JP')) . "\n";
// Length larger than the string: the whole string is expected back.
print "2: ". bin2hex(mb_substr($euc_jp,  0, 100, 'EUC-JP')) . "\n";

// Start offset past the end of the string must yield an empty string.
$str = mb_substr($euc_jp, 100, 10, 'EUC-JP');
print ($str === "") ? "3 OK\n" : "BAD: " . bin2hex($str) . "\n";

// Negative start offset larger than the string length must still return
// a non-empty result.
$str = mb_substr($euc_jp, -100, 10, 'EUC-JP');
print ($str !== "") ? "4 OK: " . bin2hex($str) . "\n" : "BAD: " . bin2hex($str) . "\n";

// ---------------------------------------------------------------------------
// Shift-JIS and its variants
// ---------------------------------------------------------------------------
echo "SJIS:\n";
print "1: " . bin2hex(mb_substr($sjis, 0, 3, 'SJIS')) . "\n";
print "2: " . bin2hex(mb_substr($sjis, -1, null, 'SJIS')) . "\n";
print "3: " . bin2hex(mb_substr($sjis, -5, 3, 'SJIS')) . "\n";
print "4: " . bin2hex(mb_substr($sjis, 1, null, 'SJIS')) . "\n";
// Zero length: nothing follows the label (note: no space after the colon).
print "5:" . bin2hex(mb_substr($sjis, 10, 0, 'SJIS')) . "\n";
// The same input, containing byte 0x80 at both ends of "abc", is sliced
// under every SJIS flavour below; each is expected to count 0x80 as one
// character and pass it through unchanged.
echo "-- Testing illegal SJIS byte 0x80 --\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS')) . "\n";

echo "SJIS-2004:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-2004')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-2004')) . "\n";

echo "MacJapanese:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'MacJapanese')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'MacJapanese')) . "\n";

echo "SJIS-Mobile#DOCOMO:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#DOCOMO')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#DOCOMO')) . "\n";

echo "SJIS-Mobile#KDDI:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#KDDI')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#KDDI')) . "\n";

echo "SJIS-Mobile#SoftBank:\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 3, 2, 'SJIS-Mobile#SoftBank')) . "\n";
print bin2hex(mb_substr("\x80abc\x80\xA1", 0, 3, 'SJIS-Mobile#SoftBank')) . "\n";

// ---------------------------------------------------------------------------
// ISO-2022-JP (escape-sequence based; the fixture holds five characters)
// ---------------------------------------------------------------------------
echo "ISO-2022-JP:\n";
print "1: " . bin2hex(mb_substr($iso2022jp, 0, 3, 'ISO-2022-JP')) . "\n";
print "2: " . bin2hex(mb_substr($iso2022jp, -1, null, 'ISO-2022-JP')) . "\n";
// -6 reaches before the start of the five-character fixture.
print "3: " . bin2hex(mb_substr($iso2022jp, -6, 3, 'ISO-2022-JP')) . "\n";
print "4: " . bin2hex(mb_substr($iso2022jp, -3, 2, 'ISO-2022-JP')) . "\n";
print "5: " . bin2hex(mb_substr($iso2022jp, 1, null, 'ISO-2022-JP')) . "\n";
// Start offsets past the end of the string: nothing follows the label.
print "6:" . bin2hex(mb_substr($iso2022jp, 10, 0, 'ISO-2022-JP')) . "\n";
print "7:" . bin2hex(mb_substr($iso2022jp, 100, 10, 'ISO-2022-JP')) . "\n";

// ---------------------------------------------------------------------------
// GB-18030
// ---------------------------------------------------------------------------
echo "GB-18030:\n";
print "1: " . bin2hex(mb_substr($gb18030, 0, 3, 'GB-18030')) . "\n";
print "2: " . bin2hex(mb_substr($gb18030, -1, null, 'GB-18030')) . "\n";
print "3: " . bin2hex(mb_substr($gb18030, -5, 3, 'GB-18030')) . "\n";
print "4: " . bin2hex(mb_substr($gb18030, 1, null, 'GB-18030')) . "\n";
print "5:" . bin2hex(mb_substr($gb18030, 10, 0, 'GB-18030')) . "\n";

// ---------------------------------------------------------------------------
// HZ (printed raw; the fixture is 7-bit clean)
// ---------------------------------------------------------------------------
echo "HZ:\n";
print "1: " . mb_substr($hz, 0, 3, 'HZ') . "\n";
print "2: " . mb_substr($hz, -1, null, 'HZ') . "\n";
// This slice starts inside the double-byte run and ends in the ASCII tail.
print "3: " . mb_substr($hz, -5, 3, 'HZ') . "\n";
print "4: " . mb_substr($hz, 1, null, 'HZ') . "\n";
print "5:" . mb_substr($hz, 10, 0, 'HZ') . "\n";

// ---------------------------------------------------------------------------
// UTF-8 (printed raw)
// ---------------------------------------------------------------------------
echo "UTF-8:\n";
print "1: " . mb_substr($utf8, 0, 3, 'UTF-8') . "\n";
print "2: " . mb_substr($utf8, -1, null, 'UTF-8') . "\n";
print "3: " . mb_substr($utf8, -5, 3, 'UTF-8') . "\n";
print "4: " . mb_substr($utf8, 1, null, 'UTF-8') . "\n";
print "5:" . mb_substr($utf8, 10, 0, 'UTF-8') . "\n";

// ---------------------------------------------------------------------------
// UTF-32 (same text and offsets as UTF-8; converted back for printing)
// ---------------------------------------------------------------------------
echo "UTF-32:\n";
print "1: " . mb_convert_encoding(mb_substr($utf32, 0, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "2: " . mb_convert_encoding(mb_substr($utf32, -1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "3: " . mb_convert_encoding(mb_substr($utf32, -5, 3, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "4: " . mb_convert_encoding(mb_substr($utf32, 1, null, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";
print "5:" . mb_convert_encoding(mb_substr($utf32, 10, 0, 'UTF-32'), 'UTF-8', 'UTF-32') . "\n";

// ---------------------------------------------------------------------------
// UTF-7 (same text and offsets as UTF-8; converted back for printing)
// ---------------------------------------------------------------------------
echo "UTF-7:\n";
print "1: " . mb_convert_encoding(mb_substr($utf7, 0, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "2: " . mb_convert_encoding(mb_substr($utf7, -1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "3: " . mb_convert_encoding(mb_substr($utf7, -5, 3, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "4: " . mb_convert_encoding(mb_substr($utf7, 1, null, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";
print "5:" . mb_convert_encoding(mb_substr($utf7, 10, 0, 'UTF-7'), 'UTF-8', 'UTF-7') . "\n";

?>
--EXPECT--
EUC-JP:
1: c6fccbdcb8eca4c7a4b9a1a34555432d
2: 30313233a4b3a4cecab8bbfacef3a4cfc6fccbdcb8eca4c7a4b9a1a34555432d4a50a4f2bbc8a4c3a4c6a4a4a4dea4b9a1a3c6fccbdcb8eca4cfccccc5ddbdada4a4a1a3
3 OK
4 OK: 30313233a4b3a4cecab8bbfacef3a4cf
SJIS:
1: 93fa967b8cea
2: 8142
3: 825582568257
4: 967b8cea8365834c8358836782c582b781423031323334825482558256825782588142
5:
-- Testing illegal SJIS byte 0x80 --
6380
806162
SJIS-2004:
6380
806162
MacJapanese:
6380
806162
SJIS-Mobile#DOCOMO:
6380
806162
SJIS-Mobile#KDDI:
6380
806162
SJIS-Mobile#SoftBank:
6380
806162
ISO-2022-JP:
1: 1b2442212121721b284241
2: 43
3: 1b2442212121721b284241
4: 4142
5: 1b244221721b2842414243
6:
7:
GB-18030:
1: c3dcc2ebd3c3
2: b3c6
3: c2ebc3fbb3c6
4: c2ebd3c3bba7c3fbc3dcc2ebc3fbb3c6c3fbb3c6
5:
HZ:
1: The
2: .
3: ~{!#~}By
4: he next sentence is in GB.~{<:Ky2;S{#,NpJ)l6HK!#~}Bye.
5:
UTF-8:
1: Gre
2: ь
3: йте
4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
5:
UTF-32:
1: Gre
2: ь
3: йте
4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
5:
UTF-7:
1: Gre
2: ь
3: йте
4: reek: Σὲ γνωρίζω ἀπὸ τὴν κόψη Russian: Зарегистрируйтесь
5: