xref: /php-src/ext/mbstring/tests/gh10648.phpt (revision b721d0f7)
1--TEST--
2GH-10648 (mb_check_encoding() returns true for incorrect but interpretable ISO-2022-JP byte sequences)
3--EXTENSIONS--
4mbstring
5--FILE--
6<?php
7
// Hex-encoded input byte sequences, keyed by the title printed for each case.
// Every sequence is validated and converted as both 'JIS' and 'ISO-2022-JP'.
$testcases = [
    'ISO-2022-JP bytes' => '1b244224221b2842', // ESC $ B, 0x2422 ('あ'), ESC ( B
    'ISO-2022-JP bytes without escape sequence' => '1b24422422', // ESC $ B, 0x2422 ('あ'), but no trailing ESC ( B back to ASCII
    'JIS X 0201 7bit kana with escape sequence' => '1b2849311b2842', // ESC ( I, 0x31 (halfwidth 'ｱ'), ESC ( B
    'JIS X 0201 7bit kana with SO/SI' => '0e310f', // SO, 0x31 (halfwidth 'ｱ'), SI
    'JIS X 0201 8bit kana' => 'b1', // single 8-bit byte 0xb1 (halfwidth 'ｱ')
    'JIS X 0201 7bit kana with SO and ESC' => '0e311b2842', // kana opened with SO but closed with ESC ( B
    'JIS X 0201 7bit kana with ESC and SI' => '1b2849310f', // kana opened with ESC ( I but closed with SI
    'JIS X 0208 character' => '1b244242641b2842', // ESC $ B, 0x4264 ('鯛', included in JIS X 0208), ESC ( B
    'JIS X 0212 character' => '1b2428446a591b2842', // ESC $ ( D, 0x6a59 ('鮋', included in JIS X 0212), ESC ( B
    'JIS X 0213 character' => '1b2428507d4c1b2842', // ESC $ ( P, 0x7d4c ('𩸽' in ISO-2022-JP-2004, included in JIS X 0213), ESC ( B
    'JIS C 6220-1969 ESC ( H' => '1b284a1b2848', // ESC ( J, then ESC ( H: an escape sequence transitioning to ASCII
    'SO/SI when not in ASCII mode' => '1b284a0e0f1b2842', // ESC ( J, then SO and SI, then ESC ( B
];

foreach ($testcases as $title => $hex) {
    // Decode once; the same raw bytes are fed to both encodings.
    $bytes = hex2bin($hex);

    echo $title, PHP_EOL;
    foreach (['JIS', 'ISO-2022-JP'] as $encoding) {
        echo $encoding, ':', PHP_EOL;
        // Strict validation result for this encoding.
        var_dump(mb_check_encoding($bytes, $encoding));
        // Conversion result, printed as UTF-8.
        echo mb_convert_encoding($bytes, 'UTF-8', $encoding), PHP_EOL;
        // Illegal-character counter as reported by mbstring after the conversion.
        var_dump(mb_get_info('illegal_chars'));
    }
    echo PHP_EOL;
}
35?>
36--EXPECT--
ISO-2022-JP bytes
JIS:
bool(true)
あ
int(0)
ISO-2022-JP:
bool(true)
あ
int(0)
46
ISO-2022-JP bytes without escape sequence
JIS:
bool(false)
あ
int(0)
ISO-2022-JP:
bool(false)
あ
int(0)
56
JIS X 0201 7bit kana with escape sequence
JIS:
bool(true)
ｱ
int(0)
ISO-2022-JP:
bool(false)
ｱ
int(0)
66
JIS X 0201 7bit kana with SO/SI
JIS:
bool(true)
ｱ
int(0)
ISO-2022-JP:
bool(false)
ｱ
int(0)
76
JIS X 0201 8bit kana
JIS:
bool(true)
ｱ
int(0)
ISO-2022-JP:
bool(false)
ｱ
int(0)
86
JIS X 0201 7bit kana with SO and ESC
JIS:
bool(false)
ｱ
int(0)
ISO-2022-JP:
bool(false)
ｱ
int(0)
96
JIS X 0201 7bit kana with ESC and SI
JIS:
bool(false)
ｱ
int(0)
ISO-2022-JP:
bool(false)
ｱ
int(0)
106
JIS X 0208 character
JIS:
bool(true)
鯛
int(0)
ISO-2022-JP:
bool(true)
鯛
int(0)
116
JIS X 0212 character
JIS:
bool(true)
鮋
int(0)
ISO-2022-JP:
bool(false)
鮋
int(0)
126
127JIS X 0213 character
128JIS:
129bool(false)
130?$(P}L
131int(1)
132ISO-2022-JP:
133bool(false)
134?$(P}L
135int(2)
136
137JIS C 6220-1969 ESC ( H
138JIS:
139bool(true)
140
141int(2)
142ISO-2022-JP:
143bool(false)
144
145int(2)
146
147SO/SI when not in ASCII mode
148JIS:
149bool(false)
150
151int(2)
152ISO-2022-JP:
153bool(false)
154
155int(2)
156