xref: /php-src/ext/mbstring/tests/gh9535.phpt (revision d3933e0b)
1--TEST--
2GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1)
3--EXTENSIONS--
4mbstring
5--FILE--
<?php
// Encodings exercised by this regression test. Every case below is run once
// per entry, in this order, so the order also fixes the order of the output.
$encodings = [
    'UTF-16',
    'UTF-16BE',
    'UTF-16LE',
    'UTF-7',
    'UTF7-IMAP',
    'ISO-2022-JP-MS',
    'GB18030',
    'HZ',
    'ISO-2022-KR',
    'ISO-2022-JP-MOBILE#KDDI',
    'CP50220',
    'CP50221',
    'CP50222',
];

/**
 * Converts $text from the internal encoding to $encoding, keeps at most
 * $byteLimit bytes from offset 0 using mb_strcut(), and converts what is
 * left back to the internal encoding.
 *
 * @param string $text      Input text, expressed in mb_internal_encoding().
 * @param int    $byteLimit Byte length handed to mb_strcut().
 * @param string $encoding  Encoding in which the cut is performed.
 *
 * @return string The cut text, back in the internal encoding.
 */
function gh9535_cut_roundtrip(string $text, int $byteLimit, string $encoding): string
{
    $internal = mb_internal_encoding();
    $encoded = mb_convert_encoding($text, $encoding, $internal);
    $truncated = mb_strcut($encoded, 0, $byteLimit, $encoding);

    return mb_convert_encoding($truncated, $internal, $encoding);
}

// [input text, byte length] pairs whose results are printed as-is.
$plainCases = [
    ['宛如繁星般宛如皎月般', 15],
    ['星のように月のように', 20],
    ['あaいb', 10],
    // For ISO-2022-KR, the initial escape sequence 'ESC $ ) C' will occupy 4 bytes of the output;
    // this will make mb_strcut only pick out 6 'A' characters from the input string and not 10
    ['AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA', 10],
];

foreach ($plainCases as [$text, $byteLimit]) {
    foreach ($encodings as $encoding) {
        $result = gh9535_cut_roundtrip($text, $byteLimit, $encoding);
        echo "{$encoding}: {$result}" . PHP_EOL;
    }

    // Blank line separating this case from the next one.
    echo PHP_EOL;
}

$text = '???';
$byteLimit = 2;

// ISO-2022-KR will be affected by the initial escape sequence as stated above.
// trim() drops the space after the colon when nothing survives the cut, so
// such a line is printed without trailing whitespace.
foreach ($encodings as $encoding) {
    $result = gh9535_cut_roundtrip($text, $byteLimit, $encoding);
    echo $encoding . trim(': ' . $result) . PHP_EOL;
}

echo PHP_EOL;

// Same text and byte length, but the raw string is cut directly under each
// encoding name, without converting it to that encoding first.
foreach ($encodings as $encoding) {
    $result = mb_strcut($text, 0, $byteLimit, $encoding);
    echo $encoding . trim(': ' . $result) . PHP_EOL;
}

?>
86--EXPECT--
87UTF-16: 宛如繁星般宛如
88UTF-16BE: 宛如繁星般宛如
89UTF-16LE: 宛如繁星般宛如
90UTF-7: 宛如繁星
91UTF7-IMAP: 宛如繁星
92ISO-2022-JP-MS: 宛如繁星
93GB18030: 宛如繁星般宛如
94HZ: 宛如繁星般
95ISO-2022-KR: 宛如繁星
96ISO-2022-JP-MOBILE#KDDI: 宛如繁星
97CP50220: 宛如繁星
98CP50221: 宛如繁星
99CP50222: 宛如繁星
100
101UTF-16: 星のように月のように
102UTF-16BE: 星のように月のように
103UTF-16LE: 星のように月のように
104UTF-7: 星のように月
105UTF7-IMAP: 星のように月
106ISO-2022-JP-MS: 星のように月の
107GB18030: 星のように月のように
108HZ: 星のように月のよ
109ISO-2022-KR: 星のように月の
110ISO-2022-JP-MOBILE#KDDI: 星のように月の
111CP50220: 星のように月の
112CP50221: 星のように月の
113CP50222: 星のように月の
114
115UTF-16: あaいb
116UTF-16BE: あaいb
117UTF-16LE: あaいb
118UTF-7: あa
119UTF7-IMAP: あa
120ISO-2022-JP-MS: あa
121GB18030: あaいb
122HZ: あa
123ISO-2022-KR: あa
124ISO-2022-JP-MOBILE#KDDI: あa
125CP50220: あa
126CP50221: あa
127CP50222: あa
128
129UTF-16: AAAAA
130UTF-16BE: AAAAA
131UTF-16LE: AAAAA
132UTF-7: AAAAAAAAAA
133UTF7-IMAP: AAAAAAAAAA
134ISO-2022-JP-MS: AAAAAAAAAA
135GB18030: AAAAAAAAAA
136HZ: AAAAAAAAAA
137ISO-2022-KR: AAAAAA
138ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA
139CP50220: AAAAAAAAAA
140CP50221: AAAAAAAAAA
141CP50222: AAAAAAAAAA
142
143UTF-16: ?
144UTF-16BE: ?
145UTF-16LE: ?
146UTF-7: ??
147UTF7-IMAP: ??
148ISO-2022-JP-MS: ??
149GB18030: ??
150HZ: ??
151ISO-2022-KR:
152ISO-2022-JP-MOBILE#KDDI: ??
153CP50220: ??
154CP50221: ??
155CP50222: ??
156
157UTF-16: ??
158UTF-16BE: ??
159UTF-16LE: ??
160UTF-7: ??
161UTF7-IMAP: ??
162ISO-2022-JP-MS: ??
163GB18030: ??
164HZ: ??
165ISO-2022-KR:
166ISO-2022-JP-MOBILE#KDDI: ??
167CP50220: ??
168CP50221: ??
169CP50222: ??
170