1--TEST--
2mb_strimwidth()
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
8<?php
9
// Render $string as space-separated hex dumps, one group per character.
// Characters are delimited according to $encoding; each group shows the raw
// bytes of one character as lowercase hex.
function MBStringChars($string, $encoding) {
  $hexGroups = [];
  foreach (mb_str_split($string, 1, $encoding) as $character) {
    $hexGroups[] = bin2hex($character);
  }
  return implode(' ', $hexGroups);
}
16
// Return the display width of every character of $string (as measured by
// mb_strwidth in $encoding), joined by single spaces.
function MBStringWidths($string, $encoding) {
  $chars = mb_str_split($string, 1, $encoding);
  // $encoding is only read, so it is captured by value (arrow function)
  // rather than by reference
  return implode(' ', array_map(fn($char) => mb_strwidth($char, $encoding), $chars));
}
23
// Run mb_strimwidth() with an explicit trim marker and print one report line:
// the requested start/width, the result as hex character groups, and the
// result's length and width in $encoding.
function testStrimwidthWithMarker($string, $trimmark, $start, $width, $encoding) {
    $trimmed = mb_strimwidth($string, $start, $width, $trimmark, $encoding);
    echo "start={$start} width={$width} result=[", MBStringChars($trimmed, $encoding), ']';
    echo ' length=', mb_strlen($trimmed, $encoding);
    echo ' width=', mb_strwidth($trimmed, $encoding), "\n";
}
31
// Same as testStrimwidthWithMarker(), using '...' as the trim marker.
function testStrimwidth($string, $start, $width, $encoding) {
  // The marker is converted from ASCII into the encoding of the string under test
  $marker = mb_convert_encoding('...', $encoding, 'ASCII');
  testStrimwidthWithMarker($string, $marker, $start, $width, $encoding);
}
35
echo "== UTF-16LE ==\n";

// UTF-8 equivalent of this string: 123abc漢字かな
$utf16le = hex2bin('310032003300610062006300226f575b4b306a30');

echo 'String length: ', mb_strlen($utf16le, 'UTF-16LE'), "\n";
echo 'String width: ', mb_strwidth($utf16le, 'UTF-16LE'), "\n";
echo 'Characters: [', MBStringChars($utf16le, 'UTF-16LE'), "]\n";
echo 'Character widths: [', MBStringWidths($utf16le, 'UTF-16LE'), "]\n\n";

// Each case is [start, width]; they run in the order listed
$utf16leCases = [
    // Far more room than the whole string needs
    [0, 100],
    // Exactly the width of the string, so it only just fits
    [0, 14],
    // One column short: the string has to be cut and the trim marker appended
    [0, 13],
    // One column tighter still
    [0, 12],
    // Requested width of zero: the trim marker is still emitted, pushing the
    // output width beyond the stipulated 'maximum'
    [0, 0],
    // Requested width of one: likewise, the trim marker is still emitted
    [0, 1],
    // Negative start: begin 3 characters before the end, allow a width of 5
    [-3, 5],
    // Negative width: begin 4 characters before the end, then allow a width
    // of ((4 * 2) - 2) = 6. The output stops short of the END of the string,
    // so the trim marker is added and consumes a width of 3
    [-4, -2],
];

foreach ($utf16leCases as [$start, $width]) {
    if ($width < 0) {
        // Negative width is deprecated as of PHP 8.3; suppress the notice
        @testStrimwidth($utf16le, $start, $width, 'UTF-16LE');
    } else {
        testStrimwidth($utf16le, $start, $width, 'UTF-16LE');
    }
}
76
echo "\n== EUC-JP ==\n";

// UTF-8 equivalent of this string: 0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。
$euc_jp = "0123\xa4\xb3\xa4\xce\xca\xb8\xbb\xfa\xce\xf3\xa4\xcf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xc7\xa4\xb9\xa1\xa3EUC-JP\xa4\xf2\xbb\xc8\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9\xa1\xa3\xc6\xfc\xcb\xdc\xb8\xec\xa4\xcf\xcc\xcc\xc5\xdd\xbd\xad\xa4\xa4\xa1\xa3";

echo 'String length: ', mb_strlen($euc_jp, 'EUC-JP'), "\n";
echo 'String width: ', mb_strwidth($euc_jp, 'EUC-JP'), "\n";
echo 'Characters: [', MBStringChars($euc_jp, 'EUC-JP'), "]\n";
echo 'Character widths: [', MBStringWidths($euc_jp, 'EUC-JP'), "]\n\n";

// Each case is [start, width]; they run in the order listed
$eucJpCases = [
    // Width of 15: eight characters of the string (width 12) followed by the
    // three-character trim marker, i.e. 11 characters in total
    [0, 15],
    // Max width of 100: everything fits, so no trim marker is added
    [0, 100],
    // Skip 15 characters into the string
    [15, 100],
    // Start 30 characters back from the end and limit the width to 5. The
    // trim marker has a width of 3, so only a single char of the string is kept
    [-30, 5],
    // Start 9 characters from the beginning. The string is 39 characters
    // long, so this gives the same result as the previous case
    [9, 5],
    // Skipping 15 characters leaves a total width of 42; trimming to 5 less
    // than that gives a width of 37
    [15, -5],
    // The last 30 characters have a width of 54; trimming to 25 less than
    // that gives 29
    [-30, -25],
    // Skipping 39 characters of a 39-character string reaches the end of the
    // string, so the output is empty
    [39, 10],
    // The last 10 characters have a width of 20; trimming to 12 less than
    // that allows a width of 8
    [-10, -12],
];

foreach ($eucJpCases as [$start, $width]) {
    if ($width < 0) {
        // Negative width is deprecated as of PHP 8.3; suppress the notice
        @testStrimwidth($euc_jp, $start, $width, 'EUC-JP');
    } else {
        testStrimwidth($euc_jp, $start, $width, 'EUC-JP');
    }
}

// [start, width] pairs for which the message of any ValueError raised by
// mb_strimwidth() is printed
$eucJpErrorCases = [
    [0, -100],
    [100, 10],
    [-100, 10],
    [-10, -21],
];

foreach ($eucJpErrorCases as [$start, $width]) {
    try {
        if ($width < 0) {
            // Negative width is deprecated as of PHP 8.3; suppress the notice
            $output = @mb_strimwidth($euc_jp, $start, $width, '...', 'EUC-JP');
        } else {
            $output = mb_strimwidth($euc_jp, $start, $width, '...', 'EUC-JP');
        }
        var_dump($output);
    } catch (\ValueError $e) {
        echo $e->getMessage() . \PHP_EOL;
    }
}
146
echo "\n== Other ==\n";

// Check every combination of start and width in -5..5 against an ASCII string
$str = 'abcdefghijklmnop';
foreach (range(-5, 5) as $from) {
    foreach (range(-5, 5) as $width) {
        $bothNegative = $from < 0 && $width < 0;

        if ($bothNegative && $width < $from) {
            // Illegal combination: the call has to throw a ValueError
            try {
                // '@' silences the deprecation notice for now
                @mb_strimwidth($str, $from, $width, '...', 'ASCII');
            } catch (\ValueError $e) {
                continue;
            }
            die("Expected exception to be thrown");
        }

        // '@' silences the deprecation notice for now
        $result = @mb_strimwidth($str, $from, $width, '...', 'ASCII');

        // Shared tail of the failure messages below
        $details = "string={$str} result={$result} from={$from} width={$width}";

        // Too little room for anything except the trim marker
        if ($bothNegative && ($width - $from) <= 3) {
            if ($result !== '...') {
                die("Output should have just been trim marker. " . $details);
            }
            continue;
        }

        // Output has to begin with the character at position $from
        if (($width < 0 || $width > 3)
            && mb_substr($result, 0, 1, 'ASCII') !== mb_substr($str, $from, 1, 'ASCII')) {
            die("Output string did not start at the expected point! " . $details);
        }

        // Output has to be exactly as wide as requested
        if ($width >= 3 && ($from >= 0 || $width <= abs($from))
            && mb_strwidth($result, 'ASCII') !== $width) {
            die("Width was different from requested. " . $details);
        }

        // With a negative width, the output ends with the trim marker, preceded
        // by the character found at offset ($width - 4) of the input
        if ($width < 0) {
            if (mb_substr($result, -3, 3, 'ASCII') !== '...') {
                die("Expected trim marker");
            }
            if (mb_substr($result, -4, 1, 'ASCII') !== mb_substr($str, $width - 4, 1, 'ASCII')) {
                die("Output string did not end at the expected point. " . $details);
            }
        }
    }
}
193
194
// Fuzzer-found regression: with a requested width of 0, the old implementation
// passed the input string through
$fuzzerInput = "\x00\x00\x00\x00+\x00\x00\x00\x00k\x00'\x11Yz";
testStrimwidth($fuzzerInput, 1, 0, 'greek');

// Regression: the new implementation originally had a bug whereby it would
// sometimes fail to skip over characters at the beginning of the string
$longAscii = str_repeat("a", 268);
testStrimwidthWithMarker($longAscii, '', 12, 255, 'ASCII');

// Invalid input string: invalid sequences are converted to error markers
$invalidUtf8 = "\x80\x80\x80";
testStrimwidth($invalidUtf8, 0, 10, 'UTF-8');

// Invalid trim marker: it is concatenated onto the truncated string as-is,
// without being checked for validity
$invalidMarker = "\x80\x80\x80";
testStrimwidthWithMarker("abcdefghijklmnop", $invalidMarker, 0, 10, 'UTF-8');

// Regression: when the trim marker was wider than the input string, the old
// implementation passed the input string through even though it was wider
// than the requested width
$oversizedMarker = "abcdefghijklmnop";
testStrimwidthWithMarker("abc", $oversizedMarker, 0, 1, 'ASCII');

// Regression: the old implementation mishandled a negative 'from' argument
// when the skipped portion included fullwidth characters
$fullwidthThenAscii = "日本語abc";
testStrimwidth($fullwidthThenAscii, -3, 10, 'UTF-8');

// Regression: the old implementation mishandled a positive 'from' argument
// combined with a negative 'width' argument when the skipped portion included
// fullwidth characters.
// The deprecation for negative width (as of PHP 8.3) is suppressed with '@'
$fullwidthThenLongerAscii = "日本語abcdef";
@testStrimwidth($fullwidthThenLongerAscii, 3, -1, 'UTF-8');
224
225?>
226--EXPECT--
227== UTF-16LE ==
228String length: 10
229String width: 14
230Characters: [3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30]
231Character widths: [1 1 1 1 1 1 2 2 2 2]
232
233start=0 width=100 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14
234start=0 width=14 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14
235start=0 width=13 result=[3100 3200 3300 6100 6200 6300 226f 575b 2e00 2e00 2e00] length=11 width=13
236start=0 width=12 result=[3100 3200 3300 6100 6200 6300 226f 2e00 2e00 2e00] length=10 width=11
237start=0 width=0 result=[2e00 2e00 2e00] length=3 width=3
238start=0 width=1 result=[2e00 2e00 2e00] length=3 width=3
239start=-3 width=5 result=[575b 2e00 2e00 2e00] length=4 width=5
240start=-4 width=-2 result=[226f 2e00 2e00 2e00] length=4 width=5
241
242== EUC-JP ==
243String length: 39
244String width: 68
245Characters: [30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3]
246Character widths: [1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
247
248start=0 width=15 result=[30 31 32 33 a4b3 a4ce cab8 bbfa 2e 2e 2e] length=11 width=15
249start=0 width=100 result=[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=39 width=68
250start=15 width=100 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=24 width=42
251start=-30 width=5 result=[a4cf 2e 2e 2e] length=4 width=5
252start=9 width=5 result=[a4cf 2e 2e 2e] length=4 width=5
253start=15 width=-5 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc 2e 2e 2e] length=23 width=37
254start=-30 width=-25 result=[a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 2e 2e 2e] length=19 width=29
255start=39 width=10 result=[] length=0 width=0
256start=-10 width=-12 result=[a1a3 c6fc 2e 2e 2e] length=5 width=7
257mb_strimwidth(): Argument #3 ($width) is out of range
258mb_strimwidth(): Argument #2 ($start) is out of range
259mb_strimwidth(): Argument #2 ($start) is out of range
260mb_strimwidth(): Argument #3 ($width) is out of range
261
262== Other ==
263start=1 width=0 result=[2e 2e 2e] length=3 width=3
264start=12 width=255 result=[61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61] length=255 width=255
265start=0 width=10 result=[3f 3f 3f] length=3 width=3
266start=0 width=10 result=[61 62 63 64 65 66 67 80 80 80] length=10 width=10
267start=0 width=1 result=[61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70] length=16 width=16
268start=-3 width=10 result=[61 62 63] length=3 width=3
269start=3 width=-1 result=[61 62 2e 2e 2e] length=5 width=5
270