1--TEST--
2mb_strimwidth()
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
8<?php
9
/**
 * Render a multibyte string as a space-separated list of characters, where
 * each character is shown as the hex dump of the bytes which encode it.
 *
 * @param string $string   Text to dump, encoded in $encoding
 * @param string $encoding Name of the encoding used to split $string into characters
 * @return string Hex dump of each character's bytes, joined by single spaces
 */
function MBStringChars($string, $encoding) {
  $hexDumps = [];
  foreach (mb_str_split($string, 1, $encoding) as $char) {
    // unpack('H*', ...) gives a one-element array holding the hex digits
    // for all bytes of the character; join() flattens it to a plain string
    $hexDumps[] = join(unpack('H*', $char));
  }
  return implode(' ', $hexDumps);
}
16
/**
 * Render a multibyte string as a space-separated list of the display widths
 * (as reported by mb_strwidth) of its individual characters.
 *
 * @param string $string   Text to measure, encoded in $encoding
 * @param string $encoding Name of the encoding used to split and measure $string
 * @return string Width of each character, joined by single spaces
 */
function MBStringWidths($string, $encoding) {
  $characters = mb_str_split($string, 1, $encoding);
  $widths = array_map(fn($character) => mb_strwidth($character, $encoding), $characters);
  return implode(' ', $widths);
}
23
/**
 * Run mb_strimwidth() with an explicit trim marker and print one report line
 * showing the requested start/width, the bytes of each character of the
 * result, and the result's length and width.
 *
 * @param string $string   Input text, encoded in $encoding
 * @param string $trimmark Trim marker passed to mb_strimwidth()
 * @param int    $start    Starting offset passed to mb_strimwidth()
 * @param int    $width    Width passed to mb_strimwidth()
 * @param string $encoding Encoding name used for all mbstring calls
 */
function testStrimwidthWithMarker($string, $trimmark, $start, $width, $encoding) {
    $trimmed = mb_strimwidth($string, $start, $width, $trimmark, $encoding);

    // The arguments of echo are evaluated and emitted one by one, left to right
    echo "start=$start width=$width result=[",
        MBStringChars($trimmed, $encoding),
        "] length=", mb_strlen($trimmed, $encoding),
        " width=", mb_strwidth($trimmed, $encoding), "\n";
}
31
/**
 * Same as testStrimwidthWithMarker(), but always uses '...' as the trim
 * marker, converted from ASCII to the encoding under test.
 *
 * @param string $string   Input text, encoded in $encoding
 * @param int    $start    Starting offset passed to mb_strimwidth()
 * @param int    $width    Width passed to mb_strimwidth()
 * @param string $encoding Encoding name used for all mbstring calls
 */
function testStrimwidth($string, $start, $width, $encoding) {
  $ellipsis = mb_convert_encoding('...', $encoding, 'ASCII');
  testStrimwidthWithMarker($string, $ellipsis, $start, $width, $encoding);
}
35
echo "== UTF-16LE ==\n";

// UTF-16LE bytes of the text which reads "123abc漢字かな" in UTF-8
$utf16le = pack('H*', '310032003300610062006300226f575b4b306a30');

echo "String length: ", mb_strlen($utf16le, 'UTF-16LE'), "\n";
echo "String width: ", mb_strwidth($utf16le, 'UTF-16LE'), "\n";
echo 'Characters: [', MBStringChars($utf16le, 'UTF-16LE'), "]\n";
echo 'Character widths: [', MBStringWidths($utf16le, 'UTF-16LE'), "]\n\n";

// Each entry is [start, width]; the cases are run in the order listed
$utf16leCases = [
  // The whole string fits, with plenty of room to spare
  [0, 100],
  // The string only just fits in the allowed width
  [0, 14],
  // The final hiragana no longer fits
  [0, 13],
  // One narrower still
  [0, 12],
  // Requested width of zero: the trim marker is emitted anyway, so the
  // output is wider than the stipulated 'maximum'
  [0, 0],
  // Requested width of one: the trim marker is emitted here as well
  [0, 1],
  // Negative start: begin 3 characters before the end of the string, then
  // allow a width of 5
  [-3, 5],
  // Negative width: begin 4 characters before the end, then allow a width of
  // ((4 * 2) - 2) = 6. The output stops short of the END of the string, so a
  // trim marker has to be added, and it takes up a width of 3
  [-4, -2],
];

foreach ($utf16leCases as [$start, $width]) {
  testStrimwidth($utf16le, $start, $width, 'UTF-16LE');
}
75
echo "\n== EUC-JP ==\n";

// EUC-JP bytes of the text which reads
// "0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。" in UTF-8
$euc_jp = "0123\xa4\xb3\xa4\xce\xca\xb8\xbb\xfa\xce\xf3\xa4\xcf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xc7\xa4\xb9\xa1\xa3EUC-JP\xa4\xf2\xbb\xc8\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9\xa1\xa3\xc6\xfc\xcb\xdc\xb8\xec\xa4\xcf\xcc\xcc\xc5\xdd\xbd\xad\xa4\xa4\xa1\xa3";

echo "String length: ", mb_strlen($euc_jp, 'EUC-JP'), "\n";
echo "String width: ", mb_strwidth($euc_jp, 'EUC-JP'), "\n";
echo 'Characters: [', MBStringChars($euc_jp, 'EUC-JP'), "]\n";
echo 'Character widths: [', MBStringWidths($euc_jp, 'EUC-JP'), "]\n\n";

// Each entry is [start, width]; the cases are run in the order listed
$eucJpCases = [
  // Limit to a width of 15. The output is 8 characters of the string plus the
  // 3-character trim marker, i.e. 11 characters in total
  [0, 15],
  // A maximum width of 100 is more than enough, so no trim marker is added
  [0, 100],
  // Skip over the first 15 characters of the string
  [15, 100],
  // Start 30 characters before the end of the string and limit the width to 5.
  // The trim marker has a width of 3, so only one character of the string fits
  [-30, 5],
  // Start 9 characters into the string. The string is 39 characters long, so
  // this is the same starting point, and the same result, as the previous case
  [9, 5],
  // Skipping 15 characters leaves a total width of 42; trimming to 5 less
  // than that gives a width of 37
  [15, -5],
  // The last 30 characters have a width of 54; trimming to 25 less than that
  // gives a width of 29
  [-30, -25],
  // The string is only 39 characters long, so skipping 39 characters lands
  // on the end of the string and the output is empty
  [39, 10],
  // The last 10 characters have a width of 20; trimming to 12 less than that
  // gives a maximum width of 8
  [-10, -12],
];

foreach ($eucJpCases as [$start, $width]) {
  testStrimwidth($euc_jp, $start, $width, 'EUC-JP');
}

// Out-of-range arguments: each of these calls is expected to throw a
// ValueError, whose message is printed instead of a dumped result
$outOfRangeCases = [
  [0, -100],
  [100, 10],
  [-100, 10],
  [-10, -21],
];

foreach ($outOfRangeCases as [$start, $width]) {
  try {
    var_dump(mb_strimwidth($euc_jp, $start, $width, '...', 'EUC-JP'));
  } catch (\ValueError $e) {
    echo $e->getMessage() . \PHP_EOL;
  }
}
140
echo "\n== Other ==\n";

// Sweep every combination of start offset and width from -5 to 5 over a plain
// ASCII string, and check a handful of invariants on each result. Nothing is
// printed unless an invariant is violated, in which case the script dies.
$str = 'abcdefghijklmnop';
for ($from = -5; $from <= 5; ++$from) {
    for ($width = -5; $width <= 5; ++$width) {
        $bothNegative = ($from < 0) && ($width < 0);

        if ($bothNegative && $width < $from) {
            // Illegal combination: the call must throw a ValueError
            $threw = false;
            try {
                mb_strimwidth($str, $from, $width, '...', 'ASCII');
            } catch (\ValueError $e) {
                $threw = true;
            }
            if (!$threw) {
                die("Expected exception to be thrown");
            }
            continue;
        }

        $result = mb_strimwidth($str, $from, $width, '...', 'ASCII');

        // Diagnostic details shared by the failure messages below
        $details = "string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width;

        if ($bothNegative && ($width - $from) <= 3) {
            // The permitted width is 3 or less here, so the output is
            // expected to be the trim marker and nothing else
            if ($result !== '...') {
                die("Output should have just been trim marker. " . $details);
            }
            continue;
        }

        // With a negative width, or one wider than the trim marker, the output
        // must begin with the character found at the requested start offset
        $checkStart = ($width < 0) || ($width > 3);
        if ($checkStart && mb_substr($result, 0, 1, 'ASCII') !== mb_substr($str, $from, 1, 'ASCII')) {
            die("Output string did not start at the expected point! " . $details);
        }

        // For these combinations the output must be exactly as wide as requested
        $checkWidth = ($width >= 3) && ($from >= 0 || $width <= abs($from));
        if ($checkWidth && mb_strwidth($result, 'ASCII') !== $width) {
            die("Width was different from requested. " . $details);
        }

        if ($width < 0) {
            // The output must end with the trim marker, and the character just
            // before the marker must be the one at offset ($width - 4) of the input
            if (mb_substr($result, -3, 3, 'ASCII') !== '...') {
                die("Expected trim marker");
            }
            if (mb_substr($result, -4, 1, 'ASCII') !== mb_substr($str, $width - 4, 1, 'ASCII')) {
                die("Output string did not end at the expected point. " . $details);
            }
        }
    }
}
185
186
// Regression test for an input found by a fuzzer: with a requested width of 0,
// the old implementation would pass the input string through
$fuzzerInput = "\x00\x00\x00\x00+\x00\x00\x00\x00k\x00'\x11Yz";
testStrimwidth($fuzzerInput, 1, 0, 'greek');

// Regression test: the new implementation originally had a bug where it
// sometimes failed to skip over characters at the beginning of the string
// when asked to
$longAsciiInput = str_repeat("a", 268);
testStrimwidthWithMarker($longAsciiInput, '', 12, 255, 'ASCII');

// Invalid input string: invalid sequences are converted to error markers
$invalidUtf8 = "\x80\x80\x80";
testStrimwidth($invalidUtf8, 0, 10, 'UTF-8');

// Invalid trim marker: it is concatenated onto the truncated string as-is,
// without being checked for validity
$invalidMarker = "\x80\x80\x80";
testStrimwidthWithMarker("abcdefghijklmnop", $invalidMarker, 0, 10, 'UTF-8');

// Regression test: when the trim marker was wider than the input string, the
// old implementation would pass the input string through even though it was
// wider than the requested width
$oversizedMarker = "abcdefghijklmnop";
testStrimwidthWithMarker("abc", $oversizedMarker, 0, 1, 'ASCII');

// Regression test: the old implementation mishandled a negative 'from'
// argument when the skipped portion included fullwidth characters
$fullwidthThenAscii = "日本語abc";
testStrimwidth($fullwidthThenAscii, -3, 10, 'UTF-8');

// Regression test: the old implementation mishandled a positive 'from'
// argument combined with a negative 'width' argument when the skipped portion
// included fullwidth characters
$fullwidthThenLongerAscii = "日本語abcdef";
testStrimwidth($fullwidthThenLongerAscii, 3, -1, 'UTF-8');
215
216?>
217--EXPECT--
218== UTF-16LE ==
219String length: 10
220String width: 14
221Characters: [3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30]
222Character widths: [1 1 1 1 1 1 2 2 2 2]
223
224start=0 width=100 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14
225start=0 width=14 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14
226start=0 width=13 result=[3100 3200 3300 6100 6200 6300 226f 575b 2e00 2e00 2e00] length=11 width=13
227start=0 width=12 result=[3100 3200 3300 6100 6200 6300 226f 2e00 2e00 2e00] length=10 width=11
228start=0 width=0 result=[2e00 2e00 2e00] length=3 width=3
229start=0 width=1 result=[2e00 2e00 2e00] length=3 width=3
230start=-3 width=5 result=[575b 2e00 2e00 2e00] length=4 width=5
231start=-4 width=-2 result=[226f 2e00 2e00 2e00] length=4 width=5
232
233== EUC-JP ==
234String length: 39
235String width: 68
236Characters: [30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3]
237Character widths: [1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2]
238
239start=0 width=15 result=[30 31 32 33 a4b3 a4ce cab8 bbfa 2e 2e 2e] length=11 width=15
240start=0 width=100 result=[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=39 width=68
241start=15 width=100 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=24 width=42
242start=-30 width=5 result=[a4cf 2e 2e 2e] length=4 width=5
243start=9 width=5 result=[a4cf 2e 2e 2e] length=4 width=5
244start=15 width=-5 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc 2e 2e 2e] length=23 width=37
245start=-30 width=-25 result=[a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 2e 2e 2e] length=19 width=29
246start=39 width=10 result=[] length=0 width=0
247start=-10 width=-12 result=[a1a3 c6fc 2e 2e 2e] length=5 width=7
248mb_strimwidth(): Argument #3 ($width) is out of range
249mb_strimwidth(): Argument #2 ($start) is out of range
250mb_strimwidth(): Argument #2 ($start) is out of range
251mb_strimwidth(): Argument #3 ($width) is out of range
252
253== Other ==
254start=1 width=0 result=[2e 2e 2e] length=3 width=3
255start=12 width=255 result=[61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61] length=255 width=255
256start=0 width=10 result=[3f 3f 3f] length=3 width=3
257start=0 width=10 result=[61 62 63 64 65 66 67 80 80 80] length=10 width=10
258start=0 width=1 result=[61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70] length=16 width=16
259start=-3 width=10 result=[61 62 63] length=3 width=3
260start=3 width=-1 result=[61 62 2e 2e 2e] length=5 width=5
261