1--TEST-- 2mb_strimwidth() 3--EXTENSIONS-- 4mbstring 5--INI-- 6output_handler= 7--FILE-- 8<?php 9 10function MBStringChars($string, $encoding) { 11 $chars = mb_str_split($string, 1, $encoding); 12 return implode(' ', array_map(function($char) { 13 return join(unpack('H*', $char)); 14 }, $chars)); 15} 16 17function MBStringWidths($string, $encoding) { 18 $chars = mb_str_split($string, 1, $encoding); 19 return implode(' ', array_map(function($char) use(&$encoding) { 20 return mb_strwidth($char, $encoding); 21 }, $chars)); 22} 23 24function testStrimwidthWithMarker($string, $trimmark, $start, $width, $encoding) { 25 $result = mb_strimwidth($string, $start, $width, $trimmark, $encoding); 26 print "start=$start width=$width result=["; 27 print MBStringChars($result, $encoding); 28 print "] length=" . mb_strlen($result, $encoding); 29 print " width=" . mb_strwidth($result, $encoding) . "\n"; 30} 31 32function testStrimwidth($string, $start, $width, $encoding) { 33 testStrimwidthWithMarker($string, mb_convert_encoding('...', $encoding, 'ASCII'), $start, $width, $encoding); 34} 35 36echo "== UTF-16LE ==\n"; 37 38// In UTF-8, this is: 123abc漢字かな 39$utf16le = pack('H*', '310032003300610062006300226f575b4b306a30'); 40 41print "String length: " . mb_strlen($utf16le, 'UTF-16LE') . "\n"; 42print "String width: " . mb_strwidth($utf16le, 'UTF-16LE') . "\n"; 43echo 'Characters: [' . MBStringChars($utf16le, 'UTF-16LE') . "]\n"; 44echo 'Character widths: [' . MBStringWidths($utf16le, 'UTF-16LE') . "]\n\n"; 45 46// Just take the whole string with plenty of room to spare 47testStrimwidth($utf16le, 0, 100, 'UTF-16LE'); 48 49// OK, now the string will just barely fit in the allowed width... 50testStrimwidth($utf16le, 0, 14, 'UTF-16LE'); 51 52// Now the last hiragana won't quite fit 53testStrimwidth($utf16le, 0, 13, 'UTF-16LE'); 54 55// Even a bit tighter 56testStrimwidth($utf16le, 0, 12, 'UTF-16LE'); 57 58// What if we expect an output width of zero? 59// (It will still output the trim marker, pushing the width beyond the stipulated 'maximum') 60testStrimwidth($utf16le, 0, 0, 'UTF-16LE'); 61 62// Or output width of one? 63// (Likewise, it will still output the trim marker) 64testStrimwidth($utf16le, 0, 1, 'UTF-16LE'); 65 66// OK, let's count back 3 characters from the end of the string, then allow a width of 5 67// (Negative starting index) 68testStrimwidth($utf16le, -3, 5, 'UTF-16LE'); 69 70// Let's also try a negative width 71// We'll count back 4 characters, then allow a width of ((4 * 2) - 2) = 6 72// Since the output will not reach the END of the string, the trim marker 73// will have to be added, and will consume a width of 3 74// We also suppress the deprecation for negative width as of PHP 8.3 75@testStrimwidth($utf16le, -4, -2, 'UTF-16LE'); 76 77echo "\n== EUC-JP ==\n"; 78 79// In UTF-8, this is: 0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。 80$euc_jp = "0123\xa4\xb3\xa4\xce\xca\xb8\xbb\xfa\xce\xf3\xa4\xcf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xc7\xa4\xb9\xa1\xa3EUC-JP\xa4\xf2\xbb\xc8\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9\xa1\xa3\xc6\xfc\xcb\xdc\xb8\xec\xa4\xcf\xcc\xcc\xc5\xdd\xbd\xad\xa4\xa4\xa1\xa3"; 81 82print "String length: " . mb_strlen($euc_jp, 'EUC-JP') . "\n"; 83print "String width: " . mb_strwidth($euc_jp, 'EUC-JP') . "\n"; 84echo 'Characters: [' . MBStringChars($euc_jp, 'EUC-JP') . "]\n"; 85echo 'Character widths: [' . MBStringWidths($euc_jp, 'EUC-JP') . "]\n\n"; 86 87// Cut down to a width of 15, which is 10 characters in this case. 88// Trim marker will be added 89testStrimwidth($euc_jp, 0, 15, 'EUC-JP'); 90 91// With max width of 100, trim marker will not be added 92testStrimwidth($euc_jp, 0, 100, 'EUC-JP'); 93 94// Skip 15 characters into string 95testStrimwidth($euc_jp, 15, 100, 'EUC-JP'); 96 97// Count 30 characters back from end of string, then limit to width of 5 98// Since width of trim marker is 3, this will only get a single char from string 99testStrimwidth($euc_jp, -30, 5, 'EUC-JP'); 100 101// Count 9 characters from start of string. Since string is 39 characters 102// long, this will have the same result as the previous test 103testStrimwidth($euc_jp, 9, 5, 'EUC-JP'); 104 105// Skip 15 characters, which leaves a total width of 42. Then trim string down 106// to 5 less than that, which is a width of 37. 107// We also suppress the deprecation for negative width as of PHP 8.3 108@testStrimwidth($euc_jp, 15, -5, 'EUC-JP'); 109 110// Take the last 30 characters, which have a width of 54. Trim string down to 111// 25 less than that, which is 29. 112// We also suppress the deprecation for negative width as of PHP 8.3 113@testStrimwidth($euc_jp, -30, -25, 'EUC-JP'); 114 115// Skip over 39 characters... but since string is only 39 characters long, 116// it takes us to the end of the string, and output is empty 117testStrimwidth($euc_jp, 39, 10, 'EUC-JP'); 118 119// Take the last 10 characters, which have a width of 20. Trim string down to 120// 12 less than that, which is a width of 8. 121// We also suppress the deprecation for negative width as of PHP 8.3 122@testStrimwidth($euc_jp, -10, -12, 'EUC-JP'); 123 124try { 125 // We also suppress the deprecation for negative width as of PHP 8.3 126 var_dump(@mb_strimwidth($euc_jp, 0, -100,'...','EUC-JP')); 127} catch (\ValueError $e) { 128 echo $e->getMessage() . \PHP_EOL; 129} 130try { 131 var_dump(mb_strimwidth($euc_jp, 100, 10,'...','EUC-JP')); 132} catch (\ValueError $e) { 133 echo $e->getMessage() . \PHP_EOL; 134} 135try { 136 var_dump(mb_strimwidth($euc_jp, -100, 10,'...','EUC-JP')); 137} catch (\ValueError $e) { 138 echo $e->getMessage() . \PHP_EOL; 139} 140try { 141 // We also suppress the deprecation for negative width as of PHP 8.3 142 var_dump(@mb_strimwidth($euc_jp, -10, -21,'...','EUC-JP')); 143} catch (\ValueError $e) { 144 echo $e->getMessage() . \PHP_EOL; 145} 146 147echo "\n== Other ==\n"; 148 149$str = 'abcdefghijklmnop'; 150for ($from = -5; $from <= 5; $from++) { 151 for ($width = -5; $width <= 5; $width++) { 152 if ($from < 0 && $width < 0 && $width < $from) { 153 // This case is illegal and will throw an exception 154 $pass = false; 155 try { 156 /* Shut up deprecation notice for now */ 157 @mb_strimwidth($str, $from, $width, '...', 'ASCII'); 158 } catch (\ValueError $e) { 159 $pass = true; 160 } 161 if (!$pass) 162 die("Expected exception to be thrown"); 163 continue; 164 } 165 166 /* Shut up deprecation notice for now */ 167 $result = @mb_strimwidth($str, $from, $width, '...', 'ASCII'); 168 169 if ($from < 0 && $width < 0 && ($width - $from) <= 3) { 170 if ($result !== '...') 171 die("Output should have just been trim marker. string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 172 continue; 173 } 174 175 if ($width < 0 || $width > 3) { 176 if (mb_substr($result, 0, 1, 'ASCII') !== mb_substr($str, $from, 1, 'ASCII')) 177 die("Output string did not start at the expected point! string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 178 } 179 180 if ($width >= 3 && ($from >= 0 || $width <= abs($from))) { 181 if (mb_strwidth($result, 'ASCII') !== $width) 182 die("Width was different from requested. string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 183 } 184 185 if ($width < 0) { 186 if (mb_substr($result, -3, 3, 'ASCII') !== '...') 187 die("Expected trim marker"); 188 if (mb_substr($result, -4, 1, 'ASCII') !== mb_substr($str, $width-4, 1, 'ASCII')) 189 die("Output string did not end at the expected point. string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 190 } 191 } 192} 193 194 195// Regression test found by fuzzer; old implementation would pass input string 196// through when requested width=0 197testStrimwidth("\x00\x00\x00\x00+\x00\x00\x00\x00k\x00'\x11Yz", 1, 0, 'greek'); 198 199// Regression test; new implementation originally had a bug whereby it would 200// sometimes not skip over characters at beginning of string when requested to 201testStrimwidthWithMarker(str_repeat("a", 268), '', 12, 255, 'ASCII'); 202 203// Try invalid string; invalid sequences will be converted to error markers 204testStrimwidth("\x80\x80\x80", 0, 10, 'UTF-8'); 205 206// Try invalid marker 207// It will be directly concatenated onto truncated string without checking for validity 208testStrimwidthWithMarker("abcdefghijklmnop", "\x80\x80\x80", 0, 10, 'UTF-8'); 209 210// Regression test; old implementation would pass input string through, even when 211// it was wider than requested width, if the trim marker string was wider than 212// the input string 213testStrimwidthWithMarker("abc", "abcdefghijklmnop", 0, 1, 'ASCII'); 214 215// Regression test; old implementation did not handle negative 'from' argument 216// correctly when portion being skipped over included fullwidth characters 217testStrimwidth("日本語abc", -3, 10, 'UTF-8'); 218 219// Regression test; old implementation did not handle positive 'from' argument 220// combined with negative 'width' argument correctly when portion being skipped 221// over included fullwidth characters 222// We also suppress the deprecation for negative width as of PHP 8.3 223@testStrimwidth("日本語abcdef", 3, -1, 'UTF-8'); 224 225?> 226--EXPECT-- 227== UTF-16LE == 228String length: 10 229String width: 14 230Characters: [3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] 231Character widths: [1 1 1 1 1 1 2 2 2 2] 232 233start=0 width=100 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14 234start=0 width=14 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14 235start=0 width=13 result=[3100 3200 3300 6100 6200 6300 226f 575b 2e00 2e00 2e00] length=11 width=13 236start=0 width=12 result=[3100 3200 3300 6100 6200 6300 226f 2e00 2e00 2e00] length=10 width=11 237start=0 width=0 result=[2e00 2e00 2e00] length=3 width=3 238start=0 width=1 result=[2e00 2e00 2e00] length=3 width=3 239start=-3 width=5 result=[575b 2e00 2e00 2e00] length=4 width=5 240start=-4 width=-2 result=[226f 2e00 2e00 2e00] length=4 width=5 241 242== EUC-JP == 243String length: 39 244String width: 68 245Characters: [30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] 246Character widths: [1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] 247 248start=0 width=15 result=[30 31 32 33 a4b3 a4ce cab8 bbfa 2e 2e 2e] length=11 width=15 249start=0 width=100 result=[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=39 width=68 250start=15 width=100 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=24 width=42 251start=-30 width=5 result=[a4cf 2e 2e 2e] length=4 width=5 252start=9 width=5 result=[a4cf 2e 2e 2e] length=4 width=5 253start=15 width=-5 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc 2e 2e 2e] length=23 width=37 254start=-30 width=-25 result=[a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 2e 2e 2e] length=19 width=29 255start=39 width=10 result=[] length=0 width=0 256start=-10 width=-12 result=[a1a3 c6fc 2e 2e 2e] length=5 width=7 257mb_strimwidth(): Argument #3 ($width) is out of range 258mb_strimwidth(): Argument #2 ($start) is out of range 259mb_strimwidth(): Argument #2 ($start) is out of range 260mb_strimwidth(): Argument #3 ($width) is out of range 261 262== Other == 263start=1 width=0 result=[2e 2e 2e] length=3 width=3 264start=12 width=255 result=[61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61] length=255 width=255 265start=0 width=10 result=[3f 3f 3f] length=3 width=3 266start=0 width=10 result=[61 62 63 64 65 66 67 80 80 80] length=10 width=10 267start=0 width=1 result=[61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70] length=16 width=16 268start=-3 width=10 result=[61 62 63] length=3 width=3 269start=3 width=-1 result=[61 62 2e 2e 2e] length=5 width=5 270