1--TEST-- 2mb_strimwidth() 3--EXTENSIONS-- 4mbstring 5--INI-- 6output_handler= 7--FILE-- 8<?php 9 10function MBStringChars($string, $encoding) { 11 $chars = mb_str_split($string, 1, $encoding); 12 return implode(' ', array_map(function($char) { 13 return join(unpack('H*', $char)); 14 }, $chars)); 15} 16 17function MBStringWidths($string, $encoding) { 18 $chars = mb_str_split($string, 1, $encoding); 19 return implode(' ', array_map(function($char) use(&$encoding) { 20 return mb_strwidth($char, $encoding); 21 }, $chars)); 22} 23 24function testStrimwidthWithMarker($string, $trimmark, $start, $width, $encoding) { 25 $result = mb_strimwidth($string, $start, $width, $trimmark, $encoding); 26 print "start=$start width=$width result=["; 27 print MBStringChars($result, $encoding); 28 print "] length=" . mb_strlen($result, $encoding); 29 print " width=" . mb_strwidth($result, $encoding) . "\n"; 30} 31 32function testStrimwidth($string, $start, $width, $encoding) { 33 testStrimwidthWithMarker($string, mb_convert_encoding('...', $encoding, 'ASCII'), $start, $width, $encoding); 34} 35 36echo "== UTF-16LE ==\n"; 37 38// In UTF-8, this is: 123abc漢字かな 39$utf16le = pack('H*', '310032003300610062006300226f575b4b306a30'); 40 41print "String length: " . mb_strlen($utf16le, 'UTF-16LE') . "\n"; 42print "String width: " . mb_strwidth($utf16le, 'UTF-16LE') . "\n"; 43echo 'Characters: [' . MBStringChars($utf16le, 'UTF-16LE') . "]\n"; 44echo 'Character widths: [' . MBStringWidths($utf16le, 'UTF-16LE') . "]\n\n"; 45 46// Just take the whole string with plenty of room to spare 47testStrimwidth($utf16le, 0, 100, 'UTF-16LE'); 48 49// OK, now the string will just barely fit in the allowed width... 50testStrimwidth($utf16le, 0, 14, 'UTF-16LE'); 51 52// Now the last hiragana won't quite fit 53testStrimwidth($utf16le, 0, 13, 'UTF-16LE'); 54 55// Even a bit tighter 56testStrimwidth($utf16le, 0, 12, 'UTF-16LE'); 57 58// What if we expect an output width of zero? 59// (It will still output the trim marker, pushing the width beyond the stipulated 'maximum') 60testStrimwidth($utf16le, 0, 0, 'UTF-16LE'); 61 62// Or output width of one? 63// (Likewise, it will still output the trim marker) 64testStrimwidth($utf16le, 0, 1, 'UTF-16LE'); 65 66// OK, let's count back 3 characters from the end of the string, then allow a width of 5 67// (Negative starting index) 68testStrimwidth($utf16le, -3, 5, 'UTF-16LE'); 69 70// Let's also try a negative width 71// We'll count back 4 characters, then allow a width of ((4 * 2) - 2) = 6 72// Since the output will not reach the END of the string, the trim marker 73// will have to be added, and will consume a width of 3 74testStrimwidth($utf16le, -4, -2, 'UTF-16LE'); 75 76echo "\n== EUC-JP ==\n"; 77 78// In UTF-8, this is: 0123この文字列は日本語です。EUC-JPを使っています。日本語は面倒臭い。 79$euc_jp = "0123\xa4\xb3\xa4\xce\xca\xb8\xbb\xfa\xce\xf3\xa4\xcf\xc6\xfc\xcb\xdc\xb8\xec\xa4\xc7\xa4\xb9\xa1\xa3EUC-JP\xa4\xf2\xbb\xc8\xa4\xc3\xa4\xc6\xa4\xa4\xa4\xde\xa4\xb9\xa1\xa3\xc6\xfc\xcb\xdc\xb8\xec\xa4\xcf\xcc\xcc\xc5\xdd\xbd\xad\xa4\xa4\xa1\xa3"; 80 81print "String length: " . mb_strlen($euc_jp, 'EUC-JP') . "\n"; 82print "String width: " . mb_strwidth($euc_jp, 'EUC-JP') . "\n"; 83echo 'Characters: [' . MBStringChars($euc_jp, 'EUC-JP') . "]\n"; 84echo 'Character widths: [' . MBStringWidths($euc_jp, 'EUC-JP') . "]\n\n"; 85 86// Cut down to a width of 15, which is 10 characters in this case. 87// Trim marker will be added 88testStrimwidth($euc_jp, 0, 15, 'EUC-JP'); 89 90// With max width of 100, trim marker will not be added 91testStrimwidth($euc_jp, 0, 100, 'EUC-JP'); 92 93// Skip 15 characters into string 94testStrimwidth($euc_jp, 15, 100, 'EUC-JP'); 95 96// Count 30 characters back from end of string, then limit to width of 5 97// Since width of trim marker is 3, this will only get a single char from string 98testStrimwidth($euc_jp, -30, 5, 'EUC-JP'); 99 100// Count 9 characters from start of string. Since string is 39 characters 101// long, this will have the same result as the previous test 102testStrimwidth($euc_jp, 9, 5, 'EUC-JP'); 103 104// Skip 15 characters, which leaves a total width of 42. Then trim string down 105// to 5 less than that, which is a width of 37. 106testStrimwidth($euc_jp, 15, -5, 'EUC-JP'); 107 108// Take the last 30 characters, which have a width of 54. Trim string down to 109// 25 less than that, which is 29. 110testStrimwidth($euc_jp, -30, -25, 'EUC-JP'); 111 112// Skip over 39 characters... but since string is only 39 characters long, 113// it takes us to the end of the string, and output is empty 114testStrimwidth($euc_jp, 39, 10, 'EUC-JP'); 115 116// Take the last 10 characters, which have a width of 20. Trim string down to 117// 12 less than that, which is a width of 8. 118testStrimwidth($euc_jp, -10, -12, 'EUC-JP'); 119 120try { 121 var_dump(mb_strimwidth($euc_jp, 0, -100,'...','EUC-JP')); 122} catch (\ValueError $e) { 123 echo $e->getMessage() . \PHP_EOL; 124} 125try { 126 var_dump(mb_strimwidth($euc_jp, 100, 10,'...','EUC-JP')); 127} catch (\ValueError $e) { 128 echo $e->getMessage() . \PHP_EOL; 129} 130try { 131 var_dump(mb_strimwidth($euc_jp, -100, 10,'...','EUC-JP')); 132} catch (\ValueError $e) { 133 echo $e->getMessage() . \PHP_EOL; 134} 135try { 136 var_dump(mb_strimwidth($euc_jp, -10, -21,'...','EUC-JP')); 137} catch (\ValueError $e) { 138 echo $e->getMessage() . \PHP_EOL; 139} 140 141echo "\n== Other ==\n"; 142 143$str = 'abcdefghijklmnop'; 144for ($from = -5; $from <= 5; $from++) { 145 for ($width = -5; $width <= 5; $width++) { 146 if ($from < 0 && $width < 0 && $width < $from) { 147 // This case is illegal and will throw an exception 148 $pass = false; 149 try { 150 mb_strimwidth($str, $from, $width, '...', 'ASCII'); 151 } catch (\ValueError $e) { 152 $pass = true; 153 } 154 if (!$pass) 155 die("Expected exception to be thrown"); 156 continue; 157 } 158 159 $result = mb_strimwidth($str, $from, $width, '...', 'ASCII'); 160 161 if ($from < 0 && $width < 0 && ($width - $from) <= 3) { 162 if ($result !== '...') 163 die("Output should have just been trim marker. string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 164 continue; 165 } 166 167 if ($width < 0 || $width > 3) { 168 if (mb_substr($result, 0, 1, 'ASCII') !== mb_substr($str, $from, 1, 'ASCII')) 169 die("Output string did not start at the expected point! string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 170 } 171 172 if ($width >= 3 && ($from >= 0 || $width <= abs($from))) { 173 if (mb_strwidth($result, 'ASCII') !== $width) 174 die("Width was different from requested. string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 175 } 176 177 if ($width < 0) { 178 if (mb_substr($result, -3, 3, 'ASCII') !== '...') 179 die("Expected trim marker"); 180 if (mb_substr($result, -4, 1, 'ASCII') !== mb_substr($str, $width-4, 1, 'ASCII')) 181 die("Output string did not end at the expected point. string=" . $str . " result=" . $result . " from=" . $from . " width=" . $width); 182 } 183 } 184} 185 186 187// Regression test found by fuzzer; old implementation would pass input string 188// through when requested width=0 189testStrimwidth("\x00\x00\x00\x00+\x00\x00\x00\x00k\x00'\x11Yz", 1, 0, 'greek'); 190 191// Regression test; new implementation originally had a bug whereby it would 192// sometimes not skip over characters at beginning of string when requested to 193testStrimwidthWithMarker(str_repeat("a", 268), '', 12, 255, 'ASCII'); 194 195// Try invalid string; invalid sequences will be converted to error markers 196testStrimwidth("\x80\x80\x80", 0, 10, 'UTF-8'); 197 198// Try invalid marker 199// It will be directly concatenated onto truncated string without checking for validity 200testStrimwidthWithMarker("abcdefghijklmnop", "\x80\x80\x80", 0, 10, 'UTF-8'); 201 202// Regression test; old implementation would pass input string through, even when 203// it was wider than requested width, if the trim marker string was wider than 204// the input string 205testStrimwidthWithMarker("abc", "abcdefghijklmnop", 0, 1, 'ASCII'); 206 207// Regression test; old implementation did not handle negative 'from' argument 208// correctly when portion being skipped over included fullwidth characters 209testStrimwidth("日本語abc", -3, 10, 'UTF-8'); 210 211// Regression test; old implementation did not handle positive 'from' argument 212// combined with negative 'width' argument correctly when portion being skipped 213// over included fullwidth characters 214testStrimwidth("日本語abcdef", 3, -1, 'UTF-8'); 215 216?> 217--EXPECT-- 218== UTF-16LE == 219String length: 10 220String width: 14 221Characters: [3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] 222Character widths: [1 1 1 1 1 1 2 2 2 2] 223 224start=0 width=100 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14 225start=0 width=14 result=[3100 3200 3300 6100 6200 6300 226f 575b 4b30 6a30] length=10 width=14 226start=0 width=13 result=[3100 3200 3300 6100 6200 6300 226f 575b 2e00 2e00 2e00] length=11 width=13 227start=0 width=12 result=[3100 3200 3300 6100 6200 6300 226f 2e00 2e00 2e00] length=10 width=11 228start=0 width=0 result=[2e00 2e00 2e00] length=3 width=3 229start=0 width=1 result=[2e00 2e00 2e00] length=3 width=3 230start=-3 width=5 result=[575b 2e00 2e00 2e00] length=4 width=5 231start=-4 width=-2 result=[226f 2e00 2e00 2e00] length=4 width=5 232 233== EUC-JP == 234String length: 39 235String width: 68 236Characters: [30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] 237Character widths: [1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2] 238 239start=0 width=15 result=[30 31 32 33 a4b3 a4ce cab8 bbfa 2e 2e 2e] length=11 width=15 240start=0 width=100 result=[30 31 32 33 a4b3 a4ce cab8 bbfa cef3 a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=39 width=68 241start=15 width=100 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc c5dd bdad a4a4 a1a3] length=24 width=42 242start=-30 width=5 result=[a4cf 2e 2e 2e] length=4 width=5 243start=9 width=5 result=[a4cf 2e 2e 2e] length=4 width=5 244start=15 width=-5 result=[a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 a4c6 a4a4 a4de a4b9 a1a3 c6fc cbdc b8ec a4cf cccc 2e 2e 2e] length=23 width=37 245start=-30 width=-25 result=[a4cf c6fc cbdc b8ec a4c7 a4b9 a1a3 45 55 43 2d 4a 50 a4f2 bbc8 a4c3 2e 2e 2e] length=19 width=29 246start=39 width=10 result=[] length=0 width=0 247start=-10 width=-12 result=[a1a3 c6fc 2e 2e 2e] length=5 width=7 248mb_strimwidth(): Argument #3 ($width) is out of range 249mb_strimwidth(): Argument #2 ($start) is out of range 250mb_strimwidth(): Argument #2 ($start) is out of range 251mb_strimwidth(): Argument #3 ($width) is out of range 252 253== Other == 254start=1 width=0 result=[2e 2e 2e] length=3 width=3 255start=12 width=255 result=[61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61 61] length=255 width=255 256start=0 width=10 result=[3f 3f 3f] length=3 width=3 257start=0 width=10 result=[61 62 63 64 65 66 67 80 80 80] length=10 width=10 258start=0 width=1 result=[61 62 63 64 65 66 67 68 69 6a 6b 6c 6d 6e 6f 70] length=16 width=16 259start=-3 width=10 result=[61 62 63] length=3 width=3 260start=3 width=-1 result=[61 62 2e 2e 2e] length=5 width=5 261