--TEST--
mb_substr_count()
--EXTENSIONS--
mbstring
--INI--
output_handler=
--FILE--
<?php
// Exercises mb_substr_count(): empty-needle errors, empty haystacks, counting
// under several encodings, and two regression cases for an older implementation.
// Calls that do not pass an encoding argument use the internal encoding set here.
mb_internal_encoding("EUC-JP");

print "== Empty needle should raise an error ==\n";
try {
    var_dump(mb_substr_count("", ""));
} catch (\ValueError $e) {
    echo $e->getMessage() . \PHP_EOL;
}
try {
    // Non-empty haystack, empty needle. The haystack is written with hex escapes
    // (the same two-byte sequence used as EUC-JP text in the searches below) so
    // that this file stays pure ASCII and cannot be damaged by re-encoding.
    var_dump(mb_substr_count("\xA4\xA2", ""));
} catch (\ValueError $e) {
    echo $e->getMessage() . \PHP_EOL;
}
try {
    // Although the needle below contains 3 bytes, it decodes to zero Unicode codepoints
    // So the needle is actually 'empty', although it doesn't appear so
    var_dump(mb_substr_count("abcdef", "\x1B(B", "ISO-2022-JP"));
} catch (\ValueError $e) {
    echo $e->getMessage() . \PHP_EOL;
}

print "== Return value for empty haystack should always be zero ==\n";
var_dump(mb_substr_count("", "\xA4\xA2"));
var_dump(mb_substr_count("", chr(0)));

print "== Try searching using various encodings ==\n";
// ASCII-only haystack and needle, searched under the internal encoding
$a = str_repeat("abcacba", 100);
var_dump(mb_substr_count($a, "bca"));

// Multi-byte haystack and needle, searched under the internal encoding
$a = str_repeat("\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA2\xA4\xA6\xA4\xA4\xA4\xA2", 100);
$b = "\xA4\xA4\xA4\xA6\xA4\xA2";
var_dump(mb_substr_count($a, $b));

// The same text converted to other encodings must give the same count
$to_enc = "UTF-8";
var_dump(mb_substr_count(mb_convert_encoding($a, $to_enc),
    mb_convert_encoding($b, $to_enc), $to_enc));

$to_enc = "Shift_JIS";
var_dump(mb_substr_count(mb_convert_encoding($a, $to_enc),
    mb_convert_encoding($b, $to_enc), $to_enc));

// Each repetition of this unit contains the needle twice
$a = str_repeat("abcacbabca", 100);
var_dump(mb_substr_count($a, "bca"));

print "== Regression tests ==\n";

// The old implementation had a bug; it could only recognize a maximum of one
// match for each byte that it fed into the decoder, even if feeding in that
// byte caused two codepoints to be emitted (because the decoder was holding
// cached data), and both of those codepoints matched a 1-codepoint needle
// (For this example, two error markers are emitted for the final byte 0xFF)
echo mb_substr_count("\xef\xff", "\xf8", "UTF-8"), "\n";

// Another thing about the old implementation: if a final codepoint was emitted
// by a decoder flush function, and that codepoint finished a match with the
// needle, that match would be disregarded and not counted in the returned value
// (In practice, the only thing emitted from decoder flush functions is an error
// marker, if the string ended in an illegal state)
echo mb_substr_count("+", "+", "UTF7-IMAP"), "\n";

?>
--EXPECT--
== Empty needle should raise an error ==
mb_substr_count(): Argument #2 ($needle) must not be empty
mb_substr_count(): Argument #2 ($needle) must not be empty
mb_substr_count(): Argument #2 ($needle) must not be empty
== Return value for empty haystack should always be zero ==
int(0)
int(0)
== Try searching using various encodings ==
int(100)
int(100)
int(100)
int(100)
int(200)
== Regression tests ==
2
1