1--TEST--
2mb_substr_count()
3--EXTENSIONS--
4mbstring
5--INI--
6output_handler=
7--FILE--
8<?php
    // Test script for mb_substr_count(). It covers empty needles, empty
    // haystacks, searching in several encodings, and two regression cases.
    // Everything printed here is compared against the EXPECT section, so the
    // order of the statements and the exact text they emit both matter.

    // Calls below that do not pass an explicit encoding argument rely on this
    // internal encoding.
9    mb_internal_encoding("EUC-JP");
10
11    print "== Empty needle should raise an error ==\n";
    // Case 1: haystack and needle are both empty.
12    try {
13        var_dump(mb_substr_count("", ""));
14    } catch (\ValueError $e) {
15        echo $e->getMessage() . \PHP_EOL;
16    }
    // Case 2: non-empty haystack, empty needle.
    // NOTE(review): the haystack literal below looks mis-encoded in this view;
    // presumably it was raw non-ASCII (EUC-JP) bytes. Confirm that the file's
    // encoding is preserved when editing.
17    try {
18        var_dump(mb_substr_count("��", ""));
19    } catch (\ValueError $e) {
20        echo $e->getMessage() . \PHP_EOL;
21    }
    // Case 3: a needle that is non-empty as bytes; the encoding is passed
    // explicitly instead of relying on the internal encoding.
22    try {
23        // Although the needle below contains 3 bytes, it decodes to zero Unicode codepoints
24        // So the needle is actually 'empty', although it doesn't appear so
25        var_dump(mb_substr_count("abcdef", "\x1B(B", "ISO-2022-JP"));
26    } catch (\ValueError $e) {
27        echo $e->getMessage() . \PHP_EOL;
28    }
29
30    print "== Return value for empty haystack should always be zero ==\n";
    // Empty haystack with a two-byte needle, then with a single NUL byte needle.
31    var_dump(mb_substr_count("", "\xA4\xA2"));
32    var_dump(mb_substr_count("", chr(0)));
33
34    print "== Try searching using various encodings ==\n";
    // ASCII haystack: "bca" occurs once per repetition of "abcacba" and never
    // across a repetition boundary, so the expected count is 100.
35    $a = str_repeat("abcacba", 100);
36    var_dump(mb_substr_count($a, "bca"));
37
    // Same pattern built from two-byte sequences: the haystack unit is seven
    // two-byte characters and the needle is three of them. The needle occurs
    // once per repetition, so the expected count is again 100.
38    $a = str_repeat("\xA4\xA2\xA4\xA4\xA4\xA6\xA4\xA2\xA4\xA6\xA4\xA4\xA4\xA2", 100);
39    $b = "\xA4\xA4\xA4\xA6\xA4\xA2";
40    var_dump(mb_substr_count($a, $b));
41
    // Repeat the multibyte search after converting both strings to another
    // encoding and passing that encoding explicitly. The expected count stays 100.
42    $to_enc = "UTF-8";
43    var_dump(mb_substr_count(mb_convert_encoding($a, $to_enc),
44                              mb_convert_encoding($b, $to_enc), $to_enc));
45
46    $to_enc = "Shift_JIS";
47    var_dump(mb_substr_count(mb_convert_encoding($a, $to_enc),
48                              mb_convert_encoding($b, $to_enc), $to_enc));
49
    // "bca" occurs twice per repetition of "abcacbabca" (at offsets 1 and 7),
    // so the expected count is 200.
50    $a = str_repeat("abcacbabca", 100);
51    var_dump(mb_substr_count($a, "bca"));
52
53    print "== Regression tests ==\n";
54
55    // The old implementation had a bug; it could only recognize a maximum of one
56    // match for each byte that it fed into the decoder, even if feeding in that
57    // byte caused two codepoints to be emitted (because the decoder was holding
58    // cached data), and both of those codepoints matched a 1-codepoint needle
59    // (For this example, two error markers are emitted for the final byte 0xFF)
    // The EXPECT section requires this line to print 2.
60    echo mb_substr_count("\xef\xff", "\xf8", "UTF-8"), "\n";
61
62    // Another thing about the old implementation: if a final codepoint was emitted
63    // by a decoder flush function, and that codepoint finished a match with the
64    // needle, that match would be disregarded and not counted in the returned value
65    // (In practice, the only thing emitted from decoder flush functions is an error
66    // marker, if the string ended in an illegal state)
    // The EXPECT section requires this line to print 1.
67    echo mb_substr_count("+", "+", "UTF7-IMAP"), "\n";
68
69?>
70--EXPECT--
71== Empty needle should raise an error ==
72mb_substr_count(): Argument #2 ($needle) must not be empty
73mb_substr_count(): Argument #2 ($needle) must not be empty
74mb_substr_count(): Argument #2 ($needle) must not be empty
75== Return value for empty haystack should always be zero ==
76int(0)
77int(0)
78== Try searching using various encodings ==
79int(100)
80int(100)
81int(100)
82int(100)
83int(200)
84== Regression tests ==
852
861
87