1--TEST--
2Test get_next_char(), used by htmlentities()/htmlspecialchars(): validity of UTF-8 sequences
3--FILE--
4<?php
5
6/* conformance to Unicode 5.2, section 3.9, D92 */
7
// Well-formed UTF-8 byte sequences. Each row is one sequence shape and holds
// one [lowest, highest] pair (both inclusive) per byte position, lead byte first.
$val_ranges = [
    [[0x00, 0x7F]],                                                 // U+0000..U+007F
    [[0xC2, 0xDF], [0x80, 0xBF]],                                   // U+0080..U+07FF
    [[0xE0, 0xE0], [0xA0, 0xBF], [0x80, 0xBF]],                     // U+0800..U+0FFF
    [[0xE1, 0xEC], [0x80, 0xBF], [0x80, 0xBF]],                     // U+1000..U+CFFF
    [[0xED, 0xED], [0x80, 0x9F], [0x80, 0xBF]],                     // U+D000..U+D7FF
    [[0xEE, 0xEF], [0x80, 0xBF], [0x80, 0xBF]],                     // U+E000..U+FFFF
    [[0xF0, 0xF0], [0x90, 0xBF], [0x80, 0xBF], [0x80, 0xBF]],       // U+10000..U+3FFFF
    [[0xF1, 0xF3], [0x80, 0xBF], [0x80, 0xBF], [0x80, 0xBF]],       // U+40000..U+FFFFF
    [[0xF4, 0xF4], [0x80, 0x8F], [0x80, 0xBF], [0x80, 0xBF]],       // U+100000..U+10FFFF
];
19
/**
 * Tells whether $seq is exactly one well-formed UTF-8 byte sequence.
 *
 * The first byte selects a row of the global $val_ranges table. The sequence
 * is accepted only when its length equals the number of byte ranges in that
 * row and every following byte lies inside the range for its position.
 *
 * @param string $seq Raw bytes to check.
 * @return bool True for a single well-formed sequence; false otherwise,
 *              including for an empty string or a lead byte found in no row.
 */
function is_valid($seq) {
    global $val_ranges;

    $len = strlen($seq);
    if ($len === 0) {
        // There is no lead byte to inspect; reading $seq[0] here would raise
        // an "uninitialized string offset" diagnostic.
        return false;
    }

    $lead = ord($seq[0]);
    foreach ($val_ranges as $row) {
        if ($lead < $row[0][0] || $lead > $row[0][1]) {
            continue; // lead byte belongs to another row (or to none)
        }
        if (count($row) !== $len) {
            // Too few or too many bytes for this lead byte.
            return false;
        }
        for ($n = 1; $n < $len; $n++) {
            $byte = ord($seq[$n]);
            if ($byte < $row[$n][0] || $byte > $row[$n][1]) {
                return false;
            }
        }
        return true;
    }

    return false;
}
38
/**
 * Checks that htmlspecialchars() and is_valid() reach the same verdict on $s.
 *
 * htmlspecialchars() is called with ENT_QUOTES and the "UTF-8" charset; a
 * non-empty result counts as "accepted".
 *
 * @param string $s Byte sequence to test.
 * @return bool True when both verdicts are identical, false otherwise.
 */
function concordance($s) {
    $escaped = htmlspecialchars($s, ENT_QUOTES, "UTF-8");
    $accepted = strlen($escaped) !== 0;

    return $accepted === is_valid($s);
}
44
// Two-byte forms: every lead byte 0xC0..0xDF paired with every continuation
// byte 0x80..0xBF. Both bounds are inclusive so the top continuation byte
// 0xBF is exercised as well.
for ($b1 = 0xC0; $b1 <= 0xDF; $b1++) {
    for ($b2 = 0x80; $b2 <= 0xBF; $b2++) {
        $s = chr($b1).chr($b2);
        if (!concordance($s)) {
            echo "Discordance for ".bin2hex($s),"\n";
        }
    }
}
52
53
// Three-byte forms: every lead byte 0xE0..0xEF and every second byte
// 0x80..0xBF (both bounds inclusive), each tried with the lowest and the
// highest continuation byte in third position.
for ($b1 = 0xE0; $b1 <= 0xEF; $b1++) {
    for ($b2 = 0x80; $b2 <= 0xBF; $b2++) {
        foreach (["\x80", "\xBF"] as $tail) {
            $s = chr($b1).chr($b2).$tail;
            if (!concordance($s)) {
                echo "Discordance for ".bin2hex($s),"\n";
            }
        }
    }
}
64
// Four-byte forms: every lead byte 0xF0..0xFF and every second byte
// 0x80..0xBF (both bounds inclusive), each tried with the four combinations
// of lowest/highest continuation bytes in third and fourth position.
for ($b1 = 0xF0; $b1 <= 0xFF; $b1++) {
    for ($b2 = 0x80; $b2 <= 0xBF; $b2++) {
        foreach (["\x80\x80", "\xBF\x80", "\x80\xBF", "\xBF\xBF"] as $tail) {
            $s = chr($b1).chr($b2).$tail;
            if (!concordance($s)) {
                echo "Discordance for ".bin2hex($s),"\n";
            }
        }
    }
}
// Printed unconditionally; the test passes only if nothing was reported before it.
echo "Done.\n";
82?>
83--EXPECT--
84Done.
85