--TEST--
Check that invalid UTF-8 strings are NOT marked as valid UTF-8
--EXTENSIONS--
zend_test
--FILE--
<?php
// Every case below builds a string that ends up containing an invalid UTF-8
// byte sequence and dumps zend_test_is_string_marked_as_valid_utf8() for it.
// The expected result is bool(false) for every case (see --EXPECT--).
//
// The exact shape of each expression (literal vs. variable operands, "." vs.
// ".=", string interpolation) is what distinguishes the cases from each other,
// so do not "simplify" them.

// Invalid 2 Octet Sequence: 0xC3 announces a two-byte sequence, but 0x28 ("(")
// is not a continuation byte.
$non_utf8 = "\xc3\x28";

// Strings produced by casting an integer, then extended with ".=" and a literal.
echo "Integer cast to string concatenated to invalid UTF-8:\n";
$i = 2563;
$s = (string) $i;
$s .= "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Same as above for floats: one plain decimal value and one large-exponent value.
echo "Float cast to string concatenated to invalid UTF-8:\n";
$f = 26.7;
$s = (string) $f;
$s .= "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
$f = 2e100;
$s = (string) $f;
$s .= "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Two variables are concatenated first; the invalid bytes are appended in a
// second, separate statement.
echo "Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n";
$s1 = "f";
$s2 = "o";
$s = $s1 . $s2;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Same operands, but joined in a single three-operand expression.
echo "Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n";
$s1 = "f";
$s2 = "o";
$s = $s1 . $s2 . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// All operands are string literals here (no variables involved).
echo "Concatenation known valid UTF-8 with invalid UTF-8 in assignment:\n";
$s = "f" . "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// NOTE(review): the remark below talks about "foo" / "fxo" literals, but this
// case concatenates "f" . "o" . "\xc3\x28", so neither literal is produced
// here. The remark presumably belongs to a sibling test for *valid* UTF-8
// strings -- confirm whether it is still relevant to this file.
//
// The "foo" string matches with a "Foo" class which is registered by the zend_test extension.
// That class name does not have the "valid UTF-8" flag because class names in general
// don't have to be UTF-8. As the "foo" string here goes through the interning logic,
// the string gets replaced by the "foo" string from the class, which does
// not have the "valid UTF-8" flag. We therefore choose a different test case: "fxo".
// The previous "foo" test case works because it is not interned.
echo "Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:\n";
$s = "f" . "o" . "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// One operand is the empty string; both operand orders are checked.
echo "Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:\n";
$s1 = "f";
$s2 = "";
$s = $s1 . $s2;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
$s1 = "f";
$s2 = "";
$s = $s2 . $s1;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Empty-string case again, this time with literal operands, in both orders.
echo "Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:\n";
$s = "f" . "";
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
$s = "" . "f";
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Build a COPY_TIMES-byte string of "a" with "$x = $x . $y" in a loop, then
// append the invalid bytes once at the end.
echo "Concatenation in loop:\n";
const COPY_TIMES = 10_000;
$string = "a";

$string_concat = $string;
for ($i = 1; $i < COPY_TIMES; $i++) {
    $string_concat = $string_concat . $string;
}
$string_concat = $string_concat . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));

// Same loop, but growing the string with the compound ".=" operator.
echo "Concatenation in loop (compound assignment):\n";
$string = "a";

$string_concat = $string;
for ($i = 1; $i < COPY_TIMES; $i++) {
    $string_concat .= $string;
}
$string_concat = $string_concat . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));

// Operands are objects; their string form comes from __toString().
echo "Concatenation of objects:\n";
class ToString {
    public function __toString(): string {
        return "z";
    }
}
$o = new ToString();
$s = $o . $o;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Interpolation of three variables; the middle one is a lone 0xC3 lead byte,
// which is followed by "a" instead of a continuation byte.
echo "Rope concat:\n";
$foo = 'f';
$bar = "\xc3";
$baz = 'a';
$rope = "$foo$bar$baz";
var_dump(zend_test_is_string_marked_as_valid_utf8($rope));
?>
--EXPECT--
Integer cast to string concatenated to invalid UTF-8:
bool(false)
Float cast to string concatenated to invalid UTF-8:
bool(false)
bool(false)
Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:
bool(false)
Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:
bool(false)
Concatenation known valid UTF-8 with invalid UTF-8 in assignment:
bool(false)
Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:
bool(false)
Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:
bool(false)
bool(false)
Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:
bool(false)
bool(false)
Concatenation in loop:
bool(false)
Concatenation in loop (compound assignment):
bool(false)
Concatenation of objects:
bool(false)
Rope concat:
bool(false)
140