--TEST--
Check that invalid UTF-8 strings are NOT marked as valid UTF-8
--EXTENSIONS--
zend_test
--FILE--
<?php
// Every case below builds a string that ends up containing bytes which are not
// valid UTF-8, and then asks zend_test_is_string_marked_as_valid_utf8() whether
// the resulting string carries the "valid UTF-8" mark. The expected answer is
// always false. The cases differ only in HOW the string is assembled (literal
// vs. variable operands, `.` vs. `.=`, interpolation, ...), so the exact
// statement shapes are significant and should not be "simplified".

// Invalid 2 Octet Sequence: 0xC3 opens a two-byte sequence, but 0x28 ("(") is
// not a continuation byte.
$non_utf8 = "\xc3\x28";

echo "Integer cast to string concatenated to invalid UTF-8:\n";
$i = 2563;
$s = (string) $i;
$s .= "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Float cast to string concatenated to invalid UTF-8:\n";
// A float with a plain decimal string form ...
$f = 26.7;
$s = (string) $f;
$s .= "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
// ... and one large enough that its string form uses exponent notation.
$f = 2e100;
$s = (string) $f;
$s .= "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n";
// Two steps: first join the valid operands, then append the invalid bytes.
$s1 = "f";
$s2 = "o";
$s = $s1 . $s2;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:\n";
// Same operands as above, but joined in a single chained expression.
$s1 = "f";
$s2 = "o";
$s = $s1 . $s2 . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Concatenation known valid UTF-8 with invalid UTF-8 in assignment:\n";
// Literal operands only (no variables involved).
$s = "f" . "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

// Background: the zend_test extension registers a "Foo" class. That class name
// does not have the "valid UTF-8" flag because class names in general don't
// have to be UTF-8, and a literal-only concatenation that yields "foo" goes
// through the interning logic and gets replaced by the class's "foo" string.
// That concern does not arise in this case: the operands below are "f", "o"
// and the invalid sequence, so the result is never "foo".
// NOTE(review): this comment used to say a "fxo" test case was chosen instead
// of "foo"; no such string is built in this file. Presumably the wording was
// carried over from a sibling test that checks valid strings - confirm.
echo "Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:\n";
$s = "f" . "o" . "\xc3\x28";
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:\n";
// Empty string as the right-hand operand ...
$s1 = "f";
$s2 = "";
$s = $s1 . $s2;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
// ... and as the left-hand operand.
$s1 = "f";
$s2 = "";
$s = $s2 . $s1;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:\n";
// Same two orderings, this time with literal operands.
$s = "f" . "";
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));
$s = "" . "f";
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Concatenation in loop:\n";
const COPY_TIMES = 10_000;
$string = "a";

// Starts with one copy and appends COPY_TIMES - 1 more via `$x = $x . $y`,
// then appends the invalid bytes to the long, otherwise-valid string.
$string_concat = $string;
for ($i = 1; $i < COPY_TIMES; $i++) {
    $string_concat = $string_concat . $string;
}
$string_concat = $string_concat . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));

echo "Concatenation in loop (compound assignment):\n";
$string = "a";

// Same construction as the previous loop, but using `.=` inside the loop.
$string_concat = $string;
for ($i = 1; $i < COPY_TIMES; $i++) {
    $string_concat .= $string;
}
$string_concat = $string_concat . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($string_concat));

echo "Concatenation of objects:\n";
// Minimal stringable object: its string conversion always yields "z".
class ToString {
    public function __toString() : string{
        return "z";
    }
}
$o = new ToString();
// Both operands are converted through __toString() before the invalid bytes
// are appended.
$s = $o . $o;
$s = $s . $non_utf8;
var_dump(zend_test_is_string_marked_as_valid_utf8($s));

echo "Rope concat:\n";
// String interpolation of three variables; the middle one is a lone 0xC3 lead
// byte, which is left without a continuation byte because "a" follows it.
$foo = 'f';
$bar = "\xc3";
$baz = 'a';
$rope = "$foo$bar$baz";
var_dump(zend_test_is_string_marked_as_valid_utf8($rope));
?>
--EXPECT--
Integer cast to string concatenated to invalid UTF-8:
bool(false)
Float cast to string concatenated to invalid UTF-8:
bool(false)
bool(false)
Concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:
bool(false)
Multiple concatenation known valid UTF-8 strings in variables, followed by concatenation of invalid UTF-8:
bool(false)
Concatenation known valid UTF-8 with invalid UTF-8 in assignment:
bool(false)
Multiple concatenation known valid UTF-8 and invalid UTF-8 in assignment:
bool(false)
Concatenation known valid UTF-8 string with empty string in variables, followed by concatenation of invalid UTF-8:
bool(false)
bool(false)
Concatenation known valid UTF-8 string with empty string in assignment, followed by concatenation of invalid UTF-8:
bool(false)
bool(false)
Concatenation in loop:
bool(false)
Concatenation in loop (compound assignment):
bool(false)
Concatenation of objects:
bool(false)
Rope concat:
bool(false)