--TEST--
Confirm error handling for UTF-8 complies with WHATWG spec
--EXTENSIONS--
mbstring
--FILE--
<?php
/*
 * The WHATWG Encoding Standard defines more than how _valid_ UTF-8 is
 * decoded: it also pins down how _invalid_ UTF-8 must be treated,
 * including exactly how many error markers each malformed byte sequence
 * produces. The JavaScript Encoding API follows that specification.
 *
 * The documentation of mb_convert_encoding makes no promise about the
 * number of error markers emitted for a given invalid sequence, so we
 * might as well match the WHATWG behaviour. This test locks that in.
 *
 * Thanks to Martin Auswöger for pointing this out, and for supplying
 * the test cases.
 *
 * Ref: https://encoding.spec.whatwg.org/#utf-8-decoder
 */

// Use '%' (0x25) as the error marker so that it is easy to count in the output.
mb_substitute_character(0x25);

// Each entry is [input bytes, expected output bytes].
$vectors = [
    // Lone bytes that can never begin a sequence: one marker each.
    ["\x80", "%"],
    ["\xFF", "%"],

    // Two-byte sequences: truncated before an ASCII byte, the lowest and
    // highest valid encodings, and a lead followed by an invalid byte.
    ["\xC2\x7F", "%\x7F"],
    ["\xC2\x80", "\xC2\x80"],
    ["\xDF\xBF", "\xDF\xBF"],
    ["\xDF\xC0", "%%"],

    // Three-byte sequences, same pattern.
    ["\xE0\xA0\x7F", "%\x7F"],
    ["\xE0\xA0\x80", "\xE0\xA0\x80"],
    ["\xEF\xBF\xBF", "\xEF\xBF\xBF"],
    ["\xEF\xBF\xC0", "%%"],

    // Four-byte sequences, same pattern.
    ["\xF0\x90\x80\x7F", "%\x7F"],
    ["\xF0\x90\x80\x80", "\xF0\x90\x80\x80"],
    ["\xF4\x8F\xBF\xBF", "\xF4\x8F\xBF\xBF"],
    ["\xF4\x8F\xBF\xC0", "%%"],

    // Obsolete five- and six-byte forms: every byte is an error of its own.
    ["\xFA\x80\x80\x80\x80", "%%%%%"],
    ["\xFB\xBF\xBF\xBF\xBF", "%%%%%"],
    ["\xFD\x80\x80\x80\x80\x80", "%%%%%%"],
    ["\xFD\xBF\xBF\xBF\xBF\xBF", "%%%%%%"],
];

foreach ($vectors as [$input, $expected]) {
    // Converting UTF-8 to UTF-8 leaves valid text alone and replaces each
    // decoding error with the substitute character.
    $actual = mb_convert_encoding($input, 'UTF-8', 'UTF-8');

    if ($actual === $expected) {
        continue;
    }

    // Abort on the first mismatch; hex dumps keep the raw bytes readable.
    $inputHex = bin2hex($input);
    $expectedHex = bin2hex($expected);
    $actualHex = bin2hex($actual);
    die("Expected UTF-8 string " . $inputHex . " to convert to UTF-8 string " . $expectedHex . "; got " . $actualHex);
}

echo "All done!\n";

?>
--EXPECT--
All done!