--TEST--
Torture test for UTF-{7,8,16,32}
--EXTENSIONS--
mbstring
--SKIPIF--
--FILE--
// NOTE(review): the start of the --FILE-- section is not visible in this view
// (no PHP open tag, no setup code, and the loop header below is cut off).
// The fragment that follows is kept verbatim. What it shows: every accepted
// codepoint other than 0xFEFF is stored as a key of $validCodepoints, encoded
// with pack('N', $cp), i.e. as a 4-byte big-endian string. The missing text
// should be restored from the original file rather than reconstructed here.
0xDFFF) && $cp !== 0xFEFF)
            $validCodepoints[pack('N', $cp)] = true;
    }
}

// Feeds every key of the global $validCodepoints to testValidString() after
// converting it to $encoding.
//
// The keys are shuffled and consumed in chunks of 20-30 (fewer for the final
// chunk). Each chunk is concatenated into one UTF-32BE string, converted to
// $encoding with mb_convert_encoding(), and passed to testValidString()
// together with the UTF-32BE original. The script dies with a hex dump of the
// input if the conversion returns false.
//
// NOTE(review): testValidString() is not defined in this view; presumably it
// comes from an include in the missing start of the section -- confirm.
//
// $encoding - name of the target encoding handed to mb_convert_encoding()
function testValidCodepoints($encoding) {
    global $validCodepoints;

    $good = array_keys($validCodepoints);
    shuffle($good);

    while (!empty($good)) {
        $string = '';
        // Take 20-30 codepoints per string, or whatever is left at the end
        $length = min(rand(20,30), count($good));
        while ($length--) {
            $string .= array_pop($good);
        }

        $converted = mb_convert_encoding($string, $encoding, 'UTF-32BE');
        if ($converted === false)
            die("mb_convert_encoding failed to convert UTF-32BE to $encoding." .
                "\nString: " . bin2hex($string));
        testValidString($converted, $string, $encoding, 'UTF-32BE');
    }
}

// Passes each invalid byte sequence in $invalid, followed by one valid
// character, to testInvalidString().
//
// For every $bad => $expected pair, one key is popped from a shuffled copy of
// the $validCodepoints keys, converted from UTF-32BE to $encoding and appended
// to $bad. The expected UTF-32BE result is $expected followed by that same
// (unconverted) key.
//
// NOTE(review): testInvalidString() is not defined in this view -- confirm its
// contract against its definition.
//
// $invalid  - map of invalid input bytes (in $encoding) => expected UTF-32BE output
// $encoding - name of the encoding under test
function testInvalidCodepoints($invalid, $encoding) {
    global $validCodepoints;

    $good = array_keys($validCodepoints);
    shuffle($good);

    foreach ($invalid as $bad => $expected) {
        $good1 = array_pop($good);
        $string = $bad . mb_convert_encoding($good1, $encoding, 'UTF-32BE');
        testInvalidString($string, $expected . $good1, $encoding, 'UTF-32BE');
    }
}

echo "== UTF-8 ==\n";

testValidCodepoints('UTF-8');

testValidString('', '', 'UTF-8', 'UTF-32BE');

// Keys are invalid UTF-8 input; values are the expected UTF-32BE output, where
// "\x00\x00\x00%" is the error marker.
// NOTE(review): the call that makes '%' the substitute character is not
// visible here; presumably it sits in the missing start of the section.
$invalid = array(
    // Codepoints outside of valid 0-0x10FFFF range for Unicode
    "\xF4\x90\x80\x80" => str_repeat("\x00\x00\x00%", 4), // CP 0x110000
    "\xF7\x80\x80\x80" => str_repeat("\x00\x00\x00%", 4), // CP 0x1C0000
    "\xF7\xBF\xBF\xBF" => str_repeat("\x00\x00\x00%", 4), // CP 0x1FFFFF

    // Reserved range for UTF-16 surrogate pairs
    "\xED\xA0\x80" => str_repeat("\x00\x00\x00%", 3), // CP 0xD800
    "\xED\xAF\xBF" => str_repeat("\x00\x00\x00%", 3), // CP 0xDBFF
    "\xED\xBF\xBF" => str_repeat("\x00\x00\x00%", 3), // CP 0xDFFF

    // Truncated characters
    "\xDF" => "\x00\x00\x00%", // should have been 2-byte
    "\xEF\xBF" => "\x00\x00\x00%", // should have been 3-byte
    "\xF0\xBF\xBF" => "\x00\x00\x00%", // should have been 4-byte
    "\xF1\x96" => "\x00\x00\x00%",
    "\xF1\x96\x80" => "\x00\x00\x00%",
    "\xF2\x94" => "\x00\x00\x00%",
    "\xF2\x94\x80" => "\x00\x00\x00%",
    "\xF3\x94" => "\x00\x00\x00%",
    "\xF3\x94\x80" => "\x00\x00\x00%",
    "\xE0\x9F" => "\x00\x00\x00%\x00\x00\x00%",
    "\xED\xA6" => "\x00\x00\x00%\x00\x00\x00%",

    // Multi-byte characters which end too soon and go to ASCII
    "\xDFA" => "\x00\x00\x00%\x00\x00\x00A",
    "\xEF\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
    "\xF0\xBFA" => "\x00\x00\x00%\x00\x00\x00A",
    "\xF0\xBF\xBFA" => "\x00\x00\x00%\x00\x00\x00A",

    // Multi-byte characters which end too soon and go to another MB char
    "\xDF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF",
    "\xEF\xBF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF",
    "\xF0\xBF\xBF\xDF\xBF" => "\x00\x00\x00%\x00\x00\x07\xFF",

    // Multi-byte characters which end too soon and go to a junk byte
    // (Which isn't even valid to start a new character)
    "\xF0\xBF\xBF\xFF" => str_repeat("\x00\x00\x00%", 2),
    "\xF0\xBF\xFF" => str_repeat("\x00\x00\x00%", 2),

    // Continuation bytes which appear outside of a MB char
    "\x80" => "\x00\x00\x00%",
    "A\x80" => "\x00\x00\x00A\x00\x00\x00%",
    "\xDF\xBF\x80" => "\x00\x00\x07\xFF\x00\x00\x00%",

    // Overlong code units
    // (Using more bytes than needed to encode a character)
    "\xC1\xBF" => str_repeat("\x00\x00\x00%", 2), // didn't need 2 bytes
    "\xE0\x9F\xBF" => str_repeat("\x00\x00\x00%", 3), // didn't need 3 bytes
    "\xF0\x8F\xBF\xBF" => str_repeat("\x00\x00\x00%", 4) // didn't need 4 bytes
);

testInvalidCodepoints($invalid, 'UTF-8');

// Regression test for bug in SSE2-based accelerated UTF-8 validation function
// Each string is exactly 16 bytes long and ends in an incomplete multi-byte
// sequence, so mb_check_encoding() must reject every one of them.
$truncated16byte = [
    "k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xc6",
    "k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xef",
    "k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xef\xbf",
    "k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0",
    "k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xbf",
    "k\x08`\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\xf0\xbf\xbf"
];
foreach ($truncated16byte as $trunc) {
    if (mb_check_encoding($trunc, 'UTF-8'))
        die("UTF-8 validation was incorrect on 16-byte string with truncated multi-byte char at end");
}

echo "== UTF-16 ==\n";

testValidCodepoints("UTF-16");
testValidCodepoints("UTF-16LE");
testValidCodepoints("UTF-16BE");

testValidString('', '', 'UTF-16', 'UTF-32BE');
testValidString('', '', 'UTF-16LE', 'UTF-32BE');
testValidString('', '', 'UTF-16BE', 'UTF-32BE');

// Big-endian byte order; used for both 'UTF-16' and 'UTF-16BE' below
$invalid = array(
    // UTF-16 _cannot_ represent codepoints bigger than 0x10FFFF, so we're not
    // worried about that. But there are plenty of other ways to mess up...

    // Second half of surrogate pair comes first
    "\xDC\x01\xD8\x02" => "\x00\x00\x00%\x00\x00\x00%",

    // First half of surrogate pair not followed by second part
    "\xD8\x01\x00A" => "\x00\x00\x00%\x00\x00\x00A",

    // First half of surrogate pair at end of string
    "\xD8\x01" => "\x00\x00\x00%",
);

testInvalidCodepoints($invalid, 'UTF-16');
testInvalidCodepoints($invalid, 'UTF-16BE');

// Truncated strings
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-16', 'UTF-32BE');
testInvalidString("\x00A\x01", "\x00\x00\x00A\x00\x00\x00%", 'UTF-16', 'UTF-32BE');
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-16BE', 'UTF-32BE');
testInvalidString("\x00A\x01", "\x00\x00\x00A\x00\x00\x00%", 'UTF-16BE', 'UTF-32BE');

// The same kinds of errors in little-endian byte order
$invalid = array(
    // Second half of surrogate pair comes first
    "\x01\xDC\x02\xD8" => "\x00\x00\x00%\x00\x00\x00%",

    // First half of surrogate pair not followed by second part
    "\x01\xD8A\x00" => "\x00\x00\x00%\x00\x00\x00A",

    // First half of surrogate pair at end of string
    "\x01\xD8" => "\x00\x00\x00%",

    // Two successive codepoints which are both the 1st part of a surrogate pair
    "\x01\xD8\x02\xD8" => "\x00\x00\x00%\x00\x00\x00%"
);

testInvalidCodepoints($invalid, 'UTF-16LE');

// Truncated
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-16LE', 'UTF-32BE');
testInvalidString("A\x00\x01", "\x00\x00\x00A\x00\x00\x00%", 'UTF-16LE', 'UTF-32BE');

// Test treatment of BOM
testValidString("\xFE\xFF\x12\x34", "\x00\x00\x12\x34", 'UTF-16', 'UTF-32BE', false);
testValidString("\xFF\xFE\x12\x34", "\x00\x00\x34\x12", 'UTF-16', 'UTF-32BE', false);

// Test treatment of (illegal) codepoints between U+D800 and U+DFFF
testValidString("\xD8\x00", "\xD8\x00", 'UCS-2BE', 'UTF-16BE', false);
testValidString("\xDB\xFF", "\xDB\xFF", 'UCS-2BE', 'UTF-16BE', false);
testValidString("\xDC\x00", "\xDC\x00", 'UCS-2BE', 'UTF-16BE', false);
testValidString("\xD8\x00", "\x00\xD8", 'UCS-2BE', 'UTF-16LE', false);
testValidString("\xDC\x00", "\x00\xDC", 'UCS-2BE', 'UTF-16LE', false);

// Try codepoint over U+10FFFF
// NOTE(review): convertInvalidString() is not defined in this view -- confirm
// its contract against its definition.
convertInvalidString("\x00\x11\x56\x78", "\x00%", 'UCS-4BE', 'UTF-16BE');
convertInvalidString("\x00\x11\x56\x78", "%\x00", 'UCS-4BE', 'UTF-16LE');

// Regression tests for bugs with initial AVX2-accelerated implementation
convertInvalidString(str_repeat("a\x00", 15) . "\x00\xD8\x00\xFC", str_repeat("\x00a", 15) . "\x00%\xFC\x00", 'UTF-16LE', 'UCS-2BE');
convertInvalidString(str_repeat("\x00a", 15) . "\xD8\x00\xFC\x00", str_repeat("\x00a", 15) . "\x00%\xFC\x00", 'UTF-16BE', 'UCS-2BE');

// This string caused an out-of-bounds read; it was found by a fuzzer
$str = "\xdb\xdb\xdb#\xdb\xdb\xdf\xdb\xdf\xdb\xdb\x0b\xdb\x00\xdc\xdb\xdf\xdb\xdf\xdb\xda\x0b\xdb\x00\xdcY\xdf\x03\xdb\x03\xd9\xd9\xd8";
convertInvalidString($str, "\x00\x25\x00\x25\xdb\xdb\xdf\xdb\x00\x25\x00\x25\xdb\x00\xdc\xdb\x00\x25\x00\x25\x00\x25\xdb\x00\xdc\x59\x00\x25\x00\x25\x00\x25\x00\x25", 'UTF-16BE', 'UTF-16BE');

$str = "\xda\xda\xda\xda\xda\xda\xd9\xdb\xda\xda\xda\xda\xdd\xda\xda\xd9\xdb\xda\xda\xda\xda\xdd\xda\xdd\xd9\x0a\xda\xda\xda\xda\xdd\xda\xdd\xd9\xda\xda\xda\xda\xda\xda\xda\xda\xda\xd9\xdb\xda\xda\xda\xd9\xdb\xda\xda\xda\xda\xdd\xda\xda\xd9\xdb";
convertInvalidString($str, "\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\xda\xda\xda\xdd\x25\x00\xd9\x0a\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00\x25\x00", 'UTF-16LE', 'UTF-16LE');

echo "== UTF-32 ==\n";

// NOTE(review): unlike the UTF-16 section, plain 'UTF-32' is not passed to
// testValidCodepoints() here -- confirm whether that is intentional.
testValidCodepoints("UTF-32LE");
testValidCodepoints("UTF-32BE");

// Empty string
testValidString('', '', 'UTF-32', 'UTF-32BE');
testValidString('', '', 'UTF-32BE', 'UTF-32');
testValidString('', '', 'UTF-32LE', 'UTF-32BE');

// Big-endian byte order; used for both 'UTF-32' and 'UTF-32BE' below
$invalid = array(
    // Codepoints which are too big
    "\x00\x11\x00\x00" => "\x00\x00\x00%",
    "\x80\x00\x00\x00" => "\x00\x00\x00%",
    "\xff\xff\xfe\xff" => "\x00\x00\x00%",

    // Surrogates
    "\x00\x00\xd8\x00" => "\x00\x00\x00%",
    "\x00\x00\xdb\xff" => "\x00\x00\x00%",
    "\x00\x00\xdc\x00" => "\x00\x00\x00%",
    "\x00\x00\xdf\xff" => "\x00\x00\x00%",
);

testInvalidCodepoints($invalid, 'UTF-32');
testInvalidCodepoints($invalid, 'UTF-32BE');

// Truncated code units
testInvalidString("\x00\x01\x01", "\x00\x00\x00%", 'UTF-32', 'UTF-32BE');
testInvalidString("\x00\x01", "\x00\x00\x00%", 'UTF-32', 'UTF-32BE');
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32', 'UTF-32BE');
// NOTE(review): the next three calls are identical. Presumably the first two
// were meant to use "\x00\x01\x01" and "\x00\x01", mirroring the 'UTF-32'
// calls just above -- confirm and adjust.
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32BE', 'UTF-32');
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32BE', 'UTF-32');
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32BE', 'UTF-32');

// The same kinds of errors in little-endian byte order
$invalid = array(
    // Codepoints which are too big
    "\x00\x00\x11\x00" => "\x00\x00\x00%",
    "\x00\x00\x00\x80" => "\x00\x00\x00%",
    "\xff\xfe\xff\xff" => "\x00\x00\x00%",

    // Surrogates
    "\x00\xd8\x00\x00" => "\x00\x00\x00%",
    "\xff\xdb\x00\x00" => "\x00\x00\x00%",
    "\x00\xdc\x00\x00" => "\x00\x00\x00%",
    "\xff\xdf\x00\x00" => "\x00\x00\x00%",
);

testInvalidCodepoints($invalid, 'UTF-32LE');

// Truncated code units
testInvalidString("\x00\x01\x01", "\x00\x00\x00%", 'UTF-32LE', 'UTF-32BE');
testInvalidString("\x00\x01", "\x00\x00\x00%", 'UTF-32LE', 'UTF-32BE');
testInvalidString("\x00", "\x00\x00\x00%", 'UTF-32LE', 'UTF-32BE');

// Test treatment of BOM
testValidString("\x00\x00\xFE\xFF\x00\x00\x12\x34", "\x00\x00\x12\x34", 'UTF-32', 'UTF-32BE', false);
testValidString("\xFF\xFE\x00\x00\x12\x34\x00\x00", "\x00\x00\x34\x12", 'UTF-32', 'UTF-32BE', false);

// Test treatment of (illegal) codepoints between U+D800 and U+DFFF
testValidString("\xD8\x00", "\x00\x00\xD8\x00", 'UCS-2BE', 'UTF-32BE', false);
testValidString("\xDB\xFF", "\x00\x00\xDB\xFF", 'UCS-2BE', 'UTF-32BE', false);
testValidString("\xDC\x00", "\x00\x00\xDC\x00", 'UCS-2BE', 'UTF-32BE', false);
testValidString("\xD8\x00", "\x00\xD8\x00\x00", 'UCS-2BE', 'UTF-32LE', false);
testValidString("\xDC\x00", "\x00\xDC\x00\x00", 'UCS-2BE', 'UTF-32LE', false);

echo "== UTF-7 ==\n";

testValidString('', '', 'UTF-7', 'UTF-32BE');

// 'Direct' characters
foreach (range(ord('A'), ord('Z')) as $byte)
    testValidString(chr($byte), "\x00\x00\x00" . chr($byte), 'UTF-7', 'UTF-32BE');
foreach (range(ord('a'), ord('z')) as $byte)
    testValidString(chr($byte), "\x00\x00\x00" . chr($byte), 'UTF-7', 'UTF-32BE');
foreach (range(ord('0'), ord('9')) as $byte)
    testValidString(chr($byte), "\x00\x00\x00" . chr($byte), 'UTF-7', 'UTF-32BE');
foreach (str_split("'(),-./:?") as $char)
    testValidString($char, "\x00\x00\x00" . $char, 'UTF-7', 'UTF-32BE');

// 'Optional direct' characters are Base64-encoded in mbstring's implementation

// Whitespace
foreach (str_split(" \t\r\n\x00") as $char)
    testValidString($char, "\x00\x00\x00" . $char, 'UTF-7', 'UTF-32BE');

// Encoding + as +-
testValidString('+-', "\x00\x00\x00+", 'UTF-7', 'UTF-32BE', false);

// UTF-16 + Base64 encoding
//
// Builds the body of a UTF-7 '+' section: $str is converted from $encoding to
// UTF-16BE, Base64-encoded, and every '=' is removed from the result.
//
// $str      - text to encode
// $encoding - encoding that $str is currently in
// Returns the unpadded Base64 string (without the leading '+' or trailing '-').
function encode($str, $encoding) {
    // Base64 encoding for UTF-7 doesn't use '=' for padding
    return str_replace('=', '', base64_encode(mb_convert_encoding($str, 'UTF-16BE', $encoding)));
}

// Every codepoint from U+0000 to U+00FF, written as a Base64 '+' section.
// The fifth argument is switched off for the bytes that the tests above feed
// through UTF-7 unencoded (letters, digits, "'(),-./:?", NUL and whitespace).
// NOTE(review): presumably that argument tells testValidString() whether to
// also check the conversion in the reverse direction -- confirm against its
// definition.
for ($i = 0; $i < 256; $i++) {
    $reversible = true;
    if ($i >= ord('A') && $i <= ord('Z'))
        $reversible = false;
    if ($i >= ord('a') && $i <= ord('z'))
        $reversible = false;
    if ($i >= ord('0') && $i <= ord('9'))
        $reversible = false;
    if (strpos("'(),-./:?\x00 \t\r\n", chr($i)) !== false)
        $reversible = false;

    testValidString('+' . encode("\x00" . chr($i), 'UTF-16BE') . '-', "\x00\x00\x00" . chr($i), 'UTF-7', 'UTF-32BE', $reversible);
}

testValidString('+' . encode("\x12\x34", 'UTF-16BE') . '-', "\x00\x00\x12\x34", 'UTF-7', 'UTF-32BE');
testValidString('+' . encode("\x12\x34\x56\x78", 'UTF-16BE') . '-', "\x00\x00\x12\x34\x00\x00\x56\x78", 'UTF-7', 'UTF-32BE');
testValidString('+' . encode("\x12\x34\x56\x78\x00\x40", 'UTF-16BE') . '-', "\x00\x00\x12\x34\x00\x00\x56\x78\x00\x00\x00\x40", 'UTF-7', 'UTF-32BE');
testValidString('+' . encode("\xFF\xEE\xEE\xFF", 'UTF-16BE') . '-', "\x00\x00\xFF\xEE\x00\x00\xEE\xFF", 'UTF-7', 'UTF-32BE');

// Surrogate pair
testValidString('+' . encode("\x00\x01\x04\x00", 'UTF-32BE') . '-', "\x00\x01\x04\x00", 'UTF-7', 'UTF-32BE');
testValidString('+' . encode("\x00\x00\x00A\x00\x01\x04\x00\x00\x00\x00B", 'UTF-32BE') . '-', "\x00\x00\x00A\x00\x01\x04\x00\x00\x00\x00B", 'UTF-7', 'UTF-32BE', false);
testValidString('+' . encode("\x00\x01\x04\x00\x00\x01\x04\x00", 'UTF-32BE') . '-', "\x00\x01\x04\x00\x00\x01\x04\x00", 'UTF-7', 'UTF-32BE');

// Unterminated + section
// (This is not considered illegal)
testValidString('+' . encode('ABC', 'ASCII'), "\x00A\x00B\x00C", 'UTF-7', 'UTF-16BE', false);

// + sections immediately after each other
// (This isn't illegal either)
testValidString('+' . encode('AB', 'ASCII') . '-+' . encode('CD', 'ASCII') . '-', "\x00A\x00B\x00C\x00D", 'UTF-7', 'UTF-16BE', false);

// + sections not immediately after each other
// (Just trying to be exhaustive here)
testValidString('+' . encode('AB', 'ASCII') . '-!+' . encode('CD', 'ASCII') . '-', "\x00A\x00B\x00!\x00C\x00D", 'UTF-7', 'UTF-16BE', false);

// + section terminated by a non-Base64 direct character which is NOT -
foreach (str_split(" \t\r\n'(),.:?!\"#$%&*;<=>@[]^_`{|}\x00") as $char) {
    testValidString('+' . encode("\x12\x34", 'UTF-16BE') . $char, "\x00\x00\x12\x34\x00\x00\x00" . $char, 'UTF-7', 'UTF-32BE', false);
}

// Non-direct character followed by direct character
testValidString('%A', '+ACU-A', 'ASCII', 'UTF-7');
testValidString('%%A', '+ACUAJQ-A', 'ASCII', 'UTF-7');
testValidString('%%%A', '+ACUAJQAl-A', 'ASCII', 'UTF-7');

// Now let's see how UTF-7 can go BAD...

// Base64-encodes the raw bytes of $str and removes every '=' from the result.
// Unlike encode(), no character-set conversion is applied first, so the bytes
// placed in the '+' section can be deliberately malformed UTF-16.
//
// $str - raw bytes to encode
// Returns the unpadded Base64 string.
function rawEncode($str) {
    return str_replace('=', '', base64_encode($str));
}

// Totally bogus byte
testInvalidString("\xFF", "%", 'UTF-7', 'UTF-8');
// Totally bogus codepoint... '+ACU-' is '%' in UTF-7
testInvalidString("\x12\x34\x56\x78", "+ACU-", 'UTF-32BE', 'UTF-7');

// First, messed up UTF16 in + section
// Second half of surrogate pair coming first
testInvalidString('+' . rawEncode("\xDC\x01\xD8\x02") . '-', "\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\x00.\xDC\x01\xD8\x02") . '-', "\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\x00.\x00.\xDC\x01\xD8\x02") . '-', "\x00\x00\x00.\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

// First half of surrogate pair not followed by second half
testInvalidString('+' . rawEncode("\xD8\x01\x00A") . '-', "\x00\x00\x00%\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\xD8\x01\xD9\x02") . '-', "\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\x00.\xD8\x01\x00A") . '-', "\x00\x00\x00.\x00\x00\x00%\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\x00.\xD8\x01\xD9\x02") . '-', "\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\x00.\x00.\xD8\x01\x00A") . '-', "\x00\x00\x00.\x00\x00\x00.\x00\x00\x00%\x00\x00\x00A", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\x00.\x00.\xD8\x01\xD9\x02") . '-', "\x00\x00\x00.\x00\x00\x00.\x00\x00\x00%\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

// First half of surrogate pair appearing at end of string
testInvalidString('+' . rawEncode("\xD8\x01") . '-', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString('+' . rawEncode("\xD8\x01"), "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString("+999999uJ", "\xEF\x9F\x9F\xE7\xB7\xB7%", 'UTF-7', 'UTF-8');
testInvalidString("+999euJ", "\xEF\x9F\x9F\xE5\xBA\xB8%", "UTF-7", "UTF-8");
testInvalidString("+euJ", "\xE7\xAB\xA2%", "UTF-7", "UTF-8");

// Truncated string
testInvalidString('+' . rawEncode("\x01") . '-', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');
testInvalidString('+l', "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

// Base64 section should not have 4 ASCII characters; the first 3 can encode one
// UTF-16 character, so there is no need for the 4th
testInvalidString('+RR8I', "\xE4\x94\x9F%", 'UTF-7', 'UTF-8');
// Likewise with 7 characters
testInvalidString('+RR8IAAA', "\xE4\x94\x9F\xE0\xA0\x80%", 'UTF-7', 'UTF-8');

// Similarly, it is useless for a Base64 section to only contain a single 'A'
// (which decodes to only zero bits)
testInvalidString("+A", "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

// And then, messed up Base64 encoding

// Bad padding on + section (not zeroes)
$encoded = encode("\x12\x34", 'UTF-16BE'); // 3 Base64 bytes, 2 bits of padding...
// Keep the first two Base64 characters and bump the third by one
$corrupted = substr($encoded, 0, 2) . chr(ord($encoded[2]) + 1);
testInvalidString('+' . $corrupted . '-', "\x00\x00\x12\x34\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

// Characters which are not Base64 (and not even ASCII) appearing in Base64 section
testInvalidString("+\x80", "\x00\x00\x00%", 'UTF-7', 'UTF-32BE');

// Try codepoint over U+10FFFF; '+ACU-' is the error marker '%'
convertInvalidString("\x12\x34\x56\x78", "+ACU-", 'UCS-4BE', 'UTF-7');
convertInvalidString("\x00\x11\x56\x78", "+ACU-", 'UCS-4BE', 'UTF-7');

// If error marker character needs to be ASCII-encoded but is able to serve as an
// ending character for a Base64 section, no need to add an additional dash
mb_substitute_character(0x3F); // ?
convertInvalidString("\x1E\xBE", '+AB4?', 'UTF-7', 'UTF-7');

echo "Done!\n";
?>
--EXPECT--
== UTF-8 ==
== UTF-16 ==
== UTF-32 ==
== UTF-7 ==
Done!