--TEST--
Test of ASCII and KS X 1001-1992 support in ISO-2022-KR encoding
--EXTENSIONS--
mbstring
--SKIPIF--
<?php
if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
?>
--FILE--
<?php
/*
 * Exercises ISO-2022-KR handling in mbstring: plain ASCII bytes, every
 * KS X 1001 character listed in data/KSX1001.txt, invalid and truncated byte
 * sequences, and the ESC / Shift Out / Shift In control sequences.
 *
 * The helpers used below (readConversionTable, identifyValidString,
 * convertValidString, testInvalidString, convertInvalidString,
 * findInvalidChars) are expected to come from encoding_tests.inc.
 */
include('encoding_tests.inc');
mb_substitute_character(0x25); // '%'

readConversionTable(__DIR__ . '/data/KSX1001.txt', $ksxChars, $unused);

/**
 * Check that $from is accepted as ISO-2022-KR and converts to $to (UTF-16BE).
 *
 * When $bothWays is true, additionally check that converting $to back to
 * ISO-2022-KR yields $from, after dropping a redundant leading 'Shift In'.
 *
 * @param string $from     ISO-2022-KR encoded input
 * @param string $to       expected UTF-16BE output
 * @param bool   $bothWays also verify the UTF-16BE -> ISO-2022-KR direction
 */
function testValid($from, $to, $bothWays = true) {
    identifyValidString($from, 'ISO-2022-KR');
    convertValidString($from, $to, 'ISO-2022-KR', 'UTF-16BE', false);

    if (!$bothWays) {
        return;
    }

    /* 0xF at the beginning of an ISO-2022 string is redundant; it switches
     * to ASCII mode, but ASCII mode is default */
    if ($from !== '' && $from[0] === "\x0F") {
        $from = substr($from, 1);
    }

    /* If the string switches to a different charset, it should switch back to
     * ASCII at the end */
    /* NOTE(review): the needle below is ESC $ C, without the ')' found in the
     * ESC $ ) C designator used everywhere else in this test, so this branch
     * does not appear to be reachable for the inputs passed here. The needle
     * is kept as-is because changing it would alter the expected round-trip
     * string; confirm the intent before touching it. */
    if (str_contains($from, "\x1B\$C")) {
        $from .= "\x0F";
    }

    convertValidString($to, $from, 'UTF-16BE', 'ISO-2022-KR', false);
}

/**
 * Check that converting the invalid ISO-2022-KR string $from to UTF-16BE
 * gives $to.
 *
 * @param string $from invalid ISO-2022-KR input
 * @param string $to   expected UTF-16BE output
 */
function testInvalid($from, $to) {
    testInvalidString($from, $to, 'ISO-2022-KR', 'UTF-16BE');
}

testValid("", "");
echo "Empty string OK\n";

/* 7-bit bytes map straight to UTF-16BE; 0xE (Shift Out), 0xF (Shift In) and
 * 0x1B (ESC) are control codes and are exercised separately further down */
for ($i = 0; $i < 0x80; $i++) {
    if (in_array($i, [0x0E, 0x0F, 0x1B], true)) {
        continue;
    }
    $byte = chr($i);
    testValid($byte, "\x00" . $byte);
    testValid("\x0F" . $byte, "\x00" . $byte); /* 0xF is 'Shift In' code */
}

/* Bytes with the high bit set are expected to be rejected */
for ($i = 0x80; $i < 256; $i++) {
    $byte = chr($i);
    testInvalid($byte, "\x00%");
    testInvalid("\x0F" . $byte, "\x00%");
}

echo "ASCII support OK\n";

/* ESC $ ) C followed by 'Shift Out': the prefix used to enter KS X 1001 mode */
$ksxPrefix = "\x1B$)C\x0E";

foreach ($ksxChars as $ksx => $utf16BE) {
    testValid("\x0E" . $ksx, $utf16BE, false);
    testValid($ksxPrefix . $ksx, $utf16BE, false);
    testValid($ksxPrefix . $ksx . "\x0F", $utf16BE);
}

findInvalidChars($ksxChars, $invalidKsx, $truncatedKsx);

foreach (array_keys($invalidKsx) as $badChar) {
    /* Sequences starting with a control code are not plain invalid characters */
    if (in_array($badChar[0], ["\x0E", "\x0F", "\x1B"], true)) {
        continue;
    }
    testInvalid($ksxPrefix . $badChar, "\x00%");
}

foreach (array_keys($truncatedKsx) as $badChar) {
    testInvalid($ksxPrefix . $badChar, "\x00%");
}

echo "KS X 1001 support OK\n";

/* After a valid ESC sequence, we are still in ASCII mode; 'Shift Out' is needed to start KS X 1001 */
testValid("\x1B$)Cabc", "\x00a\x00b\x00c", false);

/* Test invalid and truncated ESC sequences */
testInvalid("\x1B", "\x00%");
testInvalid("\x1B$", "\x00%");
testInvalid("\x1B$)", "\x00%");

/* At each position of ESC $ ) C, any byte other than the expected one must be rejected */
for ($i = 0; $i < 256; $i++) {
    $byte = chr($i);
    if ($byte !== '$') {
        testInvalid("\x1B" . $byte, "\x00%");
    }
    if ($byte !== ')') {
        testInvalid("\x1B$" . $byte, "\x00%");
    }
    if ($byte !== 'C') {
        testInvalid("\x1B$)" . $byte, "\x00%");
    }
}

/* We can switch back and forth between ASCII and KS X 1001 */
testValid("\x0E\x0E\x0F\x0E\x0Fabc", "\x00a\x00b\x00c", false);

echo "Escapes behave as expected\n";

// Test "long" illegal character markers
mb_substitute_character("long");
convertInvalidString("\x1B", "%", "ISO-2022-KR", "UTF-8");
convertInvalidString("\x1B$", "%", "ISO-2022-KR", "UTF-8");
convertInvalidString("\x1B$)", "%", "ISO-2022-KR", "UTF-8");
convertInvalidString("\x1B$)C\x0E\x7C\x84", "%", "ISO-2022-KR", "UTF-8");

echo "Done!\n";
?>
--EXPECT--
Empty string OK
ASCII support OK
KS X 1001 support OK
Escapes behave as expected
Done!