1--TEST--
2Test of ASCII and KS X 1001-1992 support in ISO-2022-KR encoding
3--EXTENSIONS--
4mbstring
5--SKIPIF--
6<?php
// This test is flagged as slow; bail out when the runner sets SKIP_SLOW_TESTS
if (getenv("SKIP_SLOW_TESTS")) {
  die("skip slow test");
}
8?>
9--FILE--
10<?php
/* Pull in the helper functions this script calls
 * (presumably all defined in encoding_tests.inc -- confirm there) */
include 'encoding_tests.inc';

/* Use '%' (0x25) as the substitute character, so that the expected output
 * for every invalid input below is easy to spell out */
mb_substitute_character(0x25);

/* $ksxChars is iterated further down as KS X 1001 byte sequence => UTF-16BE
 * string; the third result of the table reader is not used by this test */
readConversionTable(__DIR__ . '/data/KSX1001.txt', $ksxChars, $unused);
15
/**
 * Check that $from is accepted as ISO-2022-KR and converts to the UTF-16BE
 * string $to; optionally also check the reverse conversion.
 *
 * For the reverse direction, $from is first normalized to the form which the
 * encoder is expected to produce: no redundant leading 'Shift In', and a
 * trailing 'Shift In' whenever the string would otherwise end in KS X 1001 mode.
 *
 * @param string $from     ISO-2022-KR input
 * @param string $to       expected UTF-16BE output
 * @param bool   $bothWays when true, also verify UTF-16BE -> ISO-2022-KR
 */
function testValid($from, $to, $bothWays = true) {
  identifyValidString($from, 'ISO-2022-KR');
  convertValidString($from, $to, 'ISO-2022-KR', 'UTF-16BE', false);

  if (!$bothWays)
    return;

  /* 0xF at the beginning of an ISO-2022 string is redundant; it switches
   * to ASCII mode, but ASCII mode is default */
  if (strlen($from) > 0 && $from[0] == "\x0F")
    $from = substr($from, 1);

  /* If the string switches to a different charset, it should switch back to
   * ASCII at the end. The string ends in KS X 1001 mode exactly when its last
   * 'Shift Out' (0xE) is not followed by a 'Shift In' (0xF). The previous
   * check searched for "\x1B$C", which is missing the ')' of the real
   * designator "\x1B$)C" and therefore could never match. Looking only for
   * the designator would also be wrong: it would append a second 0xF to
   * strings which already end with one. */
  $lastShiftOut = strrpos($from, "\x0E");
  $lastShiftIn  = strrpos($from, "\x0F");
  if ($lastShiftOut !== false && ($lastShiftIn === false || $lastShiftIn < $lastShiftOut))
    $from .= "\x0F";

  convertValidString($to, $from, 'UTF-16BE', 'ISO-2022-KR', false);
}
33
/**
 * Hand an ISO-2022-KR input and its expected UTF-16BE result to
 * testInvalidString(), so callers need not repeat the encoding names.
 *
 * @param string $from ISO-2022-KR input which is expected to be invalid
 * @param string $to   UTF-16BE output expected for that input
 */
function testInvalid($from, $to) {
  $sourceEncoding = 'ISO-2022-KR';
  $targetEncoding = 'UTF-16BE';
  testInvalidString($from, $to, $sourceEncoding, $targetEncoding);
}
37
/* The empty string must convert to the empty string, in both directions */
testValid("", "");
echo "Empty string OK\n";

/* Every 7-bit byte except 'Shift Out' (0xE), 'Shift In' (0xF) and ESC (0x1B)
 * is expected to convert to the UTF-16BE code unit with the same value */
$controlBytes = [0xE, 0xF, 0x1B];
foreach (range(0, 0x7F) as $byte) {
  if (in_array($byte, $controlBytes, true))
    continue;
  $ascii    = chr($byte);
  $expected = "\x00" . $ascii;
  testValid($ascii, $expected);
  testValid("\x0F" . $ascii, $expected); /* 0xF is 'Shift In' code */
}

/* Bytes with the high bit set are expected to be rejected, with or without
 * a preceding 'Shift In' */
foreach (range(0x80, 0xFF) as $byte) {
  $highByte = chr($byte);
  testInvalid($highByte, "\x00%");
  testInvalid("\x0F" . $highByte, "\x00%");
}

echo "ASCII support OK\n";
54
/* Each KS X 1001 character is tried three ways: after a bare 'Shift Out',
 * after the designator plus 'Shift Out', and in the fully terminated form.
 * Only the last one is also checked in the reverse direction. */
$designator = "\x1B$)C";
$shiftOut   = "\x0E";
$shiftIn    = "\x0F";
foreach ($ksxChars as $ksx => $utf16BE) {
  $shifted = $shiftOut . $ksx;
  testValid($shifted, $utf16BE, false);
  testValid($designator . $shifted, $utf16BE, false);
  testValid($designator . $shifted . $shiftIn, $utf16BE);
}

findInvalidChars($ksxChars, $invalidKsx, $truncatedKsx);

/* Byte sequences reported as invalid; those whose first byte is 'Shift Out',
 * 'Shift In' or ESC are skipped */
$skippedLeadBytes = ["\x0E", "\x0F", "\x1B"];
foreach ($invalidKsx as $badChar => $ignored) {
  if (in_array($badChar[0], $skippedLeadBytes, true))
    continue;
  testInvalid("\x1B$)C\x0E" . $badChar, "\x00%");
}

/* Byte sequences reported as truncated */
foreach ($truncatedKsx as $badChar => $ignored) {
  testInvalid("\x1B$)C\x0E" . $badChar, "\x00%");
}

echo "KS X 1001 support OK\n";
76
/* A valid ESC sequence alone leaves us in ASCII mode; KS X 1001 does not
 * start until 'Shift Out' is seen */
testValid("\x1B$)Cabc", "\x00a\x00b\x00c", false);

/* Each proper prefix of the ESC sequence, mapped to the one byte which would
 * validly continue it */
$escPrefixes = [
  "\x1B"   => '$',
  "\x1B$"  => ')',
  "\x1B$)" => 'C',
];

/* Truncated ESC sequences */
foreach ($escPrefixes as $prefix => $nextByte) {
  testInvalid($prefix, "\x00%");
}

/* ESC sequences which go wrong at the first, second, or third following byte */
foreach (range(0, 255) as $code) {
  $byte = chr($code);
  foreach ($escPrefixes as $prefix => $nextByte) {
    if ($byte !== $nextByte)
      testInvalid($prefix . $byte, "\x00%");
  }
}

/* Switching back and forth between ASCII and KS X 1001 is allowed */
testValid("\x0E\x0E\x0F\x0E\x0Fabc", "\x00a\x00b\x00c", false);

echo "Escapes behave as expected\n";
98
// Repeat a few invalid inputs with "long" illegal character markers enabled,
// this time converting to UTF-8
mb_substitute_character("long");
$invalidInputs = [
  "\x1B",
  "\x1B$",
  "\x1B$)",
  "\x1B$)C\x0E\x7C\x84",
];
foreach ($invalidInputs as $invalidInput) {
  convertInvalidString($invalidInput, "%", "ISO-2022-KR", "UTF-8");
}

echo "Done!\n";
107?>
108--EXPECT--
109Empty string OK
110ASCII support OK
111KS X 1001 support OK
112Escapes behave as expected
113Done!
114