--TEST--
Exhaustive test of verification and conversion of HZ text
--EXTENSIONS--
mbstring
--SKIPIF--
<?php
if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
?>
--FILE--
<?php
// Helper functions used below (testValidString, testInvalidString,
// convertInvalidString, readConversionTable, findInvalidChars) are expected
// to come from this include.
include('encoding_tests.inc');
srand(1000); // Make results consistent
mb_substitute_character(0x25); // '%'

// Every ASCII byte except '~' must pass through unchanged in both directions.
for ($byte = 0; $byte < 0x80; $byte++) {
    if ($byte === 0x7E) {
        continue; // ~ is special and will be tested separately
    }
    testValidString(chr($byte), chr($byte), 'ASCII', 'HZ');
}
echo "Tested ASCII -> HZ\n";

for ($byte = 0; $byte < 0x80; $byte++) {
    if ($byte === 0x7E) {
        continue; // ~ is special and will be tested separately
    }
    testValidString(chr($byte), chr($byte), 'HZ', 'ASCII');
}
echo "Tested HZ -> ASCII\n";

// Every byte with the high bit set is invalid in ASCII mode. The upper bound
// is inclusive so that 0xFF is covered as well.
for ($byte = 0x80; $byte <= 0xFF; $byte++) {
    testInvalidString(chr($byte), '%', 'HZ', 'ASCII');
}
echo "Tested non-ASCII bytes in ASCII mode\n";

testValidString('~~', '~', 'HZ', 'ASCII');
testValidString("~\n", '', 'HZ', 'ASCII', false);
testValidString('~{~}', '', 'HZ', 'ASCII', false);
testValidString("~{~\n~}", '', 'HZ', 'ASCII', false);
echo "Tested valid ~ escapes\n";

// Try invalid ~ escapes both in ASCII and GB modes. The upper bound is
// inclusive so that the escape "~\xFF" is covered as well.
for ($byte = 0; $byte <= 0xFF; $byte++) {
    if ($byte === 0x0A) {
        continue; // "~\n" is a valid escape in both modes (tested above)
    }
    $char = chr($byte);
    if ($byte !== 0x7E && $byte !== 0x7B) { // not ~ and not {
        testInvalidString("~" . $char, '%', 'HZ', 'ASCII');
    }
    if ($byte !== 0x7D) { // not }
        testInvalidString("~{~" . $char . "~}", '%', 'HZ', 'ASCII');
    }
}
echo "Tested all invalid ~ escapes\n";

readConversionTable(__DIR__ . '/data/GB2312.txt', $toUnicode, $fromUnicode);

findInvalidChars($toUnicode, $invalid, $truncated);

// Two characters in ISO-2022-CN convert to Unicode 0x2225
// NOTE(review): this note mentions ISO-2022-CN although this test covers HZ;
// it looks carried over from a sibling test -- confirm the wording.
$irreversible = ["\x21\x2C" => true];

// Test all good GB2312 characters within ~{ ~} escapes, one character per
// call and in shuffled order. Characters listed in $irreversible are passed
// with a false fifth argument (presumably skipping the reverse check).
$goodChars = array_keys($toUnicode);
shuffle($goodChars);
while ($goodChars !== []) {
    $goodChar = array_pop($goodChars);
    $reversible = !isset($irreversible[$goodChar]);

    testValidString('~{' . $goodChar . '~}', $toUnicode[$goodChar], 'HZ', 'UTF-16BE', $reversible);
}

// Test all invalid GB2312 characters within ~{ ~} escapes
// However, don't test escape sequences; we will do those separately below
unset($invalid["~"]);
$badChars = array_keys($invalid);
$goodChars = [];
while ($badChars !== []) {
    // Refill the pool of valid characters whenever it runs dry
    if ($goodChars === []) {
        $goodChars = array_keys($toUnicode);
        shuffle($goodChars);
    }
    // Each bad character is followed by a good one, which must still convert
    $goodChar = array_pop($goodChars);
    $fromString = array_pop($badChars) . $goodChar;
    $toString = "\x00%" . $toUnicode[$goodChar];

    testInvalidString('~{' . $fromString . '~}', $toString, 'HZ', 'UTF-16BE');
}

// Truncated characters in GB mode must yield a single error marker
foreach (array_keys($truncated) as $truncatedChar) {
    testInvalidString('~{' . $truncatedChar, "\x00%", 'HZ', 'UTF-16BE');
}

echo "Tested HZ -> UTF-16BE (for all GB2312 characters)\n";

findInvalidChars($fromUnicode, $invalid, $unused, array_fill_keys(range(0,0xFF), 2));

// Although they do not appear in the Unicode -> GB2312 map, ASCII characters *are*
// valid to convert to HZ
for ($byte = 0; $byte <= 0x7F; $byte++) {
    unset($invalid["\x00" . chr($byte)]);
}

$badChars = array_keys($invalid);
$goodChars = [];
while ($badChars !== []) {
    // Refill the pool of valid characters whenever it runs dry
    if ($goodChars === []) {
        $goodChars = array_keys($fromUnicode);
        shuffle($goodChars);
    }
    $goodChar = array_pop($goodChars);
    $fromString = array_pop($badChars) . $goodChar;
    $toString = "%~{" . $fromUnicode[$goodChar] . "~}";

    convertInvalidString($fromString, $toString, 'UTF-16BE', 'HZ');
}

echo "Tested UTF-16BE -> HZ (for all GB2312 characters)\n";

// Test "long" illegal character markers
mb_substitute_character("long");
convertInvalidString("~A", "%", "HZ", "UTF-8");
convertInvalidString("\x80", "%", "HZ", "UTF-8");
convertInvalidString("~{\x22\x21", "%", "HZ", "UTF-8");

echo "Done!\n";
?>
--EXPECT--
Tested ASCII -> HZ
Tested HZ -> ASCII
Tested non-ASCII bytes in ASCII mode
Tested valid ~ escapes
Tested all invalid ~ escapes
Tested HZ -> UTF-16BE (for all GB2312 characters)
Tested UTF-16BE -> HZ (for all GB2312 characters)
Done!