--TEST--
Exhaustive test of MacJapanese encoding verification and conversion
--EXTENSIONS--
mbstring
--SKIPIF--
<?php
if (getenv("SKIP_SLOW_TESTS")) die("skip slow test");
?>
--FILE--
<?php
srand(300); /* Make results consistent */
include('encoding_tests.inc');
mb_substitute_character(0x25); // '%'

/* Read in the table of all characters in MacJapanese.
 *
 * Each data line has the form "0xBYTES<TAB>0xCP1[+0xCP2[+...+0xCP5]]", i.e.
 * one MacJapanese byte sequence mapped to between one and five Unicode
 * codepoints. Lines starting with '#' are comments. */
$validChars = [];  /* MacJapanese string -> UTF-32BE string */
$fromUnicode = []; /* UTF-16BE -> MacJapanese */

/* The table is only ever read, so open it read-only; 'r+' would needlessly
 * require write permission on the source tree. */
$tablePath = __DIR__ . '/data/MacJapanese-SJIS.txt';
$fp = fopen($tablePath, 'r');
if ($fp === false) {
    die("Could not open MacJapanese conversion table: $tablePath\n");
}

/* Compare against false explicitly so that a falsy-looking line (such as a
 * trailing "0" without a newline) cannot terminate the loop early. */
while (($line = fgets($fp, 256)) !== false) {
    if ($line[0] === '#') {
        continue;
    }

    $cp1 = $cp2 = $cp3 = $cp4 = $cp5 = null;
    /* With output variables, sscanf() returns the number of values assigned:
     * one for the MacJapanese bytes plus one per Unicode codepoint. */
    $matched = sscanf($line, "0x%x\t0x%x+0x%x+0x%x+0x%x+0x%x", $bytes, $cp1, $cp2, $cp3, $cp4, $cp5);
    if ($matched < 2) {
        continue; /* Not a mapping line */
    }

    /* Values below 256 are single-byte characters; all others are two bytes */
    $macJap = ($bytes < 256) ? chr($bytes) : pack('n', $bytes);

    /* Keep only the codepoints which were actually present on this line */
    $codepoints = array_slice([$cp1, $cp2, $cp3, $cp4, $cp5], 0, $matched - 1);

    $validChars[$macJap] = pack('N*', ...$codepoints);
    $fromUnicode[pack('n*', ...$codepoints)] = $macJap;
}
fclose($fp);

/* Although not included in the table, 0x0-0x1F and 0x7F are valid;
 * these are 'control characters' */
for ($i = 0; $i < 0x20; $i++) {
    $validChars[chr($i)] = pack('N', $i);
    $fromUnicode[pack('n', $i)] = chr($i);
}
$validChars["\x7F"] = pack('N', 0x7F);
$fromUnicode["\x00\x7F"] = "\x7F";

/* While Shift-JIS 0x815C normally corresponds to U+2015 (HORIZONTAL BAR),
 * for MacJapanese we convert 0x815C to U+2014 (EM DASH)
 * (See recommendations in JAPANESE.txt from the Unicode Consortium, under
 * 'Unicode mapping issues', point 3)
 * However, when converting Unicode -> MacJapanese, we accept both U+2014
 * and U+2015 */
$fromUnicode["\x20\x15"] = "\x81\x5C";

/* Convert U+203E (OVERLINE) to 0x8150 (FULLWIDTH MACRON) */
$fromUnicode["\x20\x3E"] = "\x81\x50";
/* And also U+00AF (MACRON) */
$fromUnicode["\x00\xAF"] = "\x81\x50";

/* Convert U+FF5E (FULLWIDTH TILDE) to 0x8160 (WAVE DASH) */
$fromUnicode["\xFF\x5E"] = "\x81\x60";

/* MacJapanese -> Unicode: every valid character must verify and convert */
testAllValidChars($validChars, 'SJIS-mac', 'UTF-32BE');
echo "MacJapanese verification and conversion works on all valid characters\n";

/* MacJapanese -> Unicode: everything else must be rejected.
 * Lead bytes 0x81-0x9F and 0xE0-0xED start 2-byte characters. */
findInvalidChars($validChars, $invalidChars, $truncated,
    array_fill_keys(range(0x81, 0x9F), 2) + array_fill_keys(range(0xE0, 0xED), 2));
testAllInvalidChars($invalidChars, $validChars, 'SJIS-mac', 'UTF-32BE', "\x00\x00\x00%");
testTruncatedChars($truncated, 'SJIS-mac', 'UTF-32BE', "\x00\x00\x00%");
echo "MacJapanese verification and conversion rejects all invalid characters\n";

/* Unicode -> MacJapanese: every mapped codepoint sequence must convert */
testAllValidChars($fromUnicode, 'UTF-16BE', 'SJIS-mac', false);
echo "Unicode -> SJIS-mac conversion works on all valid characters\n";

/* Unicode -> MacJapanese: unmapped UTF-16BE units become the substitute char */
findInvalidChars($fromUnicode, $invalidChars, $unused, array_fill_keys(range(0, 0xFF), 2));
convertAllInvalidChars($invalidChars, $fromUnicode, 'UTF-16BE', 'SJIS-mac', '%');
echo "Unicode -> SJIS-mac conversion works on all invalid characters\n";

// Regression test
convertValidString("\x20\x26\x6B\xAA", "\x81\x63\x9F\x6F", "UTF-16BE", "SJIS-mac");

// Test special combining characters for MacJapanese when *not* appearing in
// an expected combination
convertInvalidString("\x20\x10\xF8\x7A", "\x81\x5D%", "UTF-16BE", "SJIS-mac");
convertInvalidString("\x20\x10\x20\xDD", "\x81\x5D%", "UTF-16BE", "SJIS-mac");
convertInvalidString("\x20\x10\xF8\x7F", "\x81\x5D%", "UTF-16BE", "SJIS-mac");
convertInvalidString("\x21\xE6\xF8\x7E", "\x86\xD0%", "UTF-16BE", "SJIS-mac");

convertInvalidString("\xF8\x60\x00\x30\x12\x34", "%%%", "UTF-16BE", "SJIS-mac");
convertInvalidString("\xF8\x61\x00\x46\x00\x41\x12\x34", "%%%%", "UTF-16BE", "SJIS-mac");
convertInvalidString("\xF8\x62\x00\x58\x00\x49\x00\x49\x12\x34", "%%%%%", "UTF-16BE", "SJIS-mac");

// Test "long" illegal character markers
mb_substitute_character("long");
convertInvalidString("\x81", "%", "SJIS-mac", "UTF-8");
convertInvalidString("\x81\x20", "%", "SJIS-mac", "UTF-8");
convertInvalidString("\xED\x9F", "%", "SJIS-mac", "UTF-8");

echo "Done!\n";
?>
--EXPECT--
MacJapanese verification and conversion works on all valid characters
MacJapanese verification and conversion rejects all invalid characters
Unicode -> SJIS-mac conversion works on all valid characters
Unicode -> SJIS-mac conversion works on all invalid characters
Done!