--TEST--
Exhaustive test of EUC-JP-MS (AKA EUC-JP-WIN) text encoding
--EXTENSIONS--
mbstring
--SKIPIF--
<?php
// Exhaustive run; allow it to be skipped when slow tests are disabled.
if (getenv("SKIP_SLOW_TESTS")) {
    die("skip slow test");
}
?>
--FILE--
<?php
// Fixed seed so that repeated runs produce the same results.
srand(555);
include('encoding_tests.inc');
mb_substitute_character(0x25); // '%' marks input that cannot be converted

// Main table fills both directions; the "irreversible" table is only read
// in the to-Unicode direction (its reverse map is discarded into $_).
readConversionTable(__DIR__ . '/data/EUC-JP-MS.txt', $toUnicode, $fromUnicode);
readConversionTable(__DIR__ . '/data/EUC-JP-MS.IRREVERSIBLE.txt', $toUnicodeIrreversible, $_);

// Irreversible entries only supply a reverse mapping for codepoints that do
// not have one yet; the first entry seen for a codepoint is the one kept.
foreach ($toUnicodeIrreversible as $eucBytes => $unicodeBytes) {
    $fromUnicode[$unicodeBytes] ??= $eucBytes;
}

// Several EUC-JP-WIN codes share a single Unicode codepoint in the conversion
// table. Pin the reverse mapping the tests are meant to use for each of them
// (key: codepoint bytes, value: eucJP-win bytes).
$intendedReverseMappings = [
    "\x21\x21" => "\xAD\xE4",
    "\x21\x61" => "\xAD\xB6",
    "\x21\x62" => "\xAD\xB7",
    "\x21\x63" => "\xAD\xB8",
    "\x21\x65" => "\xAD\xBA",
    "\x21\x68" => "\xAD\xBD",
    "\x21\x69" => "\xAD\xBE",

    "\x22\x1A" => "\xA2\xE5",
    "\x22\x20" => "\xA2\xDC",
    "\x22\x29" => "\xA2\xC1",
    "\x22\x2A" => "\xA2\xC0",
    "\x22\x2B" => "\xA2\xE9",
    "\x22\x35" => "\xA2\xE8",
    "\x22\x52" => "\xA2\xE2",
    "\x22\x61" => "\xA2\xE1",
    "\x22\xA5" => "\xA2\xDD",

    "\x32\x31" => "\xAD\xEA",

    "\xFF\x5E" => "\xA1\xC1",
];
foreach ($intendedReverseMappings as $unicodeBytes => $eucBytes) {
    $fromUnicode[$unicodeBytes] = $eucBytes;
}

// eucJP-win -> UTF-16BE
// NOTE(review): the last argument looks like a first-byte => sequence-length
// map (2 for 0xA1..0xFE, 3 for 0x8F); confirm against encoding_tests.inc.
$eucLengthsByFirstByte = array_fill_keys(range(0xA1,0xFE), 2) + [0x8F => 3];
findInvalidChars($toUnicode, $invalid, $truncated, $eucLengthsByFirstByte);
testAllValidChars($toUnicode, 'eucJP-win', 'UTF-16BE', false);
testAllInvalidChars($invalid, $toUnicode, 'eucJP-win', 'UTF-16BE', "\x00%");
testTruncatedChars($truncated, 'eucJP-win', 'UTF-16BE', "\x00%");
echo "Tested eucJP-win -> UTF-16BE\n";

// UTF-16BE -> eucJP-win (every first byte 0x00..0xFF is given the value 2)
$utf16LengthsByFirstByte = array_fill_keys(range(0,0xFF), 2);
findInvalidChars($fromUnicode, $invalid, $unused, $utf16LengthsByFirstByte);
convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', 'eucJP-win', '%');
echo "Tested UTF-16BE -> eucJP-win\n";

// Exercise the "long" style of illegal character marker
mb_substitute_character("long");
convertInvalidString("\x80", "%", "eucJP-win", "UTF-8");
convertInvalidString("\xFE\xFF", "%", "eucJP-win", "UTF-8");

?>
--EXPECT--
Tested eucJP-win -> UTF-16BE
Tested UTF-16BE -> eucJP-win