--TEST--
Exhaustive test of verification and conversion of CP936 text
--EXTENSIONS--
mbstring
--SKIPIF--
<?php
if (getenv("SKIP_SLOW_TESTS")) {
    die("skip slow test");
}
?>
--FILE--
<?php
include('encoding_tests.inc');

srand(1000); // Fixed seed so that results are reproducible
mb_substitute_character(0x25); // '%'
readConversionTable(__DIR__ . '/data/CP936.txt', $toUnicode, $fromUnicode);

// Extra Unicode -> CP936 mappings, keyed by the UTF-16BE bytes of the
// codepoint, with the CP936 byte sequence as the value. They are applied on
// top of $fromUnicode in the order listed.
$inputOnlyMappings = [
    // Unicode has more than one codepoint for a 'tilde' character
    // On output, we emit U+FF5E FULLWIDTH TILDE, but we accept U+223C
    // (TILDE OPERATOR) as input
    "\x22\x3c" => "\xa1\xab",

    // Circle
    // On output, we emit U+00B0 DEGREE SIGN, but we accept U+2218
    // (RING OPERATOR) as input
    "\x22\x18" => "\xa1\xe3",

    // Overline
    // On output, we emit U+FFE3 FULLWIDTH MACRON, but we accept U+203E
    // (OVERLINE) as input
    "\x20\x3e" => "\xa3\xfe",

    // We support some CJK compatibility ideographs
    // Ref: https://en.wikipedia.org/wiki/CJK_Compatibility_Ideographs
    // These are accepted when converting Unicode -> CP936, but are not
    // produced when converting CP936 -> Unicode
    "\xf9\x00" => "\xd8\x4d",
    "\xf9\x01" => "\xb8\xfc",
    "\xf9\x02" => "\xdc\x87",
    "\xf9\x03" => "\xd9\x5a",
    "\xf9\x04" => "\xbb\xac",
    "\xf9\x05" => "\xb4\xae",
    "\xf9\x06" => "\xbe\xe4",
    "\xf9\x07" => "\xfd\x94",
    "\xf9\x08" => "\xfd\x94",
    "\xf9\x09" => "\xc6\xf5",
    "\xf9\x0a" => "\xbd\xf0",
    "\xf9\x0b" => "\xc0\xae",
    "\xf9\x0c" => "\xc4\xce",
    "\xf9\x0d" => "\x91\xd0",
    "\xf9\x0e" => "\xb0\x5d",
    "\xf9\x0f" => "\xc1\x5f",
    "\xf9\x10" => "\xcc\x7d",
    "\xf9\x11" => "\xc2\xdd",
    "\xf9\x12" => "\xc2\xe3",
    "\xf9\x13" => "\xdf\x89",
    "\xf9\x14" => "\x98\xb7",
    "\xf9\x15" => "\xc2\xe5",
    "\xf9\x16" => "\xc0\xd3",
    "\xf9\x17" => "\xe7\xf3",
    "\xf9\x18" => "\xc2\xe4",
    "\xf9\x19" => "\xc0\xd2",
    "\xf9\x1a" => "\xf1\x98",
    "\xf9\x1b" => "\x81\x79",
    "\xf9\x1c" => "\xc2\xd1",
    "\xf9\x1d" => "\x99\xda",
    "\xf9\x1e" => "\xa0\x80",
    "\xf9\x1f" => "\xcc\x6d",
    "\xf9\x20" => "\xfb\x5b",
    "\xf9\x21" => "\x8d\xb9",
    "\xf9\x22" => "\x9e\x45",
    "\xf9\x23" => "\xcb\x7b",
    "\xf9\x24" => "\xd2\x68",
    "\xf9\x25" => "\xc0\xad",
    "\xf9\x26" => "\xc5\x44",
    "\xf9\x27" => "\xcf\x9e",
    "\xf9\x28" => "\xc0\xc8",
    "\xf9\x29" => "\xc0\xca",
    "\xf9\x2a" => "\xc0\xcb",
    "\xf9\x2b" => "\xc0\xc7",
    "\xf9\x2c" => "\xfd\x9c",
    "\xf9\x2d" => "\x81\xed",
    "\xf9\x2e" => "\xc0\xe4",
    "\xf9\x2f" => "\x84\xda",
    "\xf9\x30" => "\x93\xef",
    "\xf9\x31" => "\x99\xa9",
    "\xf9\x32" => "\xa0\x74",
    "\xf9\x33" => "\xb1\x52",
    "\xf9\x34" => "\xc0\xcf",
    "\xf9\x35" => "\xcc\x4a",
    "\xf9\x36" => "\xcc\x94",
    "\xf9\x37" => "\xc2\xb7",
    "\xf9\x38" => "\xc2\xb6",
    "\xf9\x39" => "\xf4\x94",
    "\xf9\x3a" => "\xfa\x98",
    "\xf9\x3b" => "\xc2\xb5",
    "\xf9\x3c" => "\xb5\x93",
    "\xf9\x3d" => "\xbe\x47",
    "\xf9\x3e" => "\xc7\x8a",
    "\xf9\x3f" => "\xe4\x9b",
    "\xf9\x40" => "\xc2\xb9",
    "\xf9\x41" => "\xd5\x93",
    "\xf9\x42" => "\x89\xc5",
    "\xf9\x43" => "\xc5\xaa",
    "\xf9\x44" => "\xbb\x5c",
    "\xf9\x45" => "\xc3\x40",
    "\xf9\x46" => "\xc0\xce",
    "\xf9\x47" => "\xc0\xda",
    "\xf9\x48" => "\xd9\x54",
    "\xf9\x49" => "\xc0\xd7",
    "\xf9\x4a" => "\x89\xbe",
    "\xf9\x4b" => "\x8c\xd2",
    "\xf9\x4c" => "\x98\xc7",
    "\xf9\x4d" => "\x9c\x49",
    "\xf9\x4e" => "\xc2\xa9",
    "\xf9\x4f" => "\xc0\xdb",
    "\xf9\x50" => "\xbf\x7c",
    "\xf9\x51" => "\xc2\xaa",
    "\xf9\x52" => "\xc0\xd5",
    "\xf9\x53" => "\xc0\xdf",
    "\xf9\x54" => "\x84\x43",
    "\xf9\x55" => "\xc1\xe8",
    "\xf9\x56" => "\xb6\xa0",
    "\xf9\x57" => "\xbe\x63",
    "\xf9\x58" => "\xc1\xe2",
    "\xf9\x59" => "\xc1\xea",
    "\xf9\x5a" => "\xd7\x78",
    "\xf9\x5b" => "\x92\x82",
    "\xf9\x5c" => "\x98\xb7",
    "\xf9\x5d" => "\xd6\x5a",
    "\xf9\x5e" => "\xb5\xa4",
    "\xf9\x5f" => "\x8c\x8e",
    "\xf9\x60" => "\xc5\xad",
    "\xf9\x61" => "\xc2\xca",
    "\xf9\x62" => "\xae\x90",
    "\xf9\x63" => "\xb1\xb1",
    "\xf9\x64" => "\xb4\x91",
    "\xf9\x65" => "\xb1\xe3",
    "\xf9\x66" => "\x8f\xcd",
    "\xf9\x67" => "\xb2\xbb",
    "\xf9\x68" => "\xc3\xda",
    "\xf9\x69" => "\x94\xb5",
    "\xf9\x6a" => "\xcb\xf7",
    "\xf9\x6b" => "\x85\xa2",
    "\xf9\x6c" => "\xc8\xfb",
    "\xf9\x6d" => "\xca\xa1",
    "\xf9\x6e" => "\xc8\x7e",
    "\xf9\x6f" => "\xd5\x66",
    "\xf9\x70" => "\x9a\xa2",
    "\xf9\x71" => "\xb3\xbd",
    "\xf9\x72" => "\xc9\xf2",
    "\xf9\x73" => "\xca\xb0",
    "\xf9\x74" => "\xc8\xf4",
    "\xf9\x75" => "\xc2\xd3",
    "\xf9\x76" => "\xc2\xd4",
    "\xf9\x77" => "\xc1\xc1",
    "\xf9\x78" => "\x83\xc9",
    "\xf9\x7a" => "\xc1\xba",
    "\xf9\x7b" => "\xbc\x5a",
    "\xf9\x7c" => "\xc1\xbc",
    "\xf9\x7d" => "\xd5\x8f",
    "\xf9\x7e" => "\xc1\xbf",
    "\xf9\x7f" => "\x84\xee",
    "\xf9\x80" => "\x85\xce",
    "\xf9\x81" => "\xc5\xae",
    "\xf9\x82" => "\x8f\x5d",
    "\xf9\x83" => "\xc2\xc3",
    "\xf9\x84" => "\x9e\x56",
    "\xf9\x85" => "\xb5\x5a",
    "\xf9\x86" => "\xe9\x82",
    "\xf9\x87" => "\xf3\x50",
    "\xf9\x88" => "\xfb\x90",
    "\xf9\x89" => "\xc0\xe8",
    "\xf9\x8a" => "\xc1\xa6",
    "\xf9\x8b" => "\x95\xd1",
    "\xf9\x8c" => "\x9a\x76",
    "\xf9\x8d" => "\xde\x5d",
    "\xf9\x8e" => "\xc4\xea",
    "\xf9\x8f" => "\x91\x7a",
    "\xf9\x90" => "\x91\xd9",
    "\xf9\x91" => "\x93\xd3",
    "\xf9\x92" => "\x9d\x69",
    "\xf9\x93" => "\x9f\x92",
    "\xf9\x94" => "\xad\x49",
    "\xf9\x95" => "\xfd\x9e",
    "\xf9\x96" => "\xbe\x9a",
    "\xf9\x97" => "\xc2\x93",
    "\xf9\x98" => "\xdd\x82",
    "\xf9\x99" => "\xc9\x8f",
    "\xf9\x9a" => "\xdf\x42",
    "\xf9\x9b" => "\xe5\x80",
    "\xf9\x9c" => "\xc1\xd0",
    "\xf9\x9d" => "\xc1\xd3",
    "\xf9\x9e" => "\xd1\xca",
    "\xf9\x9f" => "\xc1\xd2",
    "\xf9\xa0" => "\xc1\xd1",
    "\xf9\xa1" => "\xd5\x66",
    "\xf9\xa2" => "\xc1\xae",
    "\xf9\xa3" => "\xc4\xee",
    "\xf9\xa4" => "\xc4\xed",
    "\xf9\xa5" => "\x9a\x9a",
    "\xf9\xa6" => "\xba\x9f",
    "\xf9\xa7" => "\xab\x43",
    "\xf9\xa8" => "\xc1\xee",
    "\xf9\xa9" => "\xe0\xf2",
    "\xf9\xaa" => "\x8c\x8e",
    "\xf9\xab" => "\x8e\x58",
    "\xf9\xac" => "\xc1\xaf",
    "\xf9\xad" => "\xc1\xe1",
    "\xf9\xae" => "\xac\x93",
    "\xf9\xaf" => "\xc1\xe7",
    "\xf9\xb0" => "\xf1\xf6",
    "\xf9\xb1" => "\xe2\x8f",
    "\xf9\xb2" => "\xc1\xe3",
    "\xf9\xb3" => "\xec\x60",
    "\xf9\xb4" => "\xee\x49",
    "\xf9\xb5" => "\xc0\xfd",
    "\xf9\xb6" => "\xb6\x59",
    "\xf9\xb7" => "\xf5\xb7",
    "\xf9\xb8" => "\xeb\x60",
    "\xf9\xb9" => "\x90\xba",
    "\xf9\xba" => "\xc1\xcb",
    "\xf9\xbb" => "\xc1\xc5",
    "\xf9\xbc" => "\xe5\xbc",
    "\xf9\xbd" => "\xc4\xf2",
    "\xf9\xbe" => "\xc1\xcf",
    "\xf9\xbf" => "\x98\xb7",
    "\xf9\xc0" => "\xc1\xc7",
    "\xf9\xc1" => "\xaf\x9f",
    "\xf9\xc2" => "\xde\xa4",
    "\xf9\xc3" => "\xdf\x7c",
    "\xf9\xc4" => "\xfd\x88",
    "\xf9\xc5" => "\x95\x9e",
    "\xf9\xc6" => "\xc8\xee",
    "\xf9\xc7" => "\x84\xa2",
    "\xf9\xc8" => "\x96\x83",
    "\xf9\xc9" => "\xc1\xf8",
    "\xf9\xca" => "\xc1\xf7",
    "\xf9\xcb" => "\xc1\xef",
    "\xf9\xcc" => "\xc1\xf0",
    "\xf9\xcd" => "\xc1\xf4",
    "\xf9\xce" => "\xc1\xf2",
    "\xf9\xcf" => "\xbc\x7e",
    "\xf9\xd0" => "\xee\x90",
    "\xf9\xd1" => "\xc1\xf9",
    "\xf9\xd2" => "\xc2\xbe",
    "\xf9\xd3" => "\xea\x91",
    "\xf9\xd4" => "\x82\x90",
    "\xf9\xd5" => "\x8d\x91",
    "\xf9\xd6" => "\x9c\x53",
    "\xf9\xd7" => "\xdd\x86",
    "\xf9\xd8" => "\xc2\xc9",
    "\xf9\xd9" => "\x90\xfc",
    "\xf9\xda" => "\xc0\xf5",
    "\xf9\xdb" => "\xc2\xca",
    "\xf9\xdc" => "\xc2\xa1",
    "\xf9\xdd" => "\xc0\xfb",
    "\xf9\xde" => "\xc0\xf4",
    "\xf9\xdf" => "\xc2\xc4",
    "\xf9\xe0" => "\xd2\xd7",
    "\xf9\xe1" => "\xc0\xee",
    "\xf9\xe2" => "\xc0\xe6",
    "\xf9\xe3" => "\xc4\xe0",
    "\xf9\xe4" => "\xc0\xed",
    "\xf9\xe5" => "\xc1\xa1",
    "\xf9\xe6" => "\xee\xbe",
    "\xf9\xe8" => "\xd1\x65",
    "\xf9\xe9" => "\xc0\xef",
    "\xf9\xea" => "\xeb\x78",
    "\xf9\xeb" => "\xc4\xe4",
    "\xf9\xec" => "\xc4\xe7",
    "\xf9\xed" => "\xc1\xdf",
    "\xf9\xee" => "\x9f\xfb",
    "\xf9\xef" => "\xad\x55",
    "\xf9\xf0" => "\xcc\x41",
    "\xf9\xf2" => "\xf7\x5b",
    "\xf9\xf3" => "\xf7\xeb",
    "\xf9\xf4" => "\xc1\xd6",
    "\xf9\xf5" => "\xc1\xdc",
    "\xf9\xf6" => "\xc5\x52",
    "\xf9\xf7" => "\xc1\xa2",
    "\xf9\xf8" => "\xf3\xd2",
    "\xf9\xf9" => "\xc1\xa3",
    "\xf9\xfa" => "\xa0\xee",
    "\xf9\xfb" => "\xd6\xcb",
    "\xf9\xfc" => "\xd7\x52",
    "\xf9\xfd" => "\xca\xb2",
    "\xf9\xfe" => "\xb2\xe8",
    "\xf9\xff" => "\xb4\xcc",
    "\xfa\x00" => "\xc7\xd0",
    "\xfa\x01" => "\xb6\xc8",
    "\xfa\x02" => "\xcd\xd8",
    "\xfa\x03" => "\xcc\xc7",
    "\xfa\x04" => "\xd5\xac",
    "\xfa\x05" => "\xb6\xb4",
    "\xfa\x06" => "\xb1\xa9",
    "\xfa\x07" => "\xdd\x97",
    "\xfa\x08" => "\xd0\xd0",
    "\xfa\x09" => "\xbd\xb5",
    "\xfa\x0a" => "\xd2\x8a",
    "\xfa\x0b" => "\xc0\xaa",
    "\xfa\x10" => "\x89\x56",
    "\xfa\x12" => "\xc7\xe7",
    "\xfa\x15" => "\x84\x44",
    "\xfa\x16" => "\xd8\x69",
    "\xfa\x17" => "\xd2\xe6",
    "\xfa\x19" => "\xc9\xf1",
    "\xfa\x1a" => "\xcf\xe9",
    "\xfa\x1b" => "\xb8\xa3",
    "\xfa\x1c" => "\xbe\xb8",
    "\xfa\x1d" => "\xbe\xab",
    "\xfa\x1e" => "\xd3\xf0",
    "\xfa\x22" => "\xd6\x54",
    "\xfa\x25" => "\xd2\xdd",
    "\xfa\x26" => "\xb6\xbc",
    "\xfa\x2a" => "\xef\x88",
    "\xfa\x2b" => "\xef\x95",
    "\xfa\x2c" => "\xf0\x5e",
    "\xfa\x2d" => "\xfa\x51",
];

// Plain per-key assignment: an entry already present in $fromUnicode is
// overwritten in place, and a new one is appended in table order.
foreach ($inputOnlyMappings as $utf16Bytes => $cp936Bytes) {
    $fromUnicode[$utf16Bytes] = $cp936Bytes;
}

// CP936 -> UTF-16BE
findInvalidChars($toUnicode, $invalid, $truncated);
testAllValidChars($toUnicode, 'CP936', 'UTF-16BE');
testAllInvalidChars($invalid, $toUnicode, 'CP936', 'UTF-16BE', "\x00%");
testTruncatedChars($truncated, 'CP936', 'UTF-16BE', "\x00%");
echo "Tested CP936 -> UTF-16BE\n";

// UTF-16BE -> CP936
// The last argument maps every possible first byte (0x00-0xFF) to 2
// (presumably the code unit length for that lead byte; verify against
// findInvalidChars() in encoding_tests.inc).
$firstByteMap = array_fill_keys(range(0, 0xFF), 2);
findInvalidChars($fromUnicode, $invalid, $unused, $firstByteMap);
convertAllInvalidChars($invalid, $fromUnicode, 'UTF-16BE', 'CP936', '%');
echo "Tested UTF-16BE -> CP936\n";

// Test "long" illegal character markers
mb_substitute_character("long");
foreach (["\x81\x20", "\x81\x7F", "\xFE\xFF"] as $badSequence) {
    convertInvalidString($badSequence, "%", "CP936", "UTF-8");
}

echo "Done!\n";
?>
--EXPECT--
Tested CP936 -> UTF-16BE
Tested UTF-16BE -> CP936
Done!