--TEST-- Unicode standard conformance test (ill-formed UTF sequences.) --SKIPIF-- --FILE-- > 12), 0x80 | ($i >> 6) & 0x3f, 0x80 | $i & 0x3f), 3);
// NOTE(review): the text between --FILE-- and this point starts mid-expression.
// The opening PHP tag, the definition of chk_enc(), and the head of this loop
// are not present in this copy, and nothing visible prints the
// "UTF-8 redundancy" / "UTF-8 and surrogates area" output that --EXPECT-- lists.
// Restore the missing head from the upstream test before running.
// Tally used by every loop below: a chk_enc() result of exactly false bumps
// $cnt; any other result is appended to $out.
if ($s === false) { $cnt++; } else { $out .= $s; } } var_dump($cnt); var_dump($out);
// UTF-32 range checks. The inputs are the four-byte forms of 0x110000 and
// 0x10FFFF, given big-endian ("UTF-32BE"), little-endian ("UTF-32LE") and as
// plain "UTF-32"; the last four calls prefix the value with a BOM
// (00 00 FE FF or FF FE 00 00). --EXPECT-- lists false for the un-prefixed
// 0x110000 inputs and hex strings for the rest.
// NOTE(review): the second argument (1 or 0) is interpreted by chk_enc(), which
// is not visible here; presumably the number of substitution characters the
// caller expects. Confirm against the helper.
echo "UTF-32 code range\n"; var_dump(chk_enc("\x00\x11\x00\x00", 1, "UTF-32BE")); var_dump(chk_enc("\x00\x10\xff\xff", 0, "UTF-32BE")); var_dump(chk_enc("\x00\x00\x11\x00", 1, "UTF-32LE")); var_dump(chk_enc("\xff\xff\x10\x00", 0, "UTF-32LE")); var_dump(chk_enc("\x00\x11\x00\x00", 1, "UTF-32")); var_dump(chk_enc("\x00\x10\xff\xff", 0, "UTF-32")); var_dump(chk_enc("\x00\x00\xfe\xff\x00\x11\x00\x00", 0, "UTF-32")); var_dump(chk_enc("\x00\x00\xfe\xff\x00\x10\xff\xff", 0, "UTF-32")); var_dump(chk_enc("\xff\xfe\x00\x00\x00\x00\x11\x00", 0, "UTF-32")); var_dump(chk_enc("\xff\xfe\x00\x00\xff\xff\x10\x00", 0, "UTF-32"));
// Walk every value from 0xD7FF to 0xE000 inclusive (2050 values), packing each
// as four bytes with the most significant byte first, and check it as
// "UTF-32BE". --EXPECT-- lists 2048 false results plus the hex of the two
// boundary values 0xD7FF and 0xE000.
echo "UTF-32 and surrogates area\n"; $out = ''; $cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { $s = chk_enc(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), 1, "UTF-32BE"); if ($s === false) { $cnt++; } else { $out .= $s; } } var_dump($cnt); var_dump($out);
// Same walk, with the bytes packed least significant byte first and checked
// as "UTF-32LE".
$out = ''; $cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { $s = chk_enc(pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), 1, "UTF-32LE"); if ($s === false) { $cnt++; } else { $out .= $s; } } var_dump($cnt); var_dump($out);
// Same walk again with most-significant-byte-first packing, but the encoding
// name is plain "UTF-32" (no endianness suffix, no BOM in the input).
$out = ''; $cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { $s = chk_enc(pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), 1, "UTF-32"); if ($s === false) { $cnt++; } else { $out .= $s; } } var_dump($cnt); var_dump($out);
// BOM variants: each value is prefixed with 00 00 FE FF and packed most
// significant byte first; chk_enc() receives true as its fourth argument.
// The accumulated hex has every "0000feff" removed before it is dumped.
// NOTE(review): the fourth argument's effect lives in chk_enc(), not visible
// here; presumably it tells the helper to expect a leading BOM. Confirm.
echo "UTF-32 and surrogates area with BOM\n"; $out = ''; $cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { $s = chk_enc("\x00\x00\xfe\xff". 
pack('C4', $i >> 24, ($i >> 16) & 0xff, ($i >> 8) & 0xff, $i & 0xff), 1, "UTF-32", true); if ($s === false) { $cnt++; } else { $out .= $s; } } var_dump($cnt); var_dump(str_replace("0000feff","",$out));
// Little-endian BOM variant: prefix FF FE 00 00, bytes packed least
// significant byte first, same tally and the same "0000feff" removal.
$out = ''; $cnt = 0; for ($i = 0xd7ff; $i <= 0xe000; ++$i) { $s = chk_enc("\xff\xfe\x00\x00". pack('C4', $i & 0xff, ($i >> 8) & 0xff, ($i >> 16) & 0xff, ($i >> 24) & 0xff), 1, "UTF-32", true); if ($s === false) { $cnt++; } else { $out .= $s; } } var_dump($cnt); var_dump(str_replace("0000feff","",$out)); ?> --EXPECT-- UTF-8 redundancy string(24) "000000310000003200000033" string(24) "000000410000004200000043" bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) string(24) "000000a2000000a3000000a5" bool(false) bool(false) bool(false) bool(false) bool(false) string(8) "00000080" string(8) "000007ff" bool(false) string(8) "00000800" string(8) "0000ffff" bool(false) string(8) "00010000" bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) bool(false) UTF-8 and surrogates area int(2048) string(16) "0000d7ff0000e000" UTF-32 code range bool(false) string(8) "0010ffff" bool(false) string(8) "0010ffff" bool(false) string(8) "0010ffff" string(16) "0000feff0000fffd" string(16) "0000feff0010ffff" string(16) "0000feff0000fffd" string(16) "0000feff0010ffff" UTF-32 and surrogates area int(2048) string(16) "0000d7ff0000e000" int(2048) string(16) "0000d7ff0000e000" int(2048) string(16) "0000d7ff0000e000" UTF-32 and surrogates area with BOM int(2048) string(16) "0000d7ff0000e000" int(2048) string(16) "0000d7ff0000e000"