--TEST--
Test mb_decode_numericentity() function : Convert HTML entities to text
--EXTENSIONS--
mbstring
--FILE--
<?php

// NOTE(review): many literals in this file are a bare character or U+FFFD
// where --EXPECT-- reports a much longer input (e.g. string(13) for a
// one-character literal). It looks like the numeric entities were decoded
// somewhere upstream - TODO confirm against the canonical copy of this test.
// The literals below are kept exactly as received.

/**
 * Returns what var_dump() prints for $var, with surrounding whitespace removed.
 *
 * @param mixed $var Value to dump.
 * @return string Trimmed var_dump() output.
 */
function varDumpToString($var)
{
    ob_start();
    var_dump($var);
    $dump = ob_get_clean();

    return trim($dump);
}

/**
 * Decodes $str and prints one report line showing input and result as
 * var_dump() text, followed by "(Good)" when the result is identical to
 * $expected or "(BAD; expected ...)" otherwise.
 *
 * @param string $desc     Label printed at the start of the line.
 * @param string $str      Input passed to mb_decode_numericentity().
 * @param string $expected Result the decoder should return.
 * @param array  $convmap  Conversion map passed to the decoder.
 * @param string $encoding Encoding name passed to the decoder.
 */
function test($desc, $str, $expected, $convmap, $encoding)
{
    $result = mb_decode_numericentity($str, $convmap, $encoding);

    $verdict = ($result === $expected)
        ? " (Good)\n"
        : " (BAD; expected " . varDumpToString($expected) . ")\n";

    echo $desc, ": ", varDumpToString($str), " => ", varDumpToString($result), $verdict;
}

/**
 * Same report as test(), but input, result and expectation are printed as
 * hex via bin2hex() instead of var_dump() text.
 *
 * @param string $desc     Label printed at the start of the line.
 * @param string $str      Input passed to mb_decode_numericentity().
 * @param string $expected Result the decoder should return.
 * @param array  $convmap  Conversion map passed to the decoder.
 * @param string $encoding Encoding name passed to the decoder.
 */
function testNonAscii($desc, $str, $expected, $convmap, $encoding)
{
    $result = mb_decode_numericentity($str, $convmap, $encoding);

    $verdict = ($result === $expected)
        ? " (Good)\n"
        : " (BAD; expected " . bin2hex($expected) . ")\n";

    echo $desc, ": ", bin2hex($str), " => ", bin2hex($result), $verdict;
}

$str1 = '¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ';
$str2 = 'ƒΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϑϒϖ•…′″‾⁄℘ℑℜ™ℵ←↑→↓↔↵⇐⇑⇒⇓⇔∀∂∃∅∇∈∉∋∏∑−∗√∝∞∠∧∨∩∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌈⌉⌊⌋〈〉◊♠♣♥♦';
$str3 = 'aŒbœcŠdše€fg';

// Ordinary convmap applied to ordinary entity-encoded strings
$convmap = [0x0, 0x2FFFF, 0, 0xFFFF];
foreach ([1 => $str1, 2 => $str2, 3 => $str3] as $label => $encoded) {
    echo $label . ": " . mb_decode_numericentity($encoded, $convmap, "UTF-8") . "\n";
}

// Entities cut off by the end of the string
echo "4: ", mb_decode_numericentity('�', $convmap), "\n"; // Entity is too big
echo "5: ", mb_decode_numericentity('�', $convmap), "\n"; // Entity is too big
echo "6: ", mb_decode_numericentity('�', $convmap), "\n"; // Too many digits
echo "7: ", mb_decode_numericentity('�', $convmap), "\n"; // Too many digits
echo "8: ", mb_decode_numericentity('�', $convmap), "\n"; // Too many digits
echo "9: ", mb_decode_numericentity('�', $convmap), "\n"; // Too many digits
echo "10: ", bin2hex(mb_decode_numericentity('�', $convmap)), "\n"; // OK
echo "11: ", bin2hex(mb_decode_numericentity('�', $convmap)), "\n"; // OK
// Same idea with hexadecimal entities
echo "11b: ", bin2hex(mb_decode_numericentity('�', $convmap)), "\n"; // OK
echo "11c: ", bin2hex(mb_decode_numericentity('�', $convmap)), "\n"; // OK
echo "11d: ", bin2hex(mb_decode_numericentity('𐀀', $convmap)), "\n"; // OK
echo "11e: ", mb_decode_numericentity('�', $convmap), "\n"; // Too many digits

// Big decimal entity, with the input first converted to a non-ASCII encoding
$ucs4Input = mb_convert_encoding('�', 'UCS-4', 'ASCII');
$wideMap = [0, 0x7FFFFFFF, 0, 0x7FFFFFFF];
echo "12: ", bin2hex(mb_decode_numericentity($ucs4Input, $wideMap, 'UCS-4')), "\n";

// Empty convmap
$convmap = [];
echo "13: " . mb_decode_numericentity('föo', $convmap, "UTF-8") . "\n";

$lowMap = [0, 1, 0, 0xFFFF];
echo "15: " . bin2hex(mb_decode_numericentity('�', $lowMap, 'UTF-8')) . "\n";
echo "16: " . bin2hex(mb_decode_numericentity('�', $lowMap, 'UTF-8')) . "\n";

// Unusual convmap
$convmap = [
    0, 0, 0, 0,         // A single codepoint with an empty mask
    100, 50, 0, 0xFFFF, // Range whose 'end' codepoint precedes its 'start'
];
echo "17: " . mb_decode_numericentity('föo', $convmap, "UTF-8") . "\n";

// Convmap whose offset is positive
$convmap = [0, 10, 1000, 0xFFFF];
echo "18: " . bin2hex(mb_decode_numericentity("ϩϪϫ", $convmap, "UTF-8")) . "\n";
echo "19: " . bin2hex(mb_decode_numericentity("ϩϪϫ", $convmap, "UTF-8")) . "\n";

// Convmaps shared by the remaining cases
$bmpMap = [0, 0xFFFF, 0, 0xFFFF];
$nonZeroMap = [0x1, 0xFFFF, 0, 0xFFFF];

echo "20: " . mb_decode_numericentity("{a;", $bmpMap) . "\n";

test("10 digits for decimal entity", "A", "A", $bmpMap, 'ASCII');
test("More than 10 digits for decimal entity", "¥", "¥", $bmpMap, 'ASCII');

test("8 digits for hex entity", "A", "A", $bmpMap, 'ASCII');
test("More than 8 digits for hex entity", "Ł", "Ł", $bmpMap, 'ASCII');

test("Single &", "&", "&", $bmpMap, "ASCII");

// A lone ampersand may be followed directly by an entity
test("Successive &", "&A,", "&A,", $bmpMap, 'ASCII');

// So may a dangling &# or &#x
test("Successive &#", "&#2", "", $bmpMap, 'ASCII');
test("Successive &#x", "&#x2", "", $bmpMap, 'ASCII');

test("&#x only", "&#x;", "&#x;", $bmpMap, 'ASCII');

// The & that opens one entity may also end the entity before it
test("Successive A", "AA", "AA", $bmpMap, 'ASCII');
test("Successive hex entities", "22", "22", $bmpMap, 'ASCII');

// An entity may directly follow one that was rejected for being too long
test("Starting entity immediately after decimal entity which is too long", "�A", "�A", $bmpMap, 'ASCII');
test("Starting entity immediately after hex entity which is too long", "�A", "�", $bmpMap, 'ASCII');

test("Starting entity immediately after invalid decimal entity", "�A", "�A", $nonZeroMap, 'ASCII');
// The expected value on the next call is a string holding one literal line feed
test("Starting entity immediately after invalid hex entity", "�A", "
", $nonZeroMap, 'ASCII');

// Put '&', '&#', or '&#x' at the very end of one buffer of wchars while more
// input is still waiting in the next buffer
// (mb_decode_numericentity cuts its input into 'chunks' and handles them one
// at a time)
$convmap = $bmpMap;
$boundaryCases = [
    // Invalid decimal/hex entities first: [input, expected, failure message]
    ["&#ZZZ", "&#ZZZ", "&#ZZZ is broken when it spans two buffers!"],
    ["&#xZZZ", "&#xZZZ", "&#xZZZ is broken when it spans two buffers!"],
    // Then valid decimal/hex entities
    ["A", "A", "A is broken when it spans two buffers!"],
    ["A", "A", "A is broken when it spans two buffers!"],
];
foreach (range(0, 255) as $padLength) {
    $padding = str_repeat("a", $padLength);
    foreach ($boundaryCases as [$entity, $decoded, $failure]) {
        if (mb_decode_numericentity($padding . $entity, $convmap, 'UTF-8') !== $padding . $decoded) {
            die($failure);
        }
    }
}

// Entities so long that one of them fills a whole buffer
foreach (range(12, 255) as $zeroCount) {
    $zeros = str_repeat("0", $zeroCount);

    $decimal = "&#" . $zeros . "65";
    if (mb_decode_numericentity($decimal, $convmap, 'UTF-8') !== $decimal) {
        die("Decimal entity with huge number of digits broken");
    }

    $hex = "&#x" . $zeros . "41";
    if (mb_decode_numericentity($hex, $convmap, 'UTF-8') !== $hex) {
        die("Hexadecimal entity with huge number of digits broken");
    }
}

?>
--EXPECT--
1: ¡¢£¤¥¦§¨©ª«¬®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖ×ØÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿ
2: ƒΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩαβγδεζηθικλμνξοπρςστυφχψωϑϒϖ•…′″‾⁄℘ℑℜ™ℵ←↑→↓↔↵⇐⇑⇒⇓⇔∀∂∃∅∇∈∉∋∏∑−∗√∝∞∠∧∨∩∪∫∴∼≅≈≠≡≤≥⊂⊃⊄⊆⊇⊕⊗⊥⋅⌈⌉⌊⌋〈〉◊♠♣♥♦
3: aŒbœcŠdše€fg
4: �
5: �
6: �
7: �
8: �
9: �
10: 00
11: 00
11b: 00
11c: 00
11d: f0908080
11e: �
12: 00bc614e
13: föo
15: 00
16: 00
17: föo
18: 010203
19: 010203
20: {a;
10 digits for decimal entity: string(13) "A" => string(1) "A" (Good)
More than 10 digits for decimal entity: string(14) "¥" => string(14) "¥" (Good)
8 digits for hex entity: string(12) "A" => string(1) "A" (Good)
More than 8 digits for hex entity: string(13) "Ł" => string(13) "Ł" (Good)
Single &: string(1) "&" => string(1) "&" (Good)
Successive &: string(6) "&A," => string(3) "&A," (Good)
Successive &#: string(8) "&#2" => string(3) "" (Good)
Successive &#x: string(9) "&#x2" => string(4) "" (Good)
&#x only: string(4) "&#x;" => string(4) "&#x;" (Good)
Successive A: string(9) "AA" => string(2) "AA" (Good)
Successive hex entities: string(11) "22" => string(2) "22" (Good)
Starting entity immediately after decimal entity which is too long: string(18) "�A" => string(14) "�A" (Good)
Starting entity immediately after hex entity which is too long: string(17) "�A" => string(13) "�" (Good)
Starting entity immediately after invalid decimal entity: string(8) "�A" => string(4) "�A" (Good)
Starting entity immediately after invalid hex entity: string(9) "�A" => string(5) "
" (Good)