--TEST--
Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
--SKIPIF--
<?php
require_once("skipif.inc");
if (!extension_loaded('iconv')) die ("skip iconv extension not available");
?>
--FILE--
<?php
/**
 * One parse scenario for bug #32001.
 *
 * Builds a small XML document whose element and attribute names are
 * non-ASCII, converts it to the requested encoding with iconv(), optionally
 * prepends a byte order mark and/or omits the XML prologue, then feeds it to
 * a parser created without an explicit encoding, either in one call or in
 * fixed-size chunks.
 */
class testcase {
    private $encoding;
    private $bom;
    private $prologue;
    private $tags;
    private $chunk_size;

    /**
     * @param string $enc           Target encoding name handed to iconv().
     * @param int    $chunk_size    Bytes per xml_parse() call; 0 parses everything at once.
     * @param int    $bom           Non-zero to prepend the BOM matching $enc.
     * @param int    $omit_prologue Non-zero to leave out the XML declaration.
     */
    // Must be __construct(): a method named after the class is no longer
    // treated as a constructor as of PHP 8.0, which left every property unset.
    public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = array();
    }

    /**
     * Start-element handler: records the element name and its attribute
     * values as hex so the expected output is independent of the terminal
     * encoding.
     */
    public function start_element($parser, $name, $attrs) {
        $attrs = array_map('bin2hex', $attrs);
        $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
    }

    /**
     * End-element handler: intentionally empty; it is registered only
     * because xml_set_element_handler() takes both handlers.
     */
    public function end_element($parser, $name) {
    }

    /**
     * Builds the document, parses it, and prints the scenario parameters
     * followed by either the collected tags or the parser error string.
     */
    public function run() {
        $data = '';

        if ($this->prologue) {
            // Strip the endianness suffix so the declaration reads e.g. "UTF-16".
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

        $data = iconv("UTF-8", $this->encoding, $data);

        if ($this->bom) {
            // Prepend the byte order mark that corresponds to the encoding.
            switch (strtoupper($this->encoding)) {
                case 'UTF-8':
                case 'UTF8':
                    $data = "\xef\xbb\xbf".$data;
                    break;

                case 'UTF-16':
                case 'UTF16':
                case 'UTF-16BE':
                case 'UTF16BE':
                case 'UCS-2':
                case 'UCS2':
                case 'UCS-2BE':
                case 'UCS2BE':
                    $data = "\xfe\xff".$data;
                    break;

                case 'UTF-16LE':
                case 'UTF16LE':
                case 'UCS-2LE':
                case 'UCS2LE':
                    $data = "\xff\xfe".$data;
                    break;

                case 'UTF-32':
                case 'UTF32':
                case 'UTF-32BE':
                case 'UTF32BE':
                case 'UCS-4':
                case 'UCS4':
                case 'UCS-4BE':
                case 'UCS4BE':
                    $data = "\x00\x00\xfe\xff".$data;
                    break;

                case 'UTF-32LE':
                case 'UTF32LE':
                case 'UCS-4LE':
                case 'UCS4LE':
                    $data = "\xff\xfe\x00\x00".$data;
                    break;
            }
        }

        // No encoding is passed, so the parser has to detect it from the input.
        $parser = xml_parser_create(NULL);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        // Bind the handlers as array callables instead of bare method names
        // plus xml_set_object(): that combination is deprecated in PHP 8.4 and
        // its notice would leak into the compared output.
        xml_set_element_handler(
            $parser,
            array($this, 'start_element'),
            array($this, 'end_element')
        );

        if ($this->chunk_size == 0) {
            $success = @xml_parse($parser, $data, true);
        } else {
            // Defined up front so the final parse still runs (and reports an
            // error) if there is nothing to feed, e.g. when iconv() failed.
            $success = true;
            for ($offset = 0; $offset < strlen($data);
                    $offset += $this->chunk_size) {
                $success = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
                if (!$success) {
                    break;
                }
            }
            if ($success) {
                // Signal end of input once every chunk has been accepted.
                $success = @xml_parse($parser, "", true);
            }
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }
}

// Arguments: encoding, chunk size (0 = all at once), BOM flag, omit-prologue flag.
$suite = array(
    new testcase("UTF-8",     0, 0, 0),
    new testcase("UTF-8",     0, 0, 1),
    new testcase("UTF-8",     0, 1, 0),
    new testcase("UTF-8",     0, 1, 1),
    new testcase("UTF-16BE",  0, 0, 0),
    new testcase("UTF-16BE",  0, 1, 0),
    new testcase("UTF-16BE",  0, 1, 1),
    new testcase("UTF-16LE",  0, 0, 0),
    new testcase("UTF-16LE",  0, 1, 0),
    new testcase("UTF-16LE",  0, 1, 1),
    new testcase("UTF-8",     1, 0, 0),
    new testcase("UTF-8",     1, 0, 1),
    new testcase("UTF-8",     1, 1, 0),
    new testcase("UTF-8",     1, 1, 1),
    new testcase("UTF-16BE",  1, 0, 0),
    new testcase("UTF-16BE",  1, 1, 0),
    new testcase("UTF-16BE",  1, 1, 1),
    new testcase("UTF-16LE",  1, 0, 0),
    new testcase("UTF-16LE",  1, 1, 0),
    new testcase("UTF-16LE",  1, 1, 1),
);

if (XML_SAX_IMPL == 'libxml') {
    echo "libxml2 Version => " . LIBXML_DOTTED_VERSION. "\n";
} else {
    echo "libxml2 Version => NONE\n";
}

foreach ($suite as $testcase) {
    $testcase->run();
}

// vim600: sts=4 sw=4 ts=4 encoding=UTF-8
?>
--EXPECTF--
libxml2 Version => %s
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}