--TEST--
Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
--EXTENSIONS--
iconv
xml
--SKIPIF--
<?php
if (ICONV_IMPL == 'glibc' && version_compare(ICONV_VERSION, '2.12', '<='))
    die("skip iconv of glibc <= 2.12 is buggy");
?>
--FILE--
<?php
/**
 * One parsing scenario for bug #32001.
 *
 * A scenario builds a small XML document whose element and attribute names
 * are non-ASCII, converts it from UTF-8 to the target encoding with iconv,
 * optionally prepends a byte order mark, and feeds the bytes to an XML parser
 * created without an explicit encoding. It then prints the scenario settings
 * followed by either the collected start tags (hex-encoded) or an error line.
 */
class testcase {
    private $encoding;
    private $bom;
    private $prologue;
    private $tags;
    private $chunk_size;

    /**
     * @param string $enc           iconv name of the encoding the document is converted to
     * @param int    $chunk_size    bytes handed to xml_parse() per call; 0 parses everything at once
     * @param mixed  $bom           truthy to prepend the byte order mark matching $enc
     * @param mixed  $omit_prologue truthy to leave out the XML declaration
     */
    public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = [];
    }

    /**
     * Start-tag handler: records "<hex name>: <hex attr>, <hex attr>...".
     *
     * Names and values are hex-encoded so the report is plain ASCII.
     * Attribute names are not recorded, only the values.
     */
    public function start_element($parser, $name, $attrs) {
        $this->tags[] = bin2hex($name).": ".implode(', ', array_map(bin2hex(...), $attrs));
    }

    /**
     * End-tag handler; intentionally does nothing.
     */
    public function end_element($parser, $name) {
    }

    /**
     * Runs the scenario and prints its report.
     *
     * The report is four header lines (encoding, prologue, chunk size, BOM)
     * followed by var_dump() of the recorded tags on success, or a single
     * "[Error] ..." line when the conversion or the parse failed.
     */
    public function run() {
        $parser = xml_parser_create(null);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_set_element_handler($parser, $this->start_element(...), $this->end_element(...));

        $data = $this->build_document();

        $failure = null;
        if ($data === false) {
            // Report the real cause instead of feeding `false` to the parser
            // and surfacing an unrelated XML error.
            $failure = "iconv failed to convert the document from UTF-8 to $this->encoding";
        } elseif (!$this->feed($parser, $data)) {
            // The cast keeps the "[Error] " line even if no message is available.
            $failure = (string) xml_error_string(xml_get_error_code($parser));
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($failure === null) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", $failure, "\n";
        }
    }

    /**
     * Builds the encoded document, including the optional prologue and BOM.
     *
     * @return string|false the bytes to parse, or false when iconv() failed
     */
    private function build_document() {
        $xml = '';

        if ($this->prologue) {
            // The declaration names the encoding without the BE/LE suffix
            // that the iconv name carries.
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $xml .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $xml .= <<<HERE
        <テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
          <テスト:テスト2 テスト="テスト">
            <テスト:テスト3>
              test!
            </テスト:テスト3>
          </テスト:テスト2>
        </テスト:テスト1>
        HERE;

        $converted = iconv("UTF-8", $this->encoding, $xml);
        if ($converted === false) {
            return false;
        }

        return ($this->bom ? $this->byte_order_mark() : '').$converted;
    }

    /**
     * Returns the byte order mark for the configured encoding, or an empty
     * string when the encoding is not one of the recognised names.
     */
    private function byte_order_mark(): string {
        return match (strtoupper($this->encoding)) {
            'UTF-8', 'UTF8' => "\xef\xbb\xbf",

            'UTF-16', 'UTF16', 'UTF-16BE', 'UTF16BE',
            'UCS-2', 'UCS2', 'UCS-2BE', 'UCS2BE' => "\xfe\xff",

            'UTF-16LE', 'UTF16LE', 'UCS-2LE', 'UCS2LE' => "\xff\xfe",

            'UTF-32', 'UTF32', 'UTF-32BE', 'UTF32BE',
            'UCS-4', 'UCS4', 'UCS-4BE', 'UCS4BE' => "\x00\x00\xfe\xff",

            'UTF-32LE', 'UTF32LE', 'UCS-4LE', 'UCS4LE' => "\xff\xfe\x00\x00",

            default => '',
        };
    }

    /**
     * Feeds the document to the parser, whole or in chunk_size-byte pieces.
     *
     * Always returns a definite result, including for empty input in chunked
     * mode, where the final xml_parse() call decides the outcome.
     * The @ operator is kept from the original test, presumably so that only
     * the echoed report reaches the test output.
     *
     * @return bool true when every xml_parse() call succeeded
     */
    private function feed(XMLParser $parser, string $data): bool {
        if ($this->chunk_size == 0) {
            return (bool) @xml_parse($parser, $data, true);
        }

        $length = strlen($data);
        for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
            if (!@xml_parse($parser, substr($data, $offset, $this->chunk_size), false)) {
                return false;
            }
        }

        // Tell the parser the input is complete.
        return (bool) @xml_parse($parser, "", true);
    }
}

// Arguments are (encoding, chunk size, BOM, omit prologue). The UTF-16
// variants have no entry that drops both the BOM and the prologue,
// presumably because nothing would then identify the encoding.
$suite = [
    new testcase("UTF-8",    0, 0, 0),
    new testcase("UTF-8",    0, 0, 1),
    new testcase("UTF-8",    0, 1, 0),
    new testcase("UTF-8",    0, 1, 1),
    new testcase("UTF-16BE", 0, 0, 0),
    new testcase("UTF-16BE", 0, 1, 0),
    new testcase("UTF-16BE", 0, 1, 1),
    new testcase("UTF-16LE", 0, 0, 0),
    new testcase("UTF-16LE", 0, 1, 0),
    new testcase("UTF-16LE", 0, 1, 1),
    new testcase("UTF-8",    1, 0, 0),
    new testcase("UTF-8",    1, 0, 1),
    new testcase("UTF-8",    1, 1, 0),
    new testcase("UTF-8",    1, 1, 1),
    new testcase("UTF-16BE", 1, 0, 0),
    new testcase("UTF-16BE", 1, 1, 0),
    new testcase("UTF-16BE", 1, 1, 1),
    new testcase("UTF-16LE", 1, 0, 0),
    new testcase("UTF-16LE", 1, 1, 0),
    new testcase("UTF-16LE", 1, 1, 1),
];

if (XML_SAX_IMPL == 'libxml') {
    echo "libxml2 Version => " . LIBXML_DOTTED_VERSION. "\n";
} else {
    echo "libxml2 Version => NONE\n";
}

foreach ($suite as $testcase) {
    $testcase->run();
}

?>
--EXPECTF--
libxml2 Version => %s
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}