--TEST--
Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
--SKIPIF--
<?php
require_once("skipif.inc");
if (!extension_loaded('iconv')) die ("skip iconv extension not available");
if (ICONV_IMPL == 'glibc' && version_compare(ICONV_VERSION, '2.12', '<='))
    die("skip iconv of glibc <= 2.12 is buggy");
?>
--FILE--
<?php
/**
 * One parsing scenario for bug #32001.
 *
 * Builds a small XML document with non-ASCII element and attribute names,
 * converts it from UTF-8 to the target encoding with iconv(), optionally
 * prepends a byte order mark, and feeds it to an XML parser created without
 * an explicit encoding, either in one call or in fixed-size chunks.
 */
class testcase {
    private $encoding;
    private $bom;
    private $prologue;
    private $tags;
    private $chunk_size;

    /**
     * @param string $enc           Target encoding name passed to iconv().
     * @param int    $chunk_size    Bytes per xml_parse() call; 0 parses everything at once.
     * @param int    $bom           Non-zero to prepend a byte order mark.
     * @param int    $omit_prologue Non-zero to leave out the XML declaration.
     *
     * Declared as __construct(): a method named after the class is not
     * treated as a constructor by PHP 8, which would leave every property
     * unset.
     */
    public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = array();
    }

    /**
     * Start-tag handler: records the hex-encoded element name followed by
     * the hex-encoded attribute values.
     */
    public function start_element($parser, $name, $attrs) {
        $attrs = array_map('bin2hex', $attrs);
        $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
    }

    /**
     * End-tag handler: intentionally does nothing.
     */
    public function end_element($parser, $name) {
    }

    /**
     * Returns the byte order mark bytes for the configured encoding, or an
     * empty string when the encoding name is not one of the listed aliases.
     */
    private function byte_order_mark() {
        switch (strtoupper($this->encoding)) {
            case 'UTF-8':
            case 'UTF8':
                return "\xef\xbb\xbf";

            case 'UTF-16':
            case 'UTF16':
            case 'UTF-16BE':
            case 'UTF16BE':
            case 'UCS-2':
            case 'UCS2':
            case 'UCS-2BE':
            case 'UCS2BE':
                return "\xfe\xff";

            case 'UTF-16LE':
            case 'UTF16LE':
            case 'UCS-2LE':
            case 'UCS2LE':
                return "\xff\xfe";

            case 'UTF-32':
            case 'UTF32':
            case 'UTF-32BE':
            case 'UTF32BE':
            case 'UCS-4':
            case 'UCS4':
            case 'UCS-4BE':
            case 'UCS4BE':
                return "\x00\x00\xfe\xff";

            case 'UTF-32LE':
            case 'UTF32LE':
            case 'UCS-4LE':
            case 'UCS4LE':
                return "\xff\xfe\x00\x00";
        }

        return '';
    }

    /**
     * Builds the document, parses it, and prints the scenario description
     * followed by either the collected tags or the parser's error string.
     */
    public function run() {
        $data = '';

        if ($this->prologue) {
            // The declaration names the encoding without its BE/LE suffix.
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

        $data = iconv("UTF-8", $this->encoding, $data);

        if ($this->bom) {
            $data = $this->byte_order_mark().$data;
        }

        // NULL encoding: the parser has to work out the encoding by itself.
        $parser = xml_parser_create(NULL);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        // Bound callables replace xml_set_object() + string method names,
        // a combination that is deprecated as of PHP 8.4.
        xml_set_element_handler(
            $parser,
            array($this, 'start_element'),
            array($this, 'end_element')
        );

        if ($this->chunk_size == 0) {
            $success = @xml_parse($parser, $data, true);
        } else {
            $success = true;
            $length = strlen($data);
            for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
                $success = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
                if (!$success) {
                    break;
                }
            }
            if ($success) {
                // An empty final chunk tells the parser the input is complete.
                $success = @xml_parse($parser, "", true);
            }
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }
}

// Arguments: encoding, chunk size, BOM flag, omit-prologue flag.
$suite = array(
    new testcase("UTF-8",     0, 0, 0),
    new testcase("UTF-8",     0, 0, 1),
    new testcase("UTF-8",     0, 1, 0),
    new testcase("UTF-8",     0, 1, 1),
    new testcase("UTF-16BE",  0, 0, 0),
    new testcase("UTF-16BE",  0, 1, 0),
    new testcase("UTF-16BE",  0, 1, 1),
    new testcase("UTF-16LE",  0, 0, 0),
    new testcase("UTF-16LE",  0, 1, 0),
    new testcase("UTF-16LE",  0, 1, 1),
    new testcase("UTF-8",     1, 0, 0),
    new testcase("UTF-8",     1, 0, 1),
    new testcase("UTF-8",     1, 1, 0),
    new testcase("UTF-8",     1, 1, 1),
    new testcase("UTF-16BE",  1, 0, 0),
    new testcase("UTF-16BE",  1, 1, 0),
    new testcase("UTF-16BE",  1, 1, 1),
    new testcase("UTF-16LE",  1, 0, 0),
    new testcase("UTF-16LE",  1, 1, 0),
    new testcase("UTF-16LE",  1, 1, 1),
);

if (XML_SAX_IMPL === 'libxml') {
    echo "libxml2 Version => " . LIBXML_DOTTED_VERSION. "\n";
} else {
    echo "libxml2 Version => NONE\n";
}

foreach ($suite as $testcase) {
    $testcase->run();
}

// vim600: sts=4 sw=4 ts=4 encoding=UTF-8
?>
--EXPECTF--
libxml2 Version => %s
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}