--TEST--
Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
--EXTENSIONS--
iconv
xml
--SKIPIF--
<?php
if (ICONV_IMPL == 'glibc' && version_compare(ICONV_VERSION, '2.12', '<='))
    die("skip iconv of glibc <= 2.12 is buggy");
?>
--FILE--
<?php
/**
 * One parsing scenario: a fixed XML document is converted to the requested
 * encoding, optionally prefixed with a BOM and/or stripped of its XML
 * prologue, and then fed to the parser either in one call or in fixed-size
 * chunks. The collected start tags are dumped so that every scenario can be
 * compared against the same expected output.
 */
class testcase {
    private $encoding;
    private $bom;
    private $prologue;
    private $tags;
    private $chunk_size;

    /**
     * @param string $enc           Target encoding name passed to iconv().
     * @param int    $chunk_size    Bytes per xml_parse() call; 0 parses everything at once.
     * @param int    $bom           Non-zero to prepend the byte order mark for $enc.
     * @param int    $omit_prologue Non-zero to leave out the "<?xml ... ?>" declaration.
     */
    function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = array();
    }

    /**
     * Start-tag handler: records the element name and its attribute values
     * hex-encoded, so the expected output stays plain ASCII.
     */
    function start_element($parser, $name, $attrs) {
        $attrs = array_map('bin2hex', $attrs);
        $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
    }

    /** End-tag handler: intentionally empty, only start tags are recorded. */
    function end_element($parser, $name) {
    }

    /**
     * Builds the document, parses it and prints the scenario description
     * followed by either the recorded tags or the parser error message.
     */
    function run() {
        $data = '';

        if ($this->prologue) {
            // The declaration names the encoding without its endianness suffix.
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

        $data = iconv("UTF-8", $this->encoding, $data);

        if ($this->bom) {
            // Prepend the byte order mark that matches the target encoding.
            switch (strtoupper($this->encoding)) {
                case 'UTF-8':
                case 'UTF8':
                    $data = "\xef\xbb\xbf".$data;
                    break;

                case 'UTF-16':
                case 'UTF16':
                case 'UTF-16BE':
                case 'UTF16BE':
                case 'UCS-2':
                case 'UCS2':
                case 'UCS-2BE':
                case 'UCS2BE':
                    $data = "\xfe\xff".$data;
                    break;

                case 'UTF-16LE':
                case 'UTF16LE':
                case 'UCS-2LE':
                case 'UCS2LE':
                    $data = "\xff\xfe".$data;
                    break;

                case 'UTF-32':
                case 'UTF32':
                case 'UTF-32BE':
                case 'UTF32BE':
                case 'UCS-4':
                case 'UCS4':
                case 'UCS-4BE':
                case 'UCS4BE':
                    $data = "\x00\x00\xfe\xff".$data;
                    break;

                case 'UTF-32LE':
                case 'UTF32LE':
                case 'UCS-4LE':
                case 'UCS4LE':
                    $data = "\xff\xfe\x00\x00".$data;
                    break;
            }
        }

        // No encoding is given, so the parser has to autodetect it.
        $parser = xml_parser_create(NULL);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        // Pass bound callables directly instead of bare method names plus
        // xml_set_object(): that combination is deprecated as of PHP 8.4 and
        // its notices would end up in the test output.
        xml_set_element_handler($parser, [$this, 'start_element'], [$this, 'end_element']);

        if ($this->chunk_size == 0) {
            $success = @xml_parse($parser, $data, true);
        } else {
            // Keep $success defined even if the loop body never runs.
            $success = true;
            for ($offset = 0; $offset < strlen($data);
                    $offset += $this->chunk_size) {
                $success = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
                if (!$success) {
                    break;
                }
            }
            if ($success) {
                // Signal end of input once every chunk was accepted.
                $success = @xml_parse($parser, "", true);
            }
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }
}

// Scenarios: encoding, chunk size, BOM flag, omit-prologue flag.
$suite = array(
    new testcase("UTF-8",     0, 0, 0),
    new testcase("UTF-8",     0, 0, 1),
    new testcase("UTF-8",     0, 1, 0),
    new testcase("UTF-8",     0, 1, 1),
    new testcase("UTF-16BE",  0, 0, 0),
    new testcase("UTF-16BE",  0, 1, 0),
    new testcase("UTF-16BE",  0, 1, 1),
    new testcase("UTF-16LE",  0, 0, 0),
    new testcase("UTF-16LE",  0, 1, 0),
    new testcase("UTF-16LE",  0, 1, 1),
    new testcase("UTF-8",     1, 0, 0),
    new testcase("UTF-8",     1, 0, 1),
    new testcase("UTF-8",     1, 1, 0),
    new testcase("UTF-8",     1, 1, 1),
    new testcase("UTF-16BE",  1, 0, 0),
    new testcase("UTF-16BE",  1, 1, 0),
    new testcase("UTF-16BE",  1, 1, 1),
    new testcase("UTF-16LE",  1, 0, 0),
    new testcase("UTF-16LE",  1, 1, 0),
    new testcase("UTF-16LE",  1, 1, 1),
);

if (XML_SAX_IMPL == 'libxml') {
    echo "libxml2 Version => " . LIBXML_DOTTED_VERSION. "\n";
} else {
    echo "libxml2 Version => NONE\n";
}

foreach ($suite as $testcase) {
    $testcase->run();
}

?>
--EXPECTF--
libxml2 Version => %s
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: all data at once
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-8
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16BE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: not prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}
Encoding: UTF-16LE
XML Prologue: not present
Chunk size: 1 byte(s)
BOM: prepended
array(3) {
  [0]=>
  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
  [1]=>
  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
  [2]=>
  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
}