xref: /PHP-5.5/ext/xml/tests/bug32001.phpt (revision 948d84fd)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
3--SKIPIF--
4<?php
// Shared skip checks for this test directory live in skipif.inc (not visible here).
require_once "skipif.inc";

// The FILE section transcodes its XML sample with iconv(), so the test
// cannot run when the iconv extension is missing.
if (!extension_loaded('iconv')) {
	die("skip iconv extension not available");
}
7?>
8--FILE--
9<?php
/**
 * One parser run for bug #32001.
 *
 * Builds a small XML document with non-ASCII element and attribute names,
 * transcodes it to the requested encoding (optionally prepending a BOM and/or
 * omitting the XML prologue), feeds it to an auto-detecting XML parser either
 * in one call or chunk by chunk, and prints what the start-element handler
 * saw as hex so the output is independent of the terminal encoding.
 */
class testcase {
	private $encoding;
	private $bom;
	private $prologue;
	private $tags;
	private $chunk_size;

	/**
	 * @param string $enc           Target encoding name passed to iconv().
	 * @param int    $chunk_size    Bytes per xml_parse() call; 0 parses everything at once.
	 * @param mixed  $bom           Truthy to prepend a byte-order mark.
	 * @param mixed  $omit_prologue Truthy to leave out the XML declaration.
	 */
	public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
		// A real __construct is required: a method named after the class is
		// not treated as a constructor on current PHP versions.
		$this->encoding = $enc;
		$this->chunk_size = $chunk_size;
		$this->bom = $bom;
		$this->prologue = !$omit_prologue;
		$this->tags = array();
	}

	/**
	 * Start-element callback: records "hex(name): hex(attr), hex(attr), ...".
	 */
	public function start_element($parser, $name, $attrs) {
		$attrs = array_map('bin2hex', $attrs);
		$this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
	}

	/**
	 * End-element callback; intentionally does nothing.
	 */
	public function end_element($parser, $name) {
	}

	/**
	 * Runs this configuration and prints its settings followed by either the
	 * collected tags (var_dump) or the parser's error string.
	 */
	public function run() {
		// Start from a clean slate so a repeated run() does not report the
		// tags of the previous run as well.
		$this->tags = array();

		$data = $this->build_document();

		$parser = xml_parser_create(NULL);
		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
		// Bind the handlers as object callables instead of registering bare
		// method names together with xml_set_object().
		xml_set_element_handler(
			$parser,
			array($this, "start_element"),
			array($this, "end_element")
		);

		$success = $this->feed($parser, $data);

		echo "Encoding: $this->encoding\n";
		echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
		echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
		echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

		if ($success) {
			var_dump($this->tags);
		} else {
			echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
		}
	}

	/**
	 * Returns the sample document as raw bytes in the target encoding,
	 * including the optional prologue and BOM.
	 */
	private function build_document() {
		$data = '';

		if ($this->prologue) {
			// The declaration names the encoding without its byte-order
			// suffix (e.g. "UTF-16" for "UTF-16LE").
			$canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
			$data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
		}

		$data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
	<テスト:テスト3>
	  test!
	</テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

		$data = iconv("UTF-8", $this->encoding, $data);

		if ($this->bom) {
			$data = $this->bom_bytes().$data;
		}

		return $data;
	}

	/**
	 * Returns the byte-order mark for the configured encoding, or an empty
	 * string when the encoding name is not one of the recognised spellings.
	 */
	private function bom_bytes() {
		switch (strtoupper($this->encoding)) {
			case 'UTF-8':
			case 'UTF8':
				return "\xef\xbb\xbf";

			case 'UTF-16':
			case 'UTF16':
			case 'UTF-16BE':
			case 'UTF16BE':
			case 'UCS-2':
			case 'UCS2':
			case 'UCS-2BE':
			case 'UCS2BE':
				return "\xfe\xff";

			case 'UTF-16LE':
			case 'UTF16LE':
			case 'UCS-2LE':
			case 'UCS2LE':
				return "\xff\xfe";

			case 'UTF-32':
			case 'UTF32':
			case 'UTF-32BE':
			case 'UTF32BE':
			case 'UCS-4':
			case 'UCS4':
			case 'UCS-4BE':
			case 'UCS4BE':
				return "\x00\x00\xfe\xff";

			case 'UTF-32LE':
			case 'UTF32LE':
			case 'UCS-4LE':
			case 'UCS4LE':
				return "\xff\xfe\x00\x00";
		}

		return '';
	}

	/**
	 * Feeds $data to $parser, whole or in chunk_size-byte pieces.
	 *
	 * Warnings are suppressed on purpose: a failure is reported through the
	 * return value so the test output stays stable.
	 *
	 * @return int Truthy when the whole document parsed, falsy otherwise.
	 */
	private function feed($parser, $data) {
		if ($this->chunk_size == 0) {
			return @xml_parse($parser, $data, true);
		}

		$length = strlen($data);
		for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
			$ok = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
			if (!$ok) {
				// Stop at the first failing chunk; the caller reports the error.
				return $ok;
			}
		}

		// Every chunk was accepted: signal end of input to finish parsing.
		return @xml_parse($parser, "", true);
	}
}
// (bom, omit_prologue) flag pairs exercised for each encoding. The UTF-16
// variants have no "no BOM, no prologue" entry.
$variants = array(
	"UTF-8"    => array(array(0, 0), array(0, 1), array(1, 0), array(1, 1)),
	"UTF-16BE" => array(array(0, 0), array(1, 0), array(1, 1)),
	"UTF-16LE" => array(array(0, 0), array(1, 0), array(1, 1)),
);

// First every variant parsed in a single call (chunk size 0), then the same
// variants again fed one byte at a time.
$suite = array();
foreach (array(0, 1) as $chunk_size) {
	foreach ($variants as $encoding => $flag_pairs) {
		foreach ($flag_pairs as $flags) {
			$suite[] = new testcase($encoding, $chunk_size, $flags[0], $flags[1]);
		}
	}
}

// Report which libxml2 version backs the SAX implementation, if any.
$libxml_version = (XML_SAX_IMPL == 'libxml') ? LIBXML_DOTTED_VERSION : "NONE";
echo "libxml2 Version => " . $libxml_version . "\n";

for ($i = 0, $total = count($suite); $i < $total; $i++) {
	$suite[$i]->run();
}

162
163// vim600: sts=4 sw=4 ts=4 encoding=UTF-8
164?>
165--EXPECTF--
166libxml2 Version => %s
167Encoding: UTF-8
168XML Prologue: present
169Chunk size: all data at once
170BOM: not prepended
171array(3) {
172  [0]=>
173  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
174  [1]=>
175  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
176  [2]=>
177  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
178}
179Encoding: UTF-8
180XML Prologue: not present
181Chunk size: all data at once
182BOM: not prepended
183array(3) {
184  [0]=>
185  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
186  [1]=>
187  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
188  [2]=>
189  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
190}
191Encoding: UTF-8
192XML Prologue: present
193Chunk size: all data at once
194BOM: prepended
195array(3) {
196  [0]=>
197  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
198  [1]=>
199  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
200  [2]=>
201  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
202}
203Encoding: UTF-8
204XML Prologue: not present
205Chunk size: all data at once
206BOM: prepended
207array(3) {
208  [0]=>
209  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
210  [1]=>
211  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
212  [2]=>
213  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
214}
215Encoding: UTF-16BE
216XML Prologue: present
217Chunk size: all data at once
218BOM: not prepended
219array(3) {
220  [0]=>
221  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
222  [1]=>
223  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
224  [2]=>
225  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
226}
227Encoding: UTF-16BE
228XML Prologue: present
229Chunk size: all data at once
230BOM: prepended
231array(3) {
232  [0]=>
233  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
234  [1]=>
235  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
236  [2]=>
237  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
238}
239Encoding: UTF-16BE
240XML Prologue: not present
241Chunk size: all data at once
242BOM: prepended
243array(3) {
244  [0]=>
245  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
246  [1]=>
247  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
248  [2]=>
249  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
250}
251Encoding: UTF-16LE
252XML Prologue: present
253Chunk size: all data at once
254BOM: not prepended
255array(3) {
256  [0]=>
257  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
258  [1]=>
259  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
260  [2]=>
261  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
262}
263Encoding: UTF-16LE
264XML Prologue: present
265Chunk size: all data at once
266BOM: prepended
267array(3) {
268  [0]=>
269  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
270  [1]=>
271  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
272  [2]=>
273  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
274}
275Encoding: UTF-16LE
276XML Prologue: not present
277Chunk size: all data at once
278BOM: prepended
279array(3) {
280  [0]=>
281  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
282  [1]=>
283  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
284  [2]=>
285  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
286}
287Encoding: UTF-8
288XML Prologue: present
289Chunk size: 1 byte(s)
290BOM: not prepended
291array(3) {
292  [0]=>
293  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
294  [1]=>
295  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
296  [2]=>
297  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
298}
299Encoding: UTF-8
300XML Prologue: not present
301Chunk size: 1 byte(s)
302BOM: not prepended
303array(3) {
304  [0]=>
305  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
306  [1]=>
307  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
308  [2]=>
309  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
310}
311Encoding: UTF-8
312XML Prologue: present
313Chunk size: 1 byte(s)
314BOM: prepended
315array(3) {
316  [0]=>
317  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
318  [1]=>
319  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
320  [2]=>
321  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
322}
323Encoding: UTF-8
324XML Prologue: not present
325Chunk size: 1 byte(s)
326BOM: prepended
327array(3) {
328  [0]=>
329  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
330  [1]=>
331  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
332  [2]=>
333  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
334}
335Encoding: UTF-16BE
336XML Prologue: present
337Chunk size: 1 byte(s)
338BOM: not prepended
339array(3) {
340  [0]=>
341  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
342  [1]=>
343  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
344  [2]=>
345  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
346}
347Encoding: UTF-16BE
348XML Prologue: present
349Chunk size: 1 byte(s)
350BOM: prepended
351array(3) {
352  [0]=>
353  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
354  [1]=>
355  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
356  [2]=>
357  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
358}
359Encoding: UTF-16BE
360XML Prologue: not present
361Chunk size: 1 byte(s)
362BOM: prepended
363array(3) {
364  [0]=>
365  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
366  [1]=>
367  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
368  [2]=>
369  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
370}
371Encoding: UTF-16LE
372XML Prologue: present
373Chunk size: 1 byte(s)
374BOM: not prepended
375array(3) {
376  [0]=>
377  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
378  [1]=>
379  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
380  [2]=>
381  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
382}
383Encoding: UTF-16LE
384XML Prologue: present
385Chunk size: 1 byte(s)
386BOM: prepended
387array(3) {
388  [0]=>
389  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
390  [1]=>
391  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
392  [2]=>
393  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
394}
395Encoding: UTF-16LE
396XML Prologue: not present
397Chunk size: 1 byte(s)
398BOM: prepended
399array(3) {
400  [0]=>
401  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
402  [1]=>
403  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
404  [2]=>
405  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
406}
407