xref: /PHP-7.4/ext/xml/tests/bug32001.phpt (revision 92ac598a)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
3--SKIPIF--
<?php
// Load the shared skip logic first; the checks it performs live in
// skipif.inc and are not visible from this file.
require_once 'skipif.inc';

// The test body converts its XML sample with iconv(), so iconv is mandatory.
if (!extension_loaded('iconv')) {
	die("skip iconv extension not available");
}

// Skip on the glibc iconv implementation at version 2.12 or older.
$buggy_glibc_iconv = ICONV_IMPL == 'glibc'
	&& version_compare(ICONV_VERSION, '2.12', '<=');
if ($buggy_glibc_iconv) {
	die("skip iconv of glibc <= 2.12 is buggy");
}
?>
10--FILE--
11<?php
/**
 * One parsing scenario for Bug #32001.
 *
 * Builds a small XML document with non-ASCII element/attribute names,
 * converts it from UTF-8 to the requested encoding, optionally prepends a
 * byte-order mark, and feeds it to an XML parser created without an explicit
 * encoding, either in one call or in fixed-size chunks. The hex-encoded
 * element names and attribute values seen by the start-element handler are
 * collected and dumped so they can be compared against the expected output.
 */
class testcase {
	/**
	 * Byte-order mark => upper-case encoding names that use it.
	 *
	 * @var array
	 */
	private static $bom_table = array(
		"\xef\xbb\xbf" => array('UTF-8', 'UTF8'),
		"\xfe\xff" => array(
			'UTF-16', 'UTF16', 'UTF-16BE', 'UTF16BE',
			'UCS-2', 'UCS2', 'UCS-2BE', 'UCS2BE',
		),
		"\xff\xfe" => array('UTF-16LE', 'UTF16LE', 'UCS-2LE', 'UCS2LE'),
		"\x00\x00\xfe\xff" => array(
			'UTF-32', 'UTF32', 'UTF-32BE', 'UTF32BE',
			'UCS-4', 'UCS4', 'UCS-4BE', 'UCS4BE',
		),
		"\xff\xfe\x00\x00" => array('UTF-32LE', 'UTF32LE', 'UCS-4LE', 'UCS4LE'),
	);

	private $encoding;
	private $bom;
	private $prologue;
	private $tags;
	private $chunk_size;

	/**
	 * @param string $enc           Target encoding name handed to iconv().
	 * @param int    $chunk_size    Bytes per xml_parse() call; 0 parses everything at once.
	 * @param mixed  $bom           Truthy to prepend the byte-order mark for $enc.
	 * @param mixed  $omit_prologue Truthy to leave out the <?xml ... ?> declaration.
	 */
	public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
		$this->encoding = $enc;
		$this->chunk_size = $chunk_size;
		$this->bom = $bom;
		$this->prologue = !$omit_prologue;
		$this->tags = array();
	}

	/**
	 * Start-element handler: records "<hex name>: <hex attr>, <hex attr>...".
	 *
	 * Must stay public because the XML extension invokes it as a callback.
	 */
	public function start_element($parser, $name, $attrs) {
		$hex_attrs = array_map('bin2hex', $attrs);
		$this->tags[] = bin2hex($name).": ".implode(', ', $hex_attrs);
	}

	/**
	 * End-element handler; intentionally does nothing.
	 */
	public function end_element($parser, $name) {
	}

	/**
	 * Runs the scenario and prints its settings followed by either the
	 * collected tags or the parser's error string.
	 */
	public function run() {
		// Reset per-run state so calling run() again on the same object does
		// not report tags left over from the previous run.
		$this->tags = array();

		$data = $this->build_document();

		$parser = xml_parser_create(NULL);
		xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
		// Bound callables instead of bare method names + xml_set_object():
		// the handlers no longer depend on a later call to become callable.
		xml_set_element_handler(
			$parser,
			array($this, "start_element"),
			array($this, "end_element")
		);

		$success = $this->feed($parser, $data);

		echo "Encoding: $this->encoding\n";
		echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
		echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
		echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

		if ($success) {
			var_dump($this->tags);
		} else {
			echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
		}
	}

	/**
	 * Builds the sample document, converts it to the target encoding and
	 * prepends the byte-order mark when requested.
	 */
	private function build_document() {
		$data = '';

		if ($this->prologue) {
			// Drop the BE/LE suffix so the declaration names the base
			// encoding (e.g. "UTF-16" for "UTF-16LE").
			$canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
			$data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
		}

		$data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
	<テスト:テスト3>
	  test!
	</テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

		$data = iconv("UTF-8", $this->encoding, $data);

		if ($this->bom) {
			$data = self::bom_for($this->encoding).$data;
		}

		return $data;
	}

	/**
	 * Returns the byte-order mark for an encoding name (case-insensitive),
	 * or an empty string when the name is not in the table.
	 */
	private static function bom_for($encoding) {
		$name = strtoupper($encoding);
		foreach (self::$bom_table as $bom => $names) {
			if (in_array($name, $names, true)) {
				return $bom;
			}
		}
		return '';
	}

	/**
	 * Feeds $data to the parser in one call or in chunk_size-byte pieces.
	 *
	 * Warnings from xml_parse() are suppressed on purpose; failures are
	 * reported by run() through the parser's error code instead.
	 *
	 * @return mixed Truthy when every xml_parse() call succeeded.
	 */
	private function feed($parser, $data) {
		if ($this->chunk_size == 0) {
			return @xml_parse($parser, $data, true);
		}

		// Computed once: the data does not change while it is being fed.
		$length = strlen($data);
		for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
			$chunk = substr($data, $offset, $this->chunk_size);
			if (!@xml_parse($parser, $chunk, false)) {
				return 0;
			}
		}

		// Signal end of input so the parser can finish the document.
		return @xml_parse($parser, "", true);
	}
}
// Encoding / BOM flag / omit-prologue flag for each scenario. The UTF-16
// encodings have no entry that lacks both the BOM and the prologue.
$variants = [
	["UTF-8",    0, 0],
	["UTF-8",    0, 1],
	["UTF-8",    1, 0],
	["UTF-8",    1, 1],
	["UTF-16BE", 0, 0],
	["UTF-16BE", 1, 0],
	["UTF-16BE", 1, 1],
	["UTF-16LE", 0, 0],
	["UTF-16LE", 1, 0],
	["UTF-16LE", 1, 1],
];

// Every variant runs twice: first with the whole document in one
// xml_parse() call (chunk size 0), then fed one byte at a time.
$suite = [];
foreach ([0, 1] as $chunk_size) {
	foreach ($variants as [$enc, $bom, $omit_prologue]) {
		$suite[] = new testcase($enc, $chunk_size, $bom, $omit_prologue);
	}
}

// Banner line; LIBXML_DOTTED_VERSION is only read when libxml is the SAX backend.
$libxml_version = (XML_SAX_IMPL === 'libxml') ? LIBXML_DOTTED_VERSION : "NONE";
echo "libxml2 Version => " . $libxml_version . "\n";

foreach ($suite as $testcase) {
	$testcase->run();
}
164
165?>
166--EXPECTF--
167libxml2 Version => %s
168Encoding: UTF-8
169XML Prologue: present
170Chunk size: all data at once
171BOM: not prepended
172array(3) {
173  [0]=>
174  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
175  [1]=>
176  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
177  [2]=>
178  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
179}
180Encoding: UTF-8
181XML Prologue: not present
182Chunk size: all data at once
183BOM: not prepended
184array(3) {
185  [0]=>
186  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
187  [1]=>
188  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
189  [2]=>
190  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
191}
192Encoding: UTF-8
193XML Prologue: present
194Chunk size: all data at once
195BOM: prepended
196array(3) {
197  [0]=>
198  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
199  [1]=>
200  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
201  [2]=>
202  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
203}
204Encoding: UTF-8
205XML Prologue: not present
206Chunk size: all data at once
207BOM: prepended
208array(3) {
209  [0]=>
210  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
211  [1]=>
212  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
213  [2]=>
214  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
215}
216Encoding: UTF-16BE
217XML Prologue: present
218Chunk size: all data at once
219BOM: not prepended
220array(3) {
221  [0]=>
222  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
223  [1]=>
224  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
225  [2]=>
226  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
227}
228Encoding: UTF-16BE
229XML Prologue: present
230Chunk size: all data at once
231BOM: prepended
232array(3) {
233  [0]=>
234  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
235  [1]=>
236  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
237  [2]=>
238  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
239}
240Encoding: UTF-16BE
241XML Prologue: not present
242Chunk size: all data at once
243BOM: prepended
244array(3) {
245  [0]=>
246  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
247  [1]=>
248  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
249  [2]=>
250  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
251}
252Encoding: UTF-16LE
253XML Prologue: present
254Chunk size: all data at once
255BOM: not prepended
256array(3) {
257  [0]=>
258  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
259  [1]=>
260  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
261  [2]=>
262  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
263}
264Encoding: UTF-16LE
265XML Prologue: present
266Chunk size: all data at once
267BOM: prepended
268array(3) {
269  [0]=>
270  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
271  [1]=>
272  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
273  [2]=>
274  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
275}
276Encoding: UTF-16LE
277XML Prologue: not present
278Chunk size: all data at once
279BOM: prepended
280array(3) {
281  [0]=>
282  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
283  [1]=>
284  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
285  [2]=>
286  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
287}
288Encoding: UTF-8
289XML Prologue: present
290Chunk size: 1 byte(s)
291BOM: not prepended
292array(3) {
293  [0]=>
294  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
295  [1]=>
296  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
297  [2]=>
298  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
299}
300Encoding: UTF-8
301XML Prologue: not present
302Chunk size: 1 byte(s)
303BOM: not prepended
304array(3) {
305  [0]=>
306  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
307  [1]=>
308  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
309  [2]=>
310  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
311}
312Encoding: UTF-8
313XML Prologue: present
314Chunk size: 1 byte(s)
315BOM: prepended
316array(3) {
317  [0]=>
318  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
319  [1]=>
320  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
321  [2]=>
322  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
323}
324Encoding: UTF-8
325XML Prologue: not present
326Chunk size: 1 byte(s)
327BOM: prepended
328array(3) {
329  [0]=>
330  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
331  [1]=>
332  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
333  [2]=>
334  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
335}
336Encoding: UTF-16BE
337XML Prologue: present
338Chunk size: 1 byte(s)
339BOM: not prepended
340array(3) {
341  [0]=>
342  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
343  [1]=>
344  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
345  [2]=>
346  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
347}
348Encoding: UTF-16BE
349XML Prologue: present
350Chunk size: 1 byte(s)
351BOM: prepended
352array(3) {
353  [0]=>
354  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
355  [1]=>
356  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
357  [2]=>
358  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
359}
360Encoding: UTF-16BE
361XML Prologue: not present
362Chunk size: 1 byte(s)
363BOM: prepended
364array(3) {
365  [0]=>
366  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
367  [1]=>
368  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
369  [2]=>
370  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
371}
372Encoding: UTF-16LE
373XML Prologue: present
374Chunk size: 1 byte(s)
375BOM: not prepended
376array(3) {
377  [0]=>
378  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
379  [1]=>
380  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
381  [2]=>
382  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
383}
384Encoding: UTF-16LE
385XML Prologue: present
386Chunk size: 1 byte(s)
387BOM: prepended
388array(3) {
389  [0]=>
390  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
391  [1]=>
392  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
393  [2]=>
394  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
395}
396Encoding: UTF-16LE
397XML Prologue: not present
398Chunk size: 1 byte(s)
399BOM: prepended
400array(3) {
401  [0]=>
402  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
403  [1]=>
404  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
405  [2]=>
406  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
407}
408