xref: /PHP-7.3/ext/xml/tests/bug32001.phpt (revision 782352c5)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
3--SKIPIF--
4<?php
require_once("skipif.inc");

// Guard 1: the iconv extension has to be loaded at all.
if (!extension_loaded('iconv')) {
	die ("skip iconv extension not available");
}

// Guard 2: glibc's iconv is skipped up to and including version 2.12.
$iconv_is_glibc = (ICONV_IMPL == 'glibc');
if ($iconv_is_glibc && version_compare(ICONV_VERSION, '2.12', '<=')) {
	die("skip iconv of glibc <= 2.12 is buggy");
}
9?>
10--FILE--
11<?php
/**
 * One parser scenario: builds a small namespaced XML document, converts it
 * to a target encoding with iconv(), optionally prepends a byte order mark,
 * feeds it to an ext/xml parser created without an explicit encoding, and
 * prints a report with the start tags that were seen (hex-encoded).
 */
class testcase {
	/** @var string Encoding name handed to iconv() and echoed in the report. */
	private $encoding;
	/** @var mixed Truthy when a byte order mark is prepended to the encoded document. */
	private $bom;
	/** @var bool True when the XML declaration is written at the top of the document. */
	private $prologue;
	/** @var string[] One "hex(name): hex(attr), hex(attr)" entry per start tag of the current run. */
	private $tags;
	/** @var int Number of bytes passed to xml_parse() per call; 0 means the whole document at once. */
	private $chunk_size;

	/**
	 * @param string $enc           Target encoding of the document.
	 * @param int    $chunk_size    Bytes per xml_parse() call, 0 for a single call.
	 * @param mixed  $bom           Truthy to prepend a byte order mark.
	 * @param mixed  $omit_prologue Truthy to leave out the XML declaration.
	 */
	public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
		$this->encoding = $enc;
		$this->chunk_size = $chunk_size;
		$this->bom = $bom;
		$this->prologue = !$omit_prologue;
		$this->tags = array();
	}

	/**
	 * Start-tag handler: records the tag name and its attribute values as hex
	 * strings so the report is independent of the terminal encoding.
	 *
	 * @param mixed  $parser Parser handle supplied by ext/xml (unused).
	 * @param string $name   Tag name as delivered by the parser.
	 * @param array  $attrs  Attribute name => value map.
	 */
	public function start_element($parser, $name, $attrs) {
		$attrs = array_map('bin2hex', $attrs);
		$this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
	}

	/**
	 * End-tag handler; intentionally does nothing.
	 *
	 * @param mixed  $parser Parser handle supplied by ext/xml (unused).
	 * @param string $name   Tag name as delivered by the parser.
	 */
	public function end_element($parser, $name) {
	}

	/**
	 * Builds, encodes and parses the document, then echoes the scenario
	 * settings followed by either the collected tags or an error line.
	 */
	public function run() {
		// Report only the tags of this run, even when run() is called repeatedly.
		$this->tags = array();

		$data = $this->build_document();
		$parser = null;
		$success = false;

		if ($data !== false) {
			$parser = xml_parser_create(NULL);
			xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
			// Bound array callables do not depend on xml_set_object() having
			// been called first, which newer PHP versions require for bare
			// method-name strings.
			xml_set_element_handler(
				$parser,
				array($this, 'start_element'),
				array($this, 'end_element')
			);
			$success = $this->feed($parser, $data);
		}

		echo "Encoding: $this->encoding\n";
		echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
		echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
		echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

		if ($success) {
			var_dump($this->tags);
		} elseif ($parser === null) {
			// iconv() returned false, so there was nothing to hand to the parser.
			echo "[Error] iconv() could not convert the document to $this->encoding\n";
		} else {
			echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
		}
	}

	/**
	 * Assembles the UTF-8 source document, converts it to the target encoding
	 * and prepends the byte order mark when requested.
	 *
	 * @return string|false Encoded document, or false when iconv() fails.
	 */
	private function build_document() {
		$data = '';

		if ($this->prologue) {
			// The declaration carries the name without the BE/LE suffix.
			$canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
			$data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
		}

		$data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
	<テスト:テスト3>
	  test!
	</テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

		$data = iconv("UTF-8", $this->encoding, $data);
		if ($data === false) {
			return false;
		}

		return $this->bom ? $this->bom_bytes().$data : $data;
	}

	/**
	 * Looks up the byte order mark for the configured encoding.
	 *
	 * @return string BOM bytes, or '' for an encoding that is not in the table.
	 */
	private function bom_bytes() {
		// Keys are upper-cased names with hyphens removed, so "UTF-16BE" and
		// "utf16be" resolve to the same entry.
		$boms = array(
			'UTF8'    => "\xef\xbb\xbf",
			'UTF16'   => "\xfe\xff",
			'UTF16BE' => "\xfe\xff",
			'UCS2'    => "\xfe\xff",
			'UCS2BE'  => "\xfe\xff",
			'UTF16LE' => "\xff\xfe",
			'UCS2LE'  => "\xff\xfe",
			'UTF32'   => "\x00\x00\xfe\xff",
			'UTF32BE' => "\x00\x00\xfe\xff",
			'UCS4'    => "\x00\x00\xfe\xff",
			'UCS4BE'  => "\x00\x00\xfe\xff",
			'UTF32LE' => "\xff\xfe\x00\x00",
			'UCS4LE'  => "\xff\xfe\x00\x00",
		);
		$key = str_replace('-', '', strtoupper($this->encoding));

		return isset($boms[$key]) ? $boms[$key] : '';
	}

	/**
	 * Feeds the document to the parser, either in one call or in slices of
	 * chunk_size bytes followed by an empty final call.
	 *
	 * @param mixed  $parser Parser handle from xml_parser_create().
	 * @param string $data   Encoded document.
	 * @return bool True when every xml_parse() call reported success.
	 */
	private function feed($parser, $data) {
		// Parser warnings are suppressed; failures surface through the return value.
		if ($this->chunk_size == 0) {
			return (bool) @xml_parse($parser, $data, true);
		}

		$length = strlen($data);
		for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
			$slice = substr($data, $offset, $this->chunk_size);
			if (!@xml_parse($parser, $slice, false)) {
				return false;
			}
		}

		// An empty final chunk tells the parser that the document is complete.
		return (bool) @xml_parse($parser, "", true);
	}
}
// (bom, omit_prologue) pairs exercised per encoding. The UTF-16 encodings
// have no pair with both flags off for the BOM while the prologue is omitted.
$flag_pairs_by_encoding = array(
	"UTF-8"    => array(array(0, 0), array(0, 1), array(1, 0), array(1, 1)),
	"UTF-16BE" => array(array(0, 0), array(1, 0), array(1, 1)),
	"UTF-16LE" => array(array(0, 0), array(1, 0), array(1, 1)),
);

// First every scenario parsed in a single call (chunk size 0), then the same
// scenarios again fed one byte at a time.
$suite = array();
foreach (array(0, 1) as $chunk_size) {
	foreach ($flag_pairs_by_encoding as $encoding_name => $flag_pairs) {
		foreach ($flag_pairs as $flag_pair) {
			list($with_bom, $without_prologue) = $flag_pair;
			$suite[] = new testcase($encoding_name, $chunk_size, $with_bom, $without_prologue);
		}
	}
}

// Banner naming the libxml2 version when the SAX layer is libxml-based.
$libxml_version = (XML_SAX_IMPL == 'libxml') ? LIBXML_DOTTED_VERSION : "NONE";
echo "libxml2 Version => " . $libxml_version . "\n";

foreach ($suite as $scenario) {
	$scenario->run();
}
164
165// vim600: sts=4 sw=4 ts=4 encoding=UTF-8
166?>
167--EXPECTF--
168libxml2 Version => %s
169Encoding: UTF-8
170XML Prologue: present
171Chunk size: all data at once
172BOM: not prepended
173array(3) {
174  [0]=>
175  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
176  [1]=>
177  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
178  [2]=>
179  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
180}
181Encoding: UTF-8
182XML Prologue: not present
183Chunk size: all data at once
184BOM: not prepended
185array(3) {
186  [0]=>
187  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
188  [1]=>
189  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
190  [2]=>
191  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
192}
193Encoding: UTF-8
194XML Prologue: present
195Chunk size: all data at once
196BOM: prepended
197array(3) {
198  [0]=>
199  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
200  [1]=>
201  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
202  [2]=>
203  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
204}
205Encoding: UTF-8
206XML Prologue: not present
207Chunk size: all data at once
208BOM: prepended
209array(3) {
210  [0]=>
211  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
212  [1]=>
213  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
214  [2]=>
215  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
216}
217Encoding: UTF-16BE
218XML Prologue: present
219Chunk size: all data at once
220BOM: not prepended
221array(3) {
222  [0]=>
223  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
224  [1]=>
225  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
226  [2]=>
227  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
228}
229Encoding: UTF-16BE
230XML Prologue: present
231Chunk size: all data at once
232BOM: prepended
233array(3) {
234  [0]=>
235  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
236  [1]=>
237  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
238  [2]=>
239  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
240}
241Encoding: UTF-16BE
242XML Prologue: not present
243Chunk size: all data at once
244BOM: prepended
245array(3) {
246  [0]=>
247  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
248  [1]=>
249  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
250  [2]=>
251  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
252}
253Encoding: UTF-16LE
254XML Prologue: present
255Chunk size: all data at once
256BOM: not prepended
257array(3) {
258  [0]=>
259  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
260  [1]=>
261  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
262  [2]=>
263  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
264}
265Encoding: UTF-16LE
266XML Prologue: present
267Chunk size: all data at once
268BOM: prepended
269array(3) {
270  [0]=>
271  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
272  [1]=>
273  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
274  [2]=>
275  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
276}
277Encoding: UTF-16LE
278XML Prologue: not present
279Chunk size: all data at once
280BOM: prepended
281array(3) {
282  [0]=>
283  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
284  [1]=>
285  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
286  [2]=>
287  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
288}
289Encoding: UTF-8
290XML Prologue: present
291Chunk size: 1 byte(s)
292BOM: not prepended
293array(3) {
294  [0]=>
295  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
296  [1]=>
297  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
298  [2]=>
299  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
300}
301Encoding: UTF-8
302XML Prologue: not present
303Chunk size: 1 byte(s)
304BOM: not prepended
305array(3) {
306  [0]=>
307  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
308  [1]=>
309  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
310  [2]=>
311  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
312}
313Encoding: UTF-8
314XML Prologue: present
315Chunk size: 1 byte(s)
316BOM: prepended
317array(3) {
318  [0]=>
319  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
320  [1]=>
321  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
322  [2]=>
323  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
324}
325Encoding: UTF-8
326XML Prologue: not present
327Chunk size: 1 byte(s)
328BOM: prepended
329array(3) {
330  [0]=>
331  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
332  [1]=>
333  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
334  [2]=>
335  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
336}
337Encoding: UTF-16BE
338XML Prologue: present
339Chunk size: 1 byte(s)
340BOM: not prepended
341array(3) {
342  [0]=>
343  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
344  [1]=>
345  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
346  [2]=>
347  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
348}
349Encoding: UTF-16BE
350XML Prologue: present
351Chunk size: 1 byte(s)
352BOM: prepended
353array(3) {
354  [0]=>
355  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
356  [1]=>
357  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
358  [2]=>
359  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
360}
361Encoding: UTF-16BE
362XML Prologue: not present
363Chunk size: 1 byte(s)
364BOM: prepended
365array(3) {
366  [0]=>
367  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
368  [1]=>
369  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
370  [2]=>
371  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
372}
373Encoding: UTF-16LE
374XML Prologue: present
375Chunk size: 1 byte(s)
376BOM: not prepended
377array(3) {
378  [0]=>
379  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
380  [1]=>
381  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
382  [2]=>
383  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
384}
385Encoding: UTF-16LE
386XML Prologue: present
387Chunk size: 1 byte(s)
388BOM: prepended
389array(3) {
390  [0]=>
391  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
392  [1]=>
393  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
394  [2]=>
395  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
396}
397Encoding: UTF-16LE
398XML Prologue: not present
399Chunk size: 1 byte(s)
400BOM: prepended
401array(3) {
402  [0]=>
403  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
404  [1]=>
405  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
406  [2]=>
407  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
408}
409