xref: /php-src/ext/xml/tests/bug32001.phpt (revision 25b46965)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
3--EXTENSIONS--
4iconv
5xml
6--SKIPIF--
7<?php
// Skip the test when iconv is provided by glibc at version 2.12 or older,
// which this test treats as buggy.
$iconv_is_glibc = ICONV_IMPL == 'glibc';
if ($iconv_is_glibc && version_compare(ICONV_VERSION, '2.12', '<=')) {
    die("skip iconv of glibc <= 2.12 is buggy");
}
10?>
11--FILE--
12<?php
/**
 * One scenario of the bug #32001 regression test.
 *
 * An instance builds a small XML document whose element names, attribute
 * names and attribute values are non-ASCII, converts it from UTF-8 to the
 * requested encoding, optionally prepends a byte order mark and/or leaves out
 * the XML declaration, feeds the bytes to an ext/xml parser (in one call or in
 * fixed-size chunks) and prints the scenario settings followed by either the
 * collected start tags or the parser's error string.
 */
class testcase {
    /** Document body under test, written in UTF-8 (the XML declaration is added separately). */
    private const DOCUMENT = <<<'XML'
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
XML;

    /** Target encoding name handed to iconv(). */
    private $encoding;
    /** Truthy when a byte order mark is prepended to the encoded document. */
    private $bom;
    /** True when the XML declaration is emitted in front of the document. */
    private $prologue;
    /** One "hex(name): hex(attr), hex(attr)..." string per start tag seen. */
    private $tags;
    /** Bytes per xml_parse() call; 0 means the whole document in one call. */
    private $chunk_size;

    /**
     * @param string $enc           Target encoding name for iconv().
     * @param int    $chunk_size    Bytes fed per xml_parse() call, 0 for a single call.
     * @param mixed  $bom           Truthy to prepend a byte order mark.
     * @param mixed  $omit_prologue Truthy to leave out the XML declaration.
     */
    function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = [];
    }

    /**
     * Start-element handler: records the element name and its attribute
     * values, all hex-encoded so the printed result is plain ASCII.
     */
    function start_element($parser, $name, $attrs) {
        $hex_attrs = array_map(bin2hex(...), $attrs);
        $this->tags[] = bin2hex($name) . ": " . implode(', ', $hex_attrs);
    }

    /** End-element handler; intentionally does nothing. */
    function end_element($parser, $name) {
    }

    /**
     * Builds, encodes and parses the document, then prints the scenario
     * settings and the outcome.
     */
    function run() {
        $data = iconv("UTF-8", $this->encoding, $this->build_document());
        if ($data === false) {
            // iconv() has already raised its own diagnostic. Continue with an
            // empty document so the parser's verdict is still printed below.
            $data = '';
        }

        if ($this->bom) {
            $mark = $this->byte_order_mark();
            if ($mark !== null) {
                $data = $mark . $data;
            }
        }

        $parser = xml_parser_create(null);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_set_element_handler($parser, $this->start_element(...), $this->end_element(...));

        $success = $this->feed($parser, $data);

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: " . ($this->prologue ? 'present' : 'not present'), "\n";
        echo "Chunk size: " . ($this->chunk_size ? "$this->chunk_size byte(s)\n" : "all data at once\n");
        echo "BOM: " . ($this->bom ? 'prepended' : 'not prepended'), "\n";

        if ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }

    /** Returns the UTF-8 document, preceded by an XML declaration unless it was omitted. */
    private function build_document() {
        if (!$this->prologue) {
            return self::DOCUMENT;
        }

        // The declaration names the encoding without its BE/LE suffix.
        $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
        return "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n" . self::DOCUMENT;
    }

    /** Returns the byte order mark used for the target encoding, or null when none is listed. */
    private function byte_order_mark() {
        return match (strtoupper($this->encoding)) {
            'UTF-8', 'UTF8' => "\xef\xbb\xbf",

            'UTF-16', 'UTF16', 'UTF-16BE', 'UTF16BE',
            'UCS-2', 'UCS2', 'UCS-2BE', 'UCS2BE' => "\xfe\xff",

            'UTF-16LE', 'UTF16LE', 'UCS-2LE', 'UCS2LE' => "\xff\xfe",

            'UTF-32', 'UTF32', 'UTF-32BE', 'UTF32BE',
            'UCS-4', 'UCS4', 'UCS-4BE', 'UCS4BE' => "\x00\x00\xfe\xff",

            'UTF-32LE', 'UTF32LE', 'UCS-4LE', 'UCS4LE' => "\xff\xfe\x00\x00",

            default => null,
        };
    }

    /**
     * Feeds $data to $parser and returns the result of the last xml_parse()
     * call made (truthy on success).
     *
     * Warnings from xml_parse() are silenced; on failure run() prints the
     * parser's error string instead.
     */
    private function feed($parser, $data) {
        if ($this->chunk_size == 0) {
            return @xml_parse($parser, $data, true);
        }

        $length = strlen($data);
        for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
            $ok = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
            if (!$ok) {
                return $ok;
            }
        }

        // Always issue the terminating call, even when there was nothing to
        // feed, so empty input gives the same result as in single-call mode
        // and no undefined status variable is ever read.
        return @xml_parse($parser, "", true);
    }
}
// Constructor arguments for every scenario, in execution order:
// encoding, chunk size (0 = whole document in one call), BOM flag,
// omit-prologue flag.
$scenarios = [
    ["UTF-8",     0, 0, 0],
    ["UTF-8",     0, 0, 1],
    ["UTF-8",     0, 1, 0],
    ["UTF-8",     0, 1, 1],
    ["UTF-16BE",  0, 0, 0],
    ["UTF-16BE",  0, 1, 0],
    ["UTF-16BE",  0, 1, 1],
    ["UTF-16LE",  0, 0, 0],
    ["UTF-16LE",  0, 1, 0],
    ["UTF-16LE",  0, 1, 1],
    ["UTF-8",     1, 0, 0],
    ["UTF-8",     1, 0, 1],
    ["UTF-8",     1, 1, 0],
    ["UTF-8",     1, 1, 1],
    ["UTF-16BE",  1, 0, 0],
    ["UTF-16BE",  1, 1, 0],
    ["UTF-16BE",  1, 1, 1],
    ["UTF-16LE",  1, 0, 0],
    ["UTF-16LE",  1, 1, 0],
    ["UTF-16LE",  1, 1, 1],
];

$suite = [];
foreach ($scenarios as $arguments) {
    $suite[] = new testcase(...$arguments);
}

// Print which libxml2 version backs the SAX parser, or NONE when the
// extension is not built on libxml.
$libxml_version = XML_SAX_IMPL == 'libxml' ? LIBXML_DOTTED_VERSION : 'NONE';
echo "libxml2 Version => " . $libxml_version . "\n";

foreach ($suite as $scenario) {
    $scenario->run();
}

164
165?>
166--EXPECTF--
167libxml2 Version => %s
168Encoding: UTF-8
169XML Prologue: present
170Chunk size: all data at once
171BOM: not prepended
172array(3) {
173  [0]=>
174  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
175  [1]=>
176  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
177  [2]=>
178  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
179}
180Encoding: UTF-8
181XML Prologue: not present
182Chunk size: all data at once
183BOM: not prepended
184array(3) {
185  [0]=>
186  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
187  [1]=>
188  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
189  [2]=>
190  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
191}
192Encoding: UTF-8
193XML Prologue: present
194Chunk size: all data at once
195BOM: prepended
196array(3) {
197  [0]=>
198  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
199  [1]=>
200  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
201  [2]=>
202  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
203}
204Encoding: UTF-8
205XML Prologue: not present
206Chunk size: all data at once
207BOM: prepended
208array(3) {
209  [0]=>
210  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
211  [1]=>
212  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
213  [2]=>
214  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
215}
216Encoding: UTF-16BE
217XML Prologue: present
218Chunk size: all data at once
219BOM: not prepended
220array(3) {
221  [0]=>
222  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
223  [1]=>
224  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
225  [2]=>
226  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
227}
228Encoding: UTF-16BE
229XML Prologue: present
230Chunk size: all data at once
231BOM: prepended
232array(3) {
233  [0]=>
234  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
235  [1]=>
236  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
237  [2]=>
238  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
239}
240Encoding: UTF-16BE
241XML Prologue: not present
242Chunk size: all data at once
243BOM: prepended
244array(3) {
245  [0]=>
246  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
247  [1]=>
248  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
249  [2]=>
250  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
251}
252Encoding: UTF-16LE
253XML Prologue: present
254Chunk size: all data at once
255BOM: not prepended
256array(3) {
257  [0]=>
258  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
259  [1]=>
260  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
261  [2]=>
262  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
263}
264Encoding: UTF-16LE
265XML Prologue: present
266Chunk size: all data at once
267BOM: prepended
268array(3) {
269  [0]=>
270  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
271  [1]=>
272  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
273  [2]=>
274  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
275}
276Encoding: UTF-16LE
277XML Prologue: not present
278Chunk size: all data at once
279BOM: prepended
280array(3) {
281  [0]=>
282  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
283  [1]=>
284  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
285  [2]=>
286  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
287}
288Encoding: UTF-8
289XML Prologue: present
290Chunk size: 1 byte(s)
291BOM: not prepended
292array(3) {
293  [0]=>
294  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
295  [1]=>
296  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
297  [2]=>
298  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
299}
300Encoding: UTF-8
301XML Prologue: not present
302Chunk size: 1 byte(s)
303BOM: not prepended
304array(3) {
305  [0]=>
306  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
307  [1]=>
308  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
309  [2]=>
310  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
311}
312Encoding: UTF-8
313XML Prologue: present
314Chunk size: 1 byte(s)
315BOM: prepended
316array(3) {
317  [0]=>
318  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
319  [1]=>
320  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
321  [2]=>
322  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
323}
324Encoding: UTF-8
325XML Prologue: not present
326Chunk size: 1 byte(s)
327BOM: prepended
328array(3) {
329  [0]=>
330  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
331  [1]=>
332  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
333  [2]=>
334  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
335}
336Encoding: UTF-16BE
337XML Prologue: present
338Chunk size: 1 byte(s)
339BOM: not prepended
340array(3) {
341  [0]=>
342  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
343  [1]=>
344  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
345  [2]=>
346  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
347}
348Encoding: UTF-16BE
349XML Prologue: present
350Chunk size: 1 byte(s)
351BOM: prepended
352array(3) {
353  [0]=>
354  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
355  [1]=>
356  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
357  [2]=>
358  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
359}
360Encoding: UTF-16BE
361XML Prologue: not present
362Chunk size: 1 byte(s)
363BOM: prepended
364array(3) {
365  [0]=>
366  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
367  [1]=>
368  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
369  [2]=>
370  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
371}
372Encoding: UTF-16LE
373XML Prologue: present
374Chunk size: 1 byte(s)
375BOM: not prepended
376array(3) {
377  [0]=>
378  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
379  [1]=>
380  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
381  [2]=>
382  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
383}
384Encoding: UTF-16LE
385XML Prologue: present
386Chunk size: 1 byte(s)
387BOM: prepended
388array(3) {
389  [0]=>
390  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
391  [1]=>
392  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
393  [2]=>
394  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
395}
396Encoding: UTF-16LE
397XML Prologue: not present
398Chunk size: 1 byte(s)
399BOM: prepended
400array(3) {
401  [0]=>
402  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
403  [1]=>
404  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
405  [2]=>
406  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
407}
408