xref: /PHP-8.2/ext/xml/tests/bug32001.phpt (revision 8567bc10)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using UTF-*
3--EXTENSIONS--
4iconv
5xml
6--SKIPIF--
7<?php
// The skip message below states that glibc's iconv up to and including
// 2.12 is buggy, so the test is skipped on exactly that combination.
$iconv_is_old_glibc = ICONV_IMPL == 'glibc'
    && version_compare(ICONV_VERSION, '2.12') <= 0;

if ($iconv_is_old_glibc) {
    die("skip iconv of glibc <= 2.12 is buggy");
}
10?>
11--FILE--
12<?php
/**
 * One scenario of the bug #32001 regression test.
 *
 * A scenario builds a small XML document with non-ASCII element and
 * attribute names, converts it to the configured encoding, optionally
 * prepends a byte order mark, and feeds it to a parser created with
 * xml_parser_create(NULL), either in one call or in fixed-size chunks.
 * It then prints the scenario parameters followed by either the recorded
 * start tags or an error message.
 */
class testcase {
    private $encoding;
    private $bom;
    private $prologue;
    private $tags;
    private $chunk_size;

    /**
     * @param string $enc           Target encoding name handed to iconv().
     * @param int    $chunk_size    Bytes per xml_parse() call; 0 parses the
     *                              whole document in a single call.
     * @param mixed  $bom           Truthy to prepend a byte order mark.
     * @param mixed  $omit_prologue Truthy to leave out the XML declaration.
     */
    public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = array();
    }

    /**
     * Start-element handler: records "<hex name>: <hex attr>, <hex attr>...".
     *
     * Names and values are hex-encoded so the output stays pure ASCII.
     * Must remain public because the XML extension invokes it.
     */
    public function start_element($parser, $name, $attrs) {
        $attrs = array_map('bin2hex', $attrs);
        $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
    }

    /**
     * End-element handler; intentionally does nothing.
     */
    public function end_element($parser, $name) {
    }

    /**
     * Runs the scenario and prints its parameters and result.
     *
     * @return void
     */
    public function run() {
        // Start from a clean slate so running the same instance twice does
        // not accumulate tags from the previous run.
        $this->tags = array();

        $data = $this->build_document();
        $parser = null;
        $success = false;

        if ($data !== false) {
            $parser = xml_parser_create(NULL);
            xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
            // Array callables bind the handlers to this object directly, so
            // no xml_set_object() call with bare method-name strings is
            // needed (that form is deprecated as of PHP 8.4).
            xml_set_element_handler(
                $parser,
                array($this, 'start_element'),
                array($this, 'end_element')
            );
            $success = $this->parse($parser, $data);
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($data === false) {
            // Report the conversion failure as such instead of passing
            // `false` to the parser and printing a misleading XML error.
            echo "[Error] iconv() could not convert the document to $this->encoding\n";
        } elseif ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }

    /**
     * Builds the document in the target encoding, with optional prologue
     * and byte order mark.
     *
     * @return string|false The encoded bytes, or false if iconv() failed.
     */
    private function build_document() {
        $data = '';

        if ($this->prologue) {
            // The declaration carries the encoding name with any BE/LE
            // marker removed (e.g. "UTF-16LE" becomes "UTF-16").
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

        $data = iconv("UTF-8", $this->encoding, $data);
        if ($data === false) {
            return false;
        }

        if ($this->bom) {
            $data = $this->bom_bytes().$data;
        }

        return $data;
    }

    /**
     * Returns the byte order mark for the configured encoding, or an empty
     * string when the encoding name is not one of the recognised spellings.
     *
     * @return string
     */
    private function bom_bytes() {
        switch (strtoupper($this->encoding)) {
            case 'UTF-8':
            case 'UTF8':
                return "\xef\xbb\xbf";

            case 'UTF-16':
            case 'UTF16':
            case 'UTF-16BE':
            case 'UTF16BE':
            case 'UCS-2':
            case 'UCS2':
            case 'UCS-2BE':
            case 'UCS2BE':
                return "\xfe\xff";

            case 'UTF-16LE':
            case 'UTF16LE':
            case 'UCS-2LE':
            case 'UCS2LE':
                return "\xff\xfe";

            case 'UTF-32':
            case 'UTF32':
            case 'UTF-32BE':
            case 'UTF32BE':
            case 'UCS-4':
            case 'UCS4':
            case 'UCS-4BE':
            case 'UCS4BE':
                return "\x00\x00\xfe\xff";

            case 'UTF-32LE':
            case 'UTF32LE':
            case 'UCS-4LE':
            case 'UCS4LE':
                return "\xff\xfe\x00\x00";
        }

        return '';
    }

    /**
     * Feeds $data to $parser, whole or in chunk_size-byte pieces.
     *
     * Every path returns an explicit result, so the caller never reads an
     * unassigned status even when $data is empty.
     *
     * @param mixed  $parser Parser returned by xml_parser_create().
     * @param string $data   Encoded document bytes.
     * @return int|bool Truthy when every xml_parse() call succeeded.
     */
    private function parse($parser, $data) {
        if ($this->chunk_size == 0) {
            return @xml_parse($parser, $data, true);
        }

        $length = strlen($data);
        for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
            $chunk = substr($data, $offset, $this->chunk_size);
            if (!@xml_parse($parser, $chunk, false)) {
                return false;
            }
        }

        // Signal end of input so the parser can finish the document.
        return @xml_parse($parser, "", true);
    }
}
// Build the scenario list: first every scenario parsed in one call
// (chunk size 0), then the same scenarios fed one byte at a time.
// Within each chunk size the encodings run in a fixed order, and each
// encoding is tried with every (BOM, omit-prologue) combination.
$suite = array();
$flag_pairs = array(array(0, 0), array(0, 1), array(1, 0), array(1, 1));

foreach (array(0, 1) as $chunk_size) {
    foreach (array("UTF-8", "UTF-16BE", "UTF-16LE") as $encoding) {
        foreach ($flag_pairs as $flags) {
            list($bom, $omit_prologue) = $flags;

            // The UTF-16 variants are never run with neither a BOM nor a
            // prologue; presumably nothing in such a document would
            // identify its encoding.
            if ($encoding != "UTF-8" && !$bom && $omit_prologue) {
                continue;
            }

            $suite[] = new testcase($encoding, $chunk_size, $bom, $omit_prologue);
        }
    }
}

// Print which libxml2 version backs the SAX parser, or NONE otherwise.
$libxml_version = (XML_SAX_IMPL == 'libxml') ? LIBXML_DOTTED_VERSION : 'NONE';
echo "libxml2 Version => " . $libxml_version . "\n";

// Run every scenario in order.
for ($i = 0, $total = count($suite); $i < $total; $i++) {
    $suite[$i]->run();
}
165
166?>
167--EXPECTF--
168libxml2 Version => %s
169Encoding: UTF-8
170XML Prologue: present
171Chunk size: all data at once
172BOM: not prepended
173array(3) {
174  [0]=>
175  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
176  [1]=>
177  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
178  [2]=>
179  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
180}
181Encoding: UTF-8
182XML Prologue: not present
183Chunk size: all data at once
184BOM: not prepended
185array(3) {
186  [0]=>
187  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
188  [1]=>
189  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
190  [2]=>
191  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
192}
193Encoding: UTF-8
194XML Prologue: present
195Chunk size: all data at once
196BOM: prepended
197array(3) {
198  [0]=>
199  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
200  [1]=>
201  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
202  [2]=>
203  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
204}
205Encoding: UTF-8
206XML Prologue: not present
207Chunk size: all data at once
208BOM: prepended
209array(3) {
210  [0]=>
211  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
212  [1]=>
213  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
214  [2]=>
215  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
216}
217Encoding: UTF-16BE
218XML Prologue: present
219Chunk size: all data at once
220BOM: not prepended
221array(3) {
222  [0]=>
223  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
224  [1]=>
225  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
226  [2]=>
227  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
228}
229Encoding: UTF-16BE
230XML Prologue: present
231Chunk size: all data at once
232BOM: prepended
233array(3) {
234  [0]=>
235  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
236  [1]=>
237  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
238  [2]=>
239  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
240}
241Encoding: UTF-16BE
242XML Prologue: not present
243Chunk size: all data at once
244BOM: prepended
245array(3) {
246  [0]=>
247  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
248  [1]=>
249  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
250  [2]=>
251  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
252}
253Encoding: UTF-16LE
254XML Prologue: present
255Chunk size: all data at once
256BOM: not prepended
257array(3) {
258  [0]=>
259  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
260  [1]=>
261  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
262  [2]=>
263  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
264}
265Encoding: UTF-16LE
266XML Prologue: present
267Chunk size: all data at once
268BOM: prepended
269array(3) {
270  [0]=>
271  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
272  [1]=>
273  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
274  [2]=>
275  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
276}
277Encoding: UTF-16LE
278XML Prologue: not present
279Chunk size: all data at once
280BOM: prepended
281array(3) {
282  [0]=>
283  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
284  [1]=>
285  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
286  [2]=>
287  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
288}
289Encoding: UTF-8
290XML Prologue: present
291Chunk size: 1 byte(s)
292BOM: not prepended
293array(3) {
294  [0]=>
295  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
296  [1]=>
297  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
298  [2]=>
299  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
300}
301Encoding: UTF-8
302XML Prologue: not present
303Chunk size: 1 byte(s)
304BOM: not prepended
305array(3) {
306  [0]=>
307  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
308  [1]=>
309  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
310  [2]=>
311  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
312}
313Encoding: UTF-8
314XML Prologue: present
315Chunk size: 1 byte(s)
316BOM: prepended
317array(3) {
318  [0]=>
319  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
320  [1]=>
321  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
322  [2]=>
323  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
324}
325Encoding: UTF-8
326XML Prologue: not present
327Chunk size: 1 byte(s)
328BOM: prepended
329array(3) {
330  [0]=>
331  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
332  [1]=>
333  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
334  [2]=>
335  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
336}
337Encoding: UTF-16BE
338XML Prologue: present
339Chunk size: 1 byte(s)
340BOM: not prepended
341array(3) {
342  [0]=>
343  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
344  [1]=>
345  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
346  [2]=>
347  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
348}
349Encoding: UTF-16BE
350XML Prologue: present
351Chunk size: 1 byte(s)
352BOM: prepended
353array(3) {
354  [0]=>
355  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
356  [1]=>
357  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
358  [2]=>
359  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
360}
361Encoding: UTF-16BE
362XML Prologue: not present
363Chunk size: 1 byte(s)
364BOM: prepended
365array(3) {
366  [0]=>
367  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
368  [1]=>
369  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
370  [2]=>
371  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
372}
373Encoding: UTF-16LE
374XML Prologue: present
375Chunk size: 1 byte(s)
376BOM: not prepended
377array(3) {
378  [0]=>
379  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
380  [1]=>
381  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
382  [2]=>
383  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
384}
385Encoding: UTF-16LE
386XML Prologue: present
387Chunk size: 1 byte(s)
388BOM: prepended
389array(3) {
390  [0]=>
391  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
392  [1]=>
393  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
394  [2]=>
395  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
396}
397Encoding: UTF-16LE
398XML Prologue: not present
399Chunk size: 1 byte(s)
400BOM: prepended
401array(3) {
402  [0]=>
403  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
404  [1]=>
405  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
406  [2]=>
407  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
408}
409