xref: /PHP-8.0/ext/xml/tests/bug32001b.phpt (revision 4c89ed61)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using EUC-JP, Shift_JIS, GB2312
3--SKIPIF--
<?php
// Shared skip checks for this test directory (contents are not visible from
// this file).
require_once("skipif.inc");

// The test body converts its XML fixture with iconv(), so iconv is mandatory.
if (!extension_loaded('iconv')) die ("skip iconv extension not available");

// Probe every encoding exercised by the suite below. The names here must match
// the ones the suite uses (EUC-JP, Shift_JIS, GB2312); a misspelt name would
// make the probe test an encoding that is never actually used.
foreach (array('EUC-JP', 'Shift_JIS', 'GB2312') as $encoding) {
    try {
        xml_parser_create($encoding);
    } catch (ValueError) {
        // xml_parser_create() rejected the encoding name: skip the test.
        die("skip libxml2 does not support $encoding encoding");
    }
}
?>
15--FILE--
16<?php
/**
 * One parsing scenario for bug #32001.
 *
 * Each instance builds a small XML document with multi-byte (Japanese)
 * element names, prefixes and attribute values, converts it from UTF-8 to the
 * configured encoding with iconv(), feeds it to an XML parser created without
 * an explicit encoding, and prints either the collected start tags
 * (hex-encoded) or the parser error.
 */
class testcase {
    /** Target encoding the document is converted to before parsing. */
    private $encoding;
    /** Only reported in the output; run() never prepends a BOM to the data. */
    private $bom;
    /** Whether an XML declaration naming the encoding is emitted. */
    private $prologue;
    /** Start tags seen so far, as "hex(name): hex(attr), hex(attr), ...". */
    private $tags;
    /** Bytes handed to xml_parse() per call; 0 means the whole document at once. */
    private $chunk_size;

    /**
     * PHP 8 no longer treats a method named after the class as a constructor,
     * so this must be __construct() for `new testcase(...)` to initialise the
     * scenario.
     *
     * @param string $enc           Encoding to convert the document to.
     * @param int    $chunk_size    Bytes per xml_parse() call, 0 for one call.
     * @param mixed  $bom           Truthy to report the BOM as "prepended".
     * @param mixed  $omit_prologue Truthy to leave out the XML declaration.
     */
    public function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = array();
    }

    /**
     * Start-element handler: records the element name and its attribute
     * values hex-encoded, so the expected output is independent of the
     * terminal encoding.
     */
    public function start_element($parser, $name, $attrs) {
        $attrs = array_map('bin2hex', $attrs);
        $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
    }

    /**
     * End-element handler: intentionally empty, only start tags are recorded.
     */
    public function end_element($parser, $name) {
    }

    /**
     * Builds the document, parses it (whole or in chunks) and prints the
     * scenario description followed by the recorded tags or the parser error.
     */
    public function run() {
        $data = '';

        if ($this->prologue) {
            // Drop any BE/LE marker from the encoding name before putting it
            // into the XML declaration.
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

        // The fixture above is UTF-8; re-encode it into the encoding under test.
        $data = iconv("UTF-8", $this->encoding, $data);

        // No encoding is passed to the parser on purpose (see the test title:
        // the bug shows up "when autodetection [is] in effect").
        $parser = xml_parser_create(NULL);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_set_element_handler($parser, "start_element", "end_element");
        xml_set_object($parser, $this);

        if ($this->chunk_size == 0) {
            $success = @xml_parse($parser, $data, true);
        } else {
            // Start out successful so that $success is defined even when the
            // loop body never runs (empty data).
            $success = true;
            for ($offset = 0; $offset < strlen($data);
                    $offset += $this->chunk_size) {
                $success = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
                if (!$success) {
                    break;
                }
            }
            if ($success) {
                // Signal the end of input with an empty final chunk.
                $success = @xml_parse($parser, "", true);
            }
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }
}
// Build the scenario list: every encoding is parsed once in a single call
// (chunk size 0) and once byte by byte (chunk size 1), in that order.
$suite = [];
foreach (["EUC-JP", "Shift_JIS", "GB2312"] as $enc) {
    foreach ([0, 1] as $chunk) {
        $suite[] = new testcase($enc, $chunk);
    }
}

// Print the libxml2 version banner; builds whose SAX implementation is not
// libxml report "NONE" instead of a version number.
echo "libxml2 Version => ", (XML_SAX_IMPL === 'libxml' ? LIBXML_DOTTED_VERSION : "NONE"), "\n";

// Run the scenarios in declaration order.
foreach ($suite as $scenario) {
    $scenario->run();
}
110
111?>
112--EXPECTF--
113libxml2 Version => %s
114Encoding: EUC-JP
115XML Prologue: present
116Chunk size: all data at once
117BOM: not prepended
118array(3) {
119  [0]=>
120  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
121  [1]=>
122  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
123  [2]=>
124  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
125}
126Encoding: EUC-JP
127XML Prologue: present
128Chunk size: 1 byte(s)
129BOM: not prepended
130array(3) {
131  [0]=>
132  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
133  [1]=>
134  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
135  [2]=>
136  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
137}
138Encoding: Shift_JIS
139XML Prologue: present
140Chunk size: all data at once
141BOM: not prepended
142array(3) {
143  [0]=>
144  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
145  [1]=>
146  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
147  [2]=>
148  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
149}
150Encoding: Shift_JIS
151XML Prologue: present
152Chunk size: 1 byte(s)
153BOM: not prepended
154array(3) {
155  [0]=>
156  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
157  [1]=>
158  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
159  [2]=>
160  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
161}
162Encoding: GB2312
163XML Prologue: present
164Chunk size: all data at once
165BOM: not prepended
166array(3) {
167  [0]=>
168  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
169  [1]=>
170  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
171  [2]=>
172  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
173}
174Encoding: GB2312
175XML Prologue: present
176Chunk size: 1 byte(s)
177BOM: not prepended
178array(3) {
179  [0]=>
180  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
181  [1]=>
182  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
183  [2]=>
184  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
185}
186