xref: /php-src/ext/xml/tests/bug32001b.phpt (revision 8567bc10)
1--TEST--
2Bug #32001 (xml_parse*() goes into infinite loop when autodetection in effect), using EUC-JP, Shift_JIS, GB2312
3--EXTENSIONS--
4iconv
5xml
6--SKIPIF--
<?php
// Skip the whole test unless the XML parser accepts every encoding that the
// suite in the --FILE-- section exercises. xml_parser_create() signals an
// encoding it does not accept by throwing ValueError.
// The list must match the suite: 'Shift_JIS' (previously misspelled
// 'Shift_JISP', which probed an encoding name the test never uses).
foreach (array('EUC-JP', 'Shift_JIS', 'GB2312') as $encoding) {
    try {
        xml_parser_create($encoding);
    } catch (ValueError) {
        die("skip libxml2 does not support $encoding encoding");
    }
}
?>
16--FILE--
17<?php
/**
 * One parsing scenario for bug #32001: encode a fixed XML document in a given
 * character set, feed it to an auto-detecting XML parser (whole or in
 * fixed-size chunks) and print what the start-element handler collected.
 */
class testcase {
    private $encoding;
    private $bom;
    private $prologue;
    private $tags;
    private $chunk_size;

    /**
     * @param string $enc           iconv name of the encoding the document is converted to
     * @param int    $chunk_size    bytes per xml_parse() call; 0 parses all data in one call
     * @param mixed  $bom           truthy to report "BOM: prepended"; only used for the
     *                              report line, run() never adds a BOM to the data
     * @param mixed  $omit_prologue truthy to leave out the <?xml ... ?> declaration
     *
     * This must be __construct(): the file already requires PHP 8 syntax, and
     * there a method named after the class is an ordinary method, so
     * `new testcase(...)` would leave every property unset.
     */
    function __construct($enc, $chunk_size = 0, $bom = 0, $omit_prologue = 0) {
        $this->encoding = $enc;
        $this->chunk_size = $chunk_size;
        $this->bom = $bom;
        $this->prologue = !$omit_prologue;
        $this->tags = array();
    }

    /**
     * Start-element handler: records "<hex name>: <hex attr values, comma separated>".
     * Hex encoding keeps the recorded output plain ASCII.
     */
    function start_element($parser, $name, $attrs) {
        $attrs = array_map('bin2hex', $attrs);
        $this->tags[] = bin2hex($name).": ".implode(', ', $attrs);
    }

    /** End-element handler: intentionally does nothing. */
    function end_element($parser, $name) {
    }

    /**
     * Build the document, convert it to the configured encoding, parse it and
     * print the scenario description followed by the collected tags, or the
     * parser's error string when parsing failed.
     */
    function run() {
        $data = '';

        if ($this->prologue) {
            // Drop any "BE"/"LE" (case-insensitive) from the name before it is
            // written into the XML declaration.
            $canonical_name = preg_replace('/BE|LE/i', '', $this->encoding);
            $data .= "<?xml version=\"1.0\" encoding=\"$canonical_name\" ?>\n";
        }

        $data .= <<<HERE
<テスト:テスト1 xmlns:テスト="http://www.example.com/テスト/" テスト="テスト">
  <テスト:テスト2 テスト="テスト">
    <テスト:テスト3>
      test!
    </テスト:テスト3>
  </テスト:テスト2>
</テスト:テスト1>
HERE;

        // The heredoc above is UTF-8 source text; re-encode it for this scenario.
        $data = iconv("UTF-8", $this->encoding, $data);

        // NULL encoding: no source encoding is forced on the parser.
        $parser = xml_parser_create(NULL);
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        // Bind the handlers as array callables instead of bare method-name
        // strings plus xml_set_object(); that pairing is deprecated in PHP 8.4.
        xml_set_element_handler($parser, [$this, 'start_element'], [$this, 'end_element']);

        // Parse warnings are suppressed with @; failures are reported below
        // through the parser's error code instead.
        if ($this->chunk_size == 0) {
            $success = @xml_parse($parser, $data, true);
        } else {
            // Defined up front so an empty $data (zero loop iterations) still
            // reaches the final xml_parse() call instead of reading an
            // undefined variable.
            $success = true;
            $length = strlen($data);
            for ($offset = 0; $offset < $length; $offset += $this->chunk_size) {
                $success = @xml_parse($parser, substr($data, $offset, $this->chunk_size), false);
                if (!$success) {
                    break;
                }
            }
            if ($success) {
                // Signal end of input.
                $success = @xml_parse($parser, "", true);
            }
        }

        echo "Encoding: $this->encoding\n";
        echo "XML Prologue: ".($this->prologue ? 'present': 'not present'), "\n";
        echo "Chunk size: ".($this->chunk_size ? "$this->chunk_size byte(s)\n": "all data at once\n");
        echo "BOM: ".($this->bom ? 'prepended': 'not prepended'), "\n";

        if ($success) {
            var_dump($this->tags);
        } else {
            echo "[Error] ", xml_error_string(xml_get_error_code($parser)), "\n";
        }
    }
}
// Build the scenarios: every encoding is parsed once in a single call
// (chunk size 0) and once byte by byte (chunk size 1), in that order.
$suite = [];
foreach (['EUC-JP', 'Shift_JIS', 'GB2312'] as $charset) {
    foreach ([0, 1] as $bytes_per_call) {
        $suite[] = new testcase($charset, $bytes_per_call);
    }
}

// Report which libxml2 version backs the SAX parser, or NONE when the XML
// extension is built on a different implementation.
echo XML_SAX_IMPL == 'libxml'
    ? "libxml2 Version => " . LIBXML_DOTTED_VERSION. "\n"
    : "libxml2 Version => NONE\n";

// Run the scenarios in the order they were declared.
for ($i = 0, $total = count($suite); $i < $total; $i++) {
    $suite[$i]->run();
}
110}
111
112?>
113--EXPECTF--
114libxml2 Version => %s
115Encoding: EUC-JP
116XML Prologue: present
117Chunk size: all data at once
118BOM: not prepended
119array(3) {
120  [0]=>
121  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
122  [1]=>
123  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
124  [2]=>
125  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
126}
127Encoding: EUC-JP
128XML Prologue: present
129Chunk size: 1 byte(s)
130BOM: not prepended
131array(3) {
132  [0]=>
133  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
134  [1]=>
135  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
136  [2]=>
137  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
138}
139Encoding: Shift_JIS
140XML Prologue: present
141Chunk size: all data at once
142BOM: not prepended
143array(3) {
144  [0]=>
145  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
146  [1]=>
147  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
148  [2]=>
149  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
150}
151Encoding: Shift_JIS
152XML Prologue: present
153Chunk size: 1 byte(s)
154BOM: not prepended
155array(3) {
156  [0]=>
157  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
158  [1]=>
159  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
160  [2]=>
161  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
162}
163Encoding: GB2312
164XML Prologue: present
165Chunk size: all data at once
166BOM: not prepended
167array(3) {
168  [0]=>
169  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
170  [1]=>
171  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
172  [2]=>
173  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
174}
175Encoding: GB2312
176XML Prologue: present
177Chunk size: 1 byte(s)
178BOM: not prepended
179array(3) {
180  [0]=>
181  string(128) "e38386e382b9e383883ae38386e382b9e3838831: 687474703a2f2f7777772e6578616d706c652e636f6d2fe38386e382b9e383882f, e38386e382b9e38388"
182  [1]=>
183  string(60) "e38386e382b9e383883ae38386e382b9e3838832: e38386e382b9e38388"
184  [2]=>
185  string(42) "e38386e382b9e383883ae38386e382b9e3838833: "
186}
187