1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include "config.h"
19 #endif
20
21 #include "php.h"
22 #if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23 #include "php_dom.h"
24 #include "namespace_compat.h"
25
/* Checks that `options` only contains parser flags that are allowed for XMLDocument loading.
 *
 * arg_num: 1-based position of the options argument in the calling method;
 *          it is used to attribute the error to the right argument.
 * options: bitmask of XML_PARSE_* flags.
 *
 * Returns true when no flag outside of the allow-list is set. Otherwise an
 * argument value error is raised via zend_argument_value_error() and false is returned. */
static bool check_options_validity(uint32_t arg_num, zend_long options)
{
	const zend_long VALID_OPTIONS = XML_PARSE_NOENT
		| XML_PARSE_DTDLOAD
		| XML_PARSE_DTDATTR
		| XML_PARSE_DTDVALID
		| XML_PARSE_NOERROR
		| XML_PARSE_NOWARNING
		| XML_PARSE_NOBLANKS
		| XML_PARSE_XINCLUDE
		| XML_PARSE_NSCLEAN
		| XML_PARSE_NOCDATA
		| XML_PARSE_NONET
		| XML_PARSE_PEDANTIC
		| XML_PARSE_COMPACT
		| XML_PARSE_HUGE
		| XML_PARSE_BIG_LINES;
	if ((options & ~VALID_OPTIONS) != 0) {
		/* Report against the caller-supplied argument position instead of a hard-coded one. */
		zend_argument_value_error(arg_num, "contains invalid flags (allowed flags: "
			"LIBXML_NOENT, "
			"LIBXML_DTDLOAD, "
			"LIBXML_DTDATTR, "
			"LIBXML_DTDVALID, "
			"LIBXML_NOERROR, "
			"LIBXML_NOWARNING, "
			"LIBXML_NOBLANKS, "
			"LIBXML_XINCLUDE, "
			"LIBXML_NSCLEAN, "
			"LIBXML_NOCDATA, "
			"LIBXML_NONET, "
			"LIBXML_PEDANTIC, "
			"LIBXML_COMPACT, "
			"LIBXML_PARSEHUGE, "
			"LIBXML_BIGLINES)");
		return false;
	}
	return true;
}
64
65 /* Living spec never creates explicit namespace declaration nodes.
66 * They are only written upon serialization but never appear in the tree.
67 * So in principle we could just ignore them outright.
68 * However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token
69 * requires us to have the declaration as an attribute available */
dom_mark_namespaces_as_attributes_too(xmlDocPtr doc)70 static void dom_mark_namespaces_as_attributes_too(xmlDocPtr doc)
71 {
72 xmlNodePtr node = doc->children;
73 while (node != NULL) {
74 if (node->type == XML_ELEMENT_NODE) {
75 dom_ns_compat_mark_attribute_list(node->nsDef);
76
77 if (node->children) {
78 node = node->children;
79 continue;
80 }
81 }
82
83 if (node->next) {
84 node = node->next;
85 } else {
86 /* Go upwards, until we find a parent node with a next sibling, or until we hit the base. */
87 do {
88 node = node->parent;
89 if (node == NULL) {
90 return;
91 }
92 } while (node->next == NULL);
93 node = node->next;
94 }
95 }
96 }
97
/* Walks the tree rooted at `copy` and the tree rooted at `original` in lockstep
 * and, for every element node in the copy, calls
 * dom_ns_compat_copy_attribute_list_mark() with the nsDef list of the copy and
 * the nsDef list of the node at the same position in the original.
 *
 * Both trees are expected to have the same shape; this is only checked with
 * ZEND_ASSERT on the original-side cursor.
 * NOTE(review): the upward walk stops only when the copy-side parent is NULL,
 * so this presumably expects `copy` to be a detached subtree root -- confirm with callers. */
void dom_mark_namespaces_for_copy_based_on_copy(xmlNodePtr copy, const xmlNode *original)
{
	xmlNodePtr dst = copy;
	const xmlNode *src = original;

	while (dst != NULL) {
		ZEND_ASSERT(src != NULL);

		if (dst->type == XML_ELEMENT_NODE) {
			dom_ns_compat_copy_attribute_list_mark(dst->nsDef, src->nsDef);

			/* Only element nodes are descended into; both cursors move together. */
			if (dst->children != NULL) {
				dst = dst->children;
				src = src->children;
				continue;
			}
		}

		/* Climb until a node with a next sibling is found, or stop once the copy has no parent left. */
		while (dst->next == NULL) {
			dst = dst->parent;
			if (dst == NULL) {
				return;
			}
			src = src->parent;
		}
		dst = dst->next;
		src = src->next;
	}
}
132
PHP_METHOD(DOM_XMLDocument,createEmpty)133 PHP_METHOD(DOM_XMLDocument, createEmpty)
134 {
135 const char *version = NULL;
136 size_t encoding_len = strlen("UTF-8");
137 const char *encoding = "UTF-8";
138 size_t version_len;
139 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|sp", &version, &version_len, &encoding, &encoding_len) == FAILURE) {
140 RETURN_THROWS();
141 }
142
143 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
144
145 if (handler != NULL) {
146 xmlCharEncCloseFunc(handler);
147 } else {
148 zend_argument_value_error(2, "is not a valid document encoding");
149 RETURN_THROWS();
150 }
151
152 xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) version);
153 if (UNEXPECTED(lxml_doc == NULL)) {
154 goto oom;
155 }
156
157 lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
158
159 dom_object *intern = php_dom_instantiate_object_helper(
160 return_value,
161 dom_xml_document_class_entry,
162 (xmlNodePtr) lxml_doc,
163 NULL
164 );
165 intern->document->is_modern_api_class = true;
166 return;
167
168 oom:
169 php_dom_throw_error(INVALID_STATE_ERR, 1);
170 RETURN_THROWS();
171 }
172
/* Shared implementation of the createFromString() and createFromFile() methods.
 *
 * mode: DOM_LOAD_STRING or DOM_LOAD_FILE; forwarded to dom_document_parser() and
 *       used to decide which source checks and which failure message apply.
 *
 * Userland arguments: source (required), options (optional, defaults to 0) and
 * an optional, nullable override encoding.
 *
 * The checks below run in a fixed order (empty source, source length, "%00" in
 * file sources, option flags, override encoding), and each one raises an error for
 * the corresponding argument. On success return_value holds the new document object. */
static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	const char *source;
	const char *override_encoding = NULL;
	size_t source_len;
	size_t override_encoding_len;
	zend_long options = 0;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|lp!", &source, &source_len, &options, &override_encoding, &override_encoding_len) == FAILURE) {
		RETURN_THROWS();
	}

	if (source_len == 0) {
		zend_argument_value_error(1, "must not be empty");
		RETURN_THROWS();
	}

	if (ZEND_SIZE_T_INT_OVFL(source_len)) {
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	/* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
	if (mode == DOM_LOAD_FILE && strstr(source, "%00") != NULL) {
		zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
		RETURN_THROWS();
	}

	if (!check_options_validity(2, options)) {
		RETURN_THROWS();
	}

	/* An explicit override encoding must be known to libxml; when one is given,
	 * XML_PARSE_IGNORE_ENC is added to the parser options. */
	xmlCharEncodingHandlerPtr enc_handler = NULL;
	if (override_encoding != NULL) {
		enc_handler = xmlFindCharEncodingHandler(override_encoding);
		if (enc_handler == NULL) {
			zend_argument_value_error(3, "must be a valid document encoding");
			RETURN_THROWS();
		}
		options |= XML_PARSE_IGNORE_ENC;
	}

	xmlDocPtr parsed_doc = dom_document_parser(NULL, mode, source, source_len, options, enc_handler);
	if (UNEXPECTED(parsed_doc == NULL)) {
		/* Only throw here when the parser did not already leave an exception behind. */
		if (!EG(exception)) {
			if (mode != DOM_LOAD_FILE) {
				php_dom_throw_error(INVALID_STATE_ERR, 1);
			} else {
				zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source);
			}
		}
		RETURN_THROWS();
	}

	/* Make sure the document always records an encoding: prefer the override, fall back to UTF-8. */
	if (parsed_doc->encoding == NULL) {
		const char *fallback_encoding = override_encoding != NULL ? override_encoding : "UTF-8";
		parsed_doc->encoding = xmlStrdup((const xmlChar *) fallback_encoding);
	}

	dom_object *intern = php_dom_instantiate_object_helper(
		return_value,
		dom_xml_document_class_entry,
		(xmlNodePtr) parsed_doc,
		NULL
	);
	intern->document->is_modern_api_class = true;
	dom_mark_namespaces_as_attributes_too(parsed_doc);
}
247
PHP_METHOD(DOM_XMLDocument,createFromString)248 PHP_METHOD(DOM_XMLDocument, createFromString)
249 {
250 load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
251 }
252
PHP_METHOD(DOM_XMLDocument,createFromFile)253 PHP_METHOD(DOM_XMLDocument, createFromFile)
254 {
255 load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
256 }
257
258 #endif /* HAVE_LIBXML && HAVE_DOM */
259