1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20
21 #include "php.h"
22 #if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23 #include "php_dom.h"
24 #include "namespace_compat.h"
25 #include "xml_serializer.h"
26 #include <libxml/xmlsave.h>
27
/* Checks that `options` contains nothing but the libxml parser flags accepted here.
 * Returns true when every set bit is allowed. Otherwise reports a value error for
 * argument number `arg_num` through zend_argument_value_error() and returns false. */
static bool check_options_validity(uint32_t arg_num, zend_long options)
{
	/* Keep this mask in sync with the flag names listed in the error message below. */
	const zend_long allowed_mask =
		XML_PARSE_RECOVER
		| XML_PARSE_NOENT
#if LIBXML_VERSION >= 21300
		| XML_PARSE_NO_XXE
#endif
		| XML_PARSE_DTDLOAD
		| XML_PARSE_DTDATTR
		| XML_PARSE_DTDVALID
		| XML_PARSE_NOERROR
		| XML_PARSE_NOWARNING
		| XML_PARSE_NOBLANKS
		| XML_PARSE_XINCLUDE
		| XML_PARSE_NSCLEAN
		| XML_PARSE_NOCDATA
		| XML_PARSE_NONET
		| XML_PARSE_PEDANTIC
		| XML_PARSE_COMPACT
		| XML_PARSE_HUGE
		| XML_PARSE_BIG_LINES;

	const zend_long rejected_bits = options & ~allowed_mask;
	if (rejected_bits == 0) {
		return true;
	}

	zend_argument_value_error(arg_num, "contains invalid flags (allowed flags: "
		"LIBXML_RECOVER, "
		"LIBXML_NOENT, "
#if LIBXML_VERSION >= 21300
		"LIBXML_NO_XXE, "
#endif
		"LIBXML_DTDLOAD, "
		"LIBXML_DTDATTR, "
		"LIBXML_DTDVALID, "
		"LIBXML_NOERROR, "
		"LIBXML_NOWARNING, "
		"LIBXML_NOBLANKS, "
		"LIBXML_XINCLUDE, "
		"LIBXML_NSCLEAN, "
		"LIBXML_NOCDATA, "
		"LIBXML_NONET, "
		"LIBXML_PEDANTIC, "
		"LIBXML_COMPACT, "
		"LIBXML_PARSEHUGE, "
		"LIBXML_BIGLINES)");
	return false;
}
74
75 /* Living spec never creates explicit namespace declaration nodes.
76 * They are only written upon serialization but never appear in the tree.
77 * So in principle we could just ignore them outright.
78 * However, step 10 in https://html.spec.whatwg.org/multipage/parsing.html#create-an-element-for-the-token (Date 2023-12-15)
79 * requires us to have the declaration as an attribute available */
dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper * ns_mapper,xmlDocPtr doc)80 void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc)
81 {
82 xmlNodePtr node = doc->children;
83 while (node != NULL) {
84 if (node->type == XML_ELEMENT_NODE) {
85 php_dom_ns_compat_mark_attribute_list(ns_mapper, node);
86 }
87
88 node = php_dom_next_in_tree_order(node, NULL);
89 }
90 }
91
PHP_METHOD(Dom_XMLDocument,createEmpty)92 PHP_METHOD(Dom_XMLDocument, createEmpty)
93 {
94 const char *version = NULL;
95 size_t encoding_len = strlen("UTF-8");
96 const char *encoding = "UTF-8";
97 size_t version_len;
98 if (zend_parse_parameters(ZEND_NUM_ARGS(), "|sp", &version, &version_len, &encoding, &encoding_len) == FAILURE) {
99 RETURN_THROWS();
100 }
101
102 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
103
104 if (handler != NULL) {
105 xmlCharEncCloseFunc(handler);
106 } else {
107 zend_argument_value_error(2, "is not a valid document encoding");
108 RETURN_THROWS();
109 }
110
111 xmlDocPtr lxml_doc = xmlNewDoc((const xmlChar *) version);
112 if (UNEXPECTED(lxml_doc == NULL)) {
113 goto oom;
114 }
115
116 lxml_doc->encoding = xmlStrdup((const xmlChar *) encoding);
117
118 dom_object *intern = php_dom_instantiate_object_helper(
119 return_value,
120 dom_xml_document_class_entry,
121 (xmlNodePtr) lxml_doc,
122 NULL
123 );
124 dom_set_xml_class(intern->document);
125 intern->document->private_data = php_dom_libxml_ns_mapper_header(php_dom_libxml_ns_mapper_create());
126 return;
127
128 oom:
129 php_dom_throw_error(INVALID_STATE_ERR, true);
130 RETURN_THROWS();
131 }
132
/* Shared implementation behind the Dom\XMLDocument factory methods that parse input.
 * `mode` is forwarded to dom_document_parser(); DOM_LOAD_FILE additionally enables the
 * file-specific checks and the URL normalisation below.
 *
 * PHP-level arguments: string $source [, int $options = 0 [, ?string $overrideEncoding = null]].
 * Argument checks run in this order: non-empty source, source length fits in an int,
 * no "%00" in a file source, only allowed option flags, known override encoding.
 * On success `return_value` holds the new document object; on failure an exception is thrown. */
static void load_from_helper(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	const char *source;
	size_t source_len;
	zend_long options = 0;
	const char *override_encoding = NULL;
	size_t override_encoding_len;

	if (zend_parse_parameters(
		ZEND_NUM_ARGS(),
		"s|lp!",
		&source,
		&source_len,
		&options,
		&override_encoding,
		&override_encoding_len
	) == FAILURE) {
		RETURN_THROWS();
	}

	if (source_len == 0) {
		zend_argument_value_error(1, "must not be empty");
		RETURN_THROWS();
	}

	if (ZEND_SIZE_T_INT_OVFL(source_len)) {
		zend_argument_value_error(1, "is too long");
		RETURN_THROWS();
	}

	/* See php_libxml_streams_IO_open_wrapper(), apparently this caused issues in the past. */
	if (mode == DOM_LOAD_FILE && strstr(source, "%00") != NULL) {
		zend_argument_value_error(1, "must not contain percent-encoded NUL bytes");
		RETURN_THROWS();
	}

	if (!check_options_validity(2, options)) {
		RETURN_THROWS();
	}

	/* An explicit override encoding makes the parser ignore the encoding declared in the input. */
	xmlCharEncodingHandlerPtr encoding_handler = NULL;
	if (override_encoding != NULL) {
		encoding_handler = xmlFindCharEncodingHandler(override_encoding);
		if (encoding_handler == NULL) {
			zend_argument_value_error(3, "must be a valid document encoding");
			RETURN_THROWS();
		}
		options |= XML_PARSE_IGNORE_ENC;
	}

	xmlDocPtr lxml_doc = dom_document_parser(NULL, mode, source, source_len, options, encoding_handler);
	if (UNEXPECTED(lxml_doc == NULL || lxml_doc == DOM_DOCUMENT_MALFORMED)) {
		/* Only raise our own error when the parser has not already thrown one. */
		if (!EG(exception)) {
			if (lxml_doc == DOM_DOCUMENT_MALFORMED) {
				php_dom_throw_error_with_message(SYNTAX_ERR, "XML fragment is not well-formed", true);
			} else if (mode == DOM_LOAD_FILE) {
				zend_throw_exception_ex(NULL, 0, "Cannot open file '%s'", source);
			} else {
				php_dom_throw_error(INVALID_STATE_ERR, true);
			}
		}
		RETURN_THROWS();
	}

	/* Make sure the document always records an encoding: the override if given, else UTF-8. */
	if (lxml_doc->encoding == NULL) {
		lxml_doc->encoding = xmlStrdup((const xmlChar *) (override_encoding ? override_encoding : "UTF-8"));
	}

	/* Normalise the document URL of plain (non-stream) file paths to a file: URL. */
	if (mode == DOM_LOAD_FILE && lxml_doc->URL != NULL && !php_is_stream_path((char *) lxml_doc->URL)) {
		/* Check for "file:/" instead of "file://" because of libxml2 quirk */
		if (strncmp((const char *) lxml_doc->URL, "file:/", sizeof("file:/") - 1) == 0) {
#ifdef PHP_WIN32
			lxml_doc->URL = php_dom_libxml_fix_file_path(BAD_CAST lxml_doc->URL);
#endif
		} else {
#ifdef PHP_WIN32
			xmlChar *prefixed = xmlStrdup((const xmlChar *) "file:///");
#else
			xmlChar *prefixed = xmlStrdup((const xmlChar *) "file://");
#endif
			if (prefixed != NULL) {
				xmlChar *joined = xmlStrcat(prefixed, lxml_doc->URL);
				if (joined == NULL) {
					xmlFree(prefixed);
				} else {
					xmlFree(BAD_CAST lxml_doc->URL);
					lxml_doc->URL = joined;
				}
			}
		}
	}

	dom_object *obj = php_dom_instantiate_object_helper(
		return_value,
		dom_xml_document_class_entry,
		(xmlNodePtr) lxml_doc,
		NULL
	);
	dom_set_xml_class(obj->document);
	dom_document_convert_to_modern(obj->document, lxml_doc);
}
236
/* Equips `document` with a newly created namespace mapper (stored in its private data)
 * and then runs the namespace-declaration marking pass over every element of `lxml_doc`. */
void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml_doc)
{
	php_dom_libxml_ns_mapper *mapper = php_dom_libxml_ns_mapper_create();

	document->private_data = php_dom_libxml_ns_mapper_header(mapper);
	dom_mark_namespaces_as_attributes_too(mapper, lxml_doc);
}
243
PHP_METHOD(Dom_XMLDocument,createFromString)244 PHP_METHOD(Dom_XMLDocument, createFromString)
245 {
246 load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_STRING);
247 }
248
PHP_METHOD(Dom_XMLDocument,createFromFile)249 PHP_METHOD(Dom_XMLDocument, createFromFile)
250 {
251 load_from_helper(INTERNAL_FUNCTION_PARAM_PASSTHRU, DOM_LOAD_FILE);
252 }
253
php_new_dom_write_smart_str(void * context,const char * buffer,int len)254 static int php_new_dom_write_smart_str(void *context, const char *buffer, int len)
255 {
256 smart_str *str = context;
257 smart_str_appendl(str, buffer, len);
258 return len;
259 }
260
php_new_dom_dump_node_to_str_ex(xmlNodePtr node,int options,bool format,const char * encoding)261 static zend_string *php_new_dom_dump_node_to_str_ex(xmlNodePtr node, int options, bool format, const char *encoding)
262 {
263 smart_str str = {0};
264
265 int status = -1;
266 xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_new_dom_write_smart_str, NULL, &str, encoding, XML_SAVE_AS_XML | options);
267 if (EXPECTED(ctxt != NULL)) {
268 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
269 xmlOutputBufferPtr out = xmlOutputBufferCreateIO(php_new_dom_write_smart_str, NULL, &str, handler);
270 if (EXPECTED(out != NULL)) {
271 status = dom_xml_serialize(ctxt, out, node, format, false);
272 status |= xmlOutputBufferFlush(out);
273 status |= xmlOutputBufferClose(out);
274 } else {
275 xmlCharEncCloseFunc(handler);
276 }
277 (void) xmlSaveClose(ctxt);
278 }
279
280 if (UNEXPECTED(status < 0)) {
281 smart_str_free_ex(&str, false);
282 return NULL;
283 }
284
285 return smart_str_extract(&str);
286 }
287
php_new_dom_dump_node_to_str(xmlDocPtr doc,xmlNodePtr node,bool format,const char * encoding)288 static zend_string *php_new_dom_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
289 {
290 return php_new_dom_dump_node_to_str_ex(node, 0, format, encoding);
291 }
292
php_new_dom_dump_doc_to_str(xmlDocPtr doc,int options,const char * encoding)293 static zend_string *php_new_dom_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)
294 {
295 return php_new_dom_dump_node_to_str_ex((xmlNodePtr) doc, options, options & XML_SAVE_FORMAT, encoding);
296 }
297
php_new_dom_dump_node_to_file(const char * filename,xmlDocPtr doc,xmlNodePtr node,bool format,const char * encoding)298 zend_long php_new_dom_dump_node_to_file(const char *filename, xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
299 {
300 xmlCharEncodingHandlerPtr handler = xmlFindCharEncodingHandler(encoding);
301 xmlOutputBufferPtr out = xmlOutputBufferCreateFilename(filename, handler, 0);
302 if (!out) {
303 xmlCharEncCloseFunc(handler);
304 return -1;
305 }
306
307 php_stream *stream = out->context;
308
309 int status = -1;
310 xmlSaveCtxtPtr ctxt = xmlSaveToIO(out->writecallback, NULL, stream, encoding, XML_SAVE_AS_XML);
311 if (EXPECTED(ctxt != NULL)) {
312 status = dom_xml_serialize(ctxt, out, node, format, false);
313 status |= xmlOutputBufferFlush(out);
314 (void) xmlSaveClose(ctxt);
315 }
316
317 size_t offset = php_stream_tell(stream);
318
319 (void) xmlOutputBufferClose(out);
320
321 return status < 0 ? status : (zend_long) offset;
322 }
323
php_new_dom_dump_doc_to_file(const char * filename,xmlDocPtr doc,bool format,const char * encoding)324 static zend_long php_new_dom_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding)
325 {
326 return php_new_dom_dump_node_to_file(filename, doc, (xmlNodePtr) doc, format, encoding);
327 }
328
329 static const php_libxml_document_handlers php_new_dom_default_document_handlers = {
330 .dump_node_to_str = php_new_dom_dump_node_to_str,
331 .dump_doc_to_str = php_new_dom_dump_doc_to_str,
332 .dump_node_to_file = php_new_dom_dump_node_to_file,
333 .dump_doc_to_file = php_new_dom_dump_doc_to_file,
334 };
335
/* Tags `document` as a modern-class document and points it at this file's
 * serialization handler table. */
void dom_set_xml_class(php_libxml_ref_obj *document)
{
	document->handlers = &php_new_dom_default_document_handlers;
	document->class_type = PHP_LIBXML_CLASS_MODERN;
}
341
342 #endif /* HAVE_LIBXML && HAVE_DOM */
343