xref: /PHP-8.4/ext/libxml/libxml.c (revision 6980eba8)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Shane Caraveo <shane@php.net>                               |
14    |          Wez Furlong <wez@thebrainroom.com>                          |
15    +----------------------------------------------------------------------+
16  */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "php.h"
23 #include "SAPI.h"
24 
25 #include "zend_attributes.h"
26 #include "zend_variables.h"
27 #include "ext/standard/info.h"
28 #include "ext/standard/file.h"
29 
30 #ifdef HAVE_LIBXML
31 
32 #include <libxml/parser.h>
33 #include <libxml/parserInternals.h>
34 #include <libxml/tree.h>
35 #include <libxml/uri.h>
36 #include <libxml/xmlerror.h>
37 #include <libxml/xmlsave.h>
38 #include <libxml/xmlerror.h>
39 #include <libxml/entities.h>
40 #ifdef LIBXML_SCHEMAS_ENABLED
41 #include <libxml/relaxng.h>
42 #include <libxml/xmlschemas.h>
43 #endif
44 
45 #include "php_libxml.h"
46 
47 #define PHP_LIBXML_LOADED_VERSION ((char *)xmlParserVersion)
48 
49 #include "libxml_arginfo.h"
50 
51 /* a true global for initialization */
52 static int _php_libxml_initialized = 0;
53 static int _php_libxml_per_request_initialization = 1;
54 static xmlExternalEntityLoader _php_libxml_default_entity_loader;
55 
56 typedef struct _php_libxml_func_handler {
57 	php_libxml_export_node export_func;
58 } php_libxml_func_handler;
59 
60 static HashTable php_libxml_exports;
61 
62 static ZEND_DECLARE_MODULE_GLOBALS(libxml)
63 static PHP_GINIT_FUNCTION(libxml);
64 
65 static zend_class_entry *libxmlerror_class_entry;
66 
/* {{{ dynamically loadable module stuff */
#if defined(COMPILE_DL_LIBXML)
# if defined(ZTS)
ZEND_TSRMLS_CACHE_DEFINE()
# endif
ZEND_GET_MODULE(libxml)
#endif /* COMPILE_DL_LIBXML */
/* }}} */
75 
76 /* {{{ function prototypes */
77 static PHP_MINIT_FUNCTION(libxml);
78 static PHP_RINIT_FUNCTION(libxml);
79 static PHP_RSHUTDOWN_FUNCTION(libxml);
80 static PHP_MSHUTDOWN_FUNCTION(libxml);
81 static PHP_MINFO_FUNCTION(libxml);
82 static zend_result php_libxml_post_deactivate(void);
83 
84 static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding);
85 static zend_string *php_libxml_default_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding);
86 static zend_long php_libxml_dump_node_to_file(const char *filename, xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding);
87 static zend_long php_libxml_default_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding);
88 
89 /* }}} */
90 
91 zend_module_entry libxml_module_entry = {
92 	STANDARD_MODULE_HEADER,
93 	"libxml",                /* extension name */
94 	ext_functions,           /* extension function list */
95 	PHP_MINIT(libxml),       /* extension-wide startup function */
96 	PHP_MSHUTDOWN(libxml),   /* extension-wide shutdown function */
97 	PHP_RINIT(libxml),       /* per-request startup function */
98 	PHP_RSHUTDOWN(libxml),   /* per-request shutdown function */
99 	PHP_MINFO(libxml),       /* information function */
100 	PHP_LIBXML_VERSION,
101 	PHP_MODULE_GLOBALS(libxml), /* globals descriptor */
102 	PHP_GINIT(libxml),          /* globals ctor */
103 	NULL,                       /* globals dtor */
104 	php_libxml_post_deactivate, /* post deactivate */
105 	STANDARD_MODULE_PROPERTIES_EX
106 };
107 
108 /* }}} */
109 
110 static const php_libxml_document_handlers php_libxml_default_document_handlers = {
111 	.dump_node_to_str = php_libxml_default_dump_node_to_str,
112 	.dump_doc_to_str = php_libxml_default_dump_doc_to_str,
113 	.dump_node_to_file = php_libxml_dump_node_to_file,
114 	.dump_doc_to_file = php_libxml_default_dump_doc_to_file,
115 };
116 
/* Splice the namespace chain first..last into doc->oldNs.
 *
 * The chain is inserted directly behind the head entry of doc->oldNs, so the
 * cost does not depend on how long the existing list is. libxml2 may assume
 * the head entry is the one for the "xml" prefix, which is why the head is
 * never replaced; it is created here when the document has none yet.
 *
 * `last` must be the final element of the chain (last->next == NULL).
 * Nothing is attached when `doc` is NULL or when the head cannot be allocated. */
static void php_libxml_set_old_ns_list(xmlDocPtr doc, xmlNsPtr first, xmlNsPtr last)
{
	if (UNEXPECTED(doc == NULL)) {
		return;
	}

	ZEND_ASSERT(last->next == NULL);

	if (EXPECTED(doc->oldNs != NULL)) {
		/* Existing list: whatever followed the head now follows our chain. */
		last->next = doc->oldNs->next;
	} else {
		xmlNsPtr head = (xmlNsPtr) xmlMalloc(sizeof(xmlNs));
		doc->oldNs = head;
		if (head == NULL) {
			return;
		}
		memset(head, 0, sizeof(xmlNs));
		head->type = XML_LOCAL_NAMESPACE;
		head->href = xmlStrdup(XML_XML_NAMESPACE);
		head->prefix = xmlStrdup((const xmlChar *)"xml");
	}

	doc->oldNs->next = first;
}
144 
php_libxml_set_old_ns(xmlDocPtr doc,xmlNsPtr ns)145 PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns)
146 {
147 	php_libxml_set_old_ns_list(doc, ns, ns);
148 }
149 
150 /* Function pointer typedef changed in 2.9.8, see https://github.com/GNOME/libxml2/commit/e03f0a199a67017b2f8052354cf732b2b4cae787 */
151 #if LIBXML_VERSION >= 20908
php_libxml_unlink_entity(void * data,void * table,const xmlChar * name)152 static void php_libxml_unlink_entity(void *data, void *table, const xmlChar *name)
153 #else
154 static void php_libxml_unlink_entity(void *data, void *table, xmlChar *name)
155 #endif
156 {
157 	xmlEntityPtr entity = data;
158 	if (entity->_private != NULL) {
159 		xmlHashRemoveEntry(table, name, NULL);
160 	}
161 }
162 
163 /* {{{ internal functions for interoperability */
/* Break the link between a libxml2 node and its PHP-side bookkeeping.
 *
 * If a wrapper object is attached, its node pointer and document reference
 * are decremented. Otherwise the bookkeeping record's node pointer is cleared
 * and, except for document nodes, the node's _private field is reset. */
static void php_libxml_unregister_node(xmlNodePtr nodep)
{
	php_libxml_node_ptr *record = nodep->_private;
	if (record == NULL) {
		return;
	}

	php_libxml_node_object *owner = record->_private;
	if (owner != NULL) {
		php_libxml_decrement_node_ptr(owner);
		php_libxml_decrement_doc_ref(owner);
		return;
	}

	if (nodep->type != XML_DOCUMENT_NODE) {
		nodep->_private = NULL;
	}
	record->node = NULL;
}
181 
182 /* Workaround for libxml2 peculiarity */
php_libxml_unlink_entity_decl(xmlEntityPtr entity)183 static void php_libxml_unlink_entity_decl(xmlEntityPtr entity)
184 {
185 	xmlDtdPtr dtd = entity->parent;
186 	if (dtd != NULL) {
187 		if (xmlHashLookup(dtd->entities, entity->name) == entity) {
188 			xmlHashRemoveEntry(dtd->entities, entity->name, NULL);
189 		}
190 		if (xmlHashLookup(dtd->pentities, entity->name) == entity) {
191 			xmlHashRemoveEntry(dtd->pentities, entity->name, NULL);
192 		}
193 	}
194 }
195 
/* Free one libxml2 node, choosing the release routine by node type.
 *
 * Any bookkeeping record hanging off node->_private has its node pointer
 * cleared first. Element and attribute declarations are left untouched. */
static void php_libxml_node_free(xmlNodePtr node)
{
	php_libxml_node_ptr *record = node->_private;
	if (record != NULL) {
		record->node = NULL;
	}

	switch (node->type) {
		case XML_ATTRIBUTE_NODE:
			xmlFreeProp((xmlAttrPtr) node);
			break;

		/* libxml2 only unlinks an entity from its dtd when that dtd is attached to the
		 * document. Work around that by unlinking through the parent pointer ourselves. */
		case XML_ENTITY_DECL: {
			xmlEntityPtr decl = (xmlEntityPtr) node;
			if (decl->etype == XML_INTERNAL_PREDEFINED_ENTITY) {
				/* Predefined entities are never freed here. */
				break;
			}
			php_libxml_unlink_entity_decl(decl);
#if LIBXML_VERSION >= 21200
			xmlFreeEntity(decl);
#else
			/* Older libxml2: release the entity by hand. Strings owned by the
			 * document dictionary must not be freed individually. */
			if (decl->children != NULL && decl->owner && decl == (xmlEntityPtr) decl->children->parent) {
				xmlFreeNodeList(decl->children);
			}
			xmlDictPtr dict = decl->doc != NULL ? decl->doc->dict : NULL;
#define PHP_LIBXML_FREE_UNLESS_IN_DICT(str) \
			do { \
				if (dict == NULL || !xmlDictOwns(dict, (str))) { \
					xmlFree((xmlChar *) (str)); \
				} \
			} while (0)
			PHP_LIBXML_FREE_UNLESS_IN_DICT(decl->name);
			PHP_LIBXML_FREE_UNLESS_IN_DICT(decl->ExternalID);
			PHP_LIBXML_FREE_UNLESS_IN_DICT(decl->SystemID);
			PHP_LIBXML_FREE_UNLESS_IN_DICT(decl->URI);
			PHP_LIBXML_FREE_UNLESS_IN_DICT(decl->content);
			PHP_LIBXML_FREE_UNLESS_IN_DICT(decl->orig);
#undef PHP_LIBXML_FREE_UNLESS_IN_DICT
			xmlFree(decl);
#endif
			break;
		}

		case XML_NOTATION_NODE: {
			/* See create_notation(): these are not regular XML_NOTATION_NODE nodes
			 * but entities in disguise, so their strings are freed one by one. */
			xmlEntityPtr disguised = (xmlEntityPtr) node;
			if (node->name != NULL) {
				xmlFree((char *) node->name);
			}
			if (disguised->ExternalID != NULL) {
				xmlFree((char *) disguised->ExternalID);
			}
			if (disguised->SystemID != NULL) {
				xmlFree((char *) disguised->SystemID);
			}
			xmlFree(node);
			break;
		}

		case XML_ELEMENT_DECL:
		case XML_ATTRIBUTE_DECL:
			/* Not freed here. */
			break;

		case XML_NAMESPACE_DECL:
			if (node->ns) {
				xmlFreeNs(node->ns);
				node->ns = NULL;
			}
			/* Retag as an element before handing the node to xmlFreeNode(). */
			node->type = XML_ELEMENT_NODE;
			xmlFreeNode(node);
			break;

		case XML_DTD_NODE: {
			xmlDtdPtr dtd = (xmlDtdPtr) node;
			if (dtd->_private == NULL) {
				/* Userland holds no reference to the dtd itself, but it may still
				 * reference entities inside it: take those out of the tables first. */
				xmlHashScan(dtd->entities, php_libxml_unlink_entity, dtd->entities);
				xmlHashScan(dtd->pentities, php_libxml_unlink_entity, dtd->pentities);
				/* Notations are not unlinked, see the XML_NOTATION_NODE case above. */
			}
			xmlFreeDtd(dtd);
			break;
		}

		case XML_ELEMENT_NODE: {
			if (node->ns && (((uintptr_t) node->ns->_private) & 1) == LIBXML_NS_TAG_HOOK) {
				/* The element lives in a "special" namespace: the low bit of the
				 * namespace's _private is a tag, the remainder points at a header
				 * carrying a destruction hook that has to run first. */
				php_libxml_private_data_header *header = (php_libxml_private_data_header *) (((uintptr_t) node->ns->_private) & ~1);
				header->ns_hook(header, node);
			}
			if (node->nsDef && node->doc) {
				/* Let the namespace declarations outlive the element that holds them,
				 * which prevents a use-after-free on those declarations.
				 *
				 * libxml2 keeps no reference count on namespace declarations. Saving
				 * them is only strictly needed when userland still references the
				 * subtree, but finding that out would require walking the subtree or
				 * maintaining extra metadata, both of which are slow. So every
				 * declaration is saved.
				 *
				 * Namespace declarations are rare compared to elements, text and
				 * attributes, and in principle one declaration per namespace suffices,
				 * so the number of saved declarations is expected to stay small for an
				 * average document, even in the worst case of removing the whole
				 * document. A single declaration takes about 48 bytes.
				 */
				xmlNsPtr head = node->nsDef;
				xmlNsPtr tail;
				for (tail = head; tail->next != NULL; tail = tail->next) {
					/* locate the end of the chain */
				}
				php_libxml_set_old_ns_list(node->doc, head, tail);
				node->nsDef = NULL;
			}
			xmlFreeNode(node);
			break;
		}

		default:
			xmlFreeNode(node);
			break;
	}
}
322 
php_libxml_node_free_list(xmlNodePtr node)323 PHP_LIBXML_API void php_libxml_node_free_list(xmlNodePtr node)
324 {
325 	xmlNodePtr curnode;
326 
327 	if (node != NULL) {
328 		curnode = node;
329 		while (curnode != NULL) {
330 			/* If the _private field is set, there's still a userland reference somewhere. We'll delay freeing in this case. */
331 			if (curnode->_private) {
332 				xmlNodePtr next = curnode->next;
333 				/* Must unlink such that freeing of the parent doesn't free this child. */
334 				xmlUnlinkNode(curnode);
335 				if (curnode->type == XML_ELEMENT_NODE) {
336 					/* This ensures that namespace references in this subtree are defined within this subtree,
337 					 * otherwise a use-after-free would be possible when the original namespace holder gets freed. */
338 					php_libxml_node_ptr *ptr = curnode->_private;
339 
340 					/* Checking in case it runs out of reference */
341 					if (ptr->_private) {
342 						php_libxml_node_object *obj = ptr->_private;
343 						if (!obj->document || obj->document->class_type < PHP_LIBXML_CLASS_MODERN) {
344 							xmlReconciliateNs(curnode->doc, curnode);
345 						}
346 					}
347 				}
348 				/* Skip freeing */
349 				curnode = next;
350 				continue;
351 			}
352 
353 			node = curnode;
354 			switch (node->type) {
355 				/* Skip property freeing for the following types */
356 				case XML_ENTITY_REF_NODE:
357 				case XML_NOTATION_NODE:
358 					break;
359 				case XML_ENTITY_DECL:
360 					php_libxml_unlink_entity_decl((xmlEntityPtr) node);
361 					break;
362 				case XML_ATTRIBUTE_NODE:
363 					if ((node->doc != NULL) && (((xmlAttrPtr) node)->atype == XML_ATTRIBUTE_ID)) {
364 						xmlRemoveID(node->doc, (xmlAttrPtr) node);
365 					}
366 					ZEND_FALLTHROUGH;
367 				case XML_ATTRIBUTE_DECL:
368 				case XML_DTD_NODE:
369 				case XML_DOCUMENT_TYPE_NODE:
370 				case XML_NAMESPACE_DECL:
371 				case XML_TEXT_NODE:
372 					php_libxml_node_free_list(node->children);
373 					break;
374 				default:
375 					php_libxml_node_free_list(node->children);
376 					php_libxml_node_free_list((xmlNodePtr) node->properties);
377 			}
378 
379 			curnode = node->next;
380 			xmlUnlinkNode(node);
381 			php_libxml_unregister_node(node);
382 			php_libxml_node_free(node);
383 		}
384 	}
385 }
386 
387 /* }}} */
388 
389 /* {{{ startup, shutdown and info functions */
PHP_GINIT_FUNCTION(libxml)390 static PHP_GINIT_FUNCTION(libxml)
391 {
392 #if defined(COMPILE_DL_LIBXML) && defined(ZTS)
393 	ZEND_TSRMLS_CACHE_UPDATE();
394 #endif
395 	ZVAL_UNDEF(&libxml_globals->stream_context);
396 	libxml_globals->error_buffer.s = NULL;
397 	libxml_globals->error_list = NULL;
398 	libxml_globals->entity_loader_callback = empty_fcall_info_cache;
399 }
400 
php_libxml_get_stream_context(void)401 PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void)
402 {
403 	return php_stream_context_from_zval(Z_ISUNDEF(LIBXML(stream_context)) ? NULL : &LIBXML(stream_context), false);
404 }
405 
406 /* Channel libxml file io layer through the PHP streams subsystem.
407  * This allows use of ftps:// and https:// urls */
408 
php_libxml_streams_IO_open_wrapper(const char * filename,const char * mode,const int read_only)409 static void *php_libxml_streams_IO_open_wrapper(const char *filename, const char *mode, const int read_only)
410 {
411 	php_stream_statbuf ssbuf;
412 	char *resolved_path;
413 	const char *path_to_open = NULL;
414 	bool isescaped = false;
415 
416 	if (strstr(filename, "%00")) {
417 		php_error_docref(NULL, E_WARNING, "URI must not contain percent-encoded NUL bytes");
418 		return NULL;
419 	}
420 
421 	xmlURI *uri = xmlParseURI(filename);
422 	if (uri && (uri->scheme == NULL ||
423 			(xmlStrncmp(BAD_CAST uri->scheme, BAD_CAST "file", 4) == 0))) {
424 		resolved_path = xmlURIUnescapeString(filename, 0, NULL);
425 		isescaped = 1;
426 #if LIBXML_VERSION >= 20902 && defined(PHP_WIN32)
427 		/* Libxml 2.9.2 prefixes local paths with file:/ instead of file://,
428 			thus the php stream wrapper will fail on a valid case. For this
429 			reason the prefix is rather better cut off. */
430 		{
431 			size_t pre_len = sizeof("file:/") - 1;
432 
433 			if (strncasecmp(resolved_path, "file:/", pre_len) == 0
434 				&& '/' != resolved_path[pre_len]) {
435 				xmlChar *tmp = xmlStrdup(resolved_path + pre_len);
436 				xmlFree(resolved_path);
437 				resolved_path = tmp;
438 			}
439 		}
440 #endif
441 	} else {
442 		resolved_path = (char *)filename;
443 	}
444 
445 	if (uri) {
446 		xmlFreeURI(uri);
447 	}
448 
449 	if (resolved_path == NULL) {
450 		return NULL;
451 	}
452 
453 	/* logic copied from _php_stream_stat, but we only want to fail
454 	   if the wrapper supports stat, otherwise, figure it out from
455 	   the open.  This logic is only to support hiding warnings
456 	   that the streams layer puts out at times, but for libxml we
457 	   may try to open files that don't exist, but it is not a failure
458 	   in xml processing (eg. DTD files)  */
459 	php_stream_wrapper *wrapper = php_stream_locate_url_wrapper(resolved_path, &path_to_open, 0);
460 	if (wrapper && read_only && wrapper->wops->url_stat) {
461 		if (wrapper->wops->url_stat(wrapper, path_to_open, PHP_STREAM_URL_STAT_QUIET, &ssbuf, NULL) == -1) {
462 			if (isescaped) {
463 				xmlFree(resolved_path);
464 			}
465 			return NULL;
466 		}
467 	}
468 
469 	php_stream_context *context = php_libxml_get_stream_context();
470 
471 	php_stream *ret_val = php_stream_open_wrapper_ex(path_to_open, mode, REPORT_ERRORS, NULL, context);
472 	if (ret_val) {
473 		/* Prevent from closing this by fclose() */
474 		ret_val->flags |= PHP_STREAM_FLAG_NO_FCLOSE;
475 	}
476 	if (isescaped) {
477 		xmlFree(resolved_path);
478 	}
479 	return ret_val;
480 }
481 
/* Open `filename` for binary reading ("rb") with the read-only probe enabled. */
static void *php_libxml_streams_IO_open_read_wrapper(const char *filename)
{
	void *handle = php_libxml_streams_IO_open_wrapper(filename, "rb", 1);

	return handle;
}
486 
/* Open `filename` for binary writing ("wb"); no read-only probe is done. */
static void *php_libxml_streams_IO_open_write_wrapper(const char *filename)
{
	void *handle = php_libxml_streams_IO_open_wrapper(filename, "wb", 0);

	return handle;
}
491 
php_libxml_streams_IO_read(void * context,char * buffer,int len)492 static int php_libxml_streams_IO_read(void *context, char *buffer, int len)
493 {
494 	return php_stream_read((php_stream*)context, buffer, len);
495 }
496 
php_libxml_streams_IO_write(void * context,const char * buffer,int len)497 static int php_libxml_streams_IO_write(void *context, const char *buffer, int len)
498 {
499 	return php_stream_write((php_stream*)context, buffer, len);
500 }
501 
php_libxml_streams_IO_close(void * context)502 static int php_libxml_streams_IO_close(void *context)
503 {
504 	return php_stream_close((php_stream*)context);
505 }
506 
507 static xmlParserInputBufferPtr
php_libxml_input_buffer_create_filename(const char * URI,xmlCharEncoding enc)508 php_libxml_input_buffer_create_filename(const char *URI, xmlCharEncoding enc)
509 {
510 	xmlParserInputBufferPtr ret;
511 	void *context = NULL;
512 
513 	if (LIBXML(entity_loader_disabled)) {
514 		return NULL;
515 	}
516 
517 	if (URI == NULL)
518 		return(NULL);
519 
520 	context = php_libxml_streams_IO_open_read_wrapper(URI);
521 
522 	if (context == NULL) {
523 		return(NULL);
524 	}
525 
526 	/* Check if there's been an external transport protocol with an encoding information */
527 	if (enc == XML_CHAR_ENCODING_NONE) {
528 		php_stream *s  = (php_stream *) context;
529 		zend_string *charset = php_libxml_sniff_charset_from_stream(s);
530 		if (charset != NULL) {
531 			enc = xmlParseCharEncoding(ZSTR_VAL(charset));
532 			if (enc <= XML_CHAR_ENCODING_NONE) {
533 				enc = XML_CHAR_ENCODING_NONE;
534 			}
535 			zend_string_release_ex(charset, false);
536 		}
537 	}
538 
539 	/* Allocate the Input buffer front-end. */
540 	ret = xmlAllocParserInputBuffer(enc);
541 	if (ret != NULL) {
542 		ret->context = context;
543 		ret->readcallback = php_libxml_streams_IO_read;
544 		ret->closecallback = php_libxml_streams_IO_close;
545 	} else
546 		php_libxml_streams_IO_close(context);
547 
548 	return(ret);
549 }
550 
551 static xmlOutputBufferPtr
php_libxml_output_buffer_create_filename(const char * URI,xmlCharEncodingHandlerPtr encoder,int compression)552 php_libxml_output_buffer_create_filename(const char *URI,
553                               xmlCharEncodingHandlerPtr encoder,
554                               int compression)
555 {
556 	ZEND_IGNORE_VALUE(compression);
557 
558 	xmlOutputBufferPtr ret;
559 	xmlURIPtr puri;
560 	void *context = NULL;
561 	char *unescaped = NULL;
562 
563 	if (URI == NULL)
564 		return(NULL);
565 
566 	if (strstr(URI, "%00")) {
567 		php_error_docref(NULL, E_WARNING, "URI must not contain percent-encoded NUL bytes");
568 		return NULL;
569 	}
570 
571 	puri = xmlParseURI(URI);
572 	if (puri != NULL) {
573 		if (puri->scheme != NULL)
574 			unescaped = xmlURIUnescapeString(URI, 0, NULL);
575 		xmlFreeURI(puri);
576 	}
577 
578 	if (unescaped != NULL) {
579 		context = php_libxml_streams_IO_open_write_wrapper(unescaped);
580 		xmlFree(unescaped);
581 	}
582 
583 	/* try with a non-escaped URI this may be a strange filename */
584 	if (context == NULL) {
585 		context = php_libxml_streams_IO_open_write_wrapper(URI);
586 	}
587 
588 	if (context == NULL) {
589 		return(NULL);
590 	}
591 
592 	/* Allocate the Output buffer front-end. */
593 	ret = xmlAllocOutputBuffer(encoder);
594 	if (ret != NULL) {
595 		ret->context = context;
596 		ret->writecallback = php_libxml_streams_IO_write;
597 		ret->closecallback = php_libxml_streams_IO_close;
598 	}
599 
600 	return(ret);
601 }
602 
_php_libxml_free_error(void * ptr)603 static void _php_libxml_free_error(void *ptr)
604 {
605 	/* This will free the libxml alloc'd memory */
606 	xmlResetError((xmlErrorPtr) ptr);
607 }
608 
609 #if LIBXML_VERSION >= 21200
_php_list_set_error_structure(const xmlError * error,const char * msg,int line,int column)610 static void _php_list_set_error_structure(const xmlError *error, const char *msg, int line, int column)
611 #else
612 static void _php_list_set_error_structure(xmlError *error, const char *msg, int line, int column)
613 #endif
614 {
615 	xmlError error_copy;
616 	int ret;
617 
618 
619 	memset(&error_copy, 0, sizeof(xmlError));
620 
621 	if (error) {
622 		ret = xmlCopyError(error, &error_copy);
623 	} else {
624 		error_copy.code = XML_ERR_INTERNAL_ERROR;
625 		error_copy.level = XML_ERR_ERROR;
626 		error_copy.line = line;
627 		error_copy.int2 = column;
628 		error_copy.message = (char*)xmlStrdup((const xmlChar*)msg);
629 		ret = 0;
630 	}
631 
632 	if (ret == 0) {
633 		zend_llist_add_element(LIBXML(error_list), &error_copy);
634 	}
635 }
636 
php_libxml_ctx_error_level(int level,void * ctx,const char * msg,int line)637 static void php_libxml_ctx_error_level(int level, void *ctx, const char *msg, int line)
638 {
639 	xmlParserCtxtPtr parser;
640 
641 	parser = (xmlParserCtxtPtr) ctx;
642 
643 	if (parser != NULL && parser->input != NULL) {
644 		if (parser->input->filename) {
645 			php_error_docref(NULL, level, "%s in %s, line: %d", msg, parser->input->filename, line);
646 		} else {
647 			php_error_docref(NULL, level, "%s in Entity, line: %d", msg, line);
648 		}
649 	} else {
650 		php_error_docref(NULL, E_WARNING, "%s", msg);
651 	}
652 }
653 
php_libxml_issue_error(int level,const char * msg)654 void php_libxml_issue_error(int level, const char *msg)
655 {
656 	if (LIBXML(error_list)) {
657 		_php_list_set_error_structure(NULL, msg, 0, 0);
658 	} else {
659 		php_error_docref(NULL, level, "%s", msg);
660 	}
661 }
662 
/* Format a libxml message fragment, accumulate it in LIBXML(error_buffer) and
 * flush the buffer once a fragment ends in a newline.
 *
 * On flush the accumulated text goes to the error list when one exists.
 * Otherwise, unless an exception is already pending, it is raised as a PHP
 * diagnostic: E_WARNING for PHP_LIBXML_CTX_ERROR, E_NOTICE for
 * PHP_LIBXML_CTX_WARNING (both with parser context information), and a plain
 * E_WARNING for everything else. The buffer is emptied after a flush. */
static void php_libxml_internal_error_handler_ex(php_libxml_error_level error_type, void *ctx, const char *msg, va_list ap, int line, int column)
{
	char *formatted;
	size_t formatted_len = vspprintf(&formatted, 0, msg, ap);
	bool flush = false;

	/* Overwrite every trailing '\n' with a NUL byte. */
	for (size_t end = formatted_len; end > 0 && formatted[end - 1] == '\n'; end--) {
		formatted[end - 1] = '\0';
		flush = true;
	}

	/* The full original length is appended on purpose: on a flush, the NUL
	 * bytes written above terminate the accumulated text when it is read
	 * back as a C string below. */
	smart_str_appendl(&LIBXML(error_buffer), formatted, formatted_len);

	efree(formatted);

	if (!flush) {
		return;
	}

	if (LIBXML(error_list)) {
		_php_list_set_error_structure(NULL, ZSTR_VAL(LIBXML(error_buffer).s), line, column);
	} else if (!EG(exception)) {
		/* Don't throw additional notices/warnings if an exception has already been thrown. */
		if (error_type == PHP_LIBXML_CTX_ERROR) {
			php_libxml_ctx_error_level(E_WARNING, ctx, ZSTR_VAL(LIBXML(error_buffer).s), line);
		} else if (error_type == PHP_LIBXML_CTX_WARNING) {
			php_libxml_ctx_error_level(E_NOTICE, ctx, ZSTR_VAL(LIBXML(error_buffer).s), line);
		} else {
			php_error_docref(NULL, E_WARNING, "%s", ZSTR_VAL(LIBXML(error_buffer).s));
		}
	}

	smart_str_free(&LIBXML(error_buffer));
}
700 
php_libxml_error_handler_va(php_libxml_error_level error_type,void * ctx,const char * msg,va_list ap)701 PHP_LIBXML_API void php_libxml_error_handler_va(php_libxml_error_level error_type, void *ctx, const char *msg, va_list ap)
702 {
703 	int line = 0;
704 	int column = 0;
705 	xmlParserCtxtPtr parser = (xmlParserCtxtPtr) ctx;
706 	/* Context is not valid for PHP_LIBXML_ERROR, don't dereference it in that case */
707 	if (error_type != PHP_LIBXML_ERROR && parser != NULL && parser->input != NULL) {
708 		line = parser->input->line;
709 		column = parser->input->col;
710 	}
711 	php_libxml_internal_error_handler_ex(error_type, ctx, msg, ap, line, column);
712 }
713 
_php_libxml_external_entity_loader(const char * URL,const char * ID,xmlParserCtxtPtr context)714 static xmlParserInputPtr _php_libxml_external_entity_loader(const char *URL,
715 		const char *ID, xmlParserCtxtPtr context)
716 {
717 	xmlParserInputPtr	ret			= NULL;
718 	const char			*resource	= NULL;
719 	zval 				*ctxzv, retval;
720 	zval				params[3];
721 
722 	/* no custom user-land callback set up; delegate to original loader */
723 	if (!ZEND_FCC_INITIALIZED(LIBXML(entity_loader_callback))) {
724 		return _php_libxml_default_entity_loader(URL, ID, context);
725 	}
726 
727 	if (ID != NULL) {
728 		ZVAL_STRING(&params[0], ID);
729 	} else {
730 		ZVAL_NULL(&params[0]);
731 	}
732 	if (URL != NULL) {
733 		ZVAL_STRING(&params[1], URL);
734 	} else {
735 		ZVAL_NULL(&params[1]);
736 	}
737 	ctxzv = &params[2];
738 	array_init_size(ctxzv, 4);
739 
740 #define ADD_NULL_OR_STRING_KEY(memb) \
741 	if (context->memb == NULL) { \
742 		add_assoc_null_ex(ctxzv, #memb, sizeof(#memb) - 1); \
743 	} else { \
744 		add_assoc_string_ex(ctxzv, #memb, sizeof(#memb) - 1, \
745 				(char *)context->memb); \
746 	}
747 
748 	ADD_NULL_OR_STRING_KEY(directory)
749 	ADD_NULL_OR_STRING_KEY(intSubName)
750 	ADD_NULL_OR_STRING_KEY(extSubURI)
751 	ADD_NULL_OR_STRING_KEY(extSubSystem)
752 
753 #undef ADD_NULL_OR_STRING_KEY
754 
755 	zend_call_known_fcc(&LIBXML(entity_loader_callback), &retval, 3, params, /* named_params */ NULL);
756 
757 	if (Z_ISUNDEF(retval)) {
758 		php_libxml_ctx_error(context,
759 				"Call to user entity loader callback '%s' has failed",
760 				ZSTR_VAL(LIBXML(entity_loader_callback).function_handler->common.function_name));
761 	} else {
762 		if (Z_TYPE(retval) == IS_STRING) {
763 is_string:
764 			resource = Z_STRVAL(retval);
765 		} else if (Z_TYPE(retval) == IS_RESOURCE) {
766 			php_stream *stream;
767 			php_stream_from_zval_no_verify(stream, &retval);
768 			if (stream == NULL) {
769 				php_libxml_ctx_error(context,
770 						"The user entity loader callback '%s' has returned a "
771 						"resource, but it is not a stream",
772 						ZSTR_VAL(LIBXML(entity_loader_callback).function_handler->common.function_name));
773 			} else {
774 				/* TODO: allow storing the encoding in the stream context? */
775 				xmlCharEncoding enc = XML_CHAR_ENCODING_NONE;
776 				xmlParserInputBufferPtr pib = xmlAllocParserInputBuffer(enc);
777 				if (pib == NULL) {
778 					php_libxml_ctx_error(context, "Could not allocate parser "
779 							"input buffer");
780 				} else {
781 					/* make stream not being closed when the zval is freed */
782 					GC_ADDREF(stream->res);
783 					pib->context = stream;
784 					pib->readcallback = php_libxml_streams_IO_read;
785 					pib->closecallback = php_libxml_streams_IO_close;
786 
787 					ret = xmlNewIOInputStream(context, pib, enc);
788 					if (ret == NULL) {
789 						xmlFreeParserInputBuffer(pib);
790 					}
791 				}
792 			}
793 		} else if (Z_TYPE(retval) != IS_NULL) {
794 			/* retval not string nor resource nor null; convert to string */
795 			if (try_convert_to_string(&retval)) {
796 				goto is_string;
797 			}
798 		} /* else is null; don't try anything */
799 	}
800 
801 	if (ret == NULL) {
802 		if (resource == NULL) {
803 			if (ID == NULL) {
804 				php_libxml_ctx_error(context,
805 						"Failed to load external entity because the resolver function returned null\n");
806 			} else {
807 				php_libxml_ctx_error(context,
808 						"Failed to load external entity \"%s\"\n", ID);
809 			}
810 		} else {
811 			/* we got the resource in the form of a string; open it */
812 			ret = xmlNewInputFromFile(context, resource);
813 		}
814 	}
815 
816 	zval_ptr_dtor(&params[0]);
817 	zval_ptr_dtor(&params[1]);
818 	zval_ptr_dtor(&params[2]);
819 	zval_ptr_dtor(&retval);
820 	return ret;
821 }
822 
_php_libxml_pre_ext_ent_loader(const char * URL,const char * ID,xmlParserCtxtPtr context)823 static xmlParserInputPtr _php_libxml_pre_ext_ent_loader(const char *URL,
824 		const char *ID, xmlParserCtxtPtr context)
825 {
826 
827 	/* Check whether we're running in a PHP context, since the entity loader
828 	 * we've defined is an application level (true global) setting.
829 	 * If we are, we also want to check whether we've finished activating
830 	 * the modules (RINIT phase). Using our external entity loader during a
831 	 * RINIT should not be problem per se (though during MINIT it is, because
832 	 * we don't even have a resource list by then), but then whether one
833 	 * extension would be using the custom external entity loader or not
834 	 * could depend on extension loading order
835 	 * (if _php_libxml_per_request_initialization */
836 	if (xmlGenericError == php_libxml_error_handler && PG(modules_activated)) {
837 		return _php_libxml_external_entity_loader(URL, ID, context);
838 	} else {
839 		return _php_libxml_default_entity_loader(URL, ID, context);
840 	}
841 }
842 
php_libxml_pretend_ctx_error_ex(const char * file,int line,int column,const char * msg,...)843 PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...)
844 {
845 	va_list args;
846 	va_start(args, msg);
847 	php_libxml_internal_error_handler_ex(PHP_LIBXML_CTX_ERROR, NULL, msg, args, line, column);
848 	va_end(args);
849 
850 	/* Propagate back into libxml */
851 	if (LIBXML(error_list)) {
852 		xmlErrorPtr last = zend_llist_get_last(LIBXML(error_list));
853 		if (last && !last->file) {
854 			last->file = strdup(file);
855 		}
856 	}
857 }
858 
php_libxml_ctx_error(void * ctx,const char * msg,...)859 PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...)
860 {
861 	va_list args;
862 	va_start(args, msg);
863 	php_libxml_error_handler_va(PHP_LIBXML_CTX_ERROR, ctx, msg, args);
864 	va_end(args);
865 }
866 
php_libxml_ctx_warning(void * ctx,const char * msg,...)867 PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...)
868 {
869 	va_list args;
870 	va_start(args, msg);
871 	php_libxml_error_handler_va(PHP_LIBXML_CTX_WARNING, ctx, msg, args);
872 	va_end(args);
873 }
874 
875 #if LIBXML_VERSION >= 21200
php_libxml_structured_error_handler(void * userData,const xmlError * error)876 static void php_libxml_structured_error_handler(void *userData, const xmlError *error)
877 #else
878 static void php_libxml_structured_error_handler(void *userData, xmlErrorPtr error)
879 #endif
880 {
881 	_php_list_set_error_structure(error, NULL, 0, 0);
882 }
883 
php_libxml_error_handler(void * ctx,const char * msg,...)884 PHP_LIBXML_API void php_libxml_error_handler(void *ctx, const char *msg, ...)
885 {
886 	va_list args;
887 	va_start(args, msg);
888 	php_libxml_error_handler_va(PHP_LIBXML_ERROR, ctx, msg, args);
889 	va_end(args);
890 }
891 
/* Element destructor of the php_libxml_exports table: releases the memory
 * behind the stored pointer with plain free() (the table is initialised with
 * the persistent flag set in php_libxml_initialize()). */
static void php_libxml_exports_dtor(zval *zv)
{
	void *entry = Z_PTR_P(zv);

	free(entry);
}
896 
php_libxml_initialize(void)897 PHP_LIBXML_API void php_libxml_initialize(void)
898 {
899 	if (!_php_libxml_initialized) {
900 		/* we should be the only one's to ever init!! */
901 		ZEND_IGNORE_LEAKS_BEGIN();
902 		xmlInitParser();
903 		ZEND_IGNORE_LEAKS_END();
904 
905 		_php_libxml_default_entity_loader = xmlGetExternalEntityLoader();
906 		xmlSetExternalEntityLoader(_php_libxml_pre_ext_ent_loader);
907 
908 		zend_hash_init(&php_libxml_exports, 0, NULL, php_libxml_exports_dtor, 1);
909 
910 		_php_libxml_initialized = 1;
911 	}
912 }
913 
php_libxml_shutdown(void)914 PHP_LIBXML_API void php_libxml_shutdown(void)
915 {
916 	if (_php_libxml_initialized) {
917 #if defined(LIBXML_SCHEMAS_ENABLED) && LIBXML_VERSION < 21000
918 		xmlRelaxNGCleanupTypes();
919 #endif
920 		/* xmlCleanupParser(); */
921 		zend_hash_destroy(&php_libxml_exports);
922 
923 		xmlSetExternalEntityLoader(_php_libxml_default_entity_loader);
924 		_php_libxml_initialized = 0;
925 	}
926 }
927 
php_libxml_switch_context(zval * context,zval * oldcontext)928 PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext)
929 {
930 	if (oldcontext) {
931 		ZVAL_COPY_VALUE(oldcontext, &LIBXML(stream_context));
932 	}
933 	if (context) {
934 		ZVAL_COPY_VALUE(&LIBXML(stream_context), context);
935 	}
936 }
937 
PHP_MINIT_FUNCTION(libxml)938 static PHP_MINIT_FUNCTION(libxml)
939 {
940 	php_libxml_initialize();
941 
942 	register_libxml_symbols(module_number);
943 
944 	libxmlerror_class_entry = register_class_LibXMLError();
945 
946 	if (sapi_module.name) {
947 		static const char * const supported_sapis[] = {
948 			"cgi-fcgi",
949 			"litespeed",
950 			NULL
951 		};
952 		const char * const *sapi_name;
953 
954 		for (sapi_name = supported_sapis; *sapi_name; sapi_name++) {
955 			if (strcmp(sapi_module.name, *sapi_name) == 0) {
956 				_php_libxml_per_request_initialization = 0;
957 				break;
958 			}
959 		}
960 	}
961 
962 	if (!_php_libxml_per_request_initialization) {
963 		/* report errors via handler rather than stderr */
964 		xmlSetGenericErrorFunc(NULL, php_libxml_error_handler);
965 		xmlParserInputBufferCreateFilenameDefault(php_libxml_input_buffer_create_filename);
966 		xmlOutputBufferCreateFilenameDefault(php_libxml_output_buffer_create_filename);
967 	}
968 
969 	return SUCCESS;
970 }
971 
972 
PHP_RINIT_FUNCTION(libxml)973 static PHP_RINIT_FUNCTION(libxml)
974 {
975 	if (_php_libxml_per_request_initialization) {
976 		/* report errors via handler rather than stderr */
977 		xmlSetGenericErrorFunc(NULL, php_libxml_error_handler);
978 		xmlParserInputBufferCreateFilenameDefault(php_libxml_input_buffer_create_filename);
979 		xmlOutputBufferCreateFilenameDefault(php_libxml_output_buffer_create_filename);
980 	}
981 
982 	/* Enable the entity loader by default. This ensures that
983 	 * other threads/requests that might have disabled the loader
984 	 * do not affect the current request.
985 	 */
986 	LIBXML(entity_loader_disabled) = 0;
987 
988 	return SUCCESS;
989 }
990 
PHP_RSHUTDOWN_FUNCTION(libxml)991 static PHP_RSHUTDOWN_FUNCTION(libxml)
992 {
993 	if (ZEND_FCC_INITIALIZED(LIBXML(entity_loader_callback))) {
994 		zend_fcc_dtor(&LIBXML(entity_loader_callback));
995 	}
996 
997 	return SUCCESS;
998 }
999 
PHP_MSHUTDOWN_FUNCTION(libxml)1000 static PHP_MSHUTDOWN_FUNCTION(libxml)
1001 {
1002 	if (!_php_libxml_per_request_initialization) {
1003 		xmlSetGenericErrorFunc(NULL, NULL);
1004 
1005 		xmlParserInputBufferCreateFilenameDefault(NULL);
1006 		xmlOutputBufferCreateFilenameDefault(NULL);
1007 	}
1008 	php_libxml_shutdown();
1009 
1010 	return SUCCESS;
1011 }
1012 
php_libxml_post_deactivate(void)1013 static zend_result php_libxml_post_deactivate(void)
1014 {
1015 	/* reset libxml generic error handling */
1016 	if (_php_libxml_per_request_initialization) {
1017 		xmlSetGenericErrorFunc(NULL, NULL);
1018 
1019 		xmlParserInputBufferCreateFilenameDefault(NULL);
1020 		xmlOutputBufferCreateFilenameDefault(NULL);
1021 	}
1022 	xmlSetStructuredErrorFunc(NULL, NULL);
1023 
1024 	/* the steam_context resource will be released by resource list destructor */
1025 	ZVAL_UNDEF(&LIBXML(stream_context));
1026 	smart_str_free(&LIBXML(error_buffer));
1027 	if (LIBXML(error_list)) {
1028 		zend_llist_destroy(LIBXML(error_list));
1029 		efree(LIBXML(error_list));
1030 		LIBXML(error_list) = NULL;
1031 	}
1032 	xmlResetLastError();
1033 
1034 	return SUCCESS;
1035 }
1036 
1037 
PHP_MINFO_FUNCTION(libxml)1038 static PHP_MINFO_FUNCTION(libxml)
1039 {
1040 	php_info_print_table_start();
1041 	php_info_print_table_row(2, "libXML support", "active");
1042 	php_info_print_table_row(2, "libXML Compiled Version", LIBXML_DOTTED_VERSION);
1043 	php_info_print_table_row(2, "libXML Loaded Version", (char *)xmlParserVersion);
1044 	php_info_print_table_row(2, "libXML streams", "enabled");
1045 	php_info_print_table_end();
1046 }
1047 /* }}} */
1048 
1049 /* {{{ Set the streams context for the next libxml document load or write */
PHP_FUNCTION(libxml_set_streams_context)1050 PHP_FUNCTION(libxml_set_streams_context)
1051 {
1052 	zval *arg;
1053 
1054 	ZEND_PARSE_PARAMETERS_START(1, 1)
1055 		Z_PARAM_RESOURCE(arg)
1056 	ZEND_PARSE_PARAMETERS_END();
1057 
1058 	if (php_stream_context_from_zval(arg, true) != NULL) {
1059 		if (!Z_ISUNDEF(LIBXML(stream_context))) {
1060 			zval_ptr_dtor(&LIBXML(stream_context));
1061 		}
1062 		ZVAL_COPY(&LIBXML(stream_context), arg);
1063 	}
1064 }
1065 /* }}} */
1066 
php_libxml_uses_internal_errors(void)1067 PHP_LIBXML_API bool php_libxml_uses_internal_errors(void)
1068 {
1069 	return xmlStructuredError == php_libxml_structured_error_handler;
1070 }
1071 
1072 /* {{{ Disable libxml errors and allow user to fetch error information as needed */
PHP_FUNCTION(libxml_use_internal_errors)1073 PHP_FUNCTION(libxml_use_internal_errors)
1074 {
1075 	bool use_errors, use_errors_is_null = true;
1076 
1077 	ZEND_PARSE_PARAMETERS_START(0, 1)
1078 		Z_PARAM_OPTIONAL
1079 		Z_PARAM_BOOL_OR_NULL(use_errors, use_errors_is_null)
1080 	ZEND_PARSE_PARAMETERS_END();
1081 
1082 	bool retval = php_libxml_uses_internal_errors();
1083 
1084 	if (use_errors_is_null) {
1085 		RETURN_BOOL(retval);
1086 	}
1087 
1088 	if (use_errors == 0) {
1089 		xmlSetStructuredErrorFunc(NULL, NULL);
1090 		if (LIBXML(error_list)) {
1091 			zend_llist_destroy(LIBXML(error_list));
1092 			efree(LIBXML(error_list));
1093 			LIBXML(error_list) = NULL;
1094 		}
1095 	} else {
1096 		xmlSetStructuredErrorFunc(NULL, php_libxml_structured_error_handler);
1097 		if (LIBXML(error_list) == NULL) {
1098 			LIBXML(error_list) = (zend_llist *) emalloc(sizeof(zend_llist));
1099 			zend_llist_init(LIBXML(error_list), sizeof(xmlError), _php_libxml_free_error, 0);
1100 		}
1101 	}
1102 	RETURN_BOOL(retval);
1103 }
1104 /* }}} */
1105 
php_libxml_create_error_object(zval * return_value,const xmlError * error)1106 static void php_libxml_create_error_object(zval *return_value, const xmlError *error)
1107 {
1108 	object_init_ex(return_value, libxmlerror_class_entry);
1109 	add_property_long(return_value, "level", error->level);
1110 	add_property_long(return_value, "code", error->code);
1111 	add_property_long(return_value, "column", error->int2);
1112 	if (error->message) {
1113 		add_property_string(return_value, "message", error->message);
1114 	} else {
1115 		add_property_str(return_value, "message", zend_empty_string);
1116 	}
1117 	if (error->file) {
1118 		add_property_string(return_value, "file", error->file);
1119 	} else {
1120 		add_property_str(return_value, "file", zend_empty_string);
1121 	}
1122 	add_property_long(return_value, "line", error->line);
1123 }
1124 
1125 /* {{{ Retrieve last error from libxml */
PHP_FUNCTION(libxml_get_last_error)1126 PHP_FUNCTION(libxml_get_last_error)
1127 {
1128 	ZEND_PARSE_PARAMETERS_NONE();
1129 
1130 	const xmlError *error;
1131 
1132 	if (LIBXML(error_list)) {
1133 		error = zend_llist_get_last(LIBXML(error_list));
1134 	} else {
1135 		error = xmlGetLastError();
1136 	}
1137 
1138 	if (error) {
1139 		php_libxml_create_error_object(return_value, error);
1140 	} else {
1141 		RETURN_FALSE;
1142 	}
1143 }
1144 /* }}} */
1145 
1146 /* {{{ Retrieve array of errors */
PHP_FUNCTION(libxml_get_errors)1147 PHP_FUNCTION(libxml_get_errors)
1148 {
1149 	xmlErrorPtr error;
1150 
1151 	ZEND_PARSE_PARAMETERS_NONE();
1152 
1153 	if (LIBXML(error_list)) {
1154 		array_init(return_value);
1155 		error = zend_llist_get_first(LIBXML(error_list));
1156 
1157 		while (error != NULL) {
1158 			zval z_error;
1159 			php_libxml_create_error_object(&z_error, error);
1160 			add_next_index_zval(return_value, &z_error);
1161 			error = zend_llist_get_next(LIBXML(error_list));
1162 		}
1163 	} else {
1164 		RETURN_EMPTY_ARRAY();
1165 	}
1166 }
1167 /* }}} */
1168 
1169 /* {{{ Clear last error from libxml */
PHP_FUNCTION(libxml_clear_errors)1170 PHP_FUNCTION(libxml_clear_errors)
1171 {
1172 	ZEND_PARSE_PARAMETERS_NONE();
1173 
1174 	xmlResetLastError();
1175 	if (LIBXML(error_list)) {
1176 		zend_llist_clean(LIBXML(error_list));
1177 	}
1178 }
1179 /* }}} */
1180 
php_libxml_disable_entity_loader(bool disable)1181 PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable) /* {{{ */
1182 {
1183 	bool old = LIBXML(entity_loader_disabled);
1184 
1185 	LIBXML(entity_loader_disabled) = disable;
1186 	return old;
1187 } /* }}} */
1188 
1189 /* {{{ Disable/Enable ability to load external entities */
PHP_FUNCTION(libxml_disable_entity_loader)1190 PHP_FUNCTION(libxml_disable_entity_loader)
1191 {
1192 	bool disable = 1;
1193 
1194 	ZEND_PARSE_PARAMETERS_START(0, 1)
1195 		Z_PARAM_OPTIONAL
1196 		Z_PARAM_BOOL(disable)
1197 	ZEND_PARSE_PARAMETERS_END();
1198 
1199 	RETURN_BOOL(php_libxml_disable_entity_loader(disable));
1200 }
1201 /* }}} */
1202 
1203 /* {{{ Changes the default external entity loader */
PHP_FUNCTION(libxml_set_external_entity_loader)1204 PHP_FUNCTION(libxml_set_external_entity_loader)
1205 {
1206 	zend_fcall_info			fci;
1207 	zend_fcall_info_cache	fcc;
1208 
1209 	ZEND_PARSE_PARAMETERS_START(1, 1)
1210 		Z_PARAM_FUNC_NO_TRAMPOLINE_FREE_OR_NULL(fci, fcc)
1211 	ZEND_PARSE_PARAMETERS_END();
1212 
1213 	/* Unset old callback if it's defined */
1214 	if (ZEND_FCC_INITIALIZED(LIBXML(entity_loader_callback))) {
1215 		zend_fcc_dtor(&LIBXML(entity_loader_callback));
1216 	}
1217 	if (ZEND_FCI_INITIALIZED(fci)) { /* argument not null */
1218 		zend_fcc_dup(&LIBXML(entity_loader_callback), &fcc);
1219 	}
1220 	RETURN_TRUE;
1221 }
1222 /* }}} */
1223 
/* {{{ Get the current external entity loader, or null if the default loader is installed. */
PHP_FUNCTION(libxml_get_external_entity_loader)1225 PHP_FUNCTION(libxml_get_external_entity_loader)
1226 {
1227 	ZEND_PARSE_PARAMETERS_NONE();
1228 
1229 	if (ZEND_FCC_INITIALIZED(LIBXML(entity_loader_callback))) {
1230 		zend_get_callable_zval_from_fcc(&LIBXML(entity_loader_callback), return_value);
1231 		return;
1232 	}
1233 	RETURN_NULL();
1234 }
1235 /* }}} */
1236 
1237 /* {{{ Common functions shared by extensions */
/* Structural UTF-8 check of the NUL-terminated byte string `s`.
 *
 * Each lead byte must announce a 1- to 4-byte sequence and must be followed
 * by the matching number of continuation bytes (10xxxxxx). Only the bit
 * patterns are inspected; the decoded code point values are not examined.
 *
 * Returns 1 if every sequence is well-formed, 0 otherwise. */
int php_libxml_xmlCheckUTF8(const unsigned char *s)
{
	const unsigned char *p = s;

	while (*p != 0) {
		const unsigned char lead = *p++;
		int continuation_bytes;

		if ((lead & 0x80) == 0) {
			/* 0xxxxxxx: single byte */
			continuation_bytes = 0;
		} else if ((lead & 0xe0) == 0xc0) {
			/* 110xxxxx */
			continuation_bytes = 1;
		} else if ((lead & 0xf0) == 0xe0) {
			/* 1110xxxx */
			continuation_bytes = 2;
		} else if ((lead & 0xf8) == 0xf0) {
			/* 11110xxx */
			continuation_bytes = 3;
		} else {
			/* stray continuation byte or unsupported lead byte */
			return 0;
		}

		/* A terminating NUL is not a continuation byte, so a truncated
		 * sequence fails here before anything past the terminator is read. */
		while (continuation_bytes-- > 0) {
			if ((*p++ & 0xc0) != 0x80) {
				return 0;
			}
		}
	}

	return 1;
}
1263 
php_libxml_register_export(zend_class_entry * ce,php_libxml_export_node export_function)1264 zval *php_libxml_register_export(zend_class_entry *ce, php_libxml_export_node export_function)
1265 {
1266 	php_libxml_func_handler export_hnd;
1267 
1268 	/* Initialize in case this module hasn't been loaded yet */
1269 	php_libxml_initialize();
1270 	export_hnd.export_func = export_function;
1271 
1272 	return zend_hash_add_mem(&php_libxml_exports, ce->name, &export_hnd, sizeof(export_hnd));
1273 }
1274 
php_libxml_import_node(zval * object)1275 PHP_LIBXML_API xmlNodePtr php_libxml_import_node(zval *object)
1276 {
1277 	zend_class_entry *ce = NULL;
1278 	xmlNodePtr node = NULL;
1279 	php_libxml_func_handler *export_hnd;
1280 
1281 	if (Z_TYPE_P(object) == IS_OBJECT) {
1282 		ce = Z_OBJCE_P(object);
1283 		while (ce->parent != NULL) {
1284 			ce = ce->parent;
1285 		}
1286 		if ((export_hnd = zend_hash_find_ptr(&php_libxml_exports, ce->name))) {
1287 			node = export_hnd->export_func(object);
1288 		}
1289 	}
1290 	return node;
1291 }
1292 
php_libxml_increment_node_ptr(php_libxml_node_object * object,xmlNodePtr node,void * private_data)1293 PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, xmlNodePtr node, void *private_data)
1294 {
1295 	int ret_refcount = -1;
1296 
1297 	if (object != NULL && node != NULL) {
1298 		if (object->node != NULL) {
1299 			if (object->node->node == node) {
1300 				return object->node->refcount;
1301 			} else {
1302 				php_libxml_decrement_node_ptr(object);
1303 			}
1304 		}
1305 		if (node->_private != NULL) {
1306 			object->node = node->_private;
1307 			ret_refcount = ++object->node->refcount;
1308 			/* Only dom uses _private */
1309 			if (object->node->_private == NULL) {
1310 				object->node->_private = private_data;
1311 			}
1312 		} else {
1313 			object->node = emalloc(sizeof(php_libxml_node_ptr));
1314 			ret_refcount = 1;
1315 			object->node->node = node;
1316 			object->node->refcount = 1;
1317 			object->node->_private = private_data;
1318 			node->_private = object->node;
1319 		}
1320 	}
1321 
1322 	return ret_refcount;
1323 }
1324 
php_libxml_decrement_node_ptr_ref(php_libxml_node_ptr * ptr)1325 PHP_LIBXML_API int php_libxml_decrement_node_ptr_ref(php_libxml_node_ptr *ptr)
1326 {
1327 	ZEND_ASSERT(ptr != NULL);
1328 
1329 	int ret_refcount = --ptr->refcount;
1330 	if (ret_refcount == 0) {
1331 		if (ptr->node != NULL) {
1332 			ptr->node->_private = NULL;
1333 		}
1334 		if (ptr->_private) {
1335 			php_libxml_node_object *object = (php_libxml_node_object *) ptr->_private;
1336 			object->node = NULL;
1337 		}
1338 		efree(ptr);
1339 	}
1340 	return ret_refcount;
1341 }
1342 
php_libxml_decrement_node_ptr(php_libxml_node_object * object)1343 PHP_LIBXML_API int php_libxml_decrement_node_ptr(php_libxml_node_object *object)
1344 {
1345 	if (object != NULL && object->node != NULL) {
1346 		return php_libxml_decrement_node_ptr_ref(object->node);
1347 	}
1348 	return -1;
1349 }
1350 
php_libxml_increment_doc_ref(php_libxml_node_object * object,xmlDocPtr docp)1351 PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object, xmlDocPtr docp)
1352 {
1353 	int ret_refcount = -1;
1354 
1355 	if (object->document != NULL) {
1356 		object->document->refcount++;
1357 		ret_refcount = object->document->refcount;
1358 	} else if (docp != NULL) {
1359 		ret_refcount = 1;
1360 		object->document = emalloc(sizeof(php_libxml_ref_obj));
1361 		object->document->ptr = docp;
1362 		object->document->refcount = ret_refcount;
1363 		object->document->doc_props = NULL;
1364 		object->document->cache_tag.modification_nr = 1; /* iterators start at 0, such that they will start in an uninitialised state */
1365 		object->document->private_data = NULL;
1366 		object->document->class_type = PHP_LIBXML_CLASS_UNSET;
1367 		object->document->handlers = &php_libxml_default_document_handlers;
1368 		object->document->quirks_mode = PHP_LIBXML_NO_QUIRKS;
1369 	}
1370 
1371 	return ret_refcount;
1372 }
1373 
php_libxml_decrement_doc_ref_directly(php_libxml_ref_obj * document)1374 PHP_LIBXML_API int php_libxml_decrement_doc_ref_directly(php_libxml_ref_obj *document)
1375 {
1376 	int ret_refcount = --document->refcount;
1377 	if (ret_refcount == 0) {
1378 		if (document->private_data != NULL) {
1379 			document->private_data->dtor(document->private_data);
1380 		}
1381 		if (document->ptr != NULL) {
1382 			xmlFreeDoc((xmlDoc *) document->ptr);
1383 		}
1384 		if (document->doc_props != NULL) {
1385 			if (document->doc_props->classmap) {
1386 				zend_hash_destroy(document->doc_props->classmap);
1387 				FREE_HASHTABLE(document->doc_props->classmap);
1388 			}
1389 			efree(document->doc_props);
1390 		}
1391 		efree(document);
1392 	}
1393 
1394 	return ret_refcount;
1395 }
1396 
php_libxml_decrement_doc_ref(php_libxml_node_object * object)1397 PHP_LIBXML_API int php_libxml_decrement_doc_ref(php_libxml_node_object *object)
1398 {
1399 	int ret_refcount = -1;
1400 
1401 	if (object != NULL && object->document != NULL) {
1402 		ret_refcount = php_libxml_decrement_doc_ref_directly(object->document);
1403 		object->document = NULL;
1404 	}
1405 
1406 	return ret_refcount;
1407 }
1408 
php_libxml_node_free_resource(xmlNodePtr node)1409 PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node)
1410 {
1411 	if (!node) {
1412 		return;
1413 	}
1414 
1415 	switch (node->type) {
1416 		case XML_DOCUMENT_NODE:
1417 		case XML_HTML_DOCUMENT_NODE:
1418 			break;
1419 		case XML_ENTITY_REF_NODE:
1420 			/* Entity reference nodes are special: their children point to entity declarations,
1421 			 * but they don't own the declarations and therefore shouldn't free the children.
1422 			 * Moreover, there can be more than one reference node for a single entity declarations. */
1423 			php_libxml_unregister_node(node);
1424 			if (node->parent == NULL) {
1425 				php_libxml_node_free(node);
1426 			}
1427 			break;
1428 		default:
1429 			if (node->parent == NULL || node->type == XML_NAMESPACE_DECL) {
1430 				php_libxml_node_free_list((xmlNodePtr) node->children);
1431 				if (node->type == XML_ELEMENT_NODE) {
1432 					php_libxml_node_free_list((xmlNodePtr) node->properties);
1433 				}
1434 				php_libxml_unregister_node(node);
1435 				php_libxml_node_free(node);
1436 			} else {
1437 				php_libxml_unregister_node(node);
1438 			}
1439 	}
1440 }
1441 
php_libxml_node_decrement_resource(php_libxml_node_object * object)1442 PHP_LIBXML_API void php_libxml_node_decrement_resource(php_libxml_node_object *object)
1443 {
1444 	if (object != NULL && object->node != NULL) {
1445 		php_libxml_node_ptr *obj_node = (php_libxml_node_ptr *) object->node;
1446 		xmlNodePtr nodep = obj_node->node;
1447 		int ret_refcount = php_libxml_decrement_node_ptr(object);
1448 		if (ret_refcount == 0) {
1449 			php_libxml_node_free_resource(nodep);
1450 		} else {
1451 			if (object == obj_node->_private) {
1452 				obj_node->_private = NULL;
1453 			}
1454 		}
1455 	}
1456 	if (object != NULL && object->document != NULL) {
1457 		/* Safe to call as if the resource were freed then doc pointer is NULL */
1458 		php_libxml_decrement_doc_ref(object);
1459 	}
1460 }
1461 /* }}} */
1462 
php_libxml_attr_value(const xmlAttr * attr,bool * free)1463 PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free)
1464 {
1465 	/* For attributes we can have an optimized fast-path.
1466 	 * This fast-path is only possible in the (common) case where the attribute
1467 	 * has a single text child. Note that if the child or the content is NULL, this
1468 	 * is equivalent to not having content (i.e. the attribute has the empty string as value). */
1469 
1470 	*free = false;
1471 
1472 	if (attr->children == NULL) {
1473 		return BAD_CAST "";
1474 	}
1475 
1476 	if (attr->children->type == XML_TEXT_NODE && attr->children->next == NULL) {
1477 		if (attr->children->content == NULL) {
1478 			return BAD_CAST "";
1479 		} else {
1480 			return attr->children->content;
1481 		}
1482 	}
1483 
1484 	xmlChar *value = xmlNodeGetContent((const xmlNode *) attr);
1485 	if (UNEXPECTED(value == NULL)) {
1486 		return BAD_CAST "";
1487 	}
1488 
1489 	*free = true;
1490 	return value;
1491 }
1492 
php_libxml_write_smart_str(void * context,const char * buffer,int len)1493 static int php_libxml_write_smart_str(void *context, const char *buffer, int len)
1494 {
1495 	smart_str *str = context;
1496 	smart_str_appendl(str, buffer, len);
1497 	return len;
1498 }
1499 
php_libxml_default_dump_doc_to_str(xmlDocPtr doc,int options,const char * encoding)1500 static zend_string *php_libxml_default_dump_doc_to_str(xmlDocPtr doc, int options, const char *encoding)
1501 {
1502 	smart_str str = {0};
1503 
1504 	/* Encoding is handled from the encoding property set on the document */
1505 	xmlSaveCtxtPtr ctxt = xmlSaveToIO(php_libxml_write_smart_str, NULL, &str, encoding, options);
1506 	if (!ctxt) {
1507 		return NULL;
1508 	}
1509 
1510 	long status = xmlSaveDoc(ctxt, doc);
1511 	(void) xmlSaveClose(ctxt);
1512 	if (status < 0) {
1513 		smart_str_free_ex(&str, false);
1514 		return NULL;
1515 	}
1516 
1517 	return smart_str_extract(&str);
1518 }
1519 
php_libxml_default_dump_node_to_str(xmlDocPtr doc,xmlNodePtr node,bool format,const char * encoding)1520 static zend_string *php_libxml_default_dump_node_to_str(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
1521 {
1522 	smart_str str = {0};
1523 	// TODO: should this buffer take an encoding? For now keep it NULL for BC.
1524 	xmlOutputBufferPtr buf = xmlOutputBufferCreateIO(php_libxml_write_smart_str, NULL, &str, NULL);
1525 	if (!buf) {
1526 		return NULL;
1527 	}
1528 
1529 	xmlNodeDumpOutput(buf, doc, node, 0, format, encoding);
1530 
1531 	if (xmlOutputBufferFlush(buf) < 0) {
1532 		smart_str_free_ex(&str, false);
1533 		xmlOutputBufferClose(buf);
1534 		return NULL;
1535 	}
1536 
1537 	xmlOutputBufferClose(buf);
1538 
1539 	return smart_str_extract(&str);
1540 }
1541 
php_libxml_default_dump_doc_to_file(const char * filename,xmlDocPtr doc,bool format,const char * encoding)1542 static zend_long php_libxml_default_dump_doc_to_file(const char *filename, xmlDocPtr doc, bool format, const char *encoding)
1543 {
1544 	return xmlSaveFormatFileEnc(filename, doc, encoding, format);
1545 }
1546 
php_libxml_dump_node_to_file(const char * filename,xmlDocPtr doc,xmlNodePtr node,bool format,const char * encoding)1547 static zend_long php_libxml_dump_node_to_file(const char *filename, xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding)
1548 {
1549 	xmlOutputBufferPtr outbuf = xmlOutputBufferCreateFilename(filename, NULL, 0);
1550 	if (!outbuf) {
1551 		return -1;
1552 	}
1553 
1554 	xmlNodeDumpOutput(outbuf, doc, node, 0, format, encoding);
1555 	return xmlOutputBufferClose(outbuf);
1556 }
1557 
#if defined(PHP_WIN32) && defined(COMPILE_DL_LIBXML)
/* DLL entry point, compiled only for Windows builds where the extension is
 * built as a shared library (COMPILE_DL_LIBXML). Every notification is
 * handed on to libxml2's xmlDllMain(). */
PHP_LIBXML_API BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpvReserved)
{
	BOOL handled = xmlDllMain(hinstDLL, fdwReason, lpvReserved);

	return handled;
}
#endif
1564 
1565 #endif
1566