xref: /php-src/ext/libxml/php_libxml.h (revision 6366da48)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Shane Caraveo <shane@php.net>                               |
14    |          Wez Furlong <wez@thebrainroom.com>                          |
15    +----------------------------------------------------------------------+
16 */
17 
18 #ifndef PHP_LIBXML_H
19 #define PHP_LIBXML_H
20 
21 #ifdef HAVE_LIBXML
22 extern zend_module_entry libxml_module_entry;
23 #define libxml_module_ptr &libxml_module_entry
24 
25 #include "php_version.h"
26 #define PHP_LIBXML_VERSION PHP_VERSION
27 
28 #ifdef PHP_WIN32
29 #	define PHP_LIBXML_API __declspec(dllexport)
30 #elif defined(__GNUC__) && __GNUC__ >= 4
31 #	define PHP_LIBXML_API __attribute__ ((visibility("default")))
32 #else
33 #	define PHP_LIBXML_API
34 #endif
35 
36 #include "zend_smart_str.h"
37 #include <libxml/tree.h>
38 #include <libxml/parser.h>
39 
/* Save option bit (value 4). The expansion is parenthesized so that it stays a
 * single operand when combined with other operators, e.g. `~FLAG` or `FLAG + n`. */
#define LIBXML_SAVE_NOEMPTYTAG (1<<2)

#define LIBXML_NS_TAG_HOOK 1
43 
/* Module globals of the libxml extension. Individual members are reached
 * through the LIBXML(v) accessor macro defined later in this header. */
ZEND_BEGIN_MODULE_GLOBALS(libxml)
	zval stream_context;
	smart_str error_buffer;
	zend_llist *error_list;
	zend_fcall_info_cache entity_loader_callback;
	bool entity_loader_disabled;
ZEND_END_MODULE_GLOBALS(libxml)
51 
/* Per-document settings. A php_libxml_ref_obj refers to one of these through
 * its doc_props member.
 * NOTE(review): the boolean names look like they mirror the DOMDocument
 * properties of the same name -- confirm against the DOM extension. */
typedef struct _libxml_doc_props {
	HashTable *classmap;
	bool formatoutput;
	bool validateonparse;
	bool resolveexternals;
	bool preservewhitespace;
	bool substituteentities;
	bool stricterror;
	bool recover;
} libxml_doc_props;
62 
/* Modification tracking: when the object changes, we increment its counter.
 * When this counter no longer matches the counter at the time of caching,
 * we know that the object has changed and we have to update the cache. */
typedef struct {
	/* Advanced by php_libxml_invalidate_cache_tag() and compared by
	 * php_libxml_is_cache_tag_stale(). */
	size_t modification_nr;
} php_libxml_cache_tag;
69 
/* Header of the private data referenced by php_libxml_ref_obj::private_data.
 * Both callbacks receive a pointer to this header as their first argument.
 * NOTE(review): the "extra fields" marker suggests this is meant to be the
 * leading part of a larger, extension-specific structure -- confirm with users. */
typedef struct php_libxml_private_data_header {
	/* NOTE(review): presumably releases the private data -- confirm. */
	void (*dtor)(struct php_libxml_private_data_header *);
	/* Hook that is handed the header and a node. */
	void (*ns_hook)(struct php_libxml_private_data_header *, xmlNodePtr);
	/* extra fields */
} php_libxml_private_data_header;
75 
/**
 * It's possible to set custom handlers for certain actions depending on the type of document.
 * For example, there exist multiple ways to serialize an XML document,
 * therefore this structure allows setting up a custom handler.
 *
 * A handler table is referenced from php_libxml_ref_obj::handlers.
 * The *_to_str handlers return a zend_string; the *_to_file handlers take the
 * target filename first and return a zend_long.
 */
typedef struct php_libxml_document_handlers {
	zend_string *(*dump_node_to_str)(xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding);
	zend_string *(*dump_doc_to_str)(xmlDocPtr doc, int options, const char *encoding);
	zend_long (*dump_node_to_file)(const char *filename, xmlDocPtr doc, xmlNodePtr node, bool format, const char *encoding);
	zend_long (*dump_doc_to_file)(const char *filename, xmlDocPtr doc, bool format, const char *encoding);
} php_libxml_document_handlers;
87 
/**
 * Multiple representations are possible of the same underlying node data.
 * This is the case for example when a SimpleXML node is imported into DOM.
 * It must not be possible to obtain both a legacy and a modern representation
 * of the same node, as they have different assumptions. The class_type field
 * allows us to pin the representation to one of the two. If it is unset, no
 * representation has been forced upon the node yet, and thus no assumptions
 * have yet been made. This is the case for example when a SimpleXML node is
 * created by SimpleXML itself and never leaves SimpleXML.
 */
typedef enum _php_libxml_class_type {
	/* No representation has been pinned yet. */
	PHP_LIBXML_CLASS_UNSET = 0,
	/* Pinned to the legacy representation. */
	PHP_LIBXML_CLASS_LEGACY = 1,
	/* Pinned to the modern representation. */
	PHP_LIBXML_CLASS_MODERN = 2,
} php_libxml_class_type;
103 
/* Quirks mode recorded in php_libxml_ref_obj::quirks_mode.
 * NOTE(review): the enumerator names match the HTML document modes
 * (no-quirks, quirks, limited-quirks) -- confirm with the code that sets them. */
typedef enum php_libxml_quirks_mode {
	PHP_LIBXML_NO_QUIRKS = 0,
	PHP_LIBXML_QUIRKS,
	PHP_LIBXML_LIMITED_QUIRKS,
} php_libxml_quirks_mode;
109 
/* Reference-counted bookkeeping record that node objects point at through
 * php_libxml_node_object::document. */
typedef struct _php_libxml_ref_obj {
	void *ptr;
	libxml_doc_props *doc_props;
	/* Modification counter used to detect stale caches. */
	php_libxml_cache_tag cache_tag;
	php_libxml_private_data_header *private_data;
	const php_libxml_document_handlers *handlers;
	unsigned int refcount;
	/* Both enums are stored as 8-bit bit-fields. */
	php_libxml_class_type class_type : 8;
	php_libxml_quirks_mode quirks_mode : 8;
} php_libxml_ref_obj;
120 
/* Reference-counted wrapper around a libxml node. */
typedef struct _php_libxml_node_ptr {
	xmlNodePtr node;
	unsigned int refcount;
	/* php_libxml_invalidate_node_list_cache_from_doc() reads this as a
	 * php_libxml_node_object pointer; it may be NULL. */
	void *_private;
} php_libxml_node_ptr;
126 
/* Object wrapper shared by libxml-based node objects. The embedded zend_object
 * is the last member; php_libxml_node_fetch_object() recovers this structure
 * from a zend_object pointer by subtracting the handlers' offset. */
typedef struct _php_libxml_node_object {
	php_libxml_node_ptr *node;
	php_libxml_ref_obj *document;
	zend_object  std;
} php_libxml_node_object;
132 
133 
php_libxml_node_fetch_object(zend_object * obj)134 static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object *obj) {
135 	return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset);
136 }
137 
php_libxml_invalidate_cache_tag(php_libxml_cache_tag * cache_tag)138 static zend_always_inline void php_libxml_invalidate_cache_tag(php_libxml_cache_tag *cache_tag)
139 {
140 #if SIZEOF_SIZE_T == 8
141 	/* If one operation happens every nanosecond, then it would still require 584 years to overflow
142 	 * the counter. So we'll just assume this never happens. */
143 	cache_tag->modification_nr++;
144 #else
145 	size_t new_modification_nr = cache_tag->modification_nr + 1;
146 	if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */
147 		cache_tag->modification_nr = new_modification_nr;
148 	}
149 #endif
150 }
151 
php_libxml_is_cache_tag_stale(const php_libxml_cache_tag * object_tag,const php_libxml_cache_tag * cache_tag)152 static zend_always_inline bool php_libxml_is_cache_tag_stale(const php_libxml_cache_tag *object_tag, const php_libxml_cache_tag *cache_tag)
153 {
154 	ZEND_ASSERT(object_tag != NULL);
155 	ZEND_ASSERT(cache_tag != NULL);
156 	/* See overflow comment in php_libxml_invalidate_node_list_cache(). */
157 #if SIZEOF_SIZE_T == 8
158 	return cache_tag->modification_nr != object_tag->modification_nr;
159 #else
160 	return cache_tag->modification_nr != object_tag->modification_nr || UNEXPECTED(object_tag->modification_nr == SIZE_MAX);
161 #endif
162 }
163 
php_libxml_invalidate_node_list_cache(php_libxml_ref_obj * doc_ptr)164 static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_ref_obj *doc_ptr)
165 {
166 	if (doc_ptr) {
167 		php_libxml_invalidate_cache_tag(&doc_ptr->cache_tag);
168 	}
169 }
170 
php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp)171 static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp)
172 {
173 	if (docp && docp->_private) { /* docp is NULL for detached nodes */
174 		php_libxml_node_ptr *node_private = (php_libxml_node_ptr *) docp->_private;
175 		php_libxml_node_object *object_private = (php_libxml_node_object *) node_private->_private;
176 		if (object_private) {
177 			php_libxml_invalidate_node_list_cache(object_private->document);
178 		}
179 	}
180 }
181 
/* Fetch the php_libxml_node_object behind a zval pointer: takes the zval's
 * object with Z_OBJ_P() and passes it to php_libxml_node_fetch_object(). */
#define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv)))
183 
184 typedef void * (*php_libxml_export_node) (zval *object);
185 
/* Error category passed as the first argument of php_libxml_error_handler_va(). */
typedef enum {
	PHP_LIBXML_ERROR = 0,
	PHP_LIBXML_CTX_ERROR = 1,
	PHP_LIBXML_CTX_WARNING = 2,
} php_libxml_error_level;
191 
192 PHP_LIBXML_API unsigned int php_libxml_increment_node_ptr(php_libxml_node_object *object, xmlNodePtr node, void *private_data);
193 PHP_LIBXML_API unsigned int php_libxml_decrement_node_ptr(php_libxml_node_object *object);
194 PHP_LIBXML_API unsigned int php_libxml_decrement_node_ptr_ref(php_libxml_node_ptr *ptr);
195 PHP_LIBXML_API unsigned int php_libxml_increment_doc_ref(php_libxml_node_object *object, xmlDocPtr docp);
196 PHP_LIBXML_API unsigned int php_libxml_decrement_doc_ref_directly(php_libxml_ref_obj *document);
197 PHP_LIBXML_API unsigned int php_libxml_decrement_doc_ref(php_libxml_node_object *object);
198 PHP_LIBXML_API xmlNodePtr php_libxml_import_node(zval *object);
199 PHP_LIBXML_API zval *php_libxml_register_export(zend_class_entry *ce, php_libxml_export_node export_function);
200 /* When an explicit freeing of node and children is required */
201 PHP_LIBXML_API void php_libxml_node_free_list(xmlNodePtr node);
202 PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node);
203 /* When object dtor is called as node may still be referenced */
204 PHP_LIBXML_API void php_libxml_node_decrement_resource(php_libxml_node_object *object);
205 PHP_LIBXML_API void php_libxml_error_handler(void *ctx, const char *msg, ...);
206 PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...);
207 PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...);
208 PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...);
209 PHP_LIBXML_API void php_libxml_error_handler_va(php_libxml_error_level error_type, void *ctx, const char *msg, va_list args);
210 PHP_LIBXML_API int php_libxml_xmlCheckUTF8(const unsigned char *s);
211 PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext);
212 PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg);
213 PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable);
214 PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns);
215 PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void);
216 PHP_LIBXML_API bool php_libxml_uses_internal_errors(void);
217 
218 PHP_LIBXML_API xmlChar *php_libxml_attr_value(const xmlAttr *attr, bool *free);
219 
220 PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_string(const char *start, const char *end);
221 PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_stream(const php_stream *s);
222 
223 /* Init/shutdown functions*/
224 PHP_LIBXML_API void php_libxml_initialize(void);
225 PHP_LIBXML_API void php_libxml_shutdown(void);
226 
227 #define LIBXML(v) ZEND_MODULE_GLOBALS_ACCESSOR(libxml, v)
228 
229 #if defined(ZTS) && defined(COMPILE_DL_LIBXML)
ZEND_TSRMLS_CACHE_EXTERN()230 ZEND_TSRMLS_CACHE_EXTERN()
231 #endif
232 
/* Other extensions may override the global state options; these global options
 * are copied initially to ctxt->options. Set the options to a known good value.
 * See libxml2 globals.c and parserInternals.c.
 * The unique_name argument allows multiple sanitizes and restores within the
 * same function, even nested if necessary.
 *
 * Each previous value is saved in a local variable named
 * xml_old_<option>_<unique_name>, so the macro must be used where declarations
 * are allowed and paired with PHP_LIBXML_RESTORE_GLOBALS(unique_name) in a
 * scope where those locals are still visible. */
# define PHP_LIBXML_SANITIZE_GLOBALS(unique_name) \
	ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations") \
	int xml_old_loadsubset_##unique_name = xmlLoadExtDtdDefaultValue; \
	xmlLoadExtDtdDefaultValue = 0; \
	int xml_old_validate_##unique_name = xmlDoValidityCheckingDefaultValue; \
	xmlDoValidityCheckingDefaultValue = 0; \
	int xml_old_pedantic_##unique_name = xmlPedanticParserDefault(0); \
	int xml_old_substitute_##unique_name = xmlSubstituteEntitiesDefault(0); \
	int xml_old_linenrs_##unique_name = xmlLineNumbersDefault(0); \
	int xml_old_blanks_##unique_name = xmlKeepBlanksDefault(1); \
	ZEND_DIAGNOSTIC_IGNORED_END
249 
/* Write the values saved by PHP_LIBXML_SANITIZE_GLOBALS(unique_name) back into
 * the libxml2 global defaults. It reads the xml_old_*_<unique_name> locals, so
 * it must use the same unique_name and appear where those locals are in scope. */
# define PHP_LIBXML_RESTORE_GLOBALS(unique_name) \
	ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations") \
	xmlLoadExtDtdDefaultValue = xml_old_loadsubset_##unique_name; \
	xmlDoValidityCheckingDefaultValue = xml_old_validate_##unique_name; \
	(void) xmlPedanticParserDefault(xml_old_pedantic_##unique_name); \
	(void) xmlSubstituteEntitiesDefault(xml_old_substitute_##unique_name); \
	(void) xmlLineNumbersDefault(xml_old_linenrs_##unique_name); \
	(void) xmlKeepBlanksDefault(xml_old_blanks_##unique_name); \
	ZEND_DIAGNOSTIC_IGNORED_END
259 
260 /* Alternative for above, working directly on the context and not setting globals.
261  * Generally faster because no locking is involved, and this has the advantage that it sets the options to a known good value. */
262 static zend_always_inline void php_libxml_sanitize_parse_ctxt_options(xmlParserCtxtPtr ctxt)
263 {
264 	ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations") \
265 	ctxt->loadsubset = 0;
266 	ctxt->validate = 0;
267 	ctxt->pedantic = 0;
268 	ctxt->replaceEntities = 0;
269 	ctxt->linenumbers = 0;
270 	ctxt->keepBlanks = 1;
271 	ctxt->options = 0;
272 	ZEND_DIAGNOSTIC_IGNORED_END
273 }
274 #endif
275 
276 #define phpext_libxml_ptr libxml_module_ptr
277 
278 #endif /* PHP_LIBXML_H */
279