1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Shane Caraveo <shane@php.net> |
14 | Wez Furlong <wez@thebrainroom.com> |
15 +----------------------------------------------------------------------+
16 */
17
18 #ifndef PHP_LIBXML_H
19 #define PHP_LIBXML_H
20
21 #ifdef HAVE_LIBXML
22 extern zend_module_entry libxml_module_entry;
23 #define libxml_module_ptr &libxml_module_entry
24
25 #include "php_version.h"
26 #define PHP_LIBXML_VERSION PHP_VERSION
27
28 #ifdef PHP_WIN32
29 # define PHP_LIBXML_API __declspec(dllexport)
30 #elif defined(__GNUC__) && __GNUC__ >= 4
31 # define PHP_LIBXML_API __attribute__ ((visibility("default")))
32 #else
33 # define PHP_LIBXML_API
34 #endif
35
36 #include "zend_smart_str.h"
37 #include <libxml/tree.h>
38 #include <libxml/parser.h>
39
/* Save option flag occupying bit 2 (value 4).
 * The expansion is parenthesized so the shift cannot re-associate with
 * operators at the use site: without the parentheses,
 * `LIBXML_SAVE_NOEMPTYTAG * 2` would expand to `1<<2 * 2` (= 16) and
 * `10 - LIBXML_SAVE_NOEMPTYTAG` to `10 - 1<<2` (= 36). */
#define LIBXML_SAVE_NOEMPTYTAG (1<<2)
41
/* Module-global state of the libxml extension, declared through the Zend
 * module-globals macros. Individual members are reached with LIBXML(name).
 * NOTE(review): the per-field notes below are inferred from the member names
 * and types only; confirm against the implementation file. */
42 ZEND_BEGIN_MODULE_GLOBALS(libxml)
43 zval stream_context; /* presumably the context handled by php_libxml_switch_context() */
44 smart_str error_buffer; /* growable string buffer; presumably accumulates error text */
45 zend_llist *error_list; /* pointer to a linked list; presumably collected errors */
46 zend_fcall_info_cache entity_loader_callback; /* cached callable for a user entity loader (assumed) */
47 bool entity_loader_disabled; /* presumably toggled by php_libxml_disable_entity_loader() */
48 ZEND_END_MODULE_GLOBALS(libxml)
49
50 typedef struct _libxml_doc_props {
51 HashTable *classmap;
52 bool formatoutput;
53 bool validateonparse;
54 bool resolveexternals;
55 bool preservewhitespace;
56 bool substituteentities;
57 bool stricterror;
58 bool recover;
59 } libxml_doc_props;
60
/* Cache tag embedded in php_libxml_ref_obj. It holds a single modification
 * counter that php_libxml_invalidate_node_list_cache() increments. */
61 typedef struct {
62 size_t modification_nr; /* bumped on every invalidation */
63 } php_libxml_cache_tag;
64
/* Header for private data hung off a php_libxml_ref_obj (see its
 * private_data member).
 * NOTE(review): the "extra fields" marker suggests concrete private-data
 * types begin with this layout and append their own members; confirm. */
typedef struct _php_libxml_private_data_header php_libxml_private_data_header;

struct _php_libxml_private_data_header {
	/* Destructor hook; takes a pointer to a private-data header. */
	void (*dtor)(php_libxml_private_data_header *);
	/* extra fields */
};
69
70 /**
71 * Multiple representations are possible of the same underlying node data.
72 * This is the case for example when a SimpleXML node is imported into DOM.
73 * It must not be possible to obtain both a legacy and a modern representation
74 * of the same node, as they have different assumptions. The class_type field
75 * allows us to pin the representation to one of the two. If it is unset, no
76 * representation has been forced upon the node yet, and thus no assumptions
77 * have yet been made. This is the case for example when a SimpleXML node is
78 * created by SimpleXML itself and never leaves SimpleXML.
79 */
enum _php_libxml_class_type {
	/* No representation has been forced upon the node yet. */
	PHP_LIBXML_CLASS_UNSET = 0,
	/* Node is pinned to the legacy representation. */
	PHP_LIBXML_CLASS_LEGACY = 1,
	/* Node is pinned to the modern representation. */
	PHP_LIBXML_CLASS_MODERN = 2,
};
typedef enum _php_libxml_class_type php_libxml_class_type;
85
86 typedef struct _php_libxml_ref_obj {
87 void *ptr;
88 libxml_doc_props *doc_props;
89 php_libxml_cache_tag cache_tag;
90 php_libxml_private_data_header *private_data;
91 int refcount;
92 php_libxml_class_type class_type;
93 } php_libxml_ref_obj;
94
95 typedef struct _php_libxml_node_ptr {
96 xmlNodePtr node;
97 int refcount;
98 void *_private;
99 } php_libxml_node_ptr;
100
101 typedef struct _php_libxml_node_object {
102 php_libxml_node_ptr *node;
103 php_libxml_ref_obj *document;
104 zend_object std;
105 } php_libxml_node_object;
106
107
php_libxml_node_fetch_object(zend_object * obj)108 static inline php_libxml_node_object *php_libxml_node_fetch_object(zend_object *obj) {
109 return (php_libxml_node_object *)((char*)(obj) - obj->handlers->offset);
110 }
111
php_libxml_invalidate_node_list_cache(php_libxml_ref_obj * doc_ptr)112 static zend_always_inline void php_libxml_invalidate_node_list_cache(php_libxml_ref_obj *doc_ptr)
113 {
114 if (!doc_ptr) {
115 return;
116 }
117 #if SIZEOF_SIZE_T == 8
118 /* If one operation happens every nanosecond, then it would still require 584 years to overflow
119 * the counter. So we'll just assume this never happens. */
120 doc_ptr->cache_tag.modification_nr++;
121 #else
122 size_t new_modification_nr = doc_ptr->cache_tag.modification_nr + 1;
123 if (EXPECTED(new_modification_nr > 0)) { /* unsigned overflow; checking after addition results in one less instruction */
124 doc_ptr->cache_tag.modification_nr = new_modification_nr;
125 }
126 #endif
127 }
128
php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp)129 static zend_always_inline void php_libxml_invalidate_node_list_cache_from_doc(xmlDocPtr docp)
130 {
131 if (docp && docp->_private) { /* docp is NULL for detached nodes */
132 php_libxml_node_ptr *node_private = (php_libxml_node_ptr *) docp->_private;
133 php_libxml_node_object *object_private = (php_libxml_node_object *) node_private->_private;
134 if (object_private) {
135 php_libxml_invalidate_node_list_cache(object_private->document);
136 }
137 }
138 }
139
/* Given a zval pointer, pass its object (via Z_OBJ_P) to
 * php_libxml_node_fetch_object() to obtain the php_libxml_node_object. */
140 #define Z_LIBXML_NODE_P(zv) php_libxml_node_fetch_object(Z_OBJ_P((zv)))
141
142 typedef void * (*php_libxml_export_node) (zval *object);
143
/* Error kind passed as the error_type argument of
 * php_libxml_error_handler_va().
 * NOTE(review): the per-value notes are inferred from the names and the
 * similarly named php_libxml_ctx_error()/php_libxml_ctx_warning(); confirm. */
144 typedef enum {
145 PHP_LIBXML_ERROR = 0, /* presumably an error without parser-context details */
146 PHP_LIBXML_CTX_ERROR = 1, /* presumably an error reported with parser context */
147 PHP_LIBXML_CTX_WARNING = 2, /* presumably a warning reported with parser context */
148 } php_libxml_error_level;
149
150 PHP_LIBXML_API int php_libxml_increment_node_ptr(php_libxml_node_object *object, xmlNodePtr node, void *private_data);
151 PHP_LIBXML_API int php_libxml_decrement_node_ptr(php_libxml_node_object *object);
152 PHP_LIBXML_API int php_libxml_increment_doc_ref(php_libxml_node_object *object, xmlDocPtr docp);
153 PHP_LIBXML_API int php_libxml_decrement_doc_ref_directly(php_libxml_ref_obj *document);
154 PHP_LIBXML_API int php_libxml_decrement_doc_ref(php_libxml_node_object *object);
155 PHP_LIBXML_API xmlNodePtr php_libxml_import_node(zval *object);
156 PHP_LIBXML_API zval *php_libxml_register_export(zend_class_entry *ce, php_libxml_export_node export_function);
157 /* When an explicit freeing of node and children is required */
158 PHP_LIBXML_API void php_libxml_node_free_list(xmlNodePtr node);
159 PHP_LIBXML_API void php_libxml_node_free_resource(xmlNodePtr node);
160 /* When object dtor is called as node may still be referenced */
161 PHP_LIBXML_API void php_libxml_node_decrement_resource(php_libxml_node_object *object);
162 PHP_LIBXML_API void php_libxml_error_handler(void *ctx, const char *msg, ...);
163 PHP_LIBXML_API void php_libxml_ctx_warning(void *ctx, const char *msg, ...);
164 PHP_LIBXML_API void php_libxml_pretend_ctx_error_ex(const char *file, int line, int column, const char *msg,...);
165 PHP_LIBXML_API void php_libxml_ctx_error(void *ctx, const char *msg, ...);
166 PHP_LIBXML_API void php_libxml_error_handler_va(php_libxml_error_level error_type, void *ctx, const char *msg, va_list args);
167 PHP_LIBXML_API int php_libxml_xmlCheckUTF8(const unsigned char *s);
168 PHP_LIBXML_API void php_libxml_switch_context(zval *context, zval *oldcontext);
169 PHP_LIBXML_API void php_libxml_issue_error(int level, const char *msg);
170 PHP_LIBXML_API bool php_libxml_disable_entity_loader(bool disable);
171 PHP_LIBXML_API void php_libxml_set_old_ns(xmlDocPtr doc, xmlNsPtr ns);
172 PHP_LIBXML_API php_stream_context *php_libxml_get_stream_context(void);
173 PHP_LIBXML_API bool php_libxml_uses_internal_errors(void);
174
175 PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_string(const char *start, const char *end);
176 PHP_LIBXML_API zend_string *php_libxml_sniff_charset_from_stream(const php_stream *s);
177
/* Init/shutdown functions */
179 PHP_LIBXML_API void php_libxml_initialize(void);
180 PHP_LIBXML_API void php_libxml_shutdown(void);
181
/* Accessor for member v of the libxml module globals. */
182 #define LIBXML(v) ZEND_MODULE_GLOBALS_ACCESSOR(libxml, v)
183
184 #if defined(ZTS) && defined(COMPILE_DL_LIBXML)
ZEND_TSRMLS_CACHE_EXTERN()185 ZEND_TSRMLS_CACHE_EXTERN()
186 #endif
187
/* Other extensions may override the global state options; these global options
 * are copied initially to ctxt->options. Set the options to a known good value.
 * See libxml2 globals.c and parserInternals.c.
 * The unique_name argument allows multiple sanitizes and restores within the
 * same function, even nested if necessary. */
/* PHP_LIBXML_SANITIZE_GLOBALS(unique_name)
 * Saves the current libxml2 parser defaults into int locals named
 * xml_old_<setting>_<unique_name> and then forces them to: external DTD
 * loading 0, validity checking 0, pedantic 0, entity substitution 0,
 * line numbers 0, keep blanks 1.
 * Because the expansion declares variables, it must appear where
 * declarations are allowed, and the matching
 * PHP_LIBXML_RESTORE_GLOBALS(unique_name) must be placed where those
 * variables are still in scope.
 * Deprecation warnings for the libxml2 globals/functions are suppressed for
 * the duration of the expansion. */
193 # define PHP_LIBXML_SANITIZE_GLOBALS(unique_name) \
194 ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations") \
195 int xml_old_loadsubset_##unique_name = xmlLoadExtDtdDefaultValue; \
196 xmlLoadExtDtdDefaultValue = 0; \
197 int xml_old_validate_##unique_name = xmlDoValidityCheckingDefaultValue; \
198 xmlDoValidityCheckingDefaultValue = 0; \
199 int xml_old_pedantic_##unique_name = xmlPedanticParserDefault(0); \
200 int xml_old_substitute_##unique_name = xmlSubstituteEntitiesDefault(0); \
201 int xml_old_linenrs_##unique_name = xmlLineNumbersDefault(0); \
202 int xml_old_blanks_##unique_name = xmlKeepBlanksDefault(1); \
203 ZEND_DIAGNOSTIC_IGNORED_END
204
/* PHP_LIBXML_RESTORE_GLOBALS(unique_name)
 * Writes the values held in the xml_old_<setting>_<unique_name> locals back
 * to the libxml2 parser defaults. The locals are the ones declared by
 * PHP_LIBXML_SANITIZE_GLOBALS with the same unique_name, so that macro must
 * have been expanded earlier in an enclosing scope. Return values of the
 * libxml2 setter functions are deliberately discarded. */
205 # define PHP_LIBXML_RESTORE_GLOBALS(unique_name) \
206 ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations") \
207 xmlLoadExtDtdDefaultValue = xml_old_loadsubset_##unique_name; \
208 xmlDoValidityCheckingDefaultValue = xml_old_validate_##unique_name; \
209 (void) xmlPedanticParserDefault(xml_old_pedantic_##unique_name); \
210 (void) xmlSubstituteEntitiesDefault(xml_old_substitute_##unique_name); \
211 (void) xmlLineNumbersDefault(xml_old_linenrs_##unique_name); \
212 (void) xmlKeepBlanksDefault(xml_old_blanks_##unique_name); \
213 ZEND_DIAGNOSTIC_IGNORED_END
214
215 /* Alternative for above, working directly on the context and not setting globals.
216 * Generally faster because no locking is involved, and this has the advantage that it sets the options to a known good value. */
217 static zend_always_inline void php_libxml_sanitize_parse_ctxt_options(xmlParserCtxtPtr ctxt)
218 {
219 ctxt->loadsubset = 0;
220 ctxt->validate = 0;
221 ctxt->pedantic = 0;
222 ctxt->replaceEntities = 0;
223 ctxt->linenumbers = 0;
224 ctxt->keepBlanks = 1;
225 ctxt->options = 0;
226 }
227
228 #else /* HAVE_LIBXML */
229 #define libxml_module_ptr NULL
230 #endif
231
232 #define phpext_libxml_ptr libxml_module_ptr
233
234 #endif /* PHP_LIBXML_H */
235