1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Christian Stocker <chregu@php.net> |
14 | Rob Richards <rrichards@php.net> |
15 | Marcus Borger <helly@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 #ifndef PHP_DOM_H
20 #define PHP_DOM_H
21
22 extern zend_module_entry dom_module_entry;
23 #define phpext_dom_ptr &dom_module_entry
24
25 #ifdef ZTS
26 #include "TSRM.h"
27 #endif
28
29 #include <libxml/parser.h>
30 #include <libxml/parserInternals.h>
31 #include <libxml/tree.h>
32 #include <libxml/uri.h>
33 #include <libxml/xmlerror.h>
34 #include <libxml/xinclude.h>
35 #include <libxml/hash.h>
36 #include <libxml/c14n.h>
37 #ifdef LIBXML_HTML_ENABLED
38 #include <libxml/HTMLparser.h>
39 #include <libxml/HTMLtree.h>
40 #endif
41 #ifdef LIBXML_XPATH_ENABLED
42 #include <libxml/xpath.h>
43 #include <libxml/xpathInternals.h>
44 #endif
45 #ifdef LIBXML_XPTR_ENABLED
46 #include <libxml/xpointer.h>
47 #endif
48 #ifdef PHP_WIN32
49 #ifndef DOM_EXPORTS
50 #define DOM_EXPORTS
51 #endif
52 #endif
53
54 #include "xml_common.h"
55 #include "ext/libxml/php_libxml.h"
56 #include "xpath_callbacks.h"
57 #include "zend_exceptions.h"
58 #include "dom_ce.h"
/* DOM_API_VERSION: please bump this whenever anything in the API changes,
   so that script authors can more easily check which behavior applies.
   It can be checked with phpversion("dom");
 */
63 #define DOM_API_VERSION "20031129"
64 /* Define a custom type for iterating using an unused nodetype */
65 #define DOM_NODESET XML_XINCLUDE_START
66
/* Object storage for an XPath object: the XPath callback state and the
 * register_node_ns flag are stored in front of the embedded dom_object.
 * php_xpath_obj_from_obj() recovers this struct from the embedded zend_object
 * by subtracting the offset of `dom`, so the layout must stay in sync with it. */
typedef struct _dom_xpath_object {
	php_dom_xpath_callbacks xpath_callbacks;
	bool register_node_ns;
	dom_object dom;
} dom_xpath_object;
72
php_xpath_obj_from_obj(zend_object * obj)73 static inline dom_xpath_object *php_xpath_obj_from_obj(zend_object *obj) {
74 return (dom_xpath_object*)((char*)(obj)
75 - XtOffsetOf(dom_xpath_object, dom) - XtOffsetOf(dom_object, std));
76 }
77
78 #define Z_XPATHOBJ_P(zv) php_xpath_obj_from_obj(Z_OBJ_P((zv)))
79
/* State for a node collection object (node list / named node map).
 * NOTE(review): the field notes below are inferred from the field names and
 * from the dom_namednode_iter() prototype in this header; confirm against the
 * implementation before relying on them. */
typedef struct _dom_nnodemap_object {
	dom_object *baseobj; /* presumably the object the collection was created from */
	zval baseobj_zv;
	int nodetype;
	int cached_length;
	xmlHashTable *ht;
	xmlChar *local, *local_lower;
	xmlChar *ns;
	php_libxml_cache_tag cache_tag;
	dom_object *cached_obj;
	int cached_obj_index;
	/* Single-bit flags; presumably record whether `local` / `ns` must be freed (TODO confirm). */
	bool free_local : 1;
	bool free_ns : 1;
} dom_nnodemap_object;
94
/* Iterator state for DOM collections. The zend_object_iterator is the first
 * member; the remaining members hold the current object zval, a hash position
 * and a cache tag.
 * NOTE(review): presumably created by php_dom_get_iterator() - confirm. */
typedef struct {
	zend_object_iterator intern;
	zval curobj;
	HashPosition pos;
	php_libxml_cache_tag cache_tag;
} php_dom_iterator;
101
/* Object storage for a namespace node: a dom_object preceded by a pointer to
 * the parent's dom_object. php_dom_namespace_node_obj_from_obj() recovers this
 * struct from the embedded zend_object (dom.std). */
typedef struct {
	/* This may be a fake object that isn't actually in the children list of the parent.
	 * This is because some namespace declaration nodes aren't stored on the parent in libxml2, so we have to fake it.
	 * We could use a zval for this, but since this is always going to be an object let's save space... */
	dom_object *parent_intern;
	dom_object dom;
} dom_object_namespace_node;
109
/* Kinds of collection object that php_dom_create_iterator() can be asked to
 * create. The enumerators are numbered consecutively starting at zero. */
typedef enum _dom_iterator_type {
	DOM_NODELIST = 0,
	DOM_NAMEDNODEMAP = 1,
	DOM_DTD_NAMEDNODEMAP = 2,
	DOM_HTMLCOLLECTION = 3,
} dom_iterator_type;
116
php_dom_namespace_node_obj_from_obj(zend_object * obj)117 static inline dom_object_namespace_node *php_dom_namespace_node_obj_from_obj(zend_object *obj) {
118 return (dom_object_namespace_node*)((char*)(obj) - XtOffsetOf(dom_object_namespace_node, dom.std));
119 }
120
121 #include "domexception.h"
122
123 #define DOM_HTML_NO_DEFAULT_NS (1U << 31)
124
/* Prototypes for helper functions shared across the DOM extension.
 * Only the signatures are visible in this header; consult the definitions
 * for the exact semantics, ownership rules and error behavior. */

/* Object storage, document properties and object constructors. */
void dom_objects_free_storage(zend_object *object);
dom_doc_propsptr dom_get_doc_props(php_libxml_ref_obj *document);
libxml_doc_props const* dom_get_doc_props_read_only(const php_libxml_ref_obj *document);
zend_object *dom_objects_new(zend_class_entry *class_type);
zend_object *dom_nnodemap_objects_new(zend_class_entry *class_type);
/* Only declared when LIBXML_XPATH_ENABLED is defined. */
#ifdef LIBXML_XPATH_ENABLED
zend_object *dom_xpath_objects_new(zend_class_entry *class_type);
#endif
bool dom_get_strict_error(php_libxml_ref_obj *document);
void node_list_unlink(xmlNodePtr node);

/* Qualified-name and namespace helpers. */
int dom_check_qname(char *qname, char **localname, char **prefix, int uri_len, int name_len);
xmlNsPtr dom_get_ns(xmlNodePtr node, char *uri, int *errorcode, char *prefix);
xmlNsPtr dom_get_ns_unchecked(xmlNodePtr nodep, char *uri, char *prefix);
void dom_reconcile_ns(xmlDocPtr doc, xmlNodePtr nodep);
void dom_reconcile_ns_list(xmlDocPtr doc, xmlNodePtr nodep, xmlNodePtr last);
xmlNsPtr dom_get_nsdecl(xmlNode *node, xmlChar *localName);

/* Tree, node and collection helpers. */
void php_dom_normalize_legacy(xmlNodePtr nodep);
void php_dom_normalize_modern(xmlNodePtr nodep);
xmlNode *dom_get_elements_by_tag_name_ns_raw(xmlNodePtr basep, xmlNodePtr nodep, xmlChar *ns, xmlChar *local, xmlChar *local_lower, int *cur, int index);
void php_dom_create_implementation(zval *retval, bool modern);
int dom_hierarchy(xmlNodePtr parent, xmlNodePtr child);
bool dom_has_feature(zend_string *feature, zend_string *version);
int dom_node_is_read_only(const xmlNode *node);
bool dom_node_children_valid(const xmlNode *node);
void php_dom_create_iterator(zval *return_value, dom_iterator_type iterator_type, bool modern);
void dom_namednode_iter(dom_object *basenode, int ntype, dom_object *intern, xmlHashTablePtr ht, const char *local, size_t local_len, const char *ns, size_t ns_len);
xmlNodePtr create_notation(const xmlChar *name, const xmlChar *ExternalID, const xmlChar *SystemID);
xmlNode *php_dom_libxml_hash_iter(dom_nnodemap_object *objmap, int index);
zend_object_iterator *php_dom_get_iterator(zend_class_entry *ce, zval *object, int by_ref);
void dom_set_doc_classmap(php_libxml_ref_obj *document, zend_class_entry *basece, zend_class_entry *ce);
xmlNodePtr php_dom_create_fake_namespace_decl(xmlNodePtr nodep, xmlNsPtr original, zval *return_value, dom_object *parent_intern);
void php_dom_get_content_into_zval(const xmlNode *nodep, zval *target, bool default_is_null);
zend_string *dom_node_concatenated_name_helper(size_t name_len, const char *name, size_t prefix_len, const char *prefix);
zend_string *dom_node_get_node_name_attribute_or_element(const xmlNode *nodep, bool uppercase);
bool php_dom_is_node_connected(const xmlNode *node);
bool php_dom_adopt_node(xmlNodePtr nodep, dom_object *dom_object_new_document, xmlDocPtr new_document);
xmlNsPtr dom_get_ns_resolve_prefix_conflict(xmlNodePtr tree, const char *uri);
int dom_validate_and_extract(const zend_string *namespace, const zend_string *qname, xmlChar **localName, xmlChar **prefix);
bool dom_match_qualified_name_according_to_spec(const xmlChar *qname, const xmlNode *nodep);
bool php_dom_has_sibling_following_node(xmlNodePtr node, xmlElementType type);
bool php_dom_has_sibling_preceding_node(xmlNodePtr node, xmlElementType type);
bool php_dom_has_child_of_type(xmlNodePtr node, xmlElementType type);
void php_dom_update_document_after_clone(dom_object *original, xmlNodePtr original_node, dom_object *clone, xmlNodePtr cloned_node);
xmlAttrPtr php_dom_get_attribute_node(xmlNodePtr elem, const xmlChar *name, size_t name_len);
xmlChar *php_dom_libxml_fix_file_path(xmlChar *path);
void dom_document_convert_to_modern(php_libxml_ref_obj *document, xmlDocPtr lxml_doc);
dom_object *php_dom_instantiate_object_helper(zval *return_value, zend_class_entry *ce, xmlNodePtr obj, dom_object *parent);
xmlDocPtr php_dom_create_html_doc(void);
xmlEntityPtr dom_entity_reference_fetch_and_sync_declaration(xmlNodePtr reference);
void dom_set_xml_class(php_libxml_ref_obj *document);
const char *dom_locate_a_namespace(const xmlNode *node, const zend_string *prefix);
void dom_mark_namespaces_as_attributes_too(php_dom_libxml_ns_mapper *ns_mapper, xmlDocPtr doc);
bool dom_compare_value(const xmlAttr *attr, const xmlChar *value);
void dom_attr_value_will_change(dom_object *obj, xmlAttrPtr attrp);
179
/* Selects how the `source` argument of dom_document_parser() is interpreted.
 * NOTE(review): presumably in-memory data vs. a file path, going by the names - confirm. */
typedef enum {
	DOM_LOAD_STRING = 0,
	DOM_LOAD_FILE = 1,
} dom_load_mode;

/* Sentinel document pointer with the value -1, distinct from NULL.
 * NOTE(review): presumably returned by dom_document_parser() for malformed input - confirm. */
#define DOM_DOCUMENT_MALFORMED ((xmlDocPtr) -1)

xmlDocPtr dom_document_parser(zval *id, dom_load_mode mode, const char *source, size_t source_len, size_t options, xmlCharEncodingHandlerPtr encoding);
188
/* ParentNode / ChildNode style operations and the insertion checks they use.
 * In the functions taking (nodes, nodesc), `nodes` is an array of zvals and
 * `nodesc` presumably its element count (TODO confirm against the callers). */
void dom_parent_node_prepend(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_parent_node_append(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_parent_node_after(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_parent_node_before(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_parent_node_replace_children(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_child_node_remove(dom_object *context);
void dom_child_replace_with(dom_object *context, zval *nodes, uint32_t nodesc);
void dom_remove_all_children(xmlNodePtr nodep);
bool php_dom_fragment_insertion_hierarchy_check_pre_insertion(xmlNodePtr parent, xmlNodePtr node, xmlNodePtr child);
bool php_dom_fragment_insertion_hierarchy_check_replace(xmlNodePtr parent, xmlNodePtr node, xmlNodePtr child);
void php_dom_node_append(php_libxml_ref_obj *document, xmlNodePtr node, xmlNodePtr parent);
bool php_dom_pre_insert(php_libxml_ref_obj *document, xmlNodePtr node, xmlNodePtr parent, xmlNodePtr insertion_point);
bool php_dom_pre_insert_is_parent_invalid(xmlNodePtr parent);
/* Selector-based lookups; each takes the selector text as a zend_string and writes its result to return_value. */
void dom_parent_node_query_selector(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
void dom_parent_node_query_selector_all(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
void dom_element_matches(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);
void dom_element_closest(xmlNodePtr thisp, dom_object *intern, zval *return_value, const zend_string *selectors_str);

/* Node map and node list APIs: lookup by name or by index, either as a raw
 * xmlNodePtr or stored into a zval. */
xmlNodePtr php_dom_named_node_map_get_named_item(dom_nnodemap_object *objmap, const zend_string *named, bool may_transform);
void php_dom_named_node_map_get_named_item_into_zval(dom_nnodemap_object *objmap, const zend_string *named, zval *return_value);
xmlNodePtr php_dom_named_node_map_get_item(dom_nnodemap_object *objmap, zend_long index);
void php_dom_named_node_map_get_item_into_zval(dom_nnodemap_object *objmap, zend_long index, zval *return_value);
int php_dom_get_namednodemap_length(dom_object *obj);

xmlNodePtr dom_clone_node(php_dom_libxml_ns_mapper *ns_mapper, xmlNodePtr node, xmlDocPtr doc, bool recursive);
216
/*
 * Fetches the dom_object behind the object zval `__id` into `__intern`.
 * When the fetched object's `ptr` is NULL, throws an Error with the message
 * "Couldn't fetch <class name>" and expands RETURN_THROWS() in the caller.
 *
 * __id:     zval pointer holding the object.
 * __intern: lvalue of type dom_object * that receives the result. It is
 *           evaluated more than once, so it must have no side effects.
 *
 * The macro expands to a brace-enclosed block. Parameters are parenthesized
 * so that non-trivial lvalue expressions (e.g. `*out`) expand correctly.
 */
#define DOM_GET_INTERN(__id, __intern) { \
	(__intern) = Z_DOMOBJ_P(__id); \
	if (UNEXPECTED((__intern)->ptr == NULL)) { \
		zend_throw_error(NULL, "Couldn't fetch %s", ZSTR_VAL((__intern)->std.ce->name));\
		RETURN_THROWS();\
	} \
}

/* Same as DOM_GET_INTERN, applied to ZEND_THIS. */
#define DOM_GET_THIS_INTERN(__intern) DOM_GET_INTERN(ZEND_THIS, __intern)
226
/*
 * Fetches the dom_object for `__id` into `__intern` via DOM_GET_INTERN (which
 * throws and returns from the caller when the object has no backing pointer),
 * then stores the wrapped libxml node, cast to `__prtype`, into `__ptr`.
 *
 * __ptr:    lvalue receiving the node pointer.
 * __id:     zval pointer holding the object.
 * __prtype: pointer type to cast the node to.
 * __intern: lvalue of type dom_object * receiving the object; evaluated more
 *           than once, so it must have no side effects.
 *
 * Parameters are parenthesized so that non-trivial lvalue expressions expand
 * with the intended precedence.
 */
#define DOM_GET_OBJ(__ptr, __id, __prtype, __intern) { \
	DOM_GET_INTERN(__id, __intern); \
	(__ptr) = (__prtype)((php_libxml_node_ptr *)(__intern)->ptr)->node; \
}
231
php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag * cache_tag,const php_libxml_ref_obj * doc_ptr)232 static zend_always_inline bool php_dom_is_cache_tag_stale_from_doc_ptr(const php_libxml_cache_tag *cache_tag, const php_libxml_ref_obj *doc_ptr)
233 {
234 ZEND_ASSERT(doc_ptr != NULL);
235 return php_libxml_is_cache_tag_stale(cache_tag, &doc_ptr->cache_tag);
236 }
237
php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag * cache_tag,const xmlNodePtr node)238 static zend_always_inline bool php_dom_is_cache_tag_stale_from_node(const php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
239 {
240 ZEND_ASSERT(node != NULL);
241 php_libxml_node_ptr *_private = node->_private;
242 if (!_private) {
243 return true;
244 }
245 php_libxml_node_object *object_private = _private->_private;
246 if (!object_private || !object_private->document) {
247 return true;
248 }
249 return php_dom_is_cache_tag_stale_from_doc_ptr(cache_tag, object_private->document);
250 }
251
php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag * cache_tag,const xmlNodePtr node)252 static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_node(php_libxml_cache_tag *cache_tag, const xmlNodePtr node)
253 {
254 ZEND_ASSERT(cache_tag != NULL);
255 php_libxml_node_ptr *_private = node->_private;
256 if (_private) {
257 php_libxml_node_object *object_private = _private->_private;
258 if (object_private->document) {
259 cache_tag->modification_nr = object_private->document->cache_tag.modification_nr;
260 }
261 }
262 }
263
php_dom_mark_cache_tag_up_to_date_from_doc_ref(php_libxml_cache_tag * cache_tag,php_libxml_ref_obj * document)264 static zend_always_inline void php_dom_mark_cache_tag_up_to_date_from_doc_ref(php_libxml_cache_tag *cache_tag, php_libxml_ref_obj *document)
265 {
266 ZEND_ASSERT(cache_tag != NULL);
267 ZEND_ASSERT(document != NULL);
268 cache_tag->modification_nr = document->cache_tag.modification_nr;
269 }
270
php_dom_follow_spec_node(const xmlNode * node)271 static zend_always_inline bool php_dom_follow_spec_node(const xmlNode *node)
272 {
273 ZEND_ASSERT(node != NULL);
274 php_libxml_node_ptr *_private = node->_private;
275 if (_private) {
276 php_libxml_node_object *object_private = _private->_private;
277 if (object_private->document) {
278 return php_dom_follow_spec_doc_ref(object_private->document);
279 }
280 }
281 return false;
282 }
283
284 /* Returns the first child of a container node (e.g. elements, fragments, documents, ...). */
php_dom_first_child_of_container_node(xmlNodePtr parent)285 static zend_always_inline xmlNodePtr php_dom_first_child_of_container_node(xmlNodePtr parent)
286 {
287 if (parent->type == XML_DOCUMENT_NODE || parent->type == XML_HTML_DOCUMENT_NODE) {
288 return xmlDocGetRootElement((xmlDoc *) parent);
289 } else {
290 return parent->children;
291 }
292 }
293
/* Declarations of the dom module's MINIT, MSHUTDOWN and MINFO hook functions. */
PHP_MINIT_FUNCTION(dom);
PHP_MSHUTDOWN_FUNCTION(dom);
PHP_MINFO_FUNCTION(dom);
297
298 #endif /* PHP_DOM_H */
299