xref: /php-src/ext/soap/php_xml.c (revision cc046426)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Brad Lafountain <rodif_bl@yahoo.com>                        |
14   |          Shane Caraveo <shane@caraveo.com>                           |
15   |          Dmitry Stogov <dmitry@php.net>                              |
16   +----------------------------------------------------------------------+
17 */
18 
#include "php_soap.h"
#include "ext/libxml/php_libxml.h"
#include "libxml/parser.h"
#include "libxml/parserInternals.h"
#include <limits.h>
23 
24 /* Channel libxml file io layer through the PHP streams subsystem.
25  * This allows use of ftps:// and https:// urls */
26 
/* Tells whether a NUL-terminated string consists solely of XML whitespace
 * (space, tab, line feed, carriage return). An empty string counts as blank. */
static bool is_blank(const xmlChar *str)
{
	for (const xmlChar *p = str; *p != '\0'; p++) {
		switch (*p) {
			case ' ':
			case 0x9: /* tab */
			case 0xa: /* line feed */
			case 0xd: /* carriage return */
				break;
			default:
				/* First non-whitespace character settles the answer. */
				return false;
		}
	}
	return true;
}
37 
38 /* removes all empty text, comments and other insignoficant nodes */
cleanup_xml_node(xmlNodePtr node)39 static void cleanup_xml_node(xmlNodePtr node)
40 {
41 	xmlNodePtr trav;
42 	xmlNodePtr del = NULL;
43 
44 	trav = node->children;
45 	while (trav != NULL) {
46 		if (del != NULL) {
47 			xmlUnlinkNode(del);
48 			xmlFreeNode(del);
49 			del = NULL;
50 		}
51 		if (trav->type == XML_TEXT_NODE) {
52 			if (is_blank(trav->content)) {
53 				del = trav;
54 			}
55 		} else if ((trav->type != XML_ELEMENT_NODE) &&
56 		           (trav->type != XML_CDATA_SECTION_NODE)) {
57 			del = trav;
58 		} else if (trav->children != NULL) {
59 			cleanup_xml_node(trav);
60 		}
61 		trav = trav->next;
62 	}
63 	if (del != NULL) {
64 		xmlUnlinkNode(del);
65 		xmlFreeNode(del);
66 	}
67 }
68 
/* No-op SAX callback; the parse functions in this file install it as the
 * ignorableWhitespace handler. It does nothing with its arguments. */
static void soap_ignorableWhitespace(void *ctx, const xmlChar *ch, int len)
{
	(void) ctx;
	(void) ch;
	(void) len;
}
72 
/* No-op SAX callback; the parse functions in this file install it as the
 * comment handler. It does nothing with its arguments. */
static void soap_Comment(void *ctx, const xmlChar *value)
{
	(void) ctx;
	(void) value;
}
76 
soap_xmlParseFile(const char * filename)77 xmlDocPtr soap_xmlParseFile(const char *filename)
78 {
79 	xmlParserCtxtPtr ctxt = NULL;
80 	xmlDocPtr ret;
81 	bool old_allow_url_fopen;
82 
83 /*
84 	xmlInitParser();
85 */
86 
87 	old_allow_url_fopen = PG(allow_url_fopen);
88 	PG(allow_url_fopen) = 1;
89 	ctxt = xmlCreateFileParserCtxt(filename);
90 	PG(allow_url_fopen) = old_allow_url_fopen;
91 	if (ctxt) {
92 		bool old;
93 
94 		php_libxml_sanitize_parse_ctxt_options(ctxt);
95 		/* TODO: In libxml2 2.14.0 change this to the new options API so we don't rely on deprecated APIs. */
96 		ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations")
97 		ctxt->keepBlanks = 0;
98 		ctxt->options |= XML_PARSE_HUGE;
99 		ZEND_DIAGNOSTIC_IGNORED_END
100 		ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace;
101 		ctxt->sax->comment = soap_Comment;
102 		ctxt->sax->warning = NULL;
103 		ctxt->sax->error = NULL;
104 		/*ctxt->sax->fatalError = NULL;*/
105 		old = php_libxml_disable_entity_loader(1);
106 		xmlParseDocument(ctxt);
107 		php_libxml_disable_entity_loader(old);
108 		if (ctxt->wellFormed) {
109 			ret = ctxt->myDoc;
110 			if (ret->URL == NULL && ctxt->directory != NULL) {
111 				ret->URL = xmlCharStrdup(ctxt->directory);
112 			}
113 		} else {
114 			ret = NULL;
115 			xmlFreeDoc(ctxt->myDoc);
116 			ctxt->myDoc = NULL;
117 		}
118 		xmlFreeParserCtxt(ctxt);
119 	} else {
120 		ret = NULL;
121 	}
122 
123 /*
124 	xmlCleanupParser();
125 */
126 
127 	if (ret) {
128 		cleanup_xml_node((xmlNodePtr)ret);
129 	}
130 	return ret;
131 }
132 
soap_xmlParseMemory(const void * buf,size_t buf_size)133 xmlDocPtr soap_xmlParseMemory(const void *buf, size_t buf_size)
134 {
135 	xmlParserCtxtPtr ctxt = NULL;
136 	xmlDocPtr ret;
137 
138 
139 /*
140 	xmlInitParser();
141 */
142 	ctxt = xmlCreateMemoryParserCtxt(buf, buf_size);
143 	if (ctxt) {
144 		bool old;
145 
146 		php_libxml_sanitize_parse_ctxt_options(ctxt);
147 		ctxt->sax->ignorableWhitespace = soap_ignorableWhitespace;
148 		ctxt->sax->comment = soap_Comment;
149 		ctxt->sax->warning = NULL;
150 		ctxt->sax->error = NULL;
151 		/*ctxt->sax->fatalError = NULL;*/
152 		/* TODO: In libxml2 2.14.0 change this to the new options API so we don't rely on deprecated APIs. */
153 		ZEND_DIAGNOSTIC_IGNORED_START("-Wdeprecated-declarations")
154 		ctxt->options |= XML_PARSE_HUGE;
155 		ZEND_DIAGNOSTIC_IGNORED_END
156 		old = php_libxml_disable_entity_loader(1);
157 		xmlParseDocument(ctxt);
158 		php_libxml_disable_entity_loader(old);
159 		if (ctxt->wellFormed) {
160 			ret = ctxt->myDoc;
161 			if (ret->URL == NULL && ctxt->directory != NULL) {
162 				ret->URL = xmlCharStrdup(ctxt->directory);
163 			}
164 		} else {
165 			ret = NULL;
166 			xmlFreeDoc(ctxt->myDoc);
167 			ctxt->myDoc = NULL;
168 		}
169 		xmlFreeParserCtxt(ctxt);
170 	} else {
171 		ret = NULL;
172 	}
173 
174 /*
175 	xmlCleanupParser();
176 */
177 
178 /*
179 	if (ret) {
180 		cleanup_xml_node((xmlNodePtr)ret);
181 	}
182 */
183 	return ret;
184 }
185 
attr_find_ns(xmlAttrPtr node)186 xmlNsPtr attr_find_ns(xmlAttrPtr node)
187 {
188 	if (node->ns) {
189 		return node->ns;
190 	} else if (node->parent->ns) {
191 		return node->parent->ns;
192 	} else {
193 		return xmlSearchNs(node->doc, node->parent, NULL);
194 	}
195 }
196 
node_find_ns(xmlNodePtr node)197 xmlNsPtr node_find_ns(xmlNodePtr node)
198 {
199 	if (node->ns) {
200 		return node->ns;
201 	} else {
202 		return xmlSearchNs(node->doc, node, NULL);
203 	}
204 }
205 
/* Checks whether an attribute matches a local name and a namespace URI.
 *
 * name: local name to match, or NULL to accept any name.
 * ns:   namespace href to match, or NULL to skip the namespace check.
 *
 * Returns TRUE on a match, FALSE otherwise (including when a namespace is
 * requested but attr_find_ns() yields none).
 */
int attr_is_equal_ex(xmlAttrPtr node, char *name, char *ns)
{
	xmlNsPtr resolved;

	if (name != NULL && (node->name == NULL || strcmp((char*)node->name, name) != 0)) {
		return FALSE;
	}
	if (ns == NULL) {
		return TRUE;
	}

	resolved = attr_find_ns(node);
	if (resolved == NULL) {
		return FALSE;
	}
	return (strcmp((char*)resolved->href, ns) == 0);
}
221 
/* Checks whether a node matches a local name and a namespace URI.
 *
 * name: local name to match, or NULL to accept any name.
 * ns:   namespace href to match, or NULL to skip the namespace check.
 *
 * Returns TRUE on a match, FALSE otherwise (including when a namespace is
 * requested but node_find_ns() yields none).
 */
int node_is_equal_ex(xmlNodePtr node, char *name, char *ns)
{
	xmlNsPtr resolved;

	if (name != NULL && (node->name == NULL || strcmp((char*)node->name, name) != 0)) {
		return FALSE;
	}
	if (ns == NULL) {
		return TRUE;
	}

	resolved = node_find_ns(node);
	if (resolved == NULL) {
		return FALSE;
	}
	return (strcmp((char*)resolved->href, ns) == 0);
}
237 
238 
/* Walks an attribute list starting at `node` (following ->next) and returns
 * the first attribute accepted by attr_is_equal_ex(name, ns), or NULL when
 * the list is exhausted. A NULL `node` yields NULL. */
xmlAttrPtr get_attribute_ex(xmlAttrPtr node, char *name, char *ns)
{
	xmlAttrPtr cur;

	for (cur = node; cur != NULL; cur = cur->next) {
		if (attr_is_equal_ex(cur, name, ns)) {
			return cur;
		}
	}
	return NULL;
}
249 
/* Walks a sibling list starting at `node` (following ->next) and returns the
 * first node accepted by node_is_equal_ex(name, ns), or NULL when the list is
 * exhausted. Children are not searched. A NULL `node` yields NULL. */
xmlNodePtr get_node_ex(xmlNodePtr node, char *name, char *ns)
{
	xmlNodePtr cur;

	for (cur = node; cur != NULL; cur = cur->next) {
		if (node_is_equal_ex(cur, name, ns)) {
			return cur;
		}
	}
	return NULL;
}
260 
/* Depth-first search over `node`, its following siblings and their
 * descendants for the first node accepted by node_is_equal_ex(name, ns).
 * A node is tested before its children; the children of a matching node are
 * never visited. Returns the match, or NULL when nothing matches. */
xmlNodePtr get_node_recurisve_ex(xmlNodePtr node, char *name, char *ns)
{
	xmlNodePtr cur;

	for (cur = node; cur != NULL; cur = cur->next) {
		if (node_is_equal_ex(cur, name, ns)) {
			return cur;
		}
		if (cur->children != NULL) {
			xmlNodePtr hit = get_node_recurisve_ex(cur->children, name, ns);
			if (hit != NULL) {
				return hit;
			}
		}
	}
	return NULL;
}
276 
/* Searches `node` and its following siblings (not their children) for a node
 * that carries a given attribute with a given value.
 *
 * name, name_ns:    when name is non-NULL, only nodes accepted by
 *                   get_node_ex(name, name_ns) are considered; when NULL,
 *                   every sibling is considered.
 * attribute, attr_ns: attribute to look up via get_attribute_ex().
 * value:            expected attribute value; it is passed to strcmp() and
 *                   therefore must not be NULL.
 *
 * An attribute that has no child node, or whose first child has no content,
 * is treated as not matching instead of being dereferenced.
 *
 * Returns the first matching node, or NULL when there is none.
 */
xmlNodePtr get_node_with_attribute_ex(xmlNodePtr node, char *name, char *name_ns, char *attribute, char *value, char *attr_ns)
{
	while (node != NULL) {
		xmlAttrPtr attr;

		if (name != NULL) {
			/* Skip ahead to the next sibling with the requested name. */
			node = get_node_ex(node, name, name_ns);
			if (node == NULL) {
				return NULL;
			}
		}

		attr = get_attribute_ex(node->properties, attribute, attr_ns);
		if (attr != NULL
		 && attr->children != NULL
		 && attr->children->content != NULL
		 && strcmp((char*)attr->children->content, value) == 0) {
			return node;
		}
		node = node->next;
	}
	return NULL;
}
297 
/* Depth-first search over `node`, its following siblings and their
 * descendants for a node that carries a given attribute with a given value.
 *
 * name, name_ns:    node filter applied through node_is_equal_ex(); a NULL
 *                   name accepts any node name.
 * attribute, attr_ns: attribute to look up via get_attribute_ex().
 * value:            expected attribute value; it is passed to strcmp() and
 *                   therefore must not be NULL.
 *
 * A node is tested before its children. An attribute that has no child node,
 * or whose first child has no content, is treated as not matching instead of
 * being dereferenced.
 *
 * Returns the first matching node, or NULL when there is none.
 */
xmlNodePtr get_node_with_attribute_recursive_ex(xmlNodePtr node, char *name, char *name_ns, char *attribute, char *value, char *attr_ns)
{
	while (node != NULL) {
		if (node_is_equal_ex(node, name, name_ns)) {
			xmlAttrPtr attr = get_attribute_ex(node->properties, attribute, attr_ns);
			if (attr != NULL
			 && attr->children != NULL
			 && attr->children->content != NULL
			 && strcmp((char*)attr->children->content, value) == 0) {
				return node;
			}
		}
		if (node->children != NULL) {
			xmlNodePtr tmp = get_node_with_attribute_recursive_ex(node->children, name, name_ns, attribute, value, attr_ns);
			if (tmp) {
				return tmp;
			}
		}
		node = node->next;
	}
	return NULL;
}
317 
318 /* namespace is either a copy or NULL, value is never NULL and never a copy. */
parse_namespace(const xmlChar * inval,const char ** value,char ** namespace)319 void parse_namespace(const xmlChar *inval, const char **value, char **namespace)
320 {
321 	const char *found = strrchr((const char *) inval, ':');
322 
323 	if (found != NULL && found != (const char *) inval) {
324 		(*namespace) = estrndup((const char *) inval, found - (const char *) inval);
325 		(*value) = ++found;
326 	} else {
327 		(*value) = (const char *) inval;
328 		(*namespace) = NULL;
329 	}
330 }
331