xref: /php-src/ext/dom/xpath.c (revision a136117e)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Christian Stocker <chregu@php.net>                          |
14    |          Rob Richards <rrichards@php.net>                            |
15    +----------------------------------------------------------------------+
16 */
17 
18 #ifdef HAVE_CONFIG_H
19 #include "config.h"
20 #endif
21 
22 #include "php.h"
23 #if defined(HAVE_LIBXML) && defined(HAVE_DOM)
24 #include "php_dom.h"
25 #include "namespace_compat.h"
26 
27 #define PHP_DOM_XPATH_QUERY 0
28 #define PHP_DOM_XPATH_EVALUATE 1
29 
30 /*
31 * class DOMXPath
32 */
33 
34 #ifdef LIBXML_XPATH_ENABLED
35 
/* Releases everything a DOMXPath object owns: the standard object state,
 * the libxml XPath context (together with the document reference taken for
 * it), and finally the registered XPath callbacks. */
void dom_xpath_objects_free_storage(zend_object *object)
{
	dom_xpath_object *xpath_obj = php_xpath_obj_from_obj(object);
	xmlXPathContextPtr xpath_ctx;

	zend_object_std_dtor(&xpath_obj->dom.std);

	/* A context only exists once the constructor has run successfully. */
	xpath_ctx = (xmlXPathContextPtr) xpath_obj->dom.ptr;
	if (xpath_ctx) {
		xmlXPathFreeContext(xpath_ctx);
		php_libxml_decrement_doc_ref((php_libxml_node_object *) &xpath_obj->dom);
	}

	php_dom_xpath_callbacks_dtor(&xpath_obj->xpath_callbacks);
}
49 
dom_xpath_get_gc(zend_object * object,zval ** table,int * n)50 HashTable *dom_xpath_get_gc(zend_object *object, zval **table, int *n)
51 {
52 	dom_xpath_object *intern = php_xpath_obj_from_obj(object);
53 	return php_dom_xpath_callbacks_get_gc_for_whole_object(&intern->xpath_callbacks, object, table, n);
54 }
55 
/* Proxy factory handed to the XPath callbacks layer: wraps `node` in a DOM
 * object stored in `child`. Namespace declaration nodes are not expected. */
static void dom_xpath_proxy_factory(xmlNodePtr node, zval *child, dom_object *intern, xmlXPathParserContextPtr ctxt)
{
	ZEND_ASSERT(node->type != XML_NAMESPACE_DECL);

	/* The parser context is part of the factory signature but unused here. */
	ZEND_IGNORE_VALUE(ctxt);

	php_dom_create_object(node, child, intern);
}
64 
dom_xpath_ext_fetch_intern(xmlXPathParserContextPtr ctxt)65 static dom_xpath_object *dom_xpath_ext_fetch_intern(xmlXPathParserContextPtr ctxt)
66 {
67 	if (UNEXPECTED(!zend_is_executing())) {
68 		xmlGenericError(xmlGenericErrorContext,
69 		"xmlExtFunctionTest: Function called from outside of PHP\n");
70 		return NULL;
71 	}
72 
73 	dom_xpath_object *intern = (dom_xpath_object *) ctxt->context->userData;
74 	if (UNEXPECTED(intern == NULL)) {
75 		xmlGenericError(xmlGenericErrorContext,
76 		"xmlExtFunctionTest: failed to get the internal object\n");
77 		return NULL;
78 	}
79 
80 	return intern;
81 }
82 
/* Common body of the "http://php.net/xpath" extension functions: dispatches
 * the call to the PHP-namespace callbacks using the given node-set
 * evaluation mode. If the owning object cannot be resolved, the argument
 * stack is cleaned instead. */
static void dom_xpath_ext_function_php(xmlXPathParserContextPtr ctxt, int nargs, php_dom_xpath_nodeset_evaluation_mode evaluation_mode) /* {{{ */
{
	dom_xpath_object *xpath_obj = dom_xpath_ext_fetch_intern(ctxt);

	if (xpath_obj == NULL) {
		php_dom_xpath_callbacks_clean_argument_stack(ctxt, nargs);
		return;
	}

	php_dom_xpath_callbacks_call_php_ns(
		&xpath_obj->xpath_callbacks,
		ctxt,
		nargs,
		evaluation_mode,
		&xpath_obj->dom,
		dom_xpath_proxy_factory
	);
}
92 /* }}} */
93 
/* XPath extension function variant that uses the node-set-to-string
 * evaluation mode. */
static void dom_xpath_ext_function_string_php(xmlXPathParserContextPtr ctxt, int nargs) /* {{{ */
{
	const php_dom_xpath_nodeset_evaluation_mode mode = PHP_DOM_XPATH_EVALUATE_NODESET_TO_STRING;

	dom_xpath_ext_function_php(ctxt, nargs, mode);
}
98 /* }}} */
99 
/* XPath extension function variant that uses the node-set-to-node-set
 * evaluation mode. */
static void dom_xpath_ext_function_object_php(xmlXPathParserContextPtr ctxt, int nargs) /* {{{ */
{
	const php_dom_xpath_nodeset_evaluation_mode mode = PHP_DOM_XPATH_EVALUATE_NODESET_TO_NODESET;

	dom_xpath_ext_function_php(ctxt, nargs, mode);
}
104 /* }}} */
105 
/* libxml entry point registered for functions in custom namespaces:
 * dispatches to the custom-namespace callbacks with node-set arguments kept
 * as node-sets. If the owning object cannot be resolved, the argument stack
 * is cleaned instead. */
static void dom_xpath_ext_function_trampoline(xmlXPathParserContextPtr ctxt, int nargs)
{
	dom_xpath_object *xpath_obj = dom_xpath_ext_fetch_intern(ctxt);

	if (xpath_obj == NULL) {
		php_dom_xpath_callbacks_clean_argument_stack(ctxt, nargs);
		return;
	}

	php_dom_xpath_callbacks_call_custom_ns(
		&xpath_obj->xpath_callbacks,
		ctxt,
		nargs,
		PHP_DOM_XPATH_EVALUATE_NODESET_TO_NODESET,
		&xpath_obj->dom,
		dom_xpath_proxy_factory
	);
}
115 
116 /* {{{ */
/* Shared constructor body for the DOMXPath and DOM_XPath __construct methods.
 *
 * Parses (document [, registerNodeNS = true]), creates a fresh libxml XPath
 * context for the document, and attaches it to $this. `document_ce` is the
 * class the document argument must be an instance of. If the object already
 * holds a context (the constructor is being run again), the old context, its
 * document reference and the registered callbacks are discarded first. */
static void dom_xpath_construct(INTERNAL_FUNCTION_PARAMETERS, zend_class_entry *document_ce)
{
	zval *document_zv;
	bool register_node_ns = true;
	xmlDocPtr xml_doc = NULL;
	dom_object *document_obj;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "O|b", &document_zv, document_ce, &register_node_ns) != SUCCESS) {
		RETURN_THROWS();
	}

	DOM_GET_OBJ(xml_doc, document_zv, xmlDocPtr, document_obj);

	xmlXPathContextPtr new_ctx = xmlXPathNewContext(xml_doc);
	if (!new_ctx) {
		php_dom_throw_error(INVALID_STATE_ERR, true);
		RETURN_THROWS();
	}

	dom_xpath_object *xpath_obj = Z_XPATHOBJ_P(ZEND_THIS);
	xmlXPathContextPtr previous_ctx = xpath_obj->dom.ptr;
	if (previous_ctx) {
		/* Re-construction: drop the previous state and start with empty callbacks. */
		php_libxml_decrement_doc_ref((php_libxml_node_object *) &xpath_obj->dom);
		xmlXPathFreeContext(previous_ctx);
		php_dom_xpath_callbacks_dtor(&xpath_obj->xpath_callbacks);
		php_dom_xpath_callbacks_ctor(&xpath_obj->xpath_callbacks);
	}

	/* Built-in extension functions living in the PHP XPath namespace. */
	const xmlChar *php_xpath_ns = (const xmlChar *) "http://php.net/xpath";
	xmlXPathRegisterFuncNS(new_ctx, (const xmlChar *) "functionString", php_xpath_ns, dom_xpath_ext_function_string_php);
	xmlXPathRegisterFuncNS(new_ctx, (const xmlChar *) "function", php_xpath_ns, dom_xpath_ext_function_object_php);

	/* Link object and context both ways so extension functions can find the object. */
	xpath_obj->dom.ptr = new_ctx;
	new_ctx->userData = (void *) xpath_obj;

	xpath_obj->register_node_ns = register_node_ns;

	/* The document must be assigned before the reference is taken on it. */
	xpath_obj->dom.document = document_obj->document;
	php_libxml_increment_doc_ref((php_libxml_node_object *) &xpath_obj->dom, xml_doc);
}
158 
PHP_METHOD(DOMXPath,__construct)159 PHP_METHOD(DOMXPath, __construct)
160 {
161 	dom_xpath_construct(INTERNAL_FUNCTION_PARAM_PASSTHRU, dom_document_class_entry);
162 }
163 
PHP_METHOD(DOM_XPath,__construct)164 PHP_METHOD(DOM_XPath, __construct)
165 {
166 	dom_xpath_construct(INTERNAL_FUNCTION_PARAM_PASSTHRU, dom_abstract_base_document_class_entry);
167 }
168 /* }}} end DOMXPath::__construct */
169 
170 /* {{{ document DOMDocument*/
dom_xpath_document_read(dom_object * obj,zval * retval)171 zend_result dom_xpath_document_read(dom_object *obj, zval *retval)
172 {
173 	xmlDoc *docp = NULL;
174 	xmlXPathContextPtr ctx = (xmlXPathContextPtr) obj->ptr;
175 
176 	if (ctx) {
177 		docp = (xmlDocPtr) ctx->doc;
178 	}
179 
180 	php_dom_create_object((xmlNodePtr) docp, retval, obj);
181 	return SUCCESS;
182 }
183 /* }}} */
184 
185 /* {{{ registerNodeNamespaces bool*/
php_xpath_obj_from_dom_obj(dom_object * obj)186 static inline dom_xpath_object *php_xpath_obj_from_dom_obj(dom_object *obj) {
187 	return (dom_xpath_object*)((char*)(obj) - XtOffsetOf(dom_xpath_object, dom));
188 }
189 
dom_xpath_register_node_ns_read(dom_object * obj,zval * retval)190 zend_result dom_xpath_register_node_ns_read(dom_object *obj, zval *retval)
191 {
192 	ZVAL_BOOL(retval, php_xpath_obj_from_dom_obj(obj)->register_node_ns);
193 
194 	return SUCCESS;
195 }
196 
dom_xpath_register_node_ns_write(dom_object * obj,zval * newval)197 zend_result dom_xpath_register_node_ns_write(dom_object *obj, zval *newval)
198 {
199 	php_xpath_obj_from_dom_obj(obj)->register_node_ns = zend_is_true(newval);
200 
201 	return SUCCESS;
202 }
203 /* }}} */
204 
205 /* {{{ */
PHP_METHOD(DOMXPath,registerNamespace)206 PHP_METHOD(DOMXPath, registerNamespace)
207 {
208 	size_t prefix_len, ns_uri_len;
209 	unsigned char *prefix, *ns_uri;
210 
211 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "ss", &prefix, &prefix_len, &ns_uri, &ns_uri_len) == FAILURE) {
212 		RETURN_THROWS();
213 	}
214 
215 	dom_xpath_object *intern = Z_XPATHOBJ_P(ZEND_THIS);
216 
217 	xmlXPathContextPtr ctxp = intern->dom.ptr;
218 	if (ctxp == NULL) {
219 		zend_throw_error(NULL, "Invalid XPath Context");
220 		RETURN_THROWS();
221 	}
222 
223 	if (xmlXPathRegisterNs(ctxp, prefix, ns_uri) != 0) {
224 		RETURN_FALSE;
225 	}
226 	RETURN_TRUE;
227 }
228 /* }}} */
229 
/* Configures the node map behind `intern` as a DOM_NODESET whose base object
 * is `baseobj`. The zval is copied by value, without adding a reference. */
static void dom_xpath_iter(zval *baseobj, dom_object *intern) /* {{{ */
{
	dom_nnodemap_object *map = (dom_nnodemap_object *) intern->ptr;

	map->nodetype = DOM_NODESET;
	ZVAL_COPY_VALUE(&map->baseobj_zv, baseobj);
}
237 /* }}} */
238 
/* Shared implementation of the query() and evaluate() methods.
 *
 * type   - PHP_DOM_XPATH_QUERY always produces a node list; any other value
 *          returns whatever result type the expression evaluated to
 *          (node list, bool, float, string, or null for anything else).
 * modern - selects the node class entry accepted for the context argument,
 *          turns failures into thrown errors instead of warning/false, and
 *          leaves namespace nodes out of node-set results. */
static void php_xpath_eval(INTERNAL_FUNCTION_PARAMETERS, int type, bool modern) /* {{{ */
{
	zval *context_zv = NULL;
	xmlNodePtr context_node = NULL;
	dom_object *context_obj;
	char *expr;
	size_t expr_len;
	size_t result_kind;

	dom_xpath_object *xpath_obj = Z_XPATHOBJ_P(ZEND_THIS);
	/* Starts from the object's setting; the optional third argument overrides it. */
	bool register_node_ns = xpath_obj->register_node_ns;
	zend_class_entry *node_ce = modern ? dom_modern_node_class_entry : dom_node_class_entry;

	if (zend_parse_parameters(ZEND_NUM_ARGS(), "s|O!b", &expr, &expr_len, &context_zv, node_ce, &register_node_ns) == FAILURE) {
		RETURN_THROWS();
	}

	xmlXPathContextPtr xpath_ctx = xpath_obj->dom.ptr;
	if (!xpath_ctx) {
		zend_throw_error(NULL, "Invalid XPath Context");
		RETURN_THROWS();
	}

	xmlDocPtr document = xpath_ctx->doc;
	if (!document) {
		if (modern) {
			zend_throw_error(NULL, "Invalid XPath Document Pointer");
			RETURN_THROWS();
		}
		php_error_docref(NULL, E_WARNING, "Invalid XPath Document Pointer");
		RETURN_FALSE;
	}

	if (context_zv) {
		DOM_GET_OBJ(context_node, context_zv, xmlNodePtr, context_obj);
	}

	/* No context node available: evaluate relative to the root element. */
	if (context_node == NULL) {
		context_node = xmlDocGetRootElement(document);
	}

	if (context_node != NULL && context_node->doc != document) {
		zend_throw_error(NULL, "Node from wrong document");
		RETURN_THROWS();
	}

	xpath_ctx->node = context_node;

	/* Temporarily expose the context node's in-scope namespaces to the evaluation. */
	const bool ns_in_use = register_node_ns && context_node != NULL;
	php_dom_in_scope_ns in_scope_ns;
	if (ns_in_use) {
		if (modern) {
			php_dom_libxml_ns_mapper *ns_mapper = php_dom_get_ns_mapper(&xpath_obj->dom);
			in_scope_ns = php_dom_get_in_scope_ns(ns_mapper, context_node);
		} else {
			in_scope_ns = php_dom_get_in_scope_ns_legacy(context_node);
		}
		xpath_ctx->namespaces = in_scope_ns.list;
		xpath_ctx->nsNr = in_scope_ns.count;
	}

	xmlXPathObjectPtr result = xmlXPathEvalExpression(BAD_CAST expr, xpath_ctx);
	xpath_ctx->node = NULL;

	if (ns_in_use) {
		php_dom_in_scope_ns_destroy(&in_scope_ns);
		xpath_ctx->namespaces = NULL;
		xpath_ctx->nsNr = 0;
	}

	if (!result) {
		if (!modern) {
			/* Should have already emit a warning by libxml */
			RETURN_FALSE;
		}
		/* Do not shadow an exception that is already pending. */
		if (!EG(exception)) {
			zend_throw_error(NULL, "Could not evaluate XPath expression");
		}
		RETURN_THROWS();
	}

	/* query() always yields a node list, whatever the expression evaluated to. */
	result_kind = (type == PHP_DOM_XPATH_QUERY) ? XPATH_NODESET : result->type;

	switch (result_kind) {
		case XPATH_NODESET:
		{
			zval nodes;
			xmlNodeSetPtr set = (result->type == XPATH_NODESET) ? result->nodesetval : NULL;

			if (set == NULL || set->nodeNr == 0) {
				ZVAL_EMPTY_ARRAY(&nodes);
			} else {
				array_init_size(&nodes, set->nodeNr);
				zend_hash_real_init_packed(Z_ARRVAL(nodes));

				for (int idx = 0; idx < set->nodeNr; idx++) {
					xmlNodePtr node = set->nodeTab[idx];
					zval child;

					if (node->type != XML_NAMESPACE_DECL) {
						php_dom_create_object(node, &child, &xpath_obj->dom);
					} else {
						/* Namespace nodes are left out of modern results. */
						if (modern) {
							continue;
						}

						xmlNodePtr nsparent = node->_private;
						xmlNsPtr original = (xmlNsPtr) node;

						/* Make sure parent dom object exists, so we can take an extra reference. */
						zval parent_zval; /* don't destroy me, my lifetime is transferred to the fake namespace decl */
						php_dom_create_object(nsparent, &parent_zval, &xpath_obj->dom);
						dom_object *parent_intern = Z_DOMOBJ_P(&parent_zval);

						node = php_dom_create_fake_namespace_decl(nsparent, original, &child, parent_intern);
					}

					add_next_index_zval(&nodes, &child);
				}
			}

			/* Hand the collected array to a node-list iterator object. */
			php_dom_create_iterator(return_value, DOM_NODELIST, modern);
			dom_xpath_iter(&nodes, Z_DOMOBJ_P(return_value));
			break;
		}

		case XPATH_BOOLEAN:
			RETVAL_BOOL(result->boolval);
			break;

		case XPATH_NUMBER:
			RETVAL_DOUBLE(result->floatval);
			break;

		case XPATH_STRING:
			RETVAL_STRING((char *) result->stringval);
			break;

		default:
			RETVAL_NULL();
			break;
	}

	xmlXPathFreeObject(result);
}
386 /* }}} */
387 
388 /* {{{ */
PHP_METHOD(DOMXPath,query)389 PHP_METHOD(DOMXPath, query)
390 {
391 	php_xpath_eval(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_DOM_XPATH_QUERY, false);
392 }
393 
PHP_METHOD(DOM_XPath,query)394 PHP_METHOD(DOM_XPath, query)
395 {
396 	php_xpath_eval(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_DOM_XPATH_QUERY, true);
397 }
398 /* }}} end dom_xpath_query */
399 
400 /* {{{ */
PHP_METHOD(DOMXPath,evaluate)401 PHP_METHOD(DOMXPath, evaluate)
402 {
403 	php_xpath_eval(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_DOM_XPATH_EVALUATE, false);
404 }
405 
PHP_METHOD(DOM_XPath,evaluate)406 PHP_METHOD(DOM_XPath, evaluate)
407 {
408 	php_xpath_eval(INTERNAL_FUNCTION_PARAM_PASSTHRU, PHP_DOM_XPATH_EVALUATE, true);
409 }
410 /* }}} end dom_xpath_evaluate */
411 
412 /* {{{ */
PHP_METHOD(DOMXPath,registerPhpFunctions)413 PHP_METHOD(DOMXPath, registerPhpFunctions)
414 {
415 	dom_xpath_object *intern = Z_XPATHOBJ_P(ZEND_THIS);
416 
417 	zend_string *callable_name = NULL;
418 	HashTable *callable_ht = NULL;
419 
420 	ZEND_PARSE_PARAMETERS_START(0, 1)
421 		Z_PARAM_OPTIONAL
422 		Z_PARAM_ARRAY_HT_OR_STR_OR_NULL(callable_ht, callable_name)
423 	ZEND_PARSE_PARAMETERS_END();
424 
425 	php_dom_xpath_callbacks_update_method_handler(
426 		&intern->xpath_callbacks,
427 		intern->dom.ptr,
428 		NULL,
429 		callable_name,
430 		callable_ht,
431 		PHP_DOM_XPATH_CALLBACK_NAME_VALIDATE_NULLS,
432 		NULL
433 	);
434 }
435 /* }}} end dom_xpath_register_php_functions */
436 
dom_xpath_register_func_in_ctx(void * ctxt,const zend_string * ns,const zend_string * name)437 static void dom_xpath_register_func_in_ctx(void *ctxt, const zend_string *ns, const zend_string *name)
438 {
439 	xmlXPathRegisterFuncNS((xmlXPathContextPtr) ctxt, (const xmlChar *) ZSTR_VAL(name), (const xmlChar *) ZSTR_VAL(ns), dom_xpath_ext_function_trampoline);
440 }
441 
PHP_METHOD(DOMXPath,registerPhpFunctionNS)442 PHP_METHOD(DOMXPath, registerPhpFunctionNS)
443 {
444 	dom_xpath_object *intern = Z_XPATHOBJ_P(ZEND_THIS);
445 
446 	zend_string *namespace, *name;
447 	zend_fcall_info fci;
448 	zend_fcall_info_cache fcc;
449 
450 	ZEND_PARSE_PARAMETERS_START(3, 3)
451 		Z_PARAM_PATH_STR(namespace)
452 		Z_PARAM_PATH_STR(name)
453 		Z_PARAM_FUNC_NO_TRAMPOLINE_FREE(fci, fcc)
454 	ZEND_PARSE_PARAMETERS_END();
455 
456 	if (zend_string_equals_literal(namespace, "http://php.net/xpath")) {
457 		zend_argument_value_error(1, "must not be \"http://php.net/xpath\" because it is reserved by PHP");
458 		RETURN_THROWS();
459 	}
460 
461 	php_dom_xpath_callbacks_update_single_method_handler(
462 		&intern->xpath_callbacks,
463 		intern->dom.ptr,
464 		namespace,
465 		name,
466 		&fcc,
467 		PHP_DOM_XPATH_CALLBACK_NAME_VALIDATE_NCNAME,
468 		dom_xpath_register_func_in_ctx
469 	);
470 }
471 
472 /* {{{ */
PHP_METHOD(DOMXPath,quote)473 PHP_METHOD(DOMXPath, quote) {
474 	const char *input;
475 	size_t input_len;
476 	if (zend_parse_parameters(ZEND_NUM_ARGS(), "p", &input, &input_len) == FAILURE) {
477 		RETURN_THROWS();
478 	}
479 	if (memchr(input, '\'', input_len) == NULL) {
480 		zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false);
481 		output->val[0] = '\'';
482 		memcpy(output->val + 1, input, input_len);
483 		output->val[input_len + 1] = '\'';
484 		output->val[input_len + 2] = '\0';
485 		RETURN_STR(output);
486 	} else if (memchr(input, '"', input_len) == NULL) {
487 		zend_string *const output = zend_string_safe_alloc(1, input_len, 2, false);
488 		output->val[0] = '"';
489 		memcpy(output->val + 1, input, input_len);
490 		output->val[input_len + 1] = '"';
491 		output->val[input_len + 2] = '\0';
492 		RETURN_STR(output);
493 	} else {
494 		smart_str output = {0};
495 		// need to use the concat() trick published by Robert Rossney at https://stackoverflow.com/a/1352556/1067003
496 		smart_str_appendl(&output, "concat(", 7);
497 		const char *ptr = input;
498 		const char *const end = input + input_len;
499 		while (ptr < end) {
500 			const char *const single_quote_ptr = memchr(ptr, '\'', end - ptr);
501 			const char *const double_quote_ptr = memchr(ptr, '"', end - ptr);
502 			const size_t distance_to_single_quote = single_quote_ptr ? single_quote_ptr - ptr : end - ptr;
503 			const size_t distance_to_double_quote = double_quote_ptr ? double_quote_ptr - ptr : end - ptr;
504 			const size_t bytes_until_quote = MAX(distance_to_single_quote, distance_to_double_quote);
505 			const char quote_method = (distance_to_single_quote > distance_to_double_quote) ? '\'' : '"';
506 			smart_str_appendc(&output, quote_method);
507 			smart_str_appendl(&output, ptr, bytes_until_quote);
508 			smart_str_appendc(&output, quote_method);
509 			ptr += bytes_until_quote;
510 			smart_str_appendc(&output, ',');
511 		}
512 		ZEND_ASSERT(ptr == end);
513 		output.s->val[output.s->len - 1] = ')';
514 		RETURN_STR(smart_str_extract(&output));
515 	}
516 }
517 /* }}} */
518 
519 #endif /* LIBXML_XPATH_ENABLED */
520 
521 #endif
522