xref: /php-src/ext/dom/xml_serializer.c (revision ed54d6de)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Niels Dossche <nielsdos@php.net>                            |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20 
21 #include "php.h"
22 #if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23 #include "xml_serializer.h"
24 #include "private_data.h"
25 #include "namespace_compat.h"
26 #include "serialize_common.h"
27 #include "internal_helpers.h"
28 #include <libxml/chvalid.h>
29 
30 // TODO: implement iterative approach instead of recursive?
31 
32 /* This file implements the XML serialization algorithm.
33  * https://w3c.github.io/DOM-Parsing/#dom-xmlserializer-serializetostring (Date 2021-05-02)
34  *
35  * The following are spec issues that were fixed in this implementation, but are not yet fixed
36  * in the spec itself:
37  * https://github.com/w3c/DOM-Parsing/issues/28
38  * https://github.com/w3c/DOM-Parsing/issues/29
39  * https://github.com/w3c/DOM-Parsing/issues/38
40  * https://github.com/w3c/DOM-Parsing/issues/43
41  * https://github.com/w3c/DOM-Parsing/issues/44
42  * https://github.com/w3c/DOM-Parsing/issues/45
43  * https://github.com/w3c/DOM-Parsing/issues/47
44  * https://github.com/w3c/DOM-Parsing/issues/50
45  * https://github.com/w3c/DOM-Parsing/issues/52
46  * https://github.com/w3c/DOM-Parsing/issues/59
47  * https://github.com/w3c/DOM-Parsing/issues/71
48  */
49 
/* Error propagation helpers. A negative value of `x` is treated as a failure:
 * TRY returns -1 from the enclosing function, while TRY_OR_CLEANUP jumps to a
 * `cleanup` label that the enclosing function has to provide. */
#define TRY(x) do { if (UNEXPECTED((x) < 0)) { return -1; } } while (0)
#define TRY_OR_CLEANUP(x) do { if (UNEXPECTED((x) < 0)) { goto cleanup; } } while (0)

/* Writes a string literal to `out`. The `"" literal` concatenation makes the macro
 * fail to compile when `literal` is not a string literal. */
#define xmlOutputBufferWriteLit(out, literal) xmlOutputBufferWrite((out), strlen("" literal), "" literal)
54 
/* https://w3c.github.io/DOM-Parsing/#dfn-namespace-prefix-map
 * This associates a namespace uri with a list of possible prefixes. */
typedef struct {
	/* Separately allocated table keyed by namespace URI; each value is a HashTable holding the list of prefixes.
	 * The table is reference counted so that copies of the map can share it. */
	HashTable *ht;
} dom_xml_ns_prefix_map;
60 
/* https://w3c.github.io/DOM-Parsing/#dfn-local-prefixes-map */
typedef struct {
	/* Embedded table keyed by prefix; each value is a pointer to the namespace string registered for that prefix. */
	HashTable ht;
} dom_xml_local_prefix_map;
65 
/* The two parts of a qualified name, as written out by dom_xml_output_qname(). */
typedef struct {
	/* `prefix` may be NULL, in which case only `name` is written. */
	const xmlChar *prefix, *name;
} dom_qname_pair;
69 
/* State that is handed to dom_xml_serialization_algorithm() through its `ctx` parameter. */
typedef struct dom_xml_serialize_ctx {
	xmlSaveCtxtPtr ctxt;
	xmlOutputBufferPtr out;
	php_dom_private_data *private_data;
} dom_xml_serialize_ctx;
75 
/* Forward declaration of the serialization routine; being static, it is defined elsewhere in this translation unit.
 * The int return type matches the negative-means-failure convention used by the TRY macros. */
static int dom_xml_serialization_algorithm(
	dom_xml_serialize_ctx *ctx,
	dom_xml_ns_prefix_map *namespace_prefix_map,
	xmlNodePtr node,
	const xmlChar *namespace,
	unsigned int *prefix_index,
	int indent,
	bool require_well_formed
);
85 
/* Compares two strings for equality, where a NULL pointer is considered
 * to be the same as the empty string.
 * Returns true when both sides are equal under that rule. */
static bool dom_xml_str_equals_treat_nulls_as_empty(const xmlChar *s1, const xmlChar *s2)
{
	/* Substituting "" for NULL lets a single strcmp() cover every combination. */
	const char *lhs = s1 != NULL ? (const char *) s1 : "";
	const char *rhs = s2 != NULL ? (const char *) s2 : "";

	return strcmp(lhs, rhs) == 0;
}
100 
dom_xml_str_equals_treat_nulls_as_nulls(const xmlChar * s1,const xmlChar * s2)101 static zend_always_inline bool dom_xml_str_equals_treat_nulls_as_nulls(const xmlChar *s1, const xmlChar *s2)
102 {
103 	if (s1 == s2) {
104 		return true;
105 	}
106 	if (s1 == NULL || s2 == NULL) {
107 		return false;
108 	}
109 	return strcmp((const char *) s1, (const char *) s2) == 0;
110 }
111 
dom_xml_ns_prefix_map_ctor(dom_xml_ns_prefix_map * map)112 static zend_always_inline void dom_xml_ns_prefix_map_ctor(dom_xml_ns_prefix_map *map)
113 {
114 	ALLOC_HASHTABLE(map->ht);
115 	zend_hash_init(map->ht, 8, NULL, NULL, false);
116 }
117 
/* Releases everything held by the namespace prefix map: drops one reference on every prefix list stored
 * in the map (freeing the list, together with its owned prefix strings, when that was the last reference),
 * then destroys and frees the map's own table and resets map->ht to NULL.
 * dom_xml_ns_prefix_map_dtor() calls this once the table's reference count has dropped to zero. */
static void dom_xml_ns_prefix_map_destroy(dom_xml_ns_prefix_map *map)
{
	HashTable *list;
	ZEND_HASH_MAP_FOREACH_PTR(map->ht, list) {
		/* Lists are reference counted as well, only the last holder frees them. */
		if (GC_DELREF(list) == 0) {
			zval *tmp;
			ZEND_HASH_PACKED_FOREACH_VAL(list, tmp) {
				/* Prefixes flagged as owned are freed here; unowned ones are left alone. */
				if (DOM_Z_IS_OWNED(tmp)) {
					efree(Z_PTR_P(tmp));
				}
			} ZEND_HASH_FOREACH_END();

			zend_hash_destroy(list);
			efree(list);
		}
	} ZEND_HASH_FOREACH_END();

	zend_hash_destroy(map->ht);
	efree(map->ht);
	map->ht = NULL;
}
139 
dom_xml_ns_prefix_map_dtor(dom_xml_ns_prefix_map * map)140 static zend_always_inline void dom_xml_ns_prefix_map_dtor(dom_xml_ns_prefix_map *map)
141 {
142 	if (GC_DELREF(map->ht) == 0) {
143 		dom_xml_ns_prefix_map_destroy(map);
144 	}
145 }
146 
dom_xml_ns_prefix_map_copy(dom_xml_ns_prefix_map * dst,const dom_xml_ns_prefix_map * src)147 static zend_always_inline void dom_xml_ns_prefix_map_copy(dom_xml_ns_prefix_map *dst, const dom_xml_ns_prefix_map *src)
148 {
149 	dst->ht = src->ht;
150 	GC_ADDREF(dst->ht);
151 }
152 
dom_xml_local_prefix_map_ctor(dom_xml_local_prefix_map * map)153 static zend_always_inline void dom_xml_local_prefix_map_ctor(dom_xml_local_prefix_map *map)
154 {
155 	zend_hash_init(&map->ht, 8, NULL, NULL, false);
156 }
157 
dom_xml_local_prefix_map_dtor(dom_xml_local_prefix_map * map)158 static zend_always_inline void dom_xml_local_prefix_map_dtor(dom_xml_local_prefix_map *map)
159 {
160 	zend_hash_destroy(&map->ht);
161 }
162 
dom_xml_local_prefix_map_add(dom_xml_local_prefix_map * map,const xmlChar * prefix,size_t prefix_len,const xmlChar * ns)163 static zend_always_inline void dom_xml_local_prefix_map_add(
164 	dom_xml_local_prefix_map *map,
165 	const xmlChar *prefix,
166 	size_t prefix_len,
167 	const xmlChar *ns
168 )
169 {
170 	ZEND_ASSERT(prefix != NULL);
171 	zend_hash_str_add_ptr(&map->ht, (const char *) prefix, prefix_len, (void *) ns);
172 }
173 
dom_xml_local_prefix_map_find(const dom_xml_local_prefix_map * map,const xmlChar * prefix,size_t prefix_len)174 static zend_always_inline const xmlChar *dom_xml_local_prefix_map_find(
175 	const dom_xml_local_prefix_map *map,
176 	const xmlChar *prefix,
177 	size_t prefix_len
178 )
179 {
180 	ZEND_ASSERT(prefix != NULL);
181 	return zend_hash_str_find_ptr(&map->ht, (const char *) prefix, prefix_len);
182 }
183 
dom_xml_local_prefix_map_conflicts(const dom_xml_local_prefix_map * map,const xmlChar * prefix,size_t prefix_len,const xmlChar * ns)184 static zend_always_inline bool dom_xml_local_prefix_map_conflicts(
185 	const dom_xml_local_prefix_map *map,
186 	const xmlChar *prefix,
187 	size_t prefix_len,
188 	const xmlChar *ns
189 )
190 {
191 	const xmlChar *result = dom_xml_local_prefix_map_find(map, prefix, prefix_len);
192 	if (result == NULL) {
193 		return false;
194 	}
195 	return !dom_xml_str_equals_treat_nulls_as_empty(result, ns);
196 }
197 
dom_xml_local_prefix_map_contains(const dom_xml_local_prefix_map * map,const xmlChar * prefix,size_t prefix_len)198 static zend_always_inline bool dom_xml_local_prefix_map_contains(
199 	const dom_xml_local_prefix_map *map,
200 	const xmlChar *prefix,
201 	size_t prefix_len
202 )
203 {
204 	return dom_xml_local_prefix_map_find(map, prefix, prefix_len) != NULL;
205 }
206 
/* https://w3c.github.io/DOM-Parsing/#dfn-add
 *
 * Appends `prefix` to the list of prefixes that `map` associates with the namespace `ns`.
 * - prefix: must not be NULL.
 * - prefix_owned: when true the list entry is flagged as owned, and dom_xml_ns_prefix_map_destroy()
 *   will efree() the string when the list is freed; when false the string is only referenced.
 * - ns / ns_length: the key into the map. A NULL `ns` is replaced by the empty string; ns_length is
 *   used as passed in, so callers are expected to pass a matching length.
 *
 * Both the map's table and the individual lists are reference counted. Whatever is still shared
 * (refcount > 1) is duplicated before it gets modified. */
static void dom_xml_ns_prefix_map_add(
	dom_xml_ns_prefix_map *map,
	const xmlChar *prefix,
	bool prefix_owned,
	const xmlChar *ns,
	size_t ns_length
)
{
	ZEND_ASSERT(map->ht != NULL);
	ZEND_ASSERT(prefix != NULL);

	if (ns == NULL) {
		ns = BAD_CAST "";
	}

	/* The table is shared: give up our reference and continue on a private duplicate. */
	if (GC_REFCOUNT(map->ht) > 1) {
		GC_DELREF(map->ht);
		map->ht = zend_array_dup(map->ht);

		/* Every list reachable from the duplicated table gains a reference. */
		HashTable *list;
		ZEND_HASH_MAP_FOREACH_PTR(map->ht, list) {
			GC_ADDREF(list);
		} ZEND_HASH_FOREACH_END();
	}

	/* 1. Let candidates list be the result of retrieving a list from map where there exists a key in map
	 *    that matches the value of ns
	 *    or if there is no such key, then let candidates list be null. */
	HashTable *list = zend_hash_str_find_ptr(map->ht, (const char *) ns, ns_length);

	/* 2. If candidates list is null, then create a new list with prefix as the only item in the list,
	 *    and associate that list with a new key ns in map. */
	if (list == NULL) {
		ALLOC_HASHTABLE(list);
		zend_hash_init(list, 8, NULL, NULL, false);
		zend_hash_str_add_new_ptr(map->ht, (const char *) ns, ns_length, list);
	} else if (GC_REFCOUNT(list) > 1) {
		/* The list itself is shared: replace our entry by a duplicate before appending to it. */
		GC_DELREF(list);
		list = zend_array_dup(list);
		zend_hash_str_update_ptr(map->ht, (const char *) ns, ns_length, list);
	}

	/* 3. (Otherwise), append prefix to the end of candidates list. */
	zval tmp;
	if (prefix_owned) {
		DOM_Z_OWNED(&tmp, prefix);
	} else {
		DOM_Z_UNOWNED(&tmp, prefix);
	}
	zend_hash_next_index_insert_new(list, &tmp);
}
259 
260 /* https://w3c.github.io/DOM-Parsing/#dfn-found */
dom_get_candidates_list(dom_xml_ns_prefix_map * map,const xmlChar * ns,size_t ns_length)261 static zend_always_inline HashTable *dom_get_candidates_list(dom_xml_ns_prefix_map *map, const xmlChar *ns, size_t ns_length)
262 {
263 	ZEND_ASSERT(map->ht != NULL);
264 
265 	/* 1. Let candidates list be the result of retrieving a list from map where there exists a key in map that matches
266 	 *    the value of ns
267 	 *    or if there is no such key, then let candidates list be null. */
268 	return zend_hash_str_find_ptr(map->ht, (const char *) ns, ns_length);
269 }
270 
271 /* https://w3c.github.io/DOM-Parsing/#dfn-found */
dom_prefix_in_candidate_list(const HashTable * list,const xmlChar * prefix)272 static zend_always_inline bool dom_prefix_in_candidate_list(const HashTable *list, const xmlChar *prefix)
273 {
274 	ZEND_ASSERT(prefix != NULL);
275 
276 	if (list == NULL) {
277 		return false;
278 	}
279 
280 	/* 2. If the value of prefix occurs at least once in candidates list, return true, otherwise return false. */
281 	const char *tmp;
282 	ZEND_HASH_PACKED_FOREACH_PTR(list, tmp) {
283 		if (dom_xml_str_equals_treat_nulls_as_empty(BAD_CAST tmp, prefix)) {
284 			return true;
285 		}
286 	} ZEND_HASH_FOREACH_END();
287 
288 	return false;
289 }
290 
291 /* https://w3c.github.io/DOM-Parsing/#dfn-found */
dom_prefix_found_in_ns_prefix_map(dom_xml_ns_prefix_map * map,const xmlChar * prefix,const xmlChar * ns,size_t ns_length)292 static zend_always_inline bool dom_prefix_found_in_ns_prefix_map(
293 	dom_xml_ns_prefix_map *map,
294 	const xmlChar *prefix,
295 	const xmlChar *ns,
296 	size_t ns_length
297 )
298 {
299 	ZEND_ASSERT(ns != NULL);
300 	HashTable *list = dom_get_candidates_list(map, ns, ns_length);
301 	return dom_prefix_in_candidate_list(list, prefix);
302 }
303 
304 /* Helper to get the attribute value, will return "" instead of NULL for empty values, to mimic getAttribute()'s behaviour. */
dom_get_attribute_value(const xmlAttr * attr)305 static zend_always_inline const xmlChar *dom_get_attribute_value(const xmlAttr *attr)
306 {
307 	if (attr->children == NULL) {
308 		return BAD_CAST "";
309 	}
310 	return attr->children->content ? attr->children->content : BAD_CAST "";
311 }
312 
/* https://w3c.github.io/DOM-Parsing/#dfn-recording-the-namespace-information
 *
 * Walks the attributes of `element` (which must be an element node) and records every namespace prefix
 * declaration it finds in both `namespace_prefix_map` and `local_prefixes_map`.
 * The prefix strings are added unowned, i.e. the maps only reference the attribute names.
 *
 * Returns the value of the default namespace declaration on the element, or NULL if the element has none.
 * When there are several, the last one visited wins. */
static const xmlChar *dom_recording_the_namespace_information(
	dom_xml_ns_prefix_map *namespace_prefix_map,
	dom_xml_local_prefix_map *local_prefixes_map,
	xmlNodePtr element
)
{
	ZEND_ASSERT(element->type == XML_ELEMENT_NODE);

	/* 1. Let default namespace attr value be null. */
	const xmlChar *default_namespace_attr_value = NULL;

	/* 2. [MAIN] For each attribute attr in element's attributes, in the order they are specified in the element's attribute list: */
	for (xmlAttrPtr attr = element->properties; attr != NULL; attr = attr->next) {
		/* Steps 2.1-2.2 fetch namespace information from the attribute, but let's defer that for simplicity to the if's body. */

		/* 2.3. If the attribute namespace is the XMLNS namespace, then: */
		if (php_dom_ns_is_fast((xmlNodePtr) attr, php_dom_ns_is_xmlns_magic_token)) {
			/* 2.3.1. If attribute prefix is null, then attr is a default namespace declaration.
			 *        Set the default namespace attr value to attr's value and stop running these steps,
			 *        returning to Main to visit the next attribute. */
			if (attr->ns->prefix == NULL) {
				default_namespace_attr_value = dom_get_attribute_value(attr);
				continue;
			}
			/* 2.3.2. Otherwise, the attribute prefix is not null and attr is a namespace prefix definition.
			 *        Run the following steps: */
			else {
				/* 2.3.2.1. Let prefix definition be the value of attr's localName. */
				const xmlChar *prefix_definition = attr->name;
				ZEND_ASSERT(prefix_definition != NULL);

				/* 2.3.2.2. Let namespace definition be the value of attr's value. */
				const xmlChar *namespace_definition = dom_get_attribute_value(attr);
				ZEND_ASSERT(namespace_definition != NULL);

				/* 2.3.2.3. If namespace definition is the XML namespace, then stop running these steps,
				 *          and return to Main to visit the next attribute. */
				if (strcmp((const char *) namespace_definition, DOM_XML_NS_URI) == 0) {
					continue;
				}

				/* 2.3.2.4. If namespace definition is the empty string (the declarative form of having no namespace),
				 *          then let namespace definition be null instead.
				 *          => This gets delayed until later down. */

				/* Computed while namespace definition is still guaranteed to be non-NULL. */
				size_t namespace_definition_length = strlen((const char *) namespace_definition);

				/* 2.3.2.5. If prefix definition is found in map given the namespace namespace definition,
				 *          then stop running these steps, and return to Main to visit the next attribute. */
				if (dom_prefix_found_in_ns_prefix_map(namespace_prefix_map, prefix_definition, namespace_definition, namespace_definition_length)) {
					continue;
				}

				/* Delayed step 2.3.2.4 */
				if (*namespace_definition == '\0') {
					namespace_definition = NULL;
				}

				/* 2.3.2.6. Add the prefix prefix definition to map given namespace namespace definition. */
				dom_xml_ns_prefix_map_add(namespace_prefix_map, prefix_definition, false, namespace_definition, namespace_definition_length);

				/* 2.3.2.7. Add the value of prefix definition as a new key to the local prefixes map,
				 *          with the namespace definition as the key's value replacing the value of null with the empty string if applicable. */
				size_t prefix_definition_length = strlen((const char *) prefix_definition);
				namespace_definition = namespace_definition == NULL ? BAD_CAST "" : namespace_definition;
				dom_xml_local_prefix_map_add(local_prefixes_map, prefix_definition, prefix_definition_length, namespace_definition);
			}
		}
	}

	/* 3. Return the value of default namespace attr value. */
	return default_namespace_attr_value;
}
387 
/* https://w3c.github.io/DOM-Parsing/#dfn-retrieving-a-preferred-prefix-string
 *
 * Picks a prefix for namespace `ns` out of the prefixes recorded in `namespace_prefix_map`.
 * Candidates that the local prefixes map binds to a different namespace are skipped.
 * - preferred_prefix: returned if it is among the remaining candidates (NULL is treated as the empty string).
 * - ns / ns_length: namespace to look up; a NULL `ns` is looked up as the empty string.
 *
 * Returns the preferred prefix if available, otherwise the most recently added non-conflicting candidate,
 * or NULL when the namespace is unknown or every candidate conflicts.
 * The returned pointer is the one stored in the list, no copy is made. */
static const xmlChar *dom_retrieve_a_preferred_prefix_string(
	dom_xml_ns_prefix_map *namespace_prefix_map,
	dom_xml_local_prefix_map *local_prefixes_map,
	const xmlChar *preferred_prefix,
	const xmlChar *ns,
	size_t ns_length
)
{
	ZEND_ASSERT(namespace_prefix_map->ht != NULL);

	if (ns == NULL) {
		ns = BAD_CAST "";
	}

	/* 1. Let candidates list be the result of retrieving a list from map where there exists a key in map that matches
	 *    the value of ns or if there is no such key, then stop running these steps, and return the null value. */
	HashTable *list = dom_get_candidates_list(namespace_prefix_map, ns, ns_length);
	if (list == NULL) {
		return NULL;
	}

	/* 2. Otherwise, for each prefix value prefix in candidates list, iterating from beginning to end: */
	const xmlChar *prefix = NULL;
	const xmlChar *last_non_conflicting_in_list = NULL;

	/* Reverse so that the "nearest" ns gets priority: https://github.com/w3c/DOM-Parsing/issues/45 */
	ZEND_HASH_PACKED_REVERSE_FOREACH_PTR(list, prefix) {
		ZEND_ASSERT(prefix != NULL);

		/* 2.1. If prefix matches preferred prefix, then stop running these steps and return prefix. */
		/* Adapted for https://github.com/w3c/DOM-Parsing/issues/45 */
		if (!dom_xml_local_prefix_map_conflicts(local_prefixes_map, prefix, strlen((const char *) prefix), ns)) {
			if (dom_xml_str_equals_treat_nulls_as_empty(preferred_prefix, prefix)) {
				return prefix;
			}

			/* Only the first hit of the reverse walk is remembered, i.e. the entry closest to the end of the list. */
			if (last_non_conflicting_in_list == NULL) {
				last_non_conflicting_in_list = prefix;
			}
		}
	} ZEND_HASH_FOREACH_END();

	/* 2.2. If prefix is the last item in the candidates list, then stop running these steps and return prefix. */
	/* Note: previously the last item was "prefix", but we loop backwards now. */
	return last_non_conflicting_in_list;
}
435 
436 /* https://w3c.github.io/DOM-Parsing/#dfn-generating-a-prefix */
dom_xml_generate_a_prefix(dom_xml_ns_prefix_map * map,dom_xml_local_prefix_map * local_prefixes_map,const xmlChar * new_namespace,size_t new_namespace_length,unsigned int * prefix_index)437 static xmlChar *dom_xml_generate_a_prefix(
438 	dom_xml_ns_prefix_map *map,
439 	dom_xml_local_prefix_map *local_prefixes_map,
440 	const xmlChar *new_namespace,
441 	size_t new_namespace_length,
442 	unsigned int *prefix_index
443 )
444 {
445 	/* 1. Let generated prefix be the concatenation of the string "ns" and the current numerical value of prefix index. */
446 	char buffer[32];
447 	buffer[0] = 'n';
448 	buffer[1] = 's';
449 	size_t length;
450 	do {
451 		length = snprintf(buffer + 2, sizeof(buffer) - 2, "%u", *prefix_index) + 2;
452 
453 		/* 2. Let the value of prefix index be incremented by one. */
454 		(*prefix_index)++;
455 
456 		/* Loop condition is for https://github.com/w3c/DOM-Parsing/issues/44 */
457 	} while (dom_xml_local_prefix_map_contains(local_prefixes_map, (const xmlChar *) buffer, length));
458 
459 	xmlChar *generated_prefix = emalloc(length + 1);
460 	memcpy(generated_prefix, buffer, length + 1);
461 
462 	/* 3. Add to map the generated prefix given the new namespace namespace. */
463 	dom_xml_ns_prefix_map_add(map, generated_prefix, true, new_namespace, new_namespace_length);
464 	/* Continuation of https://github.com/w3c/DOM-Parsing/issues/44 */
465 	dom_xml_local_prefix_map_add(local_prefixes_map, generated_prefix, length, new_namespace);
466 
467 	/* 4. Return the value of generated prefix. */
468 	return generated_prefix;
469 }
470 
/* Writes the qualified name to `out`: "prefix:name" when there is a prefix, just "name" otherwise.
 * Returns a negative value on write failure. */
static int dom_xml_output_qname(xmlOutputBufferPtr out, const dom_qname_pair *qname)
{
	const char *local_name = (const char *) qname->name;

	if (qname->prefix == NULL) {
		return xmlOutputBufferWriteString(out, local_name);
	}

	TRY(xmlOutputBufferWriteString(out, (const char *) qname->prefix));
	TRY(xmlOutputBufferWriteLit(out, ":"));
	return xmlOutputBufferWriteString(out, local_name);
}
479 
480 /* This is a utility method used by both
481  * https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-an-element-node
482  * and https://w3c.github.io/DOM-Parsing/#dfn-serializing-an-attribute-value */
dom_xml_common_text_serialization(xmlOutputBufferPtr out,const char * content,bool attribute_mode)483 static int dom_xml_common_text_serialization(xmlOutputBufferPtr out, const char *content, bool attribute_mode)
484 {
485 	if (content == NULL) {
486 		return 0;
487 	}
488 
489 	const char *last_output = content;
490 	const char *mask = attribute_mode ? "&<>\"\t\n\r" : "&<>";
491 
492 	while (true) {
493 		size_t chunk_length = strcspn(content, mask);
494 
495 		content += chunk_length;
496 		if (*content == '\0') {
497 			break;
498 		}
499 
500 		TRY(xmlOutputBufferWrite(out, content - last_output, last_output));
501 
502 		switch (*content) {
503 			case '&': {
504 				TRY(xmlOutputBufferWriteLit(out, "&amp;"));
505 				break;
506 			}
507 
508 			case '<': {
509 				TRY(xmlOutputBufferWriteLit(out, "&lt;"));
510 				break;
511 			}
512 
513 			case '>': {
514 				TRY(xmlOutputBufferWriteLit(out, "&gt;"));
515 				break;
516 			}
517 
518 			case '"': {
519 				TRY(xmlOutputBufferWriteLit(out, "&quot;"));
520 				break;
521 			}
522 
523 			/* The following three are added to address https://github.com/w3c/DOM-Parsing/issues/59 */
524 
525 			case '\t': {
526 				TRY(xmlOutputBufferWriteLit(out, "&#9;"));
527 				break;
528 			}
529 
530 			case '\n': {
531 				TRY(xmlOutputBufferWriteLit(out, "&#10;"));
532 				break;
533 			}
534 
535 			case '\r': {
536 				TRY(xmlOutputBufferWriteLit(out, "&#13;"));
537 				break;
538 			}
539 		}
540 
541 		content++;
542 		last_output = content;
543 	}
544 
545 	return xmlOutputBufferWrite(out, content - last_output, last_output);
546 }
547 
/* Verifies that `content` decodes as UTF-8 and that every decoded character passes xmlIsCharQ().
 * Returns 0 on success and -1 otherwise, so that it can be used with the TRY macros. */
static int dom_xml_check_char_production(const xmlChar *content)
{
	// TODO: optimization idea: fast-pass for ASCII-only data

	for (const xmlChar *cursor = content; *cursor != '\0'; ) {
		/* In: number of bytes offered to the decoder, out: number of bytes it consumed. */
		int consumed = 4;
		const int code_point = xmlGetUTF8Char(cursor, &consumed);

		if (code_point < 0) {
			return -1;
		}
		if (!xmlIsCharQ(code_point)) {
			return -1;
		}

		cursor += consumed;
	}

	return 0;
}
564 
565 /* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-text-node */
dom_xml_serialize_text_node(xmlOutputBufferPtr out,xmlNodePtr text,bool require_well_formed)566 static zend_always_inline int dom_xml_serialize_text_node(xmlOutputBufferPtr out, xmlNodePtr text, bool require_well_formed)
567 {
568 	/* 1. If the require well-formed flag is set and node's data contains characters that are not matched by the XML Char production,
569 	 *    then throw an exception. */
570 	if (require_well_formed && text->content != NULL) {
571 		TRY(dom_xml_check_char_production(text->content));
572 	}
573 
574 	return dom_xml_common_text_serialization(out, (const char *) text->content, false);
575 }
576 
dom_xml_attribute_namespace(const xmlAttr * attr)577 static zend_always_inline const xmlChar *dom_xml_attribute_namespace(const xmlAttr *attr)
578 {
579 	return attr->ns == NULL ? NULL : attr->ns->href;
580 }
581 
/* Writes `name="value"` for the attribute to `out` (without any prefix).
 * The value is assembled from the attribute's children: text nodes are written escaped in attribute mode,
 * entity references are written as "&name;", and any other child type is skipped.
 * Returns a negative value on write failure. */
static int dom_xml_serialize_attribute_node_value(xmlOutputBufferPtr out, xmlAttrPtr attr)
{
	TRY(xmlOutputBufferWriteString(out, (const char *) attr->name));
	TRY(xmlOutputBufferWriteLit(out, "=\""));

	xmlNodePtr child = attr->children;
	while (child != NULL) {
		switch (child->type) {
			case XML_TEXT_NODE:
				if (child->content != NULL) {
					TRY(dom_xml_common_text_serialization(out, (const char *) child->content, true));
				}
				break;

			case XML_ENTITY_REF_NODE:
				TRY(xmlOutputBufferWriteLit(out, "&"));
				TRY(dom_xml_common_text_serialization(out, (const char *) child->name, true));
				TRY(xmlOutputBufferWriteLit(out, ";"));
				break;

			default:
				break;
		}
		child = child->next;
	}

	return xmlOutputBufferWriteLit(out, "\"");
}
599 
600 /* These steps are from the attribute serialization algorithm's well-formed checks.
601  * Note that this does not return a boolean but an int to be compatible with the TRY/TRY_CLEANUP interface
602  * that we do for compatibility with libxml's interfaces. */
dom_xml_check_xmlns_attribute_requirements(const xmlAttr * attr,const xmlChar * candidate_prefix)603 static zend_always_inline int dom_xml_check_xmlns_attribute_requirements(const xmlAttr *attr, const xmlChar *candidate_prefix)
604 {
605 	const xmlChar *attr_value = dom_get_attribute_value(attr);
606 
607 	/* 3.5.2.2. If the require well-formed flag is set and the value of attr's value attribute matches the XMLNS namespace, then throw an exception */
608 	if (strcmp((const char *) attr_value, DOM_XMLNS_NS_URI) == 0) {
609 		return -1;
610 	}
611 
612 	/* 3.5.2.3. If the require well-formed flag is set and the value of attr's value attribute is the empty string.
613 	 * Errata: an "xmlns" attribute is allowed but not one with a prefix, so the idea in the spec is right but the description isn't. */
614 	if (*attr_value == '\0' && candidate_prefix != NULL) {
615 		return -1;
616 	}
617 
618 	return 0;
619 }
620 
621 /* Spec says to do nothing, but that's inconsistent/wrong, see https://github.com/w3c/DOM-Parsing/issues/28
622  * This does not have a require_well_formed argument because the only way to get here is via saveXML(), which has it off. */
dom_xml_serialize_attribute_node(xmlOutputBufferPtr out,xmlNodePtr attr)623 static int dom_xml_serialize_attribute_node(xmlOutputBufferPtr out, xmlNodePtr attr)
624 {
625 	if (attr->ns != NULL && attr->ns->prefix != NULL) {
626 		TRY(xmlOutputBufferWriteString(out, (const char *) attr->ns->prefix));
627 		TRY(xmlOutputBufferWriteLit(out, ":"));
628 	}
629 	return dom_xml_serialize_attribute_node_value(out, (xmlAttrPtr) attr);
630 }
631 
632 /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-a-comment-node */
dom_xml_serialize_comment_node(xmlOutputBufferPtr out,xmlNodePtr comment,bool require_well_formed)633 static int dom_xml_serialize_comment_node(xmlOutputBufferPtr out, xmlNodePtr comment, bool require_well_formed)
634 {
635 	/* Step 1 deals with well-formed flag */
636 	if (require_well_formed) {
637 		/* node's data contains characters that are not matched by the XML Char production or contains "--"
638 		 * (two adjacent U+002D HYPHEN-MINUS characters) or that ends with a "-" (U+002D HYPHEN-MINUS) character,
639 		 * then throw an exception */
640 		const xmlChar *ptr = comment->content;
641 		if (ptr != NULL) {
642 			TRY(dom_xml_check_char_production(ptr));
643 			if (strstr((const char *) ptr, "--") != NULL || ptr[strlen((const char *) ptr) - 1] == '-') {
644 				return -1;
645 			}
646 		}
647 	}
648 
649 	TRY(xmlOutputBufferWriteLit(out, "<!--"));
650 	if (EXPECTED(comment->content != NULL)) {
651 		TRY(xmlOutputBufferWriteString(out, (const char *) comment->content));
652 	}
653 	return xmlOutputBufferWriteLit(out, "-->");
654 }
655 
656 /* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-processinginstruction-node */
dom_xml_serialize_processing_instruction(xmlOutputBufferPtr out,xmlNodePtr pi,bool require_well_formed)657 static int dom_xml_serialize_processing_instruction(xmlOutputBufferPtr out, xmlNodePtr pi, bool require_well_formed)
658 {
659 	/* Steps 1-2 deal with well-formed flag */
660 	if (require_well_formed) {
661 		/* target contains a ":" (U+003A COLON) character or is an ASCII case-insensitive match for the string "xml", then throw an exception */
662 		if (strchr((const char *) pi->name, ':') != NULL || strcasecmp((const char *) pi->name, "xml") == 0) {
663 			return -1;
664 		}
665 
666 		/* node's data contains characters that are not matched by the XML Char production or contains the string "?>"
667 		 * (U+003F QUESTION MARK, U+003E GREATER-THAN SIGN), then throw an exception */
668 		if (pi->content != NULL) {
669 			TRY(dom_xml_check_char_production(pi->content));
670 			if (strstr((const char *) pi->content, "?>") != NULL) {
671 				return -1;
672 			}
673 		}
674 	}
675 
676 	TRY(xmlOutputBufferWriteLit(out, "<?"));
677 	TRY(xmlOutputBufferWriteString(out, (const char *) pi->name));
678 	TRY(xmlOutputBufferWriteLit(out, " "));
679 	if (EXPECTED(pi->content != NULL)) {
680 		TRY(xmlOutputBufferWriteString(out, (const char *) pi->content));
681 	}
682 	return xmlOutputBufferWriteLit(out, "?>");
683 }
684 
685 /* https://github.com/w3c/DOM-Parsing/issues/38
686  * and https://github.com/w3c/DOM-Parsing/blob/ab8d1ac9699ed43ae6de9f4be2b0f3cfc5f3709e/index.html#L1510 */
dom_xml_serialize_cdata_section_node(xmlOutputBufferPtr out,xmlNodePtr cdata)687 static int dom_xml_serialize_cdata_section_node(xmlOutputBufferPtr out, xmlNodePtr cdata)
688 {
689 	TRY(xmlOutputBufferWriteLit(out, "<![CDATA["));
690 	if (EXPECTED(cdata->content != NULL)) {
691 		TRY(xmlOutputBufferWriteString(out, (const char *) cdata->content));
692 	}
693 	return xmlOutputBufferWriteLit(out, "]]>");
694 }
695 
dom_xml_create_localname_set_key(const xmlAttr * attr)696 static zend_string *dom_xml_create_localname_set_key(const xmlAttr *attr)
697 {
698 	if (attr->ns == NULL || attr->ns->href == NULL) {
699 		return zend_string_init((const char *) attr->name, strlen((const char *) attr->name), false);
700 	}
701 
702 	/* Spec requires us to create a tuple as a key, however HashTable doesn't support that natively.
703 	 * Fortunately, href and name cannot have embedded NUL bytes in them, so we can create a
704 	 * "tuple" by concatenating them against each other, separated by a \0 byte.
705 	 */
706 	return zend_string_concat3(
707 		(const char *) attr->ns->href, strlen((const char *) attr->ns->href),
708 		"", 1, /* include the \0 */
709 		(const char *) attr->name, strlen((const char *) attr->name)
710 	);
711 }
712 
/*
 * Writes the serialized form of every attribute of `element` to `out`.
 *
 * out:                  output buffer that receives the markup.
 * element:              element whose `properties` list is walked in order.
 * map:                  namespace prefix map; consulted and extended while picking prefixes.
 * local_prefixes_map:   local prefixes map; consulted and extended while picking prefixes.
 * prefix_index:         counter handed to dom_xml_generate_a_prefix().
 * ignore_namespace_definition_attribute:
 *                       when true, an unprefixed attribute in the XMLNS namespace is skipped
 *                       unless its value equals the element's namespace URI (NULL treated as empty).
 * require_well_formed:  enables the duplicate (namespace, localName) check, the xmlns attribute
 *                       requirement check and the NCName check on the attribute name.
 *
 * Returns 0 once all attributes were processed; the `cleanup` path returns -1.
 * `localname_set` is destroyed on both paths.
 */
713 /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization-of-the-attributes */
dom_xml_serialize_attributes(xmlOutputBufferPtr out,xmlNodePtr element,dom_xml_ns_prefix_map * map,dom_xml_local_prefix_map * local_prefixes_map,unsigned int * prefix_index,bool ignore_namespace_definition_attribute,bool require_well_formed)714 static int dom_xml_serialize_attributes(
715 	xmlOutputBufferPtr out,
716 	xmlNodePtr element,
717 	dom_xml_ns_prefix_map *map,
718 	dom_xml_local_prefix_map *local_prefixes_map,
719 	unsigned int *prefix_index,
720 	bool ignore_namespace_definition_attribute,
721 	bool require_well_formed
722 )
723 {
724 	/* 1. Let result be the empty string.
725 	 *    => We're going to write directly to the output buffer. */
726 
727 	/* 2. Let localname set be a new empty namespace localname set.
728 	 *    We can do this unconditionally even if we don't use it, because this doesn't allocate memory anyway. */
729 	HashTable localname_set;
730 	zend_hash_init(&localname_set, 8, NULL, NULL, false);
731 
732 	/* 3. [LOOP] For each attribute attr in element's attributes, in the order they are specified in the element's attribute list: */
733 	for (xmlAttrPtr attr = element->properties; attr != NULL; attr = attr->next) {
734 		if (require_well_formed) {
735 			zend_string *key = dom_xml_create_localname_set_key(attr);
736 			/* 3.1. If the require well-formed flag is set and the localname set contains a tuple whose values match those of a
737 			 *      new tuple consisting of attr's namespaceURI attribute and localName attribute, then throw an exception
738 			 * 3.2. Create a new tuple consisting of attr's namespaceURI attribute and localName attribute, and add it to the localname set. */
739 			bool duplicate = zend_hash_add_empty_element(&localname_set, key) == NULL;
740 			zend_string_release_ex(key, false);
741 			if (duplicate) {
				/* The same (namespace, localName) pair was already seen on this element. */
742 				goto cleanup;
743 			}
744 		}
745 
746 		/* 3.3. Let attribute namespace be the value of attr's namespaceURI value. */
747 		const xmlChar *attribute_namespace = dom_xml_attribute_namespace(attr);
748 
749 		/* 3.4. Let candidate prefix be null. */
750 		const xmlChar *candidate_prefix = NULL;
751 
752 		/* 3.5. If attribute namespace is not null, then run these sub-steps: */
753 		if (attribute_namespace != NULL) {
754 			/* 3.5.1. Let candidate prefix be the result of retrieving a preferred prefix string from map
755 			 *        given namespace attribute namespace with preferred prefix being attr's prefix value. */
756 			candidate_prefix = dom_retrieve_a_preferred_prefix_string(
757 				map,
758 				local_prefixes_map,
759 				attr->ns->prefix,
760 				attribute_namespace,
761 				strlen((const char *) attribute_namespace)
762 			);
763 
764 			/* 3.5.2. If the value of attribute namespace is the XMLNS namespace, then run these steps: */
765 			if (php_dom_ns_is_fast((xmlNodePtr) attr, php_dom_ns_is_xmlns_magic_token)) {
766 				const xmlChar *attr_value = dom_get_attribute_value(attr);
767 
768 				/* 3.5.2.1. If any of the following are true, then stop running these steps and goto Loop to visit the next attribute: */
769 				/* the attr's value is the XML namespace; */
770 				if (strcmp((const char *) attr_value, DOM_XML_NS_URI) == 0) {
771 					continue;
772 				}
773 				/* the attr's prefix is null and the ignore namespace definition attribute flag is true */
774 				if (ignore_namespace_definition_attribute && attr->ns->prefix == NULL) {
775 					/* https://github.com/w3c/DOM-Parsing/issues/47 */
776 					if (!dom_xml_str_equals_treat_nulls_as_empty(element->ns == NULL ? NULL : element->ns->href, attr_value)) {
777 						continue;
778 					}
779 				}
780 				/* the attr's prefix is not null and either */
781 				if (attr->ns->prefix != NULL) {
782 					/* the attr's localName is not a key contained in the local prefixes map
783 					 * or the attr's localName is present in the local prefixes map but the value of the key does not match attr's value
784 					 * and furthermore that the attr's localName (as the prefix to find) is found in the namespace prefix map
785 					 * given the namespace consisting of the attr's value */
786 					const xmlChar *value = dom_xml_local_prefix_map_find(local_prefixes_map, attr->name, strlen((const char *) attr->name));
787 					if (value == NULL || strcmp((const char *) value, (const char *) attr_value) != 0) {
788 						if (dom_prefix_found_in_ns_prefix_map(map, attr->name, attr_value, strlen((const char *) attr_value))) {
789 							continue;
790 						}
791 					}
792 				}
793 
794 				/* 3.5.2.4. the attr's prefix matches the string "xmlns", then let candidate prefix be the string "xmlns". */
795 				if (attr->ns->prefix != NULL && strcmp((const char *) attr->ns->prefix, "xmlns") == 0) {
796 					candidate_prefix = BAD_CAST "xmlns";
797 				}
798 
799 				/* Errata: step 3.5.2.3 can only really be checked if we already know the candidate prefix. */
800 				if (require_well_formed) {
801 					/* 3.5.2.2 and 3.5.2.3 are done by this call. */
802 					TRY_OR_CLEANUP(dom_xml_check_xmlns_attribute_requirements(attr, candidate_prefix));
803 				}
804 			}
805 			/* 3.5.3. Otherwise, the attribute namespace is not the XMLNS namespace. Run these steps: */
806 			else if (candidate_prefix == NULL) { /* https://github.com/w3c/DOM-Parsing/issues/29 */
807 				/* Continuation of https://github.com/w3c/DOM-Parsing/issues/29 */
808 				if (attr->ns->prefix == NULL
809 					|| dom_xml_local_prefix_map_contains(local_prefixes_map, attr->ns->prefix, strlen((const char *) attr->ns->prefix))) {
810 					/* 3.5.3.1. Let candidate prefix be the result of generating a prefix providing map,
811 					 *          attribute namespace, and prefix index as input. */
812 					candidate_prefix = dom_xml_generate_a_prefix(
813 						map,
814 						local_prefixes_map,
815 						attribute_namespace,
816 						strlen((const char *) attribute_namespace),
817 						prefix_index
818 					);
819 				} else {
					/* The attribute's own prefix is not yet declared locally, so reuse it and record it in both maps. */
820 					candidate_prefix = attr->ns->prefix;
821 					/* Continuation of https://github.com/w3c/DOM-Parsing/issues/29 */
822 					dom_xml_ns_prefix_map_add(
823 						map,
824 						candidate_prefix,
825 						false,
826 						attribute_namespace,
827 						strlen((const char *) attribute_namespace)
828 					);
829 					dom_xml_local_prefix_map_add(
830 						local_prefixes_map,
831 						candidate_prefix,
832 						strlen((const char *) candidate_prefix),
833 						attribute_namespace
834 					);
835 				}
836 
837 				/* 3.5.3.2. Append the following to result, in the order listed: */
838 				TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, " xmlns:"));
839 				TRY_OR_CLEANUP(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
840 				TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, "=\""));
841 				TRY_OR_CLEANUP(dom_xml_common_text_serialization(out, (const char *) attribute_namespace, true));
842 				TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, "\""));
843 			}
844 		}
845 
846 		/* 3.6. Append a " " (U+0020 SPACE) to result. */
847 		TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, " "));
848 
849 		/* 3.7. If candidate prefix is not null, then append to result the concatenation of candidate prefix with ":" (U+003A COLON). */
850 		if (candidate_prefix != NULL) {
851 			TRY_OR_CLEANUP(xmlOutputBufferWriteString(out, (const char *) candidate_prefix));
852 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(out, ":"));
853 		}
854 
855 		if (require_well_formed) {
856 			/* 3.8. If the require well-formed flag is set and
857 			 *      this attr's localName attribute contains the character ":" (U+003A COLON)
858 			 *      or does not match the XML Name production
859 			 *      or equals "xmlns" and attribute namespace is null */
860 			if (xmlValidateNCName(attr->name, /* space */ 0) != 0
861 				|| (strcmp((const char *) attr->name, "xmlns") == 0 && dom_xml_attribute_namespace(attr) == NULL)) {
				/* Note: the space and optional prefix were already written to `out` at this point. */
862 				goto cleanup;
863 			}
864 		}
865 
866 		/* 3.9. Append the following strings to result, in the order listed: */
867 		TRY_OR_CLEANUP(dom_xml_serialize_attribute_node_value(out, attr));
868 	}
869 
870 	/* 4. Return the value of result.
871 	 *    => We're writing directly to the output buffer. */
872 
873 	zend_hash_destroy(&localname_set);
874 	return 0;
875 
876 cleanup:
877 	zend_hash_destroy(&localname_set);
878 	return -1;
879 }
880 
881 /* Only format output if there are no text/entityrefs/cdata nodes as children. */
dom_xml_should_format_element(xmlNodePtr element)882 static bool dom_xml_should_format_element(xmlNodePtr element)
883 {
884 	xmlNodePtr child = element->children;
885 	ZEND_ASSERT(child != NULL);
886 	do {
887 		if (child->type == XML_TEXT_NODE || child->type == XML_ENTITY_REF_NODE || child->type == XML_CDATA_SECTION_NODE) {
888 			return false;
889 		}
890 		child = child->next;
891 	} while (child != NULL);
892 	return true;
893 }
894 
/* Starts a new output line and writes two spaces per indentation level.
 * With a non-positive `indent` only the newline is written.
 * Returns 0 when every write went through TRY without bailing out. */
static int dom_xml_output_indents(xmlOutputBufferPtr out, int indent)
{
	TRY(xmlOutputBufferWriteLit(out, "\n"));

	int remaining = indent;
	while (remaining > 0) {
		TRY(xmlOutputBufferWriteLit(out, "  "));
		remaining--;
	}

	return 0;
}
903 
/*
 * Serializes `element` to ctx->out: start tag (with any namespace declaration that is needed),
 * attributes, children and end tag.
 *
 * ctx:                   serialization context; ctx->out receives the markup and ctx->private_data
 *                        is used to look up the content of HTML <template> elements.
 * namespace:             namespace inherited from the parent (may be NULL).
 * namespace_prefix_map:  the parent's prefix map; it is copied and the copy is what this element,
 *                        its attributes and its children work with.
 * element:               the element node to serialize.
 * prefix_index:          counter passed on to prefix generation.
 * indent:                current indentation level; a negative value disables formatting.
 * require_well_formed:   enables the well-formedness checks.
 *
 * Returns 0 on success and -1 on failure. Once the two prefix maps have been created they are
 * destroyed on every exit path.
 */
904 /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-an-element-node */
dom_xml_serialize_element_node(dom_xml_serialize_ctx * ctx,const xmlChar * namespace,dom_xml_ns_prefix_map * namespace_prefix_map,xmlNodePtr element,unsigned int * prefix_index,int indent,bool require_well_formed)905 static int dom_xml_serialize_element_node(
906 	dom_xml_serialize_ctx *ctx,
907 	const xmlChar *namespace,
908 	dom_xml_ns_prefix_map *namespace_prefix_map,
909 	xmlNodePtr element,
910 	unsigned int *prefix_index,
911 	int indent,
912 	bool require_well_formed
913 )
914 {
915 	/* 1. If the require well-formed flag is set and this node's localName attribute contains
916 	 *    the character ":" (U+003A COLON) or does not match the XML Name production, then throw an exception. */
917 	if (require_well_formed) {
918 		if (xmlValidateNCName(element->name, /* space */ 0) != 0) {
			/* No maps have been created yet, so a plain return is sufficient here. */
919 			return -1;
920 		}
921 	}
922 
	/* Formatting needs a non-negative indent and at least one child (the helper asserts the latter). */
923 	bool should_format = indent >= 0 && element->children != NULL && dom_xml_should_format_element(element);
924 
925 	/* 2. Let markup be the string "<" (U+003C LESS-THAN SIGN). */
926 	TRY(xmlOutputBufferWriteLit(ctx->out, "<"));
927 
928 	/* 3. Let qualified name be an empty string.
929 	 *    => We're going to do it a bit differently.
930 	 *       To avoid string allocations, we're going to store the qualified name separately as prefix+name.
931 	 *       If the prefix is NULL then the qualified name will be == name, otherwise == prefix:name. */
932 	dom_qname_pair qualified_name = { NULL, NULL };
933 
934 	/* 4. Let skip end tag be a boolean flag with value false. */
935 	bool skip_end_tag = false;
936 
937 	/* 5. Let ignore namespace definition attribute be a boolean flag with value false. */
938 	bool ignore_namespace_definition_attribute = false;
939 
940 	/* 6. Given prefix map, copy a namespace prefix map and let map be the result. */
941 	dom_xml_ns_prefix_map map;
942 	dom_xml_ns_prefix_map_copy(&map, namespace_prefix_map);
943 
944 	/* 7. Let local prefixes map be an empty map. */
945 	dom_xml_local_prefix_map local_prefixes_map;
946 	dom_xml_local_prefix_map_ctor(&local_prefixes_map);
947 
948 	/* 8. Let local default namespace be the result of recording the namespace information for node given map and local prefixes map. */
949 	const xmlChar *local_default_namespace = dom_recording_the_namespace_information(&map, &local_prefixes_map, element);
950 
951 	/* 9. Let inherited ns be a copy of namespace. */
952 	const xmlChar *inherited_ns = namespace;
953 
954 	/* 10. Let ns be the value of node's namespaceURI attribute. */
955 	const xmlChar *const ns = element->ns == NULL ? NULL : element->ns->href;
956 
957 	/* 11. If inherited ns is equal to ns, then: */
958 	if (dom_xml_str_equals_treat_nulls_as_nulls(inherited_ns, ns)) {
959 		/* 11.1. If local default namespace is not null, then set ignore namespace definition attribute to true. */
960 		if (local_default_namespace != NULL) {
961 			ignore_namespace_definition_attribute = true;
962 		}
963 
964 		/* 11.2. If ns is the XML namespace,
965 		 *       then append to qualified name the concatenation of the string "xml:" and the value of node's localName. */
966 		if (php_dom_ns_is_fast(element, php_dom_ns_is_xml_magic_token)) {
967 			qualified_name.prefix = BAD_CAST "xml";
968 			qualified_name.name = element->name;
969 		}
970 		/* 11.3. Otherwise, append to qualified name the value of node's localName. */
971 		else {
972 			qualified_name.name = element->name;
973 		}
974 
975 		/* 11.4. Append the value of qualified name to markup. */
976 		TRY_OR_CLEANUP(dom_xml_output_qname(ctx->out, &qualified_name));
977 	}
978 	/* 12. Otherwise, inherited ns is not equal to ns */
979 	else {
980 		/* 12.1. Let prefix be the value of node's prefix attribute. */
981 		const xmlChar *prefix = element->ns == NULL ? NULL : element->ns->prefix;
982 
983 		/* 12.2. Let candidate prefix be the result of retrieving a preferred prefix string prefix from map given namespace ns. */
984 		/* https://github.com/w3c/DOM-Parsing/issues/52 */
985 		const xmlChar *candidate_prefix;
986 		if (prefix == NULL && dom_xml_str_equals_treat_nulls_as_empty(ns, local_default_namespace)) {
987 			candidate_prefix = NULL;
988 		} else {
989 			size_t ns_length = ns == NULL ? 0 : strlen((const char *) ns);
990 			candidate_prefix = dom_retrieve_a_preferred_prefix_string(&map, &local_prefixes_map, prefix, ns, ns_length);
991 		}
992 
993 		/* 12.3. If the value of prefix matches "xmlns", then run the following steps: */
994 		if (prefix != NULL && strcmp((const char *) prefix, "xmlns") == 0) {
995 			/* 12.3.1. If the require well-formed flag is set, then throw an error. */
996 			if (require_well_formed) {
997 				goto cleanup;
998 			}
999 
1000 			/* 12.3.2. Let candidate prefix be the value of prefix. */
1001 			candidate_prefix = prefix;
1002 		}
1003 
1004 		/* 12.4. if candidate prefix is not null (a namespace prefix is defined which maps to ns), then: */
1005 		if (candidate_prefix != NULL) {
1006 			/* 12.4.1. Append to qualified name the concatenation of candidate prefix, ":" (U+003A COLON), and node's localName. */
1007 			qualified_name.prefix = candidate_prefix;
1008 			qualified_name.name = element->name;
1009 
1010 			/* 12.4.2. If the local default namespace is not null (there exists a locally-defined default namespace declaration attribute)
1011 			 *         and its value is not the XML namespace ... */
1012 			if (local_default_namespace != NULL && strcmp((const char *) local_default_namespace, DOM_XML_NS_URI) != 0) {
				/* An empty local default namespace is passed on to the children as "no namespace" (NULL). */
1013 				if (*local_default_namespace == '\0') {
1014 					inherited_ns = NULL;
1015 				} else {
1016 					inherited_ns = local_default_namespace;
1017 				}
1018 			}
1019 
1020 			/* 12.4.3. Append the value of qualified name to markup. */
1021 			TRY_OR_CLEANUP(dom_xml_output_qname(ctx->out, &qualified_name));
1022 		}
1023 		/* 12.5. Otherwise, if prefix is not null, then: */
1024 		else if (prefix != NULL) {
1025 			size_t ns_length = ns == NULL ? 0 : strlen((const char *) ns);
1026 
1027 			/* 12.5.1. If the local prefixes map contains a key matching prefix, ... */
1028 			size_t prefix_length = strlen((const char *) prefix);
1029 			if (dom_xml_local_prefix_map_contains(&local_prefixes_map, prefix, prefix_length)) {
1030 				prefix = dom_xml_generate_a_prefix(&map, &local_prefixes_map, ns, ns_length, prefix_index);
1031 			} else { /* else branch fixes spec issue: generating a prefix already adds it to the maps. */
1032 				/* 12.5.2. Add prefix to map given namespace ns. */
1033 				dom_xml_ns_prefix_map_add(&map, prefix, false, ns, ns_length);
1034 				/* This is not spelled out in spec, but we have to do this to avoid conflicts (see default_namespace_move.phpt). */
1035 				dom_xml_local_prefix_map_add(&local_prefixes_map, prefix, prefix_length, ns);
1036 			}
1037 
1038 			/* 12.5.3. Append to qualified name the concatenation of prefix, ":" (U+003A COLON), and node's localName. */
1039 			qualified_name.prefix = prefix;
1040 			qualified_name.name = element->name;
1041 
1042 			/* 12.5.4. Append the value of qualified name to markup. */
1043 			TRY_OR_CLEANUP(dom_xml_output_qname(ctx->out, &qualified_name));
1044 
1045 			/* 12.5.5. Append the following to markup, in the order listed: ... */
1046 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, " xmlns:")); /* 12.5.5.1 - 12.5.5.2 */
1047 			TRY_OR_CLEANUP(xmlOutputBufferWriteString(ctx->out, (const char *) prefix));
1048 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, "=\""));
1049 			TRY_OR_CLEANUP(dom_xml_common_text_serialization(ctx->out, (const char *) ns, true));
1050 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, "\""));
1051 
1052 			/* 12.5.6. If local default namespace is not null ... (editorial numbering error: https://github.com/w3c/DOM-Parsing/issues/43) */
1053 			if (local_default_namespace != NULL) {
1054 				if (*local_default_namespace == '\0') {
1055 					inherited_ns = NULL;
1056 				} else {
1057 					inherited_ns = local_default_namespace;
1058 				}
1059 			}
1060 		}
1061 		/* 12.6. Otherwise, if local default namespace is null, or local default namespace is not null and its value is not equal to ns, then: */
1062 		/* Note: https://github.com/w3c/DOM-Parsing/issues/47 */
1063 		else if (local_default_namespace == NULL || !dom_xml_str_equals_treat_nulls_as_empty(local_default_namespace, ns)) {
1064 			/* 12.6.1. Set the ignore namespace definition attribute flag to true. */
1065 			ignore_namespace_definition_attribute = true;
1066 
1067 			/* 12.6.2. Append to qualified name the value of node's localName. */
1068 			qualified_name.name = element->name;
1069 
1070 			/* 12.6.3. Let the value of inherited ns be ns. */
1071 			inherited_ns = ns;
1072 
1073 			/* 12.6.4. Append the value of qualified name to markup. */
1074 			TRY_OR_CLEANUP(dom_xml_output_qname(ctx->out, &qualified_name));
1075 
1076 			/* 12.6.5. Append the following to markup, in the order listed: ... */
1077 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, " xmlns=\"")); /* 12.6.5.1 - 12.6.5.2 */
1078 			TRY_OR_CLEANUP(dom_xml_common_text_serialization(ctx->out, (const char *) ns, true));
1079 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, "\""));
1080 		}
1081 		/* 12.7. Otherwise, the node has a local default namespace that matches ns ... */
1082 		else {
1083 			qualified_name.name = element->name;
1084 			inherited_ns = ns;
1085 			TRY_OR_CLEANUP(dom_xml_output_qname(ctx->out, &qualified_name));
1086 		}
1087 	}
1088 
1089 	/* 13. Append to markup the result of the XML serialization of node's attributes given map, prefix index,
1090 	 *     local prefixes map, ignore namespace definition attribute flag, and require well-formed flag. */
1091 	TRY_OR_CLEANUP(dom_xml_serialize_attributes(ctx->out, element, &map, &local_prefixes_map, prefix_index, ignore_namespace_definition_attribute, require_well_formed));
1092 
1093 	/* 14. If ns is the HTML namespace, and the node's list of children is empty, and the node's localName matches
1094 	 *     any one of the following void elements: ... */
1095 	if (element->children == NULL) {
1096 		if (xmlSaveNoEmptyTags) {
1097 			/* Do nothing, use the <x></x> closing style. */
1098 		} else if (php_dom_ns_is_fast(element, php_dom_ns_is_html_magic_token)) {
			/* The name length is computed once and shared by all comparisons below. */
1099 			size_t name_length = strlen((const char *) element->name);
1100 			if (dom_local_name_compare_ex(element, "area", strlen("area"), name_length)
1101 				|| dom_local_name_compare_ex(element, "base", strlen("base"), name_length)
1102 				|| dom_local_name_compare_ex(element, "basefont", strlen("basefont"), name_length)
1103 				|| dom_local_name_compare_ex(element, "bgsound", strlen("bgsound"), name_length)
1104 				|| dom_local_name_compare_ex(element, "br", strlen("br"), name_length)
1105 				|| dom_local_name_compare_ex(element, "col", strlen("col"), name_length)
1106 				|| dom_local_name_compare_ex(element, "embed", strlen("embed"), name_length)
1107 				|| dom_local_name_compare_ex(element, "frame", strlen("frame"), name_length)
1108 				|| dom_local_name_compare_ex(element, "hr", strlen("hr"), name_length)
1109 				|| dom_local_name_compare_ex(element, "img", strlen("img"), name_length)
1110 				|| dom_local_name_compare_ex(element, "input", strlen("input"), name_length)
1111 				|| dom_local_name_compare_ex(element, "keygen", strlen("keygen"), name_length)
1112 				|| dom_local_name_compare_ex(element, "link", strlen("link"), name_length)
1113 				|| dom_local_name_compare_ex(element, "menuitem", strlen("menuitem"), name_length)
1114 				|| dom_local_name_compare_ex(element, "meta", strlen("meta"), name_length)
1115 				|| dom_local_name_compare_ex(element, "param", strlen("param"), name_length)
1116 				|| dom_local_name_compare_ex(element, "source", strlen("source"), name_length)
1117 				|| dom_local_name_compare_ex(element, "track", strlen("track"), name_length)
1118 				|| dom_local_name_compare_ex(element, "wbr", strlen("wbr"), name_length)) {
1119 				TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, " /"));
1120 				skip_end_tag = true;
1121 			}
1122 		} else {
1123 			/* 15. If ns is not the HTML namespace, and the node's list of children is empty,
1124 			 *     then append "/" (U+002F SOLIDUS) to markup and set the skip end tag flag to true. */
1125 			TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, "/"));
1126 			skip_end_tag = true;
1127 		}
1128 	}
1129 
1130 	/* 16. Append ">" (U+003E GREATER-THAN SIGN) to markup. */
1131 	TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, ">"));
1132 
1133 	/* 17. If the value of skip end tag is true, then return the value of markup and skip the remaining steps. */
1134 	if (!skip_end_tag) {
		/* Children are either indented one level deeper, or serialized with formatting switched off (-1). */
1135 		if (should_format) {
1136 			indent++;
1137 		} else {
1138 			indent = -1;
1139 		}
1140 
1141 		/* 18. If ns is the HTML namespace, and the node's localName matches the string "template",
1142 		 *     then this is a template element.
1143 		 *     Append to markup the result of XML serializing a DocumentFragment node. */
1144 		xmlNodePtr child = NULL;
1145 		if (php_dom_ns_is_fast(element, php_dom_ns_is_html_magic_token) && xmlStrEqual(element->name, BAD_CAST "template")) {
			/* Without private data there is nothing to look up: child stays NULL and no content is emitted. */
1146 			if (ctx->private_data != NULL) {
1147 				child = php_dom_retrieve_templated_content(ctx->private_data, element);
1148 			}
1149 		} else {
1150 			child = element->children;
1151 		}
1152 
1153 		/* 19. Otherwise, append to markup the result of running the XML serialization algorithm on each of node's children. */
1154 		for (; child != NULL; child = child->next) {
1155 			if (should_format) {
1156 				TRY_OR_CLEANUP(dom_xml_output_indents(ctx->out, indent));
1157 			}
1158 			TRY_OR_CLEANUP(dom_xml_serialization_algorithm(ctx, &map, child, inherited_ns, prefix_index, indent, require_well_formed));
1159 		}
1160 
1161 		if (should_format) {
			/* Back to this element's own level so that the end tag lines up with the start tag. */
1162 			indent--;
1163 			TRY_OR_CLEANUP(dom_xml_output_indents(ctx->out, indent));
1164 		}
1165 
1166 		/* 20. Append the following to markup, in the order listed: */
1167 		TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, "</"));
1168 		TRY_OR_CLEANUP(dom_xml_output_qname(ctx->out, &qualified_name));
1169 		TRY_OR_CLEANUP(xmlOutputBufferWriteLit(ctx->out, ">"));
1170 	}
1171 
1172 	/* 21. Return the value of markup.
1173 	 *     => We use the output buffer instead. */
1174 	dom_xml_ns_prefix_map_dtor(&map);
1175 	dom_xml_local_prefix_map_dtor(&local_prefixes_map);
1176 	return 0;
1177 
1178 cleanup:
1179 	dom_xml_ns_prefix_map_dtor(&map);
1180 	dom_xml_local_prefix_map_dtor(&local_prefixes_map);
1181 	return -1;
1182 }
1183 
1184 /* https://w3c.github.io/DOM-Parsing/#xml-serializing-a-documentfragment-node */
dom_xml_serializing_a_document_fragment_node(dom_xml_serialize_ctx * ctx,dom_xml_ns_prefix_map * namespace_prefix_map,xmlNodePtr node,const xmlChar * namespace,unsigned int * prefix_index,int indent,bool require_well_formed)1185 static int dom_xml_serializing_a_document_fragment_node(
1186 	dom_xml_serialize_ctx *ctx,
1187 	dom_xml_ns_prefix_map *namespace_prefix_map,
1188 	xmlNodePtr node,
1189 	const xmlChar *namespace,
1190 	unsigned int *prefix_index,
1191 	int indent,
1192 	bool require_well_formed
1193 )
1194 {
1195 	/* 1. Let markup the empty string.
1196 	 *    => We use the output buffer instead. */
1197 
1198 	/* 2. For each child child of node, in tree order, run the XML serialization algorithm on the child ... */
1199 	xmlNodePtr child = node->children;
1200 	while (child != NULL) {
1201 		TRY(dom_xml_serialization_algorithm(ctx, namespace_prefix_map, child, namespace, prefix_index, indent, require_well_formed));
1202 		child = child->next;
1203 	}
1204 
1205 	/* 3. Return the value of markup
1206 	 *    => We use the output buffer instead. */
1207 	return 0;
1208 }
1209 
1210 /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serializing-a-document-node */
dom_xml_serializing_a_document_node(dom_xml_serialize_ctx * ctx,dom_xml_ns_prefix_map * namespace_prefix_map,xmlNodePtr node,const xmlChar * namespace,unsigned int * prefix_index,int indent,bool require_well_formed)1211 static int dom_xml_serializing_a_document_node(
1212 	dom_xml_serialize_ctx *ctx,
1213 	dom_xml_ns_prefix_map *namespace_prefix_map,
1214 	xmlNodePtr node,
1215 	const xmlChar *namespace,
1216 	unsigned int *prefix_index,
1217 	int indent,
1218 	bool require_well_formed
1219 )
1220 {
1221 	/* 1. Let serialized document be an empty string.
1222 	 *    => We use the output buffer instead. */
1223 
1224 	xmlNodePtr child = node->children;
1225 	node->children = NULL;
1226 
1227 	/* https://github.com/w3c/DOM-Parsing/issues/50 */
1228 	TRY(xmlOutputBufferFlush(ctx->out));
1229 	TRY(xmlSaveDoc(ctx->ctxt, (xmlDocPtr) node));
1230 	TRY(xmlSaveFlush(ctx->ctxt));
1231 
1232 	node->children = child;
1233 
1234 	/* 2. For each child child of node, in tree order, run the XML serialization algorithm on the child passing along the provided arguments,
1235 	 *    and append the result to serialized document. */
1236 	while (child != NULL) {
1237 		TRY(dom_xml_serialization_algorithm(ctx, namespace_prefix_map, child, namespace, prefix_index, indent, require_well_formed));
1238 		child = child->next;
1239 	}
1240 
1241 	/* 3. Return the value of serialized document.
1242 	 *    => We use the output buffer instead. */
1243 	return 0;
1244 }
1245 
1246 /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization-algorithm */
dom_xml_serialization_algorithm(dom_xml_serialize_ctx * ctx,dom_xml_ns_prefix_map * namespace_prefix_map,xmlNodePtr node,const xmlChar * namespace,unsigned int * prefix_index,int indent,bool require_well_formed)1247 static int dom_xml_serialization_algorithm(
1248 	dom_xml_serialize_ctx *ctx,
1249 	dom_xml_ns_prefix_map *namespace_prefix_map,
1250 	xmlNodePtr node,
1251 	const xmlChar *namespace,
1252 	unsigned int *prefix_index,
1253 	int indent,
1254 	bool require_well_formed
1255 )
1256 {
1257 	/* If node's interface is: */
1258 	switch (node->type) {
1259 		case XML_ELEMENT_NODE:
1260 			return dom_xml_serialize_element_node(ctx, namespace, namespace_prefix_map, node, prefix_index, indent, require_well_formed);
1261 
1262 		case XML_DOCUMENT_FRAG_NODE:
1263 			return dom_xml_serializing_a_document_fragment_node(ctx, namespace_prefix_map, node, namespace, prefix_index, indent, require_well_formed);
1264 
1265 		case XML_HTML_DOCUMENT_NODE:
1266 		case XML_DOCUMENT_NODE:
1267 			return dom_xml_serializing_a_document_node(ctx, namespace_prefix_map, node, namespace, prefix_index, indent, require_well_formed);
1268 
1269 		case XML_TEXT_NODE:
1270 			return dom_xml_serialize_text_node(ctx->out, node, require_well_formed);
1271 
1272 		case XML_COMMENT_NODE:
1273 			return dom_xml_serialize_comment_node(ctx->out, node, require_well_formed);
1274 
1275 		case XML_PI_NODE:
1276 			return dom_xml_serialize_processing_instruction(ctx->out, node, require_well_formed);
1277 
1278 		case XML_CDATA_SECTION_NODE:
1279 			return dom_xml_serialize_cdata_section_node(ctx->out, node);
1280 
1281 		case XML_ATTRIBUTE_NODE:
1282 			return dom_xml_serialize_attribute_node(ctx->out, node);
1283 
1284 		default:
1285 			TRY(xmlOutputBufferFlush(ctx->out));
1286 			TRY(xmlSaveTree(ctx->ctxt, node));
1287 			TRY(xmlSaveFlush(ctx->ctxt));
1288 			if (node->type == XML_DTD_NODE) {
1289 				return xmlOutputBufferWriteLit(ctx->out, "\n");
1290 			}
1291 			return 0;
1292 	}
1293 
1294 	ZEND_UNREACHABLE();
1295 }
1296 
1297 /* https://w3c.github.io/DOM-Parsing/#dfn-xml-serialization */
dom_xml_serialize(xmlSaveCtxtPtr ctxt,xmlOutputBufferPtr out,xmlNodePtr node,bool format,bool require_well_formed,php_dom_private_data * private_data)1298 int dom_xml_serialize(xmlSaveCtxtPtr ctxt, xmlOutputBufferPtr out, xmlNodePtr node, bool format, bool require_well_formed, php_dom_private_data *private_data)
1299 {
1300 	/* 1. Let namespace be a context namespace with value null. */
1301 	const xmlChar *namespace = NULL;
1302 
1303 	/* 2. Let prefix map be a new namespace prefix map. */
1304 	dom_xml_ns_prefix_map namespace_prefix_map;
1305 	dom_xml_ns_prefix_map_ctor(&namespace_prefix_map);
1306 
1307 	/* 3. Add the XML namespace with prefix value "xml" to prefix map. */
1308 	dom_xml_ns_prefix_map_add(&namespace_prefix_map, BAD_CAST "xml", false, BAD_CAST DOM_XML_NS_URI, strlen(DOM_XML_NS_URI));
1309 
1310 	/* 4. Let prefix index be a generated namespace prefix index with value 1. */
1311 	unsigned int prefix_index = 1;
1312 
1313 	/* 5. Return the result of running the XML serialization algorithm ... */
1314 	dom_xml_serialize_ctx ctx;
1315 	ctx.out = out;
1316 	ctx.ctxt = ctxt;
1317 	ctx.private_data = private_data;
1318 	int indent = format ? 0 : -1;
1319 	int result = dom_xml_serialization_algorithm(&ctx, &namespace_prefix_map, node, namespace, &prefix_index, indent, require_well_formed);
1320 
1321 	dom_xml_ns_prefix_map_dtor(&namespace_prefix_map);
1322 
1323 	return result;
1324 }
1325 
1326 #endif  /* HAVE_LIBXML && HAVE_DOM */
1327