xref: /php-src/ext/dom/infra.c (revision 41996e8d)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Niels Dossche <nielsdos@php.net>                            |
14    +----------------------------------------------------------------------+
15 */
16 
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20 
21 #include "php.h"
22 #if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23 #include "infra.h"
24 
/*
 * ASCII whitespace as defined by the WHATWG Infra standard:
 * https://infra.spec.whatwg.org/#ascii-whitespace
 *
 * The set consists of exactly five bytes: U+0009 TAB, U+000A LF, U+000C FF,
 * U+000D CR and U+0020 SPACE. It is stored as a NUL-terminated string so it
 * can be passed directly as the accept/reject set of strspn()/strcspn().
 * Note that U+000B (vertical tab) is deliberately NOT part of the set, which
 * is why isspace() is not a substitute.
 */
const char *ascii_whitespace = "\x09\x0A\x0C\x0D\x20";
27 
28 /* https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace */
dom_strip_and_collapse_ascii_whitespace(zend_string * input)29 zend_string *dom_strip_and_collapse_ascii_whitespace(zend_string *input)
30 {
31 	if (input == zend_empty_string) {
32 		return input;
33 	}
34 
35 	ZEND_ASSERT(!ZSTR_IS_INTERNED(input));
36 	ZEND_ASSERT(GC_REFCOUNT(input) == 1);
37 
38 	char *write_ptr = ZSTR_VAL(input);
39 
40 	const char *start = ZSTR_VAL(input);
41 	const char *current = start;
42 	const char *end = current + ZSTR_LEN(input);
43 
44 	current += strspn(current, ascii_whitespace);
45 
46 	while (current < end) {
47 		/* Copy non-whitespace */
48 		size_t non_whitespace_len = strcspn(current, ascii_whitespace);
49 		/* If the pointers are equal, we still haven't encountered collapsible or strippable whitespace. */
50 		if (write_ptr != current) {
51 			memmove(write_ptr, current, non_whitespace_len);
52 		}
53 		current += non_whitespace_len;
54 		write_ptr += non_whitespace_len;
55 
56 		/* Skip whitespace */
57 		current += strspn(current, ascii_whitespace);
58 		if (current < end) {
59 			/* Only make a space when we're not yet at the end of the input, because that means more non-whitespace
60 			 * input is to come. */
61 			*write_ptr++ = ' ';
62 		}
63 	}
64 
65 	*write_ptr = '\0';
66 
67 	size_t len = write_ptr - start;
68 	if (len != ZSTR_LEN(input)) {
69 		return zend_string_truncate(input, len, false);
70 	} else {
71 		/* Forget the hash value since we may have transformed non-space-whitespace into spaces. */
72 		zend_string_forget_hash_val(input);
73 		return input;
74 	}
75 }
76 
77 #endif  /* HAVE_LIBXML && HAVE_DOM */
78