1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #ifdef HAVE_CONFIG_H
18 #include <config.h>
19 #endif
20
21 #include "php.h"
22 #if defined(HAVE_LIBXML) && defined(HAVE_DOM)
23 #include "infra.h"
24
/* https://infra.spec.whatwg.org/#ascii-whitespace
 *
 * The set of ASCII whitespace bytes: U+0009 TAB, U+000A LF, U+000C FF,
 * U+000D CR and U+0020 SPACE, stored as a NUL-terminated C string so that it
 * can be passed directly as the accept/reject set of strspn() / strcspn().
 * Note that those functions stop at the first NUL byte of the scanned string,
 * so they are only suitable for input known not to contain embedded NULs. */
const char *ascii_whitespace = "\x09\x0A\x0C\x0D\x20";
27
28 /* https://infra.spec.whatwg.org/#strip-and-collapse-ascii-whitespace */
dom_strip_and_collapse_ascii_whitespace(zend_string * input)29 zend_string *dom_strip_and_collapse_ascii_whitespace(zend_string *input)
30 {
31 if (input == zend_empty_string) {
32 return input;
33 }
34
35 ZEND_ASSERT(!ZSTR_IS_INTERNED(input));
36 ZEND_ASSERT(GC_REFCOUNT(input) == 1);
37
38 char *write_ptr = ZSTR_VAL(input);
39
40 const char *start = ZSTR_VAL(input);
41 const char *current = start;
42 const char *end = current + ZSTR_LEN(input);
43
44 current += strspn(current, ascii_whitespace);
45
46 while (current < end) {
47 /* Copy non-whitespace */
48 size_t non_whitespace_len = strcspn(current, ascii_whitespace);
49 /* If the pointers are equal, we still haven't encountered collapsable or strippable whitespace. */
50 if (write_ptr != current) {
51 memmove(write_ptr, current, non_whitespace_len);
52 }
53 current += non_whitespace_len;
54 write_ptr += non_whitespace_len;
55
56 /* Skip whitespace */
57 current += strspn(current, ascii_whitespace);
58 if (current < end) {
59 /* Only make a space when we're not yet at the end of the input, because that means more non-whitespace
60 * input is to come. */
61 *write_ptr++ = ' ';
62 }
63 }
64
65 *write_ptr = '\0';
66
67 size_t len = write_ptr - start;
68 if (len != ZSTR_LEN(input)) {
69 return zend_string_truncate(input, len, false);
70 } else {
71 /* Forget the hash value since we may have transformed non-space-whitespace into spaces. */
72 zend_string_forget_hash_val(input);
73 return input;
74 }
75 }
76
77 #endif /* HAVE_LIBXML && HAVE_DOM */
78