xref: /PHP-8.1/ext/filter/sanitizing_filters.c (revision 01b3fc03)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Derick Rethans <derick@php.net>                             |
14   +----------------------------------------------------------------------+
15 */
16 
17 #include "php_filter.h"
18 #include "filter_private.h"
19 #include "zend_smart_str.h"
20 
21 /* {{{ STRUCTS */
/* Lookup table with one entry per possible byte value (indexed by unsigned
 * char). filter_map_update() stores a flag per byte and filter_map_apply()
 * keeps only the bytes whose entry is non-zero. */
22 typedef unsigned long filter_map[256];
23 /* }}} */
24 
25 /* {{{ HELPER FUNCTIONS */
/* Replace selected bytes of a string zval with decimal numeric entities.
 *
 * value: string zval, rewritten in place with a newly built string.
 * chars: 256-entry table indexed by byte value; a non-zero entry means the
 *        byte is emitted as "&#<decimal>;" instead of being copied verbatim.
 *
 * An empty input string is left untouched.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur = (const unsigned char *) Z_STRVAL_P(value);
	const unsigned char *end = cur + Z_STRLEN_P(value);

	/* Nothing to encode; keep the existing (empty) string as is. */
	if (cur == end) {
		return;
	}

	for (; cur < end; cur++) {
		if (!chars[*cur]) {
			/* Byte is not flagged: copy it through unchanged. */
			smart_str_appendc(&out, *cur);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong) *cur);
		smart_str_appendc(&out, ';');
	}

	smart_str_0(&out);

	/* Release the old string and hand the new one over to the zval. */
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
53 
/* Upper-case hexadecimal digits, indexed by nibble value; used by
 * php_filter_encode_url() to build the two digits of a %XX escape. */
54 static const unsigned char hexchars[] = "0123456789ABCDEF";
55 
/* Basic character classes, concatenated at compile time into the
 * allowed-character lists used by the filters in this file. */
56 #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
57 #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
58 #define DIGIT       "0123456789"
59 
/* Bytes that php_filter_encoded() hands to php_filter_encode_url() as the
 * set to copy through without percent-encoding. */
60 #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
61 
/* Percent-encode a string zval in place.
 *
 * value:      string zval; replaced with a newly allocated encoded string.
 * chars:      list of bytes that are copied through unchanged.
 * char_len:   number of bytes in `chars`.
 * high, low,
 * encode_nul: accepted for interface compatibility; not consulted here.
 *
 * Every byte that is not listed in `chars` is emitted as "%XX" using
 * upper-case hexadecimal digits.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	(void) high;
	(void) low;
	(void) encode_nul;

	/* Mark every byte value as "needs encoding". The whole table must be
	 * initialized: leaving the last slot (byte 0xFF) unset would make the
	 * treatment of that byte depend on indeterminate stack contents. */
	memset(tmp, 1, sizeof(tmp));

	/* Clear the mark for every byte that may pass through verbatim. */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Worst case: every input byte expands to three output bytes. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';

	/* Record the number of bytes actually written. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
96 
/* Remove bytes from a string zval according to the strip flags.
 *
 * value: string zval; replaced with a newly allocated, filtered string when
 *        at least one strip flag is set, otherwise left untouched.
 * flags: FILTER_FLAG_STRIP_HIGH drops bytes >= 127,
 *        FILTER_FLAG_STRIP_LOW drops bytes < 32,
 *        FILTER_FLAG_STRIP_BACKTICK drops '`'.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const unsigned char *input;
	size_t input_len;
	size_t pos;
	size_t kept = 0;
	zend_string *out;

	/* Fast path: no strip flag requested, nothing to do. */
	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
		return;
	}

	input = (const unsigned char *) Z_STRVAL_P(value);
	input_len = Z_STRLEN_P(value);

	/* The result can never be longer than the input. */
	out = zend_string_alloc(input_len, 0);

	for (pos = 0; pos < input_len; pos++) {
		if ((input[pos] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
			continue;
		}
		if ((input[pos] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
			continue;
		}
		if ((input[pos] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			continue;
		}
		ZSTR_VAL(out)[kept] = input[pos];
		kept++;
	}

	/* Terminate and shrink the logical length to what was kept. */
	ZSTR_VAL(out)[kept] = '\0';
	ZSTR_LEN(out) = kept;

	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
127 /* }}} */
128 
129 /* {{{ FILTER MAP HELPERS */
/* Reset a filter map so that no byte value is marked. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
134 
/* Store `flag` in the map entry of every byte in a NUL-terminated list.
 *
 * map:          table to update.
 * flag:         value written into each listed byte's entry.
 * allowed_list: NUL-terminated list of bytes; the terminator is not marked.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cursor;

	for (cursor = allowed_list; *cursor != '\0'; cursor++) {
		(*map)[*cursor] = flag;
	}
}
144 
/* Drop every byte of a string zval whose map entry is zero.
 *
 * value: string zval; replaced with a newly allocated string that contains
 *        only the bytes marked in `map`, in their original order.
 * map:   table indexed by byte value; non-zero means "keep".
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *input = (const unsigned char *) Z_STRVAL_P(value);
	const size_t input_len = Z_STRLEN_P(value);
	size_t pos;
	size_t kept = 0;
	zend_string *out;

	/* The result can never be longer than the input. */
	out = zend_string_alloc(input_len, 0);

	for (pos = 0; pos < input_len; pos++) {
		if (!(*map)[input[pos]]) {
			continue;
		}
		ZSTR_VAL(out)[kept] = input[pos];
		kept++;
	}

	/* Terminate and record the filtered length. */
	ZSTR_VAL(out)[kept] = '\0';
	ZSTR_LEN(out) = kept;

	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
166 /* }}} */
167 
168 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)169 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
170 {
171 	size_t new_len;
172 	unsigned char enc[256] = {0};
173 
174 	if (!Z_REFCOUNTED_P(value)) {
175 		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
176 	}
177 
178 	/* strip high/strip low ( see flags )*/
179 	php_filter_strip(value, flags);
180 
181 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
182 		enc['\''] = enc['"'] = 1;
183 	}
184 	if (flags & FILTER_FLAG_ENCODE_AMP) {
185 		enc['&'] = 1;
186 	}
187 	if (flags & FILTER_FLAG_ENCODE_LOW) {
188 		memset(enc, 1, 32);
189 	}
190 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
191 		memset(enc + 127, 1, sizeof(enc) - 127);
192 	}
193 
194 	php_filter_encode_html(value, enc);
195 
196 	/* strip tags, implicitly also removes \0 chars */
197 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, 0, 1);
198 	Z_STRLEN_P(value) = new_len;
199 
200 	if (new_len == 0) {
201 		zval_ptr_dtor(value);
202 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
203 			ZVAL_NULL(value);
204 		} else {
205 			ZVAL_EMPTY_STRING(value);
206 		}
207 		return;
208 	}
209 }
210 /* }}} */
211 
212 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)213 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
214 {
215 	/* apply strip_high and strip_low filters */
216 	php_filter_strip(value, flags);
217 	/* urlencode */
218 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
219 }
220 /* }}} */
221 
222 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)223 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
224 {
225 	unsigned char enc[256] = {0};
226 
227 	php_filter_strip(value, flags);
228 
229 	/* encodes ' " < > & \0 to numerical entities */
230 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
231 
232 	/* if strip low is not set, then we encode them as &#xx; */
233 	memset(enc, 1, 32);
234 
235 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
236 		memset(enc + 127, 1, sizeof(enc) - 127);
237 	}
238 
239 	php_filter_encode_html(value, enc);
240 }
241 /* }}} */
242 
243 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)244 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
245 {
246 	zend_string *buf;
247 	int quotes;
248 
249 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
250 		quotes = ENT_QUOTES;
251 	} else {
252 		quotes = ENT_NOQUOTES;
253 	}
254 	buf = php_escape_html_entities_ex(
255 		(unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
256 		/* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
257 	zval_ptr_dtor(value);
258 	ZVAL_STR(value, buf);
259 }
260 /* }}} */
261 
262 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)263 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
264 {
265 	/* Only if no flags are set (optimization) */
266 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
267 		unsigned char enc[256] = {0};
268 
269 		php_filter_strip(value, flags);
270 
271 		if (flags & FILTER_FLAG_ENCODE_AMP) {
272 			enc['&'] = 1;
273 		}
274 		if (flags & FILTER_FLAG_ENCODE_LOW) {
275 			memset(enc, 1, 32);
276 		}
277 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
278 			memset(enc + 127, 1, sizeof(enc) - 127);
279 		}
280 
281 		php_filter_encode_html(value, enc);
282 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
283 		zval_ptr_dtor(value);
284 		ZVAL_NULL(value);
285 	}
286 }
287 /* }}} */
288 
289 /* {{{ php_filter_email */
/* Character classes that php_filter_url() concatenates (together with
 * LOWALPHA, HIALPHA and DIGIT) into its list of bytes to keep. */
290 #define SAFE        "$-_.+"
291 #define EXTRA       "!*'(),"
292 #define NATIONAL    "{}|\\^~[]`"
293 #define PUNCTUATION "<>#%\""
294 #define RESERVED    ";/?:@&="
295 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)296 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
297 {
298 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
299 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
300 	filter_map     map;
301 
302 	filter_map_init(&map);
303 	filter_map_update(&map, 1, allowed_list);
304 	filter_map_apply(value, &map);
305 }
306 /* }}} */
307 
308 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)309 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
310 {
311 	/* Strip all chars not part of section 5 of
312 	 * http://www.faqs.org/rfcs/rfc1738.html */
313 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
314 	filter_map     map;
315 
316 	filter_map_init(&map);
317 	filter_map_update(&map, 1, allowed_list);
318 	filter_map_apply(value, &map);
319 }
320 /* }}} */
321 
322 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)323 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
324 {
325 	/* strip everything [^0-9+-] */
326 	const unsigned char allowed_list[] = "+-" DIGIT;
327 	filter_map     map;
328 
329 	filter_map_init(&map);
330 	filter_map_update(&map, 1, allowed_list);
331 	filter_map_apply(value, &map);
332 }
333 /* }}} */
334 
335 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)336 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
337 {
338 	/* strip everything [^0-9+-] */
339 	const unsigned char allowed_list[] = "+-" DIGIT;
340 	filter_map     map;
341 
342 	filter_map_init(&map);
343 	filter_map_update(&map, 1, allowed_list);
344 
345 	/* depending on flags, strip '.', 'e', ",", "'" */
346 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
347 		filter_map_update(&map, 2, (const unsigned char *) ".");
348 	}
349 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
350 		filter_map_update(&map, 3,  (const unsigned char *) ",");
351 	}
352 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
353 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
354 	}
355 	filter_map_apply(value, &map);
356 }
357 /* }}} */
358 
359 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)360 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
361 {
362 	zend_string *buf = php_addslashes(Z_STR_P(value));
363 
364 	zval_ptr_dtor(value);
365 	ZVAL_STR(value, buf);
366 }
367 /* }}} */
368