xref: /PHP-7.3/ext/filter/sanitizing_filters.c (revision ceae8166)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2018 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18 
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "zend_smart_str.h"
22 
23 /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0..255).
 * The helpers in this file index it with an unsigned char and treat a
 * non-zero slot as "this byte is allowed". */
typedef unsigned long filter_map[256];
25 /* }}} */
26 
27 /* {{{ HELPER FUNCTIONS */
/* Replace every byte of `value` that is flagged in `chars` with a decimal
 * numeric character reference ("&#NNN;").
 *
 * value - string zval; left untouched when empty, otherwise its string is
 *         released and replaced by the newly built one.
 * chars - table indexed by byte value; a non-zero entry means "encode".
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	const size_t in_len = Z_STRLEN_P(value);
	const unsigned char *in = (const unsigned char *)Z_STRVAL_P(value);
	smart_str out = {0};
	size_t pos;

	/* Nothing to encode: keep the original (empty) string as is. */
	if (in_len == 0) {
		return;
	}

	for (pos = 0; pos < in_len; pos++) {
		const unsigned char byte = in[pos];

		if (!chars[byte]) {
			/* Byte is not flagged: copy it through unchanged. */
			smart_str_appendc(&out, byte);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)byte);
		smart_str_appendc(&out, ';');
	}

	/* Terminate the buffer, then swap it in for the old string. */
	smart_str_0(&out);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
55 
/* Digit table used to emit the two hex digits of a %XX escape. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character classes, written as string literals so they can be
 * concatenated into allow-lists. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that the URL encoder copies through without escaping. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
63 
/* Percent-encode (%XX, upper-case hex) every byte of `value` that does not
 * appear in the allow-list `chars`.
 *
 * value      - string zval; its string is released and replaced by the
 *              newly allocated, encoded string.
 * chars      - bytes that are copied through unescaped.
 * char_len   - number of bytes in `chars`.
 * high, low,
 * encode_nul - kept for interface compatibility; not consulted. The
 *              allow-lists passed in never contain high, low or NUL bytes,
 *              so those are always encoded anyway.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	(void)high;
	(void)low;
	(void)encode_nul;

	/* Start from "encode everything". The whole table has to be
	 * initialised: leaving the last slot out would make the handling of
	 * byte 0xFF depend on whatever happened to be on the stack. */
	memset(tmp, 1, sizeof(tmp));

	/* Clear the flag for every byte on the allow-list. */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Sized for the worst case of three output bytes per input byte. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
108 
/* Remove bytes from `value` according to the FILTER_FLAG_STRIP_* bits in
 * `flags`:
 *   STRIP_HIGH     drops bytes >= 127,
 *   STRIP_LOW      drops bytes < 32,
 *   STRIP_BACKTICK drops '`'.
 * When none of those bits is set the zval is left untouched; otherwise its
 * string is released and replaced by a newly allocated one.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const unsigned char *src;
	zend_string *kept;
	size_t rd;
	size_t wr = 0;

	/* Fast path: no strip flag requested. */
	if ((flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK)) == 0) {
		return;
	}

	src = (const unsigned char *)Z_STRVAL_P(value);
	/* The result can never be longer than the input. */
	kept = zend_string_alloc(Z_STRLEN_P(value), 0);

	for (rd = 0; rd < Z_STRLEN_P(value); rd++) {
		const unsigned char ch = src[rd];
		const int drop =
			((ch >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) ||
			((ch < 32) && (flags & FILTER_FLAG_STRIP_LOW)) ||
			((ch == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK));

		if (drop) {
			continue;
		}
		ZSTR_VAL(kept)[wr++] = ch;
	}

	/* Terminate and record the real length before swapping the string in. */
	ZSTR_VAL(kept)[wr] = '\0';
	ZSTR_LEN(kept) = wr;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, kept);
}
139 /* }}} */
140 
141 /* {{{ FILTER MAP HELPERS */
/* Reset every slot of `map` to zero, i.e. no byte is allowed. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
146 
/* Store `flag` in the map slot of every byte listed in `allowed_list`.
 *
 * allowed_list - NUL-terminated list of bytes; the terminator itself is
 *                not entered into the map.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cur;

	for (cur = allowed_list; *cur != '\0'; cur++) {
		(*map)[*cur] = flag;
	}
}
156 
/* Keep only those bytes of `value` whose slot in `map` is non-zero.
 * The zval's string is released and replaced by a newly allocated one.
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *in = (const unsigned char *)Z_STRVAL_P(value);
	/* The filtered result can never be longer than the input. */
	zend_string *kept = zend_string_alloc(Z_STRLEN_P(value), 0);
	size_t rd;
	size_t wr = 0;

	for (rd = 0; rd < Z_STRLEN_P(value); rd++) {
		if (!(*map)[in[rd]]) {
			/* Byte is not in the map: drop it. */
			continue;
		}
		ZSTR_VAL(kept)[wr++] = in[rd];
	}

	/* Terminate and record the real length before swapping the string in. */
	ZSTR_VAL(kept)[wr] = '\0';
	ZSTR_LEN(kept) = wr;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, kept);
}
178 /* }}} */
179 
180 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)181 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
182 {
183 	size_t new_len;
184 	unsigned char enc[256] = {0};
185 
186 	if (!Z_REFCOUNTED_P(value)) {
187 		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
188 	}
189 
190 	/* strip high/strip low ( see flags )*/
191 	php_filter_strip(value, flags);
192 
193 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
194 		enc['\''] = enc['"'] = 1;
195 	}
196 	if (flags & FILTER_FLAG_ENCODE_AMP) {
197 		enc['&'] = 1;
198 	}
199 	if (flags & FILTER_FLAG_ENCODE_LOW) {
200 		memset(enc, 1, 32);
201 	}
202 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
203 		memset(enc + 127, 1, sizeof(enc) - 127);
204 	}
205 
206 	php_filter_encode_html(value, enc);
207 
208 	/* strip tags, implicitly also removes \0 chars */
209 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
210 	Z_STRLEN_P(value) = new_len;
211 
212 	if (new_len == 0) {
213 		zval_ptr_dtor(value);
214 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
215 			ZVAL_NULL(value);
216 		} else {
217 			ZVAL_EMPTY_STRING(value);
218 		}
219 		return;
220 	}
221 }
222 /* }}} */
223 
224 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)225 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
226 {
227 	/* apply strip_high and strip_low filters */
228 	php_filter_strip(value, flags);
229 	/* urlencode */
230 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
231 }
232 /* }}} */
233 
234 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)235 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
236 {
237 	unsigned char enc[256] = {0};
238 
239 	php_filter_strip(value, flags);
240 
241 	/* encodes ' " < > & \0 to numerical entities */
242 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
243 
244 	/* if strip low is not set, then we encode them as &#xx; */
245 	memset(enc, 1, 32);
246 
247 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
248 		memset(enc + 127, 1, sizeof(enc) - 127);
249 	}
250 
251 	php_filter_encode_html(value, enc);
252 }
253 /* }}} */
254 
255 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)256 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
257 {
258 	zend_string *buf;
259 	int quotes;
260 
261 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
262 		quotes = ENT_QUOTES;
263 	} else {
264 		quotes = ENT_NOQUOTES;
265 	}
266 	buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
267 	zval_ptr_dtor(value);
268 	ZVAL_STR(value, buf);
269 }
270 /* }}} */
271 
272 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)273 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
274 {
275 	/* Only if no flags are set (optimization) */
276 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
277 		unsigned char enc[256] = {0};
278 
279 		php_filter_strip(value, flags);
280 
281 		if (flags & FILTER_FLAG_ENCODE_AMP) {
282 			enc['&'] = 1;
283 		}
284 		if (flags & FILTER_FLAG_ENCODE_LOW) {
285 			memset(enc, 1, 32);
286 		}
287 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
288 			memset(enc + 127, 1, sizeof(enc) - 127);
289 		}
290 
291 		php_filter_encode_html(value, enc);
292 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
293 		zval_ptr_dtor(value);
294 		ZVAL_NULL(value);
295 	}
296 }
297 /* }}} */
298 
299 /* {{{ php_filter_email */
/* Further character classes, written as string literals so they can be
 * concatenated into allow-lists. */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
305 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)306 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
307 {
308 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
309 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
310 	filter_map     map;
311 
312 	filter_map_init(&map);
313 	filter_map_update(&map, 1, allowed_list);
314 	filter_map_apply(value, &map);
315 }
316 /* }}} */
317 
318 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)319 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
320 {
321 	/* Strip all chars not part of section 5 of
322 	 * http://www.faqs.org/rfcs/rfc1738.html */
323 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
324 	filter_map     map;
325 
326 	filter_map_init(&map);
327 	filter_map_update(&map, 1, allowed_list);
328 	filter_map_apply(value, &map);
329 }
330 /* }}} */
331 
332 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)333 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
334 {
335 	/* strip everything [^0-9+-] */
336 	const unsigned char allowed_list[] = "+-" DIGIT;
337 	filter_map     map;
338 
339 	filter_map_init(&map);
340 	filter_map_update(&map, 1, allowed_list);
341 	filter_map_apply(value, &map);
342 }
343 /* }}} */
344 
345 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)346 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
347 {
348 	/* strip everything [^0-9+-] */
349 	const unsigned char allowed_list[] = "+-" DIGIT;
350 	filter_map     map;
351 
352 	filter_map_init(&map);
353 	filter_map_update(&map, 1, allowed_list);
354 
355 	/* depending on flags, strip '.', 'e', ",", "'" */
356 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
357 		filter_map_update(&map, 2, (const unsigned char *) ".");
358 	}
359 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
360 		filter_map_update(&map, 3,  (const unsigned char *) ",");
361 	}
362 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
363 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
364 	}
365 	filter_map_apply(value, &map);
366 }
367 /* }}} */
368 
369 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)370 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
371 {
372 	/* This filter is used by both 'add_slashes' & 'magic_quotes' (legacy) */
373 
374 	zend_string *buf;
375 
376 	/* just call php_addslashes quotes */
377 	buf = php_addslashes(Z_STR_P(value));
378 
379 	zval_ptr_dtor(value);
380 	ZVAL_STR(value, buf);
381 }
382 /* }}} */
383 
384 /*
385  * Local variables:
386  * tab-width: 4
387  * c-basic-offset: 4
388  * End:
389  * vim600: noet sw=4 ts=4 fdm=marker
390  * vim<600: noet sw=4 ts=4
391  */
392