xref: /PHP-7.2/ext/filter/sanitizing_filters.c (revision 7a7ec01a)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2018 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18 
19 /* $Id$ */
20 
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "zend_smart_str.h"
24 
25 /* {{{ STRUCTS */
26 typedef unsigned long filter_map[256]; /* lookup table with one slot per possible byte value; filter_map_apply() keeps a byte only when its slot is non-zero */
27 /* }}} */
28 
29 /* {{{ HELPER FUNCTIONS */
/*
 * Replace the string held in `value` with a copy in which every byte b
 * with chars[b] != 0 is written as the decimal numeric character
 * reference "&#<b>;". All other bytes are copied through unchanged.
 *
 * `chars` is indexed by byte value. An empty string is left untouched.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur;
	const unsigned char *end;

	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	cur = (const unsigned char *)Z_STRVAL_P(value);
	end = cur + Z_STRLEN_P(value);

	for (; cur < end; cur++) {
		const unsigned char byte = *cur;

		if (!chars[byte]) {
			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
			smart_str_appendc(&out, byte);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)byte);
		smart_str_appendc(&out, ';');
	}

	/* Terminate the result, drop the old string and hand over the new one. */
	smart_str_0(&out);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
57 
58 static const unsigned char hexchars[] = "0123456789ABCDEF";
59 
60 #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
61 #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
62 #define DIGIT       "0123456789"
63 
64 #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
65 
/*
 * Percent-encode the string held in `value`.
 *
 * Every byte that does not occur in the allow-list `chars` (`char_len`
 * bytes long) is emitted as "%XX" using upper-case hex digits; bytes on
 * the allow-list are copied through unchanged. The zval's previous
 * string is released and replaced by the newly built one.
 *
 * `high`, `low` and `encode_nul` are kept for interface compatibility
 * but are not consulted: the table starts out as "encode everything"
 * and only the bytes listed in `chars` are exempted.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	/* Mark every byte as "needs encoding". The whole table must be filled:
	 * stopping one slot short would leave tmp[255] uninitialized, making
	 * the treatment of byte 0xFF depend on whatever was on the stack. */
	memset(tmp, 1, sizeof(tmp));

	/* Exempt the allow-listed bytes. */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Worst case: every input byte expands to three output bytes. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	/* Record the length actually written, not the worst-case allocation. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
110 
/*
 * Remove unwanted bytes from the string held in `value`, as selected by
 * `flags`:
 *   FILTER_FLAG_STRIP_HIGH     - drop bytes >= 127
 *   FILTER_FLAG_STRIP_LOW      - drop bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK - drop '`'
 *
 * When none of these flags is set the value is left untouched.
 * Otherwise the zval's string is released and replaced by a newly
 * allocated one containing only the surviving bytes.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const unsigned char *str;
	size_t len;
	size_t i;
	size_t c; /* output index; size_t so it cannot overflow before `len` does */
	zend_string *buf;

	/* Optimization for if no strip flags are set */
	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
		return;
	}

	str = (const unsigned char *)Z_STRVAL_P(value);
	len = Z_STRLEN_P(value);
	buf = zend_string_alloc(len + 1, 0);
	c = 0;
	for (i = 0; i < len; i++) {
		if ((str[i] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
			/* stripped: high byte */
		} else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
			/* stripped: low (control) byte */
		} else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			/* stripped: backtick */
		} else {
			ZSTR_VAL(buf)[c] = str[i];
			++c;
		}
	}
	/* update zval string data */
	ZSTR_VAL(buf)[c] = '\0';
	ZSTR_LEN(buf) = c;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
141 /* }}} */
142 
143 /* {{{ FILTER MAP HELPERS */
/* Clear every slot of the lookup table `*map` to zero. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
148 
/*
 * Store `flag` in the table slot of every byte that appears in the
 * NUL-terminated string `allowed_list`. Slots of other bytes are left
 * as they were.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cursor;

	for (cursor = allowed_list; *cursor != '\0'; cursor++) {
		(*map)[*cursor] = flag;
	}
}
158 
/*
 * Keep only those bytes of the string held in `value` whose slot in
 * `*map` is non-zero; all other bytes are dropped. The zval's string is
 * released and replaced by a newly allocated one holding the result.
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *str;
	size_t len;
	size_t i;
	size_t c; /* output index; size_t so it cannot overflow before `len` does */
	zend_string *buf;

	str = (const unsigned char *)Z_STRVAL_P(value);
	len = Z_STRLEN_P(value);
	buf = zend_string_alloc(len + 1, 0);
	c = 0;
	for (i = 0; i < len; i++) {
		if ((*map)[str[i]]) {
			ZSTR_VAL(buf)[c] = str[i];
			++c;
		}
	}
	/* update zval string data */
	ZSTR_VAL(buf)[c] = '\0';
	ZSTR_LEN(buf) = c;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
181 /* }}} */
182 
183 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)184 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
185 {
186 	size_t new_len;
187 	unsigned char enc[256] = {0};
188 
189 	if (!Z_REFCOUNTED_P(value)) {
190 		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
191 	}
192 
193 	/* strip high/strip low ( see flags )*/
194 	php_filter_strip(value, flags);
195 
196 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
197 		enc['\''] = enc['"'] = 1;
198 	}
199 	if (flags & FILTER_FLAG_ENCODE_AMP) {
200 		enc['&'] = 1;
201 	}
202 	if (flags & FILTER_FLAG_ENCODE_LOW) {
203 		memset(enc, 1, 32);
204 	}
205 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
206 		memset(enc + 127, 1, sizeof(enc) - 127);
207 	}
208 
209 	php_filter_encode_html(value, enc);
210 
211 	/* strip tags, implicitly also removes \0 chars */
212 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
213 	Z_STRLEN_P(value) = new_len;
214 
215 	if (new_len == 0) {
216 		zval_ptr_dtor(value);
217 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
218 			ZVAL_NULL(value);
219 		} else {
220 			ZVAL_EMPTY_STRING(value);
221 		}
222 		return;
223 	}
224 }
225 /* }}} */
226 
227 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)228 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
229 {
230 	/* apply strip_high and strip_low filters */
231 	php_filter_strip(value, flags);
232 	/* urlencode */
233 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
234 }
235 /* }}} */
236 
237 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)238 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
239 {
240 	unsigned char enc[256] = {0};
241 
242 	php_filter_strip(value, flags);
243 
244 	/* encodes ' " < > & \0 to numerical entities */
245 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
246 
247 	/* if strip low is not set, then we encode them as &#xx; */
248 	memset(enc, 1, 32);
249 
250 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
251 		memset(enc + 127, 1, sizeof(enc) - 127);
252 	}
253 
254 	php_filter_encode_html(value, enc);
255 }
256 /* }}} */
257 
258 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)259 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
260 {
261 	zend_string *buf;
262 	int quotes;
263 
264 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
265 		quotes = ENT_QUOTES;
266 	} else {
267 		quotes = ENT_NOQUOTES;
268 	}
269 	buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
270 	zval_ptr_dtor(value);
271 	ZVAL_STR(value, buf);
272 }
273 /* }}} */
274 
275 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)276 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
277 {
278 	/* Only if no flags are set (optimization) */
279 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
280 		unsigned char enc[256] = {0};
281 
282 		php_filter_strip(value, flags);
283 
284 		if (flags & FILTER_FLAG_ENCODE_AMP) {
285 			enc['&'] = 1;
286 		}
287 		if (flags & FILTER_FLAG_ENCODE_LOW) {
288 			memset(enc, 1, 32);
289 		}
290 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
291 			memset(enc + 127, 1, sizeof(enc) - 127);
292 		}
293 
294 		php_filter_encode_html(value, enc);
295 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
296 		zval_ptr_dtor(value);
297 		ZVAL_NULL(value);
298 	}
299 }
300 /* }}} */
301 
302 /* {{{ php_filter_email */
303 #define SAFE        "$-_.+"
304 #define EXTRA       "!*'(),"
305 #define NATIONAL    "{}|\\^~[]`"
306 #define PUNCTUATION "<>#%\""
307 #define RESERVED    ";/?:@&="
308 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)309 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
310 {
311 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
312 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
313 	filter_map     map;
314 
315 	filter_map_init(&map);
316 	filter_map_update(&map, 1, allowed_list);
317 	filter_map_apply(value, &map);
318 }
319 /* }}} */
320 
321 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)322 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
323 {
324 	/* Strip all chars not part of section 5 of
325 	 * http://www.faqs.org/rfcs/rfc1738.html */
326 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
327 	filter_map     map;
328 
329 	filter_map_init(&map);
330 	filter_map_update(&map, 1, allowed_list);
331 	filter_map_apply(value, &map);
332 }
333 /* }}} */
334 
335 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)336 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
337 {
338 	/* strip everything [^0-9+-] */
339 	const unsigned char allowed_list[] = "+-" DIGIT;
340 	filter_map     map;
341 
342 	filter_map_init(&map);
343 	filter_map_update(&map, 1, allowed_list);
344 	filter_map_apply(value, &map);
345 }
346 /* }}} */
347 
348 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)349 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
350 {
351 	/* strip everything [^0-9+-] */
352 	const unsigned char allowed_list[] = "+-" DIGIT;
353 	filter_map     map;
354 
355 	filter_map_init(&map);
356 	filter_map_update(&map, 1, allowed_list);
357 
358 	/* depending on flags, strip '.', 'e', ",", "'" */
359 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
360 		filter_map_update(&map, 2, (const unsigned char *) ".");
361 	}
362 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
363 		filter_map_update(&map, 3,  (const unsigned char *) ",");
364 	}
365 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
366 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
367 	}
368 	filter_map_apply(value, &map);
369 }
370 /* }}} */
371 
372 /* {{{ php_filter_magic_quotes */
php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)373 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
374 {
375 	zend_string *buf;
376 
377 	/* just call php_addslashes quotes */
378 	buf = php_addslashes(Z_STR_P(value), 0);
379 
380 	zval_ptr_dtor(value);
381 	ZVAL_STR(value, buf);
382 }
383 /* }}} */
384 
385 /*
386  * Local variables:
387  * tab-width: 4
388  * c-basic-offset: 4
389  * End:
390  * vim600: noet sw=4 ts=4 fdm=marker
391  * vim<600: noet sw=4 ts=4
392  */
393