xref: /PHP-7.0/ext/filter/sanitizing_filters.c (revision 478f119a)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2017 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18 
19 /* $Id$ */
20 
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "zend_smart_str.h"
24 
25 /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0..255). The helpers
 * in this file store a non-zero flag in the slot of every byte that is
 * allowed to remain in a sanitized string. */
typedef unsigned long filter_map[256];
27 /* }}} */
28 
29 /* {{{ HELPER FUNCTIONS */
/* Rewrite the string held in `value`, replacing every byte b for which
 * chars[b] is non-zero by its decimal numeric entity ("&#<b>;"). All other
 * bytes are copied through unchanged.
 *
 * value: string zval, modified in place. An empty string is left untouched;
 *        otherwise the old string is released and a newly built one is stored.
 * chars: 256-entry table indexed by byte value; non-zero means "encode".
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur;
	const unsigned char *end;

	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	cur = (const unsigned char *)Z_STRVAL_P(value);
	end = cur + Z_STRLEN_P(value);

	for (; cur < end; cur++) {
		if (!chars[*cur]) {
			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
			smart_str_appendc(&out, *cur);
			continue;
		}

		/* Emit "&#NNN;" where NNN is the byte value in decimal. */
		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)*cur);
		smart_str_appendc(&out, ';');
	}

	smart_str_0(&out);

	/* Swap the freshly built string into the zval. */
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
57 
/* Uppercase hexadecimal digits, indexed by nibble value (0..15); used to
 * build the two digits of a "%XX" escape. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Basic character classes, concatenated below and in the filters further
 * down to build allow-lists. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that the URL encoder passes through unescaped by default. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
65 
/* Percent-encode the string held in `value`.
 *
 * Every byte that does not appear in the allow-list `chars` is replaced by
 * "%XX" (uppercase hex); allowed bytes are copied through unchanged. The old
 * string is released and a newly allocated one is stored in `value`.
 *
 * value:      string zval, modified in place.
 * chars:      allow-list of bytes that must NOT be encoded.
 * char_len:   number of bytes in `chars`.
 * high, low,
 * encode_nul: currently unused. They were meant to force encoding of bytes
 *             >= 127, bytes < 32 and NUL respectively, but such bytes are
 *             already encoded unless the caller explicitly puts them in
 *             `chars`, which the allow-list used in this file never does.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char must_encode[256];
	const unsigned char *allowed = chars;
	const unsigned char *allowed_end = chars + char_len;
	const unsigned char *in;
	const unsigned char *in_end;
	unsigned char *out;
	zend_string *str;

	(void)high;
	(void)low;
	(void)encode_nul;

	/* Start from "encode everything". The whole table must be initialized:
	 * leaving the last slot out would make the handling of byte 0xFF depend
	 * on whatever happened to be on the stack. */
	memset(must_encode, 1, sizeof(must_encode));

	/* Clear the entries for the allowed bytes. */
	while (allowed < allowed_end) {
		must_encode[*allowed++] = 0;
	}

	/* Worst case: every input byte expands to three output bytes. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	out = (unsigned char *) ZSTR_VAL(str);
	in = (const unsigned char *) Z_STRVAL_P(value);
	in_end = in + Z_STRLEN_P(value);

	for (; in < in_end; in++) {
		if (must_encode[*in]) {
			*out++ = '%';
			*out++ = hexchars[*in >> 4];
			*out++ = hexchars[*in & 15];
		} else {
			*out++ = *in;
		}
	}
	*out = '\0';

	/* Record the number of bytes actually written, not the worst case. */
	ZSTR_LEN(str) = out - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
110 
/* Remove bytes from the string held in `value` according to the strip flags.
 *
 *   FILTER_FLAG_STRIP_HIGH      drops bytes >= 127
 *   FILTER_FLAG_STRIP_LOW       drops bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK  drops '`'
 *
 * value: string zval, modified in place. When none of the strip flags is set
 *        it is left untouched; otherwise the old string is released and a
 *        newly allocated one is stored, even if nothing was removed.
 * flags: filter flag bitmask; only the three strip flags are examined.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const unsigned char *src;
	/* size_t, not int: the string length is a size_t, so int indices would
	 * cause a signed/unsigned comparison and overflow on very long input. */
	size_t len, i, kept;
	zend_string *buf;

	/* Optimization for if no strip flags are set */
	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
		return;
	}

	src = (const unsigned char *)Z_STRVAL_P(value);
	len = Z_STRLEN_P(value);
	buf = zend_string_alloc(len + 1, 0);
	kept = 0;

	for (i = 0; i < len; i++) {
		unsigned char ch = src[i];

		if ((ch >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
			continue;
		}
		if ((ch < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
			continue;
		}
		if ((ch == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			continue;
		}
		ZSTR_VAL(buf)[kept++] = ch;
	}

	/* update zval string data */
	ZSTR_VAL(buf)[kept] = '\0';
	ZSTR_LEN(buf) = kept;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
140 /* }}} */
141 
142 /* {{{ FILTER MAP HELPERS */
/* Reset every entry of `map` to zero, i.e. "no byte is allowed". */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
147 
/* Store `flag` in the map slot of every byte of the NUL-terminated string
 * `allowed_list`. Slots of bytes not in the list are left as they are. */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *p;

	for (p = allowed_list; *p != '\0'; ++p) {
		(*map)[*p] = flag;
	}
}
157 
/* Keep only the bytes of the string in `value` whose slot in `map` is
 * non-zero; every other byte is dropped.
 *
 * value: string zval, modified in place. The old string is released and a
 *        newly allocated one holding the surviving bytes is stored.
 * map:   lookup table indexed by byte value; non-zero means "keep".
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *src;
	/* size_t, not int: the string length is a size_t, so int indices would
	 * cause a signed/unsigned comparison and overflow on very long input. */
	size_t len, i, kept;
	zend_string *buf;

	src = (const unsigned char *)Z_STRVAL_P(value);
	len = Z_STRLEN_P(value);
	buf = zend_string_alloc(len + 1, 0);
	kept = 0;

	for (i = 0; i < len; i++) {
		if ((*map)[src[i]]) {
			ZSTR_VAL(buf)[kept++] = src[i];
		}
	}

	/* update zval string data */
	ZSTR_VAL(buf)[kept] = '\0';
	ZSTR_LEN(buf) = kept;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
179 /* }}} */
180 
181 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)182 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
183 {
184 	size_t new_len;
185 	unsigned char enc[256] = {0};
186 
187 	if (!Z_REFCOUNTED_P(value)) {
188 		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
189 	}
190 
191 	/* strip high/strip low ( see flags )*/
192 	php_filter_strip(value, flags);
193 
194 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
195 		enc['\''] = enc['"'] = 1;
196 	}
197 	if (flags & FILTER_FLAG_ENCODE_AMP) {
198 		enc['&'] = 1;
199 	}
200 	if (flags & FILTER_FLAG_ENCODE_LOW) {
201 		memset(enc, 1, 32);
202 	}
203 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
204 		memset(enc + 127, 1, sizeof(enc) - 127);
205 	}
206 
207 	php_filter_encode_html(value, enc);
208 
209 	/* strip tags, implicitly also removes \0 chars */
210 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
211 	Z_STRLEN_P(value) = new_len;
212 
213 	if (new_len == 0) {
214 		zval_dtor(value);
215 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
216 			ZVAL_NULL(value);
217 		} else {
218 			ZVAL_EMPTY_STRING(value);
219 		}
220 		return;
221 	}
222 }
223 /* }}} */
224 
225 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)226 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
227 {
228 	/* apply strip_high and strip_low filters */
229 	php_filter_strip(value, flags);
230 	/* urlencode */
231 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
232 }
233 /* }}} */
234 
235 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)236 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
237 {
238 	unsigned char enc[256] = {0};
239 
240 	php_filter_strip(value, flags);
241 
242 	/* encodes ' " < > & \0 to numerical entities */
243 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
244 
245 	/* if strip low is not set, then we encode them as &#xx; */
246 	memset(enc, 1, 32);
247 
248 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
249 		memset(enc + 127, 1, sizeof(enc) - 127);
250 	}
251 
252 	php_filter_encode_html(value, enc);
253 }
254 /* }}} */
255 
256 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)257 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
258 {
259 	zend_string *buf;
260 	int quotes;
261 
262 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
263 		quotes = ENT_QUOTES;
264 	} else {
265 		quotes = ENT_NOQUOTES;
266 	}
267 	buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
268 	zval_ptr_dtor(value);
269 	ZVAL_STR(value, buf);
270 }
271 /* }}} */
272 
273 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)274 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
275 {
276 	/* Only if no flags are set (optimization) */
277 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
278 		unsigned char enc[256] = {0};
279 
280 		php_filter_strip(value, flags);
281 
282 		if (flags & FILTER_FLAG_ENCODE_AMP) {
283 			enc['&'] = 1;
284 		}
285 		if (flags & FILTER_FLAG_ENCODE_LOW) {
286 			memset(enc, 1, 32);
287 		}
288 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
289 			memset(enc + 127, 1, sizeof(enc) - 127);
290 		}
291 
292 		php_filter_encode_html(value, enc);
293 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
294 		zval_dtor(value);
295 		ZVAL_NULL(value);
296 	}
297 }
298 /* }}} */
299 
300 /* {{{ php_filter_email */
/* Character classes combined by php_filter_url into its allow-list. The
 * names appear to follow the grammar groups of RFC 1738 section 5, which
 * php_filter_url cites. */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
306 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)307 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
308 {
309 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
310 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
311 	filter_map     map;
312 
313 	filter_map_init(&map);
314 	filter_map_update(&map, 1, allowed_list);
315 	filter_map_apply(value, &map);
316 }
317 /* }}} */
318 
319 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)320 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
321 {
322 	/* Strip all chars not part of section 5 of
323 	 * http://www.faqs.org/rfcs/rfc1738.html */
324 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
325 	filter_map     map;
326 
327 	filter_map_init(&map);
328 	filter_map_update(&map, 1, allowed_list);
329 	filter_map_apply(value, &map);
330 }
331 /* }}} */
332 
333 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)334 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
335 {
336 	/* strip everything [^0-9+-] */
337 	const unsigned char allowed_list[] = "+-" DIGIT;
338 	filter_map     map;
339 
340 	filter_map_init(&map);
341 	filter_map_update(&map, 1, allowed_list);
342 	filter_map_apply(value, &map);
343 }
344 /* }}} */
345 
346 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)347 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
348 {
349 	/* strip everything [^0-9+-] */
350 	const unsigned char allowed_list[] = "+-" DIGIT;
351 	filter_map     map;
352 
353 	filter_map_init(&map);
354 	filter_map_update(&map, 1, allowed_list);
355 
356 	/* depending on flags, strip '.', 'e', ",", "'" */
357 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
358 		filter_map_update(&map, 2, (const unsigned char *) ".");
359 	}
360 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
361 		filter_map_update(&map, 3,  (const unsigned char *) ",");
362 	}
363 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
364 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
365 	}
366 	filter_map_apply(value, &map);
367 }
368 /* }}} */
369 
370 /* {{{ php_filter_magic_quotes */
php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)371 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
372 {
373 	zend_string *buf;
374 
375 	/* just call php_addslashes quotes */
376 	buf = php_addslashes(Z_STR_P(value), 0);
377 
378 	zval_ptr_dtor(value);
379 	ZVAL_STR(value, buf);
380 }
381 /* }}} */
382 
383 /*
384  * Local variables:
385  * tab-width: 4
386  * c-basic-offset: 4
387  * End:
388  * vim600: noet sw=4 ts=4 fdm=marker
389  * vim<600: noet sw=4 ts=4
390  */
391