xref: /php-src/ext/filter/sanitizing_filters.c (revision 4df3dd76)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Derick Rethans <derick@php.net>                             |
14   +----------------------------------------------------------------------+
15 */
16 
17 #include "php_filter.h"
18 #include "filter_private.h"
19 #include "zend_smart_str.h"
20 
21 /* {{{ STRUCTS */
/* Per-byte lookup table with one entry for each of the 256 possible byte
 * values. filter_map_update() stores a flag in the entries of allowed bytes
 * and filter_map_apply() keeps only bytes whose entry is non-zero. */
22 typedef unsigned long filter_map[256];
23 /* }}} */
24 
25 /* {{{ HELPER FUNCTIONS */
/*
 * Replace selected bytes of the string in `value` with decimal numeric
 * character references.
 *
 * Each input byte b with chars[b] != 0 is emitted as "&#<b>;" (b printed as
 * an unsigned decimal number); every other byte is copied through as is.
 *
 * value: zval read through the string accessors. On return it holds the
 *        newly built string and the previous contents have been handed to
 *        zval_ptr_dtor(). A zero-length string is left untouched.
 * chars: lookup table indexed by byte value; it is indexed with every input
 *        byte, so it has to provide 256 entries.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	const unsigned char *cur = (const unsigned char *)Z_STRVAL_P(value);
	const unsigned char *end = cur + Z_STRLEN_P(value);
	smart_str out = {0};

	/* Nothing to encode: keep the existing (empty) string. */
	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	for (; cur < end; cur++) {
		const unsigned char byte = *cur;

		if (!chars[byte]) {
			/* Byte is not flagged in the table: copy it verbatim. */
			smart_str_appendc(&out, byte);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)byte);
		smart_str_appendc(&out, ';');
	}

	/* The input buffer is only released after the last byte has been read. */
	zval_ptr_dtor(value);
	ZVAL_STR(value, smart_str_extract(&out));
}
52 
53 static const unsigned char hexchars[] = "0123456789ABCDEF";
54 
55 #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
56 #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
57 #define DIGIT       "0123456789"
58 
59 #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
60 
php_filter_encode_url(zval * value,const unsigned char * chars,const int char_len,int high,int low,int encode_nul)61 static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
62 {
63 	unsigned char *p;
64 	unsigned char tmp[256];
65 	unsigned char *s = (unsigned char *)chars;
66 	unsigned char *e = s + char_len;
67 	zend_string *str;
68 
69 	memset(tmp, 1, sizeof(tmp)-1);
70 
71 	while (s < e) {
72 		tmp[*s++] = '\0';
73 	}
74 
75 	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
76 	p = (unsigned char *) ZSTR_VAL(str);
77 	s = (unsigned char *) Z_STRVAL_P(value);
78 	e = s + Z_STRLEN_P(value);
79 
80 	while (s < e) {
81 		if (tmp[*s]) {
82 			*p++ = '%';
83 			*p++ = hexchars[(unsigned char) *s >> 4];
84 			*p++ = hexchars[(unsigned char) *s & 15];
85 		} else {
86 			*p++ = *s;
87 		}
88 		s++;
89 	}
90 	*p = '\0';
91 	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
92 	zval_ptr_dtor(value);
93 	ZVAL_NEW_STR(value, str);
94 }
95 
/*
 * Remove bytes from the string in `value` according to the strip flags.
 *
 * A byte is dropped when any of the following holds:
 *   - FILTER_FLAG_STRIP_HIGH is set and the byte is >= 127
 *   - FILTER_FLAG_STRIP_LOW is set and the byte is < 32
 *   - FILTER_FLAG_STRIP_BACKTICK is set and the byte is '`'
 *
 * value: zval read through the string accessors. When at least one strip
 *        flag is set it is replaced by a newly allocated string holding the
 *        surviving bytes; otherwise it is left untouched.
 * flags: bit set; only the three strip flags are inspected here.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const zend_long strip_mask =
		FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK;
	const unsigned char *in;
	zend_string *out;
	size_t in_len;
	size_t pos;
	size_t kept = 0;

	/* Fast path: without any strip flag there is nothing to do. */
	if ((flags & strip_mask) == 0) {
		return;
	}

	in = (const unsigned char *)Z_STRVAL_P(value);
	in_len = Z_STRLEN_P(value);
	/* The result can never be longer than the input. */
	out = zend_string_alloc(in_len, 0);

	for (pos = 0; pos < in_len; pos++) {
		const unsigned char ch = in[pos];
		const int drop =
			((ch >= 127) && (flags & FILTER_FLAG_STRIP_HIGH))
			|| ((ch < 32) && (flags & FILTER_FLAG_STRIP_LOW))
			|| ((ch == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK));

		if (!drop) {
			ZSTR_VAL(out)[kept++] = ch;
		}
	}

	/* Terminate and shrink the logical length to what was kept. */
	ZSTR_VAL(out)[kept] = '\0';
	ZSTR_LEN(out) = kept;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
126 /* }}} */
127 
128 /* {{{ FILTER MAP HELPERS */
/*
 * Reset a filter map so that no byte value is marked as allowed.
 *
 * map: table to clear; every entry is set to 0.
 */
static void filter_map_init(filter_map *map)
{
	size_t idx;

	for (idx = 0; idx < sizeof(*map) / sizeof((*map)[0]); idx++) {
		(*map)[idx] = 0;
	}
}
133 
/*
 * Store `flag` in the map entry of every byte listed in `allowed_list`.
 *
 * map:          table to update; entries of unlisted bytes are not touched.
 * flag:         value written to each listed entry.
 * allowed_list: NUL-terminated list of byte values; the terminator itself is
 *               not added to the map.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cursor;

	for (cursor = allowed_list; *cursor != '\0'; cursor++) {
		(*map)[*cursor] = flag;
	}
}
143 
/*
 * Keep only the bytes of `value` whose entry in `map` is non-zero.
 *
 * value: zval read through the string accessors. On return it holds a newly
 *        allocated string with the surviving bytes in their original order;
 *        the previous contents have been handed to zval_ptr_dtor().
 * map:   table indexed by byte value; a zero entry means "drop this byte".
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const size_t in_len = Z_STRLEN_P(value);
	const unsigned char *src = (const unsigned char *)Z_STRVAL_P(value);
	const unsigned char *const src_end = src + in_len;
	/* The result can never be longer than the input. */
	zend_string *filtered = zend_string_alloc(in_len, 0);
	char *dst = ZSTR_VAL(filtered);

	for (; src < src_end; src++) {
		if ((*map)[*src] != 0) {
			*dst++ = (char)*src;
		}
	}

	/* Terminate and record how many bytes were kept. */
	*dst = '\0';
	ZSTR_LEN(filtered) = (size_t)(dst - ZSTR_VAL(filtered));
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, filtered);
}
165 /* }}} */
166 
167 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)168 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
169 {
170 	size_t new_len;
171 	unsigned char enc[256] = {0};
172 
173 	if (!Z_REFCOUNTED_P(value)) {
174 		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
175 	}
176 
177 	/* strip high/strip low ( see flags )*/
178 	php_filter_strip(value, flags);
179 
180 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
181 		enc['\''] = enc['"'] = 1;
182 	}
183 	if (flags & FILTER_FLAG_ENCODE_AMP) {
184 		enc['&'] = 1;
185 	}
186 	if (flags & FILTER_FLAG_ENCODE_LOW) {
187 		memset(enc, 1, 32);
188 	}
189 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
190 		memset(enc + 127, 1, sizeof(enc) - 127);
191 	}
192 
193 	php_filter_encode_html(value, enc);
194 
195 	/* strip tags, implicitly also removes \0 chars */
196 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, 0, 1);
197 	Z_STRLEN_P(value) = new_len;
198 
199 	if (new_len == 0) {
200 		zval_ptr_dtor(value);
201 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
202 			ZVAL_NULL(value);
203 		} else {
204 			ZVAL_EMPTY_STRING(value);
205 		}
206 		return;
207 	}
208 }
209 /* }}} */
210 
211 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)212 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
213 {
214 	/* apply strip_high and strip_low filters */
215 	php_filter_strip(value, flags);
216 	/* urlencode */
217 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
218 }
219 /* }}} */
220 
221 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)222 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
223 {
224 	unsigned char enc[256] = {0};
225 
226 	php_filter_strip(value, flags);
227 
228 	/* encodes ' " < > & \0 to numerical entities */
229 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
230 
231 	/* if strip low is not set, then we encode them as &#xx; */
232 	memset(enc, 1, 32);
233 
234 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
235 		memset(enc + 127, 1, sizeof(enc) - 127);
236 	}
237 
238 	php_filter_encode_html(value, enc);
239 }
240 /* }}} */
241 
242 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)243 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
244 {
245 	zend_string *buf;
246 	int quotes;
247 
248 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
249 		quotes = ENT_QUOTES;
250 	} else {
251 		quotes = ENT_NOQUOTES;
252 	}
253 	buf = php_escape_html_entities_ex(
254 		(unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
255 		/* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
256 	zval_ptr_dtor(value);
257 	ZVAL_STR(value, buf);
258 }
259 /* }}} */
260 
261 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)262 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
263 {
264 	/* Only if no flags are set (optimization) */
265 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
266 		unsigned char enc[256] = {0};
267 
268 		php_filter_strip(value, flags);
269 
270 		if (flags & FILTER_FLAG_ENCODE_AMP) {
271 			enc['&'] = 1;
272 		}
273 		if (flags & FILTER_FLAG_ENCODE_LOW) {
274 			memset(enc, 1, 32);
275 		}
276 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
277 			memset(enc + 127, 1, sizeof(enc) - 127);
278 		}
279 
280 		php_filter_encode_html(value, enc);
281 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
282 		zval_ptr_dtor(value);
283 		ZVAL_NULL(value);
284 	}
285 }
286 /* }}} */
287 
288 /* {{{ php_filter_email */
289 #define SAFE        "$-_.+"
290 #define EXTRA       "!*'(),"
291 #define NATIONAL    "{}|\\^~[]`"
292 #define PUNCTUATION "<>#%\""
293 #define RESERVED    ";/?:@&="
294 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)295 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
296 {
297 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
298 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
299 	filter_map     map;
300 
301 	filter_map_init(&map);
302 	filter_map_update(&map, 1, allowed_list);
303 	filter_map_apply(value, &map);
304 }
305 /* }}} */
306 
307 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)308 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
309 {
310 	/* Strip all chars not part of section 5 of
311 	 * http://www.faqs.org/rfcs/rfc1738.html */
312 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
313 	filter_map     map;
314 
315 	filter_map_init(&map);
316 	filter_map_update(&map, 1, allowed_list);
317 	filter_map_apply(value, &map);
318 }
319 /* }}} */
320 
321 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)322 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
323 {
324 	/* strip everything [^0-9+-] */
325 	const unsigned char allowed_list[] = "+-" DIGIT;
326 	filter_map     map;
327 
328 	filter_map_init(&map);
329 	filter_map_update(&map, 1, allowed_list);
330 	filter_map_apply(value, &map);
331 }
332 /* }}} */
333 
334 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)335 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
336 {
337 	/* strip everything [^0-9+-] */
338 	const unsigned char allowed_list[] = "+-" DIGIT;
339 	filter_map     map;
340 
341 	filter_map_init(&map);
342 	filter_map_update(&map, 1, allowed_list);
343 
344 	/* depending on flags, strip '.', 'e', ",", "'" */
345 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
346 		filter_map_update(&map, 2, (const unsigned char *) ".");
347 	}
348 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
349 		filter_map_update(&map, 3,  (const unsigned char *) ",");
350 	}
351 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
352 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
353 	}
354 	filter_map_apply(value, &map);
355 }
356 /* }}} */
357 
358 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)359 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
360 {
361 	zend_string *buf = php_addslashes(Z_STR_P(value));
362 
363 	zval_ptr_dtor(value);
364 	ZVAL_STR(value, buf);
365 }
366 /* }}} */
367