xref: /php-src/ext/filter/sanitizing_filters.c (revision 25a51461)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Derick Rethans <derick@php.net>                             |
14   +----------------------------------------------------------------------+
15 */
16 
17 #include "php_filter.h"
18 #include "filter_private.h"
19 #include "ext/standard/php_string.h"
20 #include "ext/standard/html.h"
21 #include "zend_smart_str.h"
22 
23 /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (indexed by an
 * unsigned char). filter_map_apply() keeps a byte only when its slot is
 * non-zero. */
typedef unsigned long filter_map[256];
25 /* }}} */
26 
27 /* {{{ HELPER FUNCTIONS */
/*
 * Rewrite the string held in `value`, replacing each byte whose entry in
 * `chars` is non-zero with a decimal numeric character reference
 * ("&#<byte value>;"). All other bytes are copied through unchanged.
 *
 * `chars` is indexed by byte value (the callers in this file pass 256-entry
 * tables). An empty string is left untouched; otherwise the previous zval
 * contents are released and replaced by the newly built string.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur;
	const unsigned char *end;

	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	cur = (const unsigned char *)Z_STRVAL_P(value);
	end = cur + Z_STRLEN_P(value);

	/* XXX: bytes are appended one at a time; runs of 'safe' bytes are not
	 * yet copied as blocks. */
	for (; cur < end; cur++) {
		const unsigned char ch = *cur;

		if (!chars[ch]) {
			smart_str_appendc(&out, ch);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)ch);
		smart_str_appendc(&out, ';');
	}

	zval_ptr_dtor(value);
	ZVAL_STR(value, smart_str_extract(&out));
}
54 
/* Uppercase hexadecimal digits, indexed by nibble value; used by
 * php_filter_encode_url() to emit the two digits of a %XX escape. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Basic character classes, concatenated below and in the allow lists of the
 * email / URL / number filters. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that php_filter_encoded() asks php_filter_encode_url() to leave
 * unencoded. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
62 
/*
 * Percent-encode the string held in `value`.
 *
 * value     - string zval; its contents are released and replaced by the
 *             encoded string.
 * chars     - bytes that are copied through unchanged; every other byte
 *             value is written as '%' followed by two uppercase hex digits.
 * char_len  - number of bytes in `chars`.
 * high, low, encode_nul
 *           - accepted for interface compatibility; this function does not
 *             consult them.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	(void) high;
	(void) low;
	(void) encode_nul;

	/* Mark every byte value as "encode" first. The whole table must be
	 * initialised: leaving the last slot out would make the handling of
	 * byte 0xFF depend on an indeterminate stack value. */
	memset(tmp, 1, sizeof(tmp));

	/* ...then clear the mark for each byte in the pass-through list. */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Sized for the worst case of three output bytes per input byte. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	/* Shrink the recorded length to what was actually written. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
97 
/*
 * Remove bytes from the string in `value` according to the strip flags:
 *   FILTER_FLAG_STRIP_HIGH     - drops bytes >= 127
 *   FILTER_FLAG_STRIP_LOW      - drops bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK - drops '`'
 * When none of these flags is set the zval is left untouched; otherwise its
 * contents are released and replaced by a newly allocated string.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const unsigned char *in;
	zend_string *out;
	size_t in_len;
	size_t rd;
	size_t wr = 0;

	/* Nothing to strip: skip the copy entirely. */
	if ((flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK)) == 0) {
		return;
	}

	in = (const unsigned char *)Z_STRVAL_P(value);
	in_len = Z_STRLEN_P(value);
	/* The result can never be longer than the input. */
	out = zend_string_alloc(in_len, 0);

	for (rd = 0; rd < in_len; rd++) {
		const unsigned char ch = in[rd];

		if ((ch >= 127 && (flags & FILTER_FLAG_STRIP_HIGH))
		 || (ch < 32 && (flags & FILTER_FLAG_STRIP_LOW))
		 || (ch == '`' && (flags & FILTER_FLAG_STRIP_BACKTICK))) {
			continue;
		}

		ZSTR_VAL(out)[wr] = ch;
		wr++;
	}

	/* Terminate and record the length actually written. */
	ZSTR_VAL(out)[wr] = '\0';
	ZSTR_LEN(out) = wr;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
128 /* }}} */
129 
130 /* {{{ FILTER MAP HELPERS */
/* Reset every slot of `map` to zero, i.e. no byte is marked as allowed. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
135 
/*
 * Store `flag` in the slot of every byte that occurs in the NUL-terminated
 * string `allowed_list`. Slots of bytes not in the list are left as they are.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cursor;

	for (cursor = allowed_list; *cursor != '\0'; cursor++) {
		(*map)[*cursor] = flag;
	}
}
145 
/*
 * Keep only those bytes of the string in `value` whose slot in `map` is
 * non-zero. The zval's previous contents are released and replaced by a
 * newly allocated string holding the surviving bytes in their original order.
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *in = (const unsigned char *)Z_STRVAL_P(value);
	const size_t in_len = Z_STRLEN_P(value);
	/* The filtered string is at most as long as the input. */
	zend_string *out = zend_string_alloc(in_len, 0);
	size_t rd;
	size_t wr = 0;

	for (rd = 0; rd < in_len; rd++) {
		if (!(*map)[in[rd]]) {
			continue;
		}
		ZSTR_VAL(out)[wr] = in[rd];
		wr++;
	}

	/* Terminate and record the length actually written. */
	ZSTR_VAL(out)[wr] = '\0';
	ZSTR_LEN(out) = wr;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
167 /* }}} */
168 
169 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)170 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
171 {
172 	size_t new_len;
173 	unsigned char enc[256] = {0};
174 
175 	if (!Z_REFCOUNTED_P(value)) {
176 		ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
177 	}
178 
179 	/* strip high/strip low ( see flags )*/
180 	php_filter_strip(value, flags);
181 
182 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
183 		enc['\''] = enc['"'] = 1;
184 	}
185 	if (flags & FILTER_FLAG_ENCODE_AMP) {
186 		enc['&'] = 1;
187 	}
188 	if (flags & FILTER_FLAG_ENCODE_LOW) {
189 		memset(enc, 1, 32);
190 	}
191 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
192 		memset(enc + 127, 1, sizeof(enc) - 127);
193 	}
194 
195 	php_filter_encode_html(value, enc);
196 
197 	/* strip tags, implicitly also removes \0 chars */
198 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, 0, 1);
199 	Z_STRLEN_P(value) = new_len;
200 
201 	if (new_len == 0) {
202 		zval_ptr_dtor(value);
203 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
204 			ZVAL_NULL(value);
205 		} else {
206 			ZVAL_EMPTY_STRING(value);
207 		}
208 		return;
209 	}
210 }
211 /* }}} */
212 
213 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)214 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
215 {
216 	/* apply strip_high and strip_low filters */
217 	php_filter_strip(value, flags);
218 	/* urlencode */
219 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
220 }
221 /* }}} */
222 
223 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)224 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
225 {
226 	unsigned char enc[256] = {0};
227 
228 	php_filter_strip(value, flags);
229 
230 	/* encodes ' " < > & \0 to numerical entities */
231 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
232 
233 	/* if strip low is not set, then we encode them as &#xx; */
234 	memset(enc, 1, 32);
235 
236 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
237 		memset(enc + 127, 1, sizeof(enc) - 127);
238 	}
239 
240 	php_filter_encode_html(value, enc);
241 }
242 /* }}} */
243 
244 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)245 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
246 {
247 	zend_string *buf;
248 	int quotes;
249 
250 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
251 		quotes = ENT_QUOTES;
252 	} else {
253 		quotes = ENT_NOQUOTES;
254 	}
255 	buf = php_escape_html_entities_ex(
256 		(unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
257 		/* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
258 	zval_ptr_dtor(value);
259 	ZVAL_STR(value, buf);
260 }
261 /* }}} */
262 
263 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)264 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
265 {
266 	/* Only if no flags are set (optimization) */
267 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
268 		unsigned char enc[256] = {0};
269 
270 		php_filter_strip(value, flags);
271 
272 		if (flags & FILTER_FLAG_ENCODE_AMP) {
273 			enc['&'] = 1;
274 		}
275 		if (flags & FILTER_FLAG_ENCODE_LOW) {
276 			memset(enc, 1, 32);
277 		}
278 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
279 			memset(enc + 127, 1, sizeof(enc) - 127);
280 		}
281 
282 		php_filter_encode_html(value, enc);
283 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
284 		zval_ptr_dtor(value);
285 		ZVAL_NULL(value);
286 	}
287 }
288 /* }}} */
289 
290 /* {{{ php_filter_email */
/* Additional character classes; together with LOWALPHA, HIALPHA and DIGIT
 * they make up the allow list used by php_filter_url(). The class names
 * presumably follow the grammar in RFC 1738 section 5 - TODO confirm. */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
296 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)297 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
298 {
299 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
300 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
301 	filter_map     map;
302 
303 	filter_map_init(&map);
304 	filter_map_update(&map, 1, allowed_list);
305 	filter_map_apply(value, &map);
306 }
307 /* }}} */
308 
309 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)310 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
311 {
312 	/* Strip all chars not part of section 5 of
313 	 * http://www.faqs.org/rfcs/rfc1738.html */
314 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
315 	filter_map     map;
316 
317 	filter_map_init(&map);
318 	filter_map_update(&map, 1, allowed_list);
319 	filter_map_apply(value, &map);
320 }
321 /* }}} */
322 
323 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)324 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
325 {
326 	/* strip everything [^0-9+-] */
327 	const unsigned char allowed_list[] = "+-" DIGIT;
328 	filter_map     map;
329 
330 	filter_map_init(&map);
331 	filter_map_update(&map, 1, allowed_list);
332 	filter_map_apply(value, &map);
333 }
334 /* }}} */
335 
336 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)337 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
338 {
339 	/* strip everything [^0-9+-] */
340 	const unsigned char allowed_list[] = "+-" DIGIT;
341 	filter_map     map;
342 
343 	filter_map_init(&map);
344 	filter_map_update(&map, 1, allowed_list);
345 
346 	/* depending on flags, strip '.', 'e', ",", "'" */
347 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
348 		filter_map_update(&map, 2, (const unsigned char *) ".");
349 	}
350 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
351 		filter_map_update(&map, 3,  (const unsigned char *) ",");
352 	}
353 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
354 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
355 	}
356 	filter_map_apply(value, &map);
357 }
358 /* }}} */
359 
360 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)361 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
362 {
363 	zend_string *buf = php_addslashes(Z_STR_P(value));
364 
365 	zval_ptr_dtor(value);
366 	ZVAL_STR(value, buf);
367 }
368 /* }}} */
369