xref: /PHP-5.4/ext/filter/sanitizing_filters.c (revision c0d060f5)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 5                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2014 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   +----------------------------------------------------------------------+
17 */
18 
19 /* $Id$ */
20 
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "ext/standard/php_smart_str.h"
24 
25 /* {{{ STRUCTS */
/* Lookup table with one entry per possible byte value (0-255). The
 * filter_map_* helpers in this file store a non-zero flag in the entry of
 * every byte that is allowed to remain in a sanitized string. */
typedef unsigned long filter_map[256];
27 /* }}} */
28 
29 /* {{{ HELPER FUNCTIONS */
/*
 * Rewrites the string held by `value` so that every byte b with a non-zero
 * chars[b] is replaced by the decimal numeric entity "&#<b>;". All other
 * bytes are copied through unchanged.
 *
 * value: string zval, modified in place. An empty string is left untouched.
 * chars: table indexed by byte value; non-zero means "encode this byte".
 *
 * The previous string buffer is released with str_efree() and replaced by
 * the newly built, NUL-terminated one.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur, *end;

	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	cur = (const unsigned char *)Z_STRVAL_P(value);
	end = cur + Z_STRLEN_P(value);

	for (; cur < end; cur++) {
		const unsigned char byte = *cur;

		if (!chars[byte]) {
			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
			smart_str_appendc(&out, byte);
			continue;
		}

		/* Emit "&#" + decimal byte value + ";" */
		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (unsigned long)byte);
		smart_str_appendc(&out, ';');
	}

	smart_str_0(&out);

	/* Swap the freshly built buffer into the zval */
	str_efree(Z_STRVAL_P(value));
	Z_STRVAL_P(value) = out.c;
	Z_STRLEN_P(value) = out.len;
}
58 
/* Upper-case hexadecimal digits; php_filter_encode_url() indexes this with
 * the high and low nibble of a byte to build its "%XX" escape. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character classes that are concatenated (as adjacent string literals) to
 * build the allowed-character lists used by the filters in this file. */
#define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT       "0123456789"

/* Bytes that php_filter_encoded() asks php_filter_encode_url() to leave
 * unescaped: ASCII letters, digits, '-', '.' and '_'. */
#define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
66 
/*
 * Percent-encodes the string held by `value` in place.
 *
 * value:      string zval; its buffer is replaced by the encoded copy and
 *             the old buffer is released with str_efree().
 * chars:      bytes that must be copied through unescaped.
 * char_len:   number of bytes in `chars`.
 * high, low,
 * encode_nul: currently unused. The table below already marks every byte
 *             outside `chars` (including NUL, control and 8-bit bytes) for
 *             encoding, so no extra handling is required for them.
 *
 * Every byte not listed in `chars` is written as '%' followed by two
 * upper-case hexadecimal digits.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *str, *p;
	unsigned char tmp[256];
	unsigned char *s = (unsigned char *)chars;
	unsigned char *e = s + char_len;

	/* Mark every byte value as "encode". The whole table must be
	 * initialized: leaving tmp[255] unset would make the treatment of
	 * byte 0xFF depend on indeterminate stack contents. */
	memset(tmp, 1, sizeof(tmp));

	/* Clear the mark for the bytes the caller allows verbatim */
	while (s < e) {
		tmp[*s++] = 0;
	}

	/* Worst case every input byte becomes three output bytes, plus NUL */
	p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
	s = (unsigned char *)Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	str_efree(Z_STRVAL_P(value));
	Z_STRVAL_P(value) = (char *)str;
	Z_STRLEN_P(value) = p - str;
}
109 
/*
 * Removes bytes from the string held by `value` according to the strip
 * bits set in `flags`:
 *   FILTER_FLAG_STRIP_HIGH      drops bytes greater than 127
 *   FILTER_FLAG_STRIP_LOW       drops bytes less than 32
 *   FILTER_FLAG_STRIP_BACKTICK  drops the '`' character
 *
 * When none of these bits is set the string is left untouched. Otherwise
 * the zval's buffer is replaced by a newly allocated, NUL-terminated copy
 * and the old buffer is released with str_efree().
 */
static void php_filter_strip(zval *value, long flags)
{
	unsigned char *buf, *str;
	int   i, c;

	/* Optimization for if no strip flags are set. All three strip flags
	 * must be tested here, otherwise FILTER_FLAG_STRIP_BACKTICK used on its
	 * own would never reach the loop below. */
	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
		return;
	}

	str = (unsigned char *)Z_STRVAL_P(value);
	buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
	c = 0;
	for (i = 0; i < Z_STRLEN_P(value); i++) {
		if ((str[i] > 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
			/* dropped: high byte */
		} else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
			/* dropped: low (control) byte */
		} else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			/* dropped: backtick */
		} else {
			buf[c] = str[i];
			++c;
		}
	}
	/* update zval string data */
	buf[c] = '\0';
	str_efree(Z_STRVAL_P(value));
	Z_STRVAL_P(value) = (char *)buf;
	Z_STRLEN_P(value) = c;
}
138 /* }}} */
139 
140 /* {{{ FILTER MAP HELPERS */
/* Clears every entry of `map`, so that no byte value is marked as allowed. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
145 
/*
 * Stores `flag` in the map entry of every byte that occurs in the
 * NUL-terminated string `allowed_list`. Entries of other bytes are not
 * touched.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cur;

	for (cur = allowed_list; *cur != '\0'; cur++) {
		(*map)[*cur] = flag;
	}
}
155 
/*
 * Keeps only those bytes of the string held by `value` whose entry in `map`
 * is non-zero; all other bytes are removed. The zval's buffer is replaced
 * by a newly allocated, NUL-terminated copy and the old buffer is released
 * with str_efree().
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *src = (const unsigned char *)Z_STRVAL_P(value);
	const int src_len = Z_STRLEN_P(value);
	unsigned char *out, *w;
	int pos;

	out = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
	w = out;

	for (pos = 0; pos < src_len; pos++) {
		const unsigned char byte = src[pos];

		if (!(*map)[byte]) {
			continue; /* byte is not in the allowed set */
		}
		*w++ = byte;
	}
	*w = '\0';

	/* update zval string data */
	str_efree(Z_STRVAL_P(value));
	Z_STRVAL_P(value) = (char *)out;
	Z_STRLEN_P(value) = w - out;
}
176 /* }}} */
177 
178 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)179 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
180 {
181 	size_t new_len;
182 	unsigned char enc[256] = {0};
183 
184 	/* strip high/strip low ( see flags )*/
185 	php_filter_strip(value, flags);
186 
187 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
188 		enc['\''] = enc['"'] = 1;
189 	}
190 	if (flags & FILTER_FLAG_ENCODE_AMP) {
191 		enc['&'] = 1;
192 	}
193 	if (flags & FILTER_FLAG_ENCODE_LOW) {
194 		memset(enc, 1, 32);
195 	}
196 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
197 		memset(enc + 127, 1, sizeof(enc) - 127);
198 	}
199 
200 	php_filter_encode_html(value, enc);
201 
202 	/* strip tags, implicitly also removes \0 chars */
203 	new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
204 	Z_STRLEN_P(value) = new_len;
205 
206 	if (new_len == 0) {
207 		zval_dtor(value);
208 		if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
209 			ZVAL_NULL(value);
210 		} else {
211 			ZVAL_EMPTY_STRING(value);
212 		}
213 		return;
214 	}
215 }
216 /* }}} */
217 
218 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)219 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
220 {
221 	/* apply strip_high and strip_low filters */
222 	php_filter_strip(value, flags);
223 	/* urlencode */
224 	php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
225 }
226 /* }}} */
227 
228 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)229 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
230 {
231 	unsigned char enc[256] = {0};
232 
233 	php_filter_strip(value, flags);
234 
235 	/* encodes ' " < > & \0 to numerical entities */
236 	enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
237 
238 	/* if strip low is not set, then we encode them as &#xx; */
239 	memset(enc, 1, 32);
240 
241 	if (flags & FILTER_FLAG_ENCODE_HIGH) {
242 		memset(enc + 127, 1, sizeof(enc) - 127);
243 	}
244 
245 	php_filter_encode_html(value, enc);
246 }
247 /* }}} */
248 
249 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)250 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
251 {
252 	char *buf;
253 	size_t len;
254 	int quotes;
255 
256 	if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
257 		quotes = ENT_QUOTES;
258 	} else {
259 		quotes = ENT_NOQUOTES;
260 	}
261 	buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
262 	str_efree(Z_STRVAL_P(value));
263 	Z_STRVAL_P(value) = buf;
264 	Z_STRLEN_P(value) = len;
265 }
266 /* }}} */
267 
268 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)269 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
270 {
271 	/* Only if no flags are set (optimization) */
272 	if (flags != 0 && Z_STRLEN_P(value) > 0) {
273 		unsigned char enc[256] = {0};
274 
275 		php_filter_strip(value, flags);
276 
277 		if (flags & FILTER_FLAG_ENCODE_AMP) {
278 			enc['&'] = 1;
279 		}
280 		if (flags & FILTER_FLAG_ENCODE_LOW) {
281 			memset(enc, 1, 32);
282 		}
283 		if (flags & FILTER_FLAG_ENCODE_HIGH) {
284 			memset(enc + 127, 1, sizeof(enc) - 127);
285 		}
286 
287 		php_filter_encode_html(value, enc);
288 	} else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
289 		zval_dtor(value);
290 		ZVAL_NULL(value);
291 	}
292 }
293 /* }}} */
294 
295 
296 
297 /* {{{ php_filter_email */
/* Additional character classes; together with LOWALPHA, HIALPHA and DIGIT
 * they form the allowed list of php_filter_url(), which cites section 5 of
 * RFC 1738. NOTE(review): the class names look like they follow that RFC's
 * grammar - confirm against the RFC before changing any of them. */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
303 
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)304 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
305 {
306 	/* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
307 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
308 	filter_map     map;
309 
310 	filter_map_init(&map);
311 	filter_map_update(&map, 1, allowed_list);
312 	filter_map_apply(value, &map);
313 }
314 /* }}} */
315 
316 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)317 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
318 {
319 	/* Strip all chars not part of section 5 of
320 	 * http://www.faqs.org/rfcs/rfc1738.html */
321 	const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
322 	filter_map     map;
323 
324 	filter_map_init(&map);
325 	filter_map_update(&map, 1, allowed_list);
326 	filter_map_apply(value, &map);
327 }
328 /* }}} */
329 
330 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)331 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
332 {
333 	/* strip everything [^0-9+-] */
334 	const unsigned char allowed_list[] = "+-" DIGIT;
335 	filter_map     map;
336 
337 	filter_map_init(&map);
338 	filter_map_update(&map, 1, allowed_list);
339 	filter_map_apply(value, &map);
340 }
341 /* }}} */
342 
343 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)344 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
345 {
346 	/* strip everything [^0-9+-] */
347 	const unsigned char allowed_list[] = "+-" DIGIT;
348 	filter_map     map;
349 
350 	filter_map_init(&map);
351 	filter_map_update(&map, 1, allowed_list);
352 
353 	/* depending on flags, strip '.', 'e', ",", "'" */
354 	if (flags & FILTER_FLAG_ALLOW_FRACTION) {
355 		filter_map_update(&map, 2, (const unsigned char *) ".");
356 	}
357 	if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
358 		filter_map_update(&map, 3,  (const unsigned char *) ",");
359 	}
360 	if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
361 		filter_map_update(&map, 4,  (const unsigned char *) "eE");
362 	}
363 	filter_map_apply(value, &map);
364 }
365 /* }}} */
366 
367 /* {{{ php_filter_magic_quotes */
php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)368 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
369 {
370 	char *buf;
371 	int   len;
372 
373 	/* just call php_addslashes quotes */
374 	buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
375 
376 	str_efree(Z_STRVAL_P(value));
377 	Z_STRVAL_P(value) = buf;
378 	Z_STRLEN_P(value) = len;
379 }
380 /* }}} */
381 
382 /*
383  * Local variables:
384  * tab-width: 4
385  * c-basic-offset: 4
386  * End:
387  * vim600: noet sw=4 ts=4 fdm=marker
388  * vim<600: noet sw=4 ts=4
389  */
390