1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2017 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Derick Rethans <derick@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "zend_smart_str.h"
24
25 /* {{{ STRUCTS */
/* Per-byte lookup table with one slot for every possible unsigned char value.
 * filter_map_update() stores a flag in the slots of allowed bytes and
 * filter_map_apply() keeps only the bytes whose slot is non-zero. */
typedef unsigned long filter_map[256];
27 /* }}} */
28
29 /* {{{ HELPER FUNCTIONS */
/* Replace selected bytes of a string zval with decimal numeric entities.
 *
 * value: string zval, rewritten in place (the old string is released and a
 *        newly built one is stored).
 * chars: 256-entry table indexed by byte value; a non-zero entry means the
 *        byte is emitted as "&#<decimal>;" instead of being copied.
 *
 * An empty input string is left untouched.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	unsigned char *cur = (unsigned char *)Z_STRVAL_P(value);
	unsigned char *end = cur + Z_STRLEN_P(value);

	if (!Z_STRLEN_P(value)) {
		return;
	}

	for (; cur < end; cur++) {
		if (!chars[*cur]) {
			/* XXX: copying runs of 'safe' bytes in one go would be faster */
			smart_str_appendc(&out, *cur);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)*cur);
		smart_str_appendc(&out, ';');
	}

	smart_str_0(&out);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
57
/* Uppercase hexadecimal digits, indexed by nibble value when building
 * %XX escapes in php_filter_encode_url(). */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character-class building blocks; they are combined into allow-lists by
 * string-literal concatenation. */
#define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT "0123456789"

/* Bytes that php_filter_encoded() asks php_filter_encode_url() to copy
 * through without percent-encoding. */
#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
65
/* Percent-encode every byte of a string zval that is not in an allow-list.
 *
 * value:      string zval, rewritten in place (the old string is released and
 *             a newly allocated one is stored).
 * chars:      bytes that are copied through unchanged.
 * char_len:   number of bytes in chars.
 * high, low, encode_nul:
 *             accepted for interface compatibility but not consulted; every
 *             byte that is missing from chars is encoded regardless, so as
 *             long as chars contains no NUL, control (< 32) or >= 127 bytes
 *             these switches have nothing left to add.
 *
 * Encoded bytes are written as '%' followed by two uppercase hex digits.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	(void) high;
	(void) low;
	(void) encode_nul;

	/* Mark all 256 byte values as "encode". The whole table must be
	 * initialized: leaving the last slot out would make the handling of
	 * byte 0xFF depend on uninitialized stack memory. */
	memset(tmp, 1, sizeof(tmp));

	/* Clear the mark for every explicitly allowed byte. */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Worst case: every input byte expands to three output bytes. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	/* Shrink the recorded length to what was actually written. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
110
/* Remove bytes from a string zval according to the FILTER_FLAG_STRIP_* flags.
 *
 * value: string zval, rewritten in place when at least one strip flag is set
 *        (the old string is released and a newly allocated one is stored).
 * flags: bit mask; only FILTER_FLAG_STRIP_LOW (bytes < 32),
 *        FILTER_FLAG_STRIP_HIGH (bytes >= 127, which includes DEL) and
 *        FILTER_FLAG_STRIP_BACKTICK ('`') are examined here.
 *
 * Lengths and indices are size_t so that strings longer than INT_MAX are
 * handled without signed overflow or signed/unsigned comparison.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	unsigned char *str;
	size_t i, c, len;
	zend_string *buf;

	/* Optimization for if no strip flags are set */
	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
		return;
	}

	str = (unsigned char *)Z_STRVAL_P(value);
	len = Z_STRLEN_P(value);
	/* The result can never be longer than the input. */
	buf = zend_string_alloc(len + 1, 0);
	c = 0;
	for (i = 0; i < len; i++) {
		if ((str[i] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
			continue;
		}
		if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
			continue;
		}
		if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			continue;
		}
		ZSTR_VAL(buf)[c] = str[i];
		++c;
	}
	/* update zval string data */
	ZSTR_VAL(buf)[c] = '\0';
	ZSTR_LEN(buf) = c;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
140 /* }}} */
141
142 /* {{{ FILTER MAP HELPERS */
/* Reset a filter map so that no byte value is marked as allowed. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
147
/* Store `flag` in the map slot of every byte found in `allowed_list`.
 *
 * map:          table to update.
 * flag:         value written to each listed byte's slot.
 * allowed_list: NUL-terminated list of bytes; the terminator is not stored.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cursor;

	for (cursor = allowed_list; *cursor != '\0'; ++cursor) {
		(*map)[*cursor] = flag;
	}
}
157
/* Drop every byte of a string zval whose filter-map slot is zero.
 *
 * value: string zval, rewritten in place (the old string is released and a
 *        newly allocated one is stored).
 * map:   table indexed by byte value; a non-zero slot keeps the byte.
 *
 * Lengths and indices are size_t so that strings longer than INT_MAX are
 * handled without signed overflow or signed/unsigned comparison.
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	unsigned char *str;
	size_t i, c, len;
	zend_string *buf;

	str = (unsigned char *)Z_STRVAL_P(value);
	len = Z_STRLEN_P(value);
	/* The result can never be longer than the input. */
	buf = zend_string_alloc(len + 1, 0);
	c = 0;
	for (i = 0; i < len; i++) {
		if ((*map)[str[i]]) {
			ZSTR_VAL(buf)[c] = str[i];
			++c;
		}
	}
	/* update zval string data */
	ZSTR_VAL(buf)[c] = '\0';
	ZSTR_LEN(buf) = c;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
179 /* }}} */
180
181 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)182 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
183 {
184 size_t new_len;
185 unsigned char enc[256] = {0};
186
187 if (!Z_REFCOUNTED_P(value)) {
188 ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
189 }
190
191 /* strip high/strip low ( see flags )*/
192 php_filter_strip(value, flags);
193
194 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
195 enc['\''] = enc['"'] = 1;
196 }
197 if (flags & FILTER_FLAG_ENCODE_AMP) {
198 enc['&'] = 1;
199 }
200 if (flags & FILTER_FLAG_ENCODE_LOW) {
201 memset(enc, 1, 32);
202 }
203 if (flags & FILTER_FLAG_ENCODE_HIGH) {
204 memset(enc + 127, 1, sizeof(enc) - 127);
205 }
206
207 php_filter_encode_html(value, enc);
208
209 /* strip tags, implicitly also removes \0 chars */
210 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
211 Z_STRLEN_P(value) = new_len;
212
213 if (new_len == 0) {
214 zval_dtor(value);
215 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
216 ZVAL_NULL(value);
217 } else {
218 ZVAL_EMPTY_STRING(value);
219 }
220 return;
221 }
222 }
223 /* }}} */
224
225 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)226 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
227 {
228 /* apply strip_high and strip_low filters */
229 php_filter_strip(value, flags);
230 /* urlencode */
231 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
232 }
233 /* }}} */
234
235 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)236 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
237 {
238 unsigned char enc[256] = {0};
239
240 php_filter_strip(value, flags);
241
242 /* encodes ' " < > & \0 to numerical entities */
243 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
244
245 /* if strip low is not set, then we encode them as &#xx; */
246 memset(enc, 1, 32);
247
248 if (flags & FILTER_FLAG_ENCODE_HIGH) {
249 memset(enc + 127, 1, sizeof(enc) - 127);
250 }
251
252 php_filter_encode_html(value, enc);
253 }
254 /* }}} */
255
256 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)257 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
258 {
259 zend_string *buf;
260 int quotes;
261
262 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
263 quotes = ENT_QUOTES;
264 } else {
265 quotes = ENT_NOQUOTES;
266 }
267 buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
268 zval_ptr_dtor(value);
269 ZVAL_STR(value, buf);
270 }
271 /* }}} */
272
273 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)274 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
275 {
276 /* Only if no flags are set (optimization) */
277 if (flags != 0 && Z_STRLEN_P(value) > 0) {
278 unsigned char enc[256] = {0};
279
280 php_filter_strip(value, flags);
281
282 if (flags & FILTER_FLAG_ENCODE_AMP) {
283 enc['&'] = 1;
284 }
285 if (flags & FILTER_FLAG_ENCODE_LOW) {
286 memset(enc, 1, 32);
287 }
288 if (flags & FILTER_FLAG_ENCODE_HIGH) {
289 memset(enc + 127, 1, sizeof(enc) - 127);
290 }
291
292 php_filter_encode_html(value, enc);
293 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
294 zval_dtor(value);
295 ZVAL_NULL(value);
296 }
297 }
298 /* }}} */
299
300 /* {{{ php_filter_email */
/* Character groups that php_filter_url() concatenates, together with
 * LOWALPHA, HIALPHA and DIGIT, into its allow-list. The group names
 * presumably mirror the character classes of RFC 1738 section 5, which
 * php_filter_url() cites. */
#define SAFE "$-_.+"
#define EXTRA "!*'(),"
#define NATIONAL "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED ";/?:@&="
306
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)307 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
308 {
309 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
310 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
311 filter_map map;
312
313 filter_map_init(&map);
314 filter_map_update(&map, 1, allowed_list);
315 filter_map_apply(value, &map);
316 }
317 /* }}} */
318
319 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)320 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
321 {
322 /* Strip all chars not part of section 5 of
323 * http://www.faqs.org/rfcs/rfc1738.html */
324 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
325 filter_map map;
326
327 filter_map_init(&map);
328 filter_map_update(&map, 1, allowed_list);
329 filter_map_apply(value, &map);
330 }
331 /* }}} */
332
333 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)334 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
335 {
336 /* strip everything [^0-9+-] */
337 const unsigned char allowed_list[] = "+-" DIGIT;
338 filter_map map;
339
340 filter_map_init(&map);
341 filter_map_update(&map, 1, allowed_list);
342 filter_map_apply(value, &map);
343 }
344 /* }}} */
345
346 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)347 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
348 {
349 /* strip everything [^0-9+-] */
350 const unsigned char allowed_list[] = "+-" DIGIT;
351 filter_map map;
352
353 filter_map_init(&map);
354 filter_map_update(&map, 1, allowed_list);
355
356 /* depending on flags, strip '.', 'e', ",", "'" */
357 if (flags & FILTER_FLAG_ALLOW_FRACTION) {
358 filter_map_update(&map, 2, (const unsigned char *) ".");
359 }
360 if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
361 filter_map_update(&map, 3, (const unsigned char *) ",");
362 }
363 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
364 filter_map_update(&map, 4, (const unsigned char *) "eE");
365 }
366 filter_map_apply(value, &map);
367 }
368 /* }}} */
369
370 /* {{{ php_filter_magic_quotes */
php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)371 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
372 {
373 zend_string *buf;
374
375 /* just call php_addslashes quotes */
376 buf = php_addslashes(Z_STR_P(value), 0);
377
378 zval_ptr_dtor(value);
379 ZVAL_STR(value, buf);
380 }
381 /* }}} */
382
383 /*
384 * Local variables:
385 * tab-width: 4
386 * c-basic-offset: 4
387 * End:
388 * vim600: noet sw=4 ts=4 fdm=marker
389 * vim<600: noet sw=4 ts=4
390 */
391