1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2018 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Derick Rethans <derick@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "zend_smart_str.h"
24
25 /* {{{ STRUCTS */
/* Lookup table with one entry per possible byte value (0-255).
 * filter_map_apply() keeps a byte only when its entry is non-zero. */
typedef unsigned long filter_map[256];
27 /* }}} */
28
29 /* {{{ HELPER FUNCTIONS */
/* Replaces the string held by `value` with a copy in which every byte whose
 * entry in the byte-indexed table `chars` is non-zero is written as a decimal
 * numeric entity ("&#<code>;"). All other bytes are copied through unchanged.
 * An empty string is left untouched. */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur = (const unsigned char *)Z_STRVAL_P(value);
	const unsigned char *end = cur + Z_STRLEN_P(value);

	/* Nothing to encode; keep the original (empty) string in place. */
	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	for (; cur < end; cur++) {
		const unsigned char byte = *cur;

		if (!chars[byte]) {
			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
			smart_str_appendc(&out, byte);
			continue;
		}

		/* Flagged byte: emit "&#", its decimal value, then ";". */
		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)byte);
		smart_str_appendc(&out, ';');
	}

	/* Terminate the buffer, drop the old string and hand the new one to value. */
	smart_str_0(&out);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
57
/* Upper-case hexadecimal digits, indexed by nibble value when %XX escapes are written. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character classes that the filters in this file concatenate into allow-lists. */
#define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT "0123456789"

/* Allow-list that php_filter_encoded() hands to php_filter_encode_url(): these bytes are not escaped. */
#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
65
/* Percent-encodes the string held by `value` and stores the result back in it.
 *
 * value     string zval; its string is released and replaced by the encoded copy.
 * chars     bytes that are copied through unescaped (the allow-list).
 * char_len  number of bytes in `chars`.
 * high, low, encode_nul
 *           not consulted by this function; kept so existing callers keep
 *           compiling. Every byte outside `chars` is escaped anyway, so these
 *           switches only mattered if `chars` contained high, low or NUL bytes.
 *
 * Each escaped byte is written as '%' followed by two upper-case hex digits.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char must_encode[256];
	const unsigned char *allowed = chars;
	const unsigned char *allowed_end = chars + char_len;
	const unsigned char *s;
	const unsigned char *e;
	unsigned char *p;
	zend_string *str;

	/* Start with every byte value marked "escape". The whole table must be
	 * initialised: stopping one element short would leave the entry for byte
	 * 0xFF indeterminate, making its handling depend on stack contents. */
	memset(must_encode, 1, sizeof(must_encode));

	/* Clear the mark for every byte on the allow-list. */
	while (allowed < allowed_end) {
		must_encode[*allowed++] = 0;
	}

	/* Sized for the worst case of three output bytes per input byte. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (must_encode[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';

	/* The real length is only known now; record it before publishing the string. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
110
/* Removes bytes from the string held by `value` according to `flags`:
 *   FILTER_FLAG_STRIP_HIGH      drops bytes >= 127
 *   FILTER_FLAG_STRIP_LOW       drops bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK  drops '`'
 * When none of these flags is set the value is left untouched; otherwise its
 * string is replaced by a newly allocated, filtered copy.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	unsigned char *str;
	size_t i;
	size_t c; /* output index; size_t so it cannot overflow on strings longer than INT_MAX */
	zend_string *buf;

	/* Optimization for if no strip flags are set */
	if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
		return;
	}

	str = (unsigned char *)Z_STRVAL_P(value);
	buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
	c = 0;
	for (i = 0; i < Z_STRLEN_P(value); i++) {
		if ((str[i] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
			continue;
		}
		if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
			continue;
		}
		if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			continue;
		}
		ZSTR_VAL(buf)[c] = str[i];
		++c;
	}

	/* update zval string data */
	ZSTR_VAL(buf)[c] = '\0';
	ZSTR_LEN(buf) = c;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
141 /* }}} */
142
143 /* {{{ FILTER MAP HELPERS */
/* Clears every entry of `map`, i.e. no byte is marked as allowed. */
static void filter_map_init(filter_map *map)
{
	memset(map, 0, sizeof(*map));
}
148
/* Stores `flag` in the entry of `map` for every byte of the NUL-terminated
 * string `allowed_list`. Entries for bytes not in the list are left as they
 * were. */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *ch;

	for (ch = allowed_list; *ch != '\0'; ch++) {
		(*map)[*ch] = flag;
	}
}
158
/* Replaces the string held by `value` with a newly allocated copy that
 * contains only the bytes whose entry in `map` is non-zero, in their original
 * order. */
static void filter_map_apply(zval *value, filter_map *map)
{
	unsigned char *str;
	size_t i;
	size_t c; /* output index; size_t so it cannot overflow on strings longer than INT_MAX */
	zend_string *buf;

	str = (unsigned char *)Z_STRVAL_P(value);
	buf = zend_string_alloc(Z_STRLEN_P(value) + 1, 0);
	c = 0;
	for (i = 0; i < Z_STRLEN_P(value); i++) {
		if ((*map)[str[i]]) {
			ZSTR_VAL(buf)[c] = str[i];
			++c;
		}
	}

	/* update zval string data */
	ZSTR_VAL(buf)[c] = '\0';
	ZSTR_LEN(buf) = c;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, buf);
}
181 /* }}} */
182
183 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)184 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
185 {
186 size_t new_len;
187 unsigned char enc[256] = {0};
188
189 if (!Z_REFCOUNTED_P(value)) {
190 ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
191 }
192
193 /* strip high/strip low ( see flags )*/
194 php_filter_strip(value, flags);
195
196 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
197 enc['\''] = enc['"'] = 1;
198 }
199 if (flags & FILTER_FLAG_ENCODE_AMP) {
200 enc['&'] = 1;
201 }
202 if (flags & FILTER_FLAG_ENCODE_LOW) {
203 memset(enc, 1, 32);
204 }
205 if (flags & FILTER_FLAG_ENCODE_HIGH) {
206 memset(enc + 127, 1, sizeof(enc) - 127);
207 }
208
209 php_filter_encode_html(value, enc);
210
211 /* strip tags, implicitly also removes \0 chars */
212 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
213 Z_STRLEN_P(value) = new_len;
214
215 if (new_len == 0) {
216 zval_dtor(value);
217 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
218 ZVAL_NULL(value);
219 } else {
220 ZVAL_EMPTY_STRING(value);
221 }
222 return;
223 }
224 }
225 /* }}} */
226
227 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)228 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
229 {
230 /* apply strip_high and strip_low filters */
231 php_filter_strip(value, flags);
232 /* urlencode */
233 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
234 }
235 /* }}} */
236
237 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)238 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
239 {
240 unsigned char enc[256] = {0};
241
242 php_filter_strip(value, flags);
243
244 /* encodes ' " < > & \0 to numerical entities */
245 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
246
247 /* if strip low is not set, then we encode them as &#xx; */
248 memset(enc, 1, 32);
249
250 if (flags & FILTER_FLAG_ENCODE_HIGH) {
251 memset(enc + 127, 1, sizeof(enc) - 127);
252 }
253
254 php_filter_encode_html(value, enc);
255 }
256 /* }}} */
257
258 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)259 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
260 {
261 zend_string *buf;
262 int quotes;
263
264 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
265 quotes = ENT_QUOTES;
266 } else {
267 quotes = ENT_NOQUOTES;
268 }
269 buf = php_escape_html_entities_ex((unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), 1, quotes, SG(default_charset), 0);
270 zval_ptr_dtor(value);
271 ZVAL_STR(value, buf);
272 }
273 /* }}} */
274
275 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)276 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
277 {
278 /* Only if no flags are set (optimization) */
279 if (flags != 0 && Z_STRLEN_P(value) > 0) {
280 unsigned char enc[256] = {0};
281
282 php_filter_strip(value, flags);
283
284 if (flags & FILTER_FLAG_ENCODE_AMP) {
285 enc['&'] = 1;
286 }
287 if (flags & FILTER_FLAG_ENCODE_LOW) {
288 memset(enc, 1, 32);
289 }
290 if (flags & FILTER_FLAG_ENCODE_HIGH) {
291 memset(enc + 127, 1, sizeof(enc) - 127);
292 }
293
294 php_filter_encode_html(value, enc);
295 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
296 zval_dtor(value);
297 ZVAL_NULL(value);
298 }
299 }
300 /* }}} */
301
302 /* {{{ php_filter_email */
/* Character classes that php_filter_url() concatenates (together with
 * LOWALPHA, HIALPHA and DIGIT) into its allow-list. */
#define SAFE "$-_.+"
#define EXTRA "!*'(),"
#define NATIONAL "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED ";/?:@&="
308
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)309 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
310 {
311 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
312 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
313 filter_map map;
314
315 filter_map_init(&map);
316 filter_map_update(&map, 1, allowed_list);
317 filter_map_apply(value, &map);
318 }
319 /* }}} */
320
321 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)322 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
323 {
324 /* Strip all chars not part of section 5 of
325 * http://www.faqs.org/rfcs/rfc1738.html */
326 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
327 filter_map map;
328
329 filter_map_init(&map);
330 filter_map_update(&map, 1, allowed_list);
331 filter_map_apply(value, &map);
332 }
333 /* }}} */
334
335 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)336 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
337 {
338 /* strip everything [^0-9+-] */
339 const unsigned char allowed_list[] = "+-" DIGIT;
340 filter_map map;
341
342 filter_map_init(&map);
343 filter_map_update(&map, 1, allowed_list);
344 filter_map_apply(value, &map);
345 }
346 /* }}} */
347
348 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)349 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
350 {
351 /* strip everything [^0-9+-] */
352 const unsigned char allowed_list[] = "+-" DIGIT;
353 filter_map map;
354
355 filter_map_init(&map);
356 filter_map_update(&map, 1, allowed_list);
357
358 /* depending on flags, strip '.', 'e', ",", "'" */
359 if (flags & FILTER_FLAG_ALLOW_FRACTION) {
360 filter_map_update(&map, 2, (const unsigned char *) ".");
361 }
362 if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
363 filter_map_update(&map, 3, (const unsigned char *) ",");
364 }
365 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
366 filter_map_update(&map, 4, (const unsigned char *) "eE");
367 }
368 filter_map_apply(value, &map);
369 }
370 /* }}} */
371
372 /* {{{ php_filter_magic_quotes */
php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)373 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
374 {
375 zend_string *buf;
376
377 /* just call php_addslashes quotes */
378 buf = php_addslashes(Z_STR_P(value), 0);
379
380 zval_ptr_dtor(value);
381 ZVAL_STR(value, buf);
382 }
383 /* }}} */
384
385 /*
386 * Local variables:
387 * tab-width: 4
388 * c-basic-offset: 4
389 * End:
390 * vim600: noet sw=4 ts=4 fdm=marker
391 * vim<600: noet sw=4 ts=4
392 */
393