1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Derick Rethans <derick@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php_filter.h"
18 #include "filter_private.h"
19 #include "zend_smart_str.h"
20
21 /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0..255).
 * filter_map_update() stores a flag in the slots of permitted bytes and
 * filter_map_apply() keeps only the bytes whose slot is non-zero. */
typedef unsigned long filter_map[256];
23 /* }}} */
24
25 /* {{{ HELPER FUNCTIONS */
/*
 * Rewrite the string held in `value`, turning every byte whose entry in
 * `chars` is non-zero into a decimal numeric character reference of the form
 * "&#<byte value>;". All other bytes are copied through unchanged.
 *
 * `chars` is indexed by byte value. An empty string is left untouched;
 * otherwise the old string is released and `value` receives the new one.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str out = {0};
	const unsigned char *cur;
	const unsigned char *end;

	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	cur = (const unsigned char *)Z_STRVAL_P(value);
	end = cur + Z_STRLEN_P(value);

	for (; cur < end; cur++) {
		const unsigned char byte = *cur;

		if (!chars[byte]) {
			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
			smart_str_appendc(&out, byte);
			continue;
		}

		smart_str_appendl(&out, "&#", 2);
		smart_str_append_unsigned(&out, (zend_ulong)byte);
		smart_str_appendc(&out, ';');
	}

	smart_str_0(&out);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out.s);
}
53
/* Upper-case hexadecimal digits, indexed by nibble value when
 * php_filter_encode_url() writes a %XX escape. */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Character classes; adjacent string literals are concatenated by the
 * compiler, so these can be combined to build allow-lists. */
#define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT "0123456789"

/* Bytes that php_filter_encoded() asks php_filter_encode_url() to leave unescaped */
#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
61
/*
 * Percent-encode the string held in `value`.
 *
 * Every byte that does not occur in the first `char_len` bytes of `chars` is
 * written as '%' followed by two upper-case hexadecimal digits; bytes listed
 * in `chars` are copied verbatim. The previous string is released and `value`
 * receives the newly allocated result.
 *
 * `high`, `low` and `encode_nul` are kept for signature compatibility; this
 * implementation does not consult them.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	/* Start with every one of the 256 byte values flagged "encode". The whole
	 * table has to be initialised: stopping one entry short would leave the
	 * slot for byte 0xFF indeterminate, making its treatment unpredictable. */
	memset(tmp, 1, sizeof(tmp));

	/* ...then clear the flag for each byte that may pass through as-is */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Request room for the worst case of three output bytes per input byte
	 * (NOTE(review): relies on zend_string_safe_alloc(n, m, l, ...) sizing
	 * the buffer as n * m + l -- confirm against the Zend API). */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	/* The real length is only known after the encoding pass */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
96
/*
 * Remove bytes from the string in `value` according to the strip flags:
 *   FILTER_FLAG_STRIP_HIGH      drops bytes >= 127
 *   FILTER_FLAG_STRIP_LOW       drops bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK  drops '`'
 *
 * When none of these flags is set the value is left untouched. Otherwise the
 * old string is released and `value` receives a newly allocated string that
 * holds the surviving bytes.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const unsigned char *src;
	size_t src_len;
	size_t rd;
	size_t wr = 0;
	zend_string *out;

	/* Nothing requested: skip the copy altogether */
	if ((flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK)) == 0) {
		return;
	}

	src = (const unsigned char *)Z_STRVAL_P(value);
	src_len = Z_STRLEN_P(value);
	/* The result can never be longer than the input */
	out = zend_string_alloc(src_len, 0);

	for (rd = 0; rd < src_len; rd++) {
		const unsigned char ch = src[rd];

		if ((flags & FILTER_FLAG_STRIP_HIGH) && ch >= 127) {
			continue;
		}
		if ((flags & FILTER_FLAG_STRIP_LOW) && ch < 32) {
			continue;
		}
		if ((flags & FILTER_FLAG_STRIP_BACKTICK) && ch == '`') {
			continue;
		}

		ZSTR_VAL(out)[wr++] = ch;
	}

	/* Terminate and record the shortened length before swapping strings */
	ZSTR_VAL(out)[wr] = '\0';
	ZSTR_LEN(out) = wr;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
127 /* }}} */
128
129 /* {{{ FILTER MAP HELPERS */
/* Zero every slot of `map`, so that no byte is marked until
 * filter_map_update() is called. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
134
/*
 * Store `flag` in the slot of every byte that occurs in `allowed_list`.
 * `allowed_list` is read up to (not including) its terminating NUL byte.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *ch;

	for (ch = allowed_list; *ch != '\0'; ch++) {
		(*map)[*ch] = flag;
	}
}
144
/*
 * Keep only those bytes of the string in `value` whose slot in `map` is
 * non-zero. The old string is released and `value` receives a newly
 * allocated string containing the kept bytes in their original order.
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *src = (const unsigned char *)Z_STRVAL_P(value);
	const size_t src_len = Z_STRLEN_P(value);
	/* The filtered string can never be longer than the input */
	zend_string *out = zend_string_alloc(src_len, 0);
	size_t rd;
	size_t wr = 0;

	for (rd = 0; rd < src_len; rd++) {
		const unsigned char ch = src[rd];

		if (!(*map)[ch]) {
			continue;
		}
		ZSTR_VAL(out)[wr++] = ch;
	}

	/* Terminate and record the shortened length before swapping strings */
	ZSTR_VAL(out)[wr] = '\0';
	ZSTR_LEN(out) = wr;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, out);
}
166 /* }}} */
167
168 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)169 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
170 {
171 size_t new_len;
172 unsigned char enc[256] = {0};
173
174 if (!Z_REFCOUNTED_P(value)) {
175 ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
176 }
177
178 /* strip high/strip low ( see flags )*/
179 php_filter_strip(value, flags);
180
181 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
182 enc['\''] = enc['"'] = 1;
183 }
184 if (flags & FILTER_FLAG_ENCODE_AMP) {
185 enc['&'] = 1;
186 }
187 if (flags & FILTER_FLAG_ENCODE_LOW) {
188 memset(enc, 1, 32);
189 }
190 if (flags & FILTER_FLAG_ENCODE_HIGH) {
191 memset(enc + 127, 1, sizeof(enc) - 127);
192 }
193
194 php_filter_encode_html(value, enc);
195
196 /* strip tags, implicitly also removes \0 chars */
197 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, 0, 1);
198 Z_STRLEN_P(value) = new_len;
199
200 if (new_len == 0) {
201 zval_ptr_dtor(value);
202 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
203 ZVAL_NULL(value);
204 } else {
205 ZVAL_EMPTY_STRING(value);
206 }
207 return;
208 }
209 }
210 /* }}} */
211
212 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)213 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
214 {
215 /* apply strip_high and strip_low filters */
216 php_filter_strip(value, flags);
217 /* urlencode */
218 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
219 }
220 /* }}} */
221
222 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)223 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
224 {
225 unsigned char enc[256] = {0};
226
227 php_filter_strip(value, flags);
228
229 /* encodes ' " < > & \0 to numerical entities */
230 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
231
232 /* if strip low is not set, then we encode them as &#xx; */
233 memset(enc, 1, 32);
234
235 if (flags & FILTER_FLAG_ENCODE_HIGH) {
236 memset(enc + 127, 1, sizeof(enc) - 127);
237 }
238
239 php_filter_encode_html(value, enc);
240 }
241 /* }}} */
242
243 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)244 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
245 {
246 zend_string *buf;
247 int quotes;
248
249 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
250 quotes = ENT_QUOTES;
251 } else {
252 quotes = ENT_NOQUOTES;
253 }
254 buf = php_escape_html_entities_ex(
255 (unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
256 /* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
257 zval_ptr_dtor(value);
258 ZVAL_STR(value, buf);
259 }
260 /* }}} */
261
262 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)263 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
264 {
265 /* Only if no flags are set (optimization) */
266 if (flags != 0 && Z_STRLEN_P(value) > 0) {
267 unsigned char enc[256] = {0};
268
269 php_filter_strip(value, flags);
270
271 if (flags & FILTER_FLAG_ENCODE_AMP) {
272 enc['&'] = 1;
273 }
274 if (flags & FILTER_FLAG_ENCODE_LOW) {
275 memset(enc, 1, 32);
276 }
277 if (flags & FILTER_FLAG_ENCODE_HIGH) {
278 memset(enc + 127, 1, sizeof(enc) - 127);
279 }
280
281 php_filter_encode_html(value, enc);
282 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
283 zval_ptr_dtor(value);
284 ZVAL_NULL(value);
285 }
286 }
287 /* }}} */
288
289 /* {{{ php_filter_email */
/* Character groups that php_filter_url() concatenates, together with
 * LOWALPHA, HIALPHA and DIGIT, into its list of permitted bytes. */
#define SAFE "$-_.+"
#define EXTRA "!*'(),"
#define NATIONAL "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED ";/?:@&="
295
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)296 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
297 {
298 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
299 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
300 filter_map map;
301
302 filter_map_init(&map);
303 filter_map_update(&map, 1, allowed_list);
304 filter_map_apply(value, &map);
305 }
306 /* }}} */
307
308 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)309 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
310 {
311 /* Strip all chars not part of section 5 of
312 * http://www.faqs.org/rfcs/rfc1738.html */
313 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
314 filter_map map;
315
316 filter_map_init(&map);
317 filter_map_update(&map, 1, allowed_list);
318 filter_map_apply(value, &map);
319 }
320 /* }}} */
321
322 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)323 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
324 {
325 /* strip everything [^0-9+-] */
326 const unsigned char allowed_list[] = "+-" DIGIT;
327 filter_map map;
328
329 filter_map_init(&map);
330 filter_map_update(&map, 1, allowed_list);
331 filter_map_apply(value, &map);
332 }
333 /* }}} */
334
335 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)336 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
337 {
338 /* strip everything [^0-9+-] */
339 const unsigned char allowed_list[] = "+-" DIGIT;
340 filter_map map;
341
342 filter_map_init(&map);
343 filter_map_update(&map, 1, allowed_list);
344
345 /* depending on flags, strip '.', 'e', ",", "'" */
346 if (flags & FILTER_FLAG_ALLOW_FRACTION) {
347 filter_map_update(&map, 2, (const unsigned char *) ".");
348 }
349 if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
350 filter_map_update(&map, 3, (const unsigned char *) ",");
351 }
352 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
353 filter_map_update(&map, 4, (const unsigned char *) "eE");
354 }
355 filter_map_apply(value, &map);
356 }
357 /* }}} */
358
359 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)360 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
361 {
362 zend_string *buf = php_addslashes(Z_STR_P(value));
363
364 zval_ptr_dtor(value);
365 ZVAL_STR(value, buf);
366 }
367 /* }}} */
368