1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Derick Rethans <derick@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php_filter.h"
18 #include "filter_private.h"
19 #include "zend_smart_str.h"
20
21 /* {{{ STRUCTS */
/* Lookup table with one slot per possible byte value (0..255). The
 * filter_map_* helpers below treat a non-zero slot as "keep this byte". */
typedef unsigned long filter_map[256];
23 /* }}} */
24
25 /* {{{ HELPER FUNCTIONS */
/*
 * Rewrite the string held by `value`, replacing every byte b for which
 * chars[b] is non-zero with the decimal numeric entity "&#<b>;". All other
 * bytes are copied through unchanged. An empty string is left untouched.
 *
 * `chars` is indexed by byte value, so it has to provide 256 entries.
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	smart_str encoded = {0};
	const unsigned char *cur;
	const unsigned char *end;

	if (Z_STRLEN_P(value) == 0) {
		return;
	}

	cur = (const unsigned char *)Z_STRVAL_P(value);
	end = cur + Z_STRLEN_P(value);

	for (; cur < end; cur++) {
		const unsigned char ch = *cur;

		if (!chars[ch]) {
			/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
			smart_str_appendc(&encoded, ch);
			continue;
		}

		smart_str_appendl(&encoded, "&#", 2);
		smart_str_append_unsigned(&encoded, (zend_ulong)ch);
		smart_str_appendc(&encoded, ';');
	}

	/* Release the old string and hand the freshly built one to the zval. */
	zval_ptr_dtor(value);
	ZVAL_STR(value, smart_str_extract(&encoded));
}
52
/* Upper-case hexadecimal digits, indexed by nibble value (0..15). */
static const unsigned char hexchars[] = "0123456789ABCDEF";

/* Building blocks for the character lists used by the filters in this file. */
#define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT "0123456789"

/* Bytes that php_filter_encoded() lets through without percent-encoding. */
#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"
60
/*
 * Percent-encode the string held by `value`.
 *
 * Every byte that does not appear in `chars` (an array of `char_len` bytes
 * that may pass through verbatim) is replaced by "%XX", where XX is the
 * byte's value as two upper-case hexadecimal digits. The zval's string is
 * released and replaced with the newly built one.
 *
 * The `high`, `low` and `encode_nul` parameters are accepted but not used by
 * this implementation.
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char must_encode[256];
	const unsigned char *s = chars;
	const unsigned char *e = chars + char_len;
	unsigned char *p;
	zend_string *str;

	/* Mark all 256 byte values for encoding. The table has to be filled
	 * completely: leaving the last slot (0xFF) unset would make the handling
	 * of that byte depend on whatever happened to be on the stack. */
	memset(must_encode, 1, sizeof(must_encode));

	/* ...then exempt the bytes the caller allows through unchanged. */
	while (s < e) {
		must_encode[*s++] = 0;
	}

	/* Each input byte produces at most three output bytes ("%XX"). */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (must_encode[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	/* The buffer was sized for the worst case; record the real length. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
95
/*
 * Remove bytes from the string in `value` according to the strip flags:
 *   FILTER_FLAG_STRIP_HIGH     drops bytes >= 127
 *   FILTER_FLAG_STRIP_LOW      drops bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK drops '`'
 * When none of those flags is set the value is left as is; otherwise the
 * zval's string is replaced by a newly allocated copy holding the kept bytes.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const zend_long strip_mask = FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK;
	const unsigned char *src;
	zend_string *kept;
	size_t src_len;
	size_t pos;
	size_t kept_len = 0;

	/* Nothing to strip: skip the copy altogether. */
	if ((flags & strip_mask) == 0) {
		return;
	}

	src = (const unsigned char *)Z_STRVAL_P(value);
	src_len = Z_STRLEN_P(value);
	/* The result can never be longer than the input. */
	kept = zend_string_alloc(src_len, 0);

	for (pos = 0; pos < src_len; pos++) {
		const unsigned char ch = src[pos];

		if (ch >= 127 && (flags & FILTER_FLAG_STRIP_HIGH)) {
			continue;
		}
		if (ch < 32 && (flags & FILTER_FLAG_STRIP_LOW)) {
			continue;
		}
		if (ch == '`' && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
			continue;
		}
		ZSTR_VAL(kept)[kept_len++] = ch;
	}

	/* Terminate, fix up the length and swap the new string into the zval. */
	ZSTR_VAL(kept)[kept_len] = '\0';
	ZSTR_LEN(kept) = kept_len;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, kept);
}
126 /* }}} */
127
128 /* {{{ FILTER MAP HELPERS */
/* Clear every slot of the lookup table pointed to by `map` to 0. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
133
/*
 * Store `flag` in the table slot of every byte found in `allowed_list`.
 * `allowed_list` is read up to (not including) its terminating NUL byte.
 */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cur;

	for (cur = allowed_list; *cur != '\0'; cur++) {
		(*map)[*cur] = flag;
	}
}
143
/*
 * Replace the string in `value` with a copy that contains only those bytes
 * whose slot in `map` is non-zero; all other bytes are dropped.
 */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *src = (const unsigned char *)Z_STRVAL_P(value);
	const size_t src_len = Z_STRLEN_P(value);
	/* The filtered string is at most as long as the input. */
	zend_string *kept = zend_string_alloc(src_len, 0);
	size_t kept_len = 0;
	size_t pos;

	for (pos = 0; pos < src_len; pos++) {
		if ((*map)[src[pos]] == 0) {
			continue;
		}
		ZSTR_VAL(kept)[kept_len++] = src[pos];
	}

	/* Terminate, fix up the length and swap the new string into the zval. */
	ZSTR_VAL(kept)[kept_len] = '\0';
	ZSTR_LEN(kept) = kept_len;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, kept);
}
165 /* }}} */
166
167 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)168 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
169 {
170 size_t new_len;
171 unsigned char enc[256] = {0};
172
173 if (!Z_REFCOUNTED_P(value)) {
174 ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
175 }
176
177 /* strip high/strip low ( see flags )*/
178 php_filter_strip(value, flags);
179
180 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
181 enc['\''] = enc['"'] = 1;
182 }
183 if (flags & FILTER_FLAG_ENCODE_AMP) {
184 enc['&'] = 1;
185 }
186 if (flags & FILTER_FLAG_ENCODE_LOW) {
187 memset(enc, 1, 32);
188 }
189 if (flags & FILTER_FLAG_ENCODE_HIGH) {
190 memset(enc + 127, 1, sizeof(enc) - 127);
191 }
192
193 php_filter_encode_html(value, enc);
194
195 /* strip tags, implicitly also removes \0 chars */
196 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, 0, 1);
197 Z_STRLEN_P(value) = new_len;
198
199 if (new_len == 0) {
200 zval_ptr_dtor(value);
201 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
202 ZVAL_NULL(value);
203 } else {
204 ZVAL_EMPTY_STRING(value);
205 }
206 return;
207 }
208 }
209 /* }}} */
210
211 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)212 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
213 {
214 /* apply strip_high and strip_low filters */
215 php_filter_strip(value, flags);
216 /* urlencode */
217 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
218 }
219 /* }}} */
220
221 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)222 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
223 {
224 unsigned char enc[256] = {0};
225
226 php_filter_strip(value, flags);
227
228 /* encodes ' " < > & \0 to numerical entities */
229 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
230
231 /* if strip low is not set, then we encode them as &#xx; */
232 memset(enc, 1, 32);
233
234 if (flags & FILTER_FLAG_ENCODE_HIGH) {
235 memset(enc + 127, 1, sizeof(enc) - 127);
236 }
237
238 php_filter_encode_html(value, enc);
239 }
240 /* }}} */
241
242 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)243 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
244 {
245 zend_string *buf;
246 int quotes;
247
248 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
249 quotes = ENT_QUOTES;
250 } else {
251 quotes = ENT_NOQUOTES;
252 }
253 buf = php_escape_html_entities_ex(
254 (unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
255 /* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
256 zval_ptr_dtor(value);
257 ZVAL_STR(value, buf);
258 }
259 /* }}} */
260
261 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)262 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
263 {
264 /* Only if no flags are set (optimization) */
265 if (flags != 0 && Z_STRLEN_P(value) > 0) {
266 unsigned char enc[256] = {0};
267
268 php_filter_strip(value, flags);
269
270 if (flags & FILTER_FLAG_ENCODE_AMP) {
271 enc['&'] = 1;
272 }
273 if (flags & FILTER_FLAG_ENCODE_LOW) {
274 memset(enc, 1, 32);
275 }
276 if (flags & FILTER_FLAG_ENCODE_HIGH) {
277 memset(enc + 127, 1, sizeof(enc) - 127);
278 }
279
280 php_filter_encode_html(value, enc);
281 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
282 zval_ptr_dtor(value);
283 ZVAL_NULL(value);
284 }
285 }
286 /* }}} */
287
288 /* {{{ php_filter_email */
/* Character classes that php_filter_url() concatenates into its allow-list.
 * NOTE(review): the names presumably mirror the BNF in section 5 of RFC 1738
 * cited in php_filter_url() - confirm against the RFC. */
#define SAFE "$-_.+"
#define EXTRA "!*'(),"
#define NATIONAL "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED ";/?:@&="
294
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)295 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
296 {
297 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
298 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
299 filter_map map;
300
301 filter_map_init(&map);
302 filter_map_update(&map, 1, allowed_list);
303 filter_map_apply(value, &map);
304 }
305 /* }}} */
306
307 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)308 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
309 {
310 /* Strip all chars not part of section 5 of
311 * http://www.faqs.org/rfcs/rfc1738.html */
312 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
313 filter_map map;
314
315 filter_map_init(&map);
316 filter_map_update(&map, 1, allowed_list);
317 filter_map_apply(value, &map);
318 }
319 /* }}} */
320
321 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)322 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
323 {
324 /* strip everything [^0-9+-] */
325 const unsigned char allowed_list[] = "+-" DIGIT;
326 filter_map map;
327
328 filter_map_init(&map);
329 filter_map_update(&map, 1, allowed_list);
330 filter_map_apply(value, &map);
331 }
332 /* }}} */
333
334 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)335 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
336 {
337 /* strip everything [^0-9+-] */
338 const unsigned char allowed_list[] = "+-" DIGIT;
339 filter_map map;
340
341 filter_map_init(&map);
342 filter_map_update(&map, 1, allowed_list);
343
344 /* depending on flags, strip '.', 'e', ",", "'" */
345 if (flags & FILTER_FLAG_ALLOW_FRACTION) {
346 filter_map_update(&map, 2, (const unsigned char *) ".");
347 }
348 if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
349 filter_map_update(&map, 3, (const unsigned char *) ",");
350 }
351 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
352 filter_map_update(&map, 4, (const unsigned char *) "eE");
353 }
354 filter_map_apply(value, &map);
355 }
356 /* }}} */
357
358 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)359 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
360 {
361 zend_string *buf = php_addslashes(Z_STR_P(value));
362
363 zval_ptr_dtor(value);
364 ZVAL_STR(value, buf);
365 }
366 /* }}} */
367