1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Derick Rethans <derick@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "php_filter.h"
18 #include "filter_private.h"
19 #include "ext/standard/php_string.h"
20 #include "ext/standard/html.h"
21 #include "zend_smart_str.h"
22
23 /* {{{ STRUCTS */
/* Lookup table with one slot for every possible byte value. */
enum { FILTER_MAP_SIZE = 256 };
typedef unsigned long filter_map[FILTER_MAP_SIZE];
25 /* }}} */
26
27 /* {{{ HELPER FUNCTIONS */
/* Rewrites the string held by `value` so that every byte b with a non-zero
 * chars[b] is emitted as the decimal numeric entity "&#<b>;". All other bytes
 * are copied through unchanged. An empty string is left untouched.
 *
 * value - string zval; receives the newly built string
 * chars - 256-entry table indexed by byte value, non-zero means "encode"
 */
static void php_filter_encode_html(zval *value, const unsigned char *chars)
{
	const size_t input_len = Z_STRLEN_P(value);
	const unsigned char *cursor;
	const unsigned char *end;
	smart_str encoded = {0};

	if (input_len == 0) {
		return;
	}

	cursor = (const unsigned char *)Z_STRVAL_P(value);
	end = cursor + input_len;

	/* XXX: this needs to be optimized to work with blocks of 'safe' chars */
	for (; cursor < end; cursor++) {
		const unsigned char byte = *cursor;

		if (!chars[byte]) {
			smart_str_appendc(&encoded, byte);
			continue;
		}

		smart_str_appendl(&encoded, "&#", 2);
		smart_str_append_unsigned(&encoded, (zend_ulong)byte);
		smart_str_appendc(&encoded, ';');
	}

	/* Drop the old string only after the replacement is fully built. */
	zval_ptr_dtor(value);
	ZVAL_STR(value, smart_str_extract(&encoded));
}
54
/* Basic character classes used to assemble allow-lists. */
#define LOWALPHA "abcdefghijklmnopqrstuvwxyz"
#define HIALPHA "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
#define DIGIT "0123456789"

/* Bytes that are passed to php_filter_encode_url() as "do not escape". */
#define DEFAULT_URL_ENCODE LOWALPHA HIALPHA DIGIT "-._"

/* Upper-case hexadecimal digits, indexed by nibble value (0..15). */
static const unsigned char hexchars[] = DIGIT "ABCDEF";
62
/* Percent-encodes the string held by `value`; the zval receives a newly
 * allocated string.
 *
 * Every byte that is not listed in `chars` is written as "%XX" using
 * upper-case hex digits; bytes listed in `chars` are copied through unchanged.
 *
 * value      - string zval to rewrite
 * chars      - bytes that must NOT be escaped (length given by char_len, so it
 *              need not be NUL-terminated)
 * char_len   - number of bytes in `chars`
 * high, low, encode_nul - kept for interface compatibility; this
 *              implementation does not consult them
 */
static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
{
	unsigned char *p;
	unsigned char tmp[256];
	const unsigned char *s = chars;
	const unsigned char *e = s + char_len;
	zend_string *str;

	/* Start with every one of the 256 byte values marked "needs escaping".
	 * The whole table has to be initialised: entry 255 is looked up whenever
	 * the input contains byte 0xFF. */
	memset(tmp, 1, sizeof(tmp));

	/* Clear the mark for each byte the caller wants passed through. */
	while (s < e) {
		tmp[*s++] = '\0';
	}

	/* Size the output for the worst case of three output bytes per input byte. */
	str = zend_string_safe_alloc(Z_STRLEN_P(value), 3, 0, 0);
	p = (unsigned char *) ZSTR_VAL(str);
	s = (const unsigned char *) Z_STRVAL_P(value);
	e = s + Z_STRLEN_P(value);

	while (s < e) {
		if (tmp[*s]) {
			*p++ = '%';
			*p++ = hexchars[*s >> 4];
			*p++ = hexchars[*s & 15];
		} else {
			*p++ = *s;
		}
		s++;
	}
	*p = '\0';
	/* Record the actual length, which may be shorter than the allocation. */
	ZSTR_LEN(str) = p - (unsigned char *)ZSTR_VAL(str);
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, str);
}
97
/* Removes bytes from the string held by `value` according to the strip flags:
 *   FILTER_FLAG_STRIP_HIGH     - drops bytes >= 127
 *   FILTER_FLAG_STRIP_LOW      - drops bytes < 32
 *   FILTER_FLAG_STRIP_BACKTICK - drops '`'
 * When none of these flags is set the zval is left untouched; otherwise it
 * receives a newly allocated string containing the surviving bytes.
 */
static void php_filter_strip(zval *value, zend_long flags)
{
	const zend_long strip_mask = FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK;
	const unsigned char *input;
	size_t input_len;
	size_t pos;
	size_t kept = 0;
	zend_string *output;

	/* Nothing to strip: skip the allocation and copy entirely. */
	if ((flags & strip_mask) == 0) {
		return;
	}

	input = (const unsigned char *)Z_STRVAL_P(value);
	input_len = Z_STRLEN_P(value);
	/* The result can never be longer than the input. */
	output = zend_string_alloc(input_len, 0);

	for (pos = 0; pos < input_len; pos++) {
		const unsigned char byte = input[pos];

		if ((flags & FILTER_FLAG_STRIP_HIGH) && byte >= 127) {
			continue;
		}
		if ((flags & FILTER_FLAG_STRIP_LOW) && byte < 32) {
			continue;
		}
		if ((flags & FILTER_FLAG_STRIP_BACKTICK) && byte == '`') {
			continue;
		}
		ZSTR_VAL(output)[kept] = byte;
		kept++;
	}

	/* Terminate and shrink the logical length to what was actually kept. */
	ZSTR_VAL(output)[kept] = '\0';
	ZSTR_LEN(output) = kept;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, output);
}
128 /* }}} */
129
130 /* {{{ FILTER MAP HELPERS */
/* Clears every entry of the map, so that no byte is marked as allowed. */
static void filter_map_init(filter_map *map)
{
	memset(*map, 0, sizeof(*map));
}
135
/* Stores `flag` in the map entry of every byte found in the NUL-terminated
 * string `allowed_list`. Entries for other bytes are left as they were. */
static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
{
	const unsigned char *cursor;

	for (cursor = allowed_list; *cursor != '\0'; cursor++) {
		(*map)[*cursor] = flag;
	}
}
145
/* Replaces the string held by `value` with a newly allocated copy that keeps
 * only the bytes whose map entry is non-zero. */
static void filter_map_apply(zval *value, filter_map *map)
{
	const unsigned char *input = (const unsigned char *)Z_STRVAL_P(value);
	const size_t input_len = Z_STRLEN_P(value);
	/* The filtered result can never be longer than the input. */
	zend_string *output = zend_string_alloc(input_len, 0);
	size_t kept = 0;
	size_t pos;

	for (pos = 0; pos < input_len; pos++) {
		const unsigned char byte = input[pos];

		if (!(*map)[byte]) {
			continue;
		}
		ZSTR_VAL(output)[kept] = byte;
		kept++;
	}

	/* Terminate and record how many bytes survived the filter. */
	ZSTR_VAL(output)[kept] = '\0';
	ZSTR_LEN(output) = kept;
	zval_ptr_dtor(value);
	ZVAL_NEW_STR(value, output);
}
167 /* }}} */
168
169 /* {{{ php_filter_string */
php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)170 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
171 {
172 size_t new_len;
173 unsigned char enc[256] = {0};
174
175 if (!Z_REFCOUNTED_P(value)) {
176 ZVAL_STRINGL(value, Z_STRVAL_P(value), Z_STRLEN_P(value));
177 }
178
179 /* strip high/strip low ( see flags )*/
180 php_filter_strip(value, flags);
181
182 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
183 enc['\''] = enc['"'] = 1;
184 }
185 if (flags & FILTER_FLAG_ENCODE_AMP) {
186 enc['&'] = 1;
187 }
188 if (flags & FILTER_FLAG_ENCODE_LOW) {
189 memset(enc, 1, 32);
190 }
191 if (flags & FILTER_FLAG_ENCODE_HIGH) {
192 memset(enc + 127, 1, sizeof(enc) - 127);
193 }
194
195 php_filter_encode_html(value, enc);
196
197 /* strip tags, implicitly also removes \0 chars */
198 new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, 0, 1);
199 Z_STRLEN_P(value) = new_len;
200
201 if (new_len == 0) {
202 zval_ptr_dtor(value);
203 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
204 ZVAL_NULL(value);
205 } else {
206 ZVAL_EMPTY_STRING(value);
207 }
208 return;
209 }
210 }
211 /* }}} */
212
213 /* {{{ php_filter_encoded */
php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)214 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
215 {
216 /* apply strip_high and strip_low filters */
217 php_filter_strip(value, flags);
218 /* urlencode */
219 php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
220 }
221 /* }}} */
222
223 /* {{{ php_filter_special_chars */
php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)224 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
225 {
226 unsigned char enc[256] = {0};
227
228 php_filter_strip(value, flags);
229
230 /* encodes ' " < > & \0 to numerical entities */
231 enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
232
233 /* if strip low is not set, then we encode them as &#xx; */
234 memset(enc, 1, 32);
235
236 if (flags & FILTER_FLAG_ENCODE_HIGH) {
237 memset(enc + 127, 1, sizeof(enc) - 127);
238 }
239
240 php_filter_encode_html(value, enc);
241 }
242 /* }}} */
243
244 /* {{{ php_filter_full_special_chars */
php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)245 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
246 {
247 zend_string *buf;
248 int quotes;
249
250 if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
251 quotes = ENT_QUOTES;
252 } else {
253 quotes = ENT_NOQUOTES;
254 }
255 buf = php_escape_html_entities_ex(
256 (unsigned char *) Z_STRVAL_P(value), Z_STRLEN_P(value), /* all */ 1, quotes,
257 /* charset_hint */ NULL, /* double_encode */ 0, /* quiet */ 0);
258 zval_ptr_dtor(value);
259 ZVAL_STR(value, buf);
260 }
261 /* }}} */
262
263 /* {{{ php_filter_unsafe_raw */
php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)264 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
265 {
266 /* Only if no flags are set (optimization) */
267 if (flags != 0 && Z_STRLEN_P(value) > 0) {
268 unsigned char enc[256] = {0};
269
270 php_filter_strip(value, flags);
271
272 if (flags & FILTER_FLAG_ENCODE_AMP) {
273 enc['&'] = 1;
274 }
275 if (flags & FILTER_FLAG_ENCODE_LOW) {
276 memset(enc, 1, 32);
277 }
278 if (flags & FILTER_FLAG_ENCODE_HIGH) {
279 memset(enc + 127, 1, sizeof(enc) - 127);
280 }
281
282 php_filter_encode_html(value, enc);
283 } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
284 zval_ptr_dtor(value);
285 ZVAL_NULL(value);
286 }
287 }
288 /* }}} */
289
290 /* {{{ php_filter_email */
/* Additional character classes combined into the URL allow-list
 * (named after the classes in the BNF of RFC 1738, section 5). */
#define SAFE        "$-_.+"
#define EXTRA       "!*'(),"
#define NATIONAL    "{}|\\^~[]`"
#define PUNCTUATION "<>#%\""
#define RESERVED    ";/?:@&="
296
php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)297 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
298 {
299 /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
300 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
301 filter_map map;
302
303 filter_map_init(&map);
304 filter_map_update(&map, 1, allowed_list);
305 filter_map_apply(value, &map);
306 }
307 /* }}} */
308
309 /* {{{ php_filter_url */
php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)310 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
311 {
312 /* Strip all chars not part of section 5 of
313 * http://www.faqs.org/rfcs/rfc1738.html */
314 const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
315 filter_map map;
316
317 filter_map_init(&map);
318 filter_map_update(&map, 1, allowed_list);
319 filter_map_apply(value, &map);
320 }
321 /* }}} */
322
323 /* {{{ php_filter_number_int */
php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)324 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
325 {
326 /* strip everything [^0-9+-] */
327 const unsigned char allowed_list[] = "+-" DIGIT;
328 filter_map map;
329
330 filter_map_init(&map);
331 filter_map_update(&map, 1, allowed_list);
332 filter_map_apply(value, &map);
333 }
334 /* }}} */
335
336 /* {{{ php_filter_number_float */
php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)337 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
338 {
339 /* strip everything [^0-9+-] */
340 const unsigned char allowed_list[] = "+-" DIGIT;
341 filter_map map;
342
343 filter_map_init(&map);
344 filter_map_update(&map, 1, allowed_list);
345
346 /* depending on flags, strip '.', 'e', ",", "'" */
347 if (flags & FILTER_FLAG_ALLOW_FRACTION) {
348 filter_map_update(&map, 2, (const unsigned char *) ".");
349 }
350 if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
351 filter_map_update(&map, 3, (const unsigned char *) ",");
352 }
353 if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
354 filter_map_update(&map, 4, (const unsigned char *) "eE");
355 }
356 filter_map_apply(value, &map);
357 }
358 /* }}} */
359
360 /* {{{ php_filter_add_slashes */
php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)361 void php_filter_add_slashes(PHP_INPUT_FILTER_PARAM_DECL)
362 {
363 zend_string *buf = php_addslashes(Z_STR_P(value));
364
365 zval_ptr_dtor(value);
366 ZVAL_STR(value, buf);
367 }
368 /* }}} */
369