1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | http://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Derick Rethans <derick@php.net> |
14 | Pierre-A. Joye <pierre@php.net> |
15 | Kévin Dunglas <dunglas@gmail.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "ext/standard/url.h"
22 #include "ext/pcre/php_pcre.h"
23
24 #include "zend_multiply.h"
25
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
28 #endif
29
/* Fallback definition for builds where no included header provides INADDR_NONE. */
#ifndef INADDR_NONE
# define INADDR_NONE ((unsigned long int) -1)
#endif
33
34
/* {{{ FETCH_DOUBLE_OPTION(var_name, option_name)
 *
 * Looks up option_name in option_array (when one was supplied) and stores
 * its value, converted with zval_get_double(), in var_name. var_name##_set
 * is 1 when the key was present and 0 otherwise (var_name is then 0).
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope,
 * and option_name to be a string literal (sizeof is applied to it).
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and stays correct under an unbraced if/else; invoke it with a trailing
 * semicolon. */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
46
/* {{{ FETCH_LONG_OPTION(var_name, option_name)
 *
 * Looks up option_name in option_array (when one was supplied) and stores
 * its value, converted with zval_get_long(), in var_name. var_name##_set
 * is 1 when the key was present and 0 otherwise (var_name is then 0).
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope,
 * and option_name to be a string literal (sizeof is applied to it).
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and stays correct under an unbraced if/else; invoke it with a trailing
 * semicolon. */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
58
/* {{{ FETCH_STRING_OPTION(var_name, option_name)
 *
 * Looks up option_name in option_array (when one was supplied). If the
 * entry exists and is a string, var_name points at its character data,
 * var_name##_len holds its length and var_name##_set is 1. In every other
 * case var_name is NULL, var_name##_len is 0 and var_name##_set is 0.
 * The pointer is borrowed from the option zval; nothing is copied.
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope,
 * and option_name to be a string literal (sizeof is applied to it).
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and stays correct under an unbraced if/else; invoke it with a trailing
 * semicolon. */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
74
/* {{{ FETCH_STR_OPTION(var_name, option_name)
 *
 * Looks up option_name in option_array (when one was supplied). If the
 * entry exists and is a string, var_name receives its zend_string pointer
 * (Z_STR_P, borrowed, no refcount change here) and var_name##_set is 1.
 * Otherwise var_name is NULL and var_name##_set is 0.
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope,
 * and option_name to be a string literal (sizeof is applied to it).
 *
 * Wrapped in do { } while (0) so that the expansion is a single statement
 * and stays correct under an unbraced if/else; invoke it with a trailing
 * semicolon. */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
88
/* Tags used by php_filter_validate_ip() to remember which textual address
 * format (dotted IPv4 or colon-separated IPv6) the input appears to use. */
#define FORMAT_IPV4 4
#define FORMAT_IPV6 6

/* Forward declaration: php_filter_validate_url() calls this function before
 * its definition further down in the file. */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]);
93
/* Parses a decimal integer with an optional leading '+' or '-'.
 *
 * str      start of the text (need not be NUL-terminated; only
 *          str[0 .. str_len-1] is read)
 * str_len  number of bytes to parse
 * ret      receives the parsed value on success
 *
 * Returns 1 on success and -1 on failure. Fails on empty input, a lone
 * sign, a leading zero followed by more digits, any non-digit byte, or a
 * value outside the zend_long range. On failure *ret is left untouched.
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Optional sign. The bounds check keeps us from reading str[0] of an
	 * empty input. */
	if (str < end && (*str == '-' || *str == '+')) {
		sign = (*str == '-');
		str++;
	}

	if (str < end && *str == '0' && str + 1 == end) {
		/* Special cases: 0, +0 and -0. Store the result explicitly so the
		 * caller does not have to pre-initialise *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* Check before multiplying so the signed arithmetic below can
			 * never overflow. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
/* }}} */
145
/* Parses str[0 .. str_len-1] as an unsigned octal number.
 *
 * Returns 1 and stores the value (reinterpreted as zend_long) in *ret when
 * every byte is an octal digit and the value fits in a zend_ulong; returns
 * -1 otherwise, leaving *ret untouched. An empty input yields 0.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		const char c = str[pos];
		zend_ulong digit;

		if (c < '0' || c > '7') {
			return -1;
		}
		digit = c - '0';

		/* Reject before the shift-by-8 would wrap around. */
		if (acc > limit / 8) {
			return -1;
		}
		acc *= 8;
		if (acc > limit - digit) {
			return -1;
		}
		acc += digit;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
168
/* Parses str[0 .. str_len-1] as an unsigned hexadecimal number (no "0x"
 * prefix; upper- and lower-case digits are both accepted).
 *
 * Returns 1 and stores the value (reinterpreted as zend_long) in *ret when
 * every byte is a hex digit and the value fits in a zend_ulong; returns -1
 * otherwise, leaving *ret untouched. An empty input yields 0.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		const char c = str[pos];
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = c - '0';
		} else if (c >= 'a' && c <= 'f') {
			nibble = c - ('a' - 10);
		} else if (c >= 'A' && c <= 'F') {
			nibble = c - ('A' - 10);
		} else {
			return -1;
		}

		/* Reject before the multiply-by-16 would wrap around. */
		if (acc > limit / 16) {
			return -1;
		}
		acc *= 16;
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
195
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)196 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
197 {
198 zval *option_val;
199 zend_long min_range, max_range, option_flags;
200 int min_range_set, max_range_set;
201 int allow_octal = 0, allow_hex = 0;
202 size_t len;
203 int error = 0;
204 zend_long ctx_value;
205 char *p;
206
207 /* Parse options */
208 FETCH_LONG_OPTION(min_range, "min_range");
209 FETCH_LONG_OPTION(max_range, "max_range");
210 option_flags = flags;
211
212 len = Z_STRLEN_P(value);
213
214 if (len == 0) {
215 RETURN_VALIDATION_FAILED
216 }
217
218 if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
219 allow_octal = 1;
220 }
221
222 if (option_flags & FILTER_FLAG_ALLOW_HEX) {
223 allow_hex = 1;
224 }
225
226 /* Start the validating loop */
227 p = Z_STRVAL_P(value);
228 ctx_value = 0;
229
230 PHP_FILTER_TRIM_DEFAULT(p, len);
231
232 if (*p == '0') {
233 p++; len--;
234 if (allow_hex && (*p == 'x' || *p == 'X')) {
235 p++; len--;
236 if (len == 0) {
237 RETURN_VALIDATION_FAILED
238 }
239 if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
240 error = 1;
241 }
242 } else if (allow_octal) {
243 if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
244 error = 1;
245 }
246 } else if (len != 0) {
247 error = 1;
248 }
249 } else {
250 if (php_filter_parse_int(p, len, &ctx_value) < 0) {
251 error = 1;
252 }
253 }
254
255 if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
256 RETURN_VALIDATION_FAILED
257 } else {
258 zval_ptr_dtor(value);
259 ZVAL_LONG(value, ctx_value);
260 return;
261 }
262 }
263 /* }}} */
264
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)265 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
266 {
267 char *str = Z_STRVAL_P(value);
268 size_t len = Z_STRLEN_P(value);
269 int ret;
270
271 PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
272
273 /* returns true for "1", "true", "on" and "yes"
274 * returns false for "0", "false", "off", "no", and ""
275 * null otherwise. */
276 switch (len) {
277 case 0:
278 ret = 0;
279 break;
280 case 1:
281 if (*str == '1') {
282 ret = 1;
283 } else if (*str == '0') {
284 ret = 0;
285 } else {
286 ret = -1;
287 }
288 break;
289 case 2:
290 if (strncasecmp(str, "on", 2) == 0) {
291 ret = 1;
292 } else if (strncasecmp(str, "no", 2) == 0) {
293 ret = 0;
294 } else {
295 ret = -1;
296 }
297 break;
298 case 3:
299 if (strncasecmp(str, "yes", 3) == 0) {
300 ret = 1;
301 } else if (strncasecmp(str, "off", 3) == 0) {
302 ret = 0;
303 } else {
304 ret = -1;
305 }
306 break;
307 case 4:
308 if (strncasecmp(str, "true", 4) == 0) {
309 ret = 1;
310 } else {
311 ret = -1;
312 }
313 break;
314 case 5:
315 if (strncasecmp(str, "false", 5) == 0) {
316 ret = 0;
317 } else {
318 ret = -1;
319 }
320 break;
321 default:
322 ret = -1;
323 }
324
325 if (ret == -1) {
326 RETURN_VALIDATION_FAILED
327 } else {
328 zval_ptr_dtor(value);
329 ZVAL_BOOL(value, ret);
330 }
331 }
332 /* }}} */
333
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)334 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
335 {
336 size_t len;
337 char *str, *end;
338 char *num, *p;
339 zval *option_val;
340 char *decimal;
341 int decimal_set;
342 size_t decimal_len;
343 char dec_sep = '.';
344 char *thousand;
345 int thousand_set;
346 size_t thousand_len;
347 char *tsd_sep;
348
349 zend_long lval;
350 double dval;
351 double min_range, max_range;
352 int min_range_set, max_range_set;
353
354 int first, n;
355
356 len = Z_STRLEN_P(value);
357 str = Z_STRVAL_P(value);
358
359 PHP_FILTER_TRIM_DEFAULT(str, len);
360 end = str + len;
361
362 FETCH_STRING_OPTION(decimal, "decimal");
363
364 if (decimal_set) {
365 if (decimal_len != 1) {
366 zend_value_error("%s(): \"decimal\" option must be one character long", get_active_function_name());
367 RETURN_VALIDATION_FAILED
368 } else {
369 dec_sep = *decimal;
370 }
371 }
372
373 FETCH_STRING_OPTION(thousand, "thousand");
374
375 if (thousand_set) {
376 if (thousand_len < 1) {
377 zend_value_error("%s(): \"thousand\" option cannot be empty", get_active_function_name());
378 RETURN_VALIDATION_FAILED
379 } else {
380 tsd_sep = thousand;
381 }
382 } else {
383 tsd_sep = "',.";
384 }
385
386 FETCH_DOUBLE_OPTION(min_range, "min_range");
387 FETCH_DOUBLE_OPTION(max_range, "max_range");
388
389 num = p = emalloc(len+1);
390 if (str < end && (*str == '+' || *str == '-')) {
391 *p++ = *str++;
392 }
393 first = 1;
394 while (1) {
395 n = 0;
396 while (str < end && *str >= '0' && *str <= '9') {
397 ++n;
398 *p++ = *str++;
399 }
400 if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
401 if (!first && n != 3) {
402 goto error;
403 }
404 if (*str == dec_sep) {
405 *p++ = '.';
406 str++;
407 while (str < end && *str >= '0' && *str <= '9') {
408 *p++ = *str++;
409 }
410 }
411 if (*str == 'e' || *str == 'E') {
412 *p++ = *str++;
413 if (str < end && (*str == '+' || *str == '-')) {
414 *p++ = *str++;
415 }
416 while (str < end && *str >= '0' && *str <= '9') {
417 *p++ = *str++;
418 }
419 }
420 break;
421 }
422 if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
423 if (first?(n < 1 || n > 3):(n != 3)) {
424 goto error;
425 }
426 first = 0;
427 str++;
428 } else {
429 goto error;
430 }
431 }
432 if (str != end) {
433 goto error;
434 }
435 *p = 0;
436
437 switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
438 case IS_LONG:
439 if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
440 goto error;
441 }
442 zval_ptr_dtor(value);
443 ZVAL_DOUBLE(value, (double)lval);
444 break;
445 case IS_DOUBLE:
446 if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
447 goto error;
448 }
449 if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
450 goto error;
451 }
452 zval_ptr_dtor(value);
453 ZVAL_DOUBLE(value, dval);
454 break;
455 default:
456 error:
457 efree(num);
458 RETURN_VALIDATION_FAILED
459 }
460 efree(num);
461 }
462 /* }}} */
463
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)464 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
465 {
466 zval *option_val;
467 zend_string *regexp;
468 int regexp_set;
469 pcre2_code *re = NULL;
470 pcre2_match_data *match_data = NULL;
471 uint32_t capture_count;
472 int rc;
473
474 /* Parse options */
475 FETCH_STR_OPTION(regexp, "regexp");
476
477 if (!regexp_set) {
478 zend_value_error("%s(): \"regexp\" option is missing", get_active_function_name());
479 RETURN_VALIDATION_FAILED
480 }
481
482 re = pcre_get_compiled_regex(regexp, &capture_count);
483 if (!re) {
484 RETURN_VALIDATION_FAILED
485 }
486 match_data = php_pcre_create_match_data(capture_count, re);
487 if (!match_data) {
488 RETURN_VALIDATION_FAILED
489 }
490 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
491 php_pcre_free_match_data(match_data);
492
493 /* 0 means that the vector is too small to hold all the captured substring offsets */
494 if (rc < 0) {
495 RETURN_VALIDATION_FAILED
496 }
497 }
498
/* Checks the syntactic shape of a domain name.
 *
 * domain  text to check; domain[len] must be readable (the code peeks one
 *         byte past the examined range, e.g. at the terminating NUL)
 * len     length of the text in bytes
 * flags   when FILTER_FLAG_HOSTNAME is set, labels may only contain
 *         alphanumerics and '-', and must start and end alphanumerically
 *
 * A single trailing dot is ignored. The name (without that dot) may be at
 * most 253 bytes, each label at most 63 bytes, and empty labels are not
 * allowed. Returns 1 when valid, 0 otherwise.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l;
	int hostname = (flags & FILTER_FLAG_HOSTNAME) != 0;
	unsigned char i = 1;

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot (only look at e[-1] when there is such a byte) */
	if (l > 0 && *(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			/* The dot branch above rejects a '-' directly before a dot;
			 * the final label has no dot after it, so its last character
			 * has to be checked here. */
			if (hostname && *s == '-' && s + 1 == e) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
/* }}} */
550
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)551 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
552 {
553 if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
554 RETURN_VALIDATION_FAILED
555 }
556 }
557 /* }}} */
558
/* Checks that a URL user or password component only contains characters
 * permitted in RFC 3986 "userinfo": letters, digits, the punctuation listed
 * in `valid`, and percent-encoded octets ("%" followed by two hex digits).
 *
 * Returns 1 when every byte of str is acceptable, 0 otherwise. An empty
 * string is valid.
 */
static int is_userinfo_valid(zend_string *str)
{
	const char *valid = "-._~!$&'()*+,;=:";
	/* Work on unsigned bytes: passing a negative char to the <ctype.h>
	 * functions is undefined behaviour. */
	const unsigned char *p = (const unsigned char *) ZSTR_VAL(str);
	const unsigned char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* strchr() also matches the terminating NUL of `valid`, so a NUL
		 * byte must be excluded explicitly. */
		if (isalpha(*p) || isdigit(*p) || (*p != '\0' && strchr(valid, *p))) {
			p++;
		} else if (*p == '%' && end - p >= 3 && isxdigit(p[1]) && isxdigit(p[2])) {
			/* pct-encoded = "%" HEXDIG HEXDIG; the remaining-length test
			 * avoids the unsigned underflow of "length - 3". */
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
574
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)575 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
576 {
577 php_url *url;
578 size_t old_len = Z_STRLEN_P(value);
579
580 php_filter_url(value, flags, option_array, charset);
581
582 if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
583 RETURN_VALIDATION_FAILED
584 }
585
586 /* Use parse_url - if it returns false, we return NULL */
587 url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
588
589 if (url == NULL) {
590 RETURN_VALIDATION_FAILED
591 }
592
593 if (url->scheme != NULL &&
594 (zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
595 char *e, *s, *t;
596 size_t l;
597
598 if (url->host == NULL) {
599 goto bad_url;
600 }
601
602 s = ZSTR_VAL(url->host);
603 l = ZSTR_LEN(url->host);
604 e = s + l;
605 t = e - 1;
606
607 /* An IPv6 enclosed by square brackets is a valid hostname */
608 if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2, NULL)) {
609 php_url_free(url);
610 return;
611 }
612
613 // Validate domain
614 if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
615 php_url_free(url);
616 RETURN_VALIDATION_FAILED
617 }
618 }
619
620 if (
621 url->scheme == NULL ||
622 /* some schemas allow the host to be empty */
623 (url->host == NULL && (strcmp(ZSTR_VAL(url->scheme), "mailto") && strcmp(ZSTR_VAL(url->scheme), "news") && strcmp(ZSTR_VAL(url->scheme), "file"))) ||
624 ((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
625 ) {
626 bad_url:
627 php_url_free(url);
628 RETURN_VALIDATION_FAILED
629 }
630
631 if ((url->user != NULL && !is_userinfo_valid(url->user))
632 || (url->pass != NULL && !is_userinfo_valid(url->pass))
633 ) {
634 php_url_free(url);
635 RETURN_VALIDATION_FAILED
636
637 }
638
639 php_url_free(url);
640 }
641 /* }}} */
642
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)643 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
644 {
645 /*
646 * The regex below is based on a regex by Michael Rushton.
647 * However, it is not identical. I changed it to only consider routeable
648 * addresses as valid. Michael's regex considers a@b a valid address
649 * which conflicts with section 2.3.5 of RFC 5321 which states that:
650 *
651 * Only resolvable, fully-qualified domain names (FQDNs) are permitted
652 * when domain names are used in SMTP. In other words, names that can
653 * be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
654 * in Section 5) are permitted, as are CNAME RRs whose targets can be
655 * resolved, in turn, to MX or address RRs. Local nicknames or
656 * unqualified names MUST NOT be used.
657 *
658 * This regex does not handle comments and folding whitespace. While
659 * this is technically valid in an email address, these parts aren't
660 * actually part of the address itself.
661 *
662 * Michael's regex carries this copyright:
663 *
664 * Copyright © Michael Rushton 2009-10
665 * http://squiloople.com/
666 * Feel free to use and redistribute this code. But please keep this copyright notice.
667 *
668 */
669 pcre2_code *re = NULL;
670 pcre2_match_data *match_data = NULL;
671 uint32_t capture_count;
672 zend_string *sregexp;
673 int rc;
674 const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
675 const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
676 const char *regexp;
677 size_t regexp_len;
678
679 if (flags & FILTER_FLAG_EMAIL_UNICODE) {
680 regexp = regexp0;
681 regexp_len = sizeof(regexp0) - 1;
682 } else {
683 regexp = regexp1;
684 regexp_len = sizeof(regexp1) - 1;
685 }
686
687 /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
688 if (Z_STRLEN_P(value) > 320) {
689 RETURN_VALIDATION_FAILED
690 }
691
692 sregexp = zend_string_init(regexp, regexp_len, 0);
693 re = pcre_get_compiled_regex(sregexp, &capture_count);
694 zend_string_release_ex(sregexp, 0);
695 if (!re) {
696 RETURN_VALIDATION_FAILED
697 }
698 match_data = php_pcre_create_match_data(capture_count, re);
699 if (!match_data) {
700 RETURN_VALIDATION_FAILED
701 }
702 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
703 php_pcre_free_match_data(match_data);
704
705 /* 0 means that the vector is too small to hold all the captured substring offsets */
706 if (rc < 0) {
707 RETURN_VALIDATION_FAILED
708 }
709
710 }
711 /* }}} */
712
/* Parses a dotted-quad IPv4 address from str[0 .. str_len-1].
 *
 * Exactly four decimal octets (0-255, at most three digits, no leading
 * zeros) separated by single dots are required, with nothing before or
 * after them. Octets are written to ip[0..3] as they are parsed, so ip
 * may be partially filled when the function fails.
 *
 * Returns 1 when the whole input is a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *const limit = str + str_len;
	int octets = 0;

	while (cur < limit) {
		int starts_with_zero;
		int digits;
		int octet;

		if (*cur < '0' || *cur > '9') {
			return 0;
		}

		starts_with_zero = (*cur == '0');
		octet = *cur++ - '0';
		digits = 1;

		for (; cur < limit && *cur >= '0' && *cur <= '9'; cur++) {
			octet = octet * 10 + (*cur - '0');
			digits++;
			if (octet > 255 || digits > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (starts_with_zero && (octet != 0 || digits > 1)) {
			return 0;
		}

		ip[octets++] = octet;

		if (octets == 4) {
			/* Fourth octet: valid only if it consumed the whole input. */
			return cur == limit;
		}
		if (cur >= limit || *cur++ != '.') {
			return 0;
		}
	}

	/* Empty input, or input that ended right after a dot. */
	return 0;
}
/* }}} */
747
/* Validates the textual IPv6 address in str[0 .. str_len-1].
 *
 * str      start of the text; only str_len bytes are examined
 * str_len  length of the text in bytes
 * ip       optional (may be NULL) array of 8 ints that receives the
 *          16-bit groups of the address
 *
 * Supports "::" compression (at most once) and a trailing dotted IPv4
 * part, which counts as two groups. Returns non-zero when the text is a
 * valid address and 0 otherwise; ip may have been partially written even
 * when 0 is returned.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1; /* group index at which "::" was seen, -1 if none */
	int blocks = 0;          /* number of 16-bit groups accounted for so far */
	int num, n, i;
	char *ipv4;
	char *end;
	int ip4elm[4];
	char *s = str;           /* remembers the start for the leading-':' check */

	/* An IPv6 address always contains at least one colon. */
	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Walk back from the first dot to the start of the IPv4 part,
		 * i.e. the character right after the preceding colon. */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 part occupies the last two 16-bit groups. */
		blocks = 2;
	}

	end = str + str_len;

	/* Each iteration consumes an optional ':' or '::' and one hex group. */
	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				/* Only one "::" is allowed per address. */
				if (compressed_pos >= 0) {
					return 0;
				}
				/* Placeholder; overwritten with zeros in the fix-up below. */
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					/* Address ends with "::" */
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}
		/* Accumulate one group of hex digits; n counts the digits. */
		num = n = 0;
		while (str < end) {
			if (*str >= '0' && *str <= '9') {
				num = 16 * num + (*str - '0');
			} else if (*str >= 'a' && *str <= 'f') {
				num = 16 * num + (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				num = 16 * num + (*str - 'A') + 10;
			} else {
				break;
			}
			n++;
			str++;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		/* A group has between one and four hex digits. */
		if (n < 1 || n > 4) {
			return 0;
		}
		if (++blocks > 8)
			return 0;
	}

fixup_ip:
	if (ip && ipv4) {
		/* NOTE: with an embedded IPv4 part the output is always laid out
		 * as 0:0:0:0:0:ffff:a.b.c.d; the groups parsed from the text
		 * before the IPv4 part are overwritten. */
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Expand "::": shift the groups that followed it to the end of
		 * the array and zero-fill the gap. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	/* Valid if compression was used and at most 8 groups were counted, or
	 * exactly 8 groups were given. */
	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
/* }}} */
858
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)859 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
860 {
861 /* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
862 * flag to throw out reserved ranges; multicast ranges... etc. If both
863 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
864 * colon determine the format */
865
866 int ip[8];
867 int mode;
868
869 if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
870 mode = FORMAT_IPV6;
871 } else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
872 mode = FORMAT_IPV4;
873 } else {
874 RETURN_VALIDATION_FAILED
875 }
876
877 if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
878 /* Both formats are cool */
879 } else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
880 RETURN_VALIDATION_FAILED
881 } else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
882 RETURN_VALIDATION_FAILED
883 }
884
885 switch (mode) {
886 case FORMAT_IPV4:
887 if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
888 RETURN_VALIDATION_FAILED
889 }
890
891 /* Check flags */
892 if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
893 if (
894 (ip[0] == 10) ||
895 (ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
896 (ip[0] == 192 && ip[1] == 168)
897 ) {
898 RETURN_VALIDATION_FAILED
899 }
900 }
901
902 if (flags & FILTER_FLAG_NO_RES_RANGE) {
903 if (
904 (ip[0] == 0) ||
905 (ip[0] >= 240) ||
906 (ip[0] == 127) ||
907 (ip[0] == 169 && ip[1] == 254)
908 ) {
909 RETURN_VALIDATION_FAILED
910 }
911 }
912 break;
913
914 case FORMAT_IPV6:
915 {
916 int res = 0;
917 res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
918 if (res < 1) {
919 RETURN_VALIDATION_FAILED
920 }
921 /* Check flags */
922 if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
923 if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
924 RETURN_VALIDATION_FAILED
925 }
926 }
927 if (flags & FILTER_FLAG_NO_RES_RANGE) {
928 if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
929 && ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
930 || (ip[0] == 0x5f)
931 || (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
932 || (ip[0] == 0x2001 && (ip[1] == 0x0db8 || (ip[1] >= 0x0010 && ip[1] <= 0x001f)))
933 || (ip[0] == 0x3ff3)
934 ) {
935 RETURN_VALIDATION_FAILED
936 }
937 }
938 }
939 break;
940 }
941 }
942 /* }}} */
943
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)944 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
945 {
946 char *input = Z_STRVAL_P(value);
947 size_t input_len = Z_STRLEN_P(value);
948 int tokens, length, i, offset, exp_separator_set;
949 size_t exp_separator_len;
950 char separator;
951 char *exp_separator;
952 zend_long ret = 0;
953 zval *option_val;
954
955 FETCH_STRING_OPTION(exp_separator, "separator");
956
957 if (exp_separator_set && exp_separator_len != 1) {
958 zend_value_error("%s(): \"separator\" option must be one character long", get_active_function_name());
959 RETURN_VALIDATION_FAILED;
960 }
961
962 if (14 == input_len) {
963 /* EUI-64 format: Four hexadecimal digits separated by dots. Less
964 * commonly used but valid nonetheless.
965 */
966 tokens = 3;
967 length = 4;
968 separator = '.';
969 } else if (17 == input_len && input[2] == '-') {
970 /* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
971 tokens = 6;
972 length = 2;
973 separator = '-';
974 } else if (17 == input_len && input[2] == ':') {
975 /* IEEE 802 format: Six hexadecimal digits separated by colons. */
976 tokens = 6;
977 length = 2;
978 separator = ':';
979 } else {
980 RETURN_VALIDATION_FAILED;
981 }
982
983 if (exp_separator_set && separator != exp_separator[0]) {
984 RETURN_VALIDATION_FAILED;
985 }
986
987 /* Essentially what we now have is a set of tokens each consisting of
988 * a hexadecimal number followed by a separator character. (With the
989 * exception of the last token which does not have the separator.)
990 */
991 for (i = 0; i < tokens; i++) {
992 offset = i * (length + 1);
993
994 if (i < tokens - 1 && input[offset + length] != separator) {
995 /* The current token did not end with e.g. a "." */
996 RETURN_VALIDATION_FAILED
997 }
998 if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
999 /* The current token is no valid hexadecimal digit */
1000 RETURN_VALIDATION_FAILED
1001 }
1002 }
1003 }
1004 /* }}} */
1005