1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Derick Rethans <derick@php.net> |
14 | Pierre-A. Joye <pierre@php.net> |
15 | Kévin Dunglas <dunglas@gmail.com> |
16 +----------------------------------------------------------------------+
17 */
18
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "ext/standard/url.h"
22 #include "ext/pcre/php_pcre.h"
23
24 #include "zend_multiply.h"
25
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
28 #endif
29
30 #ifndef INADDR_NONE
31 # define INADDR_NONE ((unsigned long int) -1)
32 #endif
33
34
35 /* {{{ FETCH_DOUBLE_OPTION(var_name, option_name) */
/*
 * Looks up option_name in the enclosing function's `option_array` and, when
 * present, stores its value converted to double in var_name and sets
 * var_name##_set to 1. Otherwise var_name is 0 and var_name##_set is 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in scope.
 * option_name must be a string literal (its length is taken with sizeof).
 * Wrapped in do { } while (0) so the macro expands to exactly one statement.
 */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
45 /* }}} */
46
47 /* {{{ FETCH_LONG_OPTION(var_name, option_name) */
/*
 * Looks up option_name in the enclosing function's `option_array` and, when
 * present, stores its value converted to an integer in var_name and sets
 * var_name##_set to 1. Otherwise var_name is 0 and var_name##_set is 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in scope.
 * option_name must be a string literal (its length is taken with sizeof).
 * Wrapped in do { } while (0) so the macro expands to exactly one statement.
 */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
57 /* }}} */
58
59 /* {{{ FETCH_STRING_OPTION(var_name, option_name) */
/*
 * Looks up option_name in the enclosing function's `option_array`
 * (dereferencing the entry). When the entry exists and is a string,
 * var_name points at its character data, var_name##_len holds its length
 * and var_name##_set is 1. In every other case var_name is NULL and
 * var_name##_len / var_name##_set are 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in scope.
 * option_name must be a string literal (its length is taken with sizeof).
 * Wrapped in do { } while (0) so the macro expands to exactly one statement.
 */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
73 /* }}} */
74
75 /* {{{ FETCH_STR_OPTION(var_name, option_name) */
/*
 * Looks up option_name in the enclosing function's `option_array`
 * (dereferencing the entry). When the entry exists and is a string,
 * var_name receives the entry's string object (Z_STR_P) and var_name##_set
 * is 1; otherwise var_name is NULL and var_name##_set is 0.
 *
 * Requires `option_array` and a scratch `zval *option_val` in scope.
 * option_name must be a string literal (its length is taken with sizeof).
 * Wrapped in do { } while (0) so the macro expands to exactly one statement.
 */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
87 /* }}} */
88
/* Address-format tags assigned to `mode` in php_filter_validate_ip(). */
#define FORMAT_IPV4 4
#define FORMAT_IPV6 6

/* Forward declaration: the definition appears further down, but
 * php_filter_is_valid_ipv6_hostname() calls it before that point. */
static int _php_filter_validate_ipv6(const char *str, size_t str_len, int ip[8]);
93
/*
 * Parses str[0..str_len) as a decimal integer with an optional leading
 * '+' or '-'. Apart from a lone "0" (optionally signed) the number must
 * start with 1..9, i.e. leading zeros are rejected.
 *
 * On success returns 1 and stores the value in *ret (including 0 for the
 * "0" / "+0" / "-0" forms). Returns -1 on a malformed number or when the
 * value does not fit in zend_long; *ret is left untouched in that case.
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Consume an optional sign; never look at bytes beyond the input. */
	if (str < end) {
		if (*str == '-') {
			sign = 1;
			str++;
		} else if (*str == '+') {
			str++;
		}
	}

	if (end - str == 1 && *str == '0') {
		/* Special cases: 0, +0 and -0. Report the value explicitly instead
		 * of relying on the caller having pre-zeroed *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* The bound is checked before multiplying so the accumulator
			 * itself never overflows. Negative numbers are accumulated as
			 * negatives so that ZEND_LONG_MIN is representable. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
145 /* }}} */
146
/*
 * Parses str[0..str_len) as a run of octal digits (no prefix, no sign).
 * The value is accumulated as an unsigned zend_ulong and stored in *ret
 * after a cast to zend_long. Returns 1 on success, -1 on a non-octal
 * character or when the unsigned accumulator would overflow. An empty
 * input yields 0.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	/* All bits set: the largest value the unsigned accumulator can hold. */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		zend_ulong digit;

		if (*cursor < '0' || *cursor > '7') {
			return -1;
		}
		digit = (*cursor - '0');

		/* Check before shifting in the next digit, then before adding it. */
		if (acc > limit / 8) {
			return -1;
		}
		acc = acc * 8;
		if (acc > limit - digit) {
			return -1;
		}
		acc += digit;
	}

	*ret = (zend_long)acc;
	return 1;
}
168 /* }}} */
169
/*
 * Parses str[0..str_len) as a run of hexadecimal digits (either case, no
 * "0x" prefix, no sign). The value is accumulated as an unsigned
 * zend_ulong and stored in *ret after a cast to zend_long. Returns 1 on
 * success, -1 on a non-hex character or when the unsigned accumulator
 * would overflow. An empty input yields 0.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	/* All bits set: the largest value the unsigned accumulator can hold. */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		const char c = *cursor;
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = (c - '0');
		} else if (c >= 'a' && c <= 'f') {
			nibble = (c - ('a' - 10));
		} else if (c >= 'A' && c <= 'F') {
			nibble = (c - ('A' - 10));
		} else {
			return -1;
		}

		/* Check before shifting in the next digit, then before adding it. */
		if (acc > limit / 16) {
			return -1;
		}
		acc = acc * 16;
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
195 /* }}} */
196
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)197 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
198 {
199 zval *option_val;
200 zend_long min_range, max_range, option_flags;
201 int min_range_set, max_range_set;
202 int allow_octal = 0, allow_hex = 0;
203 size_t len;
204 int error = 0;
205 zend_long ctx_value;
206 char *p;
207
208 /* Parse options */
209 FETCH_LONG_OPTION(min_range, "min_range");
210 FETCH_LONG_OPTION(max_range, "max_range");
211 option_flags = flags;
212
213 len = Z_STRLEN_P(value);
214
215 if (len == 0) {
216 RETURN_VALIDATION_FAILED
217 }
218
219 if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
220 allow_octal = 1;
221 }
222
223 if (option_flags & FILTER_FLAG_ALLOW_HEX) {
224 allow_hex = 1;
225 }
226
227 /* Start the validating loop */
228 p = Z_STRVAL_P(value);
229 ctx_value = 0;
230
231 PHP_FILTER_TRIM_DEFAULT(p, len);
232
233 if (*p == '0') {
234 p++; len--;
235 if (allow_hex && (*p == 'x' || *p == 'X')) {
236 p++; len--;
237 if (len == 0) {
238 RETURN_VALIDATION_FAILED
239 }
240 if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
241 error = 1;
242 }
243 } else if (allow_octal) {
244 /* Support explicit octal prefix notation */
245 if (*p == 'o' || *p == 'O') {
246 p++; len--;
247 if (len == 0) {
248 RETURN_VALIDATION_FAILED
249 }
250 }
251 if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
252 error = 1;
253 }
254 } else if (len != 0) {
255 error = 1;
256 }
257 } else {
258 if (php_filter_parse_int(p, len, &ctx_value) < 0) {
259 error = 1;
260 }
261 }
262
263 if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
264 RETURN_VALIDATION_FAILED
265 } else {
266 zval_ptr_dtor(value);
267 ZVAL_LONG(value, ctx_value);
268 return;
269 }
270 }
271 /* }}} */
272
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)273 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
274 {
275 char *str = Z_STRVAL_P(value);
276 size_t len = Z_STRLEN_P(value);
277 int ret;
278
279 PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
280
281 /* returns true for "1", "true", "on" and "yes"
282 * returns false for "0", "false", "off", "no", and ""
283 * null otherwise. */
284 switch (len) {
285 case 0:
286 ret = 0;
287 break;
288 case 1:
289 if (*str == '1') {
290 ret = 1;
291 } else if (*str == '0') {
292 ret = 0;
293 } else {
294 ret = -1;
295 }
296 break;
297 case 2:
298 if (strncasecmp(str, "on", 2) == 0) {
299 ret = 1;
300 } else if (strncasecmp(str, "no", 2) == 0) {
301 ret = 0;
302 } else {
303 ret = -1;
304 }
305 break;
306 case 3:
307 if (strncasecmp(str, "yes", 3) == 0) {
308 ret = 1;
309 } else if (strncasecmp(str, "off", 3) == 0) {
310 ret = 0;
311 } else {
312 ret = -1;
313 }
314 break;
315 case 4:
316 if (strncasecmp(str, "true", 4) == 0) {
317 ret = 1;
318 } else {
319 ret = -1;
320 }
321 break;
322 case 5:
323 if (strncasecmp(str, "false", 5) == 0) {
324 ret = 0;
325 } else {
326 ret = -1;
327 }
328 break;
329 default:
330 ret = -1;
331 }
332
333 if (ret == -1) {
334 RETURN_VALIDATION_FAILED
335 } else {
336 zval_ptr_dtor(value);
337 ZVAL_BOOL(value, ret);
338 }
339 }
340 /* }}} */
341
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)342 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
343 {
344 size_t len;
345 char *str, *end;
346 char *num, *p;
347 zval *option_val;
348 char *decimal;
349 int decimal_set;
350 size_t decimal_len;
351 char dec_sep = '.';
352 char *thousand;
353 int thousand_set;
354 size_t thousand_len;
355 char *tsd_sep;
356
357 zend_long lval;
358 double dval;
359 double min_range, max_range;
360 int min_range_set, max_range_set;
361
362 int first, n;
363
364 len = Z_STRLEN_P(value);
365 str = Z_STRVAL_P(value);
366
367 PHP_FILTER_TRIM_DEFAULT(str, len);
368 end = str + len;
369
370 FETCH_STRING_OPTION(decimal, "decimal");
371
372 if (decimal_set) {
373 if (decimal_len != 1) {
374 zend_value_error("%s(): \"decimal\" option must be one character long", get_active_function_name());
375 RETURN_VALIDATION_FAILED
376 } else {
377 dec_sep = *decimal;
378 }
379 }
380
381 FETCH_STRING_OPTION(thousand, "thousand");
382
383 if (thousand_set) {
384 if (thousand_len < 1) {
385 zend_value_error("%s(): \"thousand\" option cannot be empty", get_active_function_name());
386 RETURN_VALIDATION_FAILED
387 } else {
388 tsd_sep = thousand;
389 }
390 } else {
391 tsd_sep = "',.";
392 }
393
394 FETCH_DOUBLE_OPTION(min_range, "min_range");
395 FETCH_DOUBLE_OPTION(max_range, "max_range");
396
397 num = p = emalloc(len+1);
398 if (str < end && (*str == '+' || *str == '-')) {
399 *p++ = *str++;
400 }
401 first = 1;
402 while (1) {
403 n = 0;
404 while (str < end && *str >= '0' && *str <= '9') {
405 ++n;
406 *p++ = *str++;
407 }
408 if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
409 if (!first && n != 3) {
410 goto error;
411 }
412 if (*str == dec_sep) {
413 *p++ = '.';
414 str++;
415 while (str < end && *str >= '0' && *str <= '9') {
416 *p++ = *str++;
417 }
418 }
419 if (*str == 'e' || *str == 'E') {
420 *p++ = *str++;
421 if (str < end && (*str == '+' || *str == '-')) {
422 *p++ = *str++;
423 }
424 while (str < end && *str >= '0' && *str <= '9') {
425 *p++ = *str++;
426 }
427 }
428 break;
429 }
430 if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
431 if (first?(n < 1 || n > 3):(n != 3)) {
432 goto error;
433 }
434 first = 0;
435 str++;
436 } else {
437 goto error;
438 }
439 }
440 if (str != end) {
441 goto error;
442 }
443 *p = 0;
444
445 switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
446 case IS_LONG:
447 if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
448 goto error;
449 }
450 zval_ptr_dtor(value);
451 ZVAL_DOUBLE(value, (double)lval);
452 break;
453 case IS_DOUBLE:
454 if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
455 goto error;
456 }
457 if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
458 goto error;
459 }
460 zval_ptr_dtor(value);
461 ZVAL_DOUBLE(value, dval);
462 break;
463 default:
464 error:
465 efree(num);
466 RETURN_VALIDATION_FAILED
467 }
468 efree(num);
469 }
470 /* }}} */
471
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)472 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
473 {
474 zval *option_val;
475 zend_string *regexp;
476 int regexp_set;
477 pcre2_code *re = NULL;
478 pcre2_match_data *match_data = NULL;
479 uint32_t capture_count;
480 int rc;
481
482 /* Parse options */
483 FETCH_STR_OPTION(regexp, "regexp");
484
485 if (!regexp_set) {
486 zend_value_error("%s(): \"regexp\" option is missing", get_active_function_name());
487 RETURN_VALIDATION_FAILED
488 }
489
490 re = pcre_get_compiled_regex(regexp, &capture_count);
491 if (!re) {
492 RETURN_VALIDATION_FAILED
493 }
494 match_data = php_pcre_create_match_data(capture_count, re);
495 if (!match_data) {
496 RETURN_VALIDATION_FAILED
497 }
498 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
499 php_pcre_free_match_data(match_data);
500
501 /* 0 means that the vector is too small to hold all the captured substring offsets */
502 if (rc < 0) {
503 RETURN_VALIDATION_FAILED
504 }
505 }
506
/*
 * Checks the length constraints of a domain name and, when
 * FILTER_FLAG_HOSTNAME is set in flags, its character set.
 *
 * Always enforced: at most 253 characters (one trailing dot not counted),
 * no leading dot, no empty label ("..") and no label longer than 63
 * characters. With FILTER_FLAG_HOSTNAME: labels start and end with an
 * alphanumeric character and otherwise contain only alphanumerics and '-'.
 *
 * Returns 1 when the name is acceptable, 0 otherwise.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	const int hostname = flags & FILTER_FLAG_HOSTNAME;
	size_t usable_len = len;
	size_t pos;
	unsigned char label_chars = 1; /* 1-based count of characters in the current label */

	/* Ignore trailing dot */
	if (usable_len > 0 && domain[usable_len - 1] == '.') {
		usable_len--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (usable_len > 253) {
		return 0;
	}

	/* First char must not be a dot, and must be alphanumeric for hostnames */
	if (domain[0] == '.') {
		return 0;
	}
	if (hostname && !isalnum((int)(unsigned char)domain[0])) {
		return 0;
	}

	for (pos = 0; pos < usable_len; pos++) {
		const char *cur = domain + pos;

		if (*cur == '.') {
			/* An empty label is never allowed. */
			if (cur[1] == '.') {
				return 0;
			}
			/* The first and the last character of a label must be alphanumeric */
			if (hostname
			 && (!isalnum((int)(unsigned char)cur[-1]) || !isalnum((int)(unsigned char)cur[1]))) {
				return 0;
			}

			/* Reset label length counter */
			label_chars = 1;
			continue;
		}

		if (label_chars > 63) {
			return 0;
		}
		/* For hostnames the only non-alphanumeric character tolerated
		 * inside a label is '-', and not when a NUL byte follows it. */
		if (hostname && !isalnum((int)(unsigned char)*cur)) {
			if (*cur != '-' || cur[1] == '\0') {
				return 0;
			}
		}

		label_chars++;
	}

	return 1;
}
557 /* }}} */
558
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)559 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
560 {
561 if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
562 RETURN_VALIDATION_FAILED
563 }
564 }
565 /* }}} */
566
/*
 * Checks that a URL user or password component only consists of characters
 * permitted in the RFC 3986 "userinfo" production: unreserved characters,
 * sub-delimiters, ':' and percent-encoded octets ("%" HEXDIG HEXDIG).
 *
 * Returns 1 when every character of str is acceptable, 0 otherwise.
 */
static int is_userinfo_valid(zend_string *str)
{
	static const char valid[] = "-._~!$&'()*+,;=:";
	const char *p = ZSTR_VAL(str);
	const char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* <ctype.h> functions require an unsigned char value. */
		const unsigned char c = (unsigned char)*p;

		/* strchr() also matches the terminating NUL of `valid`, so a NUL
		 * byte has to be excluded explicitly. */
		if (isalnum(c) || (c != '\0' && strchr(valid, c) != NULL)) {
			p++;
		} else if (c == '%'
		 /* compare remaining length; avoids unsigned underflow on short input */
		 && end - p >= 3
		 && isxdigit((unsigned char)p[1])
		 && isxdigit((unsigned char)p[2])) {
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
582
/*
 * Returns true when s[0..l) is an IPv6 literal enclosed in square
 * brackets, i.e. it starts with '[', ends with ']' and the text in between
 * is accepted by _php_filter_validate_ipv6().
 */
static bool php_filter_is_valid_ipv6_hostname(const char *s, size_t l)
{
	if (*s != '[') {
		return false;
	}
	/* Last character of the host; only inspected once the first matched. */
	if (*(s + l - 1) != ']') {
		return false;
	}

	/* Validate the part between the brackets; the parsed groups are not needed. */
	return _php_filter_validate_ipv6(s + 1, l - 2, NULL) != 0;
}
590
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)591 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
592 {
593 php_url *url;
594 size_t old_len = Z_STRLEN_P(value);
595
596 php_filter_url(value, flags, option_array, charset);
597
598 if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
599 RETURN_VALIDATION_FAILED
600 }
601
602 /* Use parse_url - if it returns false, we return NULL */
603 url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
604
605 if (url == NULL) {
606 RETURN_VALIDATION_FAILED
607 }
608
609 if (url->scheme != NULL &&
610 (zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
611 const char *s;
612 size_t l;
613
614 if (url->host == NULL) {
615 goto bad_url;
616 }
617
618 s = ZSTR_VAL(url->host);
619 l = ZSTR_LEN(url->host);
620
621 if (
622 /* An IPv6 enclosed by square brackets is a valid hostname.*/
623 !php_filter_is_valid_ipv6_hostname(s, l) &&
624 /* Validate domain.
625 * This includes a loose check for an IPv4 address. */
626 !_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)
627 ) {
628 php_url_free(url);
629 RETURN_VALIDATION_FAILED
630 }
631 }
632
633 if (
634 url->scheme == NULL ||
635 /* some schemas allow the host to be empty */
636 (url->host == NULL && (!zend_string_equals_literal(url->scheme, "mailto") && !zend_string_equals_literal(url->scheme, "news") && !zend_string_equals_literal(url->scheme, "file"))) ||
637 ((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
638 ) {
639 bad_url:
640 php_url_free(url);
641 RETURN_VALIDATION_FAILED
642 }
643
644 if ((url->user != NULL && !is_userinfo_valid(url->user))
645 || (url->pass != NULL && !is_userinfo_valid(url->pass))
646 ) {
647 php_url_free(url);
648 RETURN_VALIDATION_FAILED
649
650 }
651
652 php_url_free(url);
653 }
654 /* }}} */
655
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)656 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
657 {
658 /*
659 * The regex below is based on a regex by Michael Rushton.
660 * However, it is not identical. I changed it to only consider routeable
661 * addresses as valid. Michael's regex considers a@b a valid address
662 * which conflicts with section 2.3.5 of RFC 5321 which states that:
663 *
664 * Only resolvable, fully-qualified domain names (FQDNs) are permitted
665 * when domain names are used in SMTP. In other words, names that can
666 * be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
667 * in Section 5) are permitted, as are CNAME RRs whose targets can be
668 * resolved, in turn, to MX or address RRs. Local nicknames or
669 * unqualified names MUST NOT be used.
670 *
671 * This regex does not handle comments and folding whitespace. While
672 * this is technically valid in an email address, these parts aren't
673 * actually part of the address itself.
674 *
675 * Michael's regex carries this copyright:
676 *
677 * Copyright © Michael Rushton 2009-10
678 * http://squiloople.com/
679 * Feel free to use and redistribute this code. But please keep this copyright notice.
680 *
681 */
682 pcre2_code *re = NULL;
683 pcre2_match_data *match_data = NULL;
684 uint32_t capture_count;
685 zend_string *sregexp;
686 int rc;
687 const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
688 const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
689 const char *regexp;
690 size_t regexp_len;
691
692 if (flags & FILTER_FLAG_EMAIL_UNICODE) {
693 regexp = regexp0;
694 regexp_len = sizeof(regexp0) - 1;
695 } else {
696 regexp = regexp1;
697 regexp_len = sizeof(regexp1) - 1;
698 }
699
700 /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
701 if (Z_STRLEN_P(value) > 320) {
702 RETURN_VALIDATION_FAILED
703 }
704
705 sregexp = zend_string_init(regexp, regexp_len, 0);
706 re = pcre_get_compiled_regex(sregexp, &capture_count);
707 zend_string_release_ex(sregexp, 0);
708 if (!re) {
709 RETURN_VALIDATION_FAILED
710 }
711 match_data = php_pcre_create_match_data(capture_count, re);
712 if (!match_data) {
713 RETURN_VALIDATION_FAILED
714 }
715 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
716 php_pcre_free_match_data(match_data);
717
718 /* 0 means that the vector is too small to hold all the captured substring offsets */
719 if (rc < 0) {
720 RETURN_VALIDATION_FAILED
721 }
722
723 }
724 /* }}} */
725
/*
 * Validates a dotted-quad IPv4 address in str[0..str_len).
 *
 * Exactly four decimal octets (0..255, at most three digits, no leading
 * zero except for the single digit "0") separated by '.' are required,
 * with nothing before or after them. Each parsed octet is stored in ip[]
 * as soon as it has been read, so ip[] may be partially written on
 * failure.
 *
 * Returns 1 when the address is valid, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cursor = str;
	const char *const stop = str + str_len;
	int octets = 0;

	while (cursor < stop) {
		int octet, digits, starts_with_zero;

		if (*cursor < '0' || *cursor > '9') {
			return 0;
		}

		starts_with_zero = (*cursor == '0');
		octet = *cursor++ - '0';
		digits = 1;

		while (cursor < stop && *cursor >= '0' && *cursor <= '9') {
			octet = octet * 10 + (*cursor++ - '0');
			if (octet > 255 || ++digits > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (starts_with_zero && (octet != 0 || digits > 1)) {
			return 0;
		}

		ip[octets++] = octet;

		if (octets == 4) {
			/* The fourth octet must be the end of the input. */
			return cursor == stop;
		}
		if (cursor >= stop || *cursor++ != '.') {
			return 0;
		}
	}

	return 0;
}
759 /* }}} */
760
/*
 * Validates a textual IPv6 address in str[0..str_len), optionally ending in
 * an embedded dotted-quad IPv4 part.
 *
 * str     - address text (no surrounding brackets)
 * str_len - number of bytes of str to examine
 * ip      - optional output of eight ints, one per 16-bit group; may be
 *           NULL when the caller only needs the yes/no answer
 *
 * Returns non-zero when the address is acceptable: either exactly eight
 * groups were accounted for, or a "::" was seen and at most eight groups
 * are accounted for. Returns 0 otherwise. ip[] may have been partially
 * written when 0 is returned.
 */
static int _php_filter_validate_ipv6(const char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1; /* group index taken by "::", or -1 while none has been seen */
	int blocks = 0; /* number of 16-bit groups accounted for so far */
	int num, n, i;
	char *ipv4;
	const char *end;
	int ip4elm[4]; /* octets of the embedded IPv4 part, if any */
	const char *s = str; /* start of the input, used to detect a lone leading ':' */

	/* An IPv6 address contains at least one colon. */
	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Walk back from the first '.' to the character following the
		 * preceding ':' (or to the start of the input). */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 part accounts for two 16-bit groups. */
		blocks = 2;
	}

	end = str + str_len;

	/* Each iteration consumes an optional ':' / "::" followed by one
	 * group of 1 to 4 hex digits. */
	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				/* Only one "::" is allowed. */
				if (compressed_pos >= 0) {
					return 0;
				}
				/* Mark the compressed slot; it is zero-filled at fixup_ip. */
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					/* Input ends with "::". */
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}
		/* Accumulate the hex digits of one group; n counts the digits. */
		num = n = 0;
		while (str < end) {
			if (*str >= '0' && *str <= '9') {
				num = 16 * num + (*str - '0');
			} else if (*str >= 'a' && *str <= 'f') {
				num = 16 * num + (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				num = 16 * num + (*str - 'A') + 10;
			} else {
				break;
			}
			n++;
			str++;
		}
		/* The group is stored before its digit count is validated. */
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		if (n < 1 || n > 4) {
			return 0;
		}
		if (++blocks > 8)
			return 0;
	}

fixup_ip:
	if (ip && ipv4) {
		/* With an embedded IPv4 part the output is rewritten as five zero
		 * groups, 0xffff, and the four octets packed into two groups.
		 * NOTE(review): this discards whatever hex groups were parsed
		 * above - confirm that is intended for forms other than
		 * ::ffff:a.b.c.d. */
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Expand "::": move the groups stored after the compressed slot
		 * to the end of the array, then zero-fill the gap. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
870 /* }}} */
871
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)872 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
873 {
874 /* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
875 * flag to throw out reserved ranges; multicast ranges... etc. If both
876 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
877 * colon determine the format */
878
879 int ip[8];
880 int mode;
881
882 if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
883 mode = FORMAT_IPV6;
884 } else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
885 mode = FORMAT_IPV4;
886 } else {
887 RETURN_VALIDATION_FAILED
888 }
889
890 if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
891 /* Both formats are cool */
892 } else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
893 RETURN_VALIDATION_FAILED
894 } else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
895 RETURN_VALIDATION_FAILED
896 }
897
898 switch (mode) {
899 case FORMAT_IPV4:
900 if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
901 RETURN_VALIDATION_FAILED
902 }
903
904 /* Check flags */
905 if (flags & FILTER_FLAG_NO_PRIV_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
906 if (
907 (ip[0] == 10) ||
908 (ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
909 (ip[0] == 192 && ip[1] == 168)
910 ) {
911 RETURN_VALIDATION_FAILED
912 }
913 }
914
915 if (flags & FILTER_FLAG_NO_RES_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
916 if (
917 (ip[0] == 0) ||
918 (ip[0] >= 240) ||
919 (ip[0] == 127) ||
920 (ip[0] == 169 && ip[1] == 254)
921 ) {
922 RETURN_VALIDATION_FAILED
923 }
924 }
925
926 if (flags & FILTER_FLAG_GLOBAL_RANGE) {
927 if (
928 (ip[0] == 100 && ip[1] >= 64 && ip[1] <= 127 ) ||
929 (ip[0] == 192 && ip[1] == 0 && ip[2] == 0 ) ||
930 (ip[0] == 192 && ip[1] == 0 && ip[2] == 2 ) ||
931 (ip[0] == 198 && ip[1] >= 18 && ip[1] <= 19 ) ||
932 (ip[0] == 198 && ip[1] == 51 && ip[2] == 100 ) ||
933 (ip[0] == 203 && ip[1] == 0 && ip[2] == 113 )
934 ) {
935 RETURN_VALIDATION_FAILED
936 }
937 }
938
939 break;
940
941 case FORMAT_IPV6:
942 {
943 int res = 0;
944 res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
945 if (res < 1) {
946 RETURN_VALIDATION_FAILED
947 }
948 /* Check flags */
949 if (flags & FILTER_FLAG_NO_PRIV_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
950 if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
951 RETURN_VALIDATION_FAILED
952 }
953 }
954 if (flags & FILTER_FLAG_NO_RES_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
955 if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
956 && ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
957 || (ip[0] == 0x5f)
958 || (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
959 || (ip[0] == 0x2001 && (ip[1] == 0x0db8 || (ip[1] >= 0x0010 && ip[1] <= 0x001f)))
960 || (ip[0] == 0x3ff3)
961 ) {
962 RETURN_VALIDATION_FAILED
963 }
964 }
965 if (flags & FILTER_FLAG_GLOBAL_RANGE) {
966 if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0 && ip[4] == 0 && ip[5] == 0xffff) ||
967 (ip[0] == 0x0100 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0) ||
968 (ip[0] == 0x2001 && ip[1] <= 0x01ff) ||
969 (ip[0] == 0x2001 && ip[1] == 0x0002 && ip[2] == 0) ||
970 (ip[0] >= 0xfc00 && ip[0] <= 0xfdff)
971 ) {
972 RETURN_VALIDATION_FAILED
973 }
974 }
975 }
976 break;
977 }
978 }
979 /* }}} */
980
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)981 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
982 {
983 char *input = Z_STRVAL_P(value);
984 size_t input_len = Z_STRLEN_P(value);
985 int tokens, length, i, offset, exp_separator_set;
986 size_t exp_separator_len;
987 char separator;
988 char *exp_separator;
989 zend_long ret = 0;
990 zval *option_val;
991
992 FETCH_STRING_OPTION(exp_separator, "separator");
993
994 if (exp_separator_set && exp_separator_len != 1) {
995 zend_value_error("%s(): \"separator\" option must be one character long", get_active_function_name());
996 RETURN_VALIDATION_FAILED;
997 }
998
999 if (14 == input_len) {
1000 /* EUI-64 format: Four hexadecimal digits separated by dots. Less
1001 * commonly used but valid nonetheless.
1002 */
1003 tokens = 3;
1004 length = 4;
1005 separator = '.';
1006 } else if (17 == input_len && input[2] == '-') {
1007 /* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
1008 tokens = 6;
1009 length = 2;
1010 separator = '-';
1011 } else if (17 == input_len && input[2] == ':') {
1012 /* IEEE 802 format: Six hexadecimal digits separated by colons. */
1013 tokens = 6;
1014 length = 2;
1015 separator = ':';
1016 } else {
1017 RETURN_VALIDATION_FAILED;
1018 }
1019
1020 if (exp_separator_set && separator != exp_separator[0]) {
1021 RETURN_VALIDATION_FAILED;
1022 }
1023
1024 /* Essentially what we now have is a set of tokens each consisting of
1025 * a hexadecimal number followed by a separator character. (With the
1026 * exception of the last token which does not have the separator.)
1027 */
1028 for (i = 0; i < tokens; i++) {
1029 offset = i * (length + 1);
1030
1031 if (i < tokens - 1 && input[offset + length] != separator) {
1032 /* The current token did not end with e.g. a "." */
1033 RETURN_VALIDATION_FAILED
1034 }
1035 if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
1036 /* The current token is no valid hexadecimal digit */
1037 RETURN_VALIDATION_FAILED
1038 }
1039 }
1040 }
1041 /* }}} */
1042