1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 7 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Authors: Derick Rethans <derick@php.net> |
16 | Pierre-A. Joye <pierre@php.net> |
17 | Kévin Dunglas <dunglas@gmail.com> |
18 +----------------------------------------------------------------------+
19 */
20
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "ext/standard/url.h"
24 #include "ext/pcre/php_pcre.h"
25
26 #include "zend_multiply.h"
27
28 #if HAVE_ARPA_INET_H
29 # include <arpa/inet.h>
30 #endif
31
32 #ifndef INADDR_NONE
33 # define INADDR_NONE ((unsigned long int) -1)
34 #endif
35
36
37 /* {{{ FETCH_DOUBLE_OPTION(var_name, option_name) */
/*
 * Looks up option_name in option_array (when one was supplied) and, if the
 * key exists, stores its value converted with zval_get_double() in var_name
 * and sets var_name##_set to 1. Otherwise var_name is 0 and var_name##_set
 * is 0. Requires `option_array` and a `zval *option_val` in the calling scope.
 *
 * Wrapped in do { } while (0) so the expansion behaves as a single statement
 * (safe under an unbraced if/else); invoke it with a trailing semicolon.
 */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
47 /* }}} */
48
49 /* {{{ FETCH_LONG_OPTION(var_name, option_name) */
/*
 * Looks up option_name in option_array (when one was supplied) and, if the
 * key exists, stores its value converted with zval_get_long() in var_name
 * and sets var_name##_set to 1. Otherwise var_name is 0 and var_name##_set
 * is 0. Requires `option_array` and a `zval *option_val` in the calling scope.
 *
 * Wrapped in do { } while (0) so the expansion behaves as a single statement
 * (safe under an unbraced if/else); invoke it with a trailing semicolon.
 */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
59 /* }}} */
60
61 /* {{{ FETCH_STRING_OPTION(var_name, option_name) */
/*
 * Looks up option_name in option_array (when one was supplied). Only when the
 * key exists and holds a string are var_name (char pointer), var_name##_len
 * (length) and var_name##_set (1) filled in; in every other case they are
 * NULL, 0 and 0. Requires `option_array` and a `zval *option_val` in the
 * calling scope.
 *
 * Wrapped in do { } while (0) so the expansion behaves as a single statement
 * (safe under an unbraced if/else); invoke it with a trailing semicolon.
 */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
75 /* }}} */
76
77 /* {{{ FETCH_STR_OPTION(var_name, option_name) */
/*
 * Looks up option_name in option_array (when one was supplied). Only when the
 * key exists and holds a string are var_name (set to Z_STR_P of the option)
 * and var_name##_set (1) filled in; in every other case they are NULL and 0.
 * Requires `option_array` and a `zval *option_val` in the calling scope.
 *
 * Wrapped in do { } while (0) so the expansion behaves as a single statement
 * (safe under an unbraced if/else); invoke it with a trailing semicolon.
 */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
89 /* }}} */
90
/* Address-format tags used by php_filter_validate_ip() to select the parser. */
#define FORMAT_IPV4 4
#define FORMAT_IPV6 6

/* Forward declaration: php_filter_validate_url() calls this before its definition. */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]);
95
/*
 * Parses a base-10 integer with an optional leading '+' or '-'.
 *
 * Accepted forms are "0", "+0", "-0", or a first digit 1..9 followed by
 * further decimal digits; anything else (leading zeros, empty input, stray
 * characters, values outside ZEND_LONG_MIN..ZEND_LONG_MAX) is rejected.
 *
 * str      input bytes; only str[0 .. str_len-1] are read
 * str_len  number of bytes in str
 * ret      receives the parsed value on success
 *
 * Returns 1 on success and -1 on failure (*ret is left untouched on failure).
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Only look at the first byte if there is one. */
	if (str < end) {
		switch (*str) {
			case '-':
				sign = 1;
				/* fallthrough */
			case '+':
				str++;
				/* fallthrough */
			default:
				break;
		}
	}

	if (str + 1 == end && *str == '0') {
		/* Special cases: 0, +0 and -0. Store the result explicitly instead
		 * of relying on the caller having pre-initialised *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* Compare against the limit before multiplying so the
			 * accumulation itself can never overflow. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
146 /* }}} */
147
/*
 * Parses str[0 .. str_len-1] as an unsigned octal number.
 *
 * The value is accumulated in a zend_ulong and rejected once it would exceed
 * the all-ones zend_ulong value; the result is then stored in *ret through a
 * cast to zend_long. An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-octal character or on overflow.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cur;
	zend_ulong acc = 0;

	for (cur = str; cur < stop; cur++) {
		zend_ulong d;

		if (*cur < '0' || *cur > '7') {
			return -1;
		}
		d = (zend_ulong)(*cur - '0');

		/* Would acc * 8 overflow? */
		if (acc > limit / 8) {
			return -1;
		}
		acc *= 8;

		/* Would acc + d overflow? */
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
169 /* }}} */
170
/*
 * Parses str[0 .. str_len-1] as an unsigned hexadecimal number (digits
 * 0-9, a-f, A-F; no "0x" prefix).
 *
 * The value is accumulated in a zend_ulong and rejected once it would exceed
 * the all-ones zend_ulong value; the result is then stored in *ret through a
 * cast to zend_long. An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-hex character or on overflow.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cur;
	zend_ulong acc = 0;

	for (cur = str; cur < stop; cur++) {
		const char c = *cur;
		zend_ulong d;

		if (c >= '0' && c <= '9') {
			d = c - '0';
		} else if (c >= 'a' && c <= 'f') {
			d = c - ('a' - 10);
		} else if (c >= 'A' && c <= 'F') {
			d = c - ('A' - 10);
		} else {
			return -1;
		}

		/* Would acc * 16 overflow? */
		if (acc > limit / 16) {
			return -1;
		}
		acc *= 16;

		/* Would acc + d overflow? */
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
196 /* }}} */
197
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)198 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
199 {
200 zval *option_val;
201 zend_long min_range, max_range, option_flags;
202 int min_range_set, max_range_set;
203 int allow_octal = 0, allow_hex = 0;
204 size_t len;
205 int error = 0;
206 zend_long ctx_value;
207 char *p;
208
209 /* Parse options */
210 FETCH_LONG_OPTION(min_range, "min_range");
211 FETCH_LONG_OPTION(max_range, "max_range");
212 option_flags = flags;
213
214 len = Z_STRLEN_P(value);
215
216 if (len == 0) {
217 RETURN_VALIDATION_FAILED
218 }
219
220 if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
221 allow_octal = 1;
222 }
223
224 if (option_flags & FILTER_FLAG_ALLOW_HEX) {
225 allow_hex = 1;
226 }
227
228 /* Start the validating loop */
229 p = Z_STRVAL_P(value);
230 ctx_value = 0;
231
232 PHP_FILTER_TRIM_DEFAULT(p, len);
233
234 if (*p == '0') {
235 p++; len--;
236 if (allow_hex && (*p == 'x' || *p == 'X')) {
237 p++; len--;
238 if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
239 error = 1;
240 }
241 } else if (allow_octal) {
242 if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
243 error = 1;
244 }
245 } else if (len != 0) {
246 error = 1;
247 }
248 } else {
249 if (php_filter_parse_int(p, len, &ctx_value) < 0) {
250 error = 1;
251 }
252 }
253
254 if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
255 RETURN_VALIDATION_FAILED
256 } else {
257 zval_ptr_dtor(value);
258 ZVAL_LONG(value, ctx_value);
259 return;
260 }
261 }
262 /* }}} */
263
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)264 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
265 {
266 char *str = Z_STRVAL_P(value);
267 size_t len = Z_STRLEN_P(value);
268 int ret;
269
270 PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
271
272 /* returns true for "1", "true", "on" and "yes"
273 * returns false for "0", "false", "off", "no", and ""
274 * null otherwise. */
275 switch (len) {
276 case 0:
277 ret = 0;
278 break;
279 case 1:
280 if (*str == '1') {
281 ret = 1;
282 } else if (*str == '0') {
283 ret = 0;
284 } else {
285 ret = -1;
286 }
287 break;
288 case 2:
289 if (strncasecmp(str, "on", 2) == 0) {
290 ret = 1;
291 } else if (strncasecmp(str, "no", 2) == 0) {
292 ret = 0;
293 } else {
294 ret = -1;
295 }
296 break;
297 case 3:
298 if (strncasecmp(str, "yes", 3) == 0) {
299 ret = 1;
300 } else if (strncasecmp(str, "off", 3) == 0) {
301 ret = 0;
302 } else {
303 ret = -1;
304 }
305 break;
306 case 4:
307 if (strncasecmp(str, "true", 4) == 0) {
308 ret = 1;
309 } else {
310 ret = -1;
311 }
312 break;
313 case 5:
314 if (strncasecmp(str, "false", 5) == 0) {
315 ret = 0;
316 } else {
317 ret = -1;
318 }
319 break;
320 default:
321 ret = -1;
322 }
323
324 if (ret == -1) {
325 RETURN_VALIDATION_FAILED
326 } else {
327 zval_ptr_dtor(value);
328 ZVAL_BOOL(value, ret);
329 }
330 }
331 /* }}} */
332
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)333 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
334 {
335 size_t len;
336 char *str, *end;
337 char *num, *p;
338 zval *option_val;
339 char *decimal;
340 int decimal_set;
341 size_t decimal_len;
342 char dec_sep = '.';
343 char *thousand;
344 int thousand_set;
345 size_t thousand_len;
346 char *tsd_sep;
347
348 zend_long lval;
349 double dval;
350 double min_range, max_range;
351 int min_range_set, max_range_set;
352
353 int first, n;
354
355 len = Z_STRLEN_P(value);
356 str = Z_STRVAL_P(value);
357
358 PHP_FILTER_TRIM_DEFAULT(str, len);
359 end = str + len;
360
361 FETCH_STRING_OPTION(decimal, "decimal");
362
363 if (decimal_set) {
364 if (decimal_len != 1) {
365 php_error_docref(NULL, E_WARNING, "decimal separator must be one char");
366 RETURN_VALIDATION_FAILED
367 } else {
368 dec_sep = *decimal;
369 }
370 }
371
372 FETCH_STRING_OPTION(thousand, "thousand");
373
374 if (thousand_set) {
375 if (thousand_len < 1) {
376 php_error_docref(NULL, E_WARNING, "thousand separator must be at least one char");
377 RETURN_VALIDATION_FAILED
378 } else {
379 tsd_sep = thousand;
380 }
381 } else {
382 tsd_sep = "',.";
383 }
384
385 FETCH_DOUBLE_OPTION(min_range, "min_range");
386 FETCH_DOUBLE_OPTION(max_range, "max_range");
387
388 num = p = emalloc(len+1);
389 if (str < end && (*str == '+' || *str == '-')) {
390 *p++ = *str++;
391 }
392 first = 1;
393 while (1) {
394 n = 0;
395 while (str < end && *str >= '0' && *str <= '9') {
396 ++n;
397 *p++ = *str++;
398 }
399 if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
400 if (!first && n != 3) {
401 goto error;
402 }
403 if (*str == dec_sep) {
404 *p++ = '.';
405 str++;
406 while (str < end && *str >= '0' && *str <= '9') {
407 *p++ = *str++;
408 }
409 }
410 if (*str == 'e' || *str == 'E') {
411 *p++ = *str++;
412 if (str < end && (*str == '+' || *str == '-')) {
413 *p++ = *str++;
414 }
415 while (str < end && *str >= '0' && *str <= '9') {
416 *p++ = *str++;
417 }
418 }
419 break;
420 }
421 if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
422 if (first?(n < 1 || n > 3):(n != 3)) {
423 goto error;
424 }
425 first = 0;
426 str++;
427 } else {
428 goto error;
429 }
430 }
431 if (str != end) {
432 goto error;
433 }
434 *p = 0;
435
436 switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
437 case IS_LONG:
438 if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
439 goto error;
440 }
441 zval_ptr_dtor(value);
442 ZVAL_DOUBLE(value, (double)lval);
443 break;
444 case IS_DOUBLE:
445 if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
446 goto error;
447 }
448 if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
449 goto error;
450 }
451 zval_ptr_dtor(value);
452 ZVAL_DOUBLE(value, dval);
453 break;
454 default:
455 error:
456 efree(num);
457 RETURN_VALIDATION_FAILED
458 }
459 efree(num);
460 }
461 /* }}} */
462
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)463 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
464 {
465 zval *option_val;
466 zend_string *regexp;
467 int regexp_set;
468 pcre2_code *re = NULL;
469 pcre2_match_data *match_data = NULL;
470 uint32_t capture_count;
471 int rc;
472
473 /* Parse options */
474 FETCH_STR_OPTION(regexp, "regexp");
475
476 if (!regexp_set) {
477 php_error_docref(NULL, E_WARNING, "'regexp' option missing");
478 RETURN_VALIDATION_FAILED
479 }
480
481 re = pcre_get_compiled_regex(regexp, &capture_count);
482 if (!re) {
483 RETURN_VALIDATION_FAILED
484 }
485 match_data = php_pcre_create_match_data(capture_count, re);
486 if (!match_data) {
487 RETURN_VALIDATION_FAILED
488 }
489 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
490 php_pcre_free_match_data(match_data);
491
492 /* 0 means that the vector is too small to hold all the captured substring offsets */
493 if (rc < 0) {
494 RETURN_VALIDATION_FAILED
495 }
496 }
497
/*
 * Checks the length rules of a domain name and, with FILTER_FLAG_HOSTNAME
 * set in flags, the hostname character rules as well.
 *
 * Rules enforced:
 *   - one trailing dot is ignored;
 *   - at most 253 characters in total (trailing dot excluded);
 *   - no empty labels, and at most 63 characters per label;
 *   - in hostname mode, labels consist of alphanumerics and '-', and must
 *     start and end with an alphanumeric.
 *
 * An empty name is accepted unless FILTER_FLAG_HOSTNAME is set.
 *
 * Returns 1 when the name is acceptable, 0 otherwise.
 */
static int _php_filter_validate_domain(char * domain, int len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l;
	int hostname = (flags & FILTER_FLAG_HOSTNAME) != 0;
	unsigned char i = 1;

	/* A negative length would turn into a huge size_t below. */
	if (len < 0) {
		return 0;
	}

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot. The l > 0 guard keeps an empty string from
	 * reading the byte in front of the buffer. */
	if (l > 0 && *(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* i is the 1-based position of *s inside its label */
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
548 /* }}} */
549
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)550 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
551 {
552 if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
553 RETURN_VALIDATION_FAILED
554 }
555 }
556 /* }}} */
557
/*
 * Checks that a URL user or password component contains only characters
 * permitted in RFC 3986 userinfo: alphanumerics, the punctuation listed in
 * `valid`, and percent-encoded octets ("%" followed by two hex digits).
 *
 * Returns 1 when every byte of str is acceptable, 0 otherwise.
 */
static int is_userinfo_valid(zend_string *str)
{
	static const char valid[] = "-._~!$&'()*+,;=:";
	/* unsigned char keeps the <ctype.h> calls defined for bytes >= 0x80 */
	const unsigned char *p = (const unsigned char *) ZSTR_VAL(str);
	const unsigned char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* strchr() also matches the list's terminating NUL, so a NUL byte
		 * has to be excluded explicitly. */
		if (isalnum(*p) || (*p != '\0' && strchr(valid, *p) != NULL)) {
			p++;
		} else if (*p == '%' && end - p >= 3 && isxdigit(p[1]) && isxdigit(p[2])) {
			/* pct-encoded: both digits are hexadecimal */
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
573
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)574 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
575 {
576 php_url *url;
577 size_t old_len = Z_STRLEN_P(value);
578
579 if (flags & (FILTER_FLAG_SCHEME_REQUIRED | FILTER_FLAG_HOST_REQUIRED)) {
580 php_error_docref(NULL, E_DEPRECATED,
581 "explicit use of FILTER_FLAG_SCHEME_REQUIRED and FILTER_FLAG_HOST_REQUIRED is deprecated");
582 }
583
584 php_filter_url(value, flags, option_array, charset);
585
586 if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
587 RETURN_VALIDATION_FAILED
588 }
589
590 /* Use parse_url - if it returns false, we return NULL */
591 url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
592
593 if (url == NULL) {
594 RETURN_VALIDATION_FAILED
595 }
596
597 if (url->scheme != NULL &&
598 (zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
599 char *e, *s, *t;
600 size_t l;
601
602 if (url->host == NULL) {
603 goto bad_url;
604 }
605
606 s = ZSTR_VAL(url->host);
607 l = ZSTR_LEN(url->host);
608 e = s + l;
609 t = e - 1;
610
611 /* An IPv6 enclosed by square brackets is a valid hostname */
612 if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2, NULL)) {
613 php_url_free(url);
614 return;
615 }
616
617 // Validate domain
618 if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
619 php_url_free(url);
620 RETURN_VALIDATION_FAILED
621 }
622 }
623
624 if (
625 url->scheme == NULL ||
626 /* some schemas allow the host to be empty */
627 (url->host == NULL && (strcmp(ZSTR_VAL(url->scheme), "mailto") && strcmp(ZSTR_VAL(url->scheme), "news") && strcmp(ZSTR_VAL(url->scheme), "file"))) ||
628 ((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
629 ) {
630 bad_url:
631 php_url_free(url);
632 RETURN_VALIDATION_FAILED
633 }
634
635 if ((url->user != NULL && !is_userinfo_valid(url->user))
636 || (url->pass != NULL && !is_userinfo_valid(url->pass))
637 ) {
638 php_url_free(url);
639 RETURN_VALIDATION_FAILED
640
641 }
642
643 php_url_free(url);
644 }
645 /* }}} */
646
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)647 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
648 {
649 /*
650 * The regex below is based on a regex by Michael Rushton.
651 * However, it is not identical. I changed it to only consider routeable
652 * addresses as valid. Michael's regex considers a@b a valid address
653 * which conflicts with section 2.3.5 of RFC 5321 which states that:
654 *
655 * Only resolvable, fully-qualified domain names (FQDNs) are permitted
656 * when domain names are used in SMTP. In other words, names that can
657 * be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
658 * in Section 5) are permitted, as are CNAME RRs whose targets can be
659 * resolved, in turn, to MX or address RRs. Local nicknames or
660 * unqualified names MUST NOT be used.
661 *
662 * This regex does not handle comments and folding whitespace. While
663 * this is technically valid in an email address, these parts aren't
664 * actually part of the address itself.
665 *
666 * Michael's regex carries this copyright:
667 *
668 * Copyright © Michael Rushton 2009-10
669 * http://squiloople.com/
670 * Feel free to use and redistribute this code. But please keep this copyright notice.
671 *
672 */
673 pcre2_code *re = NULL;
674 pcre2_match_data *match_data = NULL;
675 uint32_t capture_count;
676 zend_string *sregexp;
677 int rc;
678 const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
679 const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
680 const char *regexp;
681 size_t regexp_len;
682
683 if (flags & FILTER_FLAG_EMAIL_UNICODE) {
684 regexp = regexp0;
685 regexp_len = sizeof(regexp0) - 1;
686 } else {
687 regexp = regexp1;
688 regexp_len = sizeof(regexp1) - 1;
689 }
690
691 /* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
692 if (Z_STRLEN_P(value) > 320) {
693 RETURN_VALIDATION_FAILED
694 }
695
696 sregexp = zend_string_init(regexp, regexp_len, 0);
697 re = pcre_get_compiled_regex(sregexp, &capture_count);
698 zend_string_release_ex(sregexp, 0);
699 if (!re) {
700 RETURN_VALIDATION_FAILED
701 }
702 match_data = php_pcre_create_match_data(capture_count, re);
703 if (!match_data) {
704 RETURN_VALIDATION_FAILED
705 }
706 rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
707 php_pcre_free_match_data(match_data);
708
709 /* 0 means that the vector is too small to hold all the captured substring offsets */
710 if (rc < 0) {
711 RETURN_VALIDATION_FAILED
712 }
713
714 }
715 /* }}} */
716
/*
 * Validates a dotted-quad IPv4 address in str[0 .. str_len-1].
 *
 * Exactly four decimal fields separated by '.' are required; each field is
 * 0..255, at most three digits, and must not carry a leading zero (that
 * would look like octal notation, which is not supported). Parsed fields are
 * written to ip[0..3] as they are read, so ip may be partially filled when
 * validation fails.
 *
 * Returns 1 for a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *const limit = str + str_len;
	const char *cur = str;
	int filled = 0;

	while (cur < limit) {
		int field, digits, starts_with_zero;

		/* Every field begins with a digit. */
		if (*cur < '0' || *cur > '9') {
			return 0;
		}
		starts_with_zero = (*cur == '0');
		field = *cur++ - '0';
		digits = 1;

		for (; cur < limit && *cur >= '0' && *cur <= '9'; cur++) {
			field = field * 10 + (*cur - '0');
			if (field > 255) {
				return 0;
			}
			if (++digits > 3) {
				return 0;
			}
		}

		/* A multi-digit field starting with '0' is rejected. */
		if (starts_with_zero && digits > 1) {
			return 0;
		}

		ip[filled++] = field;
		if (filled == 4) {
			/* Nothing may follow the fourth field. */
			return cur == limit;
		}
		if (cur >= limit || *cur != '.') {
			return 0;
		}
		cur++;
	}
	return 0;
}
750 /* }}} */
751
/*
 * Validates an IPv6 address in str[0 .. str_len-1], optionally ending in an
 * embedded dotted-quad IPv4 address.
 *
 * str      address text (no surrounding brackets)
 * str_len  number of bytes in str
 * ip       optional (may be NULL); on success receives the eight 16-bit
 *          groups. Its contents are unspecified when 0 is returned.
 *
 * Returns 1 for a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1; /* group index of "::", or -1 if not seen */
	int blocks = 0;          /* number of 16-bit groups accounted for so far */
	int num, n, i;
	char *ipv4;
	char *end;
	int ip4elm[4];
	char *s = str;

	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Walk back to the start of the dotted quad */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 tail stands for two 16-bit groups */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				if (compressed_pos >= 0) {
					/* only one "::" is allowed */
					return 0;
				}
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}

		/* Parse one group of 1..4 hex digits */
		num = n = 0;
		while (str < end) {
			int digit;

			if (*str >= '0' && *str <= '9') {
				digit = *str - '0';
			} else if (*str >= 'a' && *str <= 'f') {
				digit = (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				digit = (*str - 'A') + 10;
			} else {
				break;
			}
			/* Reject at the fifth digit rather than after the whole run:
			 * accumulating an arbitrarily long run would overflow num. */
			if (++n > 4) {
				return 0;
			}
			num = 16 * num + digit;
			str++;
		}
		if (n < 1) {
			return 0;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		if (++blocks > 8) {
			return 0;
		}
	}

fixup_ip:
	if (ip && ipv4) {
		/* With an embedded IPv4 tail the groups are reported as
		 * 0:0:0:0:0:ffff:<v4 high>:<v4 low>. */
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Expand "::": shift the groups after it to the end and fill the
		 * gap with zeros. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
861 /* }}} */
862
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)863 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
864 {
865 /* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
866 * flag to throw out reserved ranges; multicast ranges... etc. If both
867 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
868 * colon determine the format */
869
870 int ip[8];
871 int mode;
872
873 if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
874 mode = FORMAT_IPV6;
875 } else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
876 mode = FORMAT_IPV4;
877 } else {
878 RETURN_VALIDATION_FAILED
879 }
880
881 if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
882 /* Both formats are cool */
883 } else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
884 RETURN_VALIDATION_FAILED
885 } else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
886 RETURN_VALIDATION_FAILED
887 }
888
889 switch (mode) {
890 case FORMAT_IPV4:
891 if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
892 RETURN_VALIDATION_FAILED
893 }
894
895 /* Check flags */
896 if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
897 if (
898 (ip[0] == 10) ||
899 (ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
900 (ip[0] == 192 && ip[1] == 168)
901 ) {
902 RETURN_VALIDATION_FAILED
903 }
904 }
905
906 if (flags & FILTER_FLAG_NO_RES_RANGE) {
907 if (
908 (ip[0] == 0) ||
909 (ip[0] >= 240) ||
910 (ip[0] == 127) ||
911 (ip[0] == 169 && ip[1] == 254)
912 ) {
913 RETURN_VALIDATION_FAILED
914 }
915 }
916 break;
917
918 case FORMAT_IPV6:
919 {
920 int res = 0;
921 res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
922 if (res < 1) {
923 RETURN_VALIDATION_FAILED
924 }
925 /* Check flags */
926 if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
927 if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
928 RETURN_VALIDATION_FAILED
929 }
930 }
931 if (flags & FILTER_FLAG_NO_RES_RANGE) {
932 if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
933 && ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
934 || (ip[0] == 0x5f)
935 || (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
936 || ((ip[0] == 0x2001 && ip[1] == 0x0db8) || (ip[1] >= 0x0010 && ip[1] <= 0x001f))
937 || (ip[0] == 0x3ff3)
938 ) {
939 RETURN_VALIDATION_FAILED
940 }
941 }
942 }
943 break;
944 }
945 }
946 /* }}} */
947
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)948 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
949 {
950 char *input = Z_STRVAL_P(value);
951 size_t input_len = Z_STRLEN_P(value);
952 int tokens, length, i, offset, exp_separator_set;
953 size_t exp_separator_len;
954 char separator;
955 char *exp_separator;
956 zend_long ret = 0;
957 zval *option_val;
958
959 FETCH_STRING_OPTION(exp_separator, "separator");
960
961 if (exp_separator_set && exp_separator_len != 1) {
962 php_error_docref(NULL, E_WARNING, "Separator must be exactly one character long");
963 RETURN_VALIDATION_FAILED;
964 }
965
966 if (14 == input_len) {
967 /* EUI-64 format: Four hexadecimal digits separated by dots. Less
968 * commonly used but valid nonetheless.
969 */
970 tokens = 3;
971 length = 4;
972 separator = '.';
973 } else if (17 == input_len && input[2] == '-') {
974 /* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
975 tokens = 6;
976 length = 2;
977 separator = '-';
978 } else if (17 == input_len && input[2] == ':') {
979 /* IEEE 802 format: Six hexadecimal digits separated by colons. */
980 tokens = 6;
981 length = 2;
982 separator = ':';
983 } else {
984 RETURN_VALIDATION_FAILED;
985 }
986
987 if (exp_separator_set && separator != exp_separator[0]) {
988 RETURN_VALIDATION_FAILED;
989 }
990
991 /* Essentially what we now have is a set of tokens each consisting of
992 * a hexadecimal number followed by a separator character. (With the
993 * exception of the last token which does not have the separator.)
994 */
995 for (i = 0; i < tokens; i++) {
996 offset = i * (length + 1);
997
998 if (i < tokens - 1 && input[offset + length] != separator) {
999 /* The current token did not end with e.g. a "." */
1000 RETURN_VALIDATION_FAILED
1001 }
1002 if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
1003 /* The current token is no valid hexadecimal digit */
1004 RETURN_VALIDATION_FAILED
1005 }
1006 }
1007 }
1008 /* }}} */
1009