xref: /PHP-8.0/ext/filter/logical_filters.c (revision 2119ba21)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | http://www.php.net/license/3_01.txt                                  |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Derick Rethans <derick@php.net>                             |
14   |          Pierre-A. Joye <pierre@php.net>                             |
15   |          Kévin Dunglas <dunglas@gmail.com>                           |
16   +----------------------------------------------------------------------+
17 */
18 
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "ext/standard/url.h"
22 #include "ext/pcre/php_pcre.h"
23 
24 #include "zend_multiply.h"
25 
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
28 #endif
29 
30 #ifndef INADDR_NONE
31 # define INADDR_NONE ((unsigned long int) -1)
32 #endif
33 
34 
35 /* {{{ FETCH_DOUBLE_OPTION(var_name, option_name) */
/* Looks up option_name in option_array and, when the key exists, converts it
 * to a double. The expanding scope must provide `option_array`, `option_val`,
 * `var_name` and `var_name##_set`; the latter is 1 only when the key was found. */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	var_name##_set = 0; \
	var_name = 0; \
	if (option_array \
	 && (option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
		var_name##_set = 1; \
		var_name = zval_get_double(option_val); \
	}
45 /* }}} */
46 
47 /* {{{ FETCH_LONG_OPTION(var_name, option_name) */
/* Looks up option_name in option_array and, when the key exists, converts it
 * to a zend_long. The expanding scope must provide `option_array`,
 * `option_val`, `var_name` and `var_name##_set`; the latter is 1 only when the
 * key was found. */
#define FETCH_LONG_OPTION(var_name, option_name) \
	var_name##_set = 0; \
	var_name = 0; \
	if (option_array \
	 && (option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
		var_name##_set = 1; \
		var_name = zval_get_long(option_val); \
	}
57 /* }}} */
58 
59 /* {{{ FETCH_STRING_OPTION(var_name, option_name) */
/* Looks up option_name in option_array and exposes it as a char pointer plus
 * length, but only when the stored value is a string. The expanding scope must
 * provide `option_array`, `option_val`, `var_name`, `var_name##_len` and
 * `var_name##_set`. No conversion is attempted for non-string values: they
 * leave var_name NULL and var_name##_set 0. */
#define FETCH_STRING_OPTION(var_name, option_name) \
	var_name##_set = 0; \
	var_name##_len = 0; \
	var_name = NULL; \
	if (option_array \
	 && (option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL \
	 && Z_TYPE_P(option_val) == IS_STRING) { \
		var_name##_set = 1; \
		var_name##_len = Z_STRLEN_P(option_val); \
		var_name = Z_STRVAL_P(option_val); \
	}
73 /* }}} */
74 
75 /* {{{ FETCH_STR_OPTION(var_name, option_name) */
/* Same lookup as FETCH_STRING_OPTION, but hands back the zend_string itself
 * instead of a pointer/length pair. The expanding scope must provide
 * `option_array`, `option_val`, `var_name` and `var_name##_set`. */
#define FETCH_STR_OPTION(var_name, option_name) \
	var_name##_set = 0; \
	var_name = NULL; \
	if (option_array \
	 && (option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL \
	 && Z_TYPE_P(option_val) == IS_STRING) { \
		var_name##_set = 1; \
		var_name = Z_STR_P(option_val); \
	}
87 /* }}} */
88 
89 #define FORMAT_IPV4    4
90 #define FORMAT_IPV6    6
91 
92 static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]);
93 
/*
 * Parses a signed decimal integer from the str_len bytes starting at str.
 *
 * Accepted input is an optional '+' or '-' followed by either a single '0'
 * or a run of digits whose first digit is 1..9 (leading zeros are rejected).
 *
 * Returns 1 and stores the value in *ret on success. Returns -1 and leaves
 * *ret untouched when the input is malformed or does not fit in a zend_long.
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Consume an optional sign; never look at *str for empty input. */
	if (str < end) {
		switch (*str) {
			case '-':
				sign = 1;
				/* fallthrough */
			case '+':
				str++;
				break;
			default:
				break;
		}
	}

	if (str + 1 == end && *str == '0') {
		/* Special cases: 0, +0 and -0. Store the result explicitly so the
		 * caller does not depend on having pre-initialised *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	/* Cheap length-based rejection before the per-digit overflow checks. */
	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str < '0' || *str > '9') {
			return -1;
		}
		digit = (*(str++) - '0');
		/* The accumulator already carries the sign, so positive values are
		 * checked against ZEND_LONG_MAX and negative ones against
		 * ZEND_LONG_MIN before multiplying. */
		if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
			ctx_value = (ctx_value * 10) + digit;
		} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
			ctx_value = (ctx_value * 10) - digit;
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
144 /* }}} */
145 
/*
 * Parses str_len bytes of octal digits (0..7) into *ret.
 *
 * The value is accumulated as an unsigned zend_ulong and cast to zend_long on
 * success, so the accepted range is the full unsigned width. Returns 1 on
 * success; returns -1 (without writing *ret) on a non-octal character or on
 * unsigned overflow. An empty range yields 0.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	zend_ulong acc = 0;

	for (; str < stop; str++) {
		zend_ulong d;

		if (*str < '0' || *str > '7') {
			return -1;
		}
		d = (zend_ulong)(*str - '0');

		/* Check before shifting in the next digit, then before adding it. */
		if (acc > limit / 8) {
			return -1;
		}
		acc *= 8;
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
167 /* }}} */
168 
/*
 * Parses str_len bytes of hexadecimal digits (0-9, a-f, A-F) into *ret.
 *
 * The value is accumulated as an unsigned zend_ulong and cast to zend_long on
 * success. Returns 1 on success; returns -1 (without writing *ret) on a
 * non-hex character or on unsigned overflow. An empty range yields 0.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	zend_ulong acc = 0;

	for (; str < stop; str++) {
		const char c = *str;
		zend_ulong d;

		if (c >= '0' && c <= '9') {
			d = c - '0';
		} else if (c >= 'a' && c <= 'f') {
			d = c - ('a' - 10);
		} else if (c >= 'A' && c <= 'F') {
			d = c - ('A' - 10);
		} else {
			return -1;
		}

		/* Check before shifting in the next digit, then before adding it. */
		if (acc > limit / 16) {
			return -1;
		}
		acc *= 16;
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
194 /* }}} */
195 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)196 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
197 {
198 	zval *option_val;
199 	zend_long  min_range, max_range, option_flags;
200 	int   min_range_set, max_range_set;
201 	int   allow_octal = 0, allow_hex = 0;
202 	size_t	  len;
203 	int error = 0;
204 	zend_long  ctx_value;
205 	char *p;
206 
207 	/* Parse options */
208 	FETCH_LONG_OPTION(min_range,    "min_range");
209 	FETCH_LONG_OPTION(max_range,    "max_range");
210 	option_flags = flags;
211 
212 	len = Z_STRLEN_P(value);
213 
214 	if (len == 0) {
215 		RETURN_VALIDATION_FAILED
216 	}
217 
218 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
219 		allow_octal = 1;
220 	}
221 
222 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
223 		allow_hex = 1;
224 	}
225 
226 	/* Start the validating loop */
227 	p = Z_STRVAL_P(value);
228 	ctx_value = 0;
229 
230 	PHP_FILTER_TRIM_DEFAULT(p, len);
231 
232 	if (*p == '0') {
233 		p++; len--;
234 		if (allow_hex && (*p == 'x' || *p == 'X')) {
235 			p++; len--;
236 			if (len == 0) {
237 				RETURN_VALIDATION_FAILED
238 			}
239 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
240 				error = 1;
241 			}
242 		} else if (allow_octal) {
243 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
244 				error = 1;
245 			}
246 		} else if (len != 0) {
247 			error = 1;
248 		}
249 	} else {
250 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
251 			error = 1;
252 		}
253 	}
254 
255 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
256 		RETURN_VALIDATION_FAILED
257 	} else {
258 		zval_ptr_dtor(value);
259 		ZVAL_LONG(value, ctx_value);
260 		return;
261 	}
262 }
263 /* }}} */
264 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)265 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
266 {
267 	char *str = Z_STRVAL_P(value);
268 	size_t len = Z_STRLEN_P(value);
269 	int ret;
270 
271 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
272 
273 	/* returns true for "1", "true", "on" and "yes"
274 	 * returns false for "0", "false", "off", "no", and ""
275 	 * null otherwise. */
276 	switch (len) {
277 		case 0:
278 			ret = 0;
279 			break;
280 		case 1:
281 			if (*str == '1') {
282 				ret = 1;
283 			} else if (*str == '0') {
284 				ret = 0;
285 			} else {
286 				ret = -1;
287 			}
288 			break;
289 		case 2:
290 			if (strncasecmp(str, "on", 2) == 0) {
291 				ret = 1;
292 			} else if (strncasecmp(str, "no", 2) == 0) {
293 				ret = 0;
294 			} else {
295 				ret = -1;
296 			}
297 			break;
298 		case 3:
299 			if (strncasecmp(str, "yes", 3) == 0) {
300 				ret = 1;
301 			} else if (strncasecmp(str, "off", 3) == 0) {
302 				ret = 0;
303 			} else {
304 				ret = -1;
305 			}
306 			break;
307 		case 4:
308 			if (strncasecmp(str, "true", 4) == 0) {
309 				ret = 1;
310 			} else {
311 				ret = -1;
312 			}
313 			break;
314 		case 5:
315 			if (strncasecmp(str, "false", 5) == 0) {
316 				ret = 0;
317 			} else {
318 				ret = -1;
319 			}
320 			break;
321 		default:
322 			ret = -1;
323 	}
324 
325 	if (ret == -1) {
326 		RETURN_VALIDATION_FAILED
327 	} else {
328 		zval_ptr_dtor(value);
329 		ZVAL_BOOL(value, ret);
330 	}
331 }
332 /* }}} */
333 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)334 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
335 {
336 	size_t len;
337 	char *str, *end;
338 	char *num, *p;
339 	zval *option_val;
340 	char *decimal;
341 	int decimal_set;
342 	size_t decimal_len;
343 	char dec_sep = '.';
344 	char *thousand;
345 	int thousand_set;
346 	size_t thousand_len;
347 	char *tsd_sep;
348 
349 	zend_long lval;
350 	double dval;
351 	double min_range, max_range;
352 	int   min_range_set, max_range_set;
353 
354 	int first, n;
355 
356 	len = Z_STRLEN_P(value);
357 	str = Z_STRVAL_P(value);
358 
359 	PHP_FILTER_TRIM_DEFAULT(str, len);
360 	end = str + len;
361 
362 	FETCH_STRING_OPTION(decimal, "decimal");
363 
364 	if (decimal_set) {
365 		if (decimal_len != 1) {
366 			zend_value_error("%s(): \"decimal\" option must be one character long", get_active_function_name());
367 			RETURN_VALIDATION_FAILED
368 		} else {
369 			dec_sep = *decimal;
370 		}
371 	}
372 
373 	FETCH_STRING_OPTION(thousand, "thousand");
374 
375 	if (thousand_set) {
376 		if (thousand_len < 1) {
377 			zend_value_error("%s(): \"thousand\" option cannot be empty", get_active_function_name());
378 			RETURN_VALIDATION_FAILED
379 		} else {
380 			tsd_sep = thousand;
381 		}
382 	} else {
383 		tsd_sep = "',.";
384 	}
385 
386 	FETCH_DOUBLE_OPTION(min_range, "min_range");
387 	FETCH_DOUBLE_OPTION(max_range, "max_range");
388 
389 	num = p = emalloc(len+1);
390 	if (str < end && (*str == '+' || *str == '-')) {
391 		*p++ = *str++;
392 	}
393 	first = 1;
394 	while (1) {
395 		n = 0;
396 		while (str < end && *str >= '0' && *str <= '9') {
397 			++n;
398 			*p++ = *str++;
399 		}
400 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
401 			if (!first && n != 3) {
402 				goto error;
403 			}
404 			if (*str == dec_sep) {
405 				*p++ = '.';
406 				str++;
407 				while (str < end && *str >= '0' && *str <= '9') {
408 					*p++ = *str++;
409 				}
410 			}
411 			if (*str == 'e' || *str == 'E') {
412 				*p++ = *str++;
413 				if (str < end && (*str == '+' || *str == '-')) {
414 					*p++ = *str++;
415 				}
416 				while (str < end && *str >= '0' && *str <= '9') {
417 					*p++ = *str++;
418 				}
419 			}
420 			break;
421 		}
422 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
423 			if (first?(n < 1 || n > 3):(n != 3)) {
424 				goto error;
425 			}
426 			first = 0;
427 			str++;
428 		} else {
429 			goto error;
430 		}
431 	}
432 	if (str != end) {
433 		goto error;
434 	}
435 	*p = 0;
436 
437 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
438 		case IS_LONG:
439 			if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
440 				goto error;
441 			}
442 			zval_ptr_dtor(value);
443 			ZVAL_DOUBLE(value, (double)lval);
444 			break;
445 		case IS_DOUBLE:
446 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
447 				goto error;
448 			}
449 			if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
450 				goto error;
451 			}
452 			zval_ptr_dtor(value);
453 			ZVAL_DOUBLE(value, dval);
454 			break;
455 		default:
456 error:
457 			efree(num);
458 			RETURN_VALIDATION_FAILED
459 	}
460 	efree(num);
461 }
462 /* }}} */
463 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)464 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
465 {
466 	zval *option_val;
467 	zend_string *regexp;
468 	int regexp_set;
469 	pcre2_code *re = NULL;
470 	pcre2_match_data *match_data = NULL;
471 	uint32_t capture_count;
472 	int rc;
473 
474 	/* Parse options */
475 	FETCH_STR_OPTION(regexp, "regexp");
476 
477 	if (!regexp_set) {
478 		zend_value_error("%s(): \"regexp\" option is missing", get_active_function_name());
479 		RETURN_VALIDATION_FAILED
480 	}
481 
482 	re = pcre_get_compiled_regex(regexp, &capture_count);
483 	if (!re) {
484 		RETURN_VALIDATION_FAILED
485 	}
486 	match_data = php_pcre_create_match_data(capture_count, re);
487 	if (!match_data) {
488 		RETURN_VALIDATION_FAILED
489 	}
490 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
491 	php_pcre_free_match_data(match_data);
492 
493 	/* 0 means that the vector is too small to hold all the captured substring offsets */
494 	if (rc < 0) {
495 		RETURN_VALIDATION_FAILED
496 	}
497 }
498 
/*
 * Checks the structural rules of a domain name of `len` bytes.
 *
 * Always enforced: at most 253 characters (a single trailing dot is ignored),
 * no leading dot, no empty labels, at most 63 characters per label.
 * With FILTER_FLAG_HOSTNAME in `flags`, labels may additionally contain only
 * alphanumerics and '-', and must begin and end with an alphanumeric.
 *
 * Returns 1 when the name is acceptable, 0 otherwise. `domain` is expected to
 * be NUL-terminated, since domain[0] and the byte after a dot are inspected
 * without a length check.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l;
	int hostname = flags & FILTER_FLAG_HOSTNAME;
	/* Position (1-based) of the next character within the current label. */
	unsigned char i = 1;

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot */
	if (l > 0 && *(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			/* A '-' before a dot is caught by the branch above; this covers
			 * the final label, which has no dot after it inside [s, e). */
			if (hostname && *s == '-' && s + 1 == e) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
549 /* }}} */
550 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)551 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
552 {
553 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
554 		RETURN_VALIDATION_FAILED
555 	}
556 }
557 /* }}} */
558 
/*
 * Checks that a URL user or password component consists only of letters,
 * digits, the punctuation listed in `valid`, and percent-escapes of the form
 * '%' followed by two hexadecimal digits.
 *
 * Returns 1 when every byte of str is acceptable, 0 otherwise.
 */
static int is_userinfo_valid(zend_string *str)
{
	const char *valid = "-._~!$&'()*+,;=:";
	const char *p = ZSTR_VAL(str);
	const char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* ctype functions require a value representable as unsigned char. */
		const unsigned char c = (unsigned char)*p;

		/* strchr() would also match the terminating NUL of `valid`, so an
		 * embedded NUL byte has to be excluded explicitly. */
		if (isalpha(c) || isdigit(c) || (c != '\0' && strchr(valid, c))) {
			p++;
		} else if (c == '%' && end - p >= 3
				&& isxdigit((unsigned char)p[1]) && isxdigit((unsigned char)p[2])) {
			/* Both digits of a percent-escape are hexadecimal. */
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
574 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)575 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
576 {
577 	php_url *url;
578 	size_t old_len = Z_STRLEN_P(value);
579 
580 	php_filter_url(value, flags, option_array, charset);
581 
582 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
583 		RETURN_VALIDATION_FAILED
584 	}
585 
586 	/* Use parse_url - if it returns false, we return NULL */
587 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
588 
589 	if (url == NULL) {
590 		RETURN_VALIDATION_FAILED
591 	}
592 
593 	if (url->scheme != NULL &&
594 		(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
595 		char *e, *s, *t;
596 		size_t l;
597 
598 		if (url->host == NULL) {
599 			goto bad_url;
600 		}
601 
602 		s = ZSTR_VAL(url->host);
603 		l = ZSTR_LEN(url->host);
604 		e = s + l;
605 		t = e - 1;
606 
607 		/* An IPv6 enclosed by square brackets is a valid hostname */
608 		if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2, NULL)) {
609 			php_url_free(url);
610 			return;
611 		}
612 
613 		// Validate domain
614 		if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
615 			php_url_free(url);
616 			RETURN_VALIDATION_FAILED
617 		}
618 	}
619 
620 	if (
621 		url->scheme == NULL ||
622 		/* some schemas allow the host to be empty */
623 		(url->host == NULL && (strcmp(ZSTR_VAL(url->scheme), "mailto") && strcmp(ZSTR_VAL(url->scheme), "news") && strcmp(ZSTR_VAL(url->scheme), "file"))) ||
624 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
625 	) {
626 bad_url:
627 		php_url_free(url);
628 		RETURN_VALIDATION_FAILED
629 	}
630 
631 	if ((url->user != NULL && !is_userinfo_valid(url->user))
632 		|| (url->pass != NULL && !is_userinfo_valid(url->pass))
633 	) {
634 		php_url_free(url);
635 		RETURN_VALIDATION_FAILED
636 
637 	}
638 
639 	php_url_free(url);
640 }
641 /* }}} */
642 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)643 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
644 {
645 	/*
646 	 * The regex below is based on a regex by Michael Rushton.
647 	 * However, it is not identical.  I changed it to only consider routeable
648 	 * addresses as valid.  Michael's regex considers a@b a valid address
649 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
650 	 *
651 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
652 	 *   when domain names are used in SMTP.  In other words, names that can
653 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
654 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
655 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
656 	 *   unqualified names MUST NOT be used.
657 	 *
658 	 * This regex does not handle comments and folding whitespace.  While
659 	 * this is technically valid in an email address, these parts aren't
660 	 * actually part of the address itself.
661 	 *
662 	 * Michael's regex carries this copyright:
663 	 *
664 	 * Copyright © Michael Rushton 2009-10
665 	 * http://squiloople.com/
666 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
667 	 *
668 	 */
669 	pcre2_code *re = NULL;
670 	pcre2_match_data *match_data = NULL;
671 	uint32_t capture_count;
672 	zend_string *sregexp;
673 	int rc;
674 	const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
675 	const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
676 	const char *regexp;
677 	size_t regexp_len;
678 
679 	if (flags & FILTER_FLAG_EMAIL_UNICODE) {
680 		regexp = regexp0;
681 		regexp_len = sizeof(regexp0) - 1;
682 	} else {
683 		regexp = regexp1;
684 		regexp_len = sizeof(regexp1) - 1;
685 	}
686 
687 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
688 	if (Z_STRLEN_P(value) > 320) {
689 		RETURN_VALIDATION_FAILED
690 	}
691 
692 	sregexp = zend_string_init(regexp, regexp_len, 0);
693 	re = pcre_get_compiled_regex(sregexp, &capture_count);
694 	zend_string_release_ex(sregexp, 0);
695 	if (!re) {
696 		RETURN_VALIDATION_FAILED
697 	}
698 	match_data = php_pcre_create_match_data(capture_count, re);
699 	if (!match_data) {
700 		RETURN_VALIDATION_FAILED
701 	}
702 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
703 	php_pcre_free_match_data(match_data);
704 
705 	/* 0 means that the vector is too small to hold all the captured substring offsets */
706 	if (rc < 0) {
707 		RETURN_VALIDATION_FAILED
708 	}
709 
710 }
711 /* }}} */
712 
/*
 * Parses a dotted-quad IPv4 address from the str_len bytes at str.
 *
 * Exactly four decimal octets (0..255, at most three digits, no leading
 * zeros) separated by single dots are required, with nothing following the
 * last octet. Each parsed octet is stored in ip[0..3] as it is read, so ip
 * may be partially written when validation fails.
 *
 * Returns 1 when the address is valid, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *stop = str + str_len;
	int octets = 0;

	while (cur < stop) {
		int starts_with_zero;
		int digits;
		int octet;

		if (*cur < '0' || *cur > '9') {
			return 0;
		}

		starts_with_zero = (*cur == '0');
		octet = *cur++ - '0';
		digits = 1;

		while (cur < stop && *cur >= '0' && *cur <= '9') {
			octet = octet * 10 + (*cur++ - '0');
			if (octet > 255 || ++digits > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (starts_with_zero && (octet != 0 || digits > 1)) {
			return 0;
		}

		ip[octets++] = octet;

		if (octets == 4) {
			/* Valid only if the fourth octet ends the input. */
			return cur == stop;
		}

		if (cur >= stop || *cur++ != '.') {
			return 0;
		}
	}

	return 0;
}
746 /* }}} */
747 
/*
 * Validates a textual IPv6 address of str_len bytes, optionally ending in an
 * embedded dotted-quad IPv4 address.
 *
 * ip may be NULL. When it is not, the eight 16-bit groups are stored in
 * ip[0..7] with a "::" run expanded to zeros. ip may be partially written
 * when validation fails.
 * NOTE(review): when an embedded IPv4 address is present, ip is filled as
 * 0:0:0:0:0:ffff:<ipv4> regardless of the groups that preceded it - confirm
 * this is what callers expect.
 *
 * Returns 1 when the address is valid, 0 otherwise.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1;
	int blocks = 0;
	int num, n, i;
	char *ipv4;
	char *end;
	int ip4elm[4];
	char *s = str;

	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Back up from the first dot to the start of the IPv4 part. */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 part stands for two 16-bit groups. */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				if (compressed_pos >= 0) {
					/* only one "::" is allowed */
					return 0;
				}
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}

		/* Read one group of 1..4 hexadecimal digits. */
		num = n = 0;
		while (str < end) {
			int nibble;

			if (*str >= '0' && *str <= '9') {
				nibble = *str - '0';
			} else if (*str >= 'a' && *str <= 'f') {
				nibble = (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				nibble = (*str - 'A') + 10;
			} else {
				break;
			}
			/* Reject over-long groups here, before accumulating, so that an
			 * arbitrarily long digit run cannot overflow `num`. */
			if (++n > 4) {
				return 0;
			}
			num = 16 * num + nibble;
			str++;
		}
		if (n < 1) {
			return 0;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		if (++blocks > 8) {
			return 0;
		}
	}

fixup_ip:
	if (ip && ipv4) {
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Move the groups that followed "::" to the end of the array and
		 * zero-fill the gap that the compression stood for. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
857 /* }}} */
858 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)859 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
860 {
861 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
862 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
863 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
864 	 * colon determine the format */
865 
866 	int            ip[8];
867 	int            mode;
868 
869 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
870 		mode = FORMAT_IPV6;
871 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
872 		mode = FORMAT_IPV4;
873 	} else {
874 		RETURN_VALIDATION_FAILED
875 	}
876 
877 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
878 		/* Both formats are cool */
879 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
880 		RETURN_VALIDATION_FAILED
881 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
882 		RETURN_VALIDATION_FAILED
883 	}
884 
885 	switch (mode) {
886 		case FORMAT_IPV4:
887 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
888 				RETURN_VALIDATION_FAILED
889 			}
890 
891 			/* Check flags */
892 			if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
893 				if (
894 					(ip[0] == 10) ||
895 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
896 					(ip[0] == 192 && ip[1] == 168)
897 				) {
898 					RETURN_VALIDATION_FAILED
899 				}
900 			}
901 
902 			if (flags & FILTER_FLAG_NO_RES_RANGE) {
903 				if (
904 					(ip[0] == 0) ||
905 					(ip[0] >= 240) ||
906 					(ip[0] == 127) ||
907 					(ip[0] == 169 && ip[1] == 254)
908 				) {
909 					RETURN_VALIDATION_FAILED
910 				}
911 			}
912 			break;
913 
914 		case FORMAT_IPV6:
915 			{
916 				int res = 0;
917 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
918 				if (res < 1) {
919 					RETURN_VALIDATION_FAILED
920 				}
921 				/* Check flags */
922 				if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
923 					if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
924 						RETURN_VALIDATION_FAILED
925 					}
926 				}
927 				if (flags & FILTER_FLAG_NO_RES_RANGE) {
928 					if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
929 						&& ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
930 						|| (ip[0] == 0x5f)
931 						|| (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
932 						|| (ip[0] == 0x2001 && (ip[1] == 0x0db8 || (ip[1] >= 0x0010 && ip[1] <= 0x001f)))
933 						|| (ip[0] == 0x3ff3)
934 					) {
935 						RETURN_VALIDATION_FAILED
936 					}
937 				}
938 			}
939 			break;
940 	}
941 }
942 /* }}} */
943 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)944 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
945 {
946 	char *input = Z_STRVAL_P(value);
947 	size_t input_len = Z_STRLEN_P(value);
948 	int tokens, length, i, offset, exp_separator_set;
949 	size_t exp_separator_len;
950 	char separator;
951 	char *exp_separator;
952 	zend_long ret = 0;
953 	zval *option_val;
954 
955 	FETCH_STRING_OPTION(exp_separator, "separator");
956 
957 	if (exp_separator_set && exp_separator_len != 1) {
958 		zend_value_error("%s(): \"separator\" option must be one character long", get_active_function_name());
959 		RETURN_VALIDATION_FAILED;
960 	}
961 
962 	if (14 == input_len) {
963 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
964 		 * commonly used but valid nonetheless.
965 		 */
966 		tokens = 3;
967 		length = 4;
968 		separator = '.';
969 	} else if (17 == input_len && input[2] == '-') {
970 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
971 		tokens = 6;
972 		length = 2;
973 		separator = '-';
974 	} else if (17 == input_len && input[2] == ':') {
975 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
976 		tokens = 6;
977 		length = 2;
978 		separator = ':';
979 	} else {
980 		RETURN_VALIDATION_FAILED;
981 	}
982 
983 	if (exp_separator_set && separator != exp_separator[0]) {
984 		RETURN_VALIDATION_FAILED;
985 	}
986 
987 	/* Essentially what we now have is a set of tokens each consisting of
988 	 * a hexadecimal number followed by a separator character. (With the
989 	 * exception of the last token which does not have the separator.)
990 	 */
991 	for (i = 0; i < tokens; i++) {
992 		offset = i * (length + 1);
993 
994 		if (i < tokens - 1 && input[offset + length] != separator) {
995 			/* The current token did not end with e.g. a "." */
996 			RETURN_VALIDATION_FAILED
997 		}
998 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
999 			/* The current token is no valid hexadecimal digit */
1000 			RETURN_VALIDATION_FAILED
1001 		}
1002 	}
1003 }
1004 /* }}} */
1005