xref: /PHP-8.2/ext/filter/logical_filters.c (revision f9ce5e79)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Derick Rethans <derick@php.net>                             |
14   |          Pierre-A. Joye <pierre@php.net>                             |
15   |          Kévin Dunglas <dunglas@gmail.com>                           |
16   +----------------------------------------------------------------------+
17 */
18 
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "ext/standard/url.h"
22 #include "ext/pcre/php_pcre.h"
23 
24 #include "zend_multiply.h"
25 
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
28 #endif
29 
30 #ifndef INADDR_NONE
31 # define INADDR_NONE ((unsigned long int) -1)
32 #endif
33 
34 
35 /* {{{ FETCH_DOUBLE_OPTION(var_name, option_name) */
/* Looks up option_name in option_array (if any) and stores it, converted with
 * zval_get_double(), in var_name; var_name##_set records whether the key was
 * present. Requires `option_array` and a `zval *option_val` in scope.
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else. */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
45 /* }}} */
46 
47 /* {{{ FETCH_LONG_OPTION(var_name, option_name) */
/* Looks up option_name in option_array (if any) and stores it, converted with
 * zval_get_long(), in var_name; var_name##_set records whether the key was
 * present. Requires `option_array` and a `zval *option_val` in scope.
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else. */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
57 /* }}} */
58 
59 /* {{{ FETCH_STRING_OPTION(var_name, option_name) */
/* Looks up option_name in option_array (if any). When the (dereferenced)
 * entry is a string, var_name points at its bytes, var_name##_len holds its
 * length and var_name##_set is 1; otherwise var_name is NULL and both the
 * length and the flag are 0. Requires `option_array` and a
 * `zval *option_val` in scope.
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else. */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
73 /* }}} */
74 
75 /* {{{ FETCH_STR_OPTION(var_name, option_name) */
/* Looks up option_name in option_array (if any). When the (dereferenced)
 * entry is a string, var_name receives its zend_string and var_name##_set
 * is 1; otherwise var_name is NULL and the flag is 0. Requires
 * `option_array` and a `zval *option_val` in scope.
 * Wrapped in do { } while (0) so that the expansion is a single statement and
 * stays correct under an unbraced if/else. */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
87 /* }}} */
88 
89 #define FORMAT_IPV4    4
90 #define FORMAT_IPV6    6
91 
92 static int _php_filter_validate_ipv6(const char *str, size_t str_len, int ip[8]);
93 
/* Parses an optionally signed decimal integer without leading zeros.
 *
 * str/str_len: the bytes to parse (no surrounding whitespace allowed).
 * ret:         receives the parsed value on success; untouched on failure.
 *
 * Returns 1 on success and -1 when the input is empty, contains a non-digit,
 * has a leading zero, or does not fit into a zend_long. */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Consume an optional sign; never look at bytes beyond the input. */
	if (str < end) {
		switch (*str) {
			case '-':
				sign = 1;
				ZEND_FALLTHROUGH;
			case '+':
				str++;
				break;
			default:
				break;
		}
	}

	if (str + 1 == end && *str == '0') {
		/* Special cases: 0, +0 and -0. Store the result explicitly so the
		 * caller does not have to pre-initialise *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	/* Cheap length-based rejection before the digit-by-digit overflow test. */
	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* Check the bound before multiplying so the signed arithmetic
			 * itself can never overflow. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
145 /* }}} */
146 
/* Parses a run of octal digits (no prefix, no sign).
 *
 * The value is accumulated as an unsigned integer that may use every bit of
 * a zend_ulong; the final result is cast to zend_long. An empty input yields
 * 0. Returns 1 on success and -1 on a non-octal character or when the value
 * does not fit; *ret is only written on success. */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	/* All bits set: the largest value the accumulator may reach. */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		zend_ulong n;

		if (*cursor < '0' || *cursor > '7') {
			return -1;
		}
		n = (*cursor - '0');

		/* Would shifting in another octal digit overflow? */
		if (acc > limit / 8) {
			return -1;
		}
		acc = acc * 8;
		if (acc > limit - n) {
			return -1;
		}
		acc += n;
	}

	*ret = (zend_long)acc;
	return 1;
}
168 /* }}} */
169 
/* Parses a run of hexadecimal digits (either case, no "0x" prefix, no sign).
 *
 * The value is accumulated as an unsigned integer that may use every bit of
 * a zend_ulong; the final result is cast to zend_long. An empty input yields
 * 0. Returns 1 on success and -1 on a non-hex character or when the value
 * does not fit; *ret is only written on success. */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	/* All bits set: the largest value the accumulator may reach. */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *const stop = str + str_len;
	zend_ulong acc = 0;

	while (str < stop) {
		const char c = *str++;
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = (c - '0');
		} else if (c >= 'a' && c <= 'f') {
			nibble = (c - ('a' - 10));
		} else if (c >= 'A' && c <= 'F') {
			nibble = (c - ('A' - 10));
		} else {
			return -1;
		}

		/* Would shifting in another hex digit overflow? */
		if (acc > limit / 16) {
			return -1;
		}
		acc = acc * 16;
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
195 /* }}} */
196 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)197 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
198 {
199 	zval *option_val;
200 	zend_long  min_range, max_range, option_flags;
201 	int   min_range_set, max_range_set;
202 	int   allow_octal = 0, allow_hex = 0;
203 	size_t	  len;
204 	int error = 0;
205 	zend_long  ctx_value;
206 	char *p;
207 
208 	/* Parse options */
209 	FETCH_LONG_OPTION(min_range,    "min_range");
210 	FETCH_LONG_OPTION(max_range,    "max_range");
211 	option_flags = flags;
212 
213 	len = Z_STRLEN_P(value);
214 
215 	if (len == 0) {
216 		RETURN_VALIDATION_FAILED
217 	}
218 
219 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
220 		allow_octal = 1;
221 	}
222 
223 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
224 		allow_hex = 1;
225 	}
226 
227 	/* Start the validating loop */
228 	p = Z_STRVAL_P(value);
229 	ctx_value = 0;
230 
231 	PHP_FILTER_TRIM_DEFAULT(p, len);
232 
233 	if (*p == '0') {
234 		p++; len--;
235 		if (allow_hex && (*p == 'x' || *p == 'X')) {
236 			p++; len--;
237 			if (len == 0) {
238 				RETURN_VALIDATION_FAILED
239 			}
240 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
241 				error = 1;
242 			}
243 		} else if (allow_octal) {
244 			/* Support explicit octal prefix notation */
245 			if (*p == 'o' || *p == 'O') {
246 				p++; len--;
247 				if (len == 0) {
248 					RETURN_VALIDATION_FAILED
249 				}
250 			}
251 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
252 				error = 1;
253 			}
254 		} else if (len != 0) {
255 			error = 1;
256 		}
257 	} else {
258 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
259 			error = 1;
260 		}
261 	}
262 
263 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
264 		RETURN_VALIDATION_FAILED
265 	} else {
266 		zval_ptr_dtor(value);
267 		ZVAL_LONG(value, ctx_value);
268 		return;
269 	}
270 }
271 /* }}} */
272 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)273 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
274 {
275 	char *str = Z_STRVAL_P(value);
276 	size_t len = Z_STRLEN_P(value);
277 	int ret;
278 
279 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
280 
281 	/* returns true for "1", "true", "on" and "yes"
282 	 * returns false for "0", "false", "off", "no", and ""
283 	 * null otherwise. */
284 	switch (len) {
285 		case 0:
286 			ret = 0;
287 			break;
288 		case 1:
289 			if (*str == '1') {
290 				ret = 1;
291 			} else if (*str == '0') {
292 				ret = 0;
293 			} else {
294 				ret = -1;
295 			}
296 			break;
297 		case 2:
298 			if (strncasecmp(str, "on", 2) == 0) {
299 				ret = 1;
300 			} else if (strncasecmp(str, "no", 2) == 0) {
301 				ret = 0;
302 			} else {
303 				ret = -1;
304 			}
305 			break;
306 		case 3:
307 			if (strncasecmp(str, "yes", 3) == 0) {
308 				ret = 1;
309 			} else if (strncasecmp(str, "off", 3) == 0) {
310 				ret = 0;
311 			} else {
312 				ret = -1;
313 			}
314 			break;
315 		case 4:
316 			if (strncasecmp(str, "true", 4) == 0) {
317 				ret = 1;
318 			} else {
319 				ret = -1;
320 			}
321 			break;
322 		case 5:
323 			if (strncasecmp(str, "false", 5) == 0) {
324 				ret = 0;
325 			} else {
326 				ret = -1;
327 			}
328 			break;
329 		default:
330 			ret = -1;
331 	}
332 
333 	if (ret == -1) {
334 		RETURN_VALIDATION_FAILED
335 	} else {
336 		zval_ptr_dtor(value);
337 		ZVAL_BOOL(value, ret);
338 	}
339 }
340 /* }}} */
341 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)342 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
343 {
344 	size_t len;
345 	char *str, *end;
346 	char *num, *p;
347 	zval *option_val;
348 	char *decimal;
349 	int decimal_set;
350 	size_t decimal_len;
351 	char dec_sep = '.';
352 	char *thousand;
353 	int thousand_set;
354 	size_t thousand_len;
355 	char *tsd_sep;
356 
357 	zend_long lval;
358 	double dval;
359 	double min_range, max_range;
360 	int   min_range_set, max_range_set;
361 
362 	int first, n;
363 
364 	len = Z_STRLEN_P(value);
365 	str = Z_STRVAL_P(value);
366 
367 	PHP_FILTER_TRIM_DEFAULT(str, len);
368 	end = str + len;
369 
370 	FETCH_STRING_OPTION(decimal, "decimal");
371 
372 	if (decimal_set) {
373 		if (decimal_len != 1) {
374 			zend_value_error("%s(): \"decimal\" option must be one character long", get_active_function_name());
375 			RETURN_VALIDATION_FAILED
376 		} else {
377 			dec_sep = *decimal;
378 		}
379 	}
380 
381 	FETCH_STRING_OPTION(thousand, "thousand");
382 
383 	if (thousand_set) {
384 		if (thousand_len < 1) {
385 			zend_value_error("%s(): \"thousand\" option cannot be empty", get_active_function_name());
386 			RETURN_VALIDATION_FAILED
387 		} else {
388 			tsd_sep = thousand;
389 		}
390 	} else {
391 		tsd_sep = "',.";
392 	}
393 
394 	FETCH_DOUBLE_OPTION(min_range, "min_range");
395 	FETCH_DOUBLE_OPTION(max_range, "max_range");
396 
397 	num = p = emalloc(len+1);
398 	if (str < end && (*str == '+' || *str == '-')) {
399 		*p++ = *str++;
400 	}
401 	first = 1;
402 	while (1) {
403 		n = 0;
404 		while (str < end && *str >= '0' && *str <= '9') {
405 			++n;
406 			*p++ = *str++;
407 		}
408 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
409 			if (!first && n != 3) {
410 				goto error;
411 			}
412 			if (*str == dec_sep) {
413 				*p++ = '.';
414 				str++;
415 				while (str < end && *str >= '0' && *str <= '9') {
416 					*p++ = *str++;
417 				}
418 			}
419 			if (*str == 'e' || *str == 'E') {
420 				*p++ = *str++;
421 				if (str < end && (*str == '+' || *str == '-')) {
422 					*p++ = *str++;
423 				}
424 				while (str < end && *str >= '0' && *str <= '9') {
425 					*p++ = *str++;
426 				}
427 			}
428 			break;
429 		}
430 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
431 			if (first?(n < 1 || n > 3):(n != 3)) {
432 				goto error;
433 			}
434 			first = 0;
435 			str++;
436 		} else {
437 			goto error;
438 		}
439 	}
440 	if (str != end) {
441 		goto error;
442 	}
443 	*p = 0;
444 
445 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
446 		case IS_LONG:
447 			if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
448 				goto error;
449 			}
450 			zval_ptr_dtor(value);
451 			ZVAL_DOUBLE(value, (double)lval);
452 			break;
453 		case IS_DOUBLE:
454 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
455 				goto error;
456 			}
457 			if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
458 				goto error;
459 			}
460 			zval_ptr_dtor(value);
461 			ZVAL_DOUBLE(value, dval);
462 			break;
463 		default:
464 error:
465 			efree(num);
466 			RETURN_VALIDATION_FAILED
467 	}
468 	efree(num);
469 }
470 /* }}} */
471 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)472 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
473 {
474 	zval *option_val;
475 	zend_string *regexp;
476 	int regexp_set;
477 	pcre2_code *re = NULL;
478 	pcre2_match_data *match_data = NULL;
479 	uint32_t capture_count;
480 	int rc;
481 
482 	/* Parse options */
483 	FETCH_STR_OPTION(regexp, "regexp");
484 
485 	if (!regexp_set) {
486 		zend_value_error("%s(): \"regexp\" option is missing", get_active_function_name());
487 		RETURN_VALIDATION_FAILED
488 	}
489 
490 	re = pcre_get_compiled_regex(regexp, &capture_count);
491 	if (!re) {
492 		RETURN_VALIDATION_FAILED
493 	}
494 	match_data = php_pcre_create_match_data(capture_count, re);
495 	if (!match_data) {
496 		RETURN_VALIDATION_FAILED
497 	}
498 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
499 	php_pcre_free_match_data(match_data);
500 
501 	/* 0 means that the vector is too small to hold all the captured substring offsets */
502 	if (rc < 0) {
503 		RETURN_VALIDATION_FAILED
504 	}
505 }
506 
/* Validates a domain name.
 *
 * domain/len: the name; one trailing dot is ignored. The buffer is expected
 *             to be NUL-terminated (the byte at domain[len] may be read).
 * flags:      with FILTER_FLAG_HOSTNAME every label must additionally start
 *             and end with an alphanumeric character and may only contain
 *             alphanumerics and hyphens.
 *
 * Always enforced: at most 253 characters in total, at most 63 per label,
 * no leading dot and no empty labels. Returns 1 when valid, 0 otherwise. */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	const char *s = domain;
	const char *e = domain + len;
	size_t l = len;
	int hostname = flags & FILTER_FLAG_HOSTNAME;
	unsigned char i = 1;

	/* Ignore trailing dot */
	if (l > 0 && *(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if (*s == '.' || (hostname && !isalnum((int)(unsigned char)*s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)(unsigned char)*(s - 1)) || !isalnum((int)(unsigned char)*(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* A hyphen is fine inside a label but not as the last character
			 * of the name. Compare against the logical end `e` (not against
			 * a NUL byte) so that the rule also holds when a trailing dot
			 * was stripped, e.g. for "a-.". */
			if (i > 63 || (hostname && (*s != '-' || s + 1 == e) && !isalnum((int)(unsigned char)*s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
557 /* }}} */
558 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)559 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
560 {
561 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
562 		RETURN_VALIDATION_FAILED
563 	}
564 }
565 /* }}} */
566 
/* Checks that a URL user or password component only consists of
 * alphanumerics, the characters -._~!$&'()*+,;=: and percent-encoded octets
 * ("%" followed by two hexadecimal digits).
 *
 * Returns 1 when every byte of `str` is acceptable, 0 otherwise. */
static int is_userinfo_valid(zend_string *str)
{
	static const char valid[] = "-._~!$&'()*+,;=:";
	/* ctype functions require values representable as unsigned char. */
	const unsigned char *p = (const unsigned char *)ZSTR_VAL(str);
	const unsigned char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* strchr() would also match the terminating NUL of `valid`, so a
		 * NUL byte has to be excluded explicitly. */
		if (isalnum(*p) || (*p != '\0' && strchr(valid, *p) != NULL)) {
			p++;
		} else if (*p == '%' && end - p >= 3 && isxdigit(p[1]) && isxdigit(p[2])) {
			/* Both digits of a percent-encoded octet are hexadecimal. */
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
582 
/* Returns true when the `l` bytes at `s` form an IPv6 address enclosed in
 * square brackets, e.g. "[::1]". */
static bool php_filter_is_valid_ipv6_hostname(const char *s, size_t l)
{
	/* "[" and "]" alone already take two bytes; checking first keeps
	 * `l - 1` and `l - 2` from wrapping around for shorter input. */
	if (l < 2) {
		return false;
	}

	return s[0] == '[' && s[l - 1] == ']' && _php_filter_validate_ipv6(s + 1, l - 2, NULL);
}
590 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)591 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
592 {
593 	php_url *url;
594 	size_t old_len = Z_STRLEN_P(value);
595 
596 	php_filter_url(value, flags, option_array, charset);
597 
598 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
599 		RETURN_VALIDATION_FAILED
600 	}
601 
602 	/* Use parse_url - if it returns false, we return NULL */
603 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
604 
605 	if (url == NULL) {
606 		RETURN_VALIDATION_FAILED
607 	}
608 
609 	if (url->scheme != NULL &&
610 		(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
611 		const char *s;
612 		size_t l;
613 
614 		if (url->host == NULL) {
615 			goto bad_url;
616 		}
617 
618 		s = ZSTR_VAL(url->host);
619 		l = ZSTR_LEN(url->host);
620 
621 		if (
622 			/* An IPv6 enclosed by square brackets is a valid hostname.*/
623 			!php_filter_is_valid_ipv6_hostname(s, l) &&
624 			/* Validate domain.
625 			 * This includes a loose check for an IPv4 address. */
626 			!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)
627 		) {
628 			php_url_free(url);
629 			RETURN_VALIDATION_FAILED
630 		}
631 	}
632 
633 	if (
634 		url->scheme == NULL ||
635 		/* some schemas allow the host to be empty */
636 		(url->host == NULL && (!zend_string_equals_literal(url->scheme, "mailto") && !zend_string_equals_literal(url->scheme, "news") && !zend_string_equals_literal(url->scheme, "file"))) ||
637 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
638 	) {
639 bad_url:
640 		php_url_free(url);
641 		RETURN_VALIDATION_FAILED
642 	}
643 
644 	if ((url->user != NULL && !is_userinfo_valid(url->user))
645 		|| (url->pass != NULL && !is_userinfo_valid(url->pass))
646 	) {
647 		php_url_free(url);
648 		RETURN_VALIDATION_FAILED
649 
650 	}
651 
652 	php_url_free(url);
653 }
654 /* }}} */
655 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)656 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
657 {
658 	/*
659 	 * The regex below is based on a regex by Michael Rushton.
660 	 * However, it is not identical.  I changed it to only consider routeable
661 	 * addresses as valid.  Michael's regex considers a@b a valid address
662 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
663 	 *
664 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
665 	 *   when domain names are used in SMTP.  In other words, names that can
666 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
667 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
668 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
669 	 *   unqualified names MUST NOT be used.
670 	 *
671 	 * This regex does not handle comments and folding whitespace.  While
672 	 * this is technically valid in an email address, these parts aren't
673 	 * actually part of the address itself.
674 	 *
675 	 * Michael's regex carries this copyright:
676 	 *
677 	 * Copyright © Michael Rushton 2009-10
678 	 * http://squiloople.com/
679 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
680 	 *
681 	 */
682 	pcre2_code *re = NULL;
683 	pcre2_match_data *match_data = NULL;
684 	uint32_t capture_count;
685 	zend_string *sregexp;
686 	int rc;
687 	const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
688 	const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
689 	const char *regexp;
690 	size_t regexp_len;
691 
692 	if (flags & FILTER_FLAG_EMAIL_UNICODE) {
693 		regexp = regexp0;
694 		regexp_len = sizeof(regexp0) - 1;
695 	} else {
696 		regexp = regexp1;
697 		regexp_len = sizeof(regexp1) - 1;
698 	}
699 
700 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
701 	if (Z_STRLEN_P(value) > 320) {
702 		RETURN_VALIDATION_FAILED
703 	}
704 
705 	sregexp = zend_string_init(regexp, regexp_len, 0);
706 	re = pcre_get_compiled_regex(sregexp, &capture_count);
707 	zend_string_release_ex(sregexp, 0);
708 	if (!re) {
709 		RETURN_VALIDATION_FAILED
710 	}
711 	match_data = php_pcre_create_match_data(capture_count, re);
712 	if (!match_data) {
713 		RETURN_VALIDATION_FAILED
714 	}
715 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
716 	php_pcre_free_match_data(match_data);
717 
718 	/* 0 means that the vector is too small to hold all the captured substring offsets */
719 	if (rc < 0) {
720 		RETURN_VALIDATION_FAILED
721 	}
722 
723 }
724 /* }}} */
725 
/* Validates a dotted-quad IPv4 address.
 *
 * str/str_len: the candidate address; it must consist of exactly four
 *              decimal fields (0-255, at most three digits, no leading
 *              zeros) separated by single dots, with nothing before or after.
 * ip:          receives the four field values; fields parsed before a
 *              failure may already have been written.
 *
 * Returns 1 when the address is valid, 0 otherwise. */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *const stop = str + str_len;
	int field;

	for (field = 0; field < 4; field++) {
		int octet, ndigits, starts_with_zero;

		/* Every field needs at least one digit. */
		if (cur >= stop || *cur < '0' || *cur > '9') {
			return 0;
		}

		starts_with_zero = (*cur == '0');
		octet = *cur++ - '0';
		ndigits = 1;

		while (cur < stop && *cur >= '0' && *cur <= '9') {
			octet = octet * 10 + (*cur++ - '0');
			if (octet > 255 || ++ndigits > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (starts_with_zero && (octet != 0 || ndigits > 1)) {
			return 0;
		}

		ip[field] = octet;

		if (field == 3) {
			/* Nothing may follow the fourth field. */
			return cur == stop;
		}
		if (cur >= stop || *cur++ != '.') {
			return 0;
		}
	}

	return 0;
}
759 /* }}} */
760 
/* Validates an IPv6 address in textual form, including "::" compression and
 * a trailing embedded IPv4 address.
 *
 * str/str_len: the candidate address (without brackets).
 * ip:          optional; when non-NULL it receives the eight 16-bit groups
 *              of the address. Its content is unspecified when 0 is returned.
 *
 * Returns non-zero when the address is valid, 0 otherwise. */
static int _php_filter_validate_ipv6(const char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1;
	int blocks = 0;
	int num, n, i;
	char *ipv4;
	const char *end;
	int ip4elm[4];
	const char *s = str;

	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Walk back to the start of the dotted quad (just after a ':'). */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The dotted quad stands for the last two 16-bit groups. */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				/* Only one "::" is allowed. */
				if (compressed_pos >= 0) {
					return 0;
				}
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}

		/* Parse one group of 1 to 4 hexadecimal digits. */
		num = n = 0;
		while (str < end) {
			int digit;

			if (*str >= '0' && *str <= '9') {
				digit = *str - '0';
			} else if (*str >= 'a' && *str <= 'f') {
				digit = (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				digit = (*str - 'A') + 10;
			} else {
				break;
			}
			/* Reject over-long groups before accumulating, so that `num`
			 * can never overflow on a long run of hex digits. */
			if (++n > 4) {
				return 0;
			}
			num = 16 * num + digit;
			str++;
		}
		if (n < 1) {
			return 0;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		if (++blocks > 8)
			return 0;
	}

fixup_ip:
	if (ip && ipv4) {
		/* NOTE(review): any address with an embedded dotted quad is
		 * reported as the IPv4-mapped address ::ffff:a.b.c.d, whatever
		 * groups precede the quad - confirm that this is intended. */
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Expand "::": move the groups that follow it to the end and fill
		 * the gap with zeros. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
870 /* }}} */
871 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)872 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
873 {
874 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
875 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
876 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
877 	 * colon determine the format */
878 
879 	int            ip[8];
880 	int            mode;
881 
882 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
883 		mode = FORMAT_IPV6;
884 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
885 		mode = FORMAT_IPV4;
886 	} else {
887 		RETURN_VALIDATION_FAILED
888 	}
889 
890 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
891 		/* Both formats are cool */
892 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
893 		RETURN_VALIDATION_FAILED
894 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
895 		RETURN_VALIDATION_FAILED
896 	}
897 
898 	switch (mode) {
899 		case FORMAT_IPV4:
900 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
901 				RETURN_VALIDATION_FAILED
902 			}
903 
904 			/* Check flags */
905 			if (flags & FILTER_FLAG_NO_PRIV_RANGE  || flags & FILTER_FLAG_GLOBAL_RANGE) {
906 				if (
907 					(ip[0] == 10) ||
908 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
909 					(ip[0] == 192 && ip[1] == 168)
910 				) {
911 					RETURN_VALIDATION_FAILED
912 				}
913 			}
914 
915 			if (flags & FILTER_FLAG_NO_RES_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
916 				if (
917 					(ip[0] == 0) ||
918 					(ip[0] >= 240) ||
919 					(ip[0] == 127) ||
920 					(ip[0] == 169 && ip[1] == 254)
921 				) {
922 					RETURN_VALIDATION_FAILED
923 				}
924 			}
925 
926 			if (flags & FILTER_FLAG_GLOBAL_RANGE) {
927 				if (
928 						(ip[0] == 100 && ip[1] >= 64 && ip[1] <= 127 ) ||
929 						(ip[0] == 192 && ip[1] == 0 && ip[2] == 0 ) ||
930 						(ip[0] == 192 && ip[1] == 0 && ip[2] == 2 ) ||
931 						(ip[0] == 198 && ip[1] >= 18 && ip[1] <= 19 ) ||
932 						(ip[0] == 198 && ip[1] == 51 && ip[2] == 100 ) ||
933 						(ip[0] == 203 && ip[1] == 0 && ip[2] == 113 )
934 		   ) {
935 					RETURN_VALIDATION_FAILED
936 				}
937 			}
938 
939 			break;
940 
941 		case FORMAT_IPV6:
942 			{
943 				int res = 0;
944 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
945 				if (res < 1) {
946 					RETURN_VALIDATION_FAILED
947 				}
948 				/* Check flags */
949 				if (flags & FILTER_FLAG_NO_PRIV_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
950 					if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
951 						RETURN_VALIDATION_FAILED
952 					}
953 				}
954 				if (flags & FILTER_FLAG_NO_RES_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
955 					if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
956 							&& ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
957 						|| (ip[0] == 0x5f)
958 						|| (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
959 						|| (ip[0] == 0x2001 && (ip[1] == 0x0db8 || (ip[1] >= 0x0010 && ip[1] <= 0x001f)))
960 						|| (ip[0] == 0x3ff3)
961 								) {
962 									RETURN_VALIDATION_FAILED
963 								}
964 				}
965 				if (flags & FILTER_FLAG_GLOBAL_RANGE) {
966 					if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0 && ip[4] == 0 && ip[5] == 0xffff) ||
967 							(ip[0] == 0x0100 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0) ||
968 							(ip[0] == 0x2001 && ip[1] <= 0x01ff) ||
969 							(ip[0] == 0x2001 && ip[1] == 0x0002 && ip[2] == 0) ||
970 							(ip[0] >= 0xfc00 && ip[0] <= 0xfdff)
971 					   ) {
972 						RETURN_VALIDATION_FAILED
973 					}
974 				}
975 			}
976 			break;
977 	}
978 }
979 /* }}} */
980 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)981 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
982 {
983 	char *input = Z_STRVAL_P(value);
984 	size_t input_len = Z_STRLEN_P(value);
985 	int tokens, length, i, offset, exp_separator_set;
986 	size_t exp_separator_len;
987 	char separator;
988 	char *exp_separator;
989 	zend_long ret = 0;
990 	zval *option_val;
991 
992 	FETCH_STRING_OPTION(exp_separator, "separator");
993 
994 	if (exp_separator_set && exp_separator_len != 1) {
995 		zend_value_error("%s(): \"separator\" option must be one character long", get_active_function_name());
996 		RETURN_VALIDATION_FAILED;
997 	}
998 
999 	if (14 == input_len) {
1000 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
1001 		 * commonly used but valid nonetheless.
1002 		 */
1003 		tokens = 3;
1004 		length = 4;
1005 		separator = '.';
1006 	} else if (17 == input_len && input[2] == '-') {
1007 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
1008 		tokens = 6;
1009 		length = 2;
1010 		separator = '-';
1011 	} else if (17 == input_len && input[2] == ':') {
1012 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
1013 		tokens = 6;
1014 		length = 2;
1015 		separator = ':';
1016 	} else {
1017 		RETURN_VALIDATION_FAILED;
1018 	}
1019 
1020 	if (exp_separator_set && separator != exp_separator[0]) {
1021 		RETURN_VALIDATION_FAILED;
1022 	}
1023 
1024 	/* Essentially what we now have is a set of tokens each consisting of
1025 	 * a hexadecimal number followed by a separator character. (With the
1026 	 * exception of the last token which does not have the separator.)
1027 	 */
1028 	for (i = 0; i < tokens; i++) {
1029 		offset = i * (length + 1);
1030 
1031 		if (i < tokens - 1 && input[offset + length] != separator) {
1032 			/* The current token did not end with e.g. a "." */
1033 			RETURN_VALIDATION_FAILED
1034 		}
1035 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
1036 			/* The current token is no valid hexadecimal digit */
1037 			RETURN_VALIDATION_FAILED
1038 		}
1039 	}
1040 }
1041 /* }}} */
1042