xref: /PHP-7.4/ext/filter/logical_filters.c (revision dce5e561)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) The PHP Group                                          |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   |          Pierre-A. Joye <pierre@php.net>                             |
17   |          Kévin Dunglas <dunglas@gmail.com>                           |
18   +----------------------------------------------------------------------+
19 */
20 
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "ext/standard/url.h"
24 #include "ext/pcre/php_pcre.h"
25 
26 #include "zend_multiply.h"
27 
28 #if HAVE_ARPA_INET_H
29 # include <arpa/inet.h>
30 #endif
31 
32 #ifndef INADDR_NONE
33 # define INADDR_NONE ((unsigned long int) -1)
34 #endif
35 
36 
/* {{{ FETCH_DOUBLE_OPTION(var_name, option_name)
 *
 * Reads the option `option_name` from `option_array` as a double.
 *
 * Resets `var_name` to 0 and `var_name_set` to 0, then, if `option_array`
 * is non-NULL and contains the key, stores zval_get_double() of the entry
 * in `var_name` and sets `var_name_set` to 1.
 *
 * Requirements at the expansion site:
 *   - `option_array` and `option_val` (zval *) must be in scope;
 *   - `var_name` and `var_name_set` must be declared;
 *   - `option_name` must be a string literal (its length is taken with sizeof).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; terminate the invocation with ';'.
 */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
48 
/* {{{ FETCH_LONG_OPTION(var_name, option_name)
 *
 * Reads the option `option_name` from `option_array` as a zend_long.
 *
 * Resets `var_name` to 0 and `var_name_set` to 0, then, if `option_array`
 * is non-NULL and contains the key, stores zval_get_long() of the entry in
 * `var_name` and sets `var_name_set` to 1.
 *
 * Requirements at the expansion site:
 *   - `option_array` and `option_val` (zval *) must be in scope;
 *   - `var_name` and `var_name_set` must be declared;
 *   - `option_name` must be a string literal (its length is taken with sizeof).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; terminate the invocation with ';'.
 */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
60 
/* {{{ FETCH_STRING_OPTION(var_name, option_name)
 *
 * Reads the option `option_name` from `option_array` as a raw char buffer.
 *
 * Resets `var_name` to NULL and `var_name_set` / `var_name_len` to 0. If
 * `option_array` is non-NULL, the key exists (looked up with
 * zend_hash_str_find_deref()) and the entry is an IS_STRING, `var_name` is
 * pointed at the entry's character data, `var_name_len` receives its length
 * and `var_name_set` becomes 1. Entries of any other type are ignored.
 *
 * The pointer stored in `var_name` aliases the option zval; nothing is copied.
 *
 * Requirements at the expansion site:
 *   - `option_array` and `option_val` (zval *) must be in scope;
 *   - `var_name`, `var_name_set` and `var_name_len` must be declared;
 *   - `option_name` must be a string literal (its length is taken with sizeof).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; terminate the invocation with ';'.
 */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
76 
/* {{{ FETCH_STR_OPTION(var_name, option_name)
 *
 * Reads the option `option_name` from `option_array` as a zend_string.
 *
 * Resets `var_name` to NULL and `var_name_set` to 0. If `option_array` is
 * non-NULL, the key exists (looked up with zend_hash_str_find_deref()) and
 * the entry is an IS_STRING, `var_name` receives the entry's zend_string
 * (Z_STR_P) and `var_name_set` becomes 1. Entries of any other type are
 * ignored.
 *
 * Requirements at the expansion site:
 *   - `option_array` and `option_val` (zval *) must be in scope;
 *   - `var_name` and `var_name_set` must be declared;
 *   - `option_name` must be a string literal (its length is taken with sizeof).
 *
 * Wrapped in do { } while (0) so the expansion is a single statement and is
 * safe inside unbraced if/else; terminate the invocation with ';'.
 */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
90 
/* Address-family tags used by php_filter_validate_ip(): it picks one of them
 * from the presence of ':' or '.' in the input and then switches on it. */
91 #define FORMAT_IPV4    4
92 #define FORMAT_IPV6    6
93 
/* Forward declaration: php_filter_validate_url() calls the IPv6 validator
 * before its definition appears further down in this file. */
94 static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]);
95 
/**
 * Parse an optionally signed decimal integer that has no leading zeros.
 *
 * Accepted forms are "+0", "-0", "0", or an optional '+'/'-' followed by a
 * digit 1..9 and any number of further digits, as long as the value fits in
 * a zend_long.
 *
 * @param str      Start of the text (need not be NUL-terminated at str_len).
 * @param str_len  Number of bytes to parse.
 * @param ret      Receives the parsed value on success.
 * @return 1 on success, -1 on malformed input or overflow.
 *
 * Unlike the previous revision, *ret is written on every successful return
 * (including the signed-zero special case) and no byte at or beyond
 * str + str_len is inspected while looking for the sign.
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Consume an optional sign. */
	if (str < end) {
		if (*str == '-') {
			sign = 1;
			str++;
		} else if (*str == '+') {
			str++;
		}
	}

	if (str + 1 == end && *str == '0') {
		/* Special cases: +0 and -0 */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9 */
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	/* Cheap length-based rejection before the digit loop. */
	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* Each branch checks the bound before multiplying, so the
			 * accumulator itself never overflows. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
/* }}} */
147 
/**
 * Parse a run of octal digits (0..7) into *ret.
 *
 * The accumulator is unsigned and bounded by the all-ones zend_ulong value;
 * the final result is cast to zend_long. An empty input yields 0.
 *
 * @return 1 on success, -1 on a non-octal byte or on overflow.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	zend_ulong acc = 0;

	for (; str < stop; str++) {
		zend_ulong d;

		if (*str < '0' || *str > '7') {
			return -1;
		}
		d = (*str - '0');

		/* Check before scaling, then before adding the new digit. */
		if (acc > limit / 8) {
			return -1;
		}
		acc *= 8;
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
170 
/**
 * Parse a run of hexadecimal digits (0-9, a-f, A-F) into *ret.
 *
 * The accumulator is unsigned and bounded by the all-ones zend_ulong value;
 * the final result is cast to zend_long. An empty input yields 0.
 *
 * @return 1 on success, -1 on a non-hex byte or on overflow.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	zend_ulong acc = 0;

	for (; str < stop; str++) {
		const char c = *str;
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = (c - '0');
		} else if (c >= 'a' && c <= 'f') {
			nibble = (c - ('a' - 10));
		} else if (c >= 'A' && c <= 'F') {
			nibble = (c - ('A' - 10));
		} else {
			return -1;
		}

		/* Check before scaling, then before adding the new digit. */
		if (acc > limit / 16) {
			return -1;
		}
		acc *= 16;
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
197 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)198 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
199 {
200 	zval *option_val;
201 	zend_long  min_range, max_range, option_flags;
202 	int   min_range_set, max_range_set;
203 	int   allow_octal = 0, allow_hex = 0;
204 	size_t	  len;
205 	int error = 0;
206 	zend_long  ctx_value;
207 	char *p;
208 
209 	/* Parse options */
210 	FETCH_LONG_OPTION(min_range,    "min_range");
211 	FETCH_LONG_OPTION(max_range,    "max_range");
212 	option_flags = flags;
213 
214 	len = Z_STRLEN_P(value);
215 
216 	if (len == 0) {
217 		RETURN_VALIDATION_FAILED
218 	}
219 
220 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
221 		allow_octal = 1;
222 	}
223 
224 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
225 		allow_hex = 1;
226 	}
227 
228 	/* Start the validating loop */
229 	p = Z_STRVAL_P(value);
230 	ctx_value = 0;
231 
232 	PHP_FILTER_TRIM_DEFAULT(p, len);
233 
234 	if (*p == '0') {
235 		p++; len--;
236 		if (allow_hex && (*p == 'x' || *p == 'X')) {
237 			p++; len--;
238 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
239 				error = 1;
240 			}
241 		} else if (allow_octal) {
242 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
243 				error = 1;
244 			}
245 		} else if (len != 0) {
246 			error = 1;
247 		}
248 	} else {
249 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
250 			error = 1;
251 		}
252 	}
253 
254 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
255 		RETURN_VALIDATION_FAILED
256 	} else {
257 		zval_ptr_dtor(value);
258 		ZVAL_LONG(value, ctx_value);
259 		return;
260 	}
261 }
262 /* }}} */
263 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)264 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
265 {
266 	char *str = Z_STRVAL_P(value);
267 	size_t len = Z_STRLEN_P(value);
268 	int ret;
269 
270 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
271 
272 	/* returns true for "1", "true", "on" and "yes"
273 	 * returns false for "0", "false", "off", "no", and ""
274 	 * null otherwise. */
275 	switch (len) {
276 		case 0:
277 			ret = 0;
278 			break;
279 		case 1:
280 			if (*str == '1') {
281 				ret = 1;
282 			} else if (*str == '0') {
283 				ret = 0;
284 			} else {
285 				ret = -1;
286 			}
287 			break;
288 		case 2:
289 			if (strncasecmp(str, "on", 2) == 0) {
290 				ret = 1;
291 			} else if (strncasecmp(str, "no", 2) == 0) {
292 				ret = 0;
293 			} else {
294 				ret = -1;
295 			}
296 			break;
297 		case 3:
298 			if (strncasecmp(str, "yes", 3) == 0) {
299 				ret = 1;
300 			} else if (strncasecmp(str, "off", 3) == 0) {
301 				ret = 0;
302 			} else {
303 				ret = -1;
304 			}
305 			break;
306 		case 4:
307 			if (strncasecmp(str, "true", 4) == 0) {
308 				ret = 1;
309 			} else {
310 				ret = -1;
311 			}
312 			break;
313 		case 5:
314 			if (strncasecmp(str, "false", 5) == 0) {
315 				ret = 0;
316 			} else {
317 				ret = -1;
318 			}
319 			break;
320 		default:
321 			ret = -1;
322 	}
323 
324 	if (ret == -1) {
325 		RETURN_VALIDATION_FAILED
326 	} else {
327 		zval_ptr_dtor(value);
328 		ZVAL_BOOL(value, ret);
329 	}
330 }
331 /* }}} */
332 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)333 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
334 {
335 	size_t len;
336 	char *str, *end;
337 	char *num, *p;
338 	zval *option_val;
339 	char *decimal;
340 	int decimal_set;
341 	size_t decimal_len;
342 	char dec_sep = '.';
343 	char *thousand;
344 	int thousand_set;
345 	size_t thousand_len;
346 	char *tsd_sep;
347 
348 	zend_long lval;
349 	double dval;
350 	double min_range, max_range;
351 	int   min_range_set, max_range_set;
352 
353 	int first, n;
354 
355 	len = Z_STRLEN_P(value);
356 	str = Z_STRVAL_P(value);
357 
358 	PHP_FILTER_TRIM_DEFAULT(str, len);
359 	end = str + len;
360 
361 	FETCH_STRING_OPTION(decimal, "decimal");
362 
363 	if (decimal_set) {
364 		if (decimal_len != 1) {
365 			php_error_docref(NULL, E_WARNING, "decimal separator must be one char");
366 			RETURN_VALIDATION_FAILED
367 		} else {
368 			dec_sep = *decimal;
369 		}
370 	}
371 
372 	FETCH_STRING_OPTION(thousand, "thousand");
373 
374 	if (thousand_set) {
375 		if (thousand_len < 1) {
376 			php_error_docref(NULL, E_WARNING, "thousand separator must be at least one char");
377 			RETURN_VALIDATION_FAILED
378 		} else {
379 			tsd_sep = thousand;
380 		}
381 	} else {
382 		tsd_sep = "',.";
383 	}
384 
385 	FETCH_DOUBLE_OPTION(min_range, "min_range");
386 	FETCH_DOUBLE_OPTION(max_range, "max_range");
387 
388 	num = p = emalloc(len+1);
389 	if (str < end && (*str == '+' || *str == '-')) {
390 		*p++ = *str++;
391 	}
392 	first = 1;
393 	while (1) {
394 		n = 0;
395 		while (str < end && *str >= '0' && *str <= '9') {
396 			++n;
397 			*p++ = *str++;
398 		}
399 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
400 			if (!first && n != 3) {
401 				goto error;
402 			}
403 			if (*str == dec_sep) {
404 				*p++ = '.';
405 				str++;
406 				while (str < end && *str >= '0' && *str <= '9') {
407 					*p++ = *str++;
408 				}
409 			}
410 			if (*str == 'e' || *str == 'E') {
411 				*p++ = *str++;
412 				if (str < end && (*str == '+' || *str == '-')) {
413 					*p++ = *str++;
414 				}
415 				while (str < end && *str >= '0' && *str <= '9') {
416 					*p++ = *str++;
417 				}
418 			}
419 			break;
420 		}
421 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
422 			if (first?(n < 1 || n > 3):(n != 3)) {
423 				goto error;
424 			}
425 			first = 0;
426 			str++;
427 		} else {
428 			goto error;
429 		}
430 	}
431 	if (str != end) {
432 		goto error;
433 	}
434 	*p = 0;
435 
436 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
437 		case IS_LONG:
438 			if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
439 				goto error;
440 			}
441 			zval_ptr_dtor(value);
442 			ZVAL_DOUBLE(value, (double)lval);
443 			break;
444 		case IS_DOUBLE:
445 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
446 				goto error;
447 			}
448 			if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
449 				goto error;
450 			}
451 			zval_ptr_dtor(value);
452 			ZVAL_DOUBLE(value, dval);
453 			break;
454 		default:
455 error:
456 			efree(num);
457 			RETURN_VALIDATION_FAILED
458 	}
459 	efree(num);
460 }
461 /* }}} */
462 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)463 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
464 {
465 	zval *option_val;
466 	zend_string *regexp;
467 	int regexp_set;
468 	pcre2_code *re = NULL;
469 	pcre2_match_data *match_data = NULL;
470 	uint32_t capture_count;
471 	int rc;
472 
473 	/* Parse options */
474 	FETCH_STR_OPTION(regexp, "regexp");
475 
476 	if (!regexp_set) {
477 		php_error_docref(NULL, E_WARNING, "'regexp' option missing");
478 		RETURN_VALIDATION_FAILED
479 	}
480 
481 	re = pcre_get_compiled_regex(regexp, &capture_count);
482 	if (!re) {
483 		RETURN_VALIDATION_FAILED
484 	}
485 	match_data = php_pcre_create_match_data(capture_count, re);
486 	if (!match_data) {
487 		RETURN_VALIDATION_FAILED
488 	}
489 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
490 	php_pcre_free_match_data(match_data);
491 
492 	/* 0 means that the vector is too small to hold all the captured substring offsets */
493 	if (rc < 0) {
494 		RETURN_VALIDATION_FAILED
495 	}
496 }
497 
/**
 * Check the length limits and dot/label structure of a domain name.
 *
 * Rules enforced (a single trailing dot is ignored):
 *   - total length at most 253 bytes;
 *   - no label longer than 63 bytes;
 *   - no leading dot and no empty label ("..");
 *   - with FILTER_FLAG_HOSTNAME in `flags`: labels consist of alphanumerics
 *     and '-', and start and end with an alphanumeric.
 *
 * @param domain  Name to check; the byte at domain[len] must be readable
 *                (the scan peeks one byte past a dot).
 * @param len     Length in bytes. Declared size_t so the lengths callers pass
 *                (size_t values) are not truncated.
 * @param flags   Filter flags; only FILTER_FLAG_HOSTNAME is consulted.
 * @return 1 when the name is acceptable, 0 otherwise.
 *
 * An empty name is handled without reading the byte before `domain`.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s, *t;
	size_t l;
	int hostname = flags & FILTER_FLAG_HOSTNAME;
	unsigned char i = 1;

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot */
	if (l > 0) {
		t = e - 1;
		if (*t == '.') {
			e = t;
			l--;
		}
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric.
			 * s > domain here because a leading dot was rejected above. */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* i is the 1-based position of *s inside the current label. */
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
/* }}} */
549 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)550 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
551 {
552 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
553 		RETURN_VALIDATION_FAILED
554 	}
555 }
556 /* }}} */
557 
/**
 * Check that a URL user or password component only contains characters
 * permitted in userinfo: alphanumerics, the punctuation listed in `valid`,
 * and percent-escapes of the form "%" HEXDIG HEXDIG.
 *
 * @return 1 when every byte of `str` is acceptable, 0 otherwise.
 *
 * Changes over the previous revision:
 *   - both escape digits are checked with isxdigit(), so escapes such as
 *     "%C3" are accepted (the first digit used to be limited to 0-9);
 *   - the remaining-length test is done on pointers, avoiding the unsigned
 *     wrap of "length - 3" for strings shorter than three bytes;
 *   - an embedded NUL byte is rejected (strchr() would otherwise match the
 *     terminator of `valid`);
 *   - bytes are converted to unsigned char before reaching <ctype.h>.
 */
static int is_userinfo_valid(zend_string *str)
{
	const char *valid = "-._~!$&'()*+,;=:";
	const char *p = ZSTR_VAL(str);
	const char *end = p + ZSTR_LEN(str);

	while (p < end) {
		unsigned char c = (unsigned char)*p;

		if (isalnum(c) || (c != '\0' && strchr(valid, c))) {
			p++;
		} else if (c == '%' && end - p >= 3
			&& isxdigit((unsigned char)p[1]) && isxdigit((unsigned char)p[2])) {
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
573 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)574 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
575 {
576 	php_url *url;
577 	size_t old_len = Z_STRLEN_P(value);
578 
579 	if (flags & (FILTER_FLAG_SCHEME_REQUIRED | FILTER_FLAG_HOST_REQUIRED)) {
580 		php_error_docref(NULL, E_DEPRECATED,
581 			"explicit use of FILTER_FLAG_SCHEME_REQUIRED and FILTER_FLAG_HOST_REQUIRED is deprecated");
582 	}
583 
584 	php_filter_url(value, flags, option_array, charset);
585 
586 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
587 		RETURN_VALIDATION_FAILED
588 	}
589 
590 	/* Use parse_url - if it returns false, we return NULL */
591 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
592 
593 	if (url == NULL) {
594 		RETURN_VALIDATION_FAILED
595 	}
596 
597 	if (url->scheme != NULL &&
598 		(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
599 		char *e, *s, *t;
600 		size_t l;
601 
602 		if (url->host == NULL) {
603 			goto bad_url;
604 		}
605 
606 		s = ZSTR_VAL(url->host);
607 		l = ZSTR_LEN(url->host);
608 		e = s + l;
609 		t = e - 1;
610 
611 		/* An IPv6 enclosed by square brackets is a valid hostname */
612 		if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2, NULL)) {
613 			php_url_free(url);
614 			return;
615 		}
616 
617 		// Validate domain
618 		if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
619 			php_url_free(url);
620 			RETURN_VALIDATION_FAILED
621 		}
622 	}
623 
624 	if (
625 		url->scheme == NULL ||
626 		/* some schemas allow the host to be empty */
627 		(url->host == NULL && (strcmp(ZSTR_VAL(url->scheme), "mailto") && strcmp(ZSTR_VAL(url->scheme), "news") && strcmp(ZSTR_VAL(url->scheme), "file"))) ||
628 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
629 	) {
630 bad_url:
631 		php_url_free(url);
632 		RETURN_VALIDATION_FAILED
633 	}
634 
635 	if ((url->user != NULL && !is_userinfo_valid(url->user))
636 		|| (url->pass != NULL && !is_userinfo_valid(url->pass))
637 	) {
638 		php_url_free(url);
639 		RETURN_VALIDATION_FAILED
640 
641 	}
642 
643 	php_url_free(url);
644 }
645 /* }}} */
646 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)647 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
648 {
649 	/*
650 	 * The regex below is based on a regex by Michael Rushton.
651 	 * However, it is not identical.  I changed it to only consider routeable
652 	 * addresses as valid.  Michael's regex considers a@b a valid address
653 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
654 	 *
655 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
656 	 *   when domain names are used in SMTP.  In other words, names that can
657 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
658 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
659 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
660 	 *   unqualified names MUST NOT be used.
661 	 *
662 	 * This regex does not handle comments and folding whitespace.  While
663 	 * this is technically valid in an email address, these parts aren't
664 	 * actually part of the address itself.
665 	 *
666 	 * Michael's regex carries this copyright:
667 	 *
668 	 * Copyright © Michael Rushton 2009-10
669 	 * http://squiloople.com/
670 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
671 	 *
672 	 */
673 	pcre2_code *re = NULL;
674 	pcre2_match_data *match_data = NULL;
675 	uint32_t capture_count;
676 	zend_string *sregexp;
677 	int rc;
678 	const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
679 	const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
680 	const char *regexp;
681 	size_t regexp_len;
682 
683 	if (flags & FILTER_FLAG_EMAIL_UNICODE) {
684 		regexp = regexp0;
685 		regexp_len = sizeof(regexp0) - 1;
686 	} else {
687 		regexp = regexp1;
688 		regexp_len = sizeof(regexp1) - 1;
689 	}
690 
691 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
692 	if (Z_STRLEN_P(value) > 320) {
693 		RETURN_VALIDATION_FAILED
694 	}
695 
696 	sregexp = zend_string_init(regexp, regexp_len, 0);
697 	re = pcre_get_compiled_regex(sregexp, &capture_count);
698 	zend_string_release_ex(sregexp, 0);
699 	if (!re) {
700 		RETURN_VALIDATION_FAILED
701 	}
702 	match_data = php_pcre_create_match_data(capture_count, re);
703 	if (!match_data) {
704 		RETURN_VALIDATION_FAILED
705 	}
706 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
707 	php_pcre_free_match_data(match_data);
708 
709 	/* 0 means that the vector is too small to hold all the captured substring offsets */
710 	if (rc < 0) {
711 		RETURN_VALIDATION_FAILED
712 	}
713 
714 }
715 /* }}} */
716 
/**
 * Parse a dotted-quad IPv4 address.
 *
 * Exactly four decimal fields separated by '.' are required. Each field has
 * 1..3 digits, a value of 0..255 and no leading zero (a lone "0" is fine).
 *
 * @param str      Text to parse.
 * @param str_len  Number of bytes in `str`.
 * @param ip       Receives the fields in order; must have room for 4 ints.
 *                 Entries may be partially written when parsing fails.
 * @return 1 when the whole input is a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cursor = str;
	const char *const limit = str + str_len;
	int field = 0;

	while (cursor < limit) {
		int octet, digits, starts_with_zero;

		if (*cursor < '0' || *cursor > '9') {
			return 0;
		}

		starts_with_zero = (*cursor == '0');
		octet = *cursor++ - '0';
		digits = 1;

		for (; cursor < limit && *cursor >= '0' && *cursor <= '9'; cursor++) {
			octet = octet * 10 + (*cursor - '0');
			digits++;
			if (octet > 255 || digits > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (starts_with_zero && (octet != 0 || digits > 1)) {
			return 0;
		}

		ip[field++] = octet;

		if (field == 4) {
			/* Nothing may follow the fourth field. */
			return cursor == limit;
		}
		if (cursor >= limit || *cursor++ != '.') {
			return 0;
		}
	}

	return 0;
}
/* }}} */
751 
/**
 * Parse a textual IPv6 address, optionally with a trailing dotted-quad
 * IPv4 part.
 *
 * @param str      Text to parse (without surrounding brackets).
 * @param str_len  Number of bytes in `str`.
 * @param ip       Optional (may be NULL). On success receives the eight
 *                 16-bit groups with a "::" expanded to zeros. When an IPv4
 *                 tail is present the result is written as
 *                 0:0:0:0:0:ffff:<v4 high>:<v4 low>, regardless of the
 *                 groups preceding the tail. Contents are unspecified when
 *                 the function returns 0.
 * @return 1 when the address is valid, 0 otherwise.
 *
 * Fix over the previous revision: a hex group is rejected as soon as a fifth
 * digit is seen. Previously the group length was only checked after the whole
 * run had been accumulated, so a long run of hex digits overflowed the signed
 * accumulator (undefined behaviour) before being rejected.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1;
	int blocks = 0;
	int num, n, i;
	char *ipv4;
	char *end;
	int ip4elm[4];
	char *s = str;

	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Walk back from the first dot to the start of the IPv4 part. */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 tail occupies two 16-bit groups. */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				if (compressed_pos >= 0) {
					/* only one "::" is allowed */
					return 0;
				}
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}

		/* Read one group of 1..4 hex digits. */
		num = n = 0;
		while (str < end) {
			int nibble;

			if (*str >= '0' && *str <= '9') {
				nibble = *str - '0';
			} else if (*str >= 'a' && *str <= 'f') {
				nibble = (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				nibble = (*str - 'A') + 10;
			} else {
				break;
			}
			if (++n > 4) {
				/* Too many digits; stop before `num` can overflow. */
				return 0;
			}
			num = 16 * num + nibble;
			str++;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		if (n < 1) {
			return 0;
		}
		if (++blocks > 8) {
			return 0;
		}
	}

fixup_ip:
	if (ip && ipv4) {
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Shift the groups after "::" to the end and zero-fill the gap. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
/* }}} */
862 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)863 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
864 {
865 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
866 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
867 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
868 	 * colon determine the format */
869 
870 	int            ip[8];
871 	int            mode;
872 
873 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
874 		mode = FORMAT_IPV6;
875 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
876 		mode = FORMAT_IPV4;
877 	} else {
878 		RETURN_VALIDATION_FAILED
879 	}
880 
881 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
882 		/* Both formats are cool */
883 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
884 		RETURN_VALIDATION_FAILED
885 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
886 		RETURN_VALIDATION_FAILED
887 	}
888 
889 	switch (mode) {
890 		case FORMAT_IPV4:
891 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
892 				RETURN_VALIDATION_FAILED
893 			}
894 
895 			/* Check flags */
896 			if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
897 				if (
898 					(ip[0] == 10) ||
899 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
900 					(ip[0] == 192 && ip[1] == 168)
901 				) {
902 					RETURN_VALIDATION_FAILED
903 				}
904 			}
905 
906 			if (flags & FILTER_FLAG_NO_RES_RANGE) {
907 				if (
908 					(ip[0] == 0) ||
909 					(ip[0] >= 240) ||
910 					(ip[0] == 127) ||
911 					(ip[0] == 169 && ip[1] == 254)
912 				) {
913 					RETURN_VALIDATION_FAILED
914 				}
915 			}
916 			break;
917 
918 		case FORMAT_IPV6:
919 			{
920 				int res = 0;
921 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
922 				if (res < 1) {
923 					RETURN_VALIDATION_FAILED
924 				}
925 				/* Check flags */
926 				if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
927 					if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
928 						RETURN_VALIDATION_FAILED
929 					}
930 				}
931 				if (flags & FILTER_FLAG_NO_RES_RANGE) {
932 					if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
933 						&& ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
934 						|| (ip[0] == 0x5f)
935 						|| (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
936 						|| ((ip[0] == 0x2001 && ip[1] == 0x0db8) || (ip[1] >= 0x0010 && ip[1] <= 0x001f))
937 						|| (ip[0] == 0x3ff3)
938 					) {
939 						RETURN_VALIDATION_FAILED
940 					}
941 				}
942 			}
943 			break;
944 	}
945 }
946 /* }}} */
947 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)948 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
949 {
950 	char *input = Z_STRVAL_P(value);
951 	size_t input_len = Z_STRLEN_P(value);
952 	int tokens, length, i, offset, exp_separator_set;
953 	size_t exp_separator_len;
954 	char separator;
955 	char *exp_separator;
956 	zend_long ret = 0;
957 	zval *option_val;
958 
959 	FETCH_STRING_OPTION(exp_separator, "separator");
960 
961 	if (exp_separator_set && exp_separator_len != 1) {
962 		php_error_docref(NULL, E_WARNING, "Separator must be exactly one character long");
963 		RETURN_VALIDATION_FAILED;
964 	}
965 
966 	if (14 == input_len) {
967 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
968 		 * commonly used but valid nonetheless.
969 		 */
970 		tokens = 3;
971 		length = 4;
972 		separator = '.';
973 	} else if (17 == input_len && input[2] == '-') {
974 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
975 		tokens = 6;
976 		length = 2;
977 		separator = '-';
978 	} else if (17 == input_len && input[2] == ':') {
979 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
980 		tokens = 6;
981 		length = 2;
982 		separator = ':';
983 	} else {
984 		RETURN_VALIDATION_FAILED;
985 	}
986 
987 	if (exp_separator_set && separator != exp_separator[0]) {
988 		RETURN_VALIDATION_FAILED;
989 	}
990 
991 	/* Essentially what we now have is a set of tokens each consisting of
992 	 * a hexadecimal number followed by a separator character. (With the
993 	 * exception of the last token which does not have the separator.)
994 	 */
995 	for (i = 0; i < tokens; i++) {
996 		offset = i * (length + 1);
997 
998 		if (i < tokens - 1 && input[offset + length] != separator) {
999 			/* The current token did not end with e.g. a "." */
1000 			RETURN_VALIDATION_FAILED
1001 		}
1002 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
1003 			/* The current token is no valid hexadecimal digit */
1004 			RETURN_VALIDATION_FAILED
1005 		}
1006 	}
1007 }
1008 /* }}} */
1009