xref: /php-src/ext/filter/logical_filters.c (revision d8fc05c0)
1 /*
2   +----------------------------------------------------------------------+
3   | Copyright (c) The PHP Group                                          |
4   +----------------------------------------------------------------------+
5   | This source file is subject to version 3.01 of the PHP license,      |
6   | that is bundled with this package in the file LICENSE, and is        |
7   | available through the world-wide-web at the following url:           |
8   | https://www.php.net/license/3_01.txt                                 |
9   | If you did not receive a copy of the PHP license and are unable to   |
10   | obtain it through the world-wide-web, please send a note to          |
11   | license@php.net so we can mail you a copy immediately.               |
12   +----------------------------------------------------------------------+
13   | Authors: Derick Rethans <derick@php.net>                             |
14   |          Pierre-A. Joye <pierre@php.net>                             |
15   |          Kévin Dunglas <dunglas@gmail.com>                           |
16   +----------------------------------------------------------------------+
17 */
18 
19 #include "php_filter.h"
20 #include "filter_private.h"
21 #include "ext/standard/url.h"
22 #include "ext/pcre/php_pcre.h"
23 
24 #include "zend_multiply.h"
25 
26 #ifdef HAVE_ARPA_INET_H
27 # include <arpa/inet.h>
28 #endif
29 
30 #ifndef INADDR_NONE
31 # define INADDR_NONE ((unsigned long int) -1)
32 #endif
33 
34 
/* {{{ FETCH_DOUBLE_OPTION(var_name, option_name)
 * Looks up the entry named option_name (a string literal) in option_array and
 * stores it, converted with zval_get_double(), in var_name.
 * var_name##_set becomes 1 when the entry exists; otherwise it is 0 and
 * var_name stays 0.
 * Requires `option_array` and a scratch `zval *option_val` to be in scope at
 * the expansion site. The body is wrapped in do { } while (0) so that the
 * expansion behaves as one statement (call sites supply the trailing ';'). */
#define FETCH_DOUBLE_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL) { \
				var_name = zval_get_double(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
46 
/* {{{ FETCH_LONG_OPTION(var_name, option_name)
 * Looks up the entry named option_name (a string literal) in option_array and
 * stores it, converted with zval_get_long(), in var_name.
 * var_name##_set becomes 1 when the entry exists; otherwise it is 0 and
 * var_name stays 0.
 * Requires `option_array` and a scratch `zval *option_val` to be in scope at
 * the expansion site. The body is wrapped in do { } while (0) so that the
 * expansion behaves as one statement (call sites supply the trailing ';'). */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
58 
/* {{{ FETCH_STRING_OPTION(var_name, option_name)
 * Looks up the entry named option_name (a string literal) in option_array via
 * zend_hash_str_find_deref(). Only an IS_STRING entry is accepted: var_name
 * then points at its character data, var_name##_len holds its length and
 * var_name##_set is 1. In every other case var_name is NULL and both
 * var_name##_len and var_name##_set are 0.
 * Requires `option_array` and a scratch `zval *option_val` to be in scope at
 * the expansion site. The body is wrapped in do { } while (0) so that the
 * expansion behaves as one statement (call sites supply the trailing ';'). */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL && Z_TYPE_P(option_val) == IS_STRING) { \
				var_name = Z_STRVAL_P(option_val); \
				var_name##_len = Z_STRLEN_P(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
74 
/* {{{ FETCH_STR_OPTION(var_name, option_name)
 * Looks up the entry named option_name (a string literal) in option_array via
 * zend_hash_str_find_deref(). Only an IS_STRING entry is accepted: var_name
 * is then set to its zend_string (Z_STR_P) and var_name##_set to 1. In every
 * other case var_name is NULL and var_name##_set is 0.
 * Requires `option_array` and a scratch `zval *option_val` to be in scope at
 * the expansion site. The body is wrapped in do { } while (0) so that the
 * expansion behaves as one statement (call sites supply the trailing ';'). */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find_deref(Z_ARRVAL_P(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL && Z_TYPE_P(option_val) == IS_STRING) { \
				var_name = Z_STR_P(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
88 
89 #define FORMAT_IPV4    4
90 #define FORMAT_IPV6    6
91 
92 static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]);
93 
/* Parses a decimal integer from the str_len bytes starting at str.
 *
 * Accepted syntax: an optional '+' or '-' followed either by a single '0'
 * or by a digit 1..9 and any number of further digits (no leading zeros).
 * Values outside the zend_long range are rejected.
 *
 * Returns 1 and stores the value in *ret on success, -1 otherwise (in which
 * case *ret is not modified).
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Optional sign; only look at the byte when it is inside the input. */
	if (str < end && (*str == '-' || *str == '+')) {
		sign = (*str == '-');
		str++;
	}

	if (str + 1 == end && *str == '0') {
		/* Special cases: 0, +0 and -0. Store the result explicitly so that
		 * callers do not have to pre-initialise *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9 */
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign) ? -1 : 1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str < '0' || *str > '9') {
			return -1;
		}
		digit = (*(str++) - '0');

		/* Check that appending the digit stays within range before doing
		 * the multiplication, so the arithmetic itself never overflows. */
		if (!sign && ctx_value <= (ZEND_LONG_MAX - digit) / 10) {
			ctx_value = (ctx_value * 10) + digit;
		} else if (sign && ctx_value >= (ZEND_LONG_MIN + digit) / 10) {
			ctx_value = (ctx_value * 10) - digit;
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
/* }}} */
146 
/* Parses the str_len bytes at str as an unsigned octal number.
 *
 * Every byte must be a digit 0..7. The value is accumulated in a zend_ulong
 * and parsing fails as soon as it would no longer fit. An empty input yields 0.
 *
 * Returns 1 and stores the accumulated value, cast to zend_long, in *ret on
 * success; returns -1 on an invalid digit or on overflow.
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		const char c = str[pos];
		zend_ulong digit;

		if (c < '0' || c > '7') {
			return -1;
		}
		digit = (zend_ulong)(c - '0');

		/* Shifting in one more octal digit must not wrap around. */
		if (acc > limit / 8) {
			return -1;
		}
		acc *= 8;
		if (acc > limit - digit) {
			return -1;
		}
		acc += digit;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
169 
/* Parses the str_len bytes at str as an unsigned hexadecimal number.
 *
 * Every byte must be one of 0-9, a-f or A-F (no "0x" prefix is handled here).
 * The value is accumulated in a zend_ulong and parsing fails as soon as it
 * would no longer fit. An empty input yields 0.
 *
 * Returns 1 and stores the accumulated value, cast to zend_long, in *ret on
 * success; returns -1 on an invalid digit or on overflow.
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		const char c = str[pos];
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = c - '0';
		} else if (c >= 'a' && c <= 'f') {
			nibble = c - ('a' - 10);
		} else if (c >= 'A' && c <= 'F') {
			nibble = c - ('A' - 10);
		} else {
			return -1;
		}

		/* Shifting in one more hex digit must not wrap around. */
		if (acc > limit / 16) {
			return -1;
		}
		acc *= 16;
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
196 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)197 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
198 {
199 	zval *option_val;
200 	zend_long  min_range, max_range, option_flags;
201 	int   min_range_set, max_range_set;
202 	int   allow_octal = 0, allow_hex = 0;
203 	size_t	  len;
204 	int error = 0;
205 	zend_long  ctx_value;
206 	char *p;
207 
208 	/* Parse options */
209 	FETCH_LONG_OPTION(min_range,    "min_range");
210 	FETCH_LONG_OPTION(max_range,    "max_range");
211 	option_flags = flags;
212 
213 	len = Z_STRLEN_P(value);
214 
215 	if (len == 0) {
216 		RETURN_VALIDATION_FAILED
217 	}
218 
219 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
220 		allow_octal = 1;
221 	}
222 
223 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
224 		allow_hex = 1;
225 	}
226 
227 	/* Start the validating loop */
228 	p = Z_STRVAL_P(value);
229 	ctx_value = 0;
230 
231 	PHP_FILTER_TRIM_DEFAULT(p, len);
232 
233 	if (*p == '0') {
234 		p++; len--;
235 		if (allow_hex && (*p == 'x' || *p == 'X')) {
236 			p++; len--;
237 			if (len == 0) {
238 				RETURN_VALIDATION_FAILED
239 			}
240 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
241 				error = 1;
242 			}
243 		} else if (allow_octal) {
244 			/* Support explicit octal prefix notation */
245 			if (*p == 'o' || *p == 'O') {
246 				p++; len--;
247 				if (len == 0) {
248 					RETURN_VALIDATION_FAILED
249 				}
250 			}
251 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
252 				error = 1;
253 			}
254 		} else if (len != 0) {
255 			error = 1;
256 		}
257 	} else {
258 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
259 			error = 1;
260 		}
261 	}
262 
263 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
264 		RETURN_VALIDATION_FAILED
265 	} else {
266 		zval_ptr_dtor(value);
267 		ZVAL_LONG(value, ctx_value);
268 		return;
269 	}
270 }
271 /* }}} */
272 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)273 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
274 {
275 	char *str = Z_STRVAL_P(value);
276 	size_t len = Z_STRLEN_P(value);
277 	int ret;
278 
279 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
280 
281 	/* returns true for "1", "true", "on" and "yes"
282 	 * returns false for "0", "false", "off", "no", and ""
283 	 * null otherwise. */
284 	switch (len) {
285 		case 0:
286 			ret = 0;
287 			break;
288 		case 1:
289 			if (*str == '1') {
290 				ret = 1;
291 			} else if (*str == '0') {
292 				ret = 0;
293 			} else {
294 				ret = -1;
295 			}
296 			break;
297 		case 2:
298 			if (strncasecmp(str, "on", 2) == 0) {
299 				ret = 1;
300 			} else if (strncasecmp(str, "no", 2) == 0) {
301 				ret = 0;
302 			} else {
303 				ret = -1;
304 			}
305 			break;
306 		case 3:
307 			if (strncasecmp(str, "yes", 3) == 0) {
308 				ret = 1;
309 			} else if (strncasecmp(str, "off", 3) == 0) {
310 				ret = 0;
311 			} else {
312 				ret = -1;
313 			}
314 			break;
315 		case 4:
316 			if (strncasecmp(str, "true", 4) == 0) {
317 				ret = 1;
318 			} else {
319 				ret = -1;
320 			}
321 			break;
322 		case 5:
323 			if (strncasecmp(str, "false", 5) == 0) {
324 				ret = 0;
325 			} else {
326 				ret = -1;
327 			}
328 			break;
329 		default:
330 			ret = -1;
331 	}
332 
333 	if (ret == -1) {
334 		RETURN_VALIDATION_FAILED
335 	} else {
336 		zval_ptr_dtor(value);
337 		ZVAL_BOOL(value, ret);
338 	}
339 }
340 /* }}} */
341 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)342 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
343 {
344 	size_t len;
345 	char *str, *end;
346 	char *num, *p;
347 	zval *option_val;
348 	char *decimal;
349 	int decimal_set;
350 	size_t decimal_len;
351 	char dec_sep = '.';
352 	char *thousand;
353 	int thousand_set;
354 	size_t thousand_len;
355 	char *tsd_sep;
356 
357 	zend_long lval;
358 	double dval;
359 	double min_range, max_range;
360 	int   min_range_set, max_range_set;
361 
362 	int first, n;
363 
364 	len = Z_STRLEN_P(value);
365 	str = Z_STRVAL_P(value);
366 
367 	PHP_FILTER_TRIM_DEFAULT(str, len);
368 	end = str + len;
369 
370 	FETCH_STRING_OPTION(decimal, "decimal");
371 
372 	if (decimal_set) {
373 		if (decimal_len != 1) {
374 			zend_value_error("%s(): \"decimal\" option must be one character long", get_active_function_name());
375 			RETURN_VALIDATION_FAILED
376 		} else {
377 			dec_sep = *decimal;
378 		}
379 	}
380 
381 	FETCH_STRING_OPTION(thousand, "thousand");
382 
383 	if (thousand_set) {
384 		if (thousand_len < 1) {
385 			zend_value_error("%s(): \"thousand\" option cannot be empty", get_active_function_name());
386 			RETURN_VALIDATION_FAILED
387 		} else {
388 			tsd_sep = thousand;
389 		}
390 	} else {
391 		tsd_sep = "',.";
392 	}
393 
394 	FETCH_DOUBLE_OPTION(min_range, "min_range");
395 	FETCH_DOUBLE_OPTION(max_range, "max_range");
396 
397 	num = p = emalloc(len+1);
398 	if (str < end && (*str == '+' || *str == '-')) {
399 		*p++ = *str++;
400 	}
401 	first = 1;
402 	while (1) {
403 		n = 0;
404 		while (str < end && *str >= '0' && *str <= '9') {
405 			++n;
406 			*p++ = *str++;
407 		}
408 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
409 			if (!first && n != 3) {
410 				goto error;
411 			}
412 			if (*str == dec_sep) {
413 				*p++ = '.';
414 				str++;
415 				while (str < end && *str >= '0' && *str <= '9') {
416 					*p++ = *str++;
417 				}
418 			}
419 			if (*str == 'e' || *str == 'E') {
420 				*p++ = *str++;
421 				if (str < end && (*str == '+' || *str == '-')) {
422 					*p++ = *str++;
423 				}
424 				while (str < end && *str >= '0' && *str <= '9') {
425 					*p++ = *str++;
426 				}
427 			}
428 			break;
429 		}
430 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
431 			if (first?(n < 1 || n > 3):(n != 3)) {
432 				goto error;
433 			}
434 			first = 0;
435 			str++;
436 		} else {
437 			goto error;
438 		}
439 	}
440 	if (str != end) {
441 		goto error;
442 	}
443 	*p = 0;
444 
445 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
446 		case IS_LONG:
447 			if ((min_range_set && (lval < min_range)) || (max_range_set && (lval > max_range))) {
448 				goto error;
449 			}
450 			zval_ptr_dtor(value);
451 			ZVAL_DOUBLE(value, (double)lval);
452 			break;
453 		case IS_DOUBLE:
454 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
455 				goto error;
456 			}
457 			if ((min_range_set && (dval < min_range)) || (max_range_set && (dval > max_range))) {
458 				goto error;
459 			}
460 			zval_ptr_dtor(value);
461 			ZVAL_DOUBLE(value, dval);
462 			break;
463 		default:
464 error:
465 			efree(num);
466 			RETURN_VALIDATION_FAILED
467 	}
468 	efree(num);
469 }
470 /* }}} */
471 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)472 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
473 {
474 	zval *option_val;
475 	zend_string *regexp;
476 	int regexp_set;
477 	pcre2_code *re = NULL;
478 	pcre2_match_data *match_data = NULL;
479 	uint32_t capture_count;
480 	int rc;
481 
482 	/* Parse options */
483 	FETCH_STR_OPTION(regexp, "regexp");
484 
485 	if (!regexp_set) {
486 		zend_value_error("%s(): \"regexp\" option is missing", get_active_function_name());
487 		RETURN_VALIDATION_FAILED
488 	}
489 
490 	re = pcre_get_compiled_regex(regexp, &capture_count);
491 	if (!re) {
492 		RETURN_VALIDATION_FAILED
493 	}
494 	match_data = php_pcre_create_match_data(capture_count, re);
495 	if (!match_data) {
496 		RETURN_VALIDATION_FAILED
497 	}
498 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
499 	php_pcre_free_match_data(match_data);
500 
501 	/* 0 means that the vector is too small to hold all the captured substring offsets */
502 	if (rc < 0) {
503 		RETURN_VALIDATION_FAILED
504 	}
505 }
506 
/* Checks the syntax of a domain name of `len` bytes.
 *
 * Rules enforced:
 *  - one trailing dot is ignored;
 *  - at most 253 characters (trailing dot not counted);
 *  - no empty labels (leading dot or two consecutive dots);
 *  - each label is at most 63 characters long;
 *  - with FILTER_FLAG_HOSTNAME set in `flags`, labels may only contain
 *    alphanumerics and '-', and must begin and end with an alphanumeric.
 *
 * Returns 1 when the name is acceptable, 0 otherwise.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l;
	int hostname = (flags & FILTER_FLAG_HOSTNAME) != 0;
	unsigned char i = 1; /* 1-based position inside the current label */

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot */
	if (l > 0 && *(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if (*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* A '-' is only tolerated inside a label: when it is the very
			 * last character of the name (s + 1 == e) it falls through to
			 * the alphanumeric test and is rejected. */
			if (i > 63 || (hostname && (*s != '-' || s + 1 == e) && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
/* }}} */
558 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)559 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
560 {
561 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
562 		RETURN_VALIDATION_FAILED
563 	}
564 }
565 /* }}} */
566 
/* Checks that a URL user or password component contains only characters that
 * are allowed there: alphanumerics, the punctuation listed in `valid`, and
 * percent-escapes of the form "%" HEXDIG HEXDIG.
 *
 * Returns 1 when every byte of str is acceptable, 0 otherwise.
 */
static int is_userinfo_valid(zend_string *str)
{
	const char *valid = "-._~!$&'()*+,;=:";
	const char *p = ZSTR_VAL(str);
	const char *end = p + ZSTR_LEN(str);

	while (p < end) {
		/* <ctype.h> functions need a value representable as unsigned char */
		const unsigned char c = (unsigned char)*p;

		/* strchr() would match the terminator of `valid` for a NUL byte,
		 * so NUL is excluded explicitly. */
		if (c != '\0' && (isalnum(c) || strchr(valid, c))) {
			p++;
		} else if (c == '%' && end - p >= 3
				&& isxdigit((unsigned char)p[1]) && isxdigit((unsigned char)p[2])) {
			/* Both digits of a percent-escape are hexadecimal */
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
582 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)583 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
584 {
585 	php_url *url;
586 	size_t old_len = Z_STRLEN_P(value);
587 
588 	php_filter_url(value, flags, option_array, charset);
589 
590 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
591 		RETURN_VALIDATION_FAILED
592 	}
593 
594 	/* Use parse_url - if it returns false, we return NULL */
595 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
596 
597 	if (url == NULL) {
598 		RETURN_VALIDATION_FAILED
599 	}
600 
601 	if (url->scheme != NULL &&
602 		(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
603 		char *e, *s, *t;
604 		size_t l;
605 
606 		if (url->host == NULL) {
607 			goto bad_url;
608 		}
609 
610 		s = ZSTR_VAL(url->host);
611 		l = ZSTR_LEN(url->host);
612 		e = s + l;
613 		t = e - 1;
614 
615 		/* An IPv6 enclosed by square brackets is a valid hostname */
616 		if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2, NULL)) {
617 			php_url_free(url);
618 			return;
619 		}
620 
621 		// Validate domain
622 		if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
623 			php_url_free(url);
624 			RETURN_VALIDATION_FAILED
625 		}
626 	}
627 
628 	if (
629 		url->scheme == NULL ||
630 		/* some schemas allow the host to be empty */
631 		(url->host == NULL && (!zend_string_equals_literal(url->scheme, "mailto") && !zend_string_equals_literal(url->scheme, "news") && !zend_string_equals_literal(url->scheme, "file"))) ||
632 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
633 	) {
634 bad_url:
635 		php_url_free(url);
636 		RETURN_VALIDATION_FAILED
637 	}
638 
639 	if ((url->user != NULL && !is_userinfo_valid(url->user))
640 		|| (url->pass != NULL && !is_userinfo_valid(url->pass))
641 	) {
642 		php_url_free(url);
643 		RETURN_VALIDATION_FAILED
644 
645 	}
646 
647 	php_url_free(url);
648 }
649 /* }}} */
650 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)651 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
652 {
653 	/*
654 	 * The regex below is based on a regex by Michael Rushton.
655 	 * However, it is not identical.  I changed it to only consider routeable
656 	 * addresses as valid.  Michael's regex considers a@b a valid address
657 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
658 	 *
659 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
660 	 *   when domain names are used in SMTP.  In other words, names that can
661 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
662 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
663 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
664 	 *   unqualified names MUST NOT be used.
665 	 *
666 	 * This regex does not handle comments and folding whitespace.  While
667 	 * this is technically valid in an email address, these parts aren't
668 	 * actually part of the address itself.
669 	 *
670 	 * Michael's regex carries this copyright:
671 	 *
672 	 * Copyright © Michael Rushton 2009-10
673 	 * http://squiloople.com/
674 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
675 	 *
676 	 */
677 	pcre2_code *re = NULL;
678 	pcre2_match_data *match_data = NULL;
679 	uint32_t capture_count;
680 	zend_string *sregexp;
681 	int rc;
682 	const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
683 	const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
684 	const char *regexp;
685 	size_t regexp_len;
686 
687 	if (flags & FILTER_FLAG_EMAIL_UNICODE) {
688 		regexp = regexp0;
689 		regexp_len = sizeof(regexp0) - 1;
690 	} else {
691 		regexp = regexp1;
692 		regexp_len = sizeof(regexp1) - 1;
693 	}
694 
695 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
696 	if (Z_STRLEN_P(value) > 320) {
697 		RETURN_VALIDATION_FAILED
698 	}
699 
700 	sregexp = zend_string_init(regexp, regexp_len, 0);
701 	re = pcre_get_compiled_regex(sregexp, &capture_count);
702 	zend_string_release_ex(sregexp, 0);
703 	if (!re) {
704 		RETURN_VALIDATION_FAILED
705 	}
706 	match_data = php_pcre_create_match_data(capture_count, re);
707 	if (!match_data) {
708 		RETURN_VALIDATION_FAILED
709 	}
710 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, 0, match_data, php_pcre_mctx());
711 	php_pcre_free_match_data(match_data);
712 
713 	/* 0 means that the vector is too small to hold all the captured substring offsets */
714 	if (rc < 0) {
715 		RETURN_VALIDATION_FAILED
716 	}
717 
718 }
719 /* }}} */
720 
/* Validates a dotted-quad IPv4 address of str_len bytes.
 *
 * Exactly four decimal components separated by single dots are required.
 * Each component has 1-3 digits, a value of at most 255, and no leading
 * zero unless it is the single digit "0" (a leading zero would suggest
 * octal notation, which is not supported).
 *
 * The parsed components are written to ip[0..3] as they are read, so ip may
 * be partially filled when 0 is returned.
 * Returns 1 when the whole input is a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *const stop = str + str_len;
	int octets = 0;

	while (cur < stop) {
		int octet, digits, starts_with_zero;

		if (*cur < '0' || *cur > '9') {
			return 0;
		}
		starts_with_zero = (*cur == '0');
		octet = *cur++ - '0';
		digits = 1;

		/* Remaining digits of this component */
		for (; cur < stop && *cur >= '0' && *cur <= '9'; cur++) {
			octet = octet * 10 + (*cur - '0');
			digits++;
			if (octet > 255 || digits > 3) {
				return 0;
			}
		}

		if (starts_with_zero && (octet != 0 || digits > 1)) {
			return 0;
		}

		ip[octets++] = octet;
		if (octets == 4) {
			/* Nothing may follow the fourth component */
			return cur == stop;
		}
		if (cur >= stop || *cur++ != '.') {
			return 0;
		}
	}
	return 0;
}
/* }}} */
755 
/* Validates an IPv6 address of str_len bytes, optionally ending in an
 * embedded dotted-quad IPv4 address.
 *
 * ip may be NULL. When it is not NULL, the eight 16-bit groups are written to
 * ip[0..7] on success: a "::" is expanded to zero groups, and an address with
 * an embedded IPv4 part is stored as 0:0:0:0:0:ffff:<v4 high>:<v4 low>.
 * The contents of ip are unspecified when 0 is returned.
 *
 * Returns 1 when the input is a valid address, 0 otherwise.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len, int ip[8]) /* {{{ */
{
	int compressed_pos = -1; /* group index of "::", -1 when not seen */
	int blocks = 0;          /* number of 16-bit groups accounted for */
	int num, n, i;
	char *ipv4;
	char *end;
	int ip4elm[4];
	char *s = str;

	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* Walk back to the start of the dotted quad */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 part occupies two 16-bit groups */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				if (compressed_pos >= 0) {
					/* only one "::" is allowed */
					return 0;
				}
				if (ip && blocks < 8) {
					ip[blocks] = -1;
				}
				compressed_pos = blocks++; /* :: means 1 or more 16-bit 0 blocks */
				if (++str == end) {
					if (blocks > 8) {
						return 0;
					}
					goto fixup_ip;
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}

		/* Parse one group of 1-4 hex digits */
		num = n = 0;
		while (str < end) {
			int digit;

			if (*str >= '0' && *str <= '9') {
				digit = *str - '0';
			} else if (*str >= 'a' && *str <= 'f') {
				digit = (*str - 'a') + 10;
			} else if (*str >= 'A' && *str <= 'F') {
				digit = (*str - 'A') + 10;
			} else {
				break;
			}
			if (++n > 4) {
				/* Too many digits: reject before accumulating, so that an
				 * arbitrarily long run of hex digits cannot overflow num. */
				return 0;
			}
			num = 16 * num + digit;
			str++;
		}
		if (ip && blocks < 8) {
			ip[blocks] = num;
		}
		if (n < 1) {
			return 0;
		}
		if (++blocks > 8) {
			return 0;
		}
	}

fixup_ip:
	if (ip && ipv4) {
		for (i = 0; i < 5; i++) {
			ip[i] = 0;
		}
		ip[i++] = 0xffff;
		ip[i++] = 256 * ip4elm[0] + ip4elm[1];
		ip[i++] = 256 * ip4elm[2] + ip4elm[3];
	} else if (ip && compressed_pos >= 0 && blocks <= 8) {
		/* Expand "::": shift the groups that followed it to the end and
		 * zero-fill the gap. */
		int offset = 8 - blocks;
		for (i = 7; i > compressed_pos + offset; i--) {
			ip[i] = ip[i - offset];
		}
		for (i = compressed_pos + offset; i >= compressed_pos; i--) {
			ip[i] = 0;
		}
	}

	return (compressed_pos >= 0 && blocks <= 8) || blocks == 8;
}
/* }}} */
866 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)867 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
868 {
869 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
870 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
871 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
872 	 * colon determine the format */
873 
874 	int            ip[8];
875 	int            mode;
876 
877 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
878 		mode = FORMAT_IPV6;
879 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
880 		mode = FORMAT_IPV4;
881 	} else {
882 		RETURN_VALIDATION_FAILED
883 	}
884 
885 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
886 		/* Both formats are cool */
887 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
888 		RETURN_VALIDATION_FAILED
889 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
890 		RETURN_VALIDATION_FAILED
891 	}
892 
893 	switch (mode) {
894 		case FORMAT_IPV4:
895 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
896 				RETURN_VALIDATION_FAILED
897 			}
898 
899 			/* Check flags */
900 			if (flags & FILTER_FLAG_NO_PRIV_RANGE  || flags & FILTER_FLAG_GLOBAL_RANGE) {
901 				if (
902 					(ip[0] == 10) ||
903 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
904 					(ip[0] == 192 && ip[1] == 168)
905 				) {
906 					RETURN_VALIDATION_FAILED
907 				}
908 			}
909 
910 			if (flags & FILTER_FLAG_NO_RES_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
911 				if (
912 					(ip[0] == 0) ||
913 					(ip[0] >= 240) ||
914 					(ip[0] == 127) ||
915 					(ip[0] == 169 && ip[1] == 254)
916 				) {
917 					RETURN_VALIDATION_FAILED
918 				}
919 			}
920 
921 			if (flags & FILTER_FLAG_GLOBAL_RANGE) {
922 				if (
923 						(ip[0] == 100 && ip[1] >= 64 && ip[1] <= 127 ) ||
924 						(ip[0] == 192 && ip[1] == 0 && ip[2] == 0 ) ||
925 						(ip[0] == 192 && ip[1] == 0 && ip[2] == 2 ) ||
926 						(ip[0] == 198 && ip[1] >= 18 && ip[1] <= 19 ) ||
927 						(ip[0] == 198 && ip[1] == 51 && ip[2] == 100 ) ||
928 						(ip[0] == 203 && ip[1] == 0 && ip[2] == 113 )
929 		   ) {
930 					RETURN_VALIDATION_FAILED
931 				}
932 			}
933 
934 			break;
935 
936 		case FORMAT_IPV6:
937 			{
938 				int res = 0;
939 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value), ip);
940 				if (res < 1) {
941 					RETURN_VALIDATION_FAILED
942 				}
943 				/* Check flags */
944 				if (flags & FILTER_FLAG_NO_PRIV_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
945 					if (ip[0] >= 0xfc00 && ip[0] <= 0xfdff) {
946 						RETURN_VALIDATION_FAILED
947 					}
948 				}
949 				if (flags & FILTER_FLAG_NO_RES_RANGE || flags & FILTER_FLAG_GLOBAL_RANGE) {
950 					if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0
951 							&& ip[4] == 0 && ip[5] == 0 && ip[6] == 0 && (ip[7] == 0 || ip[7] == 1))
952 						|| (ip[0] == 0x5f)
953 						|| (ip[0] >= 0xfe80 && ip[0] <= 0xfebf)
954 						|| (ip[0] == 0x2001 && (ip[1] == 0x0db8 || (ip[1] >= 0x0010 && ip[1] <= 0x001f)))
955 						|| (ip[0] == 0x3ff3)
956 								) {
957 									RETURN_VALIDATION_FAILED
958 								}
959 				}
960 				if (flags & FILTER_FLAG_GLOBAL_RANGE) {
961 					if ((ip[0] == 0 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0 && ip[4] == 0 && ip[5] == 0xffff) ||
962 							(ip[0] == 0x0100 && ip[1] == 0 && ip[2] == 0 && ip[3] == 0) ||
963 							(ip[0] == 0x2001 && ip[1] <= 0x01ff) ||
964 							(ip[0] == 0x2001 && ip[1] == 0x0002 && ip[2] == 0) ||
965 							(ip[0] >= 0xfc00 && ip[0] <= 0xfdff)
966 					   ) {
967 						RETURN_VALIDATION_FAILED
968 					}
969 				}
970 			}
971 			break;
972 	}
973 }
974 /* }}} */
975 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)976 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
977 {
978 	char *input = Z_STRVAL_P(value);
979 	size_t input_len = Z_STRLEN_P(value);
980 	int tokens, length, i, offset, exp_separator_set;
981 	size_t exp_separator_len;
982 	char separator;
983 	char *exp_separator;
984 	zend_long ret = 0;
985 	zval *option_val;
986 
987 	FETCH_STRING_OPTION(exp_separator, "separator");
988 
989 	if (exp_separator_set && exp_separator_len != 1) {
990 		zend_value_error("%s(): \"separator\" option must be one character long", get_active_function_name());
991 		RETURN_VALIDATION_FAILED;
992 	}
993 
994 	if (14 == input_len) {
995 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
996 		 * commonly used but valid nonetheless.
997 		 */
998 		tokens = 3;
999 		length = 4;
1000 		separator = '.';
1001 	} else if (17 == input_len && input[2] == '-') {
1002 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
1003 		tokens = 6;
1004 		length = 2;
1005 		separator = '-';
1006 	} else if (17 == input_len && input[2] == ':') {
1007 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
1008 		tokens = 6;
1009 		length = 2;
1010 		separator = ':';
1011 	} else {
1012 		RETURN_VALIDATION_FAILED;
1013 	}
1014 
1015 	if (exp_separator_set && separator != exp_separator[0]) {
1016 		RETURN_VALIDATION_FAILED;
1017 	}
1018 
1019 	/* Essentially what we now have is a set of tokens each consisting of
1020 	 * a hexadecimal number followed by a separator character. (With the
1021 	 * exception of the last token which does not have the separator.)
1022 	 */
1023 	for (i = 0; i < tokens; i++) {
1024 		offset = i * (length + 1);
1025 
1026 		if (i < tokens - 1 && input[offset + length] != separator) {
1027 			/* The current token did not end with e.g. a "." */
1028 			RETURN_VALIDATION_FAILED
1029 		}
1030 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
1031 			/* The current token is no valid hexadecimal digit */
1032 			RETURN_VALIDATION_FAILED
1033 		}
1034 	}
1035 }
1036 /* }}} */
1037