xref: /PHP-7.3/ext/filter/logical_filters.c (revision a5538c62)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2018 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   |          Pierre-A. Joye <pierre@php.net>                             |
17   |          Kévin Dunglas <dunglas@gmail.com>                           |
18   +----------------------------------------------------------------------+
19 */
20 
21 #include "php_filter.h"
22 #include "filter_private.h"
23 #include "ext/standard/url.h"
24 #include "ext/pcre/php_pcre.h"
25 
26 #include "zend_multiply.h"
27 
28 #if HAVE_ARPA_INET_H
29 # include <arpa/inet.h>
30 #endif
31 
32 #ifndef INADDR_NONE
33 # define INADDR_NONE ((unsigned long int) -1)
34 #endif
35 
36 
/* {{{ FETCH_LONG_OPTION(var_name, option_name)
 * Looks up option_name in the caller's option_array. When the key exists its
 * value is converted with zval_get_long() and stored in var_name, and
 * var_name##_set becomes 1; otherwise both are left at 0.
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope at
 * the call site. The body is wrapped in do { } while (0) so the expansion is
 * a single statement and stays correct under an unbraced if/else. */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
48 
/* {{{ FETCH_STRING_OPTION(var_name, option_name)
 * Looks up option_name in the caller's option_array. Only when the key exists
 * AND holds an IS_STRING value are var_name (char pointer), var_name##_len and
 * var_name##_set (= 1) filled in; otherwise they stay NULL / 0 / 0.
 * The pointer refers to the option's own string storage; nothing is copied.
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope at
 * the call site. The body is wrapped in do { } while (0) so the expansion is
 * a single statement and stays correct under an unbraced if/else. */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
64 
/* {{{ FETCH_STR_OPTION(var_name, option_name)
 * Same lookup as FETCH_STRING_OPTION, but hands back the option as a
 * zend_string pointer (Z_STR_P) instead of a char pointer plus length.
 * var_name##_set is 1 only when the key exists and holds an IS_STRING value;
 * otherwise var_name is NULL and var_name##_set is 0.
 *
 * Expects `option_array` and a scratch `zval *option_val` to be in scope at
 * the call site. The body is wrapped in do { } while (0) so the expansion is
 * a single statement and stays correct under an unbraced if/else. */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
78 
/* Address-family tags stored in `mode` by php_filter_validate_ip() */
#define FORMAT_IPV4    4
#define FORMAT_IPV6    6

/* Forward declaration: php_filter_validate_url() calls this helper before
 * its definition further down in the file. */
static int _php_filter_validate_ipv6(char *str, size_t str_len);
83 
/* Parses a decimal integer with an optional leading '+' or '-'.
 *
 * str      input bytes (need not be NUL-terminated; only [str, str+str_len)
 *          is examined)
 * str_len  number of bytes in str
 * ret      receives the parsed value on success
 *
 * Accepted forms are "0" (optionally signed) or a digit string that starts
 * with 1..9. Anything else, including values that do not fit a zend_long,
 * is rejected.
 *
 * Returns 1 on success, -1 on failure. */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Optional sign; only inspect *str while there is input left */
	if (str < end) {
		switch (*str) {
			case '-':
				sign = 1;
				/* fallthrough */
			case '+':
				str++;
				break;
			default:
				break;
		}
	}

	if (str < end && *str == '0' && str + 1 == end) {
		/* Special cases: +0 and -0 */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	/* Cheap pre-filter on the remaining length; the loop below performs the
	 * exact per-digit overflow check. */
	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* The accumulator already carries the sign, so positive values
			 * grow towards ZEND_LONG_MAX and negative ones towards
			 * ZEND_LONG_MIN; test the bound before multiplying. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
/* }}} */
135 
/* Parses str[0..str_len) as an unsigned octal digit string.
 *
 * The value is accumulated in a zend_ulong and may use the full unsigned
 * range; it is then stored into *ret through a cast to zend_long.
 * An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-octal character or on overflow. */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		const char c = str[pos];
		zend_ulong d;

		if (c < '0' || c > '7') {
			return -1;
		}
		d = (c - '0');

		/* refuse the shift if it would wrap ... */
		if (acc > limit / 8) {
			return -1;
		}
		acc = acc * 8;
		/* ... and the addition likewise */
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
158 
/* Parses str[0..str_len) as an unsigned hexadecimal digit string
 * (0-9, a-f, A-F; no "0x" prefix).
 *
 * The value is accumulated in a zend_ulong and may use the full unsigned
 * range; it is then stored into *ret through a cast to zend_long.
 * An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-hex character or on overflow. */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		const char c = str[pos];
		zend_ulong d;

		if (c >= '0' && c <= '9') {
			d = (c - '0');
		} else if (c >= 'a' && c <= 'f') {
			d = (c - ('a' - 10));
		} else if (c >= 'A' && c <= 'F') {
			d = (c - ('A' - 10));
		} else {
			return -1;
		}

		/* refuse the shift if it would wrap ... */
		if (acc > limit / 16) {
			return -1;
		}
		acc = acc * 16;
		/* ... and the addition likewise */
		if (acc > limit - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
185 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)186 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
187 {
188 	zval *option_val;
189 	zend_long  min_range, max_range, option_flags;
190 	int   min_range_set, max_range_set;
191 	int   allow_octal = 0, allow_hex = 0;
192 	size_t	  len;
193 	int error = 0;
194 	zend_long  ctx_value;
195 	char *p;
196 
197 	/* Parse options */
198 	FETCH_LONG_OPTION(min_range,    "min_range");
199 	FETCH_LONG_OPTION(max_range,    "max_range");
200 	option_flags = flags;
201 
202 	len = Z_STRLEN_P(value);
203 
204 	if (len == 0) {
205 		RETURN_VALIDATION_FAILED
206 	}
207 
208 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
209 		allow_octal = 1;
210 	}
211 
212 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
213 		allow_hex = 1;
214 	}
215 
216 	/* Start the validating loop */
217 	p = Z_STRVAL_P(value);
218 	ctx_value = 0;
219 
220 	PHP_FILTER_TRIM_DEFAULT(p, len);
221 
222 	if (*p == '0') {
223 		p++; len--;
224 		if (allow_hex && (*p == 'x' || *p == 'X')) {
225 			p++; len--;
226 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
227 				error = 1;
228 			}
229 		} else if (allow_octal) {
230 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
231 				error = 1;
232 			}
233 		} else if (len != 0) {
234 			error = 1;
235 		}
236 	} else {
237 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
238 			error = 1;
239 		}
240 	}
241 
242 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
243 		RETURN_VALIDATION_FAILED
244 	} else {
245 		zval_ptr_dtor(value);
246 		ZVAL_LONG(value, ctx_value);
247 		return;
248 	}
249 }
250 /* }}} */
251 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)252 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
253 {
254 	char *str = Z_STRVAL_P(value);
255 	size_t len = Z_STRLEN_P(value);
256 	int ret;
257 
258 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
259 
260 	/* returns true for "1", "true", "on" and "yes"
261 	 * returns false for "0", "false", "off", "no", and ""
262 	 * null otherwise. */
263 	switch (len) {
264 		case 0:
265 			ret = 0;
266 			break;
267 		case 1:
268 			if (*str == '1') {
269 				ret = 1;
270 			} else if (*str == '0') {
271 				ret = 0;
272 			} else {
273 				ret = -1;
274 			}
275 			break;
276 		case 2:
277 			if (strncasecmp(str, "on", 2) == 0) {
278 				ret = 1;
279 			} else if (strncasecmp(str, "no", 2) == 0) {
280 				ret = 0;
281 			} else {
282 				ret = -1;
283 			}
284 			break;
285 		case 3:
286 			if (strncasecmp(str, "yes", 3) == 0) {
287 				ret = 1;
288 			} else if (strncasecmp(str, "off", 3) == 0) {
289 				ret = 0;
290 			} else {
291 				ret = -1;
292 			}
293 			break;
294 		case 4:
295 			if (strncasecmp(str, "true", 4) == 0) {
296 				ret = 1;
297 			} else {
298 				ret = -1;
299 			}
300 			break;
301 		case 5:
302 			if (strncasecmp(str, "false", 5) == 0) {
303 				ret = 0;
304 			} else {
305 				ret = -1;
306 			}
307 			break;
308 		default:
309 			ret = -1;
310 	}
311 
312 	if (ret == -1) {
313 		RETURN_VALIDATION_FAILED
314 	} else {
315 		zval_ptr_dtor(value);
316 		ZVAL_BOOL(value, ret);
317 	}
318 }
319 /* }}} */
320 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)321 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
322 {
323 	size_t len;
324 	char *str, *end;
325 	char *num, *p;
326 	zval *option_val;
327 	char *decimal;
328 	int decimal_set;
329 	size_t decimal_len;
330 	char dec_sep = '.';
331 	char *thousand;
332 	int thousand_set;
333 	size_t thousand_len;
334 	char *tsd_sep;
335 
336 	zend_long lval;
337 	double dval;
338 
339 	int first, n;
340 
341 	len = Z_STRLEN_P(value);
342 	str = Z_STRVAL_P(value);
343 
344 	PHP_FILTER_TRIM_DEFAULT(str, len);
345 	end = str + len;
346 
347 	FETCH_STRING_OPTION(decimal, "decimal");
348 
349 	if (decimal_set) {
350 		if (decimal_len != 1) {
351 			php_error_docref(NULL, E_WARNING, "decimal separator must be one char");
352 			RETURN_VALIDATION_FAILED
353 		} else {
354 			dec_sep = *decimal;
355 		}
356 	}
357 
358 	FETCH_STRING_OPTION(thousand, "thousand");
359 
360 	if (thousand_set) {
361 		if (thousand_len < 1) {
362 			php_error_docref(NULL, E_WARNING, "thousand separator must be at least one char");
363 			RETURN_VALIDATION_FAILED
364 		} else {
365 			tsd_sep = thousand;
366 		}
367 	} else {
368 		tsd_sep = "',.";
369 	}
370 
371 	num = p = emalloc(len+1);
372 	if (str < end && (*str == '+' || *str == '-')) {
373 		*p++ = *str++;
374 	}
375 	first = 1;
376 	while (1) {
377 		n = 0;
378 		while (str < end && *str >= '0' && *str <= '9') {
379 			++n;
380 			*p++ = *str++;
381 		}
382 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
383 			if (!first && n != 3) {
384 				goto error;
385 			}
386 			if (*str == dec_sep) {
387 				*p++ = '.';
388 				str++;
389 				while (str < end && *str >= '0' && *str <= '9') {
390 					*p++ = *str++;
391 				}
392 			}
393 			if (*str == 'e' || *str == 'E') {
394 				*p++ = *str++;
395 				if (str < end && (*str == '+' || *str == '-')) {
396 					*p++ = *str++;
397 				}
398 				while (str < end && *str >= '0' && *str <= '9') {
399 					*p++ = *str++;
400 				}
401 			}
402 			break;
403 		}
404 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && strchr(tsd_sep, *str)) {
405 			if (first?(n < 1 || n > 3):(n != 3)) {
406 				goto error;
407 			}
408 			first = 0;
409 			str++;
410 		} else {
411 			goto error;
412 		}
413 	}
414 	if (str != end) {
415 		goto error;
416 	}
417 	*p = 0;
418 
419 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
420 		case IS_LONG:
421 			zval_ptr_dtor(value);
422 			ZVAL_DOUBLE(value, (double)lval);
423 			break;
424 		case IS_DOUBLE:
425 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
426 				goto error;
427 			}
428 			zval_ptr_dtor(value);
429 			ZVAL_DOUBLE(value, dval);
430 			break;
431 		default:
432 error:
433 			efree(num);
434 			RETURN_VALIDATION_FAILED
435 	}
436 	efree(num);
437 }
438 /* }}} */
439 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)440 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
441 {
442 	zval *option_val;
443 	zend_string *regexp;
444 	int regexp_set;
445 	pcre2_code *re = NULL;
446 	pcre2_match_data *match_data = NULL;
447 	uint32_t preg_options, capture_count;
448 	int rc;
449 
450 	/* Parse options */
451 	FETCH_STR_OPTION(regexp, "regexp");
452 
453 	if (!regexp_set) {
454 		php_error_docref(NULL, E_WARNING, "'regexp' option missing");
455 		RETURN_VALIDATION_FAILED
456 	}
457 
458 	re = pcre_get_compiled_regex(regexp, &capture_count, &preg_options);
459 	if (!re) {
460 		RETURN_VALIDATION_FAILED
461 	}
462 	match_data = php_pcre_create_match_data(capture_count, re);
463 	if (!match_data) {
464 		RETURN_VALIDATION_FAILED
465 	}
466 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, preg_options, match_data, php_pcre_mctx());
467 	php_pcre_free_match_data(match_data);
468 
469 	/* 0 means that the vector is too small to hold all the captured substring offsets */
470 	if (rc < 0) {
471 		RETURN_VALIDATION_FAILED
472 	}
473 }
474 
/* Checks the length rules of a domain name and, with FILTER_FLAG_HOSTNAME,
 * its character rules.
 *
 * domain  name to check; only bytes in [domain, domain + len) are read
 * len     length in bytes (size_t, so lengths passed by callers are not
 *         truncated)
 * flags   FILTER_FLAG_HOSTNAME enables the alphanumeric / hyphen checks
 *
 * Rules enforced: one trailing dot is ignored; at most 253 bytes in total;
 * no empty labels; at most 63 bytes per label; with the hostname flag every
 * label must start and (when followed by a dot) end with an alphanumeric
 * and contain only alphanumerics and '-'.
 *
 * Returns 1 when the name is acceptable, 0 otherwise. */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l;
	int hostname = (flags & FILTER_FLAG_HOSTNAME) != 0;
	unsigned char i = 1;

	/* Nothing to inspect: an empty name is never a hostname, and without
	 * the hostname flag there is no rule left to violate. Returning here
	 * also avoids looking at domain[-1] below. */
	if (len == 0) {
		return hostname ? 0 : 1;
	}

	s = domain;
	l = len;
	e = domain + l;

	/* Ignore trailing dot */
	if (*(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric.
			 * s - 1 is valid because the first byte is never a dot; s + 1 is
			 * valid because a dot in the final position was stripped above. */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* i is the 1-based position inside the current label */
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
/* }}} */
526 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)527 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
528 {
529 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
530 		RETURN_VALIDATION_FAILED
531 	}
532 }
533 /* }}} */
534 
/* Checks that a URL user or password component consists only of letters,
 * digits, the characters -._~!$&'()*+,;=: and three-byte "%XX" escapes.
 *
 * Only bytes inside the string's length are read: an escape is considered
 * only when at least three bytes remain. Bytes are handled as unsigned char
 * so the <ctype.h> calls stay defined for values >= 0x80, and an embedded
 * NUL byte is rejected rather than matching the terminator of the allowed
 * set.
 *
 * Returns 1 when every byte is acceptable, 0 otherwise. */
static int is_userinfo_valid(zend_string *str)
{
	const char *valid = "-._~!$&'()*+,;=:";
	const unsigned char *p = (const unsigned char *) ZSTR_VAL(str);
	const unsigned char *end = p + ZSTR_LEN(str);

	while (p < end) {
		if (isalpha(*p) || isdigit(*p) || (*p != '\0' && strchr(valid, *p))) {
			p++;
		/* NOTE(review): the first escape digit is tested with isdigit(), so
		 * escapes such as %C3 are rejected. RFC 3986 defines pct-encoded as
		 * "%" HEXDIG HEXDIG; confirm whether the stricter check is intended. */
		} else if (*p == '%' && end - p >= 3 && isdigit(p[1]) && isxdigit(p[2])) {
			p += 3;
		} else {
			return 0;
		}
	}
	return 1;
}
550 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)551 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
552 {
553 	php_url *url;
554 	size_t old_len = Z_STRLEN_P(value);
555 
556 	if (flags & (FILTER_FLAG_SCHEME_REQUIRED | FILTER_FLAG_HOST_REQUIRED)) {
557 		php_error_docref(NULL, E_DEPRECATED,
558 			"explicit use of FILTER_FLAG_SCHEME_REQUIRED and FILTER_FLAG_HOST_REQUIRED is deprecated");
559 	}
560 
561 	php_filter_url(value, flags, option_array, charset);
562 
563 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
564 		RETURN_VALIDATION_FAILED
565 	}
566 
567 	/* Use parse_url - if it returns false, we return NULL */
568 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
569 
570 	if (url == NULL) {
571 		RETURN_VALIDATION_FAILED
572 	}
573 
574 	if (url->scheme != NULL &&
575 		(zend_string_equals_literal_ci(url->scheme, "http") || zend_string_equals_literal_ci(url->scheme, "https"))) {
576 		char *e, *s, *t;
577 		size_t l;
578 
579 		if (url->host == NULL) {
580 			goto bad_url;
581 		}
582 
583 		s = ZSTR_VAL(url->host);
584 		l = ZSTR_LEN(url->host);
585 		e = s + l;
586 		t = e - 1;
587 
588 		/* An IPv6 enclosed by square brackets is a valid hostname */
589 		if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2)) {
590 			php_url_free(url);
591 			return;
592 		}
593 
594 		// Validate domain
595 		if (!_php_filter_validate_domain(ZSTR_VAL(url->host), l, FILTER_FLAG_HOSTNAME)) {
596 			php_url_free(url);
597 			RETURN_VALIDATION_FAILED
598 		}
599 	}
600 
601 	if (
602 		url->scheme == NULL ||
603 		/* some schemas allow the host to be empty */
604 		(url->host == NULL && (strcmp(ZSTR_VAL(url->scheme), "mailto") && strcmp(ZSTR_VAL(url->scheme), "news") && strcmp(ZSTR_VAL(url->scheme), "file"))) ||
605 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
606 	) {
607 bad_url:
608 		php_url_free(url);
609 		RETURN_VALIDATION_FAILED
610 	}
611 
612 	if (url->user != NULL && !is_userinfo_valid(url->user)
613 		|| url->pass != NULL && !is_userinfo_valid(url->pass)
614 	) {
615 		php_url_free(url);
616 		RETURN_VALIDATION_FAILED
617 
618 	}
619 
620 	php_url_free(url);
621 }
622 /* }}} */
623 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)624 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
625 {
626 	/*
627 	 * The regex below is based on a regex by Michael Rushton.
628 	 * However, it is not identical.  I changed it to only consider routeable
629 	 * addresses as valid.  Michael's regex considers a@b a valid address
630 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
631 	 *
632 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
633 	 *   when domain names are used in SMTP.  In other words, names that can
634 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
635 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
636 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
637 	 *   unqualified names MUST NOT be used.
638 	 *
639 	 * This regex does not handle comments and folding whitespace.  While
640 	 * this is technically valid in an email address, these parts aren't
641 	 * actually part of the address itself.
642 	 *
643 	 * Michael's regex carries this copyright:
644 	 *
645 	 * Copyright © Michael Rushton 2009-10
646 	 * http://squiloople.com/
647 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
648 	 *
649 	 */
650 	pcre2_code *re = NULL;
651 	pcre2_match_data *match_data = NULL;
652 	uint32_t preg_options = 0, capture_count;
653 	zend_string *sregexp;
654 	int rc;
655 	const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
656 	const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
657 	const char *regexp;
658 	size_t regexp_len;
659 
660 	if (flags & FILTER_FLAG_EMAIL_UNICODE) {
661 		regexp = regexp0;
662 		regexp_len = sizeof(regexp0) - 1;
663 	} else {
664 		regexp = regexp1;
665 		regexp_len = sizeof(regexp1) - 1;
666 	}
667 
668 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
669 	if (Z_STRLEN_P(value) > 320) {
670 		RETURN_VALIDATION_FAILED
671 	}
672 
673 	sregexp = zend_string_init(regexp, regexp_len, 0);
674 	re = pcre_get_compiled_regex(sregexp, &capture_count, &preg_options);
675 	zend_string_release_ex(sregexp, 0);
676 	if (!re) {
677 		RETURN_VALIDATION_FAILED
678 	}
679 	match_data = php_pcre_create_match_data(capture_count, re);
680 	if (!match_data) {
681 		RETURN_VALIDATION_FAILED
682 	}
683 	rc = pcre2_match(re, (PCRE2_SPTR)Z_STRVAL_P(value), Z_STRLEN_P(value), 0, preg_options, match_data, php_pcre_mctx());
684 	php_pcre_free_match_data(match_data);
685 
686 	/* 0 means that the vector is too small to hold all the captured substring offsets */
687 	if (rc < 0) {
688 		RETURN_VALIDATION_FAILED
689 	}
690 
691 }
692 /* }}} */
693 
/* Validates a dotted-quad IPv4 address.
 *
 * str      candidate text; only [str, str + str_len) is read
 * str_len  number of bytes in str
 * ip       receives the octets as they are parsed; must have room for four
 *          ints (entries may be written even when validation fails later)
 *
 * Exactly four decimal octets separated by single dots are required. Each
 * octet is 0..255, at most three digits, and must not have a leading zero
 * (a lone "0" is fine).
 *
 * Returns 1 when the whole input is a valid address, 0 otherwise. */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *limit = str + str_len;
	int octets = 0;

	while (cur < limit) {
		const char lead = *cur;
		int octet;
		int digits;

		if (lead < '0' || lead > '9') {
			return 0;
		}
		octet = lead - '0';
		digits = 1;
		cur++;

		for (; cur < limit && *cur >= '0' && *cur <= '9'; cur++) {
			octet = octet * 10 + (*cur - '0');
			digits++;
			if (octet > 255 || digits > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (lead == '0' && digits > 1) {
			return 0;
		}

		ip[octets++] = octet;

		if (octets == 4) {
			/* nothing may follow the fourth octet */
			return cur == limit;
		}
		if (cur >= limit || *cur != '.') {
			return 0;
		}
		cur++; /* step over the dot */
	}

	/* ran out of input before four octets were seen */
	return 0;
}
/* }}} */
728 
/* Validates the textual form of an IPv6 address, including "::" compression
 * and an optional trailing dotted-quad IPv4 part.
 *
 * str      candidate text (without brackets); only str_len bytes are examined
 * str_len  number of bytes in str
 *
 * The address is counted in 16-bit blocks: each group of 1..4 hex digits is
 * one block, an embedded IPv4 tail counts as two, and a "::" counts as one
 * (it stands for one or more zero blocks). A valid address has exactly eight
 * blocks, or at most eight when "::" is present.
 *
 * Returns non-zero when valid, 0 otherwise. */
static int _php_filter_validate_ipv6(char *str, size_t str_len) /* {{{ */
{
	int compressed = 0; /* set once a "::" has been seen */
	int blocks = 0;     /* number of 16-bit blocks accounted for so far */
	int n;              /* hex digits in the current group */
	char *ipv4;
	char *end;
	int ip4elm[4];      /* scratch output for the IPv4 validator; not used further */
	char *s = str;      /* remembers the start of the input */

	/* an IPv6 address needs at least one colon */
	if (!memchr(str, ':', str_len)) {
		return 0;
	}

	/* check for bundled IPv4 */
	ipv4 = memchr(str, '.', str_len);
	if (ipv4) {
		/* walk back from the first dot to the character after the last colon */
		while (ipv4 > str && *(ipv4-1) != ':') {
			ipv4--;
		}

		if (!_php_filter_validate_ipv4(ipv4, (str_len - (ipv4 - str)), ip4elm)) {
			return 0;
		}

		str_len = ipv4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (ipv4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* the 32-bit IPv4 tail stands for two 16-bit blocks */
		blocks = 2;
	}

	end = str + str_len;

	while (str < end) {
		if (*str == ':') {
			if (++str >= end) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*str == ':') {
				/* only one "::" is allowed per address */
				if (compressed) {
					return 0;
				}
				blocks++; /* :: means 1 or more 16-bit 0 blocks */
				compressed = 1;

				/* "::" at the very end: nothing more to parse */
				if (++str == end) {
					return (blocks <= 8);
				}
			} else if ((str - 1) == s) {
				/* don't allow leading : without another : following */
				return 0;
			}
		}
		/* consume one group of hex digits */
		n = 0;
		while ((str < end) &&
		       ((*str >= '0' && *str <= '9') ||
		        (*str >= 'a' && *str <= 'f') ||
		        (*str >= 'A' && *str <= 'F'))) {
			n++;
			str++;
		}
		/* a group is 1..4 digits; n == 0 also catches any unexpected character */
		if (n < 1 || n > 4) {
			return 0;
		}
		if (++blocks > 8)
			return 0;
	}
	return ((compressed && blocks <= 8) || blocks == 8);
}
/* }}} */
807 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)808 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
809 {
810 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
811 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
812 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
813 	 * colon determine the format */
814 
815 	int            ip[4];
816 	int            mode;
817 
818 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
819 		mode = FORMAT_IPV6;
820 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
821 		mode = FORMAT_IPV4;
822 	} else {
823 		RETURN_VALIDATION_FAILED
824 	}
825 
826 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
827 		/* Both formats are cool */
828 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
829 		RETURN_VALIDATION_FAILED
830 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
831 		RETURN_VALIDATION_FAILED
832 	}
833 
834 	switch (mode) {
835 		case FORMAT_IPV4:
836 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
837 				RETURN_VALIDATION_FAILED
838 			}
839 
840 			/* Check flags */
841 			if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
842 				if (
843 					(ip[0] == 10) ||
844 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
845 					(ip[0] == 192 && ip[1] == 168)
846 				) {
847 					RETURN_VALIDATION_FAILED
848 				}
849 			}
850 
851 			if (flags & FILTER_FLAG_NO_RES_RANGE) {
852 				if (
853 					(ip[0] == 0) ||
854 					(ip[0] >= 240) ||
855 					(ip[0] == 127) ||
856 					(ip[0] == 169 && ip[1] == 254)
857 				) {
858 					RETURN_VALIDATION_FAILED
859 				}
860 			}
861 			break;
862 
863 		case FORMAT_IPV6:
864 			{
865 				int res = 0;
866 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value));
867 				if (res < 1) {
868 					RETURN_VALIDATION_FAILED
869 				}
870 				/* Check flags */
871 				if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
872 					if (Z_STRLEN_P(value) >=2 && (!strncasecmp("FC", Z_STRVAL_P(value), 2) || !strncasecmp("FD", Z_STRVAL_P(value), 2))) {
873 						RETURN_VALIDATION_FAILED
874 					}
875 				}
876 				if (flags & FILTER_FLAG_NO_RES_RANGE) {
877 					switch (Z_STRLEN_P(value)) {
878 						case 1: case 0:
879 							break;
880 						case 2:
881 							if (!strcmp("::", Z_STRVAL_P(value))) {
882 								RETURN_VALIDATION_FAILED
883 							}
884 							break;
885 						case 3:
886 							if (!strcmp("::1", Z_STRVAL_P(value)) || !strcmp("5f:", Z_STRVAL_P(value))) {
887 								RETURN_VALIDATION_FAILED
888 							}
889 							break;
890 						default:
891 							if (Z_STRLEN_P(value) >= 5) {
892 								if (
893 									!strncasecmp("fe8", Z_STRVAL_P(value), 3) ||
894 									!strncasecmp("fe9", Z_STRVAL_P(value), 3) ||
895 									!strncasecmp("fea", Z_STRVAL_P(value), 3) ||
896 									!strncasecmp("feb", Z_STRVAL_P(value), 3)
897 								) {
898 									RETURN_VALIDATION_FAILED
899 								}
900 							}
901 							if (
902 								(Z_STRLEN_P(value) >= 9 &&  !strncasecmp("2001:0db8", Z_STRVAL_P(value), 9)) ||
903 								(Z_STRLEN_P(value) >= 2 &&  !strncasecmp("5f", Z_STRVAL_P(value), 2)) ||
904 								(Z_STRLEN_P(value) >= 4 &&  !strncasecmp("3ff3", Z_STRVAL_P(value), 4)) ||
905 								(Z_STRLEN_P(value) >= 8 &&  !strncasecmp("2001:001", Z_STRVAL_P(value), 8))
906 							) {
907 								RETURN_VALIDATION_FAILED
908 							}
909 					}
910 				}
911 			}
912 			break;
913 	}
914 }
915 /* }}} */
916 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)917 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
918 {
919 	char *input = Z_STRVAL_P(value);
920 	size_t input_len = Z_STRLEN_P(value);
921 	int tokens, length, i, offset, exp_separator_set;
922 	size_t exp_separator_len;
923 	char separator;
924 	char *exp_separator;
925 	zend_long ret = 0;
926 	zval *option_val;
927 
928 	FETCH_STRING_OPTION(exp_separator, "separator");
929 
930 	if (exp_separator_set && exp_separator_len != 1) {
931 		php_error_docref(NULL, E_WARNING, "Separator must be exactly one character long");
932 		RETURN_VALIDATION_FAILED;
933 	}
934 
935 	if (14 == input_len) {
936 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
937 		 * commonly used but valid nonetheless.
938 		 */
939 		tokens = 3;
940 		length = 4;
941 		separator = '.';
942 	} else if (17 == input_len && input[2] == '-') {
943 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
944 		tokens = 6;
945 		length = 2;
946 		separator = '-';
947 	} else if (17 == input_len && input[2] == ':') {
948 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
949 		tokens = 6;
950 		length = 2;
951 		separator = ':';
952 	} else {
953 		RETURN_VALIDATION_FAILED;
954 	}
955 
956 	if (exp_separator_set && separator != exp_separator[0]) {
957 		RETURN_VALIDATION_FAILED;
958 	}
959 
960 	/* Essentially what we now have is a set of tokens each consisting of
961 	 * a hexadecimal number followed by a separator character. (With the
962 	 * exception of the last token which does not have the separator.)
963 	 */
964 	for (i = 0; i < tokens; i++) {
965 		offset = i * (length + 1);
966 
967 		if (i < tokens - 1 && input[offset + length] != separator) {
968 			/* The current token did not end with e.g. a "." */
969 			RETURN_VALIDATION_FAILED
970 		}
971 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
972 			/* The current token is no valid hexadecimal digit */
973 			RETURN_VALIDATION_FAILED
974 		}
975 	}
976 }
977 /* }}} */
978 
979 /*
980  * Local variables:
981  * tab-width: 4
982  * c-basic-offset: 4
983  * End:
984  * vim600: noet sw=4 ts=4 fdm=marker
985  * vim<600: noet sw=4 ts=4
986  */
987