xref: /PHP-7.2/ext/filter/logical_filters.c (revision 7a7ec01a)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2018 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   |          Pierre-A. Joye <pierre@php.net>                             |
17   |          Kévin Dunglas <dunglas@gmail.com>                           |
18   +----------------------------------------------------------------------+
19 */
20 
21 /* $Id$ */
22 
23 #include "php_filter.h"
24 #include "filter_private.h"
25 #include "ext/standard/url.h"
26 #include "ext/pcre/php_pcre.h"
27 
28 #include "zend_multiply.h"
29 
30 #if HAVE_ARPA_INET_H
31 # include <arpa/inet.h>
32 #endif
33 
34 #ifndef INADDR_NONE
35 # define INADDR_NONE ((unsigned long int) -1)
36 #endif
37 
38 
/* {{{ FETCH_LONG_OPTION(var_name, option_name)
 *
 * Looks up the key `option_name` (must be a string literal: its length is
 * taken with sizeof) in `option_array` and stores the entry, converted with
 * zval_get_long(), in `var_name`.
 *
 * After the macro, `var_name##_set` is 1 if the key was found and 0 if not;
 * in the latter case `var_name` is 0.
 *
 * The expansion site must have `option_array` and a scratch
 * `zval *option_val` in scope. The body is wrapped in do { } while (0) so the
 * macro behaves as one statement, including under an unbraced if/else; invoke
 * it with a trailing semicolon.
 */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
50 
/* {{{ FETCH_STRING_OPTION(var_name, option_name)
 *
 * Looks up the key `option_name` (must be a string literal: its length is
 * taken with sizeof) in `option_array`. If the entry exists and is an
 * IS_STRING zval, `var_name` receives its char pointer, `var_name##_len` its
 * length and `var_name##_set` becomes 1. Otherwise `var_name` is NULL and
 * both `var_name##_len` and `var_name##_set` are 0.
 *
 * The expansion site must have `option_array` and a scratch
 * `zval *option_val` in scope. The body is wrapped in do { } while (0) so the
 * macro behaves as one statement, including under an unbraced if/else; invoke
 * it with a trailing semicolon.
 */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL && Z_TYPE_P(option_val) == IS_STRING) { \
				var_name = Z_STRVAL_P(option_val); \
				var_name##_len = Z_STRLEN_P(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
66 
/* {{{ FETCH_STR_OPTION(var_name, option_name)
 *
 * Like FETCH_STRING_OPTION, but yields the option as a zend_string pointer
 * (Z_STR_P) instead of a char pointer plus length. If the key `option_name`
 * (a string literal) exists in `option_array` and is an IS_STRING zval,
 * `var_name` points at its zend_string and `var_name##_set` is 1; otherwise
 * `var_name` is NULL and `var_name##_set` is 0.
 *
 * The expansion site must have `option_array` and a scratch
 * `zval *option_val` in scope. The body is wrapped in do { } while (0) so the
 * macro behaves as one statement, including under an unbraced if/else; invoke
 * it with a trailing semicolon.
 */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1); \
			if (option_val != NULL && Z_TYPE_P(option_val) == IS_STRING) { \
				var_name = Z_STR_P(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
80 
/* Address notation detected by php_filter_validate_ip(). */
enum {
	FORMAT_IPV4 = 4,
	FORMAT_IPV6 = 6
};

/* Forward declaration: php_filter_validate_url() calls this before its definition. */
static int _php_filter_validate_ipv6(char *str, size_t str_len);
85 
/*
 * Parses a decimal integer with an optional leading '+' or '-'.
 *
 * Accepted forms are "0" (optionally signed) or a digit string starting with
 * 1..9; a leading zero followed by more digits is rejected. Only the bytes in
 * [str, str + str_len) are examined.
 *
 * Returns 1 and stores the value in *ret on success. Returns -1 on empty
 * input, a lone sign, a non-digit character or a value outside the zend_long
 * range; *ret is left untouched in that case.
 */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const char *end = str + str_len;
	zend_long ctx_value;
	int sign = 0;

	/* Nothing to parse; also keeps every read below inside [str, end). */
	if (str == end) {
		return -1;
	}

	/* Optional sign */
	if (*str == '-') {
		sign = 1;
		str++;
	} else if (*str == '+') {
		str++;
	}

	if (str == end) {
		/* A sign with no digits */
		return -1;
	}

	if (*str == '0' && str + 1 == end) {
		/* Special cases: 0, +0 and -0. Report the value explicitly instead of
		 * relying on the caller having pre-initialised *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9 */
	if (*str < '1' || *str > '9') {
		return -1;
	}
	ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');

	/* Cheap early rejection of inputs that are obviously too long; the loop
	 * below still performs the exact overflow check. */
	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		int digit;

		if (*str < '0' || *str > '9') {
			return -1;
		}
		digit = (*(str++) - '0');

		/* Accumulate towards +inf or -inf depending on the sign, checking
		 * the bound before multiplying so the arithmetic never overflows. */
		if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
			ctx_value = (ctx_value * 10) + digit;
		} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
			ctx_value = (ctx_value * 10) - digit;
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
/* }}} */
137 
/*
 * Parses str[0 .. str_len) as a run of octal digits (no prefix, no sign).
 *
 * The value is accumulated in a zend_ulong and must fit in
 * (zend_ulong)(~(zend_long)0); it is then cast to zend_long for *ret.
 * An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-octal character or overflow (*ret is not
 * written on failure).
 */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	zend_ulong acc = 0;
	size_t pos;

	for (pos = 0; pos < str_len; pos++) {
		zend_ulong digit;

		if (str[pos] < '0' || str[pos] > '7') {
			return -1;
		}
		digit = (str[pos] - '0');

		/* Check before shifting in the next digit, then before adding it. */
		if (acc > limit / 8) {
			return -1;
		}
		acc = acc * 8;
		if (acc > limit - digit) {
			return -1;
		}
		acc += digit;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
160 
/*
 * Parses str[0 .. str_len) as a run of hexadecimal digits (0-9, a-f, A-F;
 * no "0x" prefix, no sign).
 *
 * The value is accumulated in a zend_ulong and must fit in
 * (zend_ulong)(~(zend_long)0); it is then cast to zend_long for *ret.
 * An empty input yields 0.
 *
 * Returns 1 on success, -1 on a non-hex character or overflow (*ret is not
 * written on failure).
 */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	const zend_ulong limit = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	const char *cur;
	zend_ulong acc = 0;

	for (cur = str; cur < stop; cur++) {
		const char c = *cur;
		zend_ulong nibble;

		if (c >= '0' && c <= '9') {
			nibble = (c - '0');
		} else if (c >= 'a' && c <= 'f') {
			nibble = (c - ('a' - 10));
		} else if (c >= 'A' && c <= 'F') {
			nibble = (c - ('A' - 10));
		} else {
			return -1;
		}

		/* Check before shifting in the next digit, then before adding it. */
		if (acc > limit / 16) {
			return -1;
		}
		acc = acc * 16;
		if (acc > limit - nibble) {
			return -1;
		}
		acc += nibble;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
187 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)188 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
189 {
190 	zval *option_val;
191 	zend_long  min_range, max_range, option_flags;
192 	int   min_range_set, max_range_set;
193 	int   allow_octal = 0, allow_hex = 0;
194 	size_t	  len;
195 	int error = 0;
196 	zend_long  ctx_value;
197 	char *p;
198 
199 	/* Parse options */
200 	FETCH_LONG_OPTION(min_range,    "min_range");
201 	FETCH_LONG_OPTION(max_range,    "max_range");
202 	option_flags = flags;
203 
204 	len = Z_STRLEN_P(value);
205 
206 	if (len == 0) {
207 		RETURN_VALIDATION_FAILED
208 	}
209 
210 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
211 		allow_octal = 1;
212 	}
213 
214 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
215 		allow_hex = 1;
216 	}
217 
218 	/* Start the validating loop */
219 	p = Z_STRVAL_P(value);
220 	ctx_value = 0;
221 
222 	PHP_FILTER_TRIM_DEFAULT(p, len);
223 
224 	if (*p == '0') {
225 		p++; len--;
226 		if (allow_hex && (*p == 'x' || *p == 'X')) {
227 			p++; len--;
228 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
229 				error = 1;
230 			}
231 		} else if (allow_octal) {
232 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
233 				error = 1;
234 			}
235 		} else if (len != 0) {
236 			error = 1;
237 		}
238 	} else {
239 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
240 			error = 1;
241 		}
242 	}
243 
244 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
245 		RETURN_VALIDATION_FAILED
246 	} else {
247 		zval_ptr_dtor(value);
248 		ZVAL_LONG(value, ctx_value);
249 		return;
250 	}
251 }
252 /* }}} */
253 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)254 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
255 {
256 	char *str = Z_STRVAL_P(value);
257 	size_t len = Z_STRLEN_P(value);
258 	int ret;
259 
260 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
261 
262 	/* returns true for "1", "true", "on" and "yes"
263 	 * returns false for "0", "false", "off", "no", and ""
264 	 * null otherwise. */
265 	switch (len) {
266 		case 0:
267 			ret = 0;
268 			break;
269 		case 1:
270 			if (*str == '1') {
271 				ret = 1;
272 			} else if (*str == '0') {
273 				ret = 0;
274 			} else {
275 				ret = -1;
276 			}
277 			break;
278 		case 2:
279 			if (strncasecmp(str, "on", 2) == 0) {
280 				ret = 1;
281 			} else if (strncasecmp(str, "no", 2) == 0) {
282 				ret = 0;
283 			} else {
284 				ret = -1;
285 			}
286 			break;
287 		case 3:
288 			if (strncasecmp(str, "yes", 3) == 0) {
289 				ret = 1;
290 			} else if (strncasecmp(str, "off", 3) == 0) {
291 				ret = 0;
292 			} else {
293 				ret = -1;
294 			}
295 			break;
296 		case 4:
297 			if (strncasecmp(str, "true", 4) == 0) {
298 				ret = 1;
299 			} else {
300 				ret = -1;
301 			}
302 			break;
303 		case 5:
304 			if (strncasecmp(str, "false", 5) == 0) {
305 				ret = 0;
306 			} else {
307 				ret = -1;
308 			}
309 			break;
310 		default:
311 			ret = -1;
312 	}
313 
314 	if (ret == -1) {
315 		RETURN_VALIDATION_FAILED
316 	} else {
317 		zval_ptr_dtor(value);
318 		ZVAL_BOOL(value, ret);
319 	}
320 }
321 /* }}} */
322 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)323 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
324 {
325 	size_t len;
326 	char *str, *end;
327 	char *num, *p;
328 	zval *option_val;
329 	char *decimal;
330 	int decimal_set;
331 	size_t decimal_len;
332 	char dec_sep = '.';
333 	char tsd_sep[3] = "',.";
334 
335 	zend_long lval;
336 	double dval;
337 
338 	int first, n;
339 
340 	len = Z_STRLEN_P(value);
341 	str = Z_STRVAL_P(value);
342 
343 	PHP_FILTER_TRIM_DEFAULT(str, len);
344 	end = str + len;
345 
346 	FETCH_STRING_OPTION(decimal, "decimal");
347 
348 	if (decimal_set) {
349 		if (decimal_len != 1) {
350 			php_error_docref(NULL, E_WARNING, "decimal separator must be one char");
351 			RETURN_VALIDATION_FAILED
352 		} else {
353 			dec_sep = *decimal;
354 		}
355 	}
356 
357 	num = p = emalloc(len+1);
358 	if (str < end && (*str == '+' || *str == '-')) {
359 		*p++ = *str++;
360 	}
361 	first = 1;
362 	while (1) {
363 		n = 0;
364 		while (str < end && *str >= '0' && *str <= '9') {
365 			++n;
366 			*p++ = *str++;
367 		}
368 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
369 			if (!first && n != 3) {
370 				goto error;
371 			}
372 			if (*str == dec_sep) {
373 				*p++ = '.';
374 				str++;
375 				while (str < end && *str >= '0' && *str <= '9') {
376 					*p++ = *str++;
377 				}
378 			}
379 			if (*str == 'e' || *str == 'E') {
380 				*p++ = *str++;
381 				if (str < end && (*str == '+' || *str == '-')) {
382 					*p++ = *str++;
383 				}
384 				while (str < end && *str >= '0' && *str <= '9') {
385 					*p++ = *str++;
386 				}
387 			}
388 			break;
389 		}
390 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && (*str == tsd_sep[0] || *str == tsd_sep[1] || *str == tsd_sep[2])) {
391 			if (first?(n < 1 || n > 3):(n != 3)) {
392 				goto error;
393 			}
394 			first = 0;
395 			str++;
396 		} else {
397 			goto error;
398 		}
399 	}
400 	if (str != end) {
401 		goto error;
402 	}
403 	*p = 0;
404 
405 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
406 		case IS_LONG:
407 			zval_ptr_dtor(value);
408 			ZVAL_DOUBLE(value, (double)lval);
409 			break;
410 		case IS_DOUBLE:
411 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
412 				goto error;
413 			}
414 			zval_ptr_dtor(value);
415 			ZVAL_DOUBLE(value, dval);
416 			break;
417 		default:
418 error:
419 			efree(num);
420 			RETURN_VALIDATION_FAILED
421 	}
422 	efree(num);
423 }
424 /* }}} */
425 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)426 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
427 {
428 	zval *option_val;
429 	zend_string *regexp;
430 	int regexp_set;
431 	pcre *re = NULL;
432 	pcre_extra *pcre_extra = NULL;
433 	int preg_options = 0;
434 	int ovector[3];
435 	int matches;
436 
437 	/* Parse options */
438 	FETCH_STR_OPTION(regexp, "regexp");
439 
440 	if (!regexp_set) {
441 		php_error_docref(NULL, E_WARNING, "'regexp' option missing");
442 		RETURN_VALIDATION_FAILED
443 	}
444 
445 	re = pcre_get_compiled_regex(regexp, &pcre_extra, &preg_options);
446 	if (!re) {
447 		RETURN_VALIDATION_FAILED
448 	}
449 	matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
450 
451 	/* 0 means that the vector is too small to hold all the captured substring offsets */
452 	if (matches < 0) {
453 		RETURN_VALIDATION_FAILED
454 	}
455 }
456 
/*
 * Checks the length rules of a domain name and, with FILTER_FLAG_HOSTNAME,
 * its character rules.
 *
 * Always enforced:
 *   - one trailing dot is ignored;
 *   - total length (without that dot) is at most 253;
 *   - the name does not start with '.', and no two dots are adjacent;
 *   - every label is at most 63 characters long.
 * With FILTER_FLAG_HOSTNAME in `flags`, additionally:
 *   - labels consist of alphanumerics and '-' only;
 *   - the first and last character of every label are alphanumeric
 *     (this includes the last character of the final label).
 *
 * `len` is a size_t so that callers' string lengths are not truncated. An
 * empty name is accepted only when FILTER_FLAG_HOSTNAME is not set.
 *
 * Returns 1 if the name is acceptable, 0 otherwise.
 */
static int _php_filter_validate_domain(char * domain, size_t len, zend_long flags) /* {{{ */
{
	char *e, *s;
	size_t l = len;
	int hostname = flags & FILTER_FLAG_HOSTNAME;
	unsigned char i = 1;

	/* Handle the empty name up front: everything below looks at
	 * domain[0] and domain[len - 1]. */
	if (l == 0) {
		return hostname ? 0 : 1;
	}

	s = domain;
	e = domain + l;

	/* Ignore trailing dot */
	if (*(e - 1) == '.') {
		e--;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	/* The final label is not followed by a dot inside [s, e), so its last
	 * character has to be checked separately. s < e holds here because a
	 * name consisting only of "." was rejected just above. */
	if (hostname && !isalnum((int)*(unsigned char *)(e - 1))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* i is the 1-based position inside the current label */
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
/* }}} */
508 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)509 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
510 {
511 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
512 		RETURN_VALIDATION_FAILED
513 	}
514 }
515 /* }}} */
516 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)517 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
518 {
519 	php_url *url;
520 	size_t old_len = Z_STRLEN_P(value);
521 
522 	php_filter_url(value, flags, option_array, charset);
523 
524 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
525 		RETURN_VALIDATION_FAILED
526 	}
527 
528 	/* Use parse_url - if it returns false, we return NULL */
529 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
530 
531 	if (url == NULL) {
532 		RETURN_VALIDATION_FAILED
533 	}
534 
535 	if (url->scheme != NULL && (!strcasecmp(url->scheme, "http") || !strcasecmp(url->scheme, "https"))) {
536 		char *e, *s, *t;
537 		size_t l;
538 
539 		if (url->host == NULL) {
540 			goto bad_url;
541 		}
542 
543 		s = url->host;
544 		l = strlen(s);
545 		e = url->host + l;
546 		t = e - 1;
547 
548 		/* An IPv6 enclosed by square brackets is a valid hostname */
549 		if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2)) {
550 			php_url_free(url);
551 			return;
552 		}
553 
554 		// Validate domain
555 		if (!_php_filter_validate_domain(url->host, l, FILTER_FLAG_HOSTNAME)) {
556 			php_url_free(url);
557 			RETURN_VALIDATION_FAILED
558 		}
559 	}
560 
561 	if (
562 		url->scheme == NULL ||
563 		/* some schemas allow the host to be empty */
564 		(url->host == NULL && (strcmp(url->scheme, "mailto") && strcmp(url->scheme, "news") && strcmp(url->scheme, "file"))) ||
565 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
566 	) {
567 bad_url:
568 		php_url_free(url);
569 		RETURN_VALIDATION_FAILED
570 	}
571 	php_url_free(url);
572 }
573 /* }}} */
574 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)575 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
576 {
577 	/*
578 	 * The regex below is based on a regex by Michael Rushton.
579 	 * However, it is not identical.  I changed it to only consider routeable
580 	 * addresses as valid.  Michael's regex considers a@b a valid address
581 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
582 	 *
583 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
584 	 *   when domain names are used in SMTP.  In other words, names that can
585 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
586 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
587 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
588 	 *   unqualified names MUST NOT be used.
589 	 *
590 	 * This regex does not handle comments and folding whitespace.  While
591 	 * this is technically valid in an email address, these parts aren't
592 	 * actually part of the address itself.
593 	 *
594 	 * Michael's regex carries this copyright:
595 	 *
596 	 * Copyright © Michael Rushton 2009-10
597 	 * http://squiloople.com/
598 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
599 	 *
600 	 */
601 	pcre       *re = NULL;
602 	pcre_extra *pcre_extra = NULL;
603 	int preg_options = 0;
604 	int         ovector[150]; /* Needs to be a multiple of 3 */
605 	int         matches;
606 	zend_string *sregexp;
607 	const char regexp0[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E\\pL\\pN]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F\\pL\\pN]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iDu";
608 	const char regexp1[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
609 	const char *regexp;
610 	size_t regexp_len;
611 
612 	if (flags & FILTER_FLAG_EMAIL_UNICODE) {
613 		regexp = regexp0;
614 		regexp_len = sizeof(regexp0) - 1;
615 	} else {
616 		regexp = regexp1;
617 		regexp_len = sizeof(regexp1) - 1;
618 	}
619 
620 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
621 	if (Z_STRLEN_P(value) > 320) {
622 		RETURN_VALIDATION_FAILED
623 	}
624 
625 	sregexp = zend_string_init(regexp, regexp_len, 0);
626 	re = pcre_get_compiled_regex(sregexp, &pcre_extra, &preg_options);
627 	if (!re) {
628 		zend_string_release(sregexp);
629 		RETURN_VALIDATION_FAILED
630 	}
631 	zend_string_release(sregexp);
632 	matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
633 
634 	/* 0 means that the vector is too small to hold all the captured substring offsets */
635 	if (matches < 0) {
636 		RETURN_VALIDATION_FAILED
637 	}
638 
639 }
640 /* }}} */
641 
/*
 * Validates a dotted-quad IPv4 address held in str[0 .. str_len).
 *
 * Exactly four decimal fields separated by single dots are required. Each
 * field has 1-3 digits, a value of at most 255, and no leading zero unless
 * the field is the single digit "0". Nothing may follow the fourth field.
 *
 * Each field is stored into ip[] as soon as it has been parsed, so ip[] may
 * be partially filled when the function fails.
 *
 * Returns 1 if the address is valid, 0 otherwise.
 */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *const stop = str + str_len;
	int fields = 0;

	while (cur < stop) {
		int has_leading_zero;
		int field_value;
		int digit_count;

		/* Every field starts with a digit */
		if (*cur < '0' || *cur > '9') {
			return 0;
		}
		has_leading_zero = (*cur == '0');
		field_value = *cur - '0';
		digit_count = 1;
		cur++;

		for (; cur < stop && *cur >= '0' && *cur <= '9'; cur++) {
			field_value = field_value * 10 + (*cur - '0');
			digit_count++;
			if (field_value > 255 || digit_count > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support. A lone "0" is the only exception. */
		if (has_leading_zero && digit_count > 1) {
			return 0;
		}

		ip[fields] = field_value;
		fields++;

		if (fields == 4) {
			/* Valid only if the input ends right here */
			return cur == stop;
		}

		/* Fields one to three must be followed by a dot */
		if (cur >= stop || *cur != '.') {
			return 0;
		}
		cur++;
	}

	/* Ran out of input before four fields were read */
	return 0;
}
/* }}} */
676 
/*
 * Validates the textual IPv6 address held in str[0 .. str_len).
 *
 * The address must contain at least one ':'. It consists of groups of 1-4
 * hex digits separated by ':'; a single "::" may stand in for one or more
 * omitted groups. If the text contains a '.', the part after the last
 * preceding ':' is validated as a dotted-quad IPv4 address by
 * _php_filter_validate_ipv4() and counted as two groups. In total exactly
 * eight groups are required without "::", and at most eight with it.
 *
 * Returns non-zero if the address is valid, 0 otherwise.
 */
static int _php_filter_validate_ipv6(char *str, size_t str_len) /* {{{ */
{
	char *const start = str;
	char *cur = str;
	char *stop;
	char *v4_part;
	int v4_fields[4];
	int groups = 0;
	int seen_gap = 0; /* set once "::" has been consumed */

	if (memchr(start, ':', str_len) == NULL) {
		return 0;
	}

	/* check for bundled IPv4 */
	v4_part = memchr(start, '.', str_len);
	if (v4_part != NULL) {
		/* Rewind from the first dot to the character after the previous ':' */
		for (; v4_part > start && *(v4_part - 1) != ':'; v4_part--) {
			/* nothing */
		}

		if (!_php_filter_validate_ipv4(v4_part, (str_len - (v4_part - start)), v4_fields)) {
			return 0;
		}

		str_len = v4_part - start; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (v4_part[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The embedded IPv4 address occupies the last two 16-bit groups */
		groups = 2;
	}

	stop = start + str_len;

	while (cur < stop) {
		int hex_digits = 0;

		if (*cur == ':') {
			cur++;
			if (cur >= stop) {
				/* cannot end in : without previous : */
				return 0;
			}
			if (*cur == ':') {
				if (seen_gap) {
					/* only one "::" is allowed */
					return 0;
				}
				seen_gap = 1;
				groups++; /* :: means 1 or more 16-bit 0 blocks */

				cur++;
				if (cur == stop) {
					return (groups <= 8);
				}
			} else if ((cur - 1) == start) {
				/* dont allow leading : without another : following */
				return 0;
			}
		}

		/* One group of hex digits */
		while ((cur < stop) &&
		       ((*cur >= '0' && *cur <= '9') ||
		        (*cur >= 'a' && *cur <= 'f') ||
		        (*cur >= 'A' && *cur <= 'F'))) {
			hex_digits++;
			cur++;
		}
		if (hex_digits < 1 || hex_digits > 4) {
			return 0;
		}

		groups++;
		if (groups > 8) {
			return 0;
		}
	}

	return ((seen_gap && groups <= 8) || groups == 8);
}
/* }}} */
755 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)756 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
757 {
758 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
759 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
760 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
761 	 * colon determine the format */
762 
763 	int            ip[4];
764 	int            mode;
765 
766 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
767 		mode = FORMAT_IPV6;
768 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
769 		mode = FORMAT_IPV4;
770 	} else {
771 		RETURN_VALIDATION_FAILED
772 	}
773 
774 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
775 		/* Both formats are cool */
776 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
777 		RETURN_VALIDATION_FAILED
778 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
779 		RETURN_VALIDATION_FAILED
780 	}
781 
782 	switch (mode) {
783 		case FORMAT_IPV4:
784 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
785 				RETURN_VALIDATION_FAILED
786 			}
787 
788 			/* Check flags */
789 			if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
790 				if (
791 					(ip[0] == 10) ||
792 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
793 					(ip[0] == 192 && ip[1] == 168)
794 				) {
795 					RETURN_VALIDATION_FAILED
796 				}
797 			}
798 
799 			if (flags & FILTER_FLAG_NO_RES_RANGE) {
800 				if (
801 					(ip[0] == 0) ||
802 					(ip[0] >= 240) ||
803 					(ip[0] == 127) ||
804 					(ip[0] == 169 && ip[1] == 254)
805 				) {
806 					RETURN_VALIDATION_FAILED
807 				}
808 			}
809 			break;
810 
811 		case FORMAT_IPV6:
812 			{
813 				int res = 0;
814 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value));
815 				if (res < 1) {
816 					RETURN_VALIDATION_FAILED
817 				}
818 				/* Check flags */
819 				if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
820 					if (Z_STRLEN_P(value) >=2 && (!strncasecmp("FC", Z_STRVAL_P(value), 2) || !strncasecmp("FD", Z_STRVAL_P(value), 2))) {
821 						RETURN_VALIDATION_FAILED
822 					}
823 				}
824 				if (flags & FILTER_FLAG_NO_RES_RANGE) {
825 					switch (Z_STRLEN_P(value)) {
826 						case 1: case 0:
827 							break;
828 						case 2:
829 							if (!strcmp("::", Z_STRVAL_P(value))) {
830 								RETURN_VALIDATION_FAILED
831 							}
832 							break;
833 						case 3:
834 							if (!strcmp("::1", Z_STRVAL_P(value)) || !strcmp("5f:", Z_STRVAL_P(value))) {
835 								RETURN_VALIDATION_FAILED
836 							}
837 							break;
838 						default:
839 							if (Z_STRLEN_P(value) >= 5) {
840 								if (
841 									!strncasecmp("fe8", Z_STRVAL_P(value), 3) ||
842 									!strncasecmp("fe9", Z_STRVAL_P(value), 3) ||
843 									!strncasecmp("fea", Z_STRVAL_P(value), 3) ||
844 									!strncasecmp("feb", Z_STRVAL_P(value), 3)
845 								) {
846 									RETURN_VALIDATION_FAILED
847 								}
848 							}
849 							if (
850 								(Z_STRLEN_P(value) >= 9 &&  !strncasecmp("2001:0db8", Z_STRVAL_P(value), 9)) ||
851 								(Z_STRLEN_P(value) >= 2 &&  !strncasecmp("5f", Z_STRVAL_P(value), 2)) ||
852 								(Z_STRLEN_P(value) >= 4 &&  !strncasecmp("3ff3", Z_STRVAL_P(value), 4)) ||
853 								(Z_STRLEN_P(value) >= 8 &&  !strncasecmp("2001:001", Z_STRVAL_P(value), 8))
854 							) {
855 								RETURN_VALIDATION_FAILED
856 							}
857 					}
858 				}
859 			}
860 			break;
861 	}
862 }
863 /* }}} */
864 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)865 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
866 {
867 	char *input = Z_STRVAL_P(value);
868 	size_t input_len = Z_STRLEN_P(value);
869 	int tokens, length, i, offset, exp_separator_set;
870 	size_t exp_separator_len;
871 	char separator;
872 	char *exp_separator;
873 	zend_long ret = 0;
874 	zval *option_val;
875 
876 	FETCH_STRING_OPTION(exp_separator, "separator");
877 
878 	if (exp_separator_set && exp_separator_len != 1) {
879 		php_error_docref(NULL, E_WARNING, "Separator must be exactly one character long");
880 		RETURN_VALIDATION_FAILED;
881 	}
882 
883 	if (14 == input_len) {
884 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
885 		 * commonly used but valid nonetheless.
886 		 */
887 		tokens = 3;
888 		length = 4;
889 		separator = '.';
890 	} else if (17 == input_len && input[2] == '-') {
891 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
892 		tokens = 6;
893 		length = 2;
894 		separator = '-';
895 	} else if (17 == input_len && input[2] == ':') {
896 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
897 		tokens = 6;
898 		length = 2;
899 		separator = ':';
900 	} else {
901 		RETURN_VALIDATION_FAILED;
902 	}
903 
904 	if (exp_separator_set && separator != exp_separator[0]) {
905 		RETURN_VALIDATION_FAILED;
906 	}
907 
908 	/* Essentially what we now have is a set of tokens each consisting of
909 	 * a hexadecimal number followed by a separator character. (With the
910 	 * exception of the last token which does not have the separator.)
911 	 */
912 	for (i = 0; i < tokens; i++) {
913 		offset = i * (length + 1);
914 
915 		if (i < tokens - 1 && input[offset + length] != separator) {
916 			/* The current token did not end with e.g. a "." */
917 			RETURN_VALIDATION_FAILED
918 		}
919 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
920 			/* The current token is no valid hexadecimal digit */
921 			RETURN_VALIDATION_FAILED
922 		}
923 	}
924 }
925 /* }}} */
926 
927 /*
928  * Local variables:
929  * tab-width: 4
930  * c-basic-offset: 4
931  * End:
932  * vim600: noet sw=4 ts=4 fdm=marker
933  * vim<600: noet sw=4 ts=4
934  */
935