xref: /PHP-7.0/ext/filter/logical_filters.c (revision 478f119a)
1 /*
2   +----------------------------------------------------------------------+
3   | PHP Version 7                                                        |
4   +----------------------------------------------------------------------+
5   | Copyright (c) 1997-2017 The PHP Group                                |
6   +----------------------------------------------------------------------+
7   | This source file is subject to version 3.01 of the PHP license,      |
8   | that is bundled with this package in the file LICENSE, and is        |
9   | available through the world-wide-web at the following url:           |
10   | http://www.php.net/license/3_01.txt                                  |
11   | If you did not receive a copy of the PHP license and are unable to   |
12   | obtain it through the world-wide-web, please send a note to          |
13   | license@php.net so we can mail you a copy immediately.               |
14   +----------------------------------------------------------------------+
15   | Authors: Derick Rethans <derick@php.net>                             |
16   |          Pierre-A. Joye <pierre@php.net>                             |
17   |          Kévin Dunglas <dunglas@gmail.com>                           |
18   +----------------------------------------------------------------------+
19 */
20 
21 /* $Id$ */
22 
23 #include "php_filter.h"
24 #include "filter_private.h"
25 #include "ext/standard/url.h"
26 #include "ext/pcre/php_pcre.h"
27 
28 #include "zend_multiply.h"
29 
30 #if HAVE_ARPA_INET_H
31 # include <arpa/inet.h>
32 #endif
33 
34 #ifndef INADDR_NONE
35 # define INADDR_NONE ((unsigned long int) -1)
36 #endif
37 
38 
/* {{{ FETCH_LONG_OPTION(var_name, option_name)
 * Looks up option_name in option_array. When the key exists, its value is
 * converted with zval_get_long() and stored in var_name, and var_name##_set
 * becomes 1. When option_array is NULL or the key is missing, var_name and
 * var_name##_set are both left at 0.
 *
 * The expansion refers to `option_array` and to a scratch `option_val`
 * variable, both of which must be in scope at the call site.
 *
 * Wrapped in do { } while (0) so that the expansion is one statement and
 * stays correct when used as the body of an unbraced if/else. */
#define FETCH_LONG_OPTION(var_name, option_name) \
	do { \
		var_name = 0; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				var_name = zval_get_long(option_val); \
				var_name##_set = 1; \
			} \
		} \
	} while (0)
/* }}} */
50 
/* {{{ FETCH_STRING_OPTION(var_name, option_name)
 * Looks up option_name in option_array. When the key exists and holds an
 * IS_STRING value, var_name points at its character data, var_name##_len
 * holds its length and var_name##_set becomes 1. In every other case
 * (no option array, key missing, value not a string) var_name is NULL and
 * var_name##_len / var_name##_set are 0.
 *
 * The expansion refers to `option_array` and to a scratch `option_val`
 * variable, both of which must be in scope at the call site.
 *
 * Wrapped in do { } while (0) so that the expansion is one statement and
 * stays correct when used as the body of an unbraced if/else. */
#define FETCH_STRING_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		var_name##_len = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STRVAL_P(option_val); \
					var_name##_len = Z_STRLEN_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
66 
/* {{{ FETCH_STR_OPTION(var_name, option_name)
 * Same lookup as FETCH_STRING_OPTION, but var_name receives the value's
 * string object (Z_STR_P) instead of a raw character pointer, and no
 * separate length variable is maintained. var_name is NULL and
 * var_name##_set is 0 unless the key exists and holds an IS_STRING value.
 *
 * The expansion refers to `option_array` and to a scratch `option_val`
 * variable, both of which must be in scope at the call site.
 *
 * Wrapped in do { } while (0) so that the expansion is one statement and
 * stays correct when used as the body of an unbraced if/else. */
#define FETCH_STR_OPTION(var_name, option_name) \
	do { \
		var_name = NULL; \
		var_name##_set = 0; \
		if (option_array) { \
			if ((option_val = zend_hash_str_find(HASH_OF(option_array), option_name, sizeof(option_name) - 1)) != NULL) { \
				if (Z_TYPE_P(option_val) == IS_STRING) { \
					var_name = Z_STR_P(option_val); \
					var_name##_set = 1; \
				} \
			} \
		} \
	} while (0)
/* }}} */
80 
/* Address notation detected by php_filter_validate_ip(). */
enum {
	FORMAT_IPV4 = 4,
	FORMAT_IPV6 = 6
};

/* Forward declaration: php_filter_validate_url() calls this before its definition. */
static int _php_filter_validate_ipv6(char *str, size_t str_len);
85 
/* Parses an optionally signed decimal integer from the first str_len bytes
 * of str.
 *
 * Accepted input is either a single "0" (optionally preceded by '+' or '-')
 * or a digit run that starts with 1..9 (optionally signed). A leading zero
 * followed by further digits, any non-digit character, an empty digit run,
 * or a value outside ZEND_LONG_MIN..ZEND_LONG_MAX is rejected.
 *
 * Returns 1 and stores the value in *ret on success; returns -1 on failure,
 * in which case *ret is not written. */
static int php_filter_parse_int(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	zend_long ctx_value;
	int sign = 0, digit = 0;
	const char *end = str + str_len;

	/* Consume an optional sign; never look at *str once the input is exhausted */
	if (str < end) {
		switch (*str) {
			case '-':
				sign = 1;
				/* fallthrough */
			case '+':
				str++;
				break;
			default:
				break;
		}
	}

	if (str < end && *str == '0' && str + 1 == end) {
		/* Special cases: 0, +0 and -0. Store the result explicitly so the
		 * caller does not have to pre-initialise *ret. */
		*ret = 0;
		return 1;
	}

	/* must start with 1..9*/
	if (str < end && *str >= '1' && *str <= '9') {
		ctx_value = ((sign)?-1:1) * ((*(str++)) - '0');
	} else {
		return -1;
	}

	/* Cheap rejection based on the number of remaining characters */
	if ((end - str > MAX_LENGTH_OF_LONG - 1) /* number too long */
	 || (SIZEOF_LONG == 4 && (end - str == MAX_LENGTH_OF_LONG - 1) && *str > '2')) {
		/* overflow */
		return -1;
	}

	while (str < end) {
		if (*str >= '0' && *str <= '9') {
			digit = (*(str++) - '0');
			/* Each branch checks the bound before multiplying, so the
			 * accumulator itself never overflows. Negative numbers are
			 * accumulated as negatives so ZEND_LONG_MIN is representable. */
			if ( (!sign) && ctx_value <= (ZEND_LONG_MAX-digit)/10 ) {
				ctx_value = (ctx_value * 10) + digit;
			} else if ( sign && ctx_value >= (ZEND_LONG_MIN+digit)/10) {
				ctx_value = (ctx_value * 10) - digit;
			} else {
				return -1;
			}
		} else {
			return -1;
		}
	}

	*ret = ctx_value;
	return 1;
}
/* }}} */
137 
/* Parses the first str_len bytes of str as an unsigned octal digit run.
 *
 * Every byte must be in '0'..'7'. The value is accumulated in a zend_ulong
 * and rejected as soon as it would no longer fit into that type; the final
 * value is handed back through a plain cast to zend_long. An empty input
 * yields 0.
 *
 * Returns 1 and writes *ret on success, -1 (without writing *ret) on failure. */
static int php_filter_parse_octal(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	/* All-ones bit pattern of zend_long, viewed as unsigned: the accumulator's ceiling */
	const zend_ulong ceiling = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		zend_ulong d;

		if (*cursor < '0' || *cursor > '7') {
			return -1;
		}
		d = *cursor - '0';

		/* Would the shift by one octal digit overflow? */
		if (acc > ceiling / 8) {
			return -1;
		}
		acc = acc * 8;

		/* Would adding the new digit overflow? */
		if (acc > ceiling - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
160 
/* Parses the first str_len bytes of str as an unsigned hexadecimal digit run
 * (no "0x" prefix; upper- and lower-case digits are both accepted).
 *
 * The value is accumulated in a zend_ulong and rejected as soon as it would
 * no longer fit into that type; the final value is handed back through a
 * plain cast to zend_long. An empty input yields 0.
 *
 * Returns 1 and writes *ret on success, -1 (without writing *ret) on failure. */
static int php_filter_parse_hex(const char *str, size_t str_len, zend_long *ret) { /* {{{ */
	/* All-ones bit pattern of zend_long, viewed as unsigned: the accumulator's ceiling */
	const zend_ulong ceiling = (zend_ulong)(~(zend_long)0);
	const char *stop = str + str_len;
	const char *cursor;
	zend_ulong acc = 0;

	for (cursor = str; cursor < stop; cursor++) {
		const char c = *cursor;
		zend_ulong d;

		if (c >= '0' && c <= '9') {
			d = c - '0';
		} else if (c >= 'a' && c <= 'f') {
			d = c - ('a' - 10);
		} else if (c >= 'A' && c <= 'F') {
			d = c - ('A' - 10);
		} else {
			return -1;
		}

		/* Would the shift by one hex digit overflow? */
		if (acc > ceiling / 16) {
			return -1;
		}
		acc = acc * 16;

		/* Would adding the new digit overflow? */
		if (acc > ceiling - d) {
			return -1;
		}
		acc += d;
	}

	*ret = (zend_long)acc;
	return 1;
}
/* }}} */
187 
php_filter_int(PHP_INPUT_FILTER_PARAM_DECL)188 void php_filter_int(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
189 {
190 	zval *option_val;
191 	zend_long  min_range, max_range, option_flags;
192 	int   min_range_set, max_range_set;
193 	int   allow_octal = 0, allow_hex = 0;
194 	size_t	  len;
195 	int error = 0;
196 	zend_long  ctx_value;
197 	char *p;
198 
199 	/* Parse options */
200 	FETCH_LONG_OPTION(min_range,    "min_range");
201 	FETCH_LONG_OPTION(max_range,    "max_range");
202 	option_flags = flags;
203 
204 	len = Z_STRLEN_P(value);
205 
206 	if (len == 0) {
207 		RETURN_VALIDATION_FAILED
208 	}
209 
210 	if (option_flags & FILTER_FLAG_ALLOW_OCTAL) {
211 		allow_octal = 1;
212 	}
213 
214 	if (option_flags & FILTER_FLAG_ALLOW_HEX) {
215 		allow_hex = 1;
216 	}
217 
218 	/* Start the validating loop */
219 	p = Z_STRVAL_P(value);
220 	ctx_value = 0;
221 
222 	PHP_FILTER_TRIM_DEFAULT(p, len);
223 
224 	if (*p == '0') {
225 		p++; len--;
226 		if (allow_hex && (*p == 'x' || *p == 'X')) {
227 			p++; len--;
228 			if (php_filter_parse_hex(p, len, &ctx_value) < 0) {
229 				error = 1;
230 			}
231 		} else if (allow_octal) {
232 			if (php_filter_parse_octal(p, len, &ctx_value) < 0) {
233 				error = 1;
234 			}
235 		} else if (len != 0) {
236 			error = 1;
237 		}
238 	} else {
239 		if (php_filter_parse_int(p, len, &ctx_value) < 0) {
240 			error = 1;
241 		}
242 	}
243 
244 	if (error > 0 || (min_range_set && (ctx_value < min_range)) || (max_range_set && (ctx_value > max_range))) {
245 		RETURN_VALIDATION_FAILED
246 	} else {
247 		zval_ptr_dtor(value);
248 		ZVAL_LONG(value, ctx_value);
249 		return;
250 	}
251 }
252 /* }}} */
253 
php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL)254 void php_filter_boolean(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
255 {
256 	char *str = Z_STRVAL_P(value);
257 	size_t len = Z_STRLEN_P(value);
258 	int ret;
259 
260 	PHP_FILTER_TRIM_DEFAULT_EX(str, len, 0);
261 
262 	/* returns true for "1", "true", "on" and "yes"
263 	 * returns false for "0", "false", "off", "no", and ""
264 	 * null otherwise. */
265 	switch (len) {
266 		case 0:
267 			ret = 0;
268 			break;
269 		case 1:
270 			if (*str == '1') {
271 				ret = 1;
272 			} else if (*str == '0') {
273 				ret = 0;
274 			} else {
275 				ret = -1;
276 			}
277 			break;
278 		case 2:
279 			if (strncasecmp(str, "on", 2) == 0) {
280 				ret = 1;
281 			} else if (strncasecmp(str, "no", 2) == 0) {
282 				ret = 0;
283 			} else {
284 				ret = -1;
285 			}
286 			break;
287 		case 3:
288 			if (strncasecmp(str, "yes", 3) == 0) {
289 				ret = 1;
290 			} else if (strncasecmp(str, "off", 3) == 0) {
291 				ret = 0;
292 			} else {
293 				ret = -1;
294 			}
295 			break;
296 		case 4:
297 			if (strncasecmp(str, "true", 4) == 0) {
298 				ret = 1;
299 			} else {
300 				ret = -1;
301 			}
302 			break;
303 		case 5:
304 			if (strncasecmp(str, "false", 5) == 0) {
305 				ret = 0;
306 			} else {
307 				ret = -1;
308 			}
309 			break;
310 		default:
311 			ret = -1;
312 	}
313 
314 	if (ret == -1) {
315 		RETURN_VALIDATION_FAILED
316 	} else {
317 		zval_dtor(value);
318 		ZVAL_BOOL(value, ret);
319 	}
320 }
321 /* }}} */
322 
php_filter_float(PHP_INPUT_FILTER_PARAM_DECL)323 void php_filter_float(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
324 {
325 	size_t len;
326 	char *str, *end;
327 	char *num, *p;
328 	zval *option_val;
329 	char *decimal;
330 	int decimal_set;
331 	size_t decimal_len;
332 	char dec_sep = '.';
333 	char tsd_sep[3] = "',.";
334 
335 	zend_long lval;
336 	double dval;
337 
338 	int first, n;
339 
340 	len = Z_STRLEN_P(value);
341 	str = Z_STRVAL_P(value);
342 
343 	PHP_FILTER_TRIM_DEFAULT(str, len);
344 	end = str + len;
345 
346 	FETCH_STRING_OPTION(decimal, "decimal");
347 
348 	if (decimal_set) {
349 		if (decimal_len != 1) {
350 			php_error_docref(NULL, E_WARNING, "decimal separator must be one char");
351 			RETURN_VALIDATION_FAILED
352 		} else {
353 			dec_sep = *decimal;
354 		}
355 	}
356 
357 	num = p = emalloc(len+1);
358 	if (str < end && (*str == '+' || *str == '-')) {
359 		*p++ = *str++;
360 	}
361 	first = 1;
362 	while (1) {
363 		n = 0;
364 		while (str < end && *str >= '0' && *str <= '9') {
365 			++n;
366 			*p++ = *str++;
367 		}
368 		if (str == end || *str == dec_sep || *str == 'e' || *str == 'E') {
369 			if (!first && n != 3) {
370 				goto error;
371 			}
372 			if (*str == dec_sep) {
373 				*p++ = '.';
374 				str++;
375 				while (str < end && *str >= '0' && *str <= '9') {
376 					*p++ = *str++;
377 				}
378 			}
379 			if (*str == 'e' || *str == 'E') {
380 				*p++ = *str++;
381 				if (str < end && (*str == '+' || *str == '-')) {
382 					*p++ = *str++;
383 				}
384 				while (str < end && *str >= '0' && *str <= '9') {
385 					*p++ = *str++;
386 				}
387 			}
388 			break;
389 		}
390 		if ((flags & FILTER_FLAG_ALLOW_THOUSAND) && (*str == tsd_sep[0] || *str == tsd_sep[1] || *str == tsd_sep[2])) {
391 			if (first?(n < 1 || n > 3):(n != 3)) {
392 				goto error;
393 			}
394 			first = 0;
395 			str++;
396 		} else {
397 			goto error;
398 		}
399 	}
400 	if (str != end) {
401 		goto error;
402 	}
403 	*p = 0;
404 
405 	switch (is_numeric_string(num, p - num, &lval, &dval, 0)) {
406 		case IS_LONG:
407 			zval_ptr_dtor(value);
408 			ZVAL_DOUBLE(value, (double)lval);
409 			break;
410 		case IS_DOUBLE:
411 			if ((!dval && p - num > 1 && strpbrk(num, "123456789")) || !zend_finite(dval)) {
412 				goto error;
413 			}
414 			zval_ptr_dtor(value);
415 			ZVAL_DOUBLE(value, dval);
416 			break;
417 		default:
418 error:
419 			efree(num);
420 			RETURN_VALIDATION_FAILED
421 	}
422 	efree(num);
423 }
424 /* }}} */
425 
php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL)426 void php_filter_validate_regexp(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
427 {
428 	zval *option_val;
429 	zend_string *regexp;
430 	int regexp_set;
431 	pcre *re = NULL;
432 	pcre_extra *pcre_extra = NULL;
433 	int preg_options = 0;
434 	int ovector[3];
435 	int matches;
436 
437 	/* Parse options */
438 	FETCH_STR_OPTION(regexp, "regexp");
439 
440 	if (!regexp_set) {
441 		php_error_docref(NULL, E_WARNING, "'regexp' option missing");
442 		RETURN_VALIDATION_FAILED
443 	}
444 
445 	re = pcre_get_compiled_regex(regexp, &pcre_extra, &preg_options);
446 	if (!re) {
447 		RETURN_VALIDATION_FAILED
448 	}
449 	matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
450 
451 	/* 0 means that the vector is too small to hold all the captured substring offsets */
452 	if (matches < 0) {
453 		RETURN_VALIDATION_FAILED
454 	}
455 }
456 
/* Checks the first `len` bytes of `domain` against domain-name length rules
 * and, when FILTER_FLAG_HOSTNAME is set in `flags`, hostname character rules.
 *
 *  - one trailing dot is ignored;
 *  - the remaining length may not exceed 253 bytes;
 *  - no label may be empty (leading dot, or two dots in a row) or longer
 *    than 63 bytes;
 *  - with FILTER_FLAG_HOSTNAME, labels may contain only alphanumerics and
 *    '-', the very first byte must be alphanumeric, and the bytes on both
 *    sides of every dot must be alphanumeric.
 *
 * An empty input is accepted without FILTER_FLAG_HOSTNAME and rejected with
 * it, which is what the checks below yield for an empty NUL-terminated
 * string. A negative length is rejected.
 *
 * Returns 1 when the name is acceptable, 0 otherwise. */
static int _php_filter_validate_domain(char * domain, int len, zend_long flags) /* {{{ */
{
	char *e, *s, *t;
	size_t l;
	int hostname = flags & FILTER_FLAG_HOSTNAME;
	unsigned char i = 1;

	/* A negative length would turn into a huge size_t below */
	if (len < 0) {
		return 0;
	}

	/* Nothing to inspect: answer directly instead of peeking at domain[-1]
	 * (trailing-dot test) and domain[0] (first-character test) */
	if (len == 0) {
		return hostname ? 0 : 1;
	}

	s = domain;
	l = len;
	e = domain + l;
	t = e - 1;

	/* Ignore trailing dot */
	if (*t == '.') {
		e = t;
		l--;
	}

	/* The total length cannot exceed 253 characters (final dot not included) */
	if (l > 253) {
		return 0;
	}

	/* First char must be alphanumeric */
	if(*s == '.' || (hostname && !isalnum((int)*(unsigned char *)s))) {
		return 0;
	}

	while (s < e) {
		if (*s == '.') {
			/* The first and the last character of a label must be alphanumeric.
			 * s - 1 is in bounds because the first byte is never a dot here;
			 * s + 1 is in bounds because a final dot was excluded above. */
			if (*(s + 1) == '.' || (hostname && (!isalnum((int)*(unsigned char *)(s - 1)) || !isalnum((int)*(unsigned char *)(s + 1))))) {
				return 0;
			}

			/* Reset label length counter */
			i = 1;
		} else {
			/* i is the 1-based position of this byte within its label */
			if (i > 63 || (hostname && *s != '-' && !isalnum((int)*(unsigned char *)s))) {
				return 0;
			}

			i++;
		}

		s++;
	}

	return 1;
}
/* }}} */
508 
php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL)509 void php_filter_validate_domain(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
510 {
511 	if (!_php_filter_validate_domain(Z_STRVAL_P(value), Z_STRLEN_P(value), flags)) {
512 		RETURN_VALIDATION_FAILED
513 	}
514 }
515 /* }}} */
516 
php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL)517 void php_filter_validate_url(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
518 {
519 	php_url *url;
520 	int old_len = (int)Z_STRLEN_P(value);
521 
522 	php_filter_url(value, flags, option_array, charset);
523 
524 	if (Z_TYPE_P(value) != IS_STRING || old_len != Z_STRLEN_P(value)) {
525 		RETURN_VALIDATION_FAILED
526 	}
527 
528 	/* Use parse_url - if it returns false, we return NULL */
529 	url = php_url_parse_ex(Z_STRVAL_P(value), Z_STRLEN_P(value));
530 
531 	if (url == NULL) {
532 		RETURN_VALIDATION_FAILED
533 	}
534 
535 	if (url->scheme != NULL && (!strcasecmp(url->scheme, "http") || !strcasecmp(url->scheme, "https"))) {
536 		char *e, *s, *t;
537 		size_t l;
538 
539 		if (url->host == NULL) {
540 			goto bad_url;
541 		}
542 
543 		s = url->host;
544 		l = strlen(s);
545 		e = url->host + l;
546 		t = e - 1;
547 
548 		/* An IPv6 enclosed by square brackets is a valid hostname */
549 		if (*s == '[' && *t == ']' && _php_filter_validate_ipv6((s + 1), l - 2)) {
550 			php_url_free(url);
551 			return;
552 		}
553 
554 		// Validate domain
555 		if (!_php_filter_validate_domain(url->host, l, FILTER_FLAG_HOSTNAME)) {
556 			php_url_free(url);
557 			RETURN_VALIDATION_FAILED
558 		}
559 	}
560 
561 	if (
562 		url->scheme == NULL ||
563 		/* some schemas allow the host to be empty */
564 		(url->host == NULL && (strcmp(url->scheme, "mailto") && strcmp(url->scheme, "news") && strcmp(url->scheme, "file"))) ||
565 		((flags & FILTER_FLAG_PATH_REQUIRED) && url->path == NULL) || ((flags & FILTER_FLAG_QUERY_REQUIRED) && url->query == NULL)
566 	) {
567 bad_url:
568 		php_url_free(url);
569 		RETURN_VALIDATION_FAILED
570 	}
571 	php_url_free(url);
572 }
573 /* }}} */
574 
php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL)575 void php_filter_validate_email(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
576 {
577 	/*
578 	 * The regex below is based on a regex by Michael Rushton.
579 	 * However, it is not identical.  I changed it to only consider routeable
580 	 * addresses as valid.  Michael's regex considers a@b a valid address
581 	 * which conflicts with section 2.3.5 of RFC 5321 which states that:
582 	 *
583 	 *   Only resolvable, fully-qualified domain names (FQDNs) are permitted
584 	 *   when domain names are used in SMTP.  In other words, names that can
585 	 *   be resolved to MX RRs or address (i.e., A or AAAA) RRs (as discussed
586 	 *   in Section 5) are permitted, as are CNAME RRs whose targets can be
587 	 *   resolved, in turn, to MX or address RRs.  Local nicknames or
588 	 *   unqualified names MUST NOT be used.
589 	 *
590 	 * This regex does not handle comments and folding whitespace.  While
591 	 * this is technically valid in an email address, these parts aren't
592 	 * actually part of the address itself.
593 	 *
594 	 * Michael's regex carries this copyright:
595 	 *
596 	 * Copyright © Michael Rushton 2009-10
597 	 * http://squiloople.com/
598 	 * Feel free to use and redistribute this code. But please keep this copyright notice.
599 	 *
600 	 */
601 	const char regexp[] = "/^(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){255,})(?!(?:(?:\\x22?\\x5C[\\x00-\\x7E]\\x22?)|(?:\\x22?[^\\x5C\\x22]\\x22?)){65,}@)(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22))(?:\\.(?:(?:[\\x21\\x23-\\x27\\x2A\\x2B\\x2D\\x2F-\\x39\\x3D\\x3F\\x5E-\\x7E]+)|(?:\\x22(?:[\\x01-\\x08\\x0B\\x0C\\x0E-\\x1F\\x21\\x23-\\x5B\\x5D-\\x7F]|(?:\\x5C[\\x00-\\x7F]))*\\x22)))*@(?:(?:(?!.*[^.]{64,})(?:(?:(?:xn--)?[a-z0-9]+(?:-+[a-z0-9]+)*\\.){1,126}){1,}(?:(?:[a-z][a-z0-9]*)|(?:(?:xn--)[a-z0-9]+))(?:-+[a-z0-9]+)*)|(?:\\[(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){7})|(?:(?!(?:.*[a-f0-9][:\\]]){7,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,5})?)))|(?:(?:IPv6:(?:(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){5}:)|(?:(?!(?:.*[a-f0-9]:){5,})(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3})?::(?:[a-f0-9]{1,4}(?::[a-f0-9]{1,4}){0,3}:)?)))?(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))(?:\\.(?:(?:25[0-5])|(?:2[0-4][0-9])|(?:1[0-9]{2})|(?:[1-9]?[0-9]))){3}))\\]))$/iD";
602 	pcre       *re = NULL;
603 	pcre_extra *pcre_extra = NULL;
604 	int preg_options = 0;
605 	int         ovector[150]; /* Needs to be a multiple of 3 */
606 	int         matches;
607 	zend_string *sregexp;
608 
609 
610 	/* The maximum length of an e-mail address is 320 octets, per RFC 2821. */
611 	if (Z_STRLEN_P(value) > 320) {
612 		RETURN_VALIDATION_FAILED
613 	}
614 
615 	sregexp = zend_string_init(regexp, sizeof(regexp) - 1, 0);
616 	re = pcre_get_compiled_regex(sregexp, &pcre_extra, &preg_options);
617 	if (!re) {
618 		zend_string_release(sregexp);
619 		RETURN_VALIDATION_FAILED
620 	}
621 	zend_string_release(sregexp);
622 	matches = pcre_exec(re, NULL, Z_STRVAL_P(value), (int)Z_STRLEN_P(value), 0, 0, ovector, 3);
623 
624 	/* 0 means that the vector is too small to hold all the captured substring offsets */
625 	if (matches < 0) {
626 		RETURN_VALIDATION_FAILED
627 	}
628 
629 }
630 /* }}} */
631 
/* Validates the first str_len bytes of str as a dotted-quad IPv4 address.
 *
 * Exactly four decimal fields separated by single dots are required. Each
 * field has one to three digits, a value of at most 255, and no leading
 * zero unless the field is exactly "0".
 *
 * The parsed fields are stored in ip[0..3] as they are read, so ip may be
 * partially written when the function fails.
 *
 * Returns 1 for a valid address, 0 otherwise. */
static int _php_filter_validate_ipv4(char *str, size_t str_len, int *ip) /* {{{ */
{
	const char *cur = str;
	const char *limit = str + str_len;
	int fields = 0;

	while (cur < limit) {
		int field_value;
		int digit_count;
		int starts_with_zero;

		if (*cur < '0' || *cur > '9') {
			return 0;
		}

		starts_with_zero = (*cur == '0');
		field_value = *cur - '0';
		digit_count = 1;
		cur++;

		for (; cur < limit && *cur >= '0' && *cur <= '9'; cur++) {
			field_value = field_value * 10 + (*cur - '0');
			digit_count++;
			if (field_value > 255 || digit_count > 3) {
				return 0;
			}
		}

		/* don't allow a leading 0; that introduces octal numbers,
		 * which we don't support */
		if (starts_with_zero && (field_value != 0 || digit_count > 1)) {
			return 0;
		}

		ip[fields] = field_value;
		fields++;

		if (fields == 4) {
			/* The fourth field must be the end of the input */
			return cur == limit;
		}

		/* Fields one to three must be followed by a dot */
		if (cur >= limit || *cur != '.') {
			return 0;
		}
		cur++;
	}

	/* Empty input, or input that ended right after a dot */
	return 0;
}
/* }}} */
666 
/* Validates the first str_len bytes of str as a textual IPv6 address.
 *
 * The input must contain at least one ':'. If it also contains a '.', the
 * part after the last ':' preceding that dot is validated as an IPv4
 * address and accounted for as two 16-bit groups. The remainder must be
 * made of groups of one to four hex digits separated by ':', with at most
 * one "::" compression. Without compression exactly eight groups are
 * required; with compression the total may not exceed eight.
 *
 * Returns 1 for a valid address, 0 otherwise. */
static int _php_filter_validate_ipv6(char *str, size_t str_len) /* {{{ */
{
	char *const first = str;
	char *cur = str;
	char *limit;
	char *dot;
	int v4_fields[4];
	int seen_compression = 0;
	int groups = 0;
	int hex_digits;

	if (memchr(str, ':', str_len) == NULL) {
		return 0;
	}

	/* check for bundled IPv4 */
	dot = memchr(str, '.', str_len);
	if (dot != NULL) {
		char *v4 = dot;

		/* Walk back to the start of the dotted-quad part */
		while (v4 > str && v4[-1] != ':') {
			v4--;
		}

		if (!_php_filter_validate_ipv4(v4, (str_len - (v4 - str)), v4_fields)) {
			return 0;
		}

		str_len = v4 - str; /* length excluding ipv4 */
		if (str_len < 2) {
			return 0;
		}

		if (v4[-2] != ':') {
			/* don't include : before ipv4 unless it's a :: */
			str_len--;
		}

		/* The IPv4 tail occupies the last two 16-bit groups */
		groups = 2;
	}

	limit = str + str_len;

	while (cur < limit) {
		if (*cur == ':') {
			cur++;
			if (cur >= limit) {
				/* cannot end in : without previous : */
				return 0;
			}

			if (*cur == ':') {
				if (seen_compression) {
					/* only one :: is allowed */
					return 0;
				}
				groups++; /* :: means 1 or more 16-bit 0 blocks */
				seen_compression = 1;

				cur++;
				if (cur == limit) {
					return (groups <= 8);
				}
			} else if ((cur - 1) == first) {
				/* dont allow leading : without another : following */
				return 0;
			}
		}

		/* One group: 1 to 4 hexadecimal digits */
		hex_digits = 0;
		while (cur < limit &&
		       ((*cur >= '0' && *cur <= '9') ||
		        (*cur >= 'a' && *cur <= 'f') ||
		        (*cur >= 'A' && *cur <= 'F'))) {
			hex_digits++;
			cur++;
		}
		if (hex_digits < 1 || hex_digits > 4) {
			return 0;
		}

		groups++;
		if (groups > 8) {
			return 0;
		}
	}

	return ((seen_compression && groups <= 8) || groups == 8);
}
/* }}} */
745 
php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL)746 void php_filter_validate_ip(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
747 {
748 	/* validates an ipv4 or ipv6 IP, based on the flag (4, 6, or both) add a
749 	 * flag to throw out reserved ranges; multicast ranges... etc. If both
750 	 * allow_ipv4 and allow_ipv6 flags flag are used, then the first dot or
751 	 * colon determine the format */
752 
753 	int            ip[4];
754 	int            mode;
755 
756 	if (memchr(Z_STRVAL_P(value), ':', Z_STRLEN_P(value))) {
757 		mode = FORMAT_IPV6;
758 	} else if (memchr(Z_STRVAL_P(value), '.', Z_STRLEN_P(value))) {
759 		mode = FORMAT_IPV4;
760 	} else {
761 		RETURN_VALIDATION_FAILED
762 	}
763 
764 	if ((flags & FILTER_FLAG_IPV4) && (flags & FILTER_FLAG_IPV6)) {
765 		/* Both formats are cool */
766 	} else if ((flags & FILTER_FLAG_IPV4) && mode == FORMAT_IPV6) {
767 		RETURN_VALIDATION_FAILED
768 	} else if ((flags & FILTER_FLAG_IPV6) && mode == FORMAT_IPV4) {
769 		RETURN_VALIDATION_FAILED
770 	}
771 
772 	switch (mode) {
773 		case FORMAT_IPV4:
774 			if (!_php_filter_validate_ipv4(Z_STRVAL_P(value), Z_STRLEN_P(value), ip)) {
775 				RETURN_VALIDATION_FAILED
776 			}
777 
778 			/* Check flags */
779 			if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
780 				if (
781 					(ip[0] == 10) ||
782 					(ip[0] == 172 && ip[1] >= 16 && ip[1] <= 31) ||
783 					(ip[0] == 192 && ip[1] == 168)
784 				) {
785 					RETURN_VALIDATION_FAILED
786 				}
787 			}
788 
789 			if (flags & FILTER_FLAG_NO_RES_RANGE) {
790 				if (
791 					(ip[0] == 0) ||
792 					(ip[0] >= 240) ||
793 					(ip[0] == 127) ||
794 					(ip[0] == 169 && ip[1] == 254)
795 				) {
796 					RETURN_VALIDATION_FAILED
797 				}
798 			}
799 			break;
800 
801 		case FORMAT_IPV6:
802 			{
803 				int res = 0;
804 				res = _php_filter_validate_ipv6(Z_STRVAL_P(value), Z_STRLEN_P(value));
805 				if (res < 1) {
806 					RETURN_VALIDATION_FAILED
807 				}
808 				/* Check flags */
809 				if (flags & FILTER_FLAG_NO_PRIV_RANGE) {
810 					if (Z_STRLEN_P(value) >=2 && (!strncasecmp("FC", Z_STRVAL_P(value), 2) || !strncasecmp("FD", Z_STRVAL_P(value), 2))) {
811 						RETURN_VALIDATION_FAILED
812 					}
813 				}
814 				if (flags & FILTER_FLAG_NO_RES_RANGE) {
815 					switch (Z_STRLEN_P(value)) {
816 						case 1: case 0:
817 							break;
818 						case 2:
819 							if (!strcmp("::", Z_STRVAL_P(value))) {
820 								RETURN_VALIDATION_FAILED
821 							}
822 							break;
823 						case 3:
824 							if (!strcmp("::1", Z_STRVAL_P(value)) || !strcmp("5f:", Z_STRVAL_P(value))) {
825 								RETURN_VALIDATION_FAILED
826 							}
827 							break;
828 						default:
829 							if (Z_STRLEN_P(value) >= 5) {
830 								if (
831 									!strncasecmp("fe8", Z_STRVAL_P(value), 3) ||
832 									!strncasecmp("fe9", Z_STRVAL_P(value), 3) ||
833 									!strncasecmp("fea", Z_STRVAL_P(value), 3) ||
834 									!strncasecmp("feb", Z_STRVAL_P(value), 3)
835 								) {
836 									RETURN_VALIDATION_FAILED
837 								}
838 							}
839 							if (
840 								(Z_STRLEN_P(value) >= 9 &&  !strncasecmp("2001:0db8", Z_STRVAL_P(value), 9)) ||
841 								(Z_STRLEN_P(value) >= 2 &&  !strncasecmp("5f", Z_STRVAL_P(value), 2)) ||
842 								(Z_STRLEN_P(value) >= 4 &&  !strncasecmp("3ff3", Z_STRVAL_P(value), 4)) ||
843 								(Z_STRLEN_P(value) >= 8 &&  !strncasecmp("2001:001", Z_STRVAL_P(value), 8))
844 							) {
845 								RETURN_VALIDATION_FAILED
846 							}
847 					}
848 				}
849 			}
850 			break;
851 	}
852 }
853 /* }}} */
854 
php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL)855 void php_filter_validate_mac(PHP_INPUT_FILTER_PARAM_DECL) /* {{{ */
856 {
857 	char *input = Z_STRVAL_P(value);
858 	size_t input_len = Z_STRLEN_P(value);
859 	int tokens, length, i, offset, exp_separator_set;
860 	size_t exp_separator_len;
861 	char separator;
862 	char *exp_separator;
863 	zend_long ret = 0;
864 	zval *option_val;
865 
866 	FETCH_STRING_OPTION(exp_separator, "separator");
867 
868 	if (exp_separator_set && exp_separator_len != 1) {
869 		php_error_docref(NULL, E_WARNING, "Separator must be exactly one character long");
870 		RETURN_VALIDATION_FAILED;
871 	}
872 
873 	if (14 == input_len) {
874 		/* EUI-64 format: Four hexadecimal digits separated by dots. Less
875 		 * commonly used but valid nonetheless.
876 		 */
877 		tokens = 3;
878 		length = 4;
879 		separator = '.';
880 	} else if (17 == input_len && input[2] == '-') {
881 		/* IEEE 802 format: Six hexadecimal digits separated by hyphens. */
882 		tokens = 6;
883 		length = 2;
884 		separator = '-';
885 	} else if (17 == input_len && input[2] == ':') {
886 		/* IEEE 802 format: Six hexadecimal digits separated by colons. */
887 		tokens = 6;
888 		length = 2;
889 		separator = ':';
890 	} else {
891 		RETURN_VALIDATION_FAILED;
892 	}
893 
894 	if (exp_separator_set && separator != exp_separator[0]) {
895 		RETURN_VALIDATION_FAILED;
896 	}
897 
898 	/* Essentially what we now have is a set of tokens each consisting of
899 	 * a hexadecimal number followed by a separator character. (With the
900 	 * exception of the last token which does not have the separator.)
901 	 */
902 	for (i = 0; i < tokens; i++) {
903 		offset = i * (length + 1);
904 
905 		if (i < tokens - 1 && input[offset + length] != separator) {
906 			/* The current token did not end with e.g. a "." */
907 			RETURN_VALIDATION_FAILED
908 		}
909 		if (php_filter_parse_hex(input + offset, length, &ret) < 0) {
910 			/* The current token is no valid hexadecimal digit */
911 			RETURN_VALIDATION_FAILED
912 		}
913 	}
914 }
915 /* }}} */
916 
917 /*
918  * Local variables:
919  * tab-width: 4
920  * c-basic-offset: 4
921  * End:
922  * vim600: noet sw=4 ts=4 fdm=marker
923  * vim<600: noet sw=4 ts=4
924  */
925