1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 7                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
14    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
15    +----------------------------------------------------------------------+
16  */
17 
18 #include "collator_is_numeric.h"
19 
20 /* {{{ collator_u_strtod
21  * Taken from PHP6:zend_u_strtod()
22  */
collator_u_strtod(const UChar * nptr,UChar ** endptr)23 static double collator_u_strtod(const UChar *nptr, UChar **endptr) /* {{{ */
24 {
25 	const UChar *u = nptr, *nstart;
26 	UChar c = *u;
27 	int any = 0;
28 
29 	while (u_isspace(c)) {
30 		c = *++u;
31 	}
32 	nstart = u;
33 
34 	if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
35 		c = *++u;
36 	}
37 
38 	while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
39 		any = 1;
40 		c = *++u;
41 	}
42 
43 	if (c == 0x2E /*'.'*/) {
44 		c = *++u;
45 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
46 			any = 1;
47 			c = *++u;
48 		}
49 	}
50 
51 	if ((c == 0x65 /*'e'*/ || c == 0x45 /*'E'*/) && any) {
52 		const UChar *e = u;
53 		int any_exp = 0;
54 
55 		c = *++u;
56 		if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
57 			c = *++u;
58 		}
59 
60 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
61 			any_exp = 1;
62 			c = *++u;
63 		}
64 
65 		if (!any_exp) {
66 			u = e;
67 		}
68 	}
69 
70 	if (any) {
71 		char buf[64], *numbuf, *bufpos;
72 		size_t length = u - nstart;
73 		double value;
74 		ALLOCA_FLAG(use_heap);
75 
76 		if (length < sizeof(buf)) {
77 			numbuf = buf;
78 		} else {
79 			numbuf = (char *) do_alloca(length + 1, use_heap);
80 		}
81 
82 		bufpos = numbuf;
83 
84 		while (nstart < u) {
85 			*bufpos++ = (char) *nstart++;
86 		}
87 
88 		*bufpos = '\0';
89 		value = zend_strtod(numbuf, NULL);
90 
91 		if (numbuf != buf) {
92 			free_alloca(numbuf, use_heap);
93 		}
94 
95 		if (endptr != NULL) {
96 			*endptr = (UChar *)u;
97 		}
98 
99 		return value;
100 	}
101 
102 	if (endptr != NULL) {
103 		*endptr = (UChar *)nptr;
104 	}
105 
106 	return 0;
107 }
108 /* }}} */
109 
110 /* {{{ collator_u_strtol
111  * Taken from PHP6:zend_u_strtol()
112  *
113  * Convert a Unicode string to a long integer.
114  *
115  * Ignores `locale' stuff.
116  */
collator_u_strtol(nptr,endptr,base)117 static zend_long collator_u_strtol(nptr, endptr, base)
118 	const UChar *nptr;
119 	UChar **endptr;
120 	register int base;
121 {
122 	register const UChar *s = nptr;
123 	register zend_ulong acc;
124 	register UChar c;
125 	register zend_ulong cutoff;
126 	register int neg = 0, any, cutlim;
127 
128 	if (s == NULL) {
129 		errno = ERANGE;
130 		if (endptr != NULL) {
131 			*endptr = NULL;
132 		}
133 		return 0;
134 	}
135 
136 	/*
137 	 * Skip white space and pick up leading +/- sign if any.
138 	 * If base is 0, allow 0x for hex and 0 for octal, else
139 	 * assume decimal; if base is already 16, allow 0x.
140 	 */
141 	do {
142 		c = *s++;
143 	} while (u_isspace(c));
144 	if (c == 0x2D /*'-'*/) {
145 		neg = 1;
146 		c = *s++;
147 	} else if (c == 0x2B /*'+'*/)
148 		c = *s++;
149 	if ((base == 0 || base == 16) &&
150 	    (c == 0x30 /*'0'*/)
151 		 && (*s == 0x78 /*'x'*/ || *s == 0x58 /*'X'*/)) {
152 		c = s[1];
153 		s += 2;
154 		base = 16;
155 	}
156 	if (base == 0)
157 		base = (c == 0x30 /*'0'*/) ? 8 : 10;
158 
159 	/*
160 	 * Compute the cutoff value between legal numbers and illegal
161 	 * numbers.  That is the largest legal value, divided by the
162 	 * base.  An input number that is greater than this value, if
163 	 * followed by a legal input character, is too big.  One that
164 	 * is equal to this value may be valid or not; the limit
165 	 * between valid and invalid numbers is then based on the last
166 	 * digit.  For instance, if the range for longs is
167 	 * [-2147483648..2147483647] and the input base is 10,
168 	 * cutoff will be set to 214748364 and cutlim to either
169 	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
170 	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
171 	 * the number is too big, and we will return a range error.
172 	 *
173 	 * Set any if any `digits' consumed; make it negative to indicate
174 	 * overflow.
175 	 */
176 	cutoff = neg ? -(zend_ulong)ZEND_LONG_MIN : ZEND_LONG_MAX;
177 	cutlim = cutoff % (zend_ulong)base;
178 	cutoff /= (zend_ulong)base;
179 	for (acc = 0, any = 0;; c = *s++) {
180 		if (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/)
181 			c -= 0x30 /*'0'*/;
182 		else if (c >= 0x41 /*'A'*/ && c <= 0x5A /*'Z'*/)
183 			c -= 0x41 /*'A'*/ - 10;
184 		else if (c >= 0x61 /*'a'*/ && c <= 0x7A /*'z'*/)
185 			c -= 0x61 /*'a'*/ - 10;
186 		else
187 			break;
188 		if (c >= base)
189 			break;
190 
191 		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
192 			any = -1;
193 		else {
194 			any = 1;
195 			acc *= base;
196 			acc += c;
197 		}
198 	}
199 	if (any < 0) {
200 		acc = neg ? ZEND_LONG_MIN : ZEND_LONG_MAX;
201 		errno = ERANGE;
202 	} else if (neg)
203 		acc = -acc;
204 	if (endptr != NULL)
205 		*endptr = (UChar *)(any ? s - 1 : nptr);
206 	return (acc);
207 }
208 /* }}} */
209 
210 
211 /* {{{ collator_is_numeric]
212  * Taken from PHP6:is_numeric_unicode()
213  */
collator_is_numeric(UChar * str,int32_t length,zend_long * lval,double * dval,int allow_errors)214 zend_uchar collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double *dval, int allow_errors )
215 {
216 	zend_long local_lval;
217 	double local_dval;
218 	UChar *end_ptr_long, *end_ptr_double;
219 	int conv_base=10;
220 
221 	if (!length) {
222 		return 0;
223 	}
224 
225 	/* handle hex numbers */
226 	if (length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X')) {
227 		conv_base=16;
228 	}
229 
230 	errno=0;
231 	local_lval = collator_u_strtol(str, &end_ptr_long, conv_base);
232 	if (errno != ERANGE) {
233 		if (end_ptr_long == str+length) { /* integer string */
234 			if (lval) {
235 				*lval = local_lval;
236 			}
237 			return IS_LONG;
238 		} else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
239 			return 0;
240 		}
241 	} else {
242 		end_ptr_long = NULL;
243 	}
244 
245 	if (conv_base == 16) { /* hex string, under UNIX strtod() messes it up */
246 		/* UTODO: keep compatibility with is_numeric_string() here? */
247 		return 0;
248 	}
249 
250 	local_dval = collator_u_strtod(str, &end_ptr_double);
251 	if (local_dval == 0 && end_ptr_double == str) {
252 		end_ptr_double = NULL;
253 	} else {
254 		if (end_ptr_double == str+length) { /* floating point string */
255 			if (!zend_finite(local_dval)) {
256 				/* "inf","nan" and maybe other weird ones */
257 				return 0;
258 			}
259 
260 			if (dval) {
261 				*dval = local_dval;
262 			}
263 			return IS_DOUBLE;
264 		}
265 	}
266 
267 	if (!allow_errors) {
268 		return 0;
269 	}
270 	if (allow_errors == -1) {
271 		zend_error(E_NOTICE, "A non well formed numeric value encountered");
272 	}
273 
274 	if (allow_errors) {
275 		if (end_ptr_double > end_ptr_long && dval) {
276 			*dval = local_dval;
277 			return IS_DOUBLE;
278 		} else if (end_ptr_long && lval) {
279 			*lval = local_lval;
280 			return IS_LONG;
281 		}
282 	}
283 	return 0;
284 }
285 /* }}} */
286