1 /*
2    +----------------------------------------------------------------------+
3    | PHP Version 5                                                        |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | http://www.php.net/license/3_01.txt                                  |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
14    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
15    +----------------------------------------------------------------------+
16  */
17 
18 #include "collator_is_numeric.h"
19 
/*
 * Local aliases for the Zend alloca helpers.
 *
 * _do_alloca() and _free_alloca() are always written with two arguments
 * (size/pointer and a heap flag). On module API versions before 20071006
 * the second argument is discarded and the one-argument do_alloca() /
 * free_alloca() are used; ALLOCA_FLAG() is defined to expand to nothing
 * there unless something already defined it.
 */
#if ZEND_MODULE_API_NO >= 20071006
/* 5.3 and later: forward to the Zend macros unchanged */
# define _do_alloca do_alloca
# define _free_alloca free_alloca
#else
/* not 5.3 */
# ifndef ALLOCA_FLAG
#  define ALLOCA_FLAG(use_heap)
# endif
# define _do_alloca(x, y) do_alloca((x))
# define _free_alloca(x, y) free_alloca((x))
#endif
31 /* {{{ collator_u_strtod
32  * Taken from PHP6:zend_u_strtod()
33  */
collator_u_strtod(const UChar * nptr,UChar ** endptr)34 static double collator_u_strtod(const UChar *nptr, UChar **endptr) /* {{{ */
35 {
36 	const UChar *u = nptr, *nstart;
37 	UChar c = *u;
38 	int any = 0;
39 	ALLOCA_FLAG(use_heap);
40 
41 	while (u_isspace(c)) {
42 		c = *++u;
43 	}
44 	nstart = u;
45 
46 	if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
47 		c = *++u;
48 	}
49 
50 	while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
51 		any = 1;
52 		c = *++u;
53 	}
54 
55 	if (c == 0x2E /*'.'*/) {
56 		c = *++u;
57 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
58 			any = 1;
59 			c = *++u;
60 		}
61 	}
62 
63 	if ((c == 0x65 /*'e'*/ || c == 0x45 /*'E'*/) && any) {
64 		const UChar *e = u;
65 		int any_exp = 0;
66 
67 		c = *++u;
68 		if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
69 			c = *++u;
70 		}
71 
72 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
73 			any_exp = 1;
74 			c = *++u;
75 		}
76 
77 		if (!any_exp) {
78 			u = e;
79 		}
80 	}
81 
82 	if (any) {
83 		char buf[64], *numbuf, *bufpos;
84 		int length = u - nstart;
85 		double value;
86 
87 		if (length < sizeof(buf)) {
88 			numbuf = buf;
89 		} else {
90 			numbuf = (char *) _do_alloca(length + 1, use_heap);
91 		}
92 
93 		bufpos = numbuf;
94 
95 		while (nstart < u) {
96 			*bufpos++ = (char) *nstart++;
97 		}
98 
99 		*bufpos = '\0';
100 		value = zend_strtod(numbuf, NULL);
101 
102 		if (numbuf != buf) {
103 			_free_alloca(numbuf, use_heap);
104 		}
105 
106 		if (endptr != NULL) {
107 			*endptr = (UChar *)u;
108 		}
109 
110 		return value;
111 	}
112 
113 	if (endptr != NULL) {
114 		*endptr = (UChar *)nptr;
115 	}
116 
117 	return 0;
118 }
119 /* }}} */
120 
121 /* {{{ collator_u_strtol
122  * Taken from PHP6:zend_u_strtol()
123  *
124  * Convert a Unicode string to a long integer.
125  *
126  * Ignores `locale' stuff.
127  */
collator_u_strtol(nptr,endptr,base)128 static long collator_u_strtol(nptr, endptr, base)
129 	const UChar *nptr;
130 	UChar **endptr;
131 	register int base;
132 {
133 	register const UChar *s = nptr;
134 	register unsigned long acc;
135 	register UChar c;
136 	register unsigned long cutoff;
137 	register int neg = 0, any, cutlim;
138 
139 	if (s == NULL) {
140 		errno = ERANGE;
141 		if (endptr != NULL) {
142 			*endptr = NULL;
143 		}
144 		return 0;
145 	}
146 
147 	/*
148 	 * Skip white space and pick up leading +/- sign if any.
149 	 * If base is 0, allow 0x for hex and 0 for octal, else
150 	 * assume decimal; if base is already 16, allow 0x.
151 	 */
152 	do {
153 		c = *s++;
154 	} while (u_isspace(c));
155 	if (c == 0x2D /*'-'*/) {
156 		neg = 1;
157 		c = *s++;
158 	} else if (c == 0x2B /*'+'*/)
159 		c = *s++;
160 	if ((base == 0 || base == 16) &&
161 	    (c == 0x30 /*'0'*/)
162 		 && (*s == 0x78 /*'x'*/ || *s == 0x58 /*'X'*/)) {
163 		c = s[1];
164 		s += 2;
165 		base = 16;
166 	}
167 	if (base == 0)
168 		base = (c == 0x30 /*'0'*/) ? 8 : 10;
169 
170 	/*
171 	 * Compute the cutoff value between legal numbers and illegal
172 	 * numbers.  That is the largest legal value, divided by the
173 	 * base.  An input number that is greater than this value, if
174 	 * followed by a legal input character, is too big.  One that
175 	 * is equal to this value may be valid or not; the limit
176 	 * between valid and invalid numbers is then based on the last
177 	 * digit.  For instance, if the range for longs is
178 	 * [-2147483648..2147483647] and the input base is 10,
179 	 * cutoff will be set to 214748364 and cutlim to either
180 	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
181 	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
182 	 * the number is too big, and we will return a range error.
183 	 *
184 	 * Set any if any `digits' consumed; make it negative to indicate
185 	 * overflow.
186 	 */
187 	cutoff = neg ? -(unsigned long)LONG_MIN : LONG_MAX;
188 	cutlim = cutoff % (unsigned long)base;
189 	cutoff /= (unsigned long)base;
190 	for (acc = 0, any = 0;; c = *s++) {
191 		if (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/)
192 			c -= 0x30 /*'0'*/;
193 		else if (c >= 0x41 /*'A'*/ && c <= 0x5A /*'Z'*/)
194 			c -= 0x41 /*'A'*/ - 10;
195 		else if (c >= 0x61 /*'a'*/ && c <= 0x7A /*'z'*/)
196 			c -= 0x61 /*'a'*/ - 10;
197 		else
198 			break;
199 		if (c >= base)
200 			break;
201 
202 		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
203 			any = -1;
204 		else {
205 			any = 1;
206 			acc *= base;
207 			acc += c;
208 		}
209 	}
210 	if (any < 0) {
211 		acc = neg ? LONG_MIN : LONG_MAX;
212 		errno = ERANGE;
213 	} else if (neg)
214 		acc = -acc;
215 	if (endptr != NULL)
216 		*endptr = (UChar *)(any ? s - 1 : nptr);
217 	return (acc);
218 }
219 /* }}} */
220 
221 
222 /* {{{ collator_is_numeric]
223  * Taken from PHP6:is_numeric_unicode()
224  */
collator_is_numeric(UChar * str,int length,long * lval,double * dval,int allow_errors)225 zend_uchar collator_is_numeric( UChar *str, int length, long *lval, double *dval, int allow_errors )
226 {
227 	long local_lval;
228 	double local_dval;
229 	UChar *end_ptr_long, *end_ptr_double;
230 	int conv_base=10;
231 
232 	if (!length) {
233 		return 0;
234 	}
235 
236 	/* handle hex numbers */
237 	if (length>=2 && str[0]=='0' && (str[1]=='x' || str[1]=='X')) {
238 		conv_base=16;
239 	}
240 
241 	errno=0;
242 	local_lval = collator_u_strtol(str, &end_ptr_long, conv_base);
243 	if (errno != ERANGE) {
244 		if (end_ptr_long == str+length) { /* integer string */
245 			if (lval) {
246 				*lval = local_lval;
247 			}
248 			return IS_LONG;
249 		} else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
250 			return 0;
251 		}
252 	} else {
253 		end_ptr_long = NULL;
254 	}
255 
256 	if (conv_base == 16) { /* hex string, under UNIX strtod() messes it up */
257 		/* UTODO: keep compatibility with is_numeric_string() here? */
258 		return 0;
259 	}
260 
261 	local_dval = collator_u_strtod(str, &end_ptr_double);
262 	if (local_dval == 0 && end_ptr_double == str) {
263 		end_ptr_double = NULL;
264 	} else {
265 		if (end_ptr_double == str+length) { /* floating point string */
266 			if (!zend_finite(local_dval)) {
267 				/* "inf","nan" and maybe other weird ones */
268 				return 0;
269 			}
270 
271 			if (dval) {
272 				*dval = local_dval;
273 			}
274 			return IS_DOUBLE;
275 		}
276 	}
277 
278 	if (!allow_errors) {
279 		return 0;
280 	}
281 	if (allow_errors == -1) {
282 		zend_error(E_NOTICE, "A non well formed numeric value encountered");
283 	}
284 
285 	if (allow_errors) {
286 		if (end_ptr_double > end_ptr_long && dval) {
287 			*dval = local_dval;
288 			return IS_DOUBLE;
289 		} else if (end_ptr_long && lval) {
290 			*lval = local_lval;
291 			return IS_LONG;
292 		}
293 	}
294 	return 0;
295 }
296 /* }}} */
297 
298 /*
299  * Local variables:
300  * tab-width: 4
301  * c-basic-offset: 4
302  * End:
303  * vim600: noet sw=4 ts=4 fdm=marker
304  * vim<600: noet sw=4 ts=4
305  */
306