1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | http://www.php.net/license/3_01.txt                                  |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
12    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
13    +----------------------------------------------------------------------+
14  */
15 
16 #include "collator_is_numeric.h"
17 
18 /* {{{ Taken from PHP6:zend_u_strtod() */
collator_u_strtod(const UChar * nptr,UChar ** endptr)19 static double collator_u_strtod(const UChar *nptr, UChar **endptr) /* {{{ */
20 {
21 	const UChar *u = nptr, *nstart;
22 	UChar c = *u;
23 	int any = 0;
24 
25 	while (u_isspace(c)) {
26 		c = *++u;
27 	}
28 	nstart = u;
29 
30 	if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
31 		c = *++u;
32 	}
33 
34 	while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
35 		any = 1;
36 		c = *++u;
37 	}
38 
39 	if (c == 0x2E /*'.'*/) {
40 		c = *++u;
41 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
42 			any = 1;
43 			c = *++u;
44 		}
45 	}
46 
47 	if ((c == 0x65 /*'e'*/ || c == 0x45 /*'E'*/) && any) {
48 		const UChar *e = u;
49 		int any_exp = 0;
50 
51 		c = *++u;
52 		if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
53 			c = *++u;
54 		}
55 
56 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
57 			any_exp = 1;
58 			c = *++u;
59 		}
60 
61 		if (!any_exp) {
62 			u = e;
63 		}
64 	}
65 
66 	if (any) {
67 		char buf[64], *numbuf, *bufpos;
68 		size_t length = u - nstart;
69 		double value;
70 		ALLOCA_FLAG(use_heap = 0);
71 
72 		if (length < sizeof(buf)) {
73 			numbuf = buf;
74 		} else {
75 			numbuf = (char *) do_alloca(length + 1, use_heap);
76 		}
77 
78 		bufpos = numbuf;
79 
80 		while (nstart < u) {
81 			*bufpos++ = (char) *nstart++;
82 		}
83 
84 		*bufpos = '\0';
85 		value = zend_strtod(numbuf, NULL);
86 
87 		if (numbuf != buf) {
88 			free_alloca(numbuf, use_heap);
89 		}
90 
91 		if (endptr != NULL) {
92 			*endptr = (UChar *)u;
93 		}
94 
95 		return value;
96 	}
97 
98 	if (endptr != NULL) {
99 		*endptr = (UChar *)nptr;
100 	}
101 
102 	return 0;
103 }
104 /* }}} */
105 
106 /* {{{ collator_u_strtol
107  * Taken from PHP6:zend_u_strtol()
108  *
109  * Convert a Unicode string to a long integer.
110  *
111  * Ignores `locale' stuff.
112  */
collator_u_strtol(nptr,endptr,base)113 static zend_long collator_u_strtol(nptr, endptr, base)
114 	const UChar *nptr;
115 	UChar **endptr;
116 	register int base;
117 {
118 	register const UChar *s = nptr;
119 	register zend_ulong acc;
120 	register UChar c;
121 	register zend_ulong cutoff;
122 	register int neg = 0, any, cutlim;
123 
124 	if (s == NULL) {
125 		errno = ERANGE;
126 		if (endptr != NULL) {
127 			*endptr = NULL;
128 		}
129 		return 0;
130 	}
131 
132 	/*
133 	 * Skip white space and pick up leading +/- sign if any.
134 	 * If base is 0, allow 0x for hex and 0 for octal, else
135 	 * assume decimal; if base is already 16, allow 0x.
136 	 */
137 	do {
138 		c = *s++;
139 	} while (u_isspace(c));
140 	if (c == 0x2D /*'-'*/) {
141 		neg = 1;
142 		c = *s++;
143 	} else if (c == 0x2B /*'+'*/)
144 		c = *s++;
145 	if ((base == 0 || base == 16) &&
146 	    (c == 0x30 /*'0'*/)
147 		 && (*s == 0x78 /*'x'*/ || *s == 0x58 /*'X'*/)) {
148 		c = s[1];
149 		s += 2;
150 		base = 16;
151 	}
152 	if (base == 0)
153 		base = (c == 0x30 /*'0'*/) ? 8 : 10;
154 
155 	/*
156 	 * Compute the cutoff value between legal numbers and illegal
157 	 * numbers.  That is the largest legal value, divided by the
158 	 * base.  An input number that is greater than this value, if
159 	 * followed by a legal input character, is too big.  One that
160 	 * is equal to this value may be valid or not; the limit
161 	 * between valid and invalid numbers is then based on the last
162 	 * digit.  For instance, if the range for longs is
163 	 * [-2147483648..2147483647] and the input base is 10,
164 	 * cutoff will be set to 214748364 and cutlim to either
165 	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
166 	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
167 	 * the number is too big, and we will return a range error.
168 	 *
169 	 * Set any if any `digits' consumed; make it negative to indicate
170 	 * overflow.
171 	 */
172 	cutoff = neg ? -(zend_ulong)ZEND_LONG_MIN : ZEND_LONG_MAX;
173 	cutlim = cutoff % (zend_ulong)base;
174 	cutoff /= (zend_ulong)base;
175 	for (acc = 0, any = 0;; c = *s++) {
176 		if (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/)
177 			c -= 0x30 /*'0'*/;
178 		else if (c >= 0x41 /*'A'*/ && c <= 0x5A /*'Z'*/)
179 			c -= 0x41 /*'A'*/ - 10;
180 		else if (c >= 0x61 /*'a'*/ && c <= 0x7A /*'z'*/)
181 			c -= 0x61 /*'a'*/ - 10;
182 		else
183 			break;
184 		if (c >= base)
185 			break;
186 
187 		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
188 			any = -1;
189 		else {
190 			any = 1;
191 			acc *= base;
192 			acc += c;
193 		}
194 	}
195 	if (any < 0) {
196 		acc = neg ? ZEND_LONG_MIN : ZEND_LONG_MAX;
197 		errno = ERANGE;
198 	} else if (neg)
199 		acc = -acc;
200 	if (endptr != NULL)
201 		*endptr = (UChar *)(any ? s - 1 : nptr);
202 	return (acc);
203 }
204 /* }}} */
205 
206 
207 /* {{{ collator_is_numeric]
208  * Taken from PHP6:is_numeric_unicode()
209  */
collator_is_numeric(UChar * str,int32_t length,zend_long * lval,double * dval,bool allow_errors)210 zend_uchar collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double *dval, bool allow_errors )
211 {
212 	zend_long local_lval;
213 	double local_dval;
214 	UChar *end_ptr_long, *end_ptr_double;
215 
216 	if (!length) {
217 		return 0;
218 	}
219 
220 	errno=0;
221 	local_lval = collator_u_strtol(str, &end_ptr_long, 10);
222 	if (errno != ERANGE) {
223 		if (end_ptr_long == str+length) { /* integer string */
224 			if (lval) {
225 				*lval = local_lval;
226 			}
227 			return IS_LONG;
228 		} else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
229 			return 0;
230 		}
231 	} else {
232 		end_ptr_long = NULL;
233 	}
234 
235 	local_dval = collator_u_strtod(str, &end_ptr_double);
236 	if (local_dval == 0 && end_ptr_double == str) {
237 		end_ptr_double = NULL;
238 	} else {
239 		if (end_ptr_double == str+length) { /* floating point string */
240 			if (!zend_finite(local_dval)) {
241 				/* "inf","nan" and maybe other weird ones */
242 				return 0;
243 			}
244 
245 			if (dval) {
246 				*dval = local_dval;
247 			}
248 			return IS_DOUBLE;
249 		}
250 	}
251 
252 	if (!allow_errors) {
253 		return 0;
254 	}
255 
256 	if (allow_errors) {
257 		if (end_ptr_double > end_ptr_long && dval) {
258 			*dval = local_dval;
259 			return IS_DOUBLE;
260 		} else if (end_ptr_long && lval) {
261 			*lval = local_lval;
262 			return IS_LONG;
263 		}
264 	}
265 	return 0;
266 }
267 /* }}} */
268