1 /*
2    +----------------------------------------------------------------------+
3    | This source file is subject to version 3.01 of the PHP license,      |
4    | that is bundled with this package in the file LICENSE, and is        |
5    | available through the world-wide-web at the following url:           |
6    | https://www.php.net/license/3_01.txt                                 |
7    | If you did not receive a copy of the PHP license and are unable to   |
8    | obtain it through the world-wide-web, please send a note to          |
9    | license@php.net so we can mail you a copy immediately.               |
10    +----------------------------------------------------------------------+
11    | Authors: Vadim Savchuk <vsavchuk@productengine.com>                  |
12    |          Dmitry Lakhtyuk <dlakhtyuk@productengine.com>               |
13    +----------------------------------------------------------------------+
14  */
15 
16 #include "collator_is_numeric.h"
17 
18 /* {{{ Taken from PHP6:zend_u_strtod() */
collator_u_strtod(const UChar * nptr,UChar ** endptr)19 static double collator_u_strtod(const UChar *nptr, UChar **endptr) /* {{{ */
20 {
21 	const UChar *u = nptr, *nstart;
22 	UChar c = *u;
23 	int any = 0;
24 
25 	while (u_isspace(c)) {
26 		c = *++u;
27 	}
28 	nstart = u;
29 
30 	if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
31 		c = *++u;
32 	}
33 
34 	while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
35 		any = 1;
36 		c = *++u;
37 	}
38 
39 	if (c == 0x2E /*'.'*/) {
40 		c = *++u;
41 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
42 			any = 1;
43 			c = *++u;
44 		}
45 	}
46 
47 	if ((c == 0x65 /*'e'*/ || c == 0x45 /*'E'*/) && any) {
48 		const UChar *e = u;
49 		int any_exp = 0;
50 
51 		c = *++u;
52 		if (c == 0x2D /*'-'*/ || c == 0x2B /*'+'*/) {
53 			c = *++u;
54 		}
55 
56 		while (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/) {
57 			any_exp = 1;
58 			c = *++u;
59 		}
60 
61 		if (!any_exp) {
62 			u = e;
63 		}
64 	}
65 
66 	if (any) {
67 		char buf[64], *numbuf, *bufpos;
68 		size_t length = u - nstart;
69 		double value;
70 		ALLOCA_FLAG(use_heap = 0);
71 
72 		if (length < sizeof(buf)) {
73 			numbuf = buf;
74 		} else {
75 			numbuf = (char *) do_alloca(length + 1, use_heap);
76 		}
77 
78 		bufpos = numbuf;
79 
80 		while (nstart < u) {
81 			*bufpos++ = (char) *nstart++;
82 		}
83 
84 		*bufpos = '\0';
85 		value = zend_strtod(numbuf, NULL);
86 
87 		if (numbuf != buf) {
88 			free_alloca(numbuf, use_heap);
89 		}
90 
91 		if (endptr != NULL) {
92 			*endptr = (UChar *)u;
93 		}
94 
95 		return value;
96 	}
97 
98 	if (endptr != NULL) {
99 		*endptr = (UChar *)nptr;
100 	}
101 
102 	return 0;
103 }
104 /* }}} */
105 
106 /* {{{ collator_u_strtol
107  * Taken from PHP6:zend_u_strtol()
108  *
109  * Convert a Unicode string to a long integer.
110  *
111  * Ignores `locale' stuff.
112  */
collator_u_strtol(const UChar * nptr,UChar ** endptr,int base)113 static zend_long collator_u_strtol(const UChar *nptr, UChar **endptr, int base)
114 {
115 	const UChar *s = nptr;
116 	zend_ulong acc;
117 	UChar c;
118 	zend_ulong cutoff;
119 	int neg = 0, any, cutlim;
120 
121 	if (s == NULL) {
122 		errno = ERANGE;
123 		if (endptr != NULL) {
124 			*endptr = NULL;
125 		}
126 		return 0;
127 	}
128 
129 	/*
130 	 * Skip white space and pick up leading +/- sign if any.
131 	 * If base is 0, allow 0x for hex and 0 for octal, else
132 	 * assume decimal; if base is already 16, allow 0x.
133 	 */
134 	do {
135 		c = *s++;
136 	} while (u_isspace(c));
137 	if (c == 0x2D /*'-'*/) {
138 		neg = 1;
139 		c = *s++;
140 	} else if (c == 0x2B /*'+'*/)
141 		c = *s++;
142 	if ((base == 0 || base == 16) &&
143 	    (c == 0x30 /*'0'*/)
144 		 && (*s == 0x78 /*'x'*/ || *s == 0x58 /*'X'*/)) {
145 		c = s[1];
146 		s += 2;
147 		base = 16;
148 	}
149 	if (base == 0)
150 		base = (c == 0x30 /*'0'*/) ? 8 : 10;
151 
152 	/*
153 	 * Compute the cutoff value between legal numbers and illegal
154 	 * numbers.  That is the largest legal value, divided by the
155 	 * base.  An input number that is greater than this value, if
156 	 * followed by a legal input character, is too big.  One that
157 	 * is equal to this value may be valid or not; the limit
158 	 * between valid and invalid numbers is then based on the last
159 	 * digit.  For instance, if the range for longs is
160 	 * [-2147483648..2147483647] and the input base is 10,
161 	 * cutoff will be set to 214748364 and cutlim to either
162 	 * 7 (neg==0) or 8 (neg==1), meaning that if we have accumulated
163 	 * a value > 214748364, or equal but the next digit is > 7 (or 8),
164 	 * the number is too big, and we will return a range error.
165 	 *
166 	 * Set any if any `digits' consumed; make it negative to indicate
167 	 * overflow.
168 	 */
169 	cutoff = neg ? -(zend_ulong)ZEND_LONG_MIN : ZEND_LONG_MAX;
170 	cutlim = cutoff % (zend_ulong)base;
171 	cutoff /= (zend_ulong)base;
172 	for (acc = 0, any = 0;; c = *s++) {
173 		if (c >= 0x30 /*'0'*/ && c <= 0x39 /*'9'*/)
174 			c -= 0x30 /*'0'*/;
175 		else if (c >= 0x41 /*'A'*/ && c <= 0x5A /*'Z'*/)
176 			c -= 0x41 /*'A'*/ - 10;
177 		else if (c >= 0x61 /*'a'*/ && c <= 0x7A /*'z'*/)
178 			c -= 0x61 /*'a'*/ - 10;
179 		else
180 			break;
181 		if (c >= base)
182 			break;
183 
184 		if (any < 0 || acc > cutoff || (acc == cutoff && c > cutlim))
185 			any = -1;
186 		else {
187 			any = 1;
188 			acc *= base;
189 			acc += c;
190 		}
191 	}
192 	if (any < 0) {
193 		acc = neg ? ZEND_LONG_MIN : ZEND_LONG_MAX;
194 		errno = ERANGE;
195 	} else if (neg)
196 		acc = -acc;
197 	if (endptr != NULL)
198 		*endptr = (UChar *)(any ? s - 1 : nptr);
199 	return (acc);
200 }
201 /* }}} */
202 
203 
204 /* {{{ collator_is_numeric]
205  * Taken from PHP6:is_numeric_unicode()
206  */
collator_is_numeric(UChar * str,int32_t length,zend_long * lval,double * dval,bool allow_errors)207 uint8_t collator_is_numeric( UChar *str, int32_t length, zend_long *lval, double *dval, bool allow_errors )
208 {
209 	zend_long local_lval;
210 	double local_dval;
211 	UChar *end_ptr_long, *end_ptr_double;
212 
213 	if (!length) {
214 		return 0;
215 	}
216 
217 	errno=0;
218 	local_lval = collator_u_strtol(str, &end_ptr_long, 10);
219 	if (errno != ERANGE) {
220 		if (end_ptr_long == str+length) { /* integer string */
221 			if (lval) {
222 				*lval = local_lval;
223 			}
224 			return IS_LONG;
225 		} else if (end_ptr_long == str && *end_ptr_long != '\0' && *str != '.' && *str != '-') { /* ignore partial string matches */
226 			return 0;
227 		}
228 	} else {
229 		end_ptr_long = NULL;
230 	}
231 
232 	local_dval = collator_u_strtod(str, &end_ptr_double);
233 	if (local_dval == 0 && end_ptr_double == str) {
234 		end_ptr_double = NULL;
235 	} else {
236 		if (end_ptr_double == str+length) { /* floating point string */
237 			if (!zend_finite(local_dval)) {
238 				/* "inf","nan" and maybe other weird ones */
239 				return 0;
240 			}
241 
242 			if (dval) {
243 				*dval = local_dval;
244 			}
245 			return IS_DOUBLE;
246 		}
247 	}
248 
249 	if (!allow_errors) {
250 		return 0;
251 	}
252 
253 	if (allow_errors) {
254 		if (end_ptr_double > end_ptr_long && dval) {
255 			*dval = local_dval;
256 			return IS_DOUBLE;
257 		} else if (end_ptr_long && lval) {
258 			*lval = local_lval;
259 			return IS_LONG;
260 		}
261 	}
262 	return 0;
263 }
264 /* }}} */
265