xref: /php-src/ext/dom/lexbor/lexbor/core/strtod.c (revision bffab33a)
1 /*
2  * Copyright (C) Alexander Borisov
3  *
4  * Based on nxt_strtod.c from NGINX NJS project
5  *
6  * An internal strtod() implementation based upon V8 src/strtod.cc
7  * without bignum support.
8  *
9  * Copyright 2012 the V8 project authors. All rights reserved.
10  * Use of this source code is governed by a BSD-style license
11  * that can be found in the LICENSE file.
12  */
13 
14 #include <stdint.h>
15 #include <math.h>
16 
17 #include "lexbor/core/diyfp.h"
18 #include "lexbor/core/strtod.h"
19 
20 
21 /*
22  * Max double: 1.7976931348623157 x 10^308
23  * Min non-zero double: 4.9406564584124654 x 10^-324
24  * Any x >= 10^309 is interpreted as +infinity.
25  * Any x <= 10^-324 is interpreted as 0.
26  * Note that 2.5e-324 (despite being smaller than the min double)
27  * will be read as non-zero (equal to the min non-zero double).
28  */
29 
30 #define LEXBOR_DECIMAL_POWER_MAX 309
31 #define LEXBOR_DECIMAL_POWER_MIN (-324)
32 
33 #define LEXBOR_UINT64_MAX lexbor_uint64_hl(0xFFFFFFFF, 0xFFFFFFFF)
34 #define LEXBOR_UINT64_DECIMAL_DIGITS_MAX 19
35 
36 #define LEXBOR_DENOM_LOG   3
37 #define LEXBOR_DENOM       (1 << LEXBOR_DENOM_LOG)
38 
39 
40 static lexbor_diyfp_t
41 lexbor_strtod_diyfp_read(const lxb_char_t *start, size_t length,
42                          int *remaining);
43 
44 static double
45 lexbor_strtod_diyfp_strtod(const lxb_char_t *start, size_t length, int exp);
46 
47 
48 /*
49  * Reads digits from the buffer and converts them to a uint64.
50  * Reads in as many digits as fit into a uint64.
51  * When the string starts with "1844674407370955161" no further digit is read.
52  * Since 2^64 = 18446744073709551616 it would still be possible read another
53  * digit if it was less or equal than 6, but this would complicate the code.
54  */
55 lxb_inline uint64_t
lexbor_strtod_read_uint64(const lxb_char_t * start,size_t length,size_t * ndigits)56 lexbor_strtod_read_uint64(const lxb_char_t *start, size_t length,
57                           size_t *ndigits)
58 {
59     lxb_char_t d;
60     uint64_t value;
61     const lxb_char_t *p, *e;
62 
63     value = 0;
64 
65     p = start;
66     e = p + length;
67 
68     while (p < e && value <= (UINT64_MAX / 10 - 1)) {
69         d = *p++ - '0';
70         value = 10 * value + d;
71     }
72 
73     *ndigits = p - start;
74 
75     return value;
76 }
77 
78 /*
79  * Reads a nxt_diyfp_t from the buffer.
80  * The returned nxt_diyfp_t is not necessarily normalized.
81  * If remaining is zero then the returned nxt_diyfp_t is accurate.
82  * Otherwise it has been rounded and has error of at most 1/2 ulp.
83  */
84 static lexbor_diyfp_t
lexbor_strtod_diyfp_read(const lxb_char_t * start,size_t length,int * remaining)85 lexbor_strtod_diyfp_read(const lxb_char_t *start, size_t length, int *remaining)
86 {
87     size_t read;
88     uint64_t significand;
89 
90     significand = lexbor_strtod_read_uint64(start, length, &read);
91 
92     /* Round the significand. */
93 
94     if (length != read) {
95         if (start[read] >= '5') {
96             significand++;
97         }
98     }
99 
100     *remaining = (int) (length - read);
101 
102     return lexbor_diyfp(significand, 0);
103 }
104 
105 /*
106  * Returns 10^exp as an exact nxt_diyfp_t.
107  * The given exp must be in the range [1; NXT_DECIMAL_EXPONENT_DIST[.
108  */
109 lxb_inline lexbor_diyfp_t
lexbor_strtod_adjust_pow10(int exp)110 lexbor_strtod_adjust_pow10(int exp)
111 {
112     switch (exp) {
113     case 1:
114         return lexbor_diyfp(lexbor_uint64_hl(0xa0000000, 00000000), -60);
115     case 2:
116         return lexbor_diyfp(lexbor_uint64_hl(0xc8000000, 00000000), -57);
117     case 3:
118         return lexbor_diyfp(lexbor_uint64_hl(0xfa000000, 00000000), -54);
119     case 4:
120         return lexbor_diyfp(lexbor_uint64_hl(0x9c400000, 00000000), -50);
121     case 5:
122         return lexbor_diyfp(lexbor_uint64_hl(0xc3500000, 00000000), -47);
123     case 6:
124         return lexbor_diyfp(lexbor_uint64_hl(0xf4240000, 00000000), -44);
125     case 7:
126         return lexbor_diyfp(lexbor_uint64_hl(0x98968000, 00000000), -40);
127     default:
128         return lexbor_diyfp(0, 0);
129     }
130 }
131 
132 /*
133  * Returns the significand size for a given order of magnitude.
134  * If v = f*2^e with 2^p-1 <= f <= 2^p then p+e is v's order of magnitude.
135  * This function returns the number of significant binary digits v will have
136  * once its encoded into a double. In almost all cases this is equal to
137  * NXT_SIGNIFICAND_SIZE. The only exception are denormals. They start with
138  * leading zeroes and their effective significand-size is hence smaller.
139  */
140 lxb_inline int
lexbor_strtod_diyfp_sgnd_size(int order)141 lexbor_strtod_diyfp_sgnd_size(int order)
142 {
143     if (order >= (LEXBOR_DBL_EXPONENT_DENORMAL + LEXBOR_SIGNIFICAND_SIZE)) {
144         return LEXBOR_SIGNIFICAND_SIZE;
145     }
146 
147     if (order <= LEXBOR_DBL_EXPONENT_DENORMAL) {
148         return 0;
149     }
150 
151     return order - LEXBOR_DBL_EXPONENT_DENORMAL;
152 }
153 
154 /*
155  * Returns either the correct double or the double that is just below
156  * the correct double.
157  */
158 static double
lexbor_strtod_diyfp_strtod(const lxb_char_t * start,size_t length,int exp)159 lexbor_strtod_diyfp_strtod(const lxb_char_t *start, size_t length, int exp)
160 {
161     int magnitude, prec_digits;
162     int remaining, dec_exp, adj_exp, orig_e, shift;
163     int64_t error;
164     uint64_t prec_bits, half_way;
165     lexbor_diyfp_t value, pow, adj_pow, rounded;
166 
167     value = lexbor_strtod_diyfp_read(start, length, &remaining);
168 
169     exp += remaining;
170 
171     /*
172      * Since some digits may have been dropped the value is not accurate.
173      * If remaining is different than 0 than the error is at most .5 ulp
174      * (unit in the last place).
175      * Using a common denominator to avoid dealing with fractions.
176      */
177 
178     error = (remaining == 0 ? 0 : LEXBOR_DENOM / 2);
179 
180     orig_e = value.exp;
181     value = lexbor_diyfp_normalize(value);
182     error <<= orig_e - value.exp;
183 
184     if (exp < LEXBOR_DECIMAL_EXPONENT_MIN) {
185         return 0.0;
186     }
187 
188     pow = lexbor_cached_power_dec(exp, &dec_exp);
189 
190     if (dec_exp != exp) {
191         adj_exp = exp - dec_exp;
192         adj_pow = lexbor_strtod_adjust_pow10(exp - dec_exp);
193         value = lexbor_diyfp_mul(value, adj_pow);
194 
195         if (LEXBOR_UINT64_DECIMAL_DIGITS_MAX - (int) length < adj_exp) {
196             /*
197              * The adjustment power is exact. There is hence only
198              * an error of 0.5.
199              */
200             error += LEXBOR_DENOM / 2;
201         }
202     }
203 
204     value = lexbor_diyfp_mul(value, pow);
205 
206     /*
207      * The error introduced by a multiplication of a * b equals
208      *  error_a + error_b + error_a * error_b / 2^64 + 0.5
209      *  Substituting a with 'value' and b with 'pow':
210      *  error_b = 0.5  (all cached powers have an error of less than 0.5 ulp),
211      *  error_ab = 0 or 1 / NXT_DENOM > error_a * error_b / 2^64.
212      */
213 
214     error += LEXBOR_DENOM + (error != 0 ? 1 : 0);
215 
216     orig_e = value.exp;
217     value = lexbor_diyfp_normalize(value);
218     error <<= orig_e - value.exp;
219 
220     /*
221      * Check whether the double's significand changes when the error is added
222      * or subtracted.
223      */
224 
225     magnitude = LEXBOR_DIYFP_SIGNIFICAND_SIZE + value.exp;
226     prec_digits = LEXBOR_DIYFP_SIGNIFICAND_SIZE
227         - lexbor_strtod_diyfp_sgnd_size(magnitude);
228 
229     if (prec_digits + LEXBOR_DENOM_LOG >= LEXBOR_DIYFP_SIGNIFICAND_SIZE) {
230         /*
231          * This can only happen for very small denormals. In this case the
232          * half-way multiplied by the denominator exceeds the range of uint64.
233          * Simply shift everything to the right.
234          */
235         shift = prec_digits + LEXBOR_DENOM_LOG
236             - LEXBOR_DIYFP_SIGNIFICAND_SIZE + 1;
237 
238         value = lexbor_diyfp_shift_right(value, shift);
239 
240         /*
241          * Add 1 for the lost precision of error, and NXT_DENOM
242          * for the lost precision of value.significand.
243          */
244         error = (error >> shift) + 1 + LEXBOR_DENOM;
245         prec_digits -= shift;
246     }
247 
248     prec_bits = value.significand & (((uint64_t) 1 << prec_digits) - 1);
249     prec_bits *= LEXBOR_DENOM;
250 
251     half_way = (uint64_t) 1 << (prec_digits - 1);
252     half_way *= LEXBOR_DENOM;
253 
254     rounded = lexbor_diyfp_shift_right(value, prec_digits);
255 
256     if (prec_bits >= half_way + error) {
257         rounded.significand++;
258     }
259 
260     return lexbor_diyfp_2d(rounded);
261 }
262 
263 double
lexbor_strtod_internal(const lxb_char_t * start,size_t length,int exp)264 lexbor_strtod_internal(const lxb_char_t *start, size_t length, int exp)
265 {
266     size_t left, right;
267     const lxb_char_t *p, *e, *b;
268 
269     /* Trim leading zeroes. */
270 
271     p = start;
272     e = p + length;
273 
274     while (p < e) {
275         if (*p != '0') {
276             start = p;
277             break;
278         }
279 
280         p++;
281     }
282 
283     left = e - p;
284 
285     /* Trim trailing zeroes. */
286 
287     b = start;
288     p = b + left - 1;
289 
290     while (p > b) {
291         if (*p != '0') {
292             break;
293         }
294 
295         p--;
296     }
297 
298     right = p - b + 1;
299 
300     length = right;
301 
302     if (length == 0) {
303         return 0.0;
304     }
305 
306     exp += (int) (left - right);
307 
308     if (exp + (int) length - 1 >= LEXBOR_DECIMAL_POWER_MAX) {
309         return INFINITY;
310     }
311 
312     if (exp + (int) length <= LEXBOR_DECIMAL_POWER_MIN) {
313         return 0.0;
314     }
315 
316     return lexbor_strtod_diyfp_strtod(start, length, exp);
317 }
318 
319 #undef LEXBOR_DECIMAL_POWER_MAX
320 #undef LEXBOR_DECIMAL_POWER_MIN
321 
322 #undef LEXBOR_UINT64_MAX
323 #undef LEXBOR_UINT64_DECIMAL_DIGITS_MAX
324 
325 #undef LEXBOR_DENOM_LOG
326 #undef LEXBOR_DENOM
327