xref: /php-src/ext/bcmath/libbcmath/src/str2num.c (revision fad899e5)
1 /* str2num.c: bcmath library file. */
2 /*
3     Copyright (C) 1991, 1992, 1993, 1994, 1997 Free Software Foundation, Inc.
4     Copyright (C) 2000 Philip A. Nelson
5 
6     This library is free software; you can redistribute it and/or
7     modify it under the terms of the GNU Lesser General Public
8     License as published by the Free Software Foundation; either
9     version 2 of the License, or (at your option) any later version.
10 
11     This library is distributed in the hope that it will be useful,
12     but WITHOUT ANY WARRANTY; without even the implied warranty of
13     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14     Lesser General Public License for more details.  (LICENSE)
15 
16     You should have received a copy of the GNU Lesser General Public
17     License along with this library; if not, write to:
18 
19       The Free Software Foundation, Inc.
20       59 Temple Place, Suite 330
21       Boston, MA 02111-1307 USA.
22 
23     You may contact the author by:
24        e-mail:  philnelson@acm.org
25       us-mail:  Philip A. Nelson
26                 Computer Science Department, 9062
27                 Western Washington University
28                 Bellingham, WA 98226-9062
29 
30 *************************************************************************/
31 
32 #include "bcmath.h"
33 #include "convert.h"
34 #include "private.h"
35 #include <stdbool.h>
36 #include <stddef.h>
37 #ifdef __SSE2__
38 # include <emmintrin.h>
39 #endif
40 
41 /* Convert strings to bc numbers.  Base 10 only.*/
/*
 * Scan forward over a run of ASCII decimal digits.
 *
 * str: first byte to examine.
 * end: one past the last byte that may be examined.
 *
 * Returns a pointer to the first byte in [str, end) that is not in the range
 * '0'..'9', or `end` when every byte in the range is a digit. No byte at or
 * beyond `end` is read by this function.
 */
static const char *bc_count_digits(const char *str, const char *end)
{
	/* Process in bulk */
#ifdef __SSE2__
	const __m128i offset = _mm_set1_epi8((signed char) (SCHAR_MIN - '0'));
	/* we use the less than comparator, so add 1 */
	const __m128i threshold = _mm_set1_epi8(SCHAR_MIN + ('9' + 1 - '0'));

	/*
	 * Compare the remaining length instead of computing `str + 16`, which
	 * would form a pointer beyond one-past-the-end for short inputs.
	 */
	while (end - str >= (ptrdiff_t) sizeof(__m128i)) {
		__m128i bytes = _mm_loadu_si128((const __m128i *) str);
		/* Wrapping-add the offset to the bytes, such that '0' becomes -128 and '9' becomes -119.
		 * Every non-digit byte lands above that range. */
		bytes = _mm_add_epi8(bytes, offset);
		/* Now mark all bytes that are <= '9', i.e. <= -119, i.e. < -118, i.e. the threshold. */
		bytes = _mm_cmplt_epi8(bytes, threshold);

		int mask = _mm_movemask_epi8(bytes);
		if (mask != 0xffff) {
			/* At least one of the bytes is not within range. Move to the first offending byte. */
#ifdef PHP_HAVE_BUILTIN_CTZL
			return str + __builtin_ctz(~mask);
#else
			/* No ctz builtin: let the byte-wise loop below locate the offending byte. */
			break;
#endif
		}

		str += sizeof(__m128i);
	}
#endif

	/* Byte-wise tail; bounded by `end` so it never depends on a terminator being present. */
	while (str < end && *str >= '0' && *str <= '9') {
		str++;
	}

	return str;
}
78 
/*
 * Walk backwards from `scanner` over a run of '0' characters.
 *
 * scanner: one past the last byte to examine.
 * stop:    lowest address that may be examined; the scan never goes below it.
 *
 * Returns the new end pointer: the smallest pointer p with stop <= p <= scanner
 * such that every byte in [p, scanner) is '0'. When scanner <= stop, `scanner`
 * is returned unchanged.
 */
static inline const char *bc_skip_zero_reverse(const char *scanner, const char *stop)
{
	/* Check in bulk */
#ifdef __SSE2__
	const __m128i c_zero_repeat = _mm_set1_epi8('0');
	/*
	 * Test the distance rather than `scanner - 16 >= stop`: the latter forms a
	 * pointer that may lie before the start of the string object.
	 */
	while (scanner - stop >= (ptrdiff_t) sizeof(__m128i)) {
		const char *chunk = scanner - sizeof(__m128i);
		__m128i bytes = _mm_loadu_si128((const __m128i *) chunk);
		/* Check whether all 16 bytes of the chunk are '0'. */
		bytes = _mm_cmpeq_epi8(bytes, c_zero_repeat);

		int mask = _mm_movemask_epi8(bytes);
		/* The probability of having 16 trailing 0s in a row is very low, so we use EXPECTED. */
		if (EXPECTED(mask != 0xffff)) {
			/* Leave `scanner` after this chunk and check each character in the loop below. */
			break;
		}

		scanner = chunk;
	}
#endif

	/* Exclude trailing zeros. */
	while (scanner > stop && scanner[-1] == '0') {
		scanner--;
	}

	return scanner;
}
107 
108 /* Assumes `num` points to NULL, i.e. does yet not hold a number. */
bc_str2num(bc_num * num,const char * str,const char * end,size_t scale,size_t * full_scale,bool auto_scale)109 bool bc_str2num(bc_num *num, const char *str, const char *end, size_t scale, size_t *full_scale, bool auto_scale)
110 {
111 	size_t str_scale = 0;
112 	const char *ptr = str;
113 	const char *fractional_ptr = NULL;
114 	const char *fractional_end = NULL;
115 	bool zero_int = false;
116 
117 	ZEND_ASSERT(*num == NULL);
118 
119 	/* Check for valid number and count digits. */
120 	if ((*ptr == '+') || (*ptr == '-')) {
121 		/* Skip Sign */
122 		ptr++;
123 	}
124 	/* Skip leading zeros. */
125 	while (*ptr == '0') {
126 		ptr++;
127 	}
128 	const char *integer_ptr = ptr;
129 	/* digits before the decimal point */
130 	ptr = bc_count_digits(ptr, end);
131 	size_t digits = ptr - integer_ptr;
132 	/* decimal point */
133 	const char *decimal_point = (*ptr == '.') ? ptr : NULL;
134 
135 	/* If a non-digit and non-decimal-point indicator is in the string, i.e. an invalid character */
136 	if (UNEXPECTED(!decimal_point && *ptr != '\0')) {
137 		goto fail;
138 	}
139 
140 	/* search and validate fractional end if exists */
141 	if (decimal_point) {
142 		/* search */
143 		fractional_ptr = fractional_end = decimal_point + 1;
144 		/* For strings that end with a decimal point, such as "012." */
145 		if (UNEXPECTED(*fractional_ptr == '\0')) {
146 			if (full_scale) {
147 				*full_scale = 0;
148 			}
149 			goto after_fractional;
150 		}
151 
152 		/* validate */
153 		fractional_end = bc_count_digits(fractional_ptr, end);
154 		if (UNEXPECTED(*fractional_end != '\0')) {
155 			/* invalid num */
156 			goto fail;
157 		}
158 
159 		if (full_scale) {
160 			*full_scale = fractional_end - fractional_ptr;
161 		}
162 
163 		/* Exclude trailing zeros. */
164 		fractional_end = bc_skip_zero_reverse(fractional_end, fractional_ptr);
165 
166 		/* Calculate the length of the fraction excluding trailing zero. */
167 		str_scale = fractional_end - fractional_ptr;
168 
169 		/*
170 		 * If set the scale manually and it is smaller than the automatically calculated scale,
171 		 * adjust it to match the manual setting.
172 		 */
173 		if (str_scale > scale && !auto_scale) {
174 			fractional_end -= str_scale - scale;
175 			str_scale = scale;
176 		}
177 	} else {
178 		if (full_scale) {
179 			*full_scale = 0;
180 		}
181 	}
182 
183 after_fractional:
184 
185 	if (digits + str_scale == 0) {
186 		goto zero;
187 	}
188 
189 	/* Adjust numbers and allocate storage and initialize fields. */
190 	if (digits == 0) {
191 		zero_int = true;
192 		digits = 1;
193 	}
194 	*num = bc_new_num_nonzeroed(digits, str_scale);
195 	(*num)->n_sign = *str == '-' ? MINUS : PLUS;
196 	char *nptr = (*num)->n_value;
197 
198 	if (zero_int) {
199 		*nptr++ = 0;
200 		/*
201 		 * If zero_int is true and the str_scale is 0, there is an early return,
202 		 * so here str_scale is always greater than 0.
203 		 */
204 		nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end);
205 	} else {
206 		const char *integer_end = integer_ptr + digits;
207 		nptr = bc_copy_and_toggle_bcd(nptr, integer_ptr, integer_end);
208 		if (str_scale > 0) {
209 			nptr = bc_copy_and_toggle_bcd(nptr, fractional_ptr, fractional_end);
210 		}
211 	}
212 
213 	return true;
214 
215 zero:
216 	*num = bc_copy_num(BCG(_zero_));
217 	return true;
218 
219 fail:
220 	*num = bc_copy_num(BCG(_zero_));
221 	return false;
222 }
223