xref: /php-src/ext/bcmath/libbcmath/src/convert.c (revision 3c9ab6eb)
1 /*
2    +----------------------------------------------------------------------+
3    | Copyright (c) The PHP Group                                          |
4    +----------------------------------------------------------------------+
5    | This source file is subject to version 3.01 of the PHP license,      |
6    | that is bundled with this package in the file LICENSE, and is        |
7    | available through the world-wide-web at the following url:           |
8    | https://www.php.net/license/3_01.txt                                 |
9    | If you did not receive a copy of the PHP license and are unable to   |
10    | obtain it through the world-wide-web, please send a note to          |
11    | license@php.net so we can mail you a copy immediately.               |
12    +----------------------------------------------------------------------+
13    | Authors: Niels Dossche <nielsdos@php.net>                            |
14    +----------------------------------------------------------------------+
15 */
16 
17 #include "bcmath.h"
18 #include "convert.h"
19 #include "private.h"
20 #ifdef __SSE2__
21 # include <emmintrin.h>
22 #endif
23 
bc_copy_and_toggle_bcd(char * restrict dest,const char * source,const char * source_end)24 char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end)
25 {
26 	const size_t bulk_shift = SWAR_REPEAT('0');
27 
28 #ifdef __SSE2__
29 	/* SIMD SSE2 bulk shift + copy */
30 	__m128i shift_vector = _mm_set1_epi8('0');
31 	while (source + sizeof(__m128i) <= source_end) {
32 		__m128i bytes = _mm_loadu_si128((const __m128i *) source);
33 		bytes = _mm_xor_si128(bytes, shift_vector);
34 		_mm_storeu_si128((__m128i *) dest, bytes);
35 
36 		source += sizeof(__m128i);
37 		dest += sizeof(__m128i);
38 	}
39 #endif
40 
41 	/* Handle sizeof(size_t) (i.e. 4/8) bytes at once.
42 	 * We know that adding/subtracting an individual byte cannot overflow,
43 	 * so it is possible to add/subtract an entire word of bytes at once
44 	 * by using SWAR_REPEAT. */
45 	while (source + sizeof(size_t) <= source_end) {
46 		size_t bytes;
47 		memcpy(&bytes, source, sizeof(bytes));
48 
49 		bytes ^= bulk_shift;
50 		memcpy(dest, &bytes, sizeof(bytes));
51 
52 		source += sizeof(size_t);
53 		dest += sizeof(size_t);
54 	}
55 
56 	while (source < source_end) {
57 		*dest = *source ^ '0';
58 		dest++;
59 		source++;
60 	}
61 
62 	return dest;
63 }
64 
65 /* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
66  * This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
67  * with the caveat that all components must be in the interval [0, 25] to prevent overflow
68  * due to the multiplication by power of 10 (10 * 25 = 250 is the largest number that fits in a byte).
69  * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
70  * due to its divide-and-conquer nature.
71  */
72 #if SIZEOF_SIZE_T == 4
bc_parse_chunk_chars(const char * str)73 BC_VECTOR bc_parse_chunk_chars(const char *str)
74 {
75 	BC_VECTOR tmp;
76 	memcpy(&tmp, str, sizeof(tmp));
77 #if !BC_LITTLE_ENDIAN
78 	tmp = BC_BSWAP(tmp);
79 #endif
80 
81 	BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
82 	BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
83 
84 	tmp = lower_digits + upper_digits;
85 
86 	lower_digits = (tmp & 0x00ff0000) >> 16;
87 	upper_digits = (tmp & 0x000000ff) * 100;
88 
89 	return lower_digits + upper_digits;
90 }
91 #elif SIZEOF_SIZE_T == 8
bc_parse_chunk_chars(const char * str)92 BC_VECTOR bc_parse_chunk_chars(const char *str)
93 {
94 	BC_VECTOR tmp;
95 	memcpy(&tmp, str, sizeof(tmp));
96 #if !BC_LITTLE_ENDIAN
97 	tmp = BC_BSWAP(tmp);
98 #endif
99 
100 	BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
101 	BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
102 
103 	tmp = lower_digits + upper_digits;
104 
105 	lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
106 	upper_digits = (tmp & 0x000000ff000000ff) * 100;
107 
108 	tmp = lower_digits + upper_digits;
109 
110 	lower_digits = (tmp & 0x0000ffff00000000) >> 32;
111 	upper_digits = (tmp & 0x000000000000ffff) * 10000;
112 
113 	return lower_digits + upper_digits;
114 }
115 #endif
116 
117 #if BC_LITTLE_ENDIAN
118 # define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
119 #else
120 # define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
121 #endif
122 
123 #define LUT_ITERATE(_, A) \
124 	_(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
125 
126 /* This LUT encodes the decimal representation of numbers 0-100
127  * such that we can avoid taking modulos and divisions which would be slow. */
128 static const unsigned char LUT[100] = {
129 	LUT_ITERATE(BC_ENCODE_LUT, 0),
130 	LUT_ITERATE(BC_ENCODE_LUT, 1),
131 	LUT_ITERATE(BC_ENCODE_LUT, 2),
132 	LUT_ITERATE(BC_ENCODE_LUT, 3),
133 	LUT_ITERATE(BC_ENCODE_LUT, 4),
134 	LUT_ITERATE(BC_ENCODE_LUT, 5),
135 	LUT_ITERATE(BC_ENCODE_LUT, 6),
136 	LUT_ITERATE(BC_ENCODE_LUT, 7),
137 	LUT_ITERATE(BC_ENCODE_LUT, 8),
138 	LUT_ITERATE(BC_ENCODE_LUT, 9),
139 };
140 
bc_expand_lut(unsigned char c)141 static inline unsigned short bc_expand_lut(unsigned char c)
142 {
143 	return (c & 0x0f) | (c & 0xf0) << 4;
144 }
145 
146 /* Writes the character representation of the number encoded in value.
147  * E.g. if value = 1234, then the string "1234" will be written to str. */
bc_write_bcd_representation(uint32_t value,char * str)148 void bc_write_bcd_representation(uint32_t value, char *str)
149 {
150 	uint32_t upper = value / 100; /* e.g. 12 */
151 	uint32_t lower = value % 100; /* e.g. 34 */
152 
153 #if BC_LITTLE_ENDIAN
154 	/* Note: little endian, so `lower` comes before `upper`! */
155 	uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
156 #else
157 	/* Note: big endian, so `upper` comes before `lower`! */
158 	uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
159 #endif
160 	memcpy(str, &digits, sizeof(digits));
161 }
162