1 /*
2 +----------------------------------------------------------------------+
3 | Copyright (c) The PHP Group |
4 +----------------------------------------------------------------------+
5 | This source file is subject to version 3.01 of the PHP license, |
6 | that is bundled with this package in the file LICENSE, and is |
7 | available through the world-wide-web at the following url: |
8 | https://www.php.net/license/3_01.txt |
9 | If you did not receive a copy of the PHP license and are unable to |
10 | obtain it through the world-wide-web, please send a note to |
11 | license@php.net so we can mail you a copy immediately. |
12 +----------------------------------------------------------------------+
13 | Authors: Niels Dossche <nielsdos@php.net> |
14 +----------------------------------------------------------------------+
15 */
16
17 #include "bcmath.h"
18 #include "convert.h"
19 #include "private.h"
20 #ifdef __SSE2__
21 # include <emmintrin.h>
22 #endif
23
bc_copy_and_toggle_bcd(char * restrict dest,const char * source,const char * source_end)24 char *bc_copy_and_toggle_bcd(char *restrict dest, const char *source, const char *source_end)
25 {
26 const size_t bulk_shift = SWAR_REPEAT('0');
27
28 #ifdef __SSE2__
29 /* SIMD SSE2 bulk shift + copy */
30 __m128i shift_vector = _mm_set1_epi8('0');
31 while (source + sizeof(__m128i) <= source_end) {
32 __m128i bytes = _mm_loadu_si128((const __m128i *) source);
33 bytes = _mm_xor_si128(bytes, shift_vector);
34 _mm_storeu_si128((__m128i *) dest, bytes);
35
36 source += sizeof(__m128i);
37 dest += sizeof(__m128i);
38 }
39 #endif
40
41 /* Handle sizeof(size_t) (i.e. 4/8) bytes at once.
42 * We know that adding/subtracting an individual byte cannot overflow,
43 * so it is possible to add/subtract an entire word of bytes at once
44 * by using SWAR_REPEAT. */
45 while (source + sizeof(size_t) <= source_end) {
46 size_t bytes;
47 memcpy(&bytes, source, sizeof(bytes));
48
49 bytes ^= bulk_shift;
50 memcpy(dest, &bytes, sizeof(bytes));
51
52 source += sizeof(size_t);
53 dest += sizeof(size_t);
54 }
55
56 while (source < source_end) {
57 *dest = *source ^ '0';
58 dest++;
59 source++;
60 }
61
62 return dest;
63 }
64
65 /* This is based on the technique described in https://kholdstare.github.io/technical/2020/05/26/faster-integer-parsing.html.
66 * This function transforms AABBCCDD into 1000 * AA + 100 * BB + 10 * CC + DD,
67 * with the caveat that all components must be in the interval [0, 25] to prevent overflow
68 * due to the multiplication by power of 10 (10 * 25 = 250 is the largest number that fits in a byte).
69 * The advantage of this method instead of using shifts + 3 multiplications is that this is cheaper
70 * due to its divide-and-conquer nature.
71 */
72 #if SIZEOF_SIZE_T == 4
bc_parse_chunk_chars(const char * str)73 BC_VECTOR bc_parse_chunk_chars(const char *str)
74 {
75 BC_VECTOR tmp;
76 memcpy(&tmp, str, sizeof(tmp));
77 #if !BC_LITTLE_ENDIAN
78 tmp = BC_BSWAP(tmp);
79 #endif
80
81 BC_VECTOR lower_digits = (tmp & 0x0f000f00) >> 8;
82 BC_VECTOR upper_digits = (tmp & 0x000f000f) * 10;
83
84 tmp = lower_digits + upper_digits;
85
86 lower_digits = (tmp & 0x00ff0000) >> 16;
87 upper_digits = (tmp & 0x000000ff) * 100;
88
89 return lower_digits + upper_digits;
90 }
91 #elif SIZEOF_SIZE_T == 8
bc_parse_chunk_chars(const char * str)92 BC_VECTOR bc_parse_chunk_chars(const char *str)
93 {
94 BC_VECTOR tmp;
95 memcpy(&tmp, str, sizeof(tmp));
96 #if !BC_LITTLE_ENDIAN
97 tmp = BC_BSWAP(tmp);
98 #endif
99
100 BC_VECTOR lower_digits = (tmp & 0x0f000f000f000f00) >> 8;
101 BC_VECTOR upper_digits = (tmp & 0x000f000f000f000f) * 10;
102
103 tmp = lower_digits + upper_digits;
104
105 lower_digits = (tmp & 0x00ff000000ff0000) >> 16;
106 upper_digits = (tmp & 0x000000ff000000ff) * 100;
107
108 tmp = lower_digits + upper_digits;
109
110 lower_digits = (tmp & 0x0000ffff00000000) >> 32;
111 upper_digits = (tmp & 0x000000000000ffff) * 10000;
112
113 return lower_digits + upper_digits;
114 }
115 #endif
116
117 #if BC_LITTLE_ENDIAN
118 # define BC_ENCODE_LUT(A, B) ((A) | (B) << 4)
119 #else
120 # define BC_ENCODE_LUT(A, B) ((B) | (A) << 4)
121 #endif
122
123 #define LUT_ITERATE(_, A) \
124 _(A, 0), _(A, 1), _(A, 2), _(A, 3), _(A, 4), _(A, 5), _(A, 6), _(A, 7), _(A, 8), _(A, 9)
125
126 /* This LUT encodes the decimal representation of numbers 0-100
127 * such that we can avoid taking modulos and divisions which would be slow. */
128 static const unsigned char LUT[100] = {
129 LUT_ITERATE(BC_ENCODE_LUT, 0),
130 LUT_ITERATE(BC_ENCODE_LUT, 1),
131 LUT_ITERATE(BC_ENCODE_LUT, 2),
132 LUT_ITERATE(BC_ENCODE_LUT, 3),
133 LUT_ITERATE(BC_ENCODE_LUT, 4),
134 LUT_ITERATE(BC_ENCODE_LUT, 5),
135 LUT_ITERATE(BC_ENCODE_LUT, 6),
136 LUT_ITERATE(BC_ENCODE_LUT, 7),
137 LUT_ITERATE(BC_ENCODE_LUT, 8),
138 LUT_ITERATE(BC_ENCODE_LUT, 9),
139 };
140
bc_expand_lut(unsigned char c)141 static inline unsigned short bc_expand_lut(unsigned char c)
142 {
143 return (c & 0x0f) | (c & 0xf0) << 4;
144 }
145
146 /* Writes the character representation of the number encoded in value.
147 * E.g. if value = 1234, then the string "1234" will be written to str. */
bc_write_bcd_representation(uint32_t value,char * str)148 void bc_write_bcd_representation(uint32_t value, char *str)
149 {
150 uint32_t upper = value / 100; /* e.g. 12 */
151 uint32_t lower = value % 100; /* e.g. 34 */
152
153 #if BC_LITTLE_ENDIAN
154 /* Note: little endian, so `lower` comes before `upper`! */
155 uint32_t digits = bc_expand_lut(LUT[lower]) << 16 | bc_expand_lut(LUT[upper]);
156 #else
157 /* Note: big endian, so `upper` comes before `lower`! */
158 uint32_t digits = bc_expand_lut(LUT[upper]) << 16 | bc_expand_lut(LUT[lower]);
159 #endif
160 memcpy(str, &digits, sizeof(digits));
161 }
162