1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (Folding engine generator)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * Based on Mike Pall's implementation for LuaJIT.
8 */
9
10 #include "ir.h"
11 #include <string.h>
12
13 #include "ir_strtab.c"
14
15 #define MAX_RULES 2048
16 #define MAX_SLOTS (MAX_RULES * 4)
17
18 #define USE_SEMI_PERFECT_HASH 1
19 #define USE_SHL_HASH 1
20 #define USE_ROL_HASH 0
21
22 static ir_strtab strtab;
23
print_hash(uint32_t * mask,uint32_t count)24 void print_hash(uint32_t *mask, uint32_t count)
25 {
26 uint32_t i;
27
28 printf("static const uint32_t _ir_fold_hash[%d] = {\n", count + 1);
29 for (i = 0; i < count; i++) {
30 printf("\t0x%08x,\n", mask[i]);
31 }
32 printf("\t0x%08x\n", 0);
33 printf("};\n\n");
34 }
35
36 #if USE_SHL_HASH
hash_shl2(uint32_t mask,uint32_t r1,uint32_t r2)37 static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
38 {
39 return ((mask << r1) - mask) << r2;
40 }
41 #endif
42
43 #if USE_ROL_HASH
44 #define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
45 #define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
46
hash_rol2(uint32_t mask,uint32_t r1,uint32_t r2)47 static uint32_t hash_rol2(uint32_t mask, uint32_t r1, uint32_t r2)
48 {
49 return ir_rol((ir_rol(mask, r1) - mask), r2);
50 }
51 #endif
52
53 /* Find a perfect hash function */
find_hash(uint32_t * mask,uint32_t count)54 int find_hash(uint32_t *mask, uint32_t count)
55 {
56 uint32_t hash[MAX_SLOTS];
57 uint32_t n, r1, r2, i, h;
58
59 for (n = (count | 1); n < MAX_SLOTS; n += 2) {
60 #if USE_SEMI_PERFECT_HASH
61 int semi_perfect = 0;
62 #endif
63
64 for (r1 = 0; r1 < 31; r1++) {
65 for (r2 = 0; r2 < 32; r2++) {
66 #if USE_SHL_HASH
67 memset(hash, 0, n * sizeof(uint32_t));
68 for (i = 0; i < count; i++) {
69 h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
70 if (hash[h]) {
71 #if USE_SEMI_PERFECT_HASH
72 h++;
73 if (!hash[h]) {
74 hash[h] = mask[i];
75 semi_perfect = 1;
76 continue;
77 }
78 #endif
79 break; /* collision */
80 }
81 hash[h] = mask[i];
82 }
83 if (i == count) {
84 print_hash(hash, n);
85 #if USE_SEMI_PERFECT_HASH
86 if (semi_perfect) {
87 printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
88 }
89 #endif
90 printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
91 return 1;
92 }
93 #endif
94 #if USE_ROL_HASH
95 memset(hash, 0, n * sizeof(uint32_t));
96 for (i = 0; i < count; i++) {
97 h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
98 if (hash[h]) {
99 #if USE_SEMI_PERFECT_HASH
100 h++;
101 if (!hash[h]) {
102 hash[h] = mask[i];
103 semi_perfect = 1;
104 continue;
105 }
106 #endif
107 break; /* collision */
108 }
109 hash[h] = mask[i];
110 }
111 if (i == count) {
112 print_hash(hash, n);
113 #if USE_SEMI_PERFECT_HASH
114 if (semi_perfect) {
115 printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
116 }
117 #endif
118 printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn ir_rol32((ir_rol32(h, %d) - h), %d) %% %d;\n}\n", r1, r2, n);
119 return 1;
120 }
121 #endif
122 }
123 }
124 }
125
126 hash[0] = 0;
127 print_hash(hash, 1);
128 printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn 0;\n}\n");
129 return 0;
130 }
131
find_op(const char * s,size_t len)132 static int find_op(const char *s, size_t len)
133 {
134 return ir_strtab_find(&strtab, s, (uint8_t)len) - 1;
135 }
136
parse_rule(const char * buf)137 static int parse_rule(const char *buf)
138 {
139 const char *p = buf + sizeof("IR_FOLD(") - 1;
140 const char *q;
141 int op, mask;
142
143 while (*p == ' ' || *p == '\t') {
144 p++;
145 }
146 if (*p < 'A' || *p > 'Z') {
147 return 0;
148 }
149 q = p + 1;
150 while ((*q >= 'A' && *q <= 'Z')
151 || (*q >= '0' && *q <= '9')
152 || *q == '_') {
153 q++;
154 }
155 op = find_op(p, q - p);
156 if (op < 0) {
157 return 0;
158 }
159 mask = op;
160
161 while (*q == ' ' || *q == '\t') {
162 q++;
163 }
164 if (*q == ')') {
165 return mask; /* unused operands */
166 } else if (*q != '(') {
167 return 0;
168 }
169
170 p = q + 1;
171 while (*p == ' ' || *p == '\t') {
172 p++;
173 }
174 if (*p == '_') {
175 q = p + 1;
176 } else if (*p >= 'A' && *p <= 'Z') {
177 q = p + 1;
178 while ((*q >= 'A' && *q <= 'Z')
179 || (*q >= '0' && *q <= '9')
180 || *q == '_') {
181 q++;
182 }
183 op = find_op(p, q - p);
184 if (op < 0) {
185 return 0;
186 }
187 mask |= op << 7;
188 } else {
189 return 0;
190 }
191
192 while (*q == ' ' || *q == '\t') {
193 q++;
194 }
195 if (*q == ')') {
196 return mask; /* unused op2 */
197 } else if (*q != ',') {
198 return 0;
199 }
200
201 p = q + 1;
202 while (*p == ' ' || *p == '\t') {
203 p++;
204 }
205 if (*p == '_') {
206 q = p + 1;
207 } else if (*p >= 'A' && *p <= 'Z') {
208 q = p + 1;
209 while ((*q >= 'A' && *q <= 'Z')
210 || (*q >= '0' && *q <= '9')
211 || *q == '_') {
212 q++;
213 }
214 op = find_op(p, q - p);
215 if (op < 0) {
216 return 0;
217 }
218 mask |= op << 14;
219 } else {
220 return 0;
221 }
222
223 while (*q == ' ' || *q == '\t') {
224 q++;
225 }
226 if (*q != ')') {
227 return 0;
228 }
229
230 q++;
231 while (*q == ' ' || *q == '\t') {
232 q++;
233 }
234 if (*q != ')') {
235 return 0;
236 }
237
238 return mask;
239 }
240
main(int argc,char ** argv)241 int main(int argc, char **argv)
242 {
243 char buf[4096];
244 FILE *f = stdin;
245 int line = 0;
246 int rules = 0;
247 int i;
248 uint32_t mask[MAX_RULES];
249 uint32_t rule[MAX_RULES];
250
251 ir_strtab_init(&strtab, IR_LAST_OP, 0);
252
253 #define IR_OP_ADD(name, flags, op1, op2, op3) \
254 ir_strtab_lookup(&strtab, #name, sizeof(#name) - 1, IR_ ## name + 1);
255
256 IR_OPS(IR_OP_ADD)
257
258 ir_strtab_lookup(&strtab, "C_INTPTR", sizeof("C_INTPTR") - 1, IR_C_INTPTR + 1);
259 ir_strtab_lookup(&strtab, "C_UINTPTR", sizeof("C_IINTPTR") - 1, IR_C_UINTPTR + 1);
260
261 while (fgets(buf, sizeof(buf) - 1, f)) {
262 size_t len = strlen(buf);
263 if (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) {
264 buf[len - 1] = 0;
265 len--;
266 line++;
267 }
268 if (len >= sizeof("IR_FOLD(")-1
269 && memcmp(buf, "IR_FOLD(", sizeof("IR_FOLD(")-1) == 0) {
270 if (rules >= MAX_RULES) {
271 fprintf(stderr, "ERROR: Too many rules\n");
272 return 1;
273 }
274 i = parse_rule(buf);
275 if (!i) {
276 fprintf(stderr, "ERROR: Incorrect '%s' rule on line %d\n", buf, line);
277 return 1;
278 }
279 // TODO: few masks may share the same rule ???
280 rule[rules] = line;
281 mask[rules] = i | (rules << 21);
282 rules++;
283 }
284 }
285 ir_strtab_free(&strtab);
286
287 #if 0
288 for (i = 0; i < rules; i++) {
289 printf("0x%08x\n", mask[i]);
290 }
291 #endif
292
293 printf("/* This file is generated from \"ir_fold.h\". Do not edit! */\n\n");
294 printf("typedef enum _ir_fold_rule_id {\n");
295 for (i = 0; i < rules; i++) {
296 printf("\tIR_RULE_%d,\n", rule[i]);
297 }
298 printf("\t_IR_RULE_LAST\n");
299 printf("} ir_fold_rule_id;\n\n");
300
301 if (!find_hash(mask, rules)) {
302 fprintf(stderr, "ERROR: Cannot find a good hash function\n");
303 return 1;
304 }
305
306 return 0;
307 }
308