1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (Folding engine generator)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * Based on Mike Pall's implementation for LuaJIT.
8 */
9
10 #include "ir.h"
11 #include <string.h>
12
13 #include "ir_strtab.c"
14
15 #define MAX_RULES 2048
16 #define MAX_SLOTS (MAX_RULES * 4)
17
18 static ir_strtab strtab;
19
print_hash(uint32_t * mask,uint32_t count)20 void print_hash(uint32_t *mask, uint32_t count)
21 {
22 uint32_t i;
23
24 printf("static const uint32_t _ir_fold_hash[%d] = {\n", count);
25 for (i = 0; i < count; i++) {
26 printf("\t0x%08x,\n", mask[i]);
27 }
28 printf("};\n\n");
29 }
30
hash_shl2(uint32_t mask,uint32_t r1,uint32_t r2)31 static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
32 {
33 return ((mask << r1) - mask) << r2;
34 }
35
36 #if 0
37 #define ir_rol(x, n) (((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
38 #define ir_ror(x, n) (((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
39
40 static uint32_t hash_rol2(uint32_t mask, uint32_t r1, uint32_t r2)
41 {
42 return ir_rol((ir_rol(mask, r1) - mask), r2);
43 }
44 #endif
45
46 /* Find a perfect hash function */
find_hash(uint32_t * mask,uint32_t count)47 int find_hash(uint32_t *mask, uint32_t count)
48 {
49 uint32_t hash[MAX_SLOTS];
50 uint32_t n, r1, r2, i, h;
51
52 for (n = (count | 1); n < MAX_SLOTS; n += 2) {
53 for (r1 = 0; r1 < 31; r1++) {
54 for (r2 = 0; r2 < 32; r2++) {
55 memset(hash, 0, n * sizeof(uint32_t));
56 for (i = 0; i < count; i++) {
57 h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
58 if (hash[h]) break; /* collision */
59 hash[h] = mask[i];
60 }
61 if (i == count) {
62 print_hash(hash, n);
63 printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
64 return 1;
65 }
66 #if 0
67 memset(hash, 0, n * sizeof(uint32_t));
68 for (i = 0; i < count; i++) {
69 h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
70 if (hash[h]) break; /* collision */
71 hash[h] = mask[i];
72 }
73 if (i == count) {
74 print_hash(hash, n);
75 printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn 0; /*rol2(%u,%u,%u)*/\n}\n", r1, r2, n);
76 return 1;
77 }
78 #endif
79 }
80 }
81 }
82
83 hash[0] = 0;
84 print_hash(hash, 1);
85 printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn 0;\n}\n");
86 return 0;
87 }
88
find_op(const char * s,size_t len)89 static int find_op(const char *s, size_t len)
90 {
91 return ir_strtab_find(&strtab, s, (uint8_t)len) - 1;
92 }
93
parse_rule(const char * buf)94 static int parse_rule(const char *buf)
95 {
96 const char *p = buf + sizeof("IR_FOLD(") - 1;
97 const char *q;
98 int op, mask;
99
100 while (*p == ' ' || *p == '\t') {
101 p++;
102 }
103 if (*p < 'A' || *p > 'Z') {
104 return 0;
105 }
106 q = p + 1;
107 while ((*q >= 'A' && *q <= 'Z')
108 || (*q >= '0' && *q <= '9')
109 || *q == '_') {
110 q++;
111 }
112 op = find_op(p, q - p);
113 if (op < 0) {
114 return 0;
115 }
116 mask = op;
117
118 while (*q == ' ' || *q == '\t') {
119 q++;
120 }
121 if (*q == ')') {
122 return mask; /* unused operands */
123 } else if (*q != '(') {
124 return 0;
125 }
126
127 p = q + 1;
128 while (*p == ' ' || *p == '\t') {
129 p++;
130 }
131 if (*p == '_') {
132 q = p + 1;
133 } else if (*p >= 'A' && *p <= 'Z') {
134 q = p + 1;
135 while ((*q >= 'A' && *q <= 'Z')
136 || (*q >= '0' && *q <= '9')
137 || *q == '_') {
138 q++;
139 }
140 op = find_op(p, q - p);
141 if (op < 0) {
142 return 0;
143 }
144 mask |= op << 7;
145 } else {
146 return 0;
147 }
148
149 while (*q == ' ' || *q == '\t') {
150 q++;
151 }
152 if (*q == ')') {
153 return mask; /* unused op2 */
154 } else if (*q != ',') {
155 return 0;
156 }
157
158 p = q + 1;
159 while (*p == ' ' || *p == '\t') {
160 p++;
161 }
162 if (*p == '_') {
163 q = p + 1;
164 } else if (*p >= 'A' && *p <= 'Z') {
165 q = p + 1;
166 while ((*q >= 'A' && *q <= 'Z')
167 || (*q >= '0' && *q <= '9')
168 || *q == '_') {
169 q++;
170 }
171 op = find_op(p, q - p);
172 if (op < 0) {
173 return 0;
174 }
175 mask |= op << 14;
176 } else {
177 return 0;
178 }
179
180 while (*q == ' ' || *q == '\t') {
181 q++;
182 }
183 if (*q != ')') {
184 return 0;
185 }
186
187 q++;
188 while (*q == ' ' || *q == '\t') {
189 q++;
190 }
191 if (*q != ')') {
192 return 0;
193 }
194
195 return mask;
196 }
197
main()198 int main()
199 {
200 char buf[4096];
201 FILE *f = stdin;
202 int line = 0;
203 int rules = 0;
204 int i;
205 uint32_t mask[MAX_RULES];
206 uint32_t rule[MAX_RULES];
207
208 ir_strtab_init(&strtab, IR_LAST_OP, 0);
209
210 #define IR_OP_ADD(name, flags, op1, op2, op3) \
211 ir_strtab_lookup(&strtab, #name, sizeof(#name) - 1, IR_ ## name + 1);
212
213 IR_OPS(IR_OP_ADD)
214
215 while (fgets(buf, sizeof(buf) - 1, f)) {
216 size_t len = strlen(buf);
217 if (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) {
218 buf[len - 1] = 0;
219 len--;
220 line++;
221 }
222 if (len >= sizeof("IR_FOLD(")-1
223 && memcmp(buf, "IR_FOLD(", sizeof("IR_FOLD(")-1) == 0) {
224 if (rules >= MAX_RULES) {
225 fprintf(stderr, "ERROR: Too many rules\n");
226 return 1;
227 }
228 i = parse_rule(buf);
229 if (!i) {
230 fprintf(stderr, "ERROR: Incorrect '%s' rule on line %d\n", buf, line);
231 return 1;
232 }
233 // TODO: few masks may share the same rule ???
234 rule[rules] = line;
235 mask[rules] = i | (rules << 21);
236 rules++;
237 }
238 }
239 ir_strtab_free(&strtab);
240
241 #if 0
242 for (i = 0; i < rules; i++) {
243 printf("0x%08x\n", mask[i]);
244 }
245 #endif
246
247 printf("/* This file is generated from \"ir_fold.h\". Do not edit! */\n\n");
248 printf("typedef enum _ir_fold_rule_id {\n");
249 for (i = 0; i < rules; i++) {
250 printf("\tIR_RULE_%d,\n", rule[i]);
251 }
252 printf("\t_IR_RULE_LAST\n");
253 printf("} ir_fold_rule_id;\n\n");
254
255 if (!find_hash(mask, rules)) {
256 fprintf(stderr, "ERROR: Cannot find a good hash function\n");
257 return 1;
258 }
259
260 return 0;
261 }
262