xref: /php-src/ext/opcache/jit/ir/gen_ir_fold_hash.c (revision eb513503)
1 /*
2  * IR - Lightweight JIT Compilation Framework
3  * (Folding engine generator)
4  * Copyright (C) 2022 Zend by Perforce.
5  * Authors: Dmitry Stogov <dmitry@php.net>
6  *
7  * Based on Mike Pall's implementation for LuaJIT.
8  */
9 
10 #include "ir.h"
11 #include <string.h>
12 
13 #include "ir_strtab.c"
14 
15 #define MAX_RULES 2048
16 #define MAX_SLOTS (MAX_RULES * 4)
17 
18 #define USE_SEMI_PERFECT_HASH 1
19 #define USE_SHL_HASH 1
20 #define USE_ROL_HASH 0
21 
22 static ir_strtab strtab;
23 
print_hash(uint32_t * mask,uint32_t count)24 void print_hash(uint32_t *mask, uint32_t count)
25 {
26 	uint32_t i;
27 
28 	printf("static const uint32_t _ir_fold_hash[%d] = {\n", count + 1);
29 	for (i = 0; i < count; i++) {
30 		printf("\t0x%08x,\n", mask[i]);
31 	}
32 	printf("\t0x%08x\n", 0);
33 	printf("};\n\n");
34 }
35 
36 #if USE_SHL_HASH
hash_shl2(uint32_t mask,uint32_t r1,uint32_t r2)37 static uint32_t hash_shl2(uint32_t mask, uint32_t r1, uint32_t r2)
38 {
39 	return ((mask << r1) - mask) << r2;
40 }
41 #endif
42 
43 #if USE_ROL_HASH
44 #define ir_rol(x, n)	(((x)<<(n)) | ((x)>>(-(int)(n)&(8*sizeof(x)-1))))
45 #define ir_ror(x, n)	(((x)<<(-(int)(n)&(8*sizeof(x)-1))) | ((x)>>(n)))
46 
hash_rol2(uint32_t mask,uint32_t r1,uint32_t r2)47 static uint32_t hash_rol2(uint32_t mask, uint32_t r1, uint32_t r2)
48 {
49 	return ir_rol((ir_rol(mask, r1) - mask), r2);
50 }
51 #endif
52 
53 /* Find a perfect hash function */
find_hash(uint32_t * mask,uint32_t count)54 int find_hash(uint32_t *mask, uint32_t count)
55 {
56 	uint32_t hash[MAX_SLOTS];
57 	uint32_t n, r1, r2, i, h;
58 
59 	for (n = (count | 1); n < MAX_SLOTS; n += 2) {
60 #if USE_SEMI_PERFECT_HASH
61 		int semi_perfect = 0;
62 #endif
63 
64 		for (r1 = 0; r1 < 31; r1++) {
65 			for (r2 = 0; r2 < 32; r2++) {
66 #if USE_SHL_HASH
67 				memset(hash, 0, n * sizeof(uint32_t));
68 				for (i = 0; i < count; i++) {
69 					h = hash_shl2(mask[i] & 0x1fffff, r1, r2) % n;
70 					if (hash[h]) {
71 #if USE_SEMI_PERFECT_HASH
72 						h++;
73 						if (!hash[h]) {
74 							hash[h] = mask[i];
75 							semi_perfect = 1;
76 							continue;
77 						}
78 #endif
79 						break; /* collision */
80 					}
81 					hash[h] = mask[i];
82 				}
83 				if (i == count) {
84 					print_hash(hash, n);
85 #if USE_SEMI_PERFECT_HASH
86 					if (semi_perfect) {
87 						printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
88 					}
89 #endif
90 					printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn (((h << %d) - h) << %d) %% %d;\n}\n", r1, r2, n);
91 					return 1;
92 				}
93 #endif
94 #if USE_ROL_HASH
95 				memset(hash, 0, n * sizeof(uint32_t));
96 				for (i = 0; i < count; i++) {
97 					h = hash_rol2(mask[i] & 0x1fffff, r1, r2) % n;
98 					if (hash[h]) {
99 #if USE_SEMI_PERFECT_HASH
100 						h++;
101 						if (!hash[h]) {
102 							hash[h] = mask[i];
103 							semi_perfect = 1;
104 							continue;
105 						}
106 #endif
107 						break; /* collision */
108 					}
109 					hash[h] = mask[i];
110 				}
111 				if (i == count) {
112 					print_hash(hash, n);
113 #if USE_SEMI_PERFECT_HASH
114 					if (semi_perfect) {
115 						printf("#define IR_FOLD_SEMI_PERFECT_HASH\n\n");
116 					}
117 #endif
118 					printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\nreturn ir_rol32((ir_rol32(h, %d) - h), %d) %% %d;\n}\n", r1, r2, n);
119 					return 1;
120 				}
121 #endif
122 			}
123 		}
124 	}
125 
126 	hash[0] = 0;
127 	print_hash(hash, 1);
128 	printf("static uint32_t _ir_fold_hashkey(uint32_t h)\n{\n\treturn 0;\n}\n");
129 	return 0;
130 }
131 
find_op(const char * s,size_t len)132 static int find_op(const char *s, size_t len)
133 {
134 	return ir_strtab_find(&strtab, s, (uint8_t)len) - 1;
135 }
136 
parse_rule(const char * buf)137 static int parse_rule(const char *buf)
138 {
139 	const char *p = buf + sizeof("IR_FOLD(") - 1;
140 	const char *q;
141 	int op, mask;
142 
143 	while (*p == ' ' || *p == '\t') {
144 		p++;
145 	}
146 	if (*p < 'A' || *p > 'Z') {
147 		return 0;
148 	}
149 	q = p + 1;
150 	while ((*q >= 'A' && *q <= 'Z')
151 	 || (*q >= '0' && *q <= '9')
152 	 || *q == '_') {
153 		q++;
154 	}
155 	op = find_op(p, q - p);
156 	if (op < 0) {
157 		return 0;
158 	}
159 	mask = op;
160 
161 	while (*q == ' ' || *q == '\t') {
162 		q++;
163 	}
164 	if (*q == ')') {
165 		return mask; /* unused operands */
166 	} else if (*q != '(') {
167 		return 0;
168 	}
169 
170 	p = q + 1;
171 	while (*p == ' ' || *p == '\t') {
172 		p++;
173 	}
174 	if (*p == '_') {
175 		q = p + 1;
176 	} else if (*p >= 'A' && *p <= 'Z') {
177 		q = p + 1;
178 		while ((*q >= 'A' && *q <= 'Z')
179 		 || (*q >= '0' && *q <= '9')
180 		 || *q == '_') {
181 			q++;
182 		}
183 		op = find_op(p, q - p);
184 		if (op < 0) {
185 			return 0;
186 		}
187 		mask |= op << 7;
188 	} else {
189 		return 0;
190 	}
191 
192 	while (*q == ' ' || *q == '\t') {
193 		q++;
194 	}
195 	if (*q == ')') {
196 		return mask; /* unused op2 */
197 	} else if (*q != ',') {
198 		return 0;
199 	}
200 
201 	p = q + 1;
202 	while (*p == ' ' || *p == '\t') {
203 		p++;
204 	}
205 	if (*p == '_') {
206 		q = p + 1;
207 	} else if (*p >= 'A' && *p <= 'Z') {
208 		q = p + 1;
209 		while ((*q >= 'A' && *q <= 'Z')
210 		 || (*q >= '0' && *q <= '9')
211 		 || *q == '_') {
212 			q++;
213 		}
214 		op = find_op(p, q - p);
215 		if (op < 0) {
216 			return 0;
217 		}
218 		mask |= op << 14;
219 	} else {
220 		return 0;
221 	}
222 
223 	while (*q == ' ' || *q == '\t') {
224 		q++;
225 	}
226 	if (*q != ')') {
227 		return 0;
228 	}
229 
230 	q++;
231 	while (*q == ' ' || *q == '\t') {
232 		q++;
233 	}
234 	if (*q != ')') {
235 		return 0;
236 	}
237 
238 	return mask;
239 }
240 
main(int argc,char ** argv)241 int main(int argc, char **argv)
242 {
243 	char buf[4096];
244 	FILE *f = stdin;
245 	int line = 0;
246 	int rules = 0;
247 	int i;
248 	uint32_t mask[MAX_RULES];
249 	uint32_t rule[MAX_RULES];
250 
251 	ir_strtab_init(&strtab, IR_LAST_OP, 0);
252 
253 #define IR_OP_ADD(name, flags, op1, op2, op3) \
254 	ir_strtab_lookup(&strtab, #name, sizeof(#name) - 1, IR_ ## name + 1);
255 
256 	IR_OPS(IR_OP_ADD)
257 
258 	ir_strtab_lookup(&strtab, "C_INTPTR",  sizeof("C_INTPTR") - 1, IR_C_INTPTR + 1);
259 	ir_strtab_lookup(&strtab, "C_UINTPTR", sizeof("C_IINTPTR") - 1, IR_C_UINTPTR + 1);
260 
261 	while (fgets(buf, sizeof(buf) - 1, f)) {
262 		size_t len = strlen(buf);
263 		if (len > 0 && (buf[len - 1] == '\r' || buf[len - 1] == '\n')) {
264 			buf[len - 1] = 0;
265 			len--;
266 			line++;
267 		}
268 		if (len >= sizeof("IR_FOLD(")-1
269 		 && memcmp(buf, "IR_FOLD(", sizeof("IR_FOLD(")-1) == 0) {
270 		    if (rules >= MAX_RULES) {
271 				fprintf(stderr, "ERROR: Too many rules\n");
272 				return 1;
273 			}
274 			i = parse_rule(buf);
275 			if (!i) {
276 				fprintf(stderr, "ERROR: Incorrect '%s' rule on line %d\n", buf, line);
277 				return 1;
278 			}
279 			// TODO: few masks may share the same rule ???
280 			rule[rules] = line;
281 			mask[rules] = i | (rules << 21);
282 			rules++;
283 		}
284 	}
285 	ir_strtab_free(&strtab);
286 
287 #if 0
288 	for (i = 0; i < rules; i++) {
289 		printf("0x%08x\n", mask[i]);
290 	}
291 #endif
292 
293 	printf("/* This file is generated from \"ir_fold.h\". Do not edit! */\n\n");
294 	printf("typedef enum _ir_fold_rule_id {\n");
295 	for (i = 0; i < rules; i++) {
296 		printf("\tIR_RULE_%d,\n", rule[i]);
297 	}
298 	printf("\t_IR_RULE_LAST\n");
299 	printf("} ir_fold_rule_id;\n\n");
300 
301 	if (!find_hash(mask, rules)) {
302 		fprintf(stderr, "ERROR: Cannot find a good hash function\n");
303 		return 1;
304 	}
305 
306 	return 0;
307 }
308