xref: /php-src/ext/opcache/jit/ir/ir_private.h (revision bb21d195)
/*
 * IR - Lightweight JIT Compilation Framework
 * (Common data structures and non public definitions)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

#ifndef IR_PRIVATE_H
#define IR_PRIVATE_H
#include <string.h>
#include <stdlib.h>

#ifdef IR_DEBUG
# include <assert.h>
# define IR_ASSERT(x) assert(x)
#else
# define IR_ASSERT(x)
#endif

#ifdef _WIN32
# include <intrin.h>
# ifdef _M_X64
#  pragma intrinsic(_BitScanForward64)
#  pragma intrinsic(_BitScanReverse64)
# endif
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
#endif

#ifdef __has_builtin
# if __has_builtin(__builtin_expect)
#   define EXPECTED(condition)   __builtin_expect(!!(condition), 1)
#   define UNEXPECTED(condition) __builtin_expect(!!(condition), 0)
# endif
# if __has_attribute(__aligned__)
#  define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment)))
# endif
# if __has_attribute(__fallthrough__)
#  define IR_FALLTHROUGH __attribute__((__fallthrough__))
# endif
#elif defined(_WIN32)
# define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl
#else /* GCC prior to 10 or non-clang/MSVC compilers */
# define __has_builtin(x) 0
#endif
#ifndef EXPECTED
# define EXPECTED(condition)   (condition)
# define UNEXPECTED(condition) (condition)
#endif
#ifndef IR_SET_ALIGNED
# define IR_SET_ALIGNED(alignment, decl) decl
#endif
#ifndef IR_FALLTHROUGH
# define IR_FALLTHROUGH ((void)0)
#endif

/*** Helper routines ***/

#define IR_ALIGNED_SIZE(size, alignment) \
	(((size) + ((alignment) - 1)) & ~((alignment) - 1))

#define IR_MAX(a, b)          (((a) > (b)) ? (a) : (b))
#define IR_MIN(a, b)          (((a) < (b)) ? (a) : (b))

#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1)))

#define IR_LOG2(x) ir_ntzl(x)
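
/*
 * IR_ALIGNED_SIZE rounds up to a power-of-two alignment, e.g.
 * IR_ALIGNED_SIZE(13, 8) == 16 and IR_ALIGNED_SIZE(16, 8) == 16.
 * IR_LOG2 assumes its argument is a power of two, in which case the number
 * of trailing zeros equals the exponent (IR_LOG2(64) == 6).
 *
 * The rotate helpers below assume 0 < op2 < width for the 32- and 64-bit
 * variants: ir_rol32(x, 0) would evaluate x >> 32, which is undefined
 * behavior in C.
 */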

IR_ALWAYS_INLINE uint8_t ir_rol8(uint8_t op1, uint8_t op2)
{
	return (op1 << op2) | (op1 >> (8 - op2));
}

IR_ALWAYS_INLINE uint16_t ir_rol16(uint16_t op1, uint16_t op2)
{
	return (op1 << op2) | (op1 >> (16 - op2));
}

IR_ALWAYS_INLINE uint32_t ir_rol32(uint32_t op1, uint32_t op2)
{
	return (op1 << op2) | (op1 >> (32 - op2));
}

IR_ALWAYS_INLINE uint64_t ir_rol64(uint64_t op1, uint64_t op2)
{
	return (op1 << op2) | (op1 >> (64 - op2));
}

IR_ALWAYS_INLINE uint8_t ir_ror8(uint8_t op1, uint8_t op2)
{
	return (op1 >> op2) | (op1 << (8 - op2));
}

IR_ALWAYS_INLINE uint16_t ir_ror16(uint16_t op1, uint16_t op2)
{
	return (op1 >> op2) | (op1 << (16 - op2));
}

IR_ALWAYS_INLINE uint32_t ir_ror32(uint32_t op1, uint32_t op2)
{
	return (op1 >> op2) | (op1 << (32 - op2));
}

IR_ALWAYS_INLINE uint64_t ir_ror64(uint64_t op1, uint64_t op2)
{
	return (op1 >> op2) | (op1 << (64 - op2));
}

/* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */
IR_ALWAYS_INLINE uint32_t ir_ntz(uint32_t num)
{
#if (defined(__GNUC__) || __has_builtin(__builtin_ctz))
	return __builtin_ctz(num);
#elif defined(_WIN32)
	unsigned long index;

	if (!_BitScanForward(&index, num)) {
		/* undefined behavior */
		return 32;
	}

	return (uint32_t) index;
#else
	int n;

	if (num == 0) return 32;

	n = 1;
	if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;}
	if ((num & 0x000000ff) == 0) {n +=  8; num = num >>  8;}
	if ((num & 0x0000000f) == 0) {n +=  4; num = num >>  4;}
	if ((num & 0x00000003) == 0) {n +=  2; num = num >>  2;}
	return n - (num & 1);
#endif
}

/* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */
IR_ALWAYS_INLINE uint32_t ir_ntzl(uint64_t num)
{
#if (defined(__GNUC__) || __has_builtin(__builtin_ctzl))
	return __builtin_ctzl(num);
#elif defined(_WIN64)
	unsigned long index;

	if (!_BitScanForward64(&index, num)) {
		/* undefined behavior */
		return 64;
	}

	return (uint32_t) index;
#else
	uint32_t n;

	if (num == 0) return 64;

	n = 1;
	if ((num & 0xffffffff) == 0) {n += 32; num = num >> 32;}
	if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;}
	if ((num & 0x000000ff) == 0) {n +=  8; num = num >>  8;}
	if ((num & 0x0000000f) == 0) {n +=  4; num = num >>  4;}
	if ((num & 0x00000003) == 0) {n +=  2; num = num >>  2;}
	return n - (uint32_t)(num & 1);
#endif
}

/* Number of leading zero bits (Undefined for zero) */
IR_ALWAYS_INLINE int ir_nlz(uint32_t num)
{
#if (defined(__GNUC__) || __has_builtin(__builtin_clz))
	return __builtin_clz(num);
#elif defined(_WIN32)
	unsigned long index;

	if (!_BitScanReverse(&index, num)) {
		/* undefined behavior */
		return 32;
	}

	return (int) (32 - 1) - index;
#else
	uint32_t x;
	uint32_t n;

	n = 32;
	x = num >> 16; if (x != 0) {n -= 16; num = x;}
	x = num >> 8;  if (x != 0) {n -=  8; num = x;}
	x = num >> 4;  if (x != 0) {n -=  4; num = x;}
	x = num >> 2;  if (x != 0) {n -=  2; num = x;}
	x = num >> 1;  if (x != 0) return n - 2;
	return n - num;
#endif
}

IR_ALWAYS_INLINE int ir_nlzl(uint64_t num)
{
#if (defined(__GNUC__) || __has_builtin(__builtin_clzll))
	return __builtin_clzll(num);
#elif defined(_WIN64)
	unsigned long index;

	if (!_BitScanReverse64(&index, num)) {
		/* undefined behavior */
		return 64;
	}

	return (int) (64 - 1) - index;
#else
	uint64_t x;
	uint32_t n;

	n = 64;
	x = num >> 32; if (x != 0) {n -= 32; num = x;}
	x = num >> 16; if (x != 0) {n -= 16; num = x;}
	x = num >> 8;  if (x != 0) {n -=  8; num = x;}
	x = num >> 4;  if (x != 0) {n -=  4; num = x;}
	x = num >> 2;  if (x != 0) {n -=  2; num = x;}
	x = num >> 1;  if (x != 0) return n - 2;
	return n - (uint32_t)num;
#endif
}

/*** Helper data types ***/

/* Arena */
struct _ir_arena {
	char     *ptr;
	char     *end;
	ir_arena *prev;
};

IR_ALWAYS_INLINE ir_arena* ir_arena_create(size_t size)
{
	ir_arena *arena;

	IR_ASSERT(size >= IR_ALIGNED_SIZE(sizeof(ir_arena), 8));
	arena = (ir_arena*)ir_mem_malloc(size);
	arena->ptr = (char*) arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8);
	arena->end = (char*) arena + size;
	arena->prev = NULL;
	return arena;
}

IR_ALWAYS_INLINE void ir_arena_free(ir_arena *arena)
{
	do {
		ir_arena *prev = arena->prev;
		ir_mem_free(arena);
		arena = prev;
	} while (arena);
}

IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size)
{
	ir_arena *arena = *arena_ptr;
	char *ptr = arena->ptr;

	size = IR_ALIGNED_SIZE(size, 8);

	if (EXPECTED(size <= (size_t)(arena->end - ptr))) {
		arena->ptr = ptr + size;
	} else {
		size_t arena_size =
			UNEXPECTED((size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) > (size_t)(arena->end - (char*) arena)) ?
				(size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) :
				(size_t)(arena->end - (char*) arena);
		ir_arena *new_arena = (ir_arena*)ir_mem_malloc(arena_size);

		ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8);
		new_arena->ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8) + size;
		new_arena->end = (char*) new_arena + arena_size;
		new_arena->prev = arena;
		*arena_ptr = new_arena;
	}

	return (void*) ptr;
}

IR_ALWAYS_INLINE void* ir_arena_checkpoint(ir_arena *arena)
{
	return arena->ptr;
}

IR_ALWAYS_INLINE void ir_release(ir_arena **arena_ptr, void *checkpoint)
{
	ir_arena *arena = *arena_ptr;

	while (UNEXPECTED((char*)checkpoint > arena->end) ||
	       UNEXPECTED((char*)checkpoint <= (char*)arena)) {
		ir_arena *prev = arena->prev;
		ir_mem_free(arena);
		*arena_ptr = arena = prev;
	}
	IR_ASSERT((char*)checkpoint > (char*)arena && (char*)checkpoint <= arena->end);
	arena->ptr = (char*)checkpoint;
}

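/*
 * Usage sketch (illustrative only; ir_mem_malloc/ir_mem_free are declared
 * elsewhere in the IR headers):
 *
 *   ir_arena *a = ir_arena_create(4096);
 *   void *node1 = ir_arena_alloc(&a, 24);    // bump-pointer allocation
 *   void *cp = ir_arena_checkpoint(a);
 *   void *node2 = ir_arena_alloc(&a, 1000);  // may chain a new arena block
 *   ir_release(&a, cp);                      // drop everything after cp
 *   ir_arena_free(a);                        // free the whole chain
 */
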
/* Bitsets */
#if defined(IR_TARGET_X86)
# define IR_BITSET_BITS 32
# define IR_BITSET_ONE  1U
# define ir_bitset_base_t uint32_t
# define ir_bitset_ntz  ir_ntz
#else
# define IR_BITSET_BITS   64
# ifdef _M_X64 /* MSVC */
#  define IR_BITSET_ONE    1ui64
# else
#  define IR_BITSET_ONE    1UL
# endif
# define ir_bitset_base_t uint64_t
# define ir_bitset_ntz    ir_ntzl
#endif

typedef ir_bitset_base_t *ir_bitset;

IR_ALWAYS_INLINE uint32_t ir_bitset_len(uint32_t n)
{
	return (n + (IR_BITSET_BITS - 1)) / IR_BITSET_BITS;
}

IR_ALWAYS_INLINE ir_bitset ir_bitset_malloc(uint32_t n)
{
	return ir_mem_calloc(ir_bitset_len(n), IR_BITSET_BITS / 8);
}

IR_ALWAYS_INLINE void ir_bitset_incl(ir_bitset set, uint32_t n)
{
	set[n / IR_BITSET_BITS] |= IR_BITSET_ONE << (n % IR_BITSET_BITS);
}

IR_ALWAYS_INLINE void ir_bitset_excl(ir_bitset set, uint32_t n)
{
	set[n / IR_BITSET_BITS] &= ~(IR_BITSET_ONE << (n % IR_BITSET_BITS));
}

IR_ALWAYS_INLINE bool ir_bitset_in(const ir_bitset set, uint32_t n)
{
	return (set[(n / IR_BITSET_BITS)] & (IR_BITSET_ONE << (n % IR_BITSET_BITS))) != 0;
}

IR_ALWAYS_INLINE void ir_bitset_clear(ir_bitset set, uint32_t len)
{
	memset(set, 0, len * (IR_BITSET_BITS / 8));
}

IR_ALWAYS_INLINE void ir_bitset_fill(ir_bitset set, uint32_t len)
{
	memset(set, 0xff, len * (IR_BITSET_BITS / 8));
}

IR_ALWAYS_INLINE bool ir_bitset_empty(const ir_bitset set, uint32_t len)
{
	uint32_t i;
	for (i = 0; i < len; i++) {
		if (set[i]) {
			return 0;
		}
	}
	return 1;
}

IR_ALWAYS_INLINE bool ir_bitset_equal(const ir_bitset set1, const ir_bitset set2, uint32_t len)
{
	return memcmp(set1, set2, len * (IR_BITSET_BITS / 8)) == 0;
}

IR_ALWAYS_INLINE void ir_bitset_copy(ir_bitset set1, const ir_bitset set2, uint32_t len)
{
	memcpy(set1, set2, len * (IR_BITSET_BITS / 8));
}

IR_ALWAYS_INLINE void ir_bitset_intersection(ir_bitset set1, const ir_bitset set2, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		set1[i] &= set2[i];
	}
}

IR_ALWAYS_INLINE void ir_bitset_union(ir_bitset set1, const ir_bitset set2, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		set1[i] |= set2[i];
	}
}

IR_ALWAYS_INLINE void ir_bitset_difference(ir_bitset set1, const ir_bitset set2, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		set1[i] = set1[i] & ~set2[i];
	}
}

IR_ALWAYS_INLINE bool ir_bitset_is_subset(const ir_bitset set1, const ir_bitset set2, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		if (set1[i] & ~set2[i]) {
			return 0;
		}
	}
	return 1;
}

IR_ALWAYS_INLINE int ir_bitset_first(const ir_bitset set, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		if (set[i]) {
			return IR_BITSET_BITS * i + ir_bitset_ntz(set[i]);
		}
	}
	return -1; /* empty set */
}

IR_ALWAYS_INLINE int ir_bitset_last(const ir_bitset set, uint32_t len)
{
	uint32_t i = len;

	while (i > 0) {
		i--;
		if (set[i]) {
			uint32_t j = IR_BITSET_BITS * i - 1;
			ir_bitset_base_t x = set[i];
			do {
				x = x >> 1;
				j++;
			} while (x != 0);
			return j;
		}
	}
	return -1; /* empty set */
}

IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		ir_bitset_base_t x = set[i];

		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);
			set[i] = x & (x - 1);
			return bit;
		}
	}
	return -1; /* empty set */
}

#define IR_BITSET_FOREACH(set, len, bit) do { \
	ir_bitset _set = (set); \
	uint32_t _i, _len = (len); \
	for (_i = 0; _i < _len; _set++, _i++) { \
		ir_bitset_base_t _x = *_set; \
		while (_x) { \
			(bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \
			_x &= _x - 1;

#define IR_BITSET_FOREACH_DIFFERENCE(set1, set2, len, bit) do { \
	ir_bitset _set1 = (set1); \
	ir_bitset _set2 = (set2); \
	uint32_t _i, _len = (len); \
	for (_i = 0; _i < _len; _i++) { \
		ir_bitset_base_t _x = _set1[_i] & ~_set2[_i]; \
		while (_x) { \
			(bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \
			_x &= _x - 1;

#define IR_BITSET_FOREACH_END() \
		} \
	} \
} while (0)

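/*
 * Usage sketch (illustrative only): a bitset over n elements is an array of
 * ir_bitset_len(n) machine words; the traversal and clear/fill helpers take
 * the word count, not n.
 *
 *   uint32_t n = 100, bit;
 *   ir_bitset set = ir_bitset_malloc(n);   // zero-initialized
 *   ir_bitset_incl(set, 3);
 *   ir_bitset_incl(set, 64);
 *   IR_BITSET_FOREACH(set, ir_bitset_len(n), bit) {
 *       // visits 3, then 64
 *   } IR_BITSET_FOREACH_END();
 *   ir_mem_free(set);
 */
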
/* Sparse Set */
typedef struct _ir_sparse_set {
	uint32_t size;
	uint32_t len;
	uint32_t *data;
} ir_sparse_set;

#define IR_SPARSE_SET_DENSE(set,  n) (set)->data[n]
#define IR_SPARSE_SET_SPARSE(set, n) (set)->data[-1 - ((int32_t)(n))]

IR_ALWAYS_INLINE void ir_sparse_set_init(ir_sparse_set *set, uint32_t size)
{
	set->size = size;
	set->len = 0;
	set->data = (uint32_t*)ir_mem_malloc(sizeof(uint32_t) * 2 * size) + size;
#if IR_DEBUG
	/* initialize sparse part to avoid valgrind warnings */
	memset(&IR_SPARSE_SET_SPARSE(set, size - 1), 0, size * sizeof(uint32_t));
#endif
}

IR_ALWAYS_INLINE void ir_sparse_set_clear(ir_sparse_set *set)
{
	set->len = 0;
}

IR_ALWAYS_INLINE void ir_sparse_set_free(ir_sparse_set *set)
{
	ir_mem_free(set->data - set->size);
}

IR_ALWAYS_INLINE bool ir_sparse_set_empty(const ir_sparse_set *set)
{
	return set->len == 0;
}

IR_ALWAYS_INLINE bool ir_sparse_set_in(const ir_sparse_set *set, uint32_t n)
{
	uint32_t idx = IR_SPARSE_SET_SPARSE(set, n);

	return idx < set->len && IR_SPARSE_SET_DENSE(set, idx) == n;
}

IR_ALWAYS_INLINE void ir_sparse_set_add(ir_sparse_set *set, uint32_t n)
{
	uint32_t idx;

	IR_ASSERT(!ir_sparse_set_in(set, n));
	idx = set->len++;
	IR_SPARSE_SET_DENSE(set, idx) = n;
	IR_SPARSE_SET_SPARSE(set, n) = idx;
}

IR_ALWAYS_INLINE void ir_sparse_set_del(ir_sparse_set *set, uint32_t n)
{
	uint32_t last;

	IR_ASSERT(ir_sparse_set_in(set, n));
	last = IR_SPARSE_SET_DENSE(set, set->len - 1);
	if (last != n) {
		uint32_t idx = IR_SPARSE_SET_SPARSE(set, n);

		IR_SPARSE_SET_DENSE(set, idx) = last;
		IR_SPARSE_SET_SPARSE(set, last) = idx;
	}
	set->len--;
}

IR_ALWAYS_INLINE uint32_t ir_sparse_set_pop(ir_sparse_set *set)
{
	if (set->len > 0) {
		set->len--;
		return IR_SPARSE_SET_DENSE(set, set->len);
	}
	return -1; /* empty set */
}

#define IR_SPARSE_SET_FOREACH(set, bit) do { \
	ir_sparse_set *_set = (set); \
	uint32_t _i, _len = _set->len; \
	uint32_t *_p = _set->data; \
	for (_i = 0; _i < _len; _p++, _i++) { \
		(bit) = *_p; \

#define IR_SPARSE_SET_FOREACH_END() \
	} \
} while (0)

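/*
 * Note (illustrative): this is the classic sparse-set representation of
 * Briggs and Torczon. data[0..len-1] is the dense list of members, and
 * data[-1-n] maps element n back to its dense index. Membership is
 * validated by the dense/sparse cross-check in ir_sparse_set_in(), so the
 * sparse half never needs to be initialized (except under IR_DEBUG, for
 * valgrind), and clearing the set is O(1).
 *
 *   ir_sparse_set s;
 *   ir_sparse_set_init(&s, 128);   // may hold elements 0..127
 *   ir_sparse_set_add(&s, 42);
 *   if (ir_sparse_set_in(&s, 42)) { ir_sparse_set_del(&s, 42); }
 *   ir_sparse_set_clear(&s);       // O(1): just resets len
 *   ir_sparse_set_free(&s);
 */
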
/* Bit Queue */
typedef struct _ir_bitqueue {
	uint32_t  len;
	uint32_t  pos;
	ir_bitset set;
} ir_bitqueue;

IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n)
{
	q->len = ir_bitset_len(n);
	q->pos = q->len - 1;
	q->set = ir_bitset_malloc(n);
}

IR_ALWAYS_INLINE void ir_bitqueue_grow(ir_bitqueue *q, uint32_t n)
{
	uint32_t len = ir_bitset_len(n);
	IR_ASSERT(len >= q->len);
	if (len > q->len) {
		q->set = ir_mem_realloc(q->set, len * (IR_BITSET_BITS / 8));
		memset(q->set + q->len, 0, (len - q->len) * (IR_BITSET_BITS / 8));
		q->len = len;
	}
}

IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q)
{
	ir_mem_free(q->set);
}

IR_ALWAYS_INLINE void ir_bitqueue_clear(ir_bitqueue *q)
{
	q->pos = q->len - 1;
	ir_bitset_clear(q->set, q->len);
}

IR_ALWAYS_INLINE int ir_bitqueue_pop(ir_bitqueue *q)
{
	uint32_t i = q->pos;
	ir_bitset_base_t x, *p = q->set + i;
	do {
		x = *p;
		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);
			*p = x & (x - 1);
			q->pos = i;
			return bit;
		}
		p++;
		i++;
	} while (i < q->len);
	q->pos = q->len - 1;
	return -1; /* empty set */
}

IR_ALWAYS_INLINE void ir_bitqueue_add(ir_bitqueue *q, uint32_t n)
{
	uint32_t i = n / IR_BITSET_BITS;
	q->set[i] |= IR_BITSET_ONE << (n % IR_BITSET_BITS);
	if (i < q->pos) {
		q->pos = i;
	}
}

IR_ALWAYS_INLINE void ir_bitqueue_del(ir_bitqueue *q, uint32_t n)
{
	ir_bitset_excl(q->set, n);
}

IR_ALWAYS_INLINE bool ir_bitqueue_in(const ir_bitqueue *q, uint32_t n)
{
	return ir_bitset_in(q->set, n);
}

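/*
 * Usage sketch (illustrative only): a bit queue is a bitset plus a cursor
 * (pos) remembering the lowest word that may contain a set bit, so
 * ir_bitqueue_pop() returns members in ascending order without rescanning
 * from the start each time; ir_bitqueue_add() moves the cursor back when a
 * lower index is enqueued.
 *
 *   ir_bitqueue q;
 *   int i;
 *   ir_bitqueue_init(&q, 256);
 *   ir_bitqueue_add(&q, 7);
 *   ir_bitqueue_add(&q, 130);
 *   while ((i = ir_bitqueue_pop(&q)) >= 0) {
 *       // processes 7, then 130; the body may re-enqueue more work
 *   }
 *   ir_bitqueue_free(&q);
 */
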
/* Dynamic array of numeric references */
typedef struct _ir_array {
	ir_ref   *refs;
	uint32_t  size;
} ir_array;

void ir_array_grow(ir_array *a, uint32_t size);
void ir_array_insert(ir_array *a, uint32_t i, ir_ref val);
void ir_array_remove(ir_array *a, uint32_t i);

IR_ALWAYS_INLINE void ir_array_init(ir_array *a, uint32_t size)
{
	a->refs = ir_mem_malloc(size * sizeof(ir_ref));
	a->size = size;
}

IR_ALWAYS_INLINE void ir_array_free(ir_array *a)
{
	ir_mem_free(a->refs);
	a->refs = NULL;
	a->size = 0;
}

IR_ALWAYS_INLINE uint32_t ir_array_size(const ir_array *a)
{
	return a->size;
}

IR_ALWAYS_INLINE ir_ref ir_array_get(const ir_array *a, uint32_t i)
{
	return (i < a->size) ? a->refs[i] : IR_UNUSED;
}

IR_ALWAYS_INLINE ir_ref ir_array_at(const ir_array *a, uint32_t i)
{
	IR_ASSERT(i < a->size);
	return a->refs[i];
}

IR_ALWAYS_INLINE void ir_array_set(ir_array *a, uint32_t i, ir_ref val)
{
	if (i >= a->size) {
		ir_array_grow(a, i + 1);
	}
	a->refs[i] = val;
}

IR_ALWAYS_INLINE void ir_array_set_unchecked(ir_array *a, uint32_t i, ir_ref val)
{
	IR_ASSERT(i < a->size);
	a->refs[i] = val;
}

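/*
 * Note: ir_array_get() is the bounds-tolerant accessor (it returns
 * IR_UNUSED for an out-of-range index), while ir_array_at() asserts the
 * index is valid; likewise ir_array_set() grows the array on demand, while
 * ir_array_set_unchecked() requires sufficient capacity.
 */
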
/* List/Stack of numeric references */
typedef struct _ir_list {
	ir_array a;
	uint32_t len;
} ir_list;

bool ir_list_contains(const ir_list *l, ir_ref val);
void ir_list_insert(ir_list *l, uint32_t i, ir_ref val);
void ir_list_remove(ir_list *l, uint32_t i);

IR_ALWAYS_INLINE void ir_list_init(ir_list *l, uint32_t size)
{
	ir_array_init(&l->a, size);
	l->len = 0;
}

IR_ALWAYS_INLINE void ir_list_free(ir_list *l)
{
	ir_array_free(&l->a);
	l->len = 0;
}

IR_ALWAYS_INLINE void ir_list_clear(ir_list *l)
{
	l->len = 0;
}

IR_ALWAYS_INLINE uint32_t ir_list_len(const ir_list *l)
{
	return l->len;
}

IR_ALWAYS_INLINE uint32_t ir_list_capasity(const ir_list *l)
{
	return ir_array_size(&l->a);
}

IR_ALWAYS_INLINE void ir_list_push(ir_list *l, ir_ref val)
{
	ir_array_set(&l->a, l->len++, val);
}

IR_ALWAYS_INLINE void ir_list_push_unchecked(ir_list *l, ir_ref val)
{
	ir_array_set_unchecked(&l->a, l->len++, val);
}

IR_ALWAYS_INLINE ir_ref ir_list_pop(ir_list *l)
{
	IR_ASSERT(l->len > 0);
	return ir_array_at(&l->a, --l->len);
}

IR_ALWAYS_INLINE ir_ref ir_list_peek(const ir_list *l)
{
	IR_ASSERT(l->len > 0);
	return ir_array_at(&l->a, l->len - 1);
}

IR_ALWAYS_INLINE ir_ref ir_list_at(const ir_list *l, uint32_t i)
{
	IR_ASSERT(i < l->len);
	return ir_array_at(&l->a, i);
}

IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val)
{
	IR_ASSERT(i < l->len);
	ir_array_set_unchecked(&l->a, i, val);
}

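/*
 * Usage sketch (illustrative only; "ref" stands for any valid ir_ref):
 * ir_list is a growable stack of references.
 *
 *   ir_list l;
 *   ir_list_init(&l, 16);
 *   ir_list_push(&l, ref);          // grows the backing array on demand
 *   while (ir_list_len(&l)) {
 *       ir_ref r = ir_list_pop(&l);
 *       // process r ...
 *   }
 *   ir_list_free(&l);
 */
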
/* Worklist (unique list) */
typedef struct _ir_worklist {
	ir_list l;
	ir_bitset visited;
} ir_worklist;

IR_ALWAYS_INLINE void ir_worklist_init(ir_worklist *w, uint32_t size)
{
	ir_list_init(&w->l, size);
	w->visited = ir_bitset_malloc(size);
}

IR_ALWAYS_INLINE void ir_worklist_free(ir_worklist *w)
{
	ir_list_free(&w->l);
	ir_mem_free(w->visited);
}

IR_ALWAYS_INLINE uint32_t ir_worklist_len(const ir_worklist *w)
{
	return ir_list_len(&w->l);
}

IR_ALWAYS_INLINE uint32_t ir_worklist_capasity(const ir_worklist *w)
{
	return ir_list_capasity(&w->l);
}

IR_ALWAYS_INLINE void ir_worklist_clear(ir_worklist *w)
{
	ir_list_clear(&w->l);
	ir_bitset_clear(w->visited, ir_bitset_len(ir_worklist_capasity(w)));
}

IR_ALWAYS_INLINE bool ir_worklist_push(ir_worklist *w, ir_ref val)
{
	IR_ASSERT(val >= 0 && (uint32_t)val < ir_worklist_capasity(w));
	if (ir_bitset_in(w->visited, val)) {
		return 0;
	}
	ir_bitset_incl(w->visited, val);
	IR_ASSERT(ir_list_len(&w->l) < ir_list_capasity(&w->l));
	ir_list_push_unchecked(&w->l, val);
	return 1;
}

IR_ALWAYS_INLINE ir_ref ir_worklist_pop(ir_worklist *w)
{
	return ir_list_pop(&w->l);
}

IR_ALWAYS_INLINE ir_ref ir_worklist_peek(const ir_worklist *w)
{
	return ir_list_peek(&w->l);
}

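/*
 * Usage sketch (illustrative only; "start_ref" is a hypothetical starting
 * point): a worklist pairs a stack with a "visited" bitset, so each value
 * is pushed at most once -- the typical shape of a DFS/BFS over blocks or
 * instructions.
 *
 *   ir_worklist w;
 *   ir_worklist_init(&w, ctx->insns_count);
 *   ir_worklist_push(&w, start_ref);
 *   while (ir_worklist_len(&w)) {
 *       ir_ref ref = ir_worklist_pop(&w);
 *       // ir_worklist_push() returns 0 for already-visited successors
 *   }
 *   ir_worklist_free(&w);
 */
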
/* IR Hash Table */
#define IR_INVALID_IDX 0xffffffff
#define IR_INVALID_VAL 0x80000000

typedef struct _ir_hashtab_bucket {
	uint32_t    key;
	ir_ref      val;
	uint32_t    next;
} ir_hashtab_bucket;

typedef struct _ir_hashtab {
	void       *data;
	uint32_t    mask;
	uint32_t    size;
	uint32_t    count;
	uint32_t    pos;
} ir_hashtab;

void ir_hashtab_init(ir_hashtab *tab, uint32_t size);
void ir_hashtab_free(ir_hashtab *tab);
ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key);
bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val);
void ir_hashtab_key_sort(ir_hashtab *tab);

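/*
 * Usage sketch (illustrative only): ir_hashtab maps a uint32_t key to an
 * ir_ref value; a lookup of a missing key yields (ir_ref)IR_INVALID_VAL,
 * as relied upon by ir_binding_find() below.
 *
 *   ir_hashtab tab;
 *   ir_hashtab_init(&tab, 16);
 *   ir_hashtab_add(&tab, key, val);   // key/val are the caller's data
 *   if (ir_hashtab_find(&tab, key) != (ir_ref)IR_INVALID_VAL) {
 *       // found
 *   }
 *   ir_hashtab_free(&tab);
 */
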
/* IR Addr Table */
typedef struct _ir_addrtab_bucket {
	uint64_t    key;
	ir_ref      val;
	uint32_t    next;
} ir_addrtab_bucket;

void ir_addrtab_init(ir_hashtab *tab, uint32_t size);
void ir_addrtab_free(ir_hashtab *tab);
ir_ref ir_addrtab_find(const ir_hashtab *tab, uint64_t key);
void ir_addrtab_set(ir_hashtab *tab, uint64_t key, ir_ref val);

/*** IR OP info ***/
extern const uint8_t ir_type_flags[IR_LAST_TYPE];
extern const char *ir_type_name[IR_LAST_TYPE];
extern const char *ir_type_cname[IR_LAST_TYPE];
extern const uint8_t ir_type_size[IR_LAST_TYPE];
extern const uint32_t ir_op_flags[IR_LAST_OP];
extern const char *ir_op_name[IR_LAST_OP];

void ir_print_escaped_str(const char *s, size_t len, FILE *f);

#define IR_IS_CONST_OP(op)       ((op) > IR_NOP && (op) <= IR_C_FLOAT)
#define IR_IS_FOLDABLE_OP(op)    ((op) <= IR_LAST_FOLDABLE_OP)
#define IR_IS_SYM_CONST(op)      ((op) == IR_STR || (op) == IR_SYM || (op) == IR_FUNC)

ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx);

IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v)
{
	if (IR_IS_SYM_CONST(v->op)) {
		return 1;
	} else if (v->type == IR_BOOL) {
		return v->val.b;
	} else if (IR_IS_TYPE_INT(v->type)) {
		return v->val.i64 != 0;
	} else if (v->type == IR_DOUBLE) {
		return v->val.d != 0.0;
	} else {
		IR_ASSERT(v->type == IR_FLOAT);
		return v->val.f != 0.0;
	}
	return 0;
}

IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref)
{
	if (ref == IR_TRUE) {
		return 1;
	} else if (ref == IR_FALSE) {
		return 0;
	} else {
		IR_ASSERT(IR_IS_CONST_REF(ref));
		return ir_const_is_true(&ctx->ir_base[ref]);
	}
}

/* IR OP flags */
#define IR_OP_FLAG_OPERANDS_SHIFT 3

#define IR_OP_FLAG_EDGES_MASK     0x03
#define IR_OP_FLAG_VAR_INPUTS     0x04
#define IR_OP_FLAG_OPERANDS_MASK  0x18
#define IR_OP_FLAG_MEM_MASK       ((1<<6)|(1<<7))

#define IR_OP_FLAG_DATA           (1<<8)
#define IR_OP_FLAG_CONTROL        (1<<9)
#define IR_OP_FLAG_MEM            (1<<10)
#define IR_OP_FLAG_COMMUTATIVE    (1<<11)
#define IR_OP_FLAG_BB_START       (1<<12)
#define IR_OP_FLAG_BB_END         (1<<13)
#define IR_OP_FLAG_TERMINATOR     (1<<14)
#define IR_OP_FLAG_PINNED         (1<<15)

#define IR_OP_FLAG_MEM_LOAD       ((0<<6)|(0<<7))
#define IR_OP_FLAG_MEM_STORE      ((0<<6)|(1<<7))
#define IR_OP_FLAG_MEM_CALL       ((1<<6)|(0<<7))
#define IR_OP_FLAG_MEM_ALLOC      ((1<<6)|(1<<7))

#define IR_OPND_UNUSED            0x0
#define IR_OPND_DATA              0x1
#define IR_OPND_CONTROL           0x2
#define IR_OPND_CONTROL_DEP       0x3
#define IR_OPND_CONTROL_REF       0x4
#define IR_OPND_STR               0x5
#define IR_OPND_NUM               0x6
#define IR_OPND_PROB              0x7
#define IR_OPND_PROTO             0x8

#define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \
	((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28))

#define IR_INPUT_EDGES_COUNT(flags) (flags & IR_OP_FLAG_EDGES_MASK)
#define IR_OPERANDS_COUNT(flags)    ((flags & IR_OP_FLAG_OPERANDS_MASK) >> IR_OP_FLAG_OPERANDS_SHIFT)

#define IR_OP_HAS_VAR_INPUTS(flags) ((flags) & IR_OP_FLAG_VAR_INPUTS)

#define IR_OPND_KIND(flags, i) \
	(((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf)

#define IR_IS_REF_OPND_KIND(kind) \
	((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF)

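/*
 * Illustrative decoding: IR_OP_FLAGS() packs the general flags in the low
 * 20 bits and one 4-bit IR_OPND_* kind per operand at bit positions 20, 24
 * and 28, which is exactly what IR_OPND_KIND() extracts (operand numbers
 * above 3 reuse the kind of operand 3, for ops with variable inputs):
 *
 *   uint32_t f = IR_OP_FLAGS(IR_OP_FLAG_DATA | 2 | (2 << IR_OP_FLAG_OPERANDS_SHIFT),
 *                            IR_OPND_DATA, IR_OPND_DATA, IR_OPND_UNUSED);
 *   IR_INPUT_EDGES_COUNT(f);   // == 2
 *   IR_OPERANDS_COUNT(f);      // == 2
 *   IR_OPND_KIND(f, 1);        // == IR_OPND_DATA
 */
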
IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn)
{
	uint32_t flags = ir_op_flags[insn->op];
	uint32_t n = IR_OPERANDS_COUNT(flags);

	if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) {
		/* MERGE, PHI, CALL, etc */
		n = insn->inputs_count;
	}
	return n;
}

IR_ALWAYS_INLINE ir_ref ir_input_edges_count(const ir_ctx *ctx, const ir_insn *insn)
{
	uint32_t flags = ir_op_flags[insn->op];
	uint32_t n = IR_INPUT_EDGES_COUNT(flags);
	if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) {
		/* MERGE, PHI, CALL, etc */
		n = insn->inputs_count;
	}
	return n;
}

IR_ALWAYS_INLINE uint32_t ir_insn_inputs_to_len(uint32_t inputs_count)
{
	return 1 + (inputs_count >> 2);
}

IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn)
{
	return ir_insn_inputs_to_len(insn->inputs_count);
}

/*** IR Context Private Flags (ir_ctx->flags2) ***/
#define IR_CFG_HAS_LOOPS       (1<<0)
#define IR_IRREDUCIBLE_CFG     (1<<1)
#define IR_HAS_ALLOCA          (1<<2)
#define IR_HAS_CALLS           (1<<3)
#define IR_OPT_IN_SCCP         (1<<4)
#define IR_LINEAR              (1<<5)
#define IR_HAS_VA_START        (1<<6)
#define IR_HAS_VA_COPY         (1<<7)
#define IR_HAS_VA_ARG_GP       (1<<8)
#define IR_HAS_VA_ARG_FP       (1<<9)
#define IR_HAS_FP_RET_SLOT     (1<<10)
#define IR_16B_FRAME_ALIGNMENT (1<<11)

/* Temporary: SCCP -> CFG */
#define IR_SCCP_DONE           (1<<25)
#define IR_CFG_REACHABLE       (1<<26)

/* Temporary: Dominators -> Loops */
#define IR_NO_LOOPS            (1<<25)

/* Temporary: Live Ranges */
#define IR_LR_HAVE_DESSA_MOVES (1<<25)

/* Temporary: Register Allocator */
#define IR_RA_HAVE_SPLITS      (1<<25)
#define IR_RA_HAVE_SPILLS      (1<<26)

#define IR_RESERVED_FLAG_1     (1U<<31)

/*** IR Binding ***/
IR_ALWAYS_INLINE ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref)
{
	ir_ref var = ir_hashtab_find(ctx->binding, ref);
	return (var != (ir_ref)IR_INVALID_VAL) ? var : 0;
}

/*** IR Use Lists ***/
struct _ir_use_list {
	ir_ref        refs; /* index in ir_ctx->use_edges[] array */
	ir_ref        count;
};

void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref use);
void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref use);
void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use);
void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use);
bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use);

/*** Modification helpers ***/
#define MAKE_NOP(_insn) do { \
		ir_insn *__insn = _insn; \
		__insn->optx = IR_NOP; \
		__insn->op1 = __insn->op2 = __insn->op3 = IR_UNUSED; \
	} while (0)

#define CLEAR_USES(_ref) do { \
		ir_use_list *__use_list = &ctx->use_lists[_ref]; \
		__use_list->count = 0; \
	} while (0)

#define SWAP_REFS(_ref1, _ref2) do { \
		ir_ref _tmp = _ref1; \
		_ref1 = _ref2; \
		_ref2 = _tmp; \
	} while (0)

#define SWAP_INSNS(_insn1, _insn2) do { \
		ir_insn *_tmp = _insn1; \
		_insn1 = _insn2; \
		_insn2 = _tmp; \
	} while (0)

/*** IR Basic Blocks info ***/
#define IR_IS_BB_START(op) \
	((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0)

#define IR_IS_BB_MERGE(op) \
	((op) == IR_MERGE || (op) == IR_LOOP_BEGIN)

#define IR_IS_BB_END(op) \
	((ir_op_flags[op] & IR_OP_FLAG_BB_END) != 0)

#define IR_BB_UNREACHABLE      (1<<0)
#define IR_BB_START            (1<<1)
#define IR_BB_ENTRY            (1<<2)
#define IR_BB_LOOP_HEADER      (1<<3)
#define IR_BB_IRREDUCIBLE_LOOP (1<<4)
#define IR_BB_DESSA_MOVES      (1<<5) /* translation out of SSA requires MOVEs */
#define IR_BB_EMPTY            (1<<6)
#define IR_BB_PREV_EMPTY_ENTRY (1<<7)
#define IR_BB_OSR_ENTRY_LOADS  (1<<8) /* OSR Entry-point with register LOADs   */
#define IR_BB_LOOP_WITH_ENTRY  (1<<9) /* set together with LOOP_HEADER if there is an ENTRY in the loop */

/* The following flags are set by GCM */
#define IR_BB_HAS_PHI          (1<<10)
#define IR_BB_HAS_PI           (1<<11)
#define IR_BB_HAS_PARAM        (1<<12)
#define IR_BB_HAS_VAR          (1<<13)

/* The following flags are set by the BB scheduler */
#define IR_BB_ALIGN_LOOP       (1<<14)

struct _ir_block {
	uint32_t flags;
	ir_ref   start;              /* index of first instruction                 */
	ir_ref   end;                /* index of last instruction                  */
	uint32_t successors;         /* index in ir_ctx->cfg_edges[] array         */
	uint32_t successors_count;
	uint32_t predecessors;       /* index in ir_ctx->cfg_edges[] array         */
	uint32_t predecessors_count;
	union {
		uint32_t dom_parent;     /* immediate dominator block                  */
		uint32_t idom;           /* immediate dominator block                  */
	};
	union {
		uint32_t dom_depth;      /* depth from the root of the dominators tree */
		uint32_t postnum;        /* used temporarily during tree construction  */
	};
	uint32_t     dom_child;      /* first dominated block                      */
	uint32_t     dom_next_child; /* next dominated block (linked list)         */
	uint32_t     loop_header;
	uint32_t     loop_depth;
};

uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b);
uint32_t ir_next_block(const ir_ctx *ctx, uint32_t b);
void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block);

IR_ALWAYS_INLINE uint32_t ir_phi_input_number(const ir_ctx *ctx, const ir_block *bb, uint32_t from)
{
	uint32_t n, *p;

	for (n = 0, p = &ctx->cfg_edges[bb->predecessors]; n < bb->predecessors_count; p++, n++) {
		if (*p == from) {
			return n + 2; /* the first input is a reference to MERGE */
		}
	}
	IR_ASSERT(0);
	return 0;
}

/*** Folding Engine (see ir.c and ir_fold.h) ***/
typedef enum _ir_fold_action {
	IR_FOLD_DO_RESTART,
	IR_FOLD_DO_CSE,
	IR_FOLD_DO_EMIT,
	IR_FOLD_DO_COPY,
	IR_FOLD_DO_CONST
} ir_fold_action;

ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);

/*** IR Live Info ***/
typedef ir_ref                   ir_live_pos;
typedef struct _ir_use_pos       ir_use_pos;

#define IR_SUB_REFS_COUNT                4

#define IR_LOAD_SUB_REF                  0
#define IR_USE_SUB_REF                   1
#define IR_DEF_SUB_REF                   2
#define IR_SAVE_SUB_REF                  3

#define IR_LIVE_POS_TO_REF(pos)          ((pos) / IR_SUB_REFS_COUNT)
#define IR_LIVE_POS_TO_SUB_REF(pos)      ((pos) % IR_SUB_REFS_COUNT)

#define IR_LIVE_POS_FROM_REF(ref)        ((ref) * IR_SUB_REFS_COUNT)

#define IR_START_LIVE_POS_FROM_REF(ref)  ((ref) * IR_SUB_REFS_COUNT)
#define IR_LOAD_LIVE_POS_FROM_REF(ref)   ((ref) * IR_SUB_REFS_COUNT + IR_LOAD_SUB_REF)
#define IR_USE_LIVE_POS_FROM_REF(ref)    ((ref) * IR_SUB_REFS_COUNT + IR_USE_SUB_REF)
#define IR_DEF_LIVE_POS_FROM_REF(ref)    ((ref) * IR_SUB_REFS_COUNT + IR_DEF_SUB_REF)
#define IR_SAVE_LIVE_POS_FROM_REF(ref)   ((ref) * IR_SUB_REFS_COUNT + IR_SAVE_SUB_REF)
#define IR_END_LIVE_POS_FROM_REF(ref)    ((ref) * IR_SUB_REFS_COUNT + IR_SUB_REFS_COUNT)

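/*
 * Illustrative example: each IR ref is split into IR_SUB_REFS_COUNT
 * sub-positions, so for ref 10:
 *
 *   IR_START_LIVE_POS_FROM_REF(10)  == 40
 *   IR_LOAD_LIVE_POS_FROM_REF(10)   == 40
 *   IR_USE_LIVE_POS_FROM_REF(10)    == 41
 *   IR_DEF_LIVE_POS_FROM_REF(10)    == 42
 *   IR_SAVE_LIVE_POS_FROM_REF(10)   == 43
 *   IR_END_LIVE_POS_FROM_REF(10)    == 44   // == start position of ref 11
 *   IR_LIVE_POS_TO_REF(42)          == 10
 */
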
/* ir_use_pos.flags bits */
#define IR_USE_MUST_BE_IN_REG            (1<<0)
#define IR_USE_SHOULD_BE_IN_REG          (1<<1)
#define IR_DEF_REUSES_OP1_REG            (1<<2)
#define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3)

#define IR_FUSED_USE                     (1<<6)
#define IR_PHI_USE                       (1<<7)

#define IR_OP1_MUST_BE_IN_REG            (1<<8)
#define IR_OP1_SHOULD_BE_IN_REG          (1<<9)
#define IR_OP2_MUST_BE_IN_REG            (1<<10)
#define IR_OP2_SHOULD_BE_IN_REG          (1<<11)
#define IR_OP3_MUST_BE_IN_REG            (1<<12)
#define IR_OP3_SHOULD_BE_IN_REG          (1<<13)

#define IR_USE_FLAGS(def_flags, op_num)  (((def_flags) >> (6 + (IR_MIN((op_num), 3) * 2))) & 3)

struct _ir_use_pos {
	uint16_t       op_num; /* 0 - means result */
	int8_t         hint;
	uint8_t        flags;
	ir_ref         hint_ref; /* negative references are used for FUSION and PHI */
	ir_live_pos    pos;
	ir_use_pos    *next;
};

struct _ir_live_range {
	ir_live_pos    start; /* inclusive */
	ir_live_pos    end;   /* exclusive */
	ir_live_range *next;
};

/* ir_live_interval.flags bits (two low bits are reserved for temporary register number) */
#define IR_LIVE_INTERVAL_FIXED           (1<<0)
#define IR_LIVE_INTERVAL_TEMP            (1<<1)
#define IR_LIVE_INTERVAL_HAS_HINT_REGS   (1<<2)
#define IR_LIVE_INTERVAL_HAS_HINT_REFS   (1<<3)
#define IR_LIVE_INTERVAL_MEM_PARAM       (1<<4)
#define IR_LIVE_INTERVAL_MEM_LOAD        (1<<5)
#define IR_LIVE_INTERVAL_COALESCED       (1<<6)
#define IR_LIVE_INTERVAL_SPILL_SPECIAL   (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */
#define IR_LIVE_INTERVAL_SPILLED         (1<<8)
#define IR_LIVE_INTERVAL_SPLIT_CHILD     (1<<9)

struct _ir_live_interval {
	uint8_t           type;
	int8_t            reg;
	uint16_t          flags;
	union {
		int32_t       vreg;
		int32_t       tmp_ref;
	};
	union {
		int32_t       stack_spill_pos;
		ir_ref        tmp_op_num;
	};
	ir_live_pos       end;       /* end of the last live range (cache of ival.range.{next->}end) */
	ir_live_range     range;
	ir_live_range    *current_range;
	ir_use_pos       *use_pos;
	ir_live_interval *next;
	ir_live_interval *list_next; /* linked list of active, inactive or unhandled intervals */
};

typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to);

int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy);

#if defined(IR_REGSET_64BIT)

/*typedef enum _ir_reg ir_reg;*/
typedef int8_t ir_reg;

/*** Register Sets ***/
#if IR_REGSET_64BIT
typedef uint64_t ir_regset;
#else
typedef uint32_t ir_regset;
#endif

#define IR_REGSET_EMPTY 0

#define IR_REGSET_IS_EMPTY(regset) \
	(regset == IR_REGSET_EMPTY)

#define IR_REGSET_IS_SINGLETON(regset) \
	(regset && !(regset & (regset - 1)))

#if IR_REGSET_64BIT
# define IR_REGSET(reg) \
	(1ull << (reg))
#else
# define IR_REGSET(reg) \
	(1u << (reg))
#endif

#if IR_REGSET_64BIT
# define IR_REGSET_INTERVAL(reg1, reg2) \
	(((1ull << ((reg2) - (reg1) + 1)) - 1) << (reg1))
#else
# define IR_REGSET_INTERVAL(reg1, reg2) \
	(((1u << ((reg2) - (reg1) + 1)) - 1) << (reg1))
#endif

#define IR_REGSET_IN(regset, reg) \
	(((regset) & IR_REGSET(reg)) != 0)

#define IR_REGSET_INCL(regset, reg) \
	(regset) |= IR_REGSET(reg)

#define IR_REGSET_EXCL(regset, reg) \
	(regset) &= ~IR_REGSET(reg)

#define IR_REGSET_UNION(set1, set2) \
	((set1) | (set2))

#define IR_REGSET_INTERSECTION(set1, set2) \
	((set1) & (set2))

#define IR_REGSET_DIFFERENCE(set1, set2) \
	((set1) & ~(set2))

#if IR_REGSET_64BIT
# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntzl(set))
# define IR_REGSET_LAST(set)  ((ir_reg)(ir_nlzl(set)^63))
#else
# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntz(set))
# define IR_REGSET_LAST(set)  ((ir_reg)(ir_nlz(set)^31))
#endif

IR_ALWAYS_INLINE ir_reg ir_regset_pop_first(ir_regset *set)
{
	ir_reg reg;

	IR_ASSERT(!IR_REGSET_IS_EMPTY(*set));
	reg = IR_REGSET_FIRST(*set);
	*set = (*set) & ((*set) - 1);
	return reg;
}

#define IR_REGSET_FOREACH(set, reg) \
	do { \
		ir_regset _tmp = (set); \
		while (!IR_REGSET_IS_EMPTY(_tmp)) { \
			reg = ir_regset_pop_first(&_tmp);

#define IR_REGSET_FOREACH_END() \
		} \
	} while (0)

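/*
 * Usage sketch (illustrative only): a register set is a plain bit mask
 * indexed by register number; iteration pops registers in ascending order.
 *
 *   ir_regset avail = IR_REGSET_INTERVAL(0, 3);   // regs 0..3
 *   ir_reg reg;
 *   IR_REGSET_EXCL(avail, 2);
 *   IR_REGSET_FOREACH(avail, reg) {
 *       // visits 0, 1, 3
 *   } IR_REGSET_FOREACH_END();
 */
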
#endif /* defined(IR_REGSET_64BIT) */

/*** IR Register Allocation ***/
/* Flags for ctx->regs[][] (low bits are used for the register number itself) */
typedef struct _ir_reg_alloc_data {
	int32_t unused_slot_4;
	int32_t unused_slot_2;
	int32_t unused_slot_1;
	ir_live_interval **handled;
} ir_reg_alloc_data;

int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data);

IR_ALWAYS_INLINE void ir_set_alocated_reg(ir_ctx *ctx, ir_ref ref, int op_num, int8_t reg)
{
	int8_t *regs = ctx->regs[ref];

	if (op_num > 0) {
		/* regs[] is not limited by the declared boundary of 4; the real boundary is checked below */
		IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count));
	}
	regs[op_num] = reg;
}

IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int op_num)
{
	int8_t *regs = ctx->regs[ref];

	/* regs[] is not limited by the declared boundary of 4; the real boundary is checked below */
	IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count));
	return regs[op_num];
}

/*** IR Target Interface ***/

/* ctx->rules[] flags */
#define IR_FUSED     (1U<<31) /* Insn is fused into others (code is generated as part of the fusion root) */
#define IR_SKIPPED   (1U<<30) /* Insn is skipped (code is not generated) */
#define IR_SIMPLE    (1U<<29) /* Insn doesn't have any target constraints */
#define IR_FUSED_REG (1U<<28) /* Register assignment may be stored in ctx->fused_regs instead of ctx->regs */
#define IR_MAY_SWAP  (1U<<27) /* Allow swapping operands for better register allocation */
#define IR_MAY_REUSE (1U<<26) /* Result may reuse the register of the source */

#define IR_RULE_MASK 0xff

extern const char *ir_rule_name[];

typedef struct _ir_target_constraints ir_target_constraints;

#define IR_TMP_REG(_num, _type, _start, _end) \
	(ir_tmp_reg){.num=(_num), .type=(_type), .start=(_start), .end=(_end)}
#define IR_SCRATCH_REG(_reg, _start, _end) \
	(ir_tmp_reg){.reg=(_reg), .type=IR_VOID, .start=(_start), .end=(_end)}

int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints);

void ir_fix_stack_frame(ir_ctx *ctx);

/* Utility */
ir_type ir_get_return_type(ir_ctx *ctx);
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn);
bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn);

//#define IR_BITSET_LIVENESS

#endif /* IR_PRIVATE_H */