1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (Common data structures and non public definitions)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8 #ifndef IR_PRIVATE_H
9 #define IR_PRIVATE_H
10 #include <string.h>
11 #include <stdlib.h>
12
/* Assertions are compiled in only for debug builds */
#ifdef IR_DEBUG
# include <assert.h>
# define IR_ASSERT(x) assert(x)
#else
# define IR_ASSERT(x)
#endif

/* MSVC bit-scan intrinsics (used by ir_ntz/ir_nlz below) */
#ifdef _WIN32
# include <intrin.h>
# ifdef _M_X64
#  pragma intrinsic(_BitScanForward64)
#  pragma intrinsic(_BitScanReverse64)
# endif
# pragma intrinsic(_BitScanForward)
# pragma intrinsic(_BitScanReverse)
#endif

/* Compiler feature detection: branch-prediction hints, alignment and
 * fallthrough attributes; each gets a no-op fallback further below. */
#ifdef __has_builtin
# if __has_builtin(__builtin_expect)
#  define EXPECTED(condition)   __builtin_expect(!!(condition), 1)
#  define UNEXPECTED(condition) __builtin_expect(!!(condition), 0)
# endif
# if __has_attribute(__aligned__)
#  define IR_SET_ALIGNED(alignment, decl) decl __attribute__ ((__aligned__ (alignment)))
# endif
# if __has_attribute(__fallthrough__)
#  define IR_FALLTHROUGH __attribute__((__fallthrough__))
# endif
#elif defined(_WIN32)
# define IR_SET_ALIGNED(alignment, decl) __declspec(align(alignment)) decl
#else /* GCC prior to 10 or non-clang/msvc compilers */
/* Make later "__has_builtin(...)" checks expand to 0 instead of failing */
#define __has_builtin(x) 0
#endif
#ifndef EXPECTED
# define EXPECTED(condition)   (condition)
# define UNEXPECTED(condition) (condition)
#endif
#ifndef IR_SET_ALIGNED
# define IR_SET_ALIGNED(alignment, decl) decl
#endif
#ifndef IR_FALLTHROUGH
# define IR_FALLTHROUGH ((void)0)
#endif
56
/*** Helper routines ***/

/* Round "size" up to a multiple of "alignment" ("alignment" must be a power of 2) */
#define IR_ALIGNED_SIZE(size, alignment) \
	(((size) + ((alignment) - 1)) & ~((alignment) - 1))

#define IR_MAX(a, b) (((a) > (b)) ? (a) : (b))
#define IR_MIN(a, b) (((a) < (b)) ? (a) : (b))

/* NOTE: also evaluates to true for x == 0 */
#define IR_IS_POWER_OF_TWO(x) (!((x) & ((x) - 1)))

/* Valid only when "x" is a power of 2 (the position of the single set bit) */
#define IR_LOG2(x) ir_ntzl(x)
68
ir_rol8(uint8_t op1,uint8_t op2)69 IR_ALWAYS_INLINE uint8_t ir_rol8(uint8_t op1, uint8_t op2)
70 {
71 return (op1 << op2) | (op1 >> (8 - op2));
72 }
73
ir_rol16(uint16_t op1,uint16_t op2)74 IR_ALWAYS_INLINE uint16_t ir_rol16(uint16_t op1, uint16_t op2)
75 {
76 return (op1 << op2) | (op1 >> (16 - op2));
77 }
78
ir_rol32(uint32_t op1,uint32_t op2)79 IR_ALWAYS_INLINE uint32_t ir_rol32(uint32_t op1, uint32_t op2)
80 {
81 return (op1 << op2) | (op1 >> (32 - op2));
82 }
83
ir_rol64(uint64_t op1,uint64_t op2)84 IR_ALWAYS_INLINE uint64_t ir_rol64(uint64_t op1, uint64_t op2)
85 {
86 return (op1 << op2) | (op1 >> (64 - op2));
87 }
88
ir_ror8(uint8_t op1,uint8_t op2)89 IR_ALWAYS_INLINE uint8_t ir_ror8(uint8_t op1, uint8_t op2)
90 {
91 return (op1 >> op2) | (op1 << (8 - op2));
92 }
93
ir_ror16(uint16_t op1,uint16_t op2)94 IR_ALWAYS_INLINE uint16_t ir_ror16(uint16_t op1, uint16_t op2)
95 {
96 return (op1 >> op2) | (op1 << (16 - op2));
97 }
98
ir_ror32(uint32_t op1,uint32_t op2)99 IR_ALWAYS_INLINE uint32_t ir_ror32(uint32_t op1, uint32_t op2)
100 {
101 return (op1 >> op2) | (op1 << (32 - op2));
102 }
103
ir_ror64(uint64_t op1,uint64_t op2)104 IR_ALWAYS_INLINE uint64_t ir_ror64(uint64_t op1, uint64_t op2)
105 {
106 return (op1 >> op2) | (op1 << (64 - op2));
107 }
108
109 /* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */
ir_ntz(uint32_t num)110 IR_ALWAYS_INLINE uint32_t ir_ntz(uint32_t num)
111 {
112 #if (defined(__GNUC__) || __has_builtin(__builtin_ctz))
113 return __builtin_ctz(num);
114 #elif defined(_WIN32)
115 uint32_t index;
116
117 if (!_BitScanForward(&index, num)) {
118 /* undefined behavior */
119 return 32;
120 }
121
122 return index;
123 #else
124 int n;
125
126 if (num == 0) return 32;
127
128 n = 1;
129 if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;}
130 if ((num & 0x000000ff) == 0) {n += 8; num = num >> 8;}
131 if ((num & 0x0000000f) == 0) {n += 4; num = num >> 4;}
132 if ((num & 0x00000003) == 0) {n += 2; num = num >> 2;}
133 return n - (num & 1);
134 #endif
135 }
136
137 /* Number of trailing zero bits (0x01 -> 0; 0x40 -> 6; 0x00 -> LEN) */
ir_ntzl(uint64_t num)138 IR_ALWAYS_INLINE uint32_t ir_ntzl(uint64_t num)
139 {
140 #if (defined(__GNUC__) || __has_builtin(__builtin_ctzl))
141 return __builtin_ctzl(num);
142 #elif defined(_WIN64)
143 unsigned long index;
144
145 if (!_BitScanForward64(&index, num)) {
146 /* undefined behavior */
147 return 64;
148 }
149
150 return (uint32_t) index;
151 #else
152 uint32_t n;
153
154 if (num == 0) return 64;
155
156 n = 1;
157 if ((num & 0xffffffff) == 0) {n += 32; num = num >> 32;}
158 if ((num & 0x0000ffff) == 0) {n += 16; num = num >> 16;}
159 if ((num & 0x000000ff) == 0) {n += 8; num = num >> 8;}
160 if ((num & 0x0000000f) == 0) {n += 4; num = num >> 4;}
161 if ((num & 0x00000003) == 0) {n += 2; num = num >> 2;}
162 return n - (uint32_t)(num & 1);
163 #endif
164 }
165
166 /* Number of leading zero bits (Undefined for zero) */
ir_nlz(uint32_t num)167 IR_ALWAYS_INLINE int ir_nlz(uint32_t num)
168 {
169 #if (defined(__GNUC__) || __has_builtin(__builtin_clz))
170 return __builtin_clz(num);
171 #elif defined(_WIN32)
172 uint32_t index;
173
174 if (!_BitScanReverse(&index, num)) {
175 /* undefined behavior */
176 return 32;
177 }
178
179 return (int) (32 - 1) - index;
180 #else
181 uint32_t x;
182 uint32_t n;
183
184 n = 32;
185 x = num >> 16; if (x != 0) {n -= 16; num = x;}
186 x = num >> 8; if (x != 0) {n -= 8; num = x;}
187 x = num >> 4; if (x != 0) {n -= 4; num = x;}
188 x = num >> 2; if (x != 0) {n -= 2; num = x;}
189 x = num >> 1; if (x != 0) return n - 2;
190 return n - num;
191 #endif
192 }
193
/* Number of leading zero bits in a 64-bit value (Undefined for zero) */
IR_ALWAYS_INLINE int ir_nlzl(uint64_t num)
{
#if (defined(__GNUC__) || __has_builtin(__builtin_clzll))
	return __builtin_clzll(num);
#elif defined(_WIN64)
	unsigned long index;

	if (!_BitScanReverse64(&index, num)) {
		/* undefined behavior */
		return 64;
	}

	/* index is the position of the highest set bit */
	return (int) (64 - 1) - index;
#else
	/* Portable fallback: binary search for the highest set bit */
	uint64_t x;
	uint32_t n;

	n = 64;
	x = num >> 32; if (x != 0) {n -= 32; num = x;}
	x = num >> 16; if (x != 0) {n -= 16; num = x;}
	x = num >> 8; if (x != 0) {n -= 8; num = x;}
	x = num >> 4; if (x != 0) {n -= 4; num = x;}
	x = num >> 2; if (x != 0) {n -= 2; num = x;}
	x = num >> 1; if (x != 0) return n - 2;
	return n - (uint32_t)num;
#endif
}
221
222 /*** Helper data types ***/
223
/* Arena: chunked bump allocator. Chunks form a singly-linked list through
 * "prev"; allocation bumps "ptr" toward "end" within the newest chunk. */
struct _ir_arena {
	char *ptr;       /* next free byte in this chunk */
	char *end;       /* one past the last byte of this chunk */
	ir_arena *prev;  /* previously filled chunk (or NULL) */
};
230
/* Allocate a new arena of "size" bytes total (header included).
 * "size" must at least cover the aligned ir_arena header. */
IR_ALWAYS_INLINE ir_arena* ir_arena_create(size_t size)
{
	ir_arena *arena;

	IR_ASSERT(size >= IR_ALIGNED_SIZE(sizeof(ir_arena), 8));
	arena = (ir_arena*)ir_mem_malloc(size);
	/* usable space starts right after the 8-byte-aligned header */
	arena->ptr = (char*) arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8);
	arena->end = (char*) arena + size;
	arena->prev = NULL;
	return arena;
}

/* Free the whole chunk chain, newest to oldest */
IR_ALWAYS_INLINE void ir_arena_free(ir_arena *arena)
{
	do {
		ir_arena *prev = arena->prev;
		ir_mem_free(arena);
		arena = prev;
	} while (arena);
}
251
/* Allocate "size" bytes (rounded up to 8) from the arena. If the current
 * chunk is full, a new chunk is prepended and *arena_ptr is updated.
 * Returned memory is valid until ir_arena_free()/ir_release(). */
IR_ALWAYS_INLINE void* ir_arena_alloc(ir_arena **arena_ptr, size_t size)
{
	ir_arena *arena = *arena_ptr;
	char *ptr = arena->ptr;

	size = IR_ALIGNED_SIZE(size, 8);

	if (EXPECTED(size <= (size_t)(arena->end - ptr))) {
		/* fast path: bump the pointer within the current chunk */
		arena->ptr = ptr + size;
	} else {
		/* new chunk: at least as large as the current one, and large
		 * enough for the header plus the requested allocation */
		size_t arena_size =
			UNEXPECTED((size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) > (size_t)(arena->end - (char*) arena)) ?
				(size + IR_ALIGNED_SIZE(sizeof(ir_arena), 8)) :
				(size_t)(arena->end - (char*) arena);
		ir_arena *new_arena = (ir_arena*)ir_mem_malloc(arena_size);

		ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8);
		new_arena->ptr = (char*) new_arena + IR_ALIGNED_SIZE(sizeof(ir_arena), 8) + size;
		new_arena->end = (char*) new_arena + arena_size;
		new_arena->prev = arena;
		*arena_ptr = new_arena;
	}

	return (void*) ptr;
}
277
/* Capture the current allocation point; pass to ir_release() to roll back */
IR_ALWAYS_INLINE void* ir_arena_checkpoint(ir_arena *arena)
{
	return arena->ptr;
}

/* Roll the arena back to a checkpoint taken earlier: free every chunk
 * allocated after the checkpoint, then reset the bump pointer. */
IR_ALWAYS_INLINE void ir_release(ir_arena **arena_ptr, void *checkpoint)
{
	ir_arena *arena = *arena_ptr;

	/* the checkpoint lies strictly inside exactly one chunk;
	 * pop chunks until we find it */
	while (UNEXPECTED((char*)checkpoint > arena->end) ||
	       UNEXPECTED((char*)checkpoint <= (char*)arena)) {
		ir_arena *prev = arena->prev;
		ir_mem_free(arena);
		*arena_ptr = arena = prev;
	}
	IR_ASSERT((char*)checkpoint > (char*)arena && (char*)checkpoint <= arena->end);
	arena->ptr = (char*)checkpoint;
}
296
/* Bitsets: flat arrays of machine words; word width matches the target */
#if defined(IR_TARGET_X86)
# define IR_BITSET_BITS 32
# define IR_BITSET_ONE 1U
# define ir_bitset_base_t uint32_t
# define ir_bitset_ntz ir_ntz
#else
# define IR_BITSET_BITS 64
# ifdef _M_X64 /* MSVC*/
#  define IR_BITSET_ONE 1ui64
# else
/* NOTE(review): "1UL" is only 32 bits on LLP64 targets (e.g. MinGW-w64);
 * "1ULL" would be safe everywhere -- confirm which targets take this path */
#  define IR_BITSET_ONE 1UL
# endif
# define ir_bitset_base_t uint64_t
# define ir_bitset_ntz ir_ntzl
#endif

typedef ir_bitset_base_t *ir_bitset;
315
ir_bitset_len(uint32_t n)316 IR_ALWAYS_INLINE uint32_t ir_bitset_len(uint32_t n)
317 {
318 return (n + (IR_BITSET_BITS - 1)) / IR_BITSET_BITS;
319 }
320
ir_bitset_malloc(uint32_t n)321 IR_ALWAYS_INLINE ir_bitset ir_bitset_malloc(uint32_t n)
322 {
323 return ir_mem_calloc(ir_bitset_len(n), IR_BITSET_BITS / 8);
324 }
325
ir_bitset_incl(ir_bitset set,uint32_t n)326 IR_ALWAYS_INLINE void ir_bitset_incl(ir_bitset set, uint32_t n)
327 {
328 set[n / IR_BITSET_BITS] |= IR_BITSET_ONE << (n % IR_BITSET_BITS);
329 }
330
ir_bitset_excl(ir_bitset set,uint32_t n)331 IR_ALWAYS_INLINE void ir_bitset_excl(ir_bitset set, uint32_t n)
332 {
333 set[n / IR_BITSET_BITS] &= ~(IR_BITSET_ONE << (n % IR_BITSET_BITS));
334 }
335
ir_bitset_in(const ir_bitset set,uint32_t n)336 IR_ALWAYS_INLINE bool ir_bitset_in(const ir_bitset set, uint32_t n)
337 {
338 return (set[(n / IR_BITSET_BITS)] & (IR_BITSET_ONE << (n % IR_BITSET_BITS))) != 0;
339 }
340
ir_bitset_clear(ir_bitset set,uint32_t len)341 IR_ALWAYS_INLINE void ir_bitset_clear(ir_bitset set, uint32_t len)
342 {
343 memset(set, 0, len * (IR_BITSET_BITS / 8));
344 }
345
ir_bitset_fill(ir_bitset set,uint32_t len)346 IR_ALWAYS_INLINE void ir_bitset_fill(ir_bitset set, uint32_t len)
347 {
348 memset(set, 0xff, len * (IR_BITSET_BITS / 8));
349 }
350
ir_bitset_empty(const ir_bitset set,uint32_t len)351 IR_ALWAYS_INLINE bool ir_bitset_empty(const ir_bitset set, uint32_t len)
352 {
353 uint32_t i;
354 for (i = 0; i < len; i++) {
355 if (set[i]) {
356 return 0;
357 }
358 }
359 return 1;
360 }
361
ir_bitset_equal(const ir_bitset set1,const ir_bitset set2,uint32_t len)362 IR_ALWAYS_INLINE bool ir_bitset_equal(const ir_bitset set1, const ir_bitset set2, uint32_t len)
363 {
364 return memcmp(set1, set2, len * (IR_BITSET_BITS / 8)) == 0;
365 }
366
ir_bitset_copy(ir_bitset set1,const ir_bitset set2,uint32_t len)367 IR_ALWAYS_INLINE void ir_bitset_copy(ir_bitset set1, const ir_bitset set2, uint32_t len)
368 {
369 memcpy(set1, set2, len * (IR_BITSET_BITS / 8));
370 }
371
ir_bitset_intersection(ir_bitset set1,const ir_bitset set2,uint32_t len)372 IR_ALWAYS_INLINE void ir_bitset_intersection(ir_bitset set1, const ir_bitset set2, uint32_t len)
373 {
374 uint32_t i;
375
376 for (i = 0; i < len; i++) {
377 set1[i] &= set2[i];
378 }
379 }
380
ir_bitset_union(ir_bitset set1,const ir_bitset set2,uint32_t len)381 IR_ALWAYS_INLINE void ir_bitset_union(ir_bitset set1, const ir_bitset set2, uint32_t len)
382 {
383 uint32_t i;
384
385 for (i = 0; i < len; i++) {
386 set1[i] |= set2[i];
387 }
388 }
389
ir_bitset_difference(ir_bitset set1,const ir_bitset set2,uint32_t len)390 IR_ALWAYS_INLINE void ir_bitset_difference(ir_bitset set1, const ir_bitset set2, uint32_t len)
391 {
392 uint32_t i;
393
394 for (i = 0; i < len; i++) {
395 set1[i] = set1[i] & ~set2[i];
396 }
397 }
398
ir_bitset_is_subset(const ir_bitset set1,const ir_bitset set2,uint32_t len)399 IR_ALWAYS_INLINE bool ir_bitset_is_subset(const ir_bitset set1, const ir_bitset set2, uint32_t len)
400 {
401 uint32_t i;
402
403 for (i = 0; i < len; i++) {
404 if (set1[i] & ~set2[i]) {
405 return 0;
406 }
407 }
408 return 1;
409 }
410
/* Index of the lowest set bit, or -1 when the set is empty */
IR_ALWAYS_INLINE int ir_bitset_first(const ir_bitset set, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		if (set[i]) {
			return IR_BITSET_BITS * i + ir_bitset_ntz(set[i]);
		}
	}
	return -1; /* empty set */
}

/* Index of the highest set bit, or -1 when the set is empty */
IR_ALWAYS_INLINE int ir_bitset_last(const ir_bitset set, uint32_t len)
{
	uint32_t i = len;

	while (i > 0) {
		i--;
		if (set[i]) {
			/* j starts one below the word base (wraps to 0xffffffff for
			 * i == 0); the do-loop always runs at least once, so after
			 * counting the shifts j ends at the highest bit's index */
			uint32_t j = IR_BITSET_BITS * i - 1;
			ir_bitset_base_t x = set[i];
			do {
				x = x >> 1;
				j++;
			} while (x != 0);
			return j;
		}
	}
	return -1; /* empty set */
}

/* Remove and return the lowest set bit, or -1 when the set is empty */
IR_ALWAYS_INLINE int ir_bitset_pop_first(ir_bitset set, uint32_t len)
{
	uint32_t i;

	for (i = 0; i < len; i++) {
		ir_bitset_base_t x = set[i];

		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);
			/* x & (x - 1) clears the lowest set bit */
			set[i] = x & (x - 1);
			return bit;
		}
	}
	return -1; /* empty set */
}
457
/* Iterate over all set bits: assigns each index to "bit" in ascending order.
 * Must be closed with IR_BITSET_FOREACH_END(). */
#define IR_BITSET_FOREACH(set, len, bit) do { \
	ir_bitset _set = (set); \
	uint32_t _i, _len = (len); \
	for (_i = 0; _i < _len; _set++, _i++) { \
		ir_bitset_base_t _x = *_set; \
		while (_x) { \
			(bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \
			_x &= _x - 1;

/* Iterate over bits present in set1 but not in set2 (set1 \ set2).
 * Must be closed with IR_BITSET_FOREACH_END(). */
#define IR_BITSET_FOREACH_DIFFERENCE(set1, set2, len, bit) do { \
	ir_bitset _set1 = (set1); \
	ir_bitset _set2 = (set2); \
	uint32_t _i, _len = (len); \
	for (_i = 0; _i < _len; _i++) { \
		ir_bitset_base_t _x = _set1[_i] & ~_set2[_i]; \
		while (_x) { \
			(bit) = IR_BITSET_BITS * _i + ir_bitset_ntz(_x); \
			_x &= _x - 1;

#define IR_BITSET_FOREACH_END() \
		} \
	} \
} while (0)
481
/* Sparse Set (Briggs/Torczon): O(1) add/remove/membership without
 * initialization. A single allocation of 2*size words is split in two:
 * "data" points to the middle, the dense array grows upward from data[0]
 * and the sparse (index) array grows downward at negative offsets. */
typedef struct _ir_sparse_set {
	uint32_t size;  /* capacity: valid element values are 0..size-1 */
	uint32_t len;   /* number of elements currently in the set */
	uint32_t *data; /* middle of the combined dense/sparse allocation */
} ir_sparse_set;

/* dense[n]: the n-th element, for n in 0..len-1 */
#define IR_SPARSE_SET_DENSE(set, n) (set)->data[n]
/* sparse[n]: position of value "n" inside the dense array */
#define IR_SPARSE_SET_SPARSE(set, n) (set)->data[-1 - ((int32_t)(n))]
491
ir_sparse_set_init(ir_sparse_set * set,uint32_t size)492 IR_ALWAYS_INLINE void ir_sparse_set_init(ir_sparse_set *set, uint32_t size)
493 {
494 set->size = size;
495 set->len = 0;
496 set->data = (uint32_t*)ir_mem_malloc(sizeof(uint32_t) * 2 * size) + size;
497 #if IR_DEBUG
498 /* initialize sparse part to avoid valgrind warnings */
499 memset(&IR_SPARSE_SET_SPARSE(set, size - 1), 0, size * sizeof(uint32_t));
500 #endif
501 }
502
ir_sparse_set_clear(ir_sparse_set * set)503 IR_ALWAYS_INLINE void ir_sparse_set_clear(ir_sparse_set *set)
504 {
505 set->len = 0;
506 }
507
ir_sparse_set_free(ir_sparse_set * set)508 IR_ALWAYS_INLINE void ir_sparse_set_free(ir_sparse_set *set)
509 {
510 ir_mem_free(set->data - set->size);
511 }
512
ir_sparse_set_empty(const ir_sparse_set * set)513 IR_ALWAYS_INLINE bool ir_sparse_set_empty(const ir_sparse_set *set)
514 {
515 return set->len == 0;
516 }
517
ir_sparse_set_in(const ir_sparse_set * set,uint32_t n)518 IR_ALWAYS_INLINE bool ir_sparse_set_in(const ir_sparse_set *set, uint32_t n)
519 {
520 uint32_t idx = IR_SPARSE_SET_SPARSE(set, n);
521
522 return idx < set->len && IR_SPARSE_SET_DENSE(set, idx) == n;
523 }
524
/* Append "n" to the set; "n" must not already be a member */
IR_ALWAYS_INLINE void ir_sparse_set_add(ir_sparse_set *set, uint32_t n)
{
	uint32_t idx;

	IR_ASSERT(!ir_sparse_set_in(set, n));
	idx = set->len++;
	IR_SPARSE_SET_DENSE(set, idx) = n;
	IR_SPARSE_SET_SPARSE(set, n) = idx;
}

/* Remove member "n": swap it with the last dense element, then shrink */
IR_ALWAYS_INLINE void ir_sparse_set_del(ir_sparse_set *set, uint32_t n)
{
	uint32_t last;

	IR_ASSERT(ir_sparse_set_in(set, n));
	last = IR_SPARSE_SET_DENSE(set, set->len - 1);
	if (last != n) {
		uint32_t idx = IR_SPARSE_SET_SPARSE(set, n);

		IR_SPARSE_SET_DENSE(set, idx) = last;
		IR_SPARSE_SET_SPARSE(set, last) = idx;

	}
	set->len--;
}

/* Remove and return the most recently added element;
 * returns (uint32_t)-1 when the set is empty */
IR_ALWAYS_INLINE uint32_t ir_sparse_set_pop(ir_sparse_set *set)
{
	if (set->len > 0) {
		set->len--;
		return IR_SPARSE_SET_DENSE(set, set->len);
	}
	return -1; /* empty set */
}
559
/* Iterate over the set in insertion order, assigning each element to "bit".
 * Must be closed with IR_SPARSE_SET_FOREACH_END(). Do not mutate the set
 * while iterating. */
#define IR_SPARSE_SET_FOREACH(set, bit) do { \
	ir_sparse_set *_set = (set); \
	uint32_t _i, _len = _set->len; \
	uint32_t *_p = _set->data; \
	for (_i = 0; _i < _len; _p++, _i++) { \
		(bit) = *_p; \

#define IR_SPARSE_SET_FOREACH_END() \
	} \
} while (0)
570
/* Bit Queue: a bitset plus a cursor ("pos") remembering the lowest word
 * that may contain a set bit, so repeated pops skip leading empty words. */
typedef struct _ir_bitqueue {
	uint32_t len;  /* bitset length in words */
	uint32_t pos;  /* first word worth scanning in ir_bitqueue_pop() */
	ir_bitset set;
} ir_bitqueue;

/* Initialize an empty queue able to hold bits 0..n-1 */
IR_ALWAYS_INLINE void ir_bitqueue_init(ir_bitqueue *q, uint32_t n)
{
	q->len = ir_bitset_len(n);
	/* empty queue: park the cursor at the last word */
	q->pos = q->len - 1;
	q->set = ir_bitset_malloc(n);
}
584
ir_bitqueue_free(ir_bitqueue * q)585 IR_ALWAYS_INLINE void ir_bitqueue_free(ir_bitqueue *q)
586 {
587 ir_mem_free(q->set);
588 }
589
ir_bitqueue_clear(ir_bitqueue * q)590 IR_ALWAYS_INLINE void ir_bitqueue_clear(ir_bitqueue *q)
591 {
592 q->pos = q->len - 1;
593 ir_bitset_clear(q->set, q->len);
594 }
595
/* Remove and return the lowest queued bit, or -1 when empty.
 * Scanning starts at q->pos; the cursor is updated so the next pop
 * resumes from the word where this bit was found. */
IR_ALWAYS_INLINE int ir_bitqueue_pop(ir_bitqueue *q)
{
	uint32_t i = q->pos;
	ir_bitset_base_t x, *p = q->set + i;
	do {
		x = *p;
		if (x) {
			int bit = IR_BITSET_BITS * i + ir_bitset_ntz(x);
			/* clear the lowest set bit */
			*p = x & (x - 1);
			q->pos = i;
			return bit;
		}
		p++;
		i++;
	} while (i < q->len);
	/* empty: park the cursor at the last word again */
	q->pos = q->len - 1;
	return -1; /* empty set */
}
614
ir_bitqueue_add(ir_bitqueue * q,uint32_t n)615 IR_ALWAYS_INLINE void ir_bitqueue_add(ir_bitqueue *q, uint32_t n)
616 {
617 uint32_t i = n / IR_BITSET_BITS;
618 q->set[i] |= IR_BITSET_ONE << (n % IR_BITSET_BITS);
619 if (i < q->pos) {
620 q->pos = i;
621 }
622 }
623
ir_bitqueue_del(ir_bitqueue * q,uint32_t n)624 IR_ALWAYS_INLINE void ir_bitqueue_del(ir_bitqueue *q, uint32_t n)
625 {
626 ir_bitset_excl(q->set, n);
627 }
628
ir_bitqueue_in(const ir_bitqueue * q,uint32_t n)629 IR_ALWAYS_INLINE bool ir_bitqueue_in(const ir_bitqueue *q, uint32_t n)
630 {
631 return ir_bitset_in(q->set, n);
632 }
633
/* Dynamic array of numeric references */
typedef struct _ir_array {
	ir_ref *refs;   /* heap-allocated element storage */
	uint32_t size;  /* allocated capacity in elements */
} ir_array;

/* Out-of-line helpers (ir_array.c): growth and element shifting */
void ir_array_grow(ir_array *a, uint32_t size);
void ir_array_insert(ir_array *a, uint32_t i, ir_ref val);
void ir_array_remove(ir_array *a, uint32_t i);
643
ir_array_init(ir_array * a,uint32_t size)644 IR_ALWAYS_INLINE void ir_array_init(ir_array *a, uint32_t size)
645 {
646 a->refs = ir_mem_malloc(size * sizeof(ir_ref));
647 a->size = size;
648 }
649
ir_array_free(ir_array * a)650 IR_ALWAYS_INLINE void ir_array_free(ir_array *a)
651 {
652 ir_mem_free(a->refs);
653 a->refs = NULL;
654 a->size = 0;
655 }
656
ir_array_size(const ir_array * a)657 IR_ALWAYS_INLINE uint32_t ir_array_size(const ir_array *a)
658 {
659 return a->size;
660 }
661
ir_array_get(const ir_array * a,uint32_t i)662 IR_ALWAYS_INLINE ir_ref ir_array_get(const ir_array *a, uint32_t i)
663 {
664 return (i < a->size) ? a->refs[i] : IR_UNUSED;
665 }
666
ir_array_at(const ir_array * a,uint32_t i)667 IR_ALWAYS_INLINE ir_ref ir_array_at(const ir_array *a, uint32_t i)
668 {
669 IR_ASSERT(i < a->size);
670 return a->refs[i];
671 }
672
ir_array_set(ir_array * a,uint32_t i,ir_ref val)673 IR_ALWAYS_INLINE void ir_array_set(ir_array *a, uint32_t i, ir_ref val)
674 {
675 if (i >= a->size) {
676 ir_array_grow(a, i + 1);
677 }
678 a->refs[i] = val;
679 }
680
ir_array_set_unchecked(ir_array * a,uint32_t i,ir_ref val)681 IR_ALWAYS_INLINE void ir_array_set_unchecked(ir_array *a, uint32_t i, ir_ref val)
682 {
683 IR_ASSERT(i < a->size);
684 a->refs[i] = val;
685 }
686
/* List/Stack of numeric references: a dynamic array plus a length */
typedef struct _ir_list {
	ir_array a;    /* backing storage */
	uint32_t len;  /* number of used elements */
} ir_list;

/* Out-of-line helpers (ir_list.c) */
bool ir_list_contains(const ir_list *l, ir_ref val);
void ir_list_insert(ir_list *l, uint32_t i, ir_ref val);
void ir_list_remove(ir_list *l, uint32_t i);
696
ir_list_init(ir_list * l,uint32_t size)697 IR_ALWAYS_INLINE void ir_list_init(ir_list *l, uint32_t size)
698 {
699 ir_array_init(&l->a, size);
700 l->len = 0;
701 }
702
ir_list_free(ir_list * l)703 IR_ALWAYS_INLINE void ir_list_free(ir_list *l)
704 {
705 ir_array_free(&l->a);
706 l->len = 0;
707 }
708
ir_list_clear(ir_list * l)709 IR_ALWAYS_INLINE void ir_list_clear(ir_list *l)
710 {
711 l->len = 0;
712 }
713
ir_list_len(const ir_list * l)714 IR_ALWAYS_INLINE uint32_t ir_list_len(const ir_list *l)
715 {
716 return l->len;
717 }
718
ir_list_capasity(const ir_list * l)719 IR_ALWAYS_INLINE uint32_t ir_list_capasity(const ir_list *l)
720 {
721 return ir_array_size(&l->a);
722 }
723
ir_list_push(ir_list * l,ir_ref val)724 IR_ALWAYS_INLINE void ir_list_push(ir_list *l, ir_ref val)
725 {
726 ir_array_set(&l->a, l->len++, val);
727 }
728
ir_list_push_unchecked(ir_list * l,ir_ref val)729 IR_ALWAYS_INLINE void ir_list_push_unchecked(ir_list *l, ir_ref val)
730 {
731 ir_array_set_unchecked(&l->a, l->len++, val);
732 }
733
ir_list_pop(ir_list * l)734 IR_ALWAYS_INLINE ir_ref ir_list_pop(ir_list *l)
735 {
736 IR_ASSERT(l->len > 0);
737 return ir_array_at(&l->a, --l->len);
738 }
739
ir_list_peek(const ir_list * l)740 IR_ALWAYS_INLINE ir_ref ir_list_peek(const ir_list *l)
741 {
742 IR_ASSERT(l->len > 0);
743 return ir_array_at(&l->a, l->len - 1);
744 }
745
ir_list_at(const ir_list * l,uint32_t i)746 IR_ALWAYS_INLINE ir_ref ir_list_at(const ir_list *l, uint32_t i)
747 {
748 IR_ASSERT(i < l->len);
749 return ir_array_at(&l->a, i);
750 }
751
ir_list_set(ir_list * l,uint32_t i,ir_ref val)752 IR_ALWAYS_INLINE void ir_list_set(ir_list *l, uint32_t i, ir_ref val)
753 {
754 IR_ASSERT(i < l->len);
755 ir_array_set_unchecked(&l->a, i, val);
756 }
757
/* Worklist (unique list): a stack plus a "visited" bitset so that each
 * value can be pushed at most once over the worklist's lifetime. */
typedef struct _ir_worklist {
	ir_list l;          /* pending items (LIFO) */
	ir_bitset visited;  /* values ever pushed */
} ir_worklist;
763
ir_worklist_init(ir_worklist * w,uint32_t size)764 IR_ALWAYS_INLINE void ir_worklist_init(ir_worklist *w, uint32_t size)
765 {
766 ir_list_init(&w->l, size);
767 w->visited = ir_bitset_malloc(size);
768 }
769
ir_worklist_free(ir_worklist * w)770 IR_ALWAYS_INLINE void ir_worklist_free(ir_worklist *w)
771 {
772 ir_list_free(&w->l);
773 ir_mem_free(w->visited);
774 }
775
ir_worklist_len(const ir_worklist * w)776 IR_ALWAYS_INLINE uint32_t ir_worklist_len(const ir_worklist *w)
777 {
778 return ir_list_len(&w->l);
779 }
780
ir_worklist_capasity(const ir_worklist * w)781 IR_ALWAYS_INLINE uint32_t ir_worklist_capasity(const ir_worklist *w)
782 {
783 return ir_list_capasity(&w->l);
784 }
785
ir_worklist_clear(ir_worklist * w)786 IR_ALWAYS_INLINE void ir_worklist_clear(ir_worklist *w)
787 {
788 ir_list_clear(&w->l);
789 ir_bitset_clear(w->visited, ir_bitset_len(ir_worklist_capasity(w)));
790 }
791
ir_worklist_push(ir_worklist * w,ir_ref val)792 IR_ALWAYS_INLINE bool ir_worklist_push(ir_worklist *w, ir_ref val)
793 {
794 IR_ASSERT(val >= 0 && (uint32_t)val < ir_worklist_capasity(w));
795 if (ir_bitset_in(w->visited, val)) {
796 return 0;
797 }
798 ir_bitset_incl(w->visited, val);
799 IR_ASSERT(ir_list_len(&w->l) < ir_list_capasity(&w->l));
800 ir_list_push_unchecked(&w->l, val);
801 return 1;
802 }
803
ir_worklist_pop(ir_worklist * w)804 IR_ALWAYS_INLINE ir_ref ir_worklist_pop(ir_worklist *w)
805 {
806 return ir_list_pop(&w->l);
807 }
808
ir_worklist_peek(const ir_worklist * w)809 IR_ALWAYS_INLINE ir_ref ir_worklist_peek(const ir_worklist *w)
810 {
811 return ir_list_peek(&w->l);
812 }
813
/* IR Hash Table (open hashing with bucket chains stored in one array) */
#define IR_INVALID_IDX 0xffffffff /* "no bucket" chain terminator */
#define IR_INVALID_VAL 0x80000000 /* returned by find() on a miss */

typedef struct _ir_hashtab_bucket {
	uint32_t key;
	ir_ref val;
	uint32_t next; /* index of the next bucket in the chain, or IR_INVALID_IDX */
} ir_hashtab_bucket;

typedef struct _ir_hashtab {
	void *data;      /* hash slots (below data) + bucket array (above) */
	uint32_t mask;
	uint32_t size;
	uint32_t count;
	uint32_t pos;
} ir_hashtab;

void ir_hashtab_init(ir_hashtab *tab, uint32_t size);
void ir_hashtab_free(ir_hashtab *tab);
ir_ref ir_hashtab_find(const ir_hashtab *tab, uint32_t key);
bool ir_hashtab_add(ir_hashtab *tab, uint32_t key, ir_ref val);
void ir_hashtab_key_sort(ir_hashtab *tab);

/* IR Addr Table: same structure, but keyed by 64-bit addresses */
typedef struct _ir_addrtab_bucket {
	uint64_t key;
	ir_ref val;
	uint32_t next;
} ir_addrtab_bucket;

void ir_addrtab_init(ir_hashtab *tab, uint32_t size);
void ir_addrtab_free(ir_hashtab *tab);
ir_ref ir_addrtab_find(const ir_hashtab *tab, uint64_t key);
void ir_addrtab_set(ir_hashtab *tab, uint64_t key, ir_ref val);
849
/*** IR OP info ***/
/* Per-type and per-opcode metadata tables (defined in ir.c) */
extern const uint8_t ir_type_flags[IR_LAST_TYPE];
extern const char *ir_type_name[IR_LAST_TYPE];
extern const char *ir_type_cname[IR_LAST_TYPE];
extern const uint8_t ir_type_size[IR_LAST_TYPE];
extern const uint32_t ir_op_flags[IR_LAST_OP];
extern const char *ir_op_name[IR_LAST_OP];

void ir_print_escaped_str(const char *s, size_t len, FILE *f);

/* Constant opcodes occupy the range (IR_NOP, IR_C_FLOAT] */
#define IR_IS_CONST_OP(op)       ((op) > IR_NOP && (op) <= IR_C_FLOAT)
#define IR_IS_FOLDABLE_OP(op)    ((op) <= IR_LAST_FOLDABLE_OP)
/* Symbolic constants: resolved to addresses at code-generation time */
#define IR_IS_SYM_CONST(op)      ((op) == IR_STR || (op) == IR_SYM || (op) == IR_FUNC)

ir_ref ir_const_ex(ir_ctx *ctx, ir_val val, uint8_t type, uint32_t optx);
865
/* Truthiness of a constant instruction: symbolic constants are always
 * true (non-NULL addresses); numeric constants compare against zero. */
IR_ALWAYS_INLINE bool ir_const_is_true(const ir_insn *v)
{
	if (IR_IS_SYM_CONST(v->op)) {
		return 1;
	} else if (v->type == IR_BOOL) {
		return v->val.b;
	} else if (IR_IS_TYPE_INT(v->type)) {
		return v->val.i64 != 0;
	} else if (v->type == IR_DOUBLE) {
		return v->val.d != 0.0;
	} else {
		IR_ASSERT(v->type == IR_FLOAT);
		return v->val.f != 0.0;
	}
	/* unreachable: every branch above returns */
	return 0;
}
882
ir_ref_is_true(ir_ctx * ctx,ir_ref ref)883 IR_ALWAYS_INLINE bool ir_ref_is_true(ir_ctx *ctx, ir_ref ref)
884 {
885 if (ref == IR_TRUE) {
886 return 1;
887 } else if (ref == IR_FALSE) {
888 return 0;
889 } else {
890 IR_ASSERT(IR_IS_CONST_REF(ref));
891 return ir_const_is_true(&ctx->ir_base[ref]);
892 }
893 }
894
/* IR OP flags
 *
 * Bits 0..1: number of input edges; bit 2: variable input count;
 * bits 3..4: operand count; bits 6..7: memory-access kind;
 * bits 8..15: opcode properties; bits 16+/20+: per-operand kinds. */
#define IR_OP_FLAG_OPERANDS_SHIFT 3

#define IR_OP_FLAG_EDGES_MASK 0x03
#define IR_OP_FLAG_VAR_INPUTS 0x04
#define IR_OP_FLAG_OPERANDS_MASK 0x18
#define IR_OP_FLAG_MEM_MASK ((1<<6)|(1<<7))

#define IR_OP_FLAG_DATA (1<<8)
#define IR_OP_FLAG_CONTROL (1<<9)
#define IR_OP_FLAG_MEM (1<<10)
#define IR_OP_FLAG_COMMUTATIVE (1<<11)
#define IR_OP_FLAG_BB_START (1<<12)
#define IR_OP_FLAG_BB_END (1<<13)
#define IR_OP_FLAG_TERMINATOR (1<<14)
#define IR_OP_FLAG_PINNED (1<<15)

/* Memory-access kinds, encoded in the two IR_OP_FLAG_MEM_MASK bits.
 * (A duplicate, identical definition of IR_OP_FLAG_MEM_MASK that used to
 * follow these was removed; the mask is defined once above.) */
#define IR_OP_FLAG_MEM_LOAD ((0<<6)|(0<<7))
#define IR_OP_FLAG_MEM_STORE ((0<<6)|(1<<7))
#define IR_OP_FLAG_MEM_CALL ((1<<6)|(0<<7))
#define IR_OP_FLAG_MEM_ALLOC ((1<<6)|(1<<7))

/* Operand kinds (4 bits each, packed into the high bits by IR_OP_FLAGS) */
#define IR_OPND_UNUSED 0x0
#define IR_OPND_DATA 0x1
#define IR_OPND_CONTROL 0x2
#define IR_OPND_CONTROL_DEP 0x3
#define IR_OPND_CONTROL_REF 0x4
#define IR_OPND_STR 0x5
#define IR_OPND_NUM 0x6
#define IR_OPND_PROB 0x7
#define IR_OPND_PROTO 0x8

#define IR_OP_FLAGS(op_flags, op1_flags, op2_flags, op3_flags) \
	((op_flags) | ((op1_flags) << 20) | ((op2_flags) << 24) | ((op3_flags) << 28))

#define IR_INPUT_EDGES_COUNT(flags) (flags & IR_OP_FLAG_EDGES_MASK)
#define IR_OPERANDS_COUNT(flags) ((flags & IR_OP_FLAG_OPERANDS_MASK) >> IR_OP_FLAG_OPERANDS_SHIFT)

#define IR_OP_HAS_VAR_INPUTS(flags) ((flags) & IR_OP_FLAG_VAR_INPUTS)

/* Kind of the i-th operand (operands beyond the 3rd share slot 3's kind) */
#define IR_OPND_KIND(flags, i) \
	(((flags) >> (16 + (4 * (((i) > 3) ? 3 : (i))))) & 0xf)

#define IR_IS_REF_OPND_KIND(kind) \
	((kind) >= IR_OPND_DATA && (kind) <= IR_OPND_CONTROL_REF)
941
ir_operands_count(const ir_ctx * ctx,const ir_insn * insn)942 IR_ALWAYS_INLINE ir_ref ir_operands_count(const ir_ctx *ctx, const ir_insn *insn)
943 {
944 uint32_t flags = ir_op_flags[insn->op];
945 uint32_t n = IR_OPERANDS_COUNT(flags);
946
947 if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) {
948 /* MERGE, PHI, CALL, etc */
949 n = insn->inputs_count;
950 }
951 return n;
952 }
953
ir_input_edges_count(const ir_ctx * ctx,const ir_insn * insn)954 IR_ALWAYS_INLINE ir_ref ir_input_edges_count(const ir_ctx *ctx, const ir_insn *insn)
955 {
956 uint32_t flags = ir_op_flags[insn->op];
957 uint32_t n = IR_INPUT_EDGES_COUNT(flags);
958 if (UNEXPECTED(IR_OP_HAS_VAR_INPUTS(flags))) {
959 /* MERGE, PHI, CALL, etc */
960 n = insn->inputs_count;
961 }
962 return n;
963 }
964
ir_insn_inputs_to_len(uint32_t inputs_count)965 IR_ALWAYS_INLINE uint32_t ir_insn_inputs_to_len(uint32_t inputs_count)
966 {
967 return 1 + (inputs_count >> 2);
968 }
969
ir_insn_len(const ir_insn * insn)970 IR_ALWAYS_INLINE uint32_t ir_insn_len(const ir_insn *insn)
971 {
972 return ir_insn_inputs_to_len(insn->inputs_count);
973 }
974
/*** IR Context Private Flags (ir_ctx->flags2) ***/
#define IR_CFG_HAS_LOOPS (1<<0)
#define IR_IRREDUCIBLE_CFG (1<<1)
#define IR_HAS_ALLOCA (1<<2)
#define IR_HAS_CALLS (1<<3)
#define IR_OPT_IN_SCCP (1<<4)
#define IR_LINEAR (1<<5)
#define IR_HAS_VA_START (1<<6)
#define IR_HAS_VA_COPY (1<<7)
#define IR_HAS_VA_ARG_GP (1<<8)
#define IR_HAS_VA_ARG_FP (1<<9)
#define IR_HAS_FP_RET_SLOT (1<<10)
#define IR_16B_FRAME_ALIGNMENT (1<<11)

/* Bits 25+ are reused by consecutive passes; each group below is only
 * meaningful between the pass that sets it and the pass that consumes it. */

/* Temporary: SCCP -> CFG */
#define IR_SCCP_DONE (1<<25)
#define IR_CFG_REACHABLE (1<<26)

/* Temporary: Dominators -> Loops */
#define IR_NO_LOOPS (1<<25)

/* Temporary: Live Ranges */
#define IR_LR_HAVE_DESSA_MOVES (1<<25)

/* Temporary: Register Allocator */
#define IR_RA_HAVE_SPLITS (1<<25)
#define IR_RA_HAVE_SPILLS (1<<26)

#define IR_RESERVED_FLAG_1 (1U<<31)
1004
/*** IR Binding ***/
/* Look up the variable bound to "ref"; returns 0 when there is no binding */
IR_ALWAYS_INLINE ir_ref ir_binding_find(const ir_ctx *ctx, ir_ref ref)
{
	ir_ref var = ir_hashtab_find(ctx->binding, ref);
	return (var != (ir_ref)IR_INVALID_VAL) ? var : 0;
}
1011
/*** IR Use Lists ***/
/* Per-instruction list of users, stored as a slice of ir_ctx->use_edges[] */
struct _ir_use_list {
	ir_ref refs; /* index in ir_ctx->use_edges[] array */
	ir_ref count;
};

/* Out-of-line use-list editing helpers */
void ir_use_list_remove_all(ir_ctx *ctx, ir_ref from, ir_ref use);
void ir_use_list_remove_one(ir_ctx *ctx, ir_ref from, ir_ref use);
void ir_use_list_replace_all(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use);
void ir_use_list_replace_one(ir_ctx *ctx, ir_ref ref, ir_ref use, ir_ref new_use);
bool ir_use_list_add(ir_ctx *ctx, ir_ref to, ir_ref new_use);
1023
/*** Modification helpers ***/
/* Turn an instruction into a NOP in place (operands cleared) */
#define MAKE_NOP(_insn) do { \
		ir_insn *__insn = _insn; \
		__insn->optx = IR_NOP; \
		__insn->op1 = __insn->op2 = __insn->op3 = IR_UNUSED; \
	} while (0)

/* Empty the use list of "_ref" (uses an in-scope "ctx") */
#define CLEAR_USES(_ref) do { \
		ir_use_list *__use_list = &ctx->use_lists[_ref]; \
		__use_list->count = 0; \
	} while (0)

#define SWAP_REFS(_ref1, _ref2) do { \
		ir_ref _tmp = _ref1; \
		_ref1 = _ref2; \
		_ref2 = _tmp; \
	} while (0)

#define SWAP_INSNS(_insn1, _insn2) do { \
		ir_insn *_tmp = _insn1; \
		_insn1 = _insn2; \
		_insn2 = _tmp; \
	} while (0)
1047
1048 /*** IR Basic Blocks info ***/
/* True when "op" starts a basic block (consults the ir_op_flags[] table). */
#define IR_IS_BB_START(op) \
	((ir_op_flags[op] & IR_OP_FLAG_BB_START) != 0)

/* True when "op" is a control-flow merge point. */
#define IR_IS_BB_MERGE(op) \
	((op) == IR_MERGE || (op) == IR_LOOP_BEGIN)

/* True when "op" ends a basic block (consults the ir_op_flags[] table). */
#define IR_IS_BB_END(op) \
	((ir_op_flags[op] & IR_OP_FLAG_BB_END) != 0)

/* ir_block.flags bits */
#define IR_BB_UNREACHABLE (1<<0)
#define IR_BB_START (1<<1)
#define IR_BB_ENTRY (1<<2)
#define IR_BB_LOOP_HEADER (1<<3)
#define IR_BB_IRREDUCIBLE_LOOP (1<<4)
#define IR_BB_DESSA_MOVES (1<<5) /* translation out of SSA requires MOVEs */
#define IR_BB_EMPTY (1<<6)
#define IR_BB_PREV_EMPTY_ENTRY (1<<7)
#define IR_BB_OSR_ENTRY_LOADS (1<<8) /* OSR Entry-point with register LOADs */
#define IR_BB_LOOP_WITH_ENTRY (1<<9) /* set together with LOOP_HEADER if there is an ENTRY in the loop */

/* The following flags are set by GCM */
#define IR_BB_HAS_PHI (1<<10)
#define IR_BB_HAS_PI (1<<11)
#define IR_BB_HAS_PARAM (1<<12)
#define IR_BB_HAS_VAR (1<<13)

/* The following flags are set by BB scheduler */
#define IR_BB_ALIGN_LOOP (1<<14)
1077
/* Basic block of the control-flow graph. Successor/predecessor edges are
 * stored as spans inside the shared ir_ctx->cfg_edges[] array. */
struct _ir_block {
	uint32_t flags; /* combination of IR_BB_* bits */
	ir_ref start; /* index of first instruction */
	ir_ref end; /* index of last instruction */
	uint32_t successors; /* index in ir_ctx->cfg_edges[] array */
	uint32_t successors_count;
	uint32_t predecessors; /* index in ir_ctx->cfg_edges[] array */
	uint32_t predecessors_count;
	union {
		uint32_t dom_parent; /* immediate dominator block */
		uint32_t idom; /* immediate dominator block */
	};
	union {
		uint32_t dom_depth; /* depth from the root of the dominators tree */
		uint32_t postnum; /* used temporarily during tree construction */
	};
	uint32_t dom_child; /* first dominated block */
	uint32_t dom_next_child; /* next dominated block (linked list) */
	uint32_t loop_header;
	uint32_t loop_depth;
};
1099
/* Follow chains of empty blocks starting at "b" and return the first
 * non-empty target block. */
uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b);
/* Next block after "b" -- presumably in the final scheduled order; TODO confirm. */
uint32_t ir_next_block(const ir_ctx *ctx, uint32_t b);
/* Get the two successor blocks of the conditional branch ending block "b". */
void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block);
1103
ir_phi_input_number(const ir_ctx * ctx,const ir_block * bb,uint32_t from)1104 IR_ALWAYS_INLINE uint32_t ir_phi_input_number(const ir_ctx *ctx, const ir_block *bb, uint32_t from)
1105 {
1106 uint32_t n, *p;
1107
1108 for (n = 0, p = &ctx->cfg_edges[bb->predecessors]; n < bb->predecessors_count; p++, n++) {
1109 if (*p == from) {
1110 return n + 2; /* first input is a reference to MERGE */
1111 }
1112 }
1113 IR_ASSERT(0);
1114 return 0;
1115 }
1116
1117 /*** Folding Engine (see ir.c and ir_fold.h) ***/
/* Action requested by a folding rule; tells the engine how to proceed
 * with the (possibly rewritten) instruction. */
typedef enum _ir_fold_action {
	IR_FOLD_DO_RESTART,
	IR_FOLD_DO_CSE,
	IR_FOLD_DO_EMIT,
	IR_FOLD_DO_COPY,
	IR_FOLD_DO_CONST
} ir_fold_action;

ir_ref ir_folding(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3, ir_insn *op1_insn, ir_insn *op2_insn, ir_insn *op3_insn);
1127
/*** IR Live Info ***/
typedef ir_ref ir_live_pos;
typedef struct _ir_use_pos ir_use_pos;

/* Each IR ref is subdivided into 4 live sub-positions (LOAD < USE < DEF < SAVE)
 * so live ranges can start/end between instructions. */
#define IR_SUB_REFS_COUNT 4

#define IR_LOAD_SUB_REF 0
#define IR_USE_SUB_REF 1
#define IR_DEF_SUB_REF 2
#define IR_SAVE_SUB_REF 3

/* Conversions between live positions and instruction refs. */
#define IR_LIVE_POS_TO_REF(pos) ((pos) / IR_SUB_REFS_COUNT)
#define IR_LIVE_POS_TO_SUB_REF(pos) ((pos) % IR_SUB_REFS_COUNT)

#define IR_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT)

#define IR_START_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT)
#define IR_LOAD_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_LOAD_SUB_REF)
#define IR_USE_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_USE_SUB_REF)
#define IR_DEF_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_DEF_SUB_REF)
#define IR_SAVE_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_SAVE_SUB_REF)
#define IR_END_LIVE_POS_FROM_REF(ref) ((ref) * IR_SUB_REFS_COUNT + IR_SUB_REFS_COUNT)

/* ir_use_pos.flags bits */
#define IR_USE_MUST_BE_IN_REG (1<<0)
#define IR_USE_SHOULD_BE_IN_REG (1<<1)
#define IR_DEF_REUSES_OP1_REG (1<<2)
#define IR_DEF_CONFLICTS_WITH_INPUT_REGS (1<<3)

#define IR_FUSED_USE (1<<6)
#define IR_PHI_USE (1<<7)

/* Per-operand constraint bits: two (MUST/SHOULD) per operand, from bit 8. */
#define IR_OP1_MUST_BE_IN_REG (1<<8)
#define IR_OP1_SHOULD_BE_IN_REG (1<<9)
#define IR_OP2_MUST_BE_IN_REG (1<<10)
#define IR_OP2_SHOULD_BE_IN_REG (1<<11)
#define IR_OP3_MUST_BE_IN_REG (1<<12)
#define IR_OP3_SHOULD_BE_IN_REG (1<<13)

/* Extract the two MUST/SHOULD bits for operand "op_num" from "def_flags". */
#define IR_USE_FLAGS(def_flags, op_num) (((def_flags) >> (6 + (IR_MIN((op_num), 3) * 2))) & 3)
1168
/* One use (or the definition, when op_num == 0) at a given live position. */
struct _ir_use_pos {
	uint16_t op_num; /* 0 - means result */
	int8_t hint; /* register hint -- presumably -1 when absent; TODO confirm */
	uint8_t flags; /* IR_USE_*/IR_DEF_* bits above */
	ir_ref hint_ref; /* negative references are used for FUSION and PHI */
	ir_live_pos pos;
	ir_use_pos *next; /* next use position of the same interval */
};
1177
/* Half-open [start, end) piece of an interval; pieces form a linked list. */
struct _ir_live_range {
	ir_live_pos start; /* inclusive */
	ir_live_pos end; /* exclusive */
	ir_live_range *next;
};
1183
/* ir_live_interval.flags bits (two low bits are reserved for temporary register number) */
#define IR_LIVE_INTERVAL_FIXED (1<<0)
#define IR_LIVE_INTERVAL_TEMP (1<<1)
#define IR_LIVE_INTERVAL_HAS_HINT_REGS (1<<2)
#define IR_LIVE_INTERVAL_HAS_HINT_REFS (1<<3)
#define IR_LIVE_INTERVAL_MEM_PARAM (1<<4)
#define IR_LIVE_INTERVAL_MEM_LOAD (1<<5)
#define IR_LIVE_INTERVAL_COALESCED (1<<6)
#define IR_LIVE_INTERVAL_SPILL_SPECIAL (1<<7) /* spill slot is pre-allocated in a special area (see ir_ctx.spill_reserved_base) */
#define IR_LIVE_INTERVAL_SPILLED (1<<8)
#define IR_LIVE_INTERVAL_SPLIT_CHILD (1<<9)

/* Live interval of one virtual register (or a fixed/temporary register). */
struct _ir_live_interval {
	uint8_t type;
	int8_t reg;
	uint16_t flags; /* IR_LIVE_INTERVAL_* bits above */
	union {
		int32_t vreg;
		int32_t tmp_ref;
	};
	union {
		int32_t stack_spill_pos;
		ir_ref tmp_op_num;
	};
	ir_live_pos end; /* end of the last live range (cache of ival.range.{next->}end) */
	ir_live_range range; /* first range, embedded; further ranges chained via range.next */
	ir_live_range *current_range;
	ir_use_pos *use_pos;
	ir_live_interval *next;
	ir_live_interval *list_next; /* linked list of active, inactive or unhandled intervals */
};

/* Callback used to emit one move while resolving out-of-SSA parallel copies. */
typedef int (*emit_copy_t)(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to);

int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy);
1219
#if defined(IR_REGSET_64BIT)

/*typedef enum _ir_reg ir_reg;*/
typedef int8_t ir_reg;

/*** Register Sets ***/
/* A register set is a bitmap: bit "reg" is set when the register is a member. */
#if IR_REGSET_64BIT
typedef uint64_t ir_regset;
#else
typedef uint32_t ir_regset;
#endif

#define IR_REGSET_EMPTY 0

#define IR_REGSET_IS_EMPTY(regset) \
	(regset == IR_REGSET_EMPTY)

/* Exactly one bit set: non-zero and x & (x - 1) clears the lowest bit. */
#define IR_REGSET_IS_SINGLETON(regset) \
	(regset && !(regset & (regset - 1)))

#if IR_REGSET_64BIT
# define IR_REGSET(reg) \
	(1ull << (reg))
#else
# define IR_REGSET(reg) \
	(1u << (reg))
#endif

/* Set containing all registers from reg1 to reg2, inclusive. */
#if IR_REGSET_64BIT
# define IR_REGSET_INTERVAL(reg1, reg2) \
	(((1ull << ((reg2) - (reg1) + 1)) - 1) << (reg1))
#else
# define IR_REGSET_INTERVAL(reg1, reg2) \
	(((1u << ((reg2) - (reg1) + 1)) - 1) << (reg1))
#endif

#define IR_REGSET_IN(regset, reg) \
	(((regset) & IR_REGSET(reg)) != 0)

#define IR_REGSET_INCL(regset, reg) \
	(regset) |= IR_REGSET(reg)

#define IR_REGSET_EXCL(regset, reg) \
	(regset) &= ~IR_REGSET(reg)

#define IR_REGSET_UNION(set1, set2) \
	((set1) | (set2))

#define IR_REGSET_INTERSECTION(set1, set2) \
	((set1) & (set2))

#define IR_REGSET_DIFFERENCE(set1, set2) \
	((set1) & ~(set2))
1273
/* Lowest/highest member of a non-empty set. LAST converts a count of
 * leading zeros into a bit index via XOR with (width - 1).
 * Fixes: the 64-bit LAST macro was mis-cased ("ir_REGSET_LAST", so the
 * properly-cased name was undefined) and contained a stray "(set)" that
 * made the expansion "call" the integer result of ir_nlzl() — a compile
 * error at every use site. It now mirrors the 32-bit branch. */
#if IR_REGSET_64BIT
# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntzl(set))
# define IR_REGSET_LAST(set) ((ir_reg)(ir_nlzl(set)^63))
#else
# define IR_REGSET_FIRST(set) ((ir_reg)ir_ntz(set))
# define IR_REGSET_LAST(set) ((ir_reg)(ir_nlz(set)^31))
#endif
1281
ir_regset_pop_first(ir_regset * set)1282 IR_ALWAYS_INLINE ir_reg ir_regset_pop_first(ir_regset *set)
1283 {
1284 ir_reg reg;
1285
1286 IR_ASSERT(!IR_REGSET_IS_EMPTY(*set));
1287 reg = IR_REGSET_FIRST(*set);
1288 *set = (*set) & ((*set) - 1);
1289 return reg;
1290 }
1291
/* Iterate over every register in "set", binding each to "reg" in turn.
 * Operates on a local copy, so "set" itself is not modified.
 * Must be closed with IR_REGSET_FOREACH_END(). */
#define IR_REGSET_FOREACH(set, reg) \
	do { \
		ir_regset _tmp = (set); \
		while (!IR_REGSET_IS_EMPTY(_tmp)) { \
			reg = ir_regset_pop_first(&_tmp);

#define IR_REGSET_FOREACH_END() \
		} \
	} while (0)
1301
1302 #endif /* defined(IR_REGSET_64BIT) */
1303
/*** IR Register Allocation ***/
/* Flags for ctx->regs[][] (low bits are used for register number itself) */

/* Scratch state threaded through spill-slot allocation; the unused_slot_*
 * fields presumably track reusable slots by size (4/2/1) -- TODO confirm
 * against ir_allocate_spill_slot(). */
typedef struct _ir_reg_alloc_data {
	int32_t unused_slot_4;
	int32_t unused_slot_2;
	int32_t unused_slot_1;
	ir_live_interval **handled;
} ir_reg_alloc_data;

int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data);
1314
ir_set_alocated_reg(ir_ctx * ctx,ir_ref ref,int op_num,int8_t reg)1315 IR_ALWAYS_INLINE void ir_set_alocated_reg(ir_ctx *ctx, ir_ref ref, int op_num, int8_t reg)
1316 {
1317 int8_t *regs = ctx->regs[ref];
1318
1319 if (op_num > 0) {
1320 /* regs[] is not limited by the declared boundary 4, the real boundary checked below */
1321 IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count));
1322 }
1323 regs[op_num] = reg;
1324 }
1325
ir_get_alocated_reg(const ir_ctx * ctx,ir_ref ref,int op_num)1326 IR_ALWAYS_INLINE int8_t ir_get_alocated_reg(const ir_ctx *ctx, ir_ref ref, int op_num)
1327 {
1328 int8_t *regs = ctx->regs[ref];
1329
1330 /* regs[] is not limited by the declared boundary 4, the real boundary checked below */
1331 IR_ASSERT(op_num <= IR_MAX(3, ctx->ir_base[ref].inputs_count));
1332 return regs[op_num];
1333 }
1334
/*** IR Target Interface ***/

/* ctx->rules[] flags */
#define IR_FUSED (1U<<31) /* Insn is fused into others (code is generated as part of the fusion root) */
#define IR_SKIPPED (1U<<30) /* Insn is skipped (code is not generated) */
#define IR_SIMPLE (1U<<29) /* Insn doesn't have any target constraints */
#define IR_FUSED_REG (1U<<28) /* Register assignment may be stored in ctx->fused_regs instead of ctx->regs */
#define IR_MAY_SWAP (1U<<27) /* Allow swapping operands for better register allocation */
#define IR_MAY_REUSE (1U<<26) /* Result may reuse register of the source */

/* Low byte of a rule holds the target-specific rule id (see ir_rule_name[]). */
#define IR_RULE_MASK 0xff

extern const char *ir_rule_name[];

typedef struct _ir_target_constraints ir_target_constraints;

/* Compound-literal constructors for ir_tmp_reg values. */
#define IR_TMP_REG(_num, _type, _start, _end) \
	(ir_tmp_reg){.num=(_num), .type=(_type), .start=(_start), .end=(_end)}
#define IR_SCRATCH_REG(_reg, _start, _end) \
	(ir_tmp_reg){.reg=(_reg), .type=IR_VOID, .start=(_start), .end=(_end)}

int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints);
1357
/* Finalize stack-frame layout after register allocation. */
void ir_fix_stack_frame(ir_ctx *ctx);

/* Utility */
ir_type ir_get_return_type(ir_ctx *ctx);
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn);
bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn);
1364
1365 //#define IR_BITSET_LIVENESS
1366
1367 #endif /* IR_PRIVATE_H */
1368