1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and
8 * Michael Franz, CGO'10 (2010)
9 * See: "Optimized Interval Splitting in a Linear Scan Register Allocator",
10 * Christian Wimmer, VEE'05 (2005)
11 */
12
13 #ifndef _GNU_SOURCE
14 # define _GNU_SOURCE
15 #endif
16
17 #include <stdlib.h>
18 #include "ir.h"
19
20 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
21 # include "ir_x86.h"
22 #elif defined(IR_TARGET_AARCH64)
23 # include "ir_aarch64.h"
24 #else
25 # error "Unknown IR target"
26 #endif
27
28 #include "ir_private.h"
29
30 int ir_regs_number(void)
31 {
32 return IR_REG_NUM;
33 }
34
35 bool ir_reg_is_int(int32_t reg)
36 {
37 IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
38 return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST;
39 }
40
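/*
 * Virtual register assignment: each data instruction that produces a value
 * the register allocator has to track gets a unique virtual register number
 * in ctx->vregs[] (0 means "no virtual register").  The "slow" variant below
 * is used when ctx->rules is not available (no instruction selection was
 * performed); otherwise ir_assign_virtual_registers() relies on the selected
 * rules to skip fused and skipped instructions.
 */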
41 static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
42 {
43 uint32_t *vregs;
44 uint32_t vregs_count = 0;
45 uint32_t b;
46 ir_ref i, n;
47 ir_block *bb;
48 ir_insn *insn;
49 uint32_t flags;
50
51 /* Assign a unique virtual register to each data node */
52 vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
53 n = 1;
54 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
55 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
56 i = bb->start;
57
58 /* skip first instruction */
59 insn = ctx->ir_base + i;
60 n = ir_insn_len(insn);
61 i += n;
62 insn += n;
63 while (i < bb->end) {
64 flags = ir_op_flags[insn->op];
65 if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0))
66 || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
67 if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
68 vregs[i] = ++vregs_count;
69 }
70 }
71 n = ir_insn_len(insn);
72 i += n;
73 insn += n;
74 }
75 }
76 ctx->vregs_count = vregs_count;
77 ctx->vregs = vregs;
78
79 return 1;
80 }
81
82 int ir_assign_virtual_registers(ir_ctx *ctx)
83 {
84 uint32_t *vregs;
85 uint32_t vregs_count = 0;
86 ir_ref i;
87 ir_insn *insn;
88
89 if (!ctx->rules) {
90 return ir_assign_virtual_registers_slow(ctx);
91 }
92
93 /* Assign a unique virtual register to each instruction whose rule requires one */
94 vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
95
96 for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
97 uint32_t v = 0;
98
99 if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
100 uint32_t flags = ir_op_flags[insn->op];
101
102 if ((flags & IR_OP_FLAG_DATA)
103 || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
104 v = ++vregs_count;
105 }
106 }
107 vregs[i] = v;
108 }
109
110 ctx->vregs_count = vregs_count;
111 ctx->vregs = vregs;
112
113 return 1;
114 }
115
116 /* Lifetime intervals construction */
117
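/*
 * A live interval (ir_live_interval) holds a list of non-overlapping live
 * ranges sorted by start position plus a list of use positions sorted by
 * position.  ival->range is the first (embedded) range and ival->end caches
 * the end of the last range.  Positions are "live positions" derived from
 * instruction references (IR_START_LIVE_POS_FROM_REF() and friends), so that
 * several points inside a single instruction (load, use, def, save) can be
 * distinguished.
 */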
118 static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
119 {
120 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
121
122 ival->type = IR_VOID;
123 ival->reg = IR_REG_NONE;
124 ival->flags = 0;
125 ival->vreg = v;
126 ival->stack_spill_pos = -1; // not allocated
127 ival->range.start = start;
128 ival->range.end = ival->end = end;
129 ival->range.next = NULL;
130 ival->use_pos = NULL;
131 ival->next = NULL;
132
133 ctx->live_intervals[v] = ival;
134 return ival;
135 }
136
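/*
 * Adds the range [start, end) to the interval of virtual register "v",
 * keeping the range list sorted and merging it with any existing ranges it
 * overlaps or touches.  For example, adding [10, 30) to an interval with the
 * ranges [4, 8) and [20, 40) results in [4, 8), [10, 40).  Range nodes freed
 * by merging are pushed onto ctx->unused_ranges for later reuse.
 */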
137 static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
138 {
139 ir_live_interval *ival = ctx->live_intervals[v];
140 ir_live_range *p, *q;
141
142 if (!ival) {
143 return ir_new_live_range(ctx, v, start, end);
144 }
145
146 p = &ival->range;
147 if (end >= p->start) {
148 ir_live_range *prev = NULL;
149
150 do {
151 if (p->end >= start) {
152 if (start < p->start) {
153 p->start = start;
154 }
155 if (end > p->end) {
156 /* merge with next */
157 ir_live_range *next = p->next;
158
159 p->end = end;
160 while (next && p->end >= next->start) {
161 if (next->end > p->end) {
162 p->end = next->end;
163 }
164 p->next = next->next;
165 /* remember in the "unused_ranges" list */
166 next->next = ctx->unused_ranges;
167 ctx->unused_ranges = next;
168 next = p->next;
169 }
170 if (!p->next) {
171 ival->end = p->end;
172 }
173 }
174 return ival;
175 }
176 prev = p;
177 p = prev->next;
178 } while (p && end >= p->start);
179 if (!p) {
180 ival->end = end;
181 }
182 if (prev) {
183 if (ctx->unused_ranges) {
184 /* reuse */
185 q = ctx->unused_ranges;
186 ctx->unused_ranges = q->next;
187 } else {
188 q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
189 }
190 prev->next = q;
191 q->start = start;
192 q->end = end;
193 q->next = p;
194 return ival;
195 }
196 }
197
198 if (ctx->unused_ranges) {
199 /* reuse */
200 q = ctx->unused_ranges;
201 ctx->unused_ranges = q->next;
202 } else {
203 q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
204 }
205 q->start = p->start;
206 q->end = p->end;
207 q->next = p->next;
208 p->start = start;
209 p->end = end;
210 p->next = q;
211 return ival;
212 }
213
214 IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
215 {
216 ir_live_interval *ival = ctx->live_intervals[v];
217
218 if (ival && ival->range.start == end) {
219 ival->range.start = start;
220 return ival;
221 }
222 return ir_add_live_range(ctx, v, start, end);
223 }
224
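/*
 * Fixed intervals model constraints on physical registers.  They live in the
 * same ctx->live_intervals[] table right after the virtual registers, at
 * index ctx->vregs_count + 1 + reg.  Since blocks and instructions are
 * processed in reverse order, the common case is prepending a new range that
 * ends before the current first one.
 */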
225 static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end)
226 {
227 int v = ctx->vregs_count + 1 + reg;
228 ir_live_interval *ival = ctx->live_intervals[v];
229 ir_live_range *q;
230
231 if (!ival) {
232 ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
233 ival->type = IR_VOID;
234 ival->reg = reg;
235 ival->flags = IR_LIVE_INTERVAL_FIXED;
236 ival->vreg = v;
237 ival->stack_spill_pos = -1; // not allocated
238 ival->range.start = start;
239 ival->range.end = ival->end = end;
240 ival->range.next = NULL;
241 ival->use_pos = NULL;
242 ival->next = NULL;
243
244 ctx->live_intervals[v] = ival;
245 } else if (EXPECTED(end < ival->range.start)) {
246 if (ctx->unused_ranges) {
247 /* reuse */
248 q = ctx->unused_ranges;
249 ctx->unused_ranges = q->next;
250 } else {
251 q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
252 }
253
254 q->start = ival->range.start;
255 q->end = ival->range.end;
256 q->next = ival->range.next;
257 ival->range.start = start;
258 ival->range.end = end;
259 ival->range.next = q;
260 } else if (end == ival->range.start) {
261 ival->range.start = start;
262 } else {
263 ir_add_live_range(ctx, v, start, end);
264 }
265 }
266
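/*
 * Temporary registers requested by the target backend through
 * ir_get_target_constraints() get their own short intervals.  They are not
 * bound to any virtual register and are collected in ctx->live_intervals[0]
 * as a list ordered by start position.
 */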
267 static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg)
268 {
269 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
270
271 ival->type = tmp_reg.type;
272 ival->reg = IR_REG_NONE;
273 ival->flags = IR_LIVE_INTERVAL_TEMP;
274 ival->tmp_ref = tmp_ref;
275 ival->tmp_op_num = tmp_op_num;
276 ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start;
277 ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end;
278 ival->range.next = NULL;
279 ival->use_pos = NULL;
280
281 if (!ctx->live_intervals[0]) {
282 ival->next = NULL;
283 ctx->live_intervals[0] = ival;
284 } else if (ival->range.start >= ctx->live_intervals[0]->range.start) {
285 ir_live_interval *prev = ctx->live_intervals[0];
286
287 while (prev->next && ival->range.start >= prev->next->range.start) {
288 prev = prev->next;
289 }
290 ival->next = prev->next;
291 prev->next = ival;
292 } else {
293 ir_live_interval *next = ctx->live_intervals[0];
294
295 ival->next = next;
296 ctx->live_intervals[0] = ival;
297 }
298 return;
299 }
300
301 static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num)
302 {
303 ir_live_interval *ival = ctx->live_intervals[0];
304
305 if (ival) {
306 while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) {
307 if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) {
308 return 1;
309 }
310 ival = ival->next;
311 }
312 }
313 return 0;
314 }
315
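/*
 * When the defining instruction of a virtual register is reached, its first
 * range (which was conservatively opened at the start of the basic block) is
 * shrunk so that it begins at the actual definition position.
 */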
316 static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start)
317 {
318 ir_live_interval *ival = ctx->live_intervals[v];
319 ir_live_range *p = &ival->range;
320
321 #if 0
322 while (p && p->start < old_start) {
323 p = p->next;
324 }
325 #endif
326 IR_ASSERT(ival && p->start == old_start);
327 p->start = new_start;
328 return ival;
329 }
330
331 static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
332 {
333 ir_use_pos *p = ival->use_pos;
334
335 if (EXPECTED(!p || p->pos > use_pos->pos)) {
336 use_pos->next = p;
337 ival->use_pos = use_pos;
338 } else {
339 ir_use_pos *prev;
340
341 do {
342 prev = p;
343 p = p->next;
344 } while (p && p->pos < use_pos->pos);
345
346 use_pos->next = prev->next;
347 prev->next = use_pos;
348 }
349 }
350
351
352 IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref)
353 {
354 ir_use_pos *use_pos;
355
356 use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
357 use_pos->op_num = op_num;
358 use_pos->hint = hint;
359 use_pos->flags = use_flags;
360 use_pos->hint_ref = hint_ref;
361 use_pos->pos = pos;
362
363 if (hint != IR_REG_NONE) {
364 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
365 }
366 if (hint_ref > 0) {
367 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
368 }
369
370 ir_add_use_pos(ctx, ival, use_pos);
371 }
372
373 static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref)
374 {
375 ir_use_pos *use_pos;
376
377 IR_ASSERT(phi_ref > 0);
378 use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
379 use_pos->op_num = op_num;
380 use_pos->hint = IR_REG_NONE;
381 use_pos->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ???
382 use_pos->hint_ref = -phi_ref;
383 use_pos->pos = pos;
384
385 ir_add_use_pos(ctx, ival, use_pos);
386 }
387
388 static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint)
389 {
390 ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]];
391
392 if (!(ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS)) {
393 ir_use_pos *use_pos = ival->use_pos;
394
395 while (use_pos) {
396 if (use_pos->pos == pos) {
397 if (use_pos->hint == IR_REG_NONE) {
398 use_pos->hint = hint;
399 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
400 }
401 }
402 use_pos = use_pos->next;
403 }
404 }
405 }
406
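/*
 * Hint propagation: for every interval that carries both register hints and
 * reference hints, a register hint found at a later use is propagated (via
 * ir_add_hint()) to the interval of the instruction referenced by the
 * definition's hint_ref, so that related values tend to end up in the same
 * physical register.
 */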
407 static void ir_hint_propagation(ir_ctx *ctx)
408 {
409 int i;
410 ir_live_interval *ival;
411 ir_use_pos *use_pos;
412 ir_use_pos *hint_use_pos;
413
414 for (i = ctx->vregs_count; i > 0; i--) {
415 ival = ctx->live_intervals[i];
416 if (ival
417 && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
418 use_pos = ival->use_pos;
419 hint_use_pos = NULL;
420 while (use_pos) {
421 if (use_pos->op_num == 0) {
422 if (use_pos->hint_ref > 0) {
423 hint_use_pos = use_pos;
424 }
425 } else if (use_pos->hint != IR_REG_NONE) {
426 if (hint_use_pos) {
427 ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);
428 hint_use_pos = NULL;
429 }
430 }
431 use_pos = use_pos->next;
432 }
433 }
434 }
435 }
436
437 #ifdef IR_BITSET_LIVENESS
438 /* DFS + Loop-Forest liveness for SSA using bitset(s) */
439 static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
440 {
441 bool ok = 1;
442 int count = 0;
443 ir_list *list = (ir_list*)ctx->osr_entry_loads;
444 ir_ref i;
445
446 IR_BITSET_FOREACH(live, len, i) {
447 /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
448 ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
449 ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
450
451 if (use_pos->op_num) {
452 ir_ref *ops = ctx->ir_base[ref].ops;
453 ref = ops[use_pos->op_num];
454 }
455
456 if (ctx->ir_base[ref].op == IR_PARAM) {
457 continue;
458 }
459 if (ctx->binding) {
460 ir_ref var = ir_binding_find(ctx, ref);
461 if (var < 0) {
462 /* We may load the value at OSR entry-point */
463 if (!count) {
464 bb->flags &= ~IR_BB_EMPTY;
465 bb->flags |= IR_BB_OSR_ENTRY_LOADS;
466 if (!ctx->osr_entry_loads) {
467 list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
468 ir_list_init(list, 16);
469 }
470 ir_list_push(list, b);
471 ir_list_push(list, 0);
472 }
473 ir_list_push(list, ref);
474 count++;
475 continue;
476 }
477 }
478 fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
479 ok = 0;
480 } IR_BITSET_FOREACH_END();
481
482 if (!ok) {
483 IR_ASSERT(0);
484 }
485 if (count) {
486 ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
487
488 #if 0
489 /* ENTRY "clobbers" all registers */
490 ir_ref ref = ctx->ir_base[bb->start].op1;
491 ir_add_fixed_live_range(ctx, IR_REG_ALL,
492 IR_DEF_LIVE_POS_FROM_REF(ref),
493 IR_SAVE_LIVE_POS_FROM_REF(ref));
494 #endif
495 }
496 }
497
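/*
 * Operands of a fused instruction are consumed at the instruction the fusion
 * is attached to ("ref"), so their uses are recorded at the position of
 * "ref" rather than at the fused instruction itself.  Nested fusion chains
 * are handled with a small explicit stack.
 */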
498 static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live)
499 {
500 ir_ref stack[4];
501 int stack_pos = 0;
502 ir_target_constraints constraints;
503 ir_insn *insn;
504 uint32_t j, n, flags, def_flags;
505 ir_ref *p, child;
506 uint8_t use_flags;
507 ir_reg reg;
508 ir_live_pos use_pos;
509 ir_live_interval *ival;
510
511 while (1) {
512 IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);
513
514 if (!(ctx->rules[input] & IR_SIMPLE)) {
515 def_flags = ir_get_target_constraints(ctx, input, &constraints);
516 n = constraints.tmps_count;
517 while (n > 0) {
518 n--;
519 if (constraints.tmp_regs[n].type) {
520 ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
521 } else {
522 /* CPU specific constraints */
523 ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
524 IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
525 IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
526 }
527 }
528 } else {
529 def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
530 constraints.hints_count = 0;
531 }
532
533 insn = &ctx->ir_base[input];
534 flags = ir_op_flags[insn->op];
535 n = IR_INPUT_EDGES_COUNT(flags);
536 j = 1;
537 p = insn->ops + j;
538 if (flags & IR_OP_FLAG_CONTROL) {
539 j++;
540 p++;
541 }
542 for (; j <= n; j++, p++) {
543 IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
544 child = *p;
545 if (child > 0) {
546 uint32_t v = ctx->vregs[child];
547
548 if (v) {
549 use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
550 reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
551 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
552 if (EXPECTED(reg == IR_REG_NONE)) {
553 use_pos += IR_USE_SUB_REF;
554 }
555
556 if (!ir_bitset_in(live, v)) {
557 /* live.add(opd) */
558 ir_bitset_incl(live, v);
559 /* intervals[opd].addRange(b.from, op.id) */
560 ival = ir_add_live_range(ctx, v,
561 IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
562 } else {
563 ival = ctx->live_intervals[v];
564 }
565 ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
566 } else if (ctx->rules[child] & IR_FUSED) {
567 IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack[0])));
568 stack[stack_pos++] = child;
569 } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
570 ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
571 }
572 }
573 }
574 if (!stack_pos) {
575 break;
576 }
577 input = stack[--stack_pos];
578 }
579 }
580
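/*
 * Liveness analysis and live range construction over the bitset
 * representation.  This follows the interval construction algorithm of the
 * Wimmer/Franz paper referenced at the top of this file: blocks are visited
 * in reverse order, the live set of a block starts as the union of the
 * live-in sets of its successors plus the PHI inputs coming from this block,
 * every live variable gets a range spanning the whole block, and the range
 * is later shrunk at the variable's definition.
 */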
581 int ir_compute_live_ranges(ir_ctx *ctx)
582 {
583 uint32_t b, i, j, k, n, succ, *p;
584 ir_ref ref;
585 uint32_t len;
586 ir_insn *insn;
587 ir_block *bb, *succ_bb;
588 #ifdef IR_DEBUG
589 ir_bitset visited;
590 #endif
591 ir_bitset live, bb_live;
592 ir_bitset loops = NULL;
593 ir_bitqueue queue;
594 ir_live_interval *ival;
595
596 if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
597 return 0;
598 }
599
600 if (ctx->rules) {
601 ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
602 memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
603 }
604
605 /* Root of the list of IR_VARs */
606 ctx->vars = IR_UNUSED;
607
608 /* Compute Live Ranges */
609 ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
610 len = ir_bitset_len(ctx->vregs_count + 1);
611 bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));
612
613 /* vregs + tmp + fixed + SCRATCH + ALL */
614 ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
615
616 #ifdef IR_DEBUG
617 visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
618 #endif
619
620 if (!ctx->arena) {
621 ctx->arena = ir_arena_create(16 * 1024);
622 }
623
624 /* for each basic block in reverse order */
625 for (b = ctx->cfg_blocks_count; b > 0; b--) {
626 bb = &ctx->cfg_blocks[b];
627 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
628 /* for each successor of b */
629
630 #ifdef IR_DEBUG
631 ir_bitset_incl(visited, b);
632 #endif
633 live = bb_live + (len * b);
634 n = bb->successors_count;
635 if (n == 0) {
636 ir_bitset_clear(live, len);
637 } else {
638 p = &ctx->cfg_edges[bb->successors];
639 succ = *p;
640
641 #ifdef IR_DEBUG
642 /* blocks must be ordered so that all dominators of a block come before it */
643 IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ);
644 #endif
645
646 /* live = union of successors.liveIn */
647 if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
648 ir_bitset_copy(live, bb_live + (len * succ), len);
649 } else {
650 IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
651 ir_bitset_clear(live, len);
652 }
653 if (n > 1) {
654 for (p++, n--; n > 0; p++, n--) {
655 succ = *p;
656 if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
657 ir_bitset_union(live, bb_live + (len * succ), len);
658 } else {
659 IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
660 }
661 }
662 }
663
664 /* for each opd in live */
665 IR_BITSET_FOREACH(live, len, i) {
666 /* intervals[opd].addRange(b.from, b.to) */
667 ir_add_prev_live_range(ctx, i,
668 IR_START_LIVE_POS_FROM_REF(bb->start),
669 IR_END_LIVE_POS_FROM_REF(bb->end));
670 } IR_BITSET_FOREACH_END();
671 }
672
673 if (bb->successors_count == 1) {
674 /* for each phi function phi of successor */
675 succ = ctx->cfg_edges[bb->successors];
676 succ_bb = &ctx->cfg_blocks[succ];
677 if (succ_bb->flags & IR_BB_HAS_PHI) {
678 ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
679
680 k = ir_phi_input_number(ctx, succ_bb, b);
681 IR_ASSERT(k != 0);
682 for (ref = 0; ref < use_list->count; ref++) {
683 ir_ref use = ctx->use_edges[use_list->refs + ref];
684 insn = &ctx->ir_base[use];
685 if (insn->op == IR_PHI) {
686 ir_ref input = ir_insn_op(insn, k);
687 if (input > 0) {
688 uint32_t v = ctx->vregs[input];
689
690 /* live.add(phi.inputOf(b)) */
691 IR_ASSERT(v);
692 ir_bitset_incl(live, v);
693 /* intervals[phi.inputOf(b)].addRange(b.from, b.to) */
694 ival = ir_add_prev_live_range(ctx, v,
695 IR_START_LIVE_POS_FROM_REF(bb->start),
696 IR_END_LIVE_POS_FROM_REF(bb->end));
697 ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
698 }
699 }
700 }
701 }
702 }
703
704 /* for each operation op of b in reverse order */
705 ref = bb->end;
706 insn = &ctx->ir_base[ref];
707 if (insn->op == IR_END || insn->op == IR_LOOP_END) {
708 ref = ctx->prev_ref[ref];
709 }
710 for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
711 uint32_t def_flags;
712 uint32_t flags;
713 ir_ref *p;
714 ir_target_constraints constraints;
715 uint32_t v;
716
717 if (ctx->rules) {
718 int n;
719
720 if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
721 if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
722 || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
723 && ctx->use_lists[ref].count > 0) {
724 insn = &ctx->ir_base[ref];
725 if (insn->op != IR_VADDR) {
726 insn->op3 = ctx->vars;
727 ctx->vars = ref;
728 }
729 }
730 continue;
731 }
732
733 def_flags = ir_get_target_constraints(ctx, ref, &constraints);
734 n = constraints.tmps_count;
735 while (n > 0) {
736 n--;
737 if (constraints.tmp_regs[n].type) {
738 ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
739 } else {
740 /* CPU specific constraints */
741 ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
742 IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
743 IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
744 }
745 }
746 } else {
747 def_flags = 0;
748 constraints.def_reg = IR_REG_NONE;
749 constraints.hints_count = 0;
750 }
751
752 insn = &ctx->ir_base[ref];
753 v = ctx->vregs[ref];
754 if (v) {
755 IR_ASSERT(ir_bitset_in(live, v));
756
757 if (insn->op != IR_PHI) {
758 ir_live_pos def_pos;
759 ir_ref hint_ref = 0;
760 ir_reg reg = constraints.def_reg;
761
762 if (reg != IR_REG_NONE) {
763 def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
764 if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
765 /* the parameter register must be kept live until its value is copied */
766 ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
767 }
768 } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
769 if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
770 hint_ref = insn->op1;
771 }
772 def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
773 } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
774 def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
775 } else {
776 if (insn->op == IR_PARAM) {
777 /* We may reuse parameter stack slot for spilling */
778 ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
779 } else if (insn->op == IR_VLOAD) {
780 /* Load may be fused into the usage instruction */
781 ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
782 }
783 def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
784 }
785 /* live.remove(opd) */
786 ir_bitset_excl(live, v);
787 /* intervals[opd].setFrom(op.id) */
788 ival = ir_fix_live_range(ctx, v,
789 IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
790 ival->type = insn->type;
791 ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
792 } else {
793 /* live.remove(opd) */
794 ir_bitset_excl(live, v);
795 /* PHI inputs must not be processed */
796 ival = ctx->live_intervals[v];
797 if (UNEXPECTED(!ival)) {
798 /* Dead PHI */
799 ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
800 }
801 ival->type = insn->type;
802 ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
803 continue;
804 }
805 }
806
807 IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
808 flags = ir_op_flags[insn->op];
809 j = 1;
810 p = insn->ops + 1;
811 if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
812 j++;
813 p++;
814 }
815 for (; j <= insn->inputs_count; j++, p++) {
816 ir_ref input = *p;
817 ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
818 ir_live_pos use_pos;
819 ir_ref hint_ref = 0;
820 uint32_t v;
821
822 if (input > 0) {
823 v = ctx->vregs[input];
824 if (v) {
825 use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
826 if (reg != IR_REG_NONE) {
827 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
828 ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
829 } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
830 if (j == 1) {
831 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
832 IR_ASSERT(ctx->vregs[ref]);
833 hint_ref = ref;
834 } else if (input == insn->op1) {
835 /* Input is the same as "op1" */
836 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
837 }
838 }
839 if (!ir_bitset_in(live, v)) {
840 /* live.add(opd) */
841 ir_bitset_incl(live, v);
842 /* intervals[opd].addRange(b.from, op.id) */
843 ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
844 } else {
845 ival = ctx->live_intervals[v];
846 }
847 ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
848 } else if (ctx->rules) {
849 if (ctx->rules[input] & IR_FUSED) {
850 ir_add_fusion_ranges(ctx, ref, input, bb, live);
851 } else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
852 ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
853 }
854 }
855 } else if (reg != IR_REG_NONE) {
856 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
857 ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
858 }
859 }
860 }
861
862 /* if b is loop header */
863 if ((bb->flags & IR_BB_LOOP_HEADER)
864 && !ir_bitset_empty(live, len)) {
865 /* variables live at the loop header are alive throughout the whole loop body */
866 uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
867 uint32_t child;
868 ir_block *child_bb;
869 ir_bitset child_live_in;
870
871 if (!loops) {
872 loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
873 ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1);
874 } else {
875 ir_bitset_clear(loops, bb_set_len);
876 ir_bitqueue_clear(&queue);
877 }
878 ir_bitset_incl(loops, b);
879 child = b;
880 do {
881 child_bb = &ctx->cfg_blocks[child];
882 child_live_in = bb_live + (len * child);
883
884 IR_BITSET_FOREACH(live, len, i) {
885 ir_bitset_incl(child_live_in, i);
886 ir_add_live_range(ctx, i,
887 IR_START_LIVE_POS_FROM_REF(child_bb->start),
888 IR_END_LIVE_POS_FROM_REF(child_bb->end));
889 } IR_BITSET_FOREACH_END();
890
891 child = child_bb->dom_child;
892 while (child) {
893 child_bb = &ctx->cfg_blocks[child];
894 if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
895 ir_bitqueue_add(&queue, child);
896 if (child_bb->flags & IR_BB_LOOP_HEADER) {
897 ir_bitset_incl(loops, child);
898 }
899 }
900 child = child_bb->dom_next_child;
901 }
902 } while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
903 }
904 }
905
906 if (ctx->entries) {
907 for (i = 0; i < ctx->entries_count; i++) {
908 b = ctx->entries[i];
909 bb = &ctx->cfg_blocks[b];
910 live = bb_live + (len * b);
911 ir_add_osr_entry_loads(ctx, bb, live, len, b);
912 }
913 if (ctx->osr_entry_loads) {
914 ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
915 }
916 }
917
918 if (loops) {
919 ir_mem_free(loops);
920 ir_bitqueue_free(&queue);
921 }
922
923 ir_mem_free(bb_live);
924 #ifdef IR_DEBUG
925 ir_mem_free(visited);
926 #endif
927
928 return 1;
929 }
930
931 #else
932 /* Liveness for SSA computed by path exploration from uses to definitions, using sets represented by linked lists */
933
934 #define IS_LIVE_IN_BLOCK(v, b) \
935 (live_in_block[v] == b)
936 #define SET_LIVE_IN_BLOCK(v, b) do { \
937 live_in_block[v] = b; \
938 } while (0)
939
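/*
 * live_in_block[] is a small per-vreg cache: it remembers the last block in
 * which the virtual register was already added to the live set, so each vreg
 * is processed at most once per block (see IS_LIVE_IN_BLOCK()/
 * SET_LIVE_IN_BLOCK() above).
 */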
940 /* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */
941 IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b)
942 {
943 #if 0
944 return live_outs[b];
945 #else
946 if (!live_outs[b]) {
947 return -1;
948 }
949 return ir_list_at(live_lists, live_outs[b]);
950 #endif
951 }
952
953 /* Remember a virtual register alive at the end of the block */
954 IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v)
955 {
956 #if 0
957 ir_block *bb = &ctx->cfg_blocks[b];
958 live_outs[b] = v;
959 ir_add_prev_live_range(ctx, v,
960 IR_START_LIVE_POS_FROM_REF(bb->start),
961 IR_END_LIVE_POS_FROM_REF(bb->end));
962 #else
963 if (live_lists->len >= live_lists->a.size) {
964 ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
965 }
966 /* Form a linked list of virtual registers live at the end of the block */
967 ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */
968 live_outs[b] = ir_list_len(live_lists); /* remember the new root */
969 ir_list_push_unchecked(live_lists, v); /* push a virtual register */
970 #endif
971 }
972
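/*
 * The live-out sets are stored compactly: live_outs[b] is an index into the
 * "live_lists" array, where live_lists[idx] holds a virtual register that is
 * live at the end of block b and live_lists[idx - 1] holds the index of the
 * previous entry for the same block (0 terminates the chain).
 *
 * Illustrative example (assuming an initially empty list): after
 * ir_live_out_push(ctx, live_outs, live_lists, 7, v1) and then
 * ir_live_out_push(ctx, live_outs, live_lists, 7, v2), live_lists contains
 * [0, v1, 1, v2] and live_outs[7] == 3, so the chain for block 7 yields
 * v2, then v1, then stops.
 */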
973 /*
974 * Computes live-out sets for each basic block, per variable, using def-use chains.
975 *
976 * The implementation is based on algorithms 6 and 7 described in
977 * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot,
978 * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello. Inria TR RR-7503, 2011.
979 */
980 static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists)
981 {
982 ir_list block_queue, fuse_queue;
983 ir_ref i;
984
985 ir_list_init(&fuse_queue, 16);
986 ir_list_init(&block_queue, 256);
987
988 /* For each virtual register explore paths from all uses to definition */
989 for (i = ctx->insns_count - 1; i > 0; i--) {
990 uint32_t v = ctx->vregs[i];
991
992 if (v) {
993 uint32_t def_block = ctx->cfg_map[i];
994 ir_use_list *use_list = &ctx->use_lists[i];
995 ir_ref *p, n = use_list->count;
996
997 /* Collect all blocks where 'v' is used into a 'block_queue' */
998 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
999 ir_ref use = *p;
1000 ir_insn *insn = &ctx->ir_base[use];
1001
1002 if (UNEXPECTED(insn->op == IR_PHI)) {
1003 ir_ref n = insn->inputs_count - 1;
1004 ir_ref *p = insn->ops + 2; /* PHI data inputs */
1005 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
1006
1007 for (;n > 0; p++, q++, n--) {
1008 if (*p == i) {
1009 uint32_t pred_block = ctx->cfg_map[*q];
1010
1011 if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
1012 ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
1013 if (pred_block != def_block) {
1014 ir_list_push(&block_queue, pred_block);
1015 }
1016 }
1017 }
1018 }
1019 } else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) {
1020 while (1) {
1021 ir_use_list *use_list = &ctx->use_lists[use];
1022 ir_ref *p, n = use_list->count;
1023
1024 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1025 ir_ref use = *p;
1026
1027 if (ctx->rules[use] & IR_FUSED) {
1028 ir_list_push(&fuse_queue, use);
1029 } else {
1030 uint32_t use_block = ctx->cfg_map[use];
1031
1032 if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
1033 ir_list_push(&block_queue, use_block);
1034 }
1035 }
1036 }
1037 if (!ir_list_len(&fuse_queue)) {
1038 break;
1039 }
1040 use = ir_list_pop(&fuse_queue);
1041 }
1042 } else {
1043 uint32_t use_block = ctx->cfg_map[use];
1044
1045 /* Check if the virtual register is alive at the start of 'use_block' */
1046 if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
1047 ir_list_push(&block_queue, use_block);
1048 }
1049 }
1050 }
1051
1052 /* UP_AND_MARK: Traverse predecessor blocks until we reach the block where 'v' is defined */
1053 while (ir_list_len(&block_queue)) {
1054 uint32_t b = ir_list_pop(&block_queue);
1055 ir_block *bb = &ctx->cfg_blocks[b];
1056 uint32_t *p, n = bb->predecessors_count;
1057
1058 if (bb->flags & IR_BB_ENTRY) {
1059 /* live_in_push(ENTRY, v) */
1060 ir_insn *insn = &ctx->ir_base[bb->start];
1061
1062 IR_ASSERT(insn->op == IR_ENTRY);
1063 IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count);
1064 if (live_lists->len >= live_lists->a.size) {
1065 ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
1066 }
1067 ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]);
1068 ir_list_push_unchecked(live_lists, v);
1069 live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1;
1070 continue;
1071 }
1072 for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
1073 uint32_t pred_block = *p;
1074
1075 /* Check if 'pred_block' wasn't traversed before */
1076 if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
1077 /* Mark a virtual register 'v' alive at the end of 'pred_block' */
1078 ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
1079 if (pred_block != def_block) {
1080 ir_list_push(&block_queue, pred_block);
1081 }
1082 }
1083 }
1084 }
1085 }
1086 }
1087
1088 ir_list_free(&block_queue);
1089 ir_list_free(&fuse_queue);
1090 }
1091
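/*
 * Values that are live at an OSR entry must be bound to variables so that
 * they can be re-loaded at the entry point; live values without such a
 * binding are reported and trigger an assertion.  ctx->osr_entry_loads
 * records the loads as a flat list: block number, count, then "count"
 * references; a trailing 0 is appended once all entries are processed.
 */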
1092 static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, uint32_t b)
1093 {
1094 bool ok = 1;
1095 int count = 0;
1096 ir_list *list = (ir_list*)ctx->osr_entry_loads;
1097 ir_ref i;
1098
1099 while (pos) {
1100 i = ir_list_at(live_lists, pos);
1101 pos = ir_list_at(live_lists, pos - 1);
1102
1103 /* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
1104 ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
1105 ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
1106
1107 if (use_pos->op_num) {
1108 ir_ref *ops = ctx->ir_base[ref].ops;
1109 ref = ops[use_pos->op_num];
1110 }
1111
1112 if (ctx->ir_base[ref].op == IR_PARAM) {
1113 continue;
1114 }
1115 if (ctx->binding) {
1116 ir_ref var = ir_binding_find(ctx, ref);
1117 if (var < 0) {
1118 /* We may load the value at OSR entry-point */
1119 if (!count) {
1120 bb->flags &= ~IR_BB_EMPTY;
1121 bb->flags |= IR_BB_OSR_ENTRY_LOADS;
1122 if (!ctx->osr_entry_loads) {
1123 list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
1124 ir_list_init(list, 16);
1125 }
1126 ir_list_push(list, b);
1127 ir_list_push(list, 0);
1128 }
1129 ir_list_push(list, ref);
1130 count++;
1131 continue;
1132 }
1133 }
1134 fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
1135 ok = 0;
1136 }
1137
1138 if (!ok) {
1139 IR_ASSERT(0);
1140 }
1141 if (count) {
1142 ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);
1143
1144 #if 0
1145 /* ENTRY "clobbers" all registers */
1146 ir_ref ref = ctx->ir_base[bb->start].op1;
1147 ir_add_fixed_live_range(ctx, IR_REG_ALL,
1148 IR_DEF_LIVE_POS_FROM_REF(ref),
1149 IR_SAVE_LIVE_POS_FROM_REF(ref));
1150 #endif
1151 }
1152 }
1153
1154 static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b)
1155 {
1156 ir_ref stack[4];
1157 int stack_pos = 0;
1158 ir_target_constraints constraints;
1159 ir_insn *insn;
1160 uint32_t j, n, flags, def_flags;
1161 ir_ref *p, child;
1162 uint8_t use_flags;
1163 ir_reg reg;
1164 ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref);
1165 ir_live_pos use_pos;
1166 ir_live_interval *ival;
1167
1168 while (1) {
1169 IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);
1170
1171 if (!(ctx->rules[input] & IR_SIMPLE)) {
1172 def_flags = ir_get_target_constraints(ctx, input, &constraints);
1173 n = constraints.tmps_count;
1174 while (n > 0) {
1175 n--;
1176 if (constraints.tmp_regs[n].type) {
1177 ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
1178 } else {
1179 /* CPU specific constraints */
1180 ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
1181 pos + constraints.tmp_regs[n].start,
1182 pos + constraints.tmp_regs[n].end);
1183 }
1184 }
1185 } else {
1186 def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
1187 constraints.hints_count = 0;
1188 }
1189
1190 insn = &ctx->ir_base[input];
1191 flags = ir_op_flags[insn->op];
1192 IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
1193 n = IR_INPUT_EDGES_COUNT(flags);
1194 j = 1;
1195 p = insn->ops + j;
1196 if (flags & IR_OP_FLAG_CONTROL) {
1197 j++;
1198 p++;
1199 }
1200 for (; j <= n; j++, p++) {
1201 IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
1202 child = *p;
1203 if (child > 0) {
1204 uint32_t v = ctx->vregs[child];
1205
1206 if (v) {
1207 reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
1208 use_pos = pos;
1209 if (EXPECTED(reg == IR_REG_NONE)) {
1210 use_pos += IR_USE_SUB_REF;
1211 }
1212
1213 if (!IS_LIVE_IN_BLOCK(v, b)) {
1214 /* live.add(opd) */
1215 SET_LIVE_IN_BLOCK(v, b);
1216 /* intervals[opd].addRange(b.from, op.id) */
1217 ival = ir_add_live_range(ctx, v,
1218 IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1219 } else {
1220 ival = ctx->live_intervals[v];
1221 }
1222 use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
1223 ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
1224 } else if (ctx->rules[child] & IR_FUSED) {
1225 IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack[0])));
1226 stack[stack_pos++] = child;
1227 } else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
1228 ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
1229 }
1230 }
1231 }
1232 if (!stack_pos) {
1233 break;
1234 }
1235 input = stack[--stack_pos];
1236 }
1237 }
1238
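/*
 * Live range construction for the linked-list liveness representation:
 *  1. compute the live-out sets with ir_compute_live_sets();
 *  2. walk the blocks in reverse order, opening a range over the whole block
 *     for every virtual register in the block's live-out set;
 *  3. walk the instructions of each block in reverse order, shrinking ranges
 *     at definitions and extending or creating them at uses, while recording
 *     use positions, register hints and fixed (physical register) ranges.
 */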
1239 int ir_compute_live_ranges(ir_ctx *ctx)
1240 {
1241 uint32_t b, i, j, k, n, succ;
1242 ir_ref ref;
1243 ir_insn *insn;
1244 ir_block *bb, *succ_bb;
1245 uint32_t *live_outs;
1246 uint32_t *live_in_block;
1247 ir_list live_lists;
1248 ir_live_interval *ival;
1249
1250 if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
1251 return 0;
1252 }
1253
1254 if (ctx->rules) {
1255 ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
1256 memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
1257 }
1258
1259 /* Root of the list of IR_VARs */
1260 ctx->vars = IR_UNUSED;
1261
1262 /* Compute Live Ranges */
1263 ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
1264
1265 /* vregs + tmp + fixed + SCRATCH + ALL */
1266 ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
1267
1268 if (!ctx->arena) {
1269 ctx->arena = ir_arena_create(16 * 1024);
1270 }
1271
1272 live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t));
1273 ir_list_init(&live_lists, 1024);
1274 ir_compute_live_sets(ctx, live_outs, &live_lists);
1275 live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t));
1276
1277 /* for each basic block in reverse order */
1278 for (b = ctx->cfg_blocks_count; b > 0; b--) {
1279 bb = &ctx->cfg_blocks[b];
1280 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1281
1282 /* For each virtual register alive at the end of the block */
1283 n = live_outs[b];
1284 while (n != 0) {
1285 i = ir_list_at(&live_lists, n);
1286 SET_LIVE_IN_BLOCK(i, b);
1287 ir_add_prev_live_range(ctx, i,
1288 IR_START_LIVE_POS_FROM_REF(bb->start),
1289 IR_END_LIVE_POS_FROM_REF(bb->end));
1290 n = ir_list_at(&live_lists, n - 1);
1291 }
1292
1293 if (bb->successors_count == 1) {
1294 /* for each phi function of the successor */
1295 succ = ctx->cfg_edges[bb->successors];
1296 succ_bb = &ctx->cfg_blocks[succ];
1297 if (succ_bb->flags & IR_BB_HAS_PHI) {
1298 ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
1299 ir_ref n, *p;
1300
1301 k = ir_phi_input_number(ctx, succ_bb, b);
1302 IR_ASSERT(k != 0);
1303 n = use_list->count;
1304 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1305 ir_ref use = *p;
1306 insn = &ctx->ir_base[use];
1307 if (insn->op == IR_PHI) {
1308 ir_ref input = ir_insn_op(insn, k);
1309 if (input > 0) {
1310 uint32_t v = ctx->vregs[input];
1311
1312 if (v) {
1313 ival = ctx->live_intervals[v];
1314 ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
1315 }
1316 }
1317 }
1318 }
1319 }
1320 }
1321
1322 /* for each operation of the block in reverse order */
1323 ref = bb->end;
1324 insn = &ctx->ir_base[ref];
1325 if (insn->op == IR_END || insn->op == IR_LOOP_END) {
1326 ref = ctx->prev_ref[ref];
1327 }
1328 for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
1329 uint32_t def_flags;
1330 uint32_t flags;
1331 ir_ref *p;
1332 ir_target_constraints constraints;
1333 uint32_t v;
1334
1335 if (ctx->rules) {
1336 int n;
1337
1338 if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
1339 if (((ctx->rules[ref] & IR_RULE_MASK) == IR_VAR
1340 || (ctx->rules[ref] & IR_RULE_MASK) == IR_ALLOCA)
1341 && ctx->use_lists[ref].count > 0) {
1342 insn = &ctx->ir_base[ref];
1343 if (insn->op != IR_VADDR) {
1344 insn->op3 = ctx->vars;
1345 ctx->vars = ref;
1346 }
1347 }
1348 continue;
1349 }
1350
1351 def_flags = ir_get_target_constraints(ctx, ref, &constraints);
1352 n = constraints.tmps_count;
1353 while (n > 0) {
1354 n--;
1355 if (constraints.tmp_regs[n].type) {
1356 ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
1357 } else {
1358 /* CPU specific constraints */
1359 ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
1360 IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
1361 IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
1362 }
1363 }
1364 } else {
1365 def_flags = 0;
1366 constraints.def_reg = IR_REG_NONE;
1367 constraints.hints_count = 0;
1368 }
1369
1370 insn = &ctx->ir_base[ref];
1371 v = ctx->vregs[ref];
1372 if (v) {
1373 if (insn->op != IR_PHI) {
1374 ir_live_pos def_pos;
1375 ir_ref hint_ref = 0;
1376 ir_reg reg = constraints.def_reg;
1377
1378 if (reg != IR_REG_NONE) {
1379 def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
1380 if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
1381 /* the parameter register must be kept live until its value is copied */
1382 ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
1383 }
1384 } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
1385 if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
1386 hint_ref = insn->op1;
1387 }
1388 if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1389 def_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1390 } else {
1391 def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1392 }
1393 } else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1394 def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1395 } else {
1396 if (insn->op == IR_PARAM) {
1397 /* We may reuse parameter stack slot for spilling */
1398 ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
1399 } else if (insn->op == IR_VLOAD) {
1400 /* Load may be fused into the usage instruction */
1401 ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
1402 }
1403 def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
1404 }
1405 /* intervals[opd].setFrom(op.id) */
1406 ival = ir_fix_live_range(ctx, v,
1407 IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
1408 ival->type = insn->type;
1409 ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
1410 } else {
1411 /* PHI inputs must not be processed */
1412 ival = ctx->live_intervals[v];
1413 if (UNEXPECTED(!ival)) {
1414 /* Dead PHI */
1415 ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
1416 }
1417 ival->type = insn->type;
1418 ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
1419 continue;
1420 }
1421 }
1422
1423 IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
1424 flags = ir_op_flags[insn->op];
1425 j = 1;
1426 p = insn->ops + 1;
1427 if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
1428 j++;
1429 p++;
1430 }
1431 for (; j <= insn->inputs_count; j++, p++) {
1432 ir_ref input = *p;
1433 ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
1434 ir_live_pos use_pos;
1435 ir_ref hint_ref = 0;
1436 uint32_t v;
1437
1438 if (input > 0) {
1439 v = ctx->vregs[input];
1440 if (v) {
1441 use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1442 if (reg != IR_REG_NONE) {
1443 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1444 ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1445 } else if (def_flags & IR_DEF_REUSES_OP1_REG) {
1446 if (j == 1) {
1447 if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
1448 use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
1449 } else {
1450 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1451 }
1452 IR_ASSERT(ctx->vregs[ref]);
1453 hint_ref = ref;
1454 } else if (input == insn->op1) {
1455 /* Input is the same as "op1" */
1456 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1457 }
1458 }
1459 if (!IS_LIVE_IN_BLOCK(v, b)) {
1460 /* live.add(opd) */
1461 SET_LIVE_IN_BLOCK(v, b);
1462 /* intervals[opd].addRange(b.from, op.id) */
1463 ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
1464 } else {
1465 ival = ctx->live_intervals[v];
1466 }
1467 ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
1468 } else if (ctx->rules) {
1469 if (ctx->rules[input] & IR_FUSED) {
1470 ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
1471 } else {
1472 if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
1473 ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
1474 }
1475 if (reg != IR_REG_NONE) {
1476 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1477 ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1478 }
1479 }
1480 }
1481 } else if (reg != IR_REG_NONE) {
1482 use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
1483 ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
1484 }
1485 }
1486 }
1487 }
1488
1489 if (ctx->entries) {
1490 for (i = 0; i < ctx->entries_count; i++) {
1491 b = ctx->entries[i];
1492 bb = &ctx->cfg_blocks[b];
1493 IR_ASSERT(bb->predecessors_count == 1);
1494 ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b);
1495 }
1496 if (ctx->osr_entry_loads) {
1497 ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
1498 }
1499 }
1500
1501 ir_list_free(&live_lists);
1502 ir_mem_free(live_outs);
1503 ir_mem_free(live_in_block);
1504
1505 return 1;
1506 }
1507
1508 #endif
1509
1510 /* Live Ranges coalescing */
1511
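/*
 * Coalescing merges the virtual registers of a PHI and of its inputs when
 * their live intervals do not overlap, which removes the corresponding moves
 * that would otherwise be required during SSA deconstruction.  The helpers
 * below test for overlap and containment of interval range lists.
 */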
1512 static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2)
1513 {
1514 while (1) {
1515 if (lrg2->start < lrg1->end) {
1516 if (lrg1->start < lrg2->end) {
1517 return IR_MAX(lrg1->start, lrg2->start);
1518 } else {
1519 lrg2 = lrg2->next;
1520 if (!lrg2) {
1521 return 0;
1522 }
1523 }
1524 } else {
1525 lrg1 = lrg1->next;
1526 if (!lrg1) {
1527 return 0;
1528 }
1529 }
1530 }
1531 }
1532
1533 static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1534 {
1535 ir_live_interval *ival1 = ctx->live_intervals[r1];
1536 ir_live_interval *ival2 = ctx->live_intervals[r2];
1537
1538 #if 0
1539 if (ival2->range.start >= ival1->end
1540 || ival1->range.start >= ival2->end) {
1541 return 0;
1542 }
1543 #endif
1544 return ir_ivals_overlap(&ival1->range, &ival2->range);
1545 }
1546
1547 static bool ir_ivals_inside(ir_live_range *parent, ir_live_range *child)
1548 {
1549 do {
1550 while (parent && parent->end < child->start) {
1551 parent = parent->next;
1552 }
1553 if (!parent || parent->start > child->start || parent->end < child->end) {
1554 return 0;
1555 }
1556 child = child->next;
1557 } while (child);
1558 return 1;
1559 }
1560
1561 static bool ir_vregs_inside(ir_ctx *ctx, uint32_t parent, uint32_t child)
1562 {
1563 ir_live_interval *child_ival = ctx->live_intervals[child];
1564 ir_live_interval *parent_ival = ctx->live_intervals[parent];
1565
1566 if ((child_ival->flags | parent_ival->flags) & IR_LIVE_INTERVAL_COALESCED) {
1567 // TODO: Support valid cases with an already coalesced "parent_ival"
1568 return 0;
1569 }
1570 #if 0
1571 if (child_ival->end >= parent_ival->end) {
1572 return 0;
1573 }
1574 #endif
1575 return ir_ivals_inside(&parent_ival->range, &child_ival->range);
1576 }
1577
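/*
 * Joins interval r2 into r1: r2's ranges are added to r1, the two sorted use
 * position lists are merged, hint references between the two virtual
 * registers are dropped, and r2's interval slot is cleared.
 */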
1578 static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1579 {
1580 ir_live_interval *ival = ctx->live_intervals[r2];
1581 ir_live_range *live_range = &ival->range;
1582 ir_live_range *next;
1583 ir_use_pos *use_pos, *next_pos, **prev;
1584
1585 #if 0
1586 fprintf(stderr, "COALESCE %d -> %d\n", r2, r1);
1587 #endif
1588
1589 ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1590 live_range = live_range->next;
1591 while (live_range) {
1592 next = live_range->next;
1593 live_range->next = ctx->unused_ranges;
1594 ctx->unused_ranges = live_range;
1595 ir_add_live_range(ctx, r1, live_range->start, live_range->end);
1596 live_range = next;
1597 }
1598
1599 /* merge sorted use_pos lists */
1600 prev = &ctx->live_intervals[r1]->use_pos;
1601 use_pos = ival->use_pos;
1602 while (use_pos) {
1603 if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) {
1604 use_pos->hint_ref = 0;
1605 }
1606 while (*prev && ((*prev)->pos < use_pos->pos ||
1607 ((*prev)->pos == use_pos->pos &&
1608 (use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) {
1609 if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
1610 (*prev)->hint_ref = 0;
1611 }
1612 prev = &(*prev)->next;
1613 }
1614 next_pos = use_pos->next;
1615 use_pos->next = *prev;
1616 *prev = use_pos;
1617 prev = &use_pos->next;
1618 use_pos = next_pos;
1619 }
1620 use_pos = *prev;
1621 while (use_pos) {
1622 if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) {
1623 use_pos->hint_ref = 0;
1624 }
1625 use_pos = use_pos->next;
1626 }
1627
1628 ctx->live_intervals[r1]->flags |=
1629 IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS));
1630 if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) {
1631 ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD;
1632 }
1633 ctx->live_intervals[r2] = NULL;
1634
1635 // TODO: remember to reuse ???
1636 //ir_mem_free(ival);
1637 }
1638
1639 static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from, ir_ref to)
1640 {
1641 ir_ref i;
1642 uint16_t f1 = ctx->live_intervals[v1]->flags;
1643 uint16_t f2 = ctx->live_intervals[v2]->flags;
1644
1645 #if 0
1646 if (ctx->binding) {
1647 ir_ref b1 = ir_binding_find(ctx, from);
1648 ir_ref b2 = ir_binding_find(ctx, to);
1649 IR_ASSERT(b1 == b2);
1650 }
1651 #endif
1652 if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
1653 ir_vregs_join(ctx, v1, v2);
1654 ctx->vregs[to] = v1;
1655 } else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) {
1656 ir_vregs_join(ctx, v2, v1);
1657 ctx->vregs[from] = v2;
1658 } else if (from < to) {
1659 ir_vregs_join(ctx, v1, v2);
1660 if (f2 & IR_LIVE_INTERVAL_COALESCED) {
1661 for (i = 1; i < ctx->insns_count; i++) {
1662 if (ctx->vregs[i] == v2) {
1663 ctx->vregs[i] = v1;
1664 }
1665 }
1666 } else {
1667 ctx->vregs[to] = v1;
1668 }
1669 } else {
1670 ir_vregs_join(ctx, v2, v1);
1671 if (f1 & IR_LIVE_INTERVAL_COALESCED) {
1672 for (i = 1; i < ctx->insns_count; i++) {
1673 if (ctx->vregs[i] == v1) {
1674 ctx->vregs[i] = v2;
1675 }
1676 }
1677 } else {
1678 ctx->vregs[from] = v2;
1679 }
1680 }
1681 }
1682
1683 static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
1684 {
1685 if (IR_IS_CONST_REF(from) || ctx->vregs[from] != ctx->vregs[to]) {
1686 ctx->cfg_blocks[b].flags &= ~IR_BB_EMPTY;
1687 ctx->cfg_blocks[b].flags |= IR_BB_DESSA_MOVES;
1688 ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
1689 #if 0
1690 fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to);
1691 #endif
1692 }
1693 }
1694
1695 typedef struct _ir_coalesce_block {
1696 uint32_t b;
1697 uint32_t loop_depth;
1698 } ir_coalesce_block;
1699
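/*
 * qsort() comparator: blocks with a larger loop depth are processed first,
 * so coalescing decisions are made for the innermost loops before the
 * surrounding code; ties are broken by block number to keep the order
 * deterministic.
 */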
1700 static int ir_block_cmp(const void *b1, const void *b2)
1701 {
1702 ir_coalesce_block *d1 = (ir_coalesce_block*)b1;
1703 ir_coalesce_block *d2 = (ir_coalesce_block*)b2;
1704
1705 if (d1->loop_depth > d2->loop_depth) {
1706 return -1;
1707 } else if (d1->loop_depth == d2->loop_depth) {
1708 if (d1->b < d2->b) {
1709 return -1;
1710 } else {
1711 return 1;
1712 }
1713 } else {
1714 return 1;
1715 }
1716 }
1717
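/*
 * For a commutative two-operand instruction the operands may be swapped to
 * make coalescing of the result with the (new) first operand possible.
 * Besides exchanging op1/op2, the recorded use positions and the end of the
 * live range of the operand moved into the op2 slot are adjusted to keep the
 * interval data consistent.
 */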
1718 static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1719 {
1720 ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1721 ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1722 ir_live_interval *ival;
1723 ir_live_range *r;
1724 ir_use_pos *p, *p1 = NULL, *p2 = NULL;
1725 ir_ref tmp;
1726
1727 tmp = insn->op1;
1728 insn->op1 = insn->op2;
1729 insn->op2 = tmp;
1730
1731 ival = ctx->live_intervals[ctx->vregs[insn->op1]];
1732 p = ival->use_pos;
1733 while (p) {
1734 if (p->pos == pos) {
1735 p->pos = load_pos;
1736 p->op_num = 1;
1737 p1 = p;
1738 break;
1739 }
1740 p = p->next;
1741 }
1742
1743 ival = ctx->live_intervals[ctx->vregs[i]];
1744 p = ival->use_pos;
1745 while (p) {
1746 if (p->pos == load_pos) {
1747 p->hint_ref = insn->op1;
1748 break;
1749 }
1750 p = p->next;
1751 }
1752
1753 if (insn->op2 > 0 && ctx->vregs[insn->op2]) {
1754 ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1755 r = &ival->range;
1756 while (r) {
1757 if (r->end == load_pos) {
1758 r->end = pos;
1759 if (!r->next) {
1760 ival->end = pos;
1761 }
1762 break;
1763 }
1764 r = r->next;
1765 }
1766 p = ival->use_pos;
1767 while (p) {
1768 if (p->pos == load_pos) {
1769 p->pos = pos;
1770 p->op_num = 2;
1771 p2 = p;
1772 break;
1773 }
1774 p = p->next;
1775 }
1776 }
1777 if (p1 && p2) {
1778 uint8_t tmp = p1->flags;
1779 p1->flags = p2->flags;
1780 p2->flags = tmp;
1781 }
1782 }
1783
1784 static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def)
1785 {
1786 ir_use_pos *p;
1787 ir_reg r1 = IR_REG_NONE;
1788 ir_reg r2 = IR_REG_NONE;
1789
1790 p = ctx->live_intervals[use]->use_pos;
1791 while (p) {
1792 if (IR_LIVE_POS_TO_REF(p->pos) == ref) {
1793 break;
1794 }
1795 if (p->hint != IR_REG_NONE) {
1796 r1 = p->hint;
1797 }
1798 p = p->next;
1799 }
1800
1801 p = ctx->live_intervals[def]->use_pos;
1802 while (p) {
1803 if (IR_LIVE_POS_TO_REF(p->pos) > ref) {
1804 if (p->hint != IR_REG_NONE) {
1805 r2 = p->hint;
1806 break;
1807 }
1808 }
1809 p = p->next;
1810 }
1811 return r1 != r2 && r1 != IR_REG_NONE && r2 != IR_REG_NONE;
1812 }
1813
1814 static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
1815 {
1816 if (ctx->vregs[insn->op1]
1817 && ctx->vregs[insn->op1] != ctx->vregs[i]
1818 && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i])
1819 && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) {
1820 /* pass */
1821 } else {
1822 if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) {
1823 ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
1824 ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
1825 ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]];
1826 ir_live_range *r = &ival->range;
1827
1828 if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) {
1829 return 0;
1830 }
1831 while (r) {
1832 if (r->end == pos) {
1833 r->end = load_pos;
1834 if (!r->next) {
1835 ival->end = load_pos;
1836 }
1837 if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i])
1838 && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) {
1839 ir_swap_operands(ctx, i, insn);
1840 return 1;
1841 } else {
1842 r->end = pos;
1843 if (!r->next) {
1844 ival->end = pos;
1845 }
1846 }
1847 break;
1848 }
1849 r = r->next;
1850 }
1851 }
1852 }
1853 return 0;
1854 }
1855
1856 int ir_coalesce(ir_ctx *ctx)
1857 {
1858 uint32_t b, n, succ, pred_b, count = 0;
1859 ir_ref *p, use, input, k, j;
1860 ir_block *bb, *succ_bb;
1861 ir_use_list *use_list;
1862 ir_insn *insn;
1863 ir_bitset visited;
1864 ir_coalesce_block *list;
1865 bool compact = 0;
1866
1867 	/* Collect a list of blocks which are predecessors of blocks with PHI functions */
1868 list = ir_mem_malloc(sizeof(ir_coalesce_block) * ctx->cfg_blocks_count);
1869 visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
1870 for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
1871 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1872 if (bb->flags & IR_BB_HAS_PHI) {
1873 k = bb->predecessors_count;
1874 if (k > 1) {
1875 use_list = &ctx->use_lists[bb->start];
1876 n = use_list->count;
1877 IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
1878 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1879 use = *p;
1880 insn = &ctx->ir_base[use];
1881 if (insn->op == IR_PHI) {
1882 do {
1883 k--;
1884 pred_b = ctx->cfg_edges[bb->predecessors + k];
1885 if (!ir_bitset_in(visited, pred_b)) {
1886 ir_bitset_incl(visited, pred_b);
1887 list[count].b = pred_b;
1888 list[count].loop_depth = ctx->cfg_blocks[pred_b].loop_depth;
1889 count++;
1890 }
1891 } while (k > 0);
1892 break;
1893 }
1894 }
1895 }
1896 }
1897 }
1898 ir_mem_free(visited);
1899
1900 /* Sort blocks according to their loop depth */
1901 qsort(list, count, sizeof(ir_coalesce_block), ir_block_cmp);
1902
1903 while (count > 0) {
1904 uint32_t i;
1905
1906 count--;
1907 b = list[count].b;
1908 bb = &ctx->cfg_blocks[b];
1909 IR_ASSERT(bb->successors_count == 1);
1910 succ = ctx->cfg_edges[bb->successors];
1911 succ_bb = &ctx->cfg_blocks[succ];
1912 IR_ASSERT(succ_bb->predecessors_count > 1);
1913 k = ir_phi_input_number(ctx, succ_bb, b);
1914 use_list = &ctx->use_lists[succ_bb->start];
1915 n = use_list->count;
1916 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
1917 use = *p;
1918 insn = &ctx->ir_base[use];
1919 if (insn->op == IR_PHI) {
1920 input = ir_insn_op(insn, k);
1921 if (input > 0 && ctx->vregs[input]) {
1922 uint32_t v1 = ctx->vregs[input];
1923 uint32_t v2 = ctx->vregs[use];
1924
1925 if (v1 == v2) {
1926 /* already coalesced */
1927 } else {
1928 if (!ir_vregs_overlap(ctx, v1, v2)) {
1929 ir_vregs_coalesce(ctx, v1, v2, input, use);
1930 compact = 1;
1931 } else {
1932 #if 1
1933 if (ctx->rules && (ctx->rules[input] & IR_MAY_SWAP)) {
1934 ir_insn *input_insn = &ctx->ir_base[input];
1935
1936 IR_ASSERT(ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE);
1937 if (input_insn->op2 == use
1938 && input_insn->op1 != use
1939 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)) {
1940 ir_live_range *r = &ctx->live_intervals[v2]->range;
1941
1942 do {
1943 if (r->end == IR_USE_LIVE_POS_FROM_REF(input)) {
1944 break;
1945 }
1946 r = r->next;
1947 } while (r);
1948 if (r) {
1949 r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
1950 if (!r->next) {
1951 ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
1952 }
1953 if (ir_vregs_overlap(ctx, v1, v2)) {
1954 r->end = IR_USE_LIVE_POS_FROM_REF(input);
1955 if (!r->next) {
1956 ctx->live_intervals[v2]->end = IR_USE_LIVE_POS_FROM_REF(input);
1957 }
1958 } else {
1959 ir_swap_operands(ctx, input, input_insn);
1960 IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
1961 ir_vregs_coalesce(ctx, v1, v2, input, use);
1962 compact = 1;
1963 continue;
1964 }
1965 }
1966 }
1967 }
1968 #endif
1969 ir_add_phi_move(ctx, b, input, use);
1970 }
1971 }
1972 } else {
1973 /* Move for constant input */
1974 ir_add_phi_move(ctx, b, input, use);
1975 }
1976 }
1977 }
1978 }
1979 ir_mem_free(list);
1980
1981 ir_hint_propagation(ctx);
1982
1983 if (ctx->rules) {
1984 /* try to swap operands of commutative instructions for better register allocation */
1985 uint32_t *rule = ctx->rules + 1;
1986 ir_ref i;
1987
1988 for (i = 1; i < ctx->insns_count; rule++, i++) {
1989 if ((*rule) & (IR_MAY_SWAP|IR_MAY_REUSE)) {
1990 insn = &ctx->ir_base[i];
1991 IR_ASSERT(ctx->vregs[i]);
1992 if ((*rule) & IR_MAY_SWAP) {
1993 IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE);
1994 if (ctx->live_intervals[ctx->vregs[i]]->use_pos
1995 && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
1996 && insn->op2 > 0
1997 && insn->op1 > 0
1998 && insn->op1 != insn->op2) {
1999 ir_try_swap_operands(ctx, i, insn);
2000 }
2001 } else {
2002 IR_ASSERT((*rule) & IR_MAY_REUSE);
2003 if (insn->op1 > 0
2004 && ctx->vregs[insn->op1]
2005 && ctx->vregs[i] != ctx->vregs[insn->op1]) {
2006 if (ir_vregs_inside(ctx, ctx->vregs[insn->op1], ctx->vregs[i])) {
2007 if (ctx->binding) {
2008 ir_ref b1 = ir_binding_find(ctx, i);
2009 ir_ref b2 = ir_binding_find(ctx, insn->op1);
2010 if (b1 && b1 != b2) {
2011 continue;
2012 }
2013 }
2014 ir_vregs_coalesce(ctx, ctx->vregs[i], ctx->vregs[insn->op1], i, insn->op1);
2015 compact = 1;
2016 }
2017 }
2018 }
2019 }
2020 }
2021 }
2022
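	/* Coalescing leaves holes in the virtual register numbering, so renumber the
	 * surviving intervals compactly through the "xlat" table, shift the fixed and
	 * temporary interval slots down, and remap ctx->vregs accordingly. */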
2023 if (compact) {
2024 ir_ref i, n;
2025 uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t));
2026
2027 for (i = 1, n = 1; i <= ctx->vregs_count; i++) {
2028 if (ctx->live_intervals[i]) {
2029 xlat[i] = n;
2030 if (i != n) {
2031 ctx->live_intervals[n] = ctx->live_intervals[i];
2032 ctx->live_intervals[n]->vreg = n;
2033 }
2034 n++;
2035 }
2036 }
2037 n--;
2038 if (n != ctx->vregs_count) {
2039 j = ctx->vregs_count - n;
2040 			/* vregs + tmp + fixed + SCRATCH + ALL */
2041 for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
2042 ctx->live_intervals[i] = ctx->live_intervals[i + j];
2043 if (ctx->live_intervals[i]) {
2044 ctx->live_intervals[i]->vreg = i;
2045 }
2046 }
2047 for (j = 1; j < ctx->insns_count; j++) {
2048 if (ctx->vregs[j]) {
2049 ctx->vregs[j] = xlat[ctx->vregs[j]];
2050 }
2051 }
2052 ctx->vregs_count = n;
2053 }
2054 ir_mem_free(xlat);
2055 }
2056
2057 return 1;
2058 }
2059
2060 /* SSA Deconstruction */
2061
2062 int ir_compute_dessa_moves(ir_ctx *ctx)
2063 {
2064 uint32_t b, i, n;
2065 ir_ref j, k, *p, use;
2066 ir_block *bb;
2067 ir_use_list *use_list;
2068 ir_insn *insn;
2069
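	/* A predecessor block needs DESSA moves if some PHI in its successor receives a
	 * constant or a value living in a different virtual register along that edge. */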
2070 for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
2071 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
2072 k = bb->predecessors_count;
2073 if (k > 1) {
2074 use_list = &ctx->use_lists[bb->start];
2075 n = use_list->count;
2076 if (n > 1) {
2077 IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
2078 k++;
2079 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
2080 use = *p;
2081 insn = &ctx->ir_base[use];
2082 if (insn->op == IR_PHI) {
2083 for (j = 2; j <= k; j++) {
2084 if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
2085 int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
2086 ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
2087 ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
2088 ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
2089 }
2090 }
2091 }
2092 }
2093 }
2094 }
2095 }
2096 return 1;
2097 }
2098
2099 /*
2100 * Parallel copy sequentialization algorithm
2101 *
2102  * The implementation is based on algorithm 1 described in
2103 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
2104 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
2105 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
2106 * pp. 114-125, doi: 10.1109/CGO.2009.19.
2107 */
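/*
 * For illustration: for the cyclic parallel copy {b <- a, c <- b, a <- c} no
 * destination is initially "ready" (every source is still needed), so the cycle is
 * broken by saving one source into a temporary (emit_copy(..., src, 0)); its
 * destination then becomes ready, the remaining copies are emitted in dependency
 * order, and the temporary (ref 0) serves as the source of the final copy.
 */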
2108 int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
2109 {
2110 uint32_t succ, k, n = 0;
2111 ir_block *bb, *succ_bb;
2112 ir_use_list *use_list;
2113 ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
2114 ir_ref s, d;
2115 ir_insn *insn;
2116 uint32_t len;
2117 ir_bitset todo, ready;
2118 bool have_constants_or_addresses = 0;
2119
2120 bb = &ctx->cfg_blocks[b];
2121 if (!(bb->flags & IR_BB_DESSA_MOVES)) {
2122 return 0;
2123 }
2124 IR_ASSERT(bb->successors_count == 1);
2125 succ = ctx->cfg_edges[bb->successors];
2126 succ_bb = &ctx->cfg_blocks[succ];
2127 IR_ASSERT(succ_bb->predecessors_count > 1);
2128 use_list = &ctx->use_lists[succ_bb->start];
2129
2130 k = ir_phi_input_number(ctx, succ_bb, b);
2131
2132 loc = ir_mem_malloc((ctx->vregs_count + 1) * 4 * sizeof(ir_ref));
2133 pred = loc + ctx->vregs_count + 1;
2134 src = pred + ctx->vregs_count + 1;
2135 dst = src + ctx->vregs_count + 1;
2136 len = ir_bitset_len(ctx->vregs_count + 1);
2137 todo = ir_bitset_malloc(ctx->vregs_count + 1);
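	/* For a destination vreg "d" and a source vreg "s": dst[d] is the PHI that defines d,
	 * src[s] is the IR ref currently providing the value of s, pred[d] is the source vreg
	 * copied into d, and loc[s] is the vreg where the original value of s currently lives. */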
2138
2139 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
2140 ref = *p;
2141 insn = &ctx->ir_base[ref];
2142 if (insn->op == IR_PHI) {
2143 input = ir_insn_op(insn, k);
2144 if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2145 have_constants_or_addresses = 1;
2146 } else if (ctx->vregs[input] != ctx->vregs[ref]) {
2147 s = ctx->vregs[input];
2148 d = ctx->vregs[ref];
2149 src[s] = input;
2150 dst[d] = ref;
2151 loc[d] = pred[s] = 0;
2152 ir_bitset_incl(todo, d);
2153 n++;
2154 }
2155 }
2156 }
2157
2158 if (n > 0) {
2159 src[0] = dst[0] = 0;
2160 ready = ir_bitset_malloc(ctx->vregs_count + 1);
2161 IR_BITSET_FOREACH(todo, len, d) {
2162 ref = dst[d];
2163 insn = &ctx->ir_base[ref];
2164 IR_ASSERT(insn->op == IR_PHI);
2165 input = ir_insn_op(insn, k);
2166 s = ctx->vregs[input];
2167 loc[s] = s;
2168 pred[d] = s;
2169 } IR_BITSET_FOREACH_END();
2170
2171 IR_BITSET_FOREACH(todo, len, i) {
2172 if (!loc[i]) {
2173 ir_bitset_incl(ready, i);
2174 }
2175 } IR_BITSET_FOREACH_END();
2176
2177 while (1) {
2178 ir_ref a, b, c;
2179
2180 while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
2181 a = pred[b];
2182 c = loc[a];
2183 emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);
2184 ir_bitset_excl(todo, b);
2185 loc[a] = b;
2186 src[b] = dst[b];
2187 if (a == c && pred[a]) {
2188 ir_bitset_incl(ready, a);
2189 }
2190 }
2191 b = ir_bitset_pop_first(todo, len);
2192 if (b < 0) {
2193 break;
2194 }
2195 IR_ASSERT(b != loc[pred[b]]);
2196 emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);
2197 loc[b] = 0;
2198 ir_bitset_incl(ready, b);
2199 }
2200
2201 ir_mem_free(ready);
2202 }
2203
2204 ir_mem_free(todo);
2205 ir_mem_free(loc);
2206
2207 if (have_constants_or_addresses) {
2208 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
2209 ref = *p;
2210 insn = &ctx->ir_base[ref];
2211 if (insn->op == IR_PHI) {
2212 input = ir_insn_op(insn, k);
2213 if (IR_IS_CONST_REF(input) || !ctx->vregs[input]) {
2214 emit_copy(ctx, insn->type, input, ref);
2215 }
2216 }
2217 }
2218 }
2219
2220 return 1;
2221 }
2222
2223 /* Linear Scan Register Allocation */
2224
2225 #ifdef IR_DEBUG
2226 # define IR_LOG_LSRA(action, ival, comment) do { \
2227 if (ctx->flags & IR_DEBUG_RA) { \
2228 ir_live_interval *_ival = (ival); \
2229 ir_live_pos _start = _ival->range.start; \
2230 ir_live_pos _end = _ival->end; \
2231 fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
2232 (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2233 IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2234 IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
2235 } \
2236 } while (0)
2237 # define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \
2238 if (ctx->flags & IR_DEBUG_RA) { \
2239 ir_live_interval *_ival = (ival); \
2240 ir_live_pos _start = _ival->range.start; \
2241 ir_live_pos _end = _ival->end; \
2242 fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
2243 (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2244 IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2245 IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2246 ir_reg_name(_ival->reg, _ival->type)); \
2247 } \
2248 } while (0)
2249 # define IR_LOG_LSRA_SPLIT(ival, pos) do { \
2250 if (ctx->flags & IR_DEBUG_RA) { \
2251 ir_live_interval *_ival = (ival); \
2252 ir_live_pos _start = _ival->range.start; \
2253 ir_live_pos _end = _ival->end; \
2254 ir_live_pos _pos = (pos); \
2255 fprintf(stderr, " ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
2256 (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2257 IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2258 IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2259 IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2260 } \
2261 } while (0)
2262 # define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \
2263 if (ctx->flags & IR_DEBUG_RA) { \
2264 ir_live_interval *_ival = (ival); \
2265 ir_live_pos _start = _ival->range.start; \
2266 ir_live_pos _end = _ival->end; \
2267 ir_live_pos _pos = (pos); \
2268 fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \
2269 (_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
2270 IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
2271 IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
2272 ir_reg_name(_ival->reg, _ival->type), \
2273 IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
2274 } \
2275 } while (0)
2276 #else
2277 # define IR_LOG_LSRA(action, ival, comment)
2278 # define IR_LOG_LSRA_ASSIGN(action, ival, comment)
2279 # define IR_LOG_LSRA_SPLIT(ival, pos)
2280 # define IR_LOG_LSRA_CONFLICT(action, ival, pos)
2281 #endif
2282
2283 static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position)
2284 {
2285 ir_live_range *live_range = &ival->range;
2286
2287 do {
2288 if (position < live_range->end) {
2289 return position >= live_range->start;
2290 }
2291 live_range = live_range->next;
2292 } while (live_range);
2293
2294 return 0;
2295 }
2296
2297 static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to)
2298 {
2299 ir_live_range *r = &ival->range;
2300
2301 while (r) {
2302 if (from < r->start) {
2303 return 1;
2304 } else if (to <= r->end) {
2305 return 0;
2306 }
2307 r = r->next;
2308 }
2309 return 0;
2310 }
2311
2312
2313 static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2314 {
2315 ir_live_pos ret = 0;
2316 ir_use_pos *p = ival->use_pos;
2317
2318 while (p && p->pos <= pos) {
2319 if (p->flags & flags) {
2320 ret = p->pos;
2321 }
2322 p = p->next;
2323 }
2324 return ret;
2325 }
2326
2327 static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
2328 {
2329 ir_use_pos *p = ival->use_pos;
2330
2331 while (p && p->pos < pos) {
2332 p = p->next;
2333 }
2334 if (p && p->pos == pos && p->op_num != 0) {
2335 p = p->next;
2336 }
2337 while (p && !(p->flags & flags)) {
2338 p = p->next;
2339 }
2340 return p ? p->pos : 0x7fffffff;
2341 }
2342
2343 static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags)
2344 {
2345 ir_use_pos *p = ival->use_pos;
2346
2347 while (p && !(p->flags & flags)) {
2348 p = p->next;
2349 }
2350 return p ? p->pos : 0x7fffffff;
2351 }
2352
2353 static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
2354 {
2355 ir_ref ref = IR_LIVE_POS_TO_REF(pos);
2356 uint32_t b = ctx->cfg_map[ref];
2357
2358 while (!b) {
2359 ref--;
2360 IR_ASSERT(ref > 0);
2361 b = ctx->cfg_map[ref];
2362 }
2363 IR_ASSERT(b <= ctx->cfg_blocks_count);
2364 return &ctx->cfg_blocks[b];
2365 }
2366
2367 static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max)
2368 {
2369 ir_block *min_bb, *max_bb;
2370
2371 if (min_pos == max_pos) {
2372 return max_pos;
2373 }
2374
2375 IR_ASSERT(min_pos < max_pos);
2376 IR_ASSERT(min_pos >= ival->range.start);
2377 IR_ASSERT(max_pos < ival->end);
2378
2379 min_bb = ir_block_from_live_pos(ctx, min_pos);
2380 max_bb = ir_block_from_live_pos(ctx, max_pos);
2381
2382 if (min_bb == max_bb
2383 || ir_ival_has_hole_between(ival, min_pos, max_pos)) { // TODO: ???
2384 return (prefer_max) ? max_pos : min_pos;
2385 }
2386
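	/* Prefer a split position outside of loops: when max_pos is inside a loop, walk up
	 * through the dominator of the loop header and split at the end of that block, so
	 * that spill/reload code is kept off the loop's hot path. */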
2387 if (max_bb->loop_depth > 0) {
2388 /* Split at the end of the loop entry */
2389 do {
2390 ir_block *bb;
2391
2392 if (max_bb->flags & IR_BB_LOOP_HEADER) {
2393 bb = max_bb;
2394 } else {
2395 IR_ASSERT(max_bb->loop_header);
2396 bb = &ctx->cfg_blocks[max_bb->loop_header];
2397 }
2398 bb = &ctx->cfg_blocks[bb->idom];
2399 if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) {
2400 break;
2401 }
2402 max_bb = bb;
2403 } while (max_bb->loop_depth > 0);
2404
2405 if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) {
2406 return IR_DEF_LIVE_POS_FROM_REF(max_bb->end);
2407 }
2408 }
2409
2410 if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) {
2411 return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start);
2412 } else {
2413 // TODO: "min_bb" is in a deeper loop than "max_bb" ???
2414 return max_pos;
2415 }
2416 }
2417
2418 static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos)
2419 {
2420 ir_live_interval *child;
2421 ir_live_range *p, *prev;
2422 ir_use_pos *use_pos, *prev_use_pos;
2423
2424 IR_LOG_LSRA_SPLIT(ival, pos);
2425 IR_ASSERT(pos > ival->range.start);
2426 ctx->flags2 |= IR_RA_HAVE_SPLITS;
2427
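	/* Move the live ranges and use positions at or after "pos" into a new child interval
	 * (linked through ival->next); the original interval keeps the part before "pos" and
	 * the hint flags of both halves are recomputed from their remaining use positions. */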
2428 p = &ival->range;
2429 prev = NULL;
2430 while (p && pos >= p->end) {
2431 prev = p;
2432 p = prev->next;
2433 }
2434 IR_ASSERT(p);
2435
2436 if (pos < p->start) {
2437 /* split between ranges */
2438 pos = p->start;
2439 }
2440
2441 use_pos = ival->use_pos;
2442 prev_use_pos = NULL;
2443
2444 ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS);
2445 if (p->start == pos) {
2446 while (use_pos && pos > use_pos->pos) {
2447 if (use_pos->hint != IR_REG_NONE) {
2448 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2449 }
2450 if (use_pos->hint_ref > 0) {
2451 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2452 }
2453 prev_use_pos = use_pos;
2454 use_pos = use_pos->next;
2455 }
2456 } else {
2457 while (use_pos && pos >= use_pos->pos) {
2458 if (use_pos->hint != IR_REG_NONE) {
2459 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2460 }
2461 if (use_pos->hint_ref > 0) {
2462 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2463 }
2464 prev_use_pos = use_pos;
2465 use_pos = use_pos->next;
2466 }
2467 }
2468
2469 child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
2470 child->type = ival->type;
2471 child->reg = IR_REG_NONE;
2472 child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD;
2473 child->vreg = ival->vreg;
2474 child->stack_spill_pos = -1; // not allocated
2475 child->range.start = pos;
2476 child->range.end = p->end;
2477 child->range.next = p->next;
2478 child->end = ival->end;
2479 child->use_pos = prev_use_pos ? prev_use_pos->next : use_pos;
2480
2481 child->next = ival->next;
2482 ival->next = child;
2483
2484 if (pos == p->start) {
2485 prev->next = NULL;
2486 ival->end = prev->end;
2487 /* Cache to reuse */
2488 p->next = ctx->unused_ranges;
2489 ctx->unused_ranges = p;
2490 } else {
2491 p->end = ival->end = pos;
2492 p->next = NULL;
2493 }
2494 if (prev_use_pos) {
2495 prev_use_pos->next = NULL;
2496 } else {
2497 ival->use_pos = NULL;
2498 }
2499
2500 use_pos = child->use_pos;
2501 while (use_pos) {
2502 if (use_pos->hint != IR_REG_NONE) {
2503 child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
2504 }
2505 if (use_pos->hint_ref > 0) {
2506 child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
2507 }
2508 use_pos = use_pos->next;
2509 }
2510
2511 return child;
2512 }
2513
2514 static int32_t ir_allocate_small_spill_slot(ir_ctx *ctx, size_t size, ir_reg_alloc_data *data)
2515 {
2516 int32_t ret;
2517
2518 IR_ASSERT(size == 0 || size == 1 || size == 2 || size == 4 || size == 8);
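	/* First try to reuse a slot of exactly the requested size freed by an already handled
	 * interval; otherwise carve the slot out of a larger free slot (remembering the unused
	 * remainder in data->unused_slot_*) or extend the stack frame. */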
2519 if (data->handled && data->handled[size]) {
2520 ret = data->handled[size]->stack_spill_pos;
2521 data->handled[size] = data->handled[size]->list_next;
2522 } else if (size == 8) {
2523 ret = ctx->stack_frame_size;
2524 ctx->stack_frame_size += 8;
2525 } else if (size == 4) {
2526 if (data->unused_slot_4) {
2527 ret = data->unused_slot_4;
2528 data->unused_slot_4 = 0;
2529 } else if (data->handled && data->handled[8]) {
2530 ret = data->handled[8]->stack_spill_pos;
2531 data->handled[8] = data->handled[8]->list_next;
2532 data->unused_slot_4 = ret + 4;
2533 } else {
2534 ret = ctx->stack_frame_size;
2535 if (sizeof(void*) == 8) {
2536 data->unused_slot_4 = ctx->stack_frame_size + 4;
2537 ctx->stack_frame_size += 8;
2538 } else {
2539 ctx->stack_frame_size += 4;
2540 }
2541 }
2542 } else if (size == 2) {
2543 if (data->unused_slot_2) {
2544 ret = data->unused_slot_2;
2545 data->unused_slot_2 = 0;
2546 } else if (data->unused_slot_4) {
2547 ret = data->unused_slot_4;
2548 data->unused_slot_2 = data->unused_slot_4 + 2;
2549 data->unused_slot_4 = 0;
2550 } else if (data->handled && data->handled[4]) {
2551 ret = data->handled[4]->stack_spill_pos;
2552 data->handled[4] = data->handled[4]->list_next;
2553 data->unused_slot_2 = ret + 2;
2554 } else if (data->handled && data->handled[8]) {
2555 ret = data->handled[8]->stack_spill_pos;
2556 data->handled[8] = data->handled[8]->list_next;
2557 data->unused_slot_2 = ret + 2;
2558 data->unused_slot_4 = ret + 4;
2559 } else {
2560 ret = ctx->stack_frame_size;
2561 data->unused_slot_2 = ctx->stack_frame_size + 2;
2562 if (sizeof(void*) == 8) {
2563 data->unused_slot_4 = ctx->stack_frame_size + 4;
2564 ctx->stack_frame_size += 8;
2565 } else {
2566 ctx->stack_frame_size += 4;
2567 }
2568 }
2569 } else if (size == 1) {
2570 if (data->unused_slot_1) {
2571 ret = data->unused_slot_1;
2572 data->unused_slot_1 = 0;
2573 } else if (data->unused_slot_2) {
2574 ret = data->unused_slot_2;
2575 data->unused_slot_1 = data->unused_slot_2 + 1;
2576 data->unused_slot_2 = 0;
2577 } else if (data->unused_slot_4) {
2578 ret = data->unused_slot_4;
2579 data->unused_slot_1 = data->unused_slot_4 + 1;
2580 data->unused_slot_2 = data->unused_slot_4 + 2;
2581 data->unused_slot_4 = 0;
2582 } else if (data->handled && data->handled[2]) {
2583 ret = data->handled[2]->stack_spill_pos;
2584 data->handled[2] = data->handled[2]->list_next;
2585 data->unused_slot_1 = ret + 1;
2586 } else if (data->handled && data->handled[4]) {
2587 ret = data->handled[4]->stack_spill_pos;
2588 data->handled[4] = data->handled[4]->list_next;
2589 data->unused_slot_1 = ret + 1;
2590 data->unused_slot_2 = ret + 2;
2591 } else if (data->handled && data->handled[8]) {
2592 ret = data->handled[8]->stack_spill_pos;
2593 data->handled[8] = data->handled[8]->list_next;
2594 data->unused_slot_1 = ret + 1;
2595 data->unused_slot_2 = ret + 2;
2596 data->unused_slot_4 = ret + 4;
2597 } else {
2598 ret = ctx->stack_frame_size;
2599 data->unused_slot_1 = ctx->stack_frame_size + 1;
2600 data->unused_slot_2 = ctx->stack_frame_size + 2;
2601 if (sizeof(void*) == 8) {
2602 data->unused_slot_4 = ctx->stack_frame_size + 4;
2603 ctx->stack_frame_size += 8;
2604 } else {
2605 ctx->stack_frame_size += 4;
2606 }
2607 }
2608 } else {
2609 ret = IR_NULL;
2610 }
2611 return ret;
2612 }
2613
2614 int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data)
2615 {
2616 return ir_allocate_small_spill_slot(ctx, ir_type_size[type], data);
2617 }
2618
2619 static int32_t ir_allocate_big_spill_slot(ir_ctx *ctx, int32_t size, ir_reg_alloc_data *data)
2620 {
2621 int32_t ret;
2622
2623 if (size <= 8) {
2624 if (size == 3) {
2625 size = 4;
2626 } else if (size > 4 && size < 8) {
2627 size = 8;
2628 }
2629 return ir_allocate_small_spill_slot(ctx, size, data);
2630 }
2631
2632 	/* Align stack-allocated data to 16 bytes */
2633 ctx->flags2 |= IR_16B_FRAME_ALIGNMENT;
2634 ret = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16);
2635 size = IR_ALIGNED_SIZE(size, 8);
2636 ctx->stack_frame_size = ret + size;
2637
2638 return ret;
2639 }
2640
2641 static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2642 {
2643 ir_use_pos *use_pos;
2644 ir_reg reg;
2645
2646 use_pos = ival->use_pos;
2647 while (use_pos) {
2648 reg = use_pos->hint;
2649 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2650 return reg;
2651 }
2652 use_pos = use_pos->next;
2653 }
2654
2655 return IR_REG_NONE;
2656 }
2657
2658 static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos)
2659 {
2660 ir_use_pos *use_pos;
2661 ir_reg reg;
2662
2663 if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2664 use_pos = ival->use_pos;
2665 while (use_pos) {
2666 reg = use_pos->hint;
2667 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2668 if (ival->end <= freeUntilPos[reg]) {
2669 /* register available for the whole interval */
2670 return reg;
2671 }
2672 }
2673 use_pos = use_pos->next;
2674 }
2675 }
2676
2677 if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) {
2678 use_pos = ival->use_pos;
2679 while (use_pos) {
2680 if (use_pos->hint_ref > 0) {
2681 reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2682 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2683 if (ival->end <= freeUntilPos[reg]) {
2684 /* register available for the whole interval */
2685 return reg;
2686 }
2687 }
2688 }
2689 use_pos = use_pos->next;
2690 }
2691 }
2692
2693 return IR_REG_NONE;
2694 }
2695
2696 static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2697 {
2698 ir_use_pos *use_pos;
2699 ir_reg reg;
2700
2701 use_pos = ival->use_pos;
2702 while (use_pos) {
2703 reg = use_pos->hint;
2704 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2705 return reg;
2706 } else if (use_pos->hint_ref > 0) {
2707 reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2708 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2709 return reg;
2710 }
2711 }
2712 use_pos = use_pos->next;
2713 }
2714
2715 return IR_REG_NONE;
2716 }
2717
2718 static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2719 {
2720 ir_live_pos pos = ival->range.start;
2721
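	/* Keep the "unhandled" list sorted by start position; on equal starts, intervals with
	 * register hints are queued first, and among the remaining ones the interval with the
	 * larger vreg number goes first. */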
2722 if (*unhandled == NULL
2723 || pos < (*unhandled)->range.start
2724 || (pos == (*unhandled)->range.start
2725 && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2726 && !((*unhandled)->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2727 || (pos == (*unhandled)->range.start
2728 && ival->vreg > (*unhandled)->vreg)) {
2729 ival->list_next = *unhandled;
2730 *unhandled = ival;
2731 } else {
2732 ir_live_interval *prev = *unhandled;
2733
2734 while (prev->list_next) {
2735 if (pos < prev->list_next->range.start
2736 || (pos == prev->list_next->range.start
2737 && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
2738 && !(prev->list_next->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)))
2739 || (pos == prev->list_next->range.start
2740 && ival->vreg > prev->list_next->vreg)) {
2741 break;
2742 }
2743 prev = prev->list_next;
2744 }
2745 ival->list_next = prev->list_next;
2746 prev->list_next = ival;
2747 }
2748 }
2749
2750 /* merge sorted lists */
2751 static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
2752 {
2753 ir_live_interval **prev;
2754 ir_live_pos pos;
2755
2756 if (*unhandled == NULL) {
2757 *unhandled = ival;
2758 while (ival) {
2759 ival = ival->list_next = ival->next;
2760 }
2761 } else {
2762 prev = unhandled;
2763 while (ival) {
2764 pos = ival->range.start;
2765 while (*prev && pos >= (*prev)->range.start) {
2766 prev = &(*prev)->list_next;
2767 }
2768 ival->list_next = *prev;
2769 *prev = ival;
2770 prev = &ival->list_next;
2771 ival = ival->next;
2772 }
2773 }
2774 #ifdef IR_DEBUG
2775 ival = *unhandled;
2776 pos = 0;
2777
2778 while (ival) {
2779 IR_ASSERT(ival->range.start >= pos);
2780 pos = ival->range.start;
2781 ival = ival->list_next;
2782 }
2783 #endif
2784 }
2785
2786 static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival)
2787 {
2788 ir_live_pos pos = ival->range.start;
2789
2790 if (*unhandled == NULL
2791 || pos <= (*unhandled)->range.start) {
2792 ival->list_next = *unhandled;
2793 *unhandled = ival;
2794 } else {
2795 ir_live_interval *prev = *unhandled;
2796
2797 while (prev->list_next) {
2798 if (pos <= prev->list_next->range.start) {
2799 break;
2800 }
2801 prev = prev->list_next;
2802 }
2803 ival->list_next = prev->list_next;
2804 prev->list_next = ival;
2805 }
2806 }
2807
2808 static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled)
2809 {
2810 ir_live_pos freeUntilPos[IR_REG_NUM];
2811 int i, reg;
2812 ir_live_pos pos, next;
2813 ir_live_interval *other;
2814 ir_regset available, overlapped, scratch;
2815
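	/* freeUntilPos[reg] - how long "reg" remains free for the current interval: 0 for
	 * registers held by active intervals, the next intersection point for overlapping
	 * inactive intervals, and "infinity" (0x7fffffff) otherwise. */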
2816 if (IR_IS_TYPE_FP(ival->type)) {
2817 available = IR_REGSET_FP;
2818 /* set freeUntilPos of all physical registers to maxInt */
2819 for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
2820 freeUntilPos[i] = 0x7fffffff;
2821 }
2822 } else {
2823 available = IR_REGSET_GP;
2824 if (ctx->flags & IR_USE_FRAME_POINTER) {
2825 IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
2826 }
2827 #if defined(IR_TARGET_X86)
2828 if (ir_type_size[ival->type] == 1) {
2829 			/* TODO: if no registers are available, we may reuse one of these registers for an already allocated interval ??? */
2830 IR_REGSET_EXCL(available, IR_REG_RBP);
2831 IR_REGSET_EXCL(available, IR_REG_RSI);
2832 IR_REGSET_EXCL(available, IR_REG_RDI);
2833 }
2834 #endif
2835 /* set freeUntilPos of all physical registers to maxInt */
2836 for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
2837 freeUntilPos[i] = 0x7fffffff;
2838 }
2839 }
2840
2841 available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
2842
2843 /* for each interval it in active */
2844 other = *active;
2845 while (other) {
2846 /* freeUntilPos[it.reg] = 0 */
2847 reg = other->reg;
2848 IR_ASSERT(reg >= 0);
2849 if (reg >= IR_REG_SCRATCH) {
2850 if (reg == IR_REG_SCRATCH) {
2851 available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
2852 } else {
2853 IR_ASSERT(reg == IR_REG_ALL);
2854 available = IR_REGSET_EMPTY;
2855 }
2856 } else {
2857 IR_REGSET_EXCL(available, reg);
2858 }
2859 other = other->list_next;
2860 }
2861
2862 /* for each interval it in inactive intersecting with current
2863 *
2864 	 * This loop is not necessary for a program in SSA form (see LSRA on SSA, fig. 6),
2865 * but it is still necessary after coalescing and splitting
2866 */
2867 overlapped = IR_REGSET_EMPTY;
2868 other = inactive;
2869 pos = ival->end;
2870 while (other) {
2871 /* freeUntilPos[it.reg] = next intersection of it with current */
2872 if (other->current_range->start < pos) {
2873 next = ir_ivals_overlap(&ival->range, other->current_range);
2874 if (next) {
2875 reg = other->reg;
2876 IR_ASSERT(reg >= 0);
2877 if (reg >= IR_REG_SCRATCH) {
2878 ir_regset regset;
2879
2880 if (reg == IR_REG_SCRATCH) {
2881 regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2882 } else {
2883 IR_ASSERT(reg == IR_REG_ALL);
2884 regset = available;
2885 }
2886 overlapped = IR_REGSET_UNION(overlapped, regset);
2887 IR_REGSET_FOREACH(regset, reg) {
2888 if (next < freeUntilPos[reg]) {
2889 freeUntilPos[reg] = next;
2890 }
2891 } IR_REGSET_FOREACH_END();
2892 } else if (IR_REGSET_IN(available, reg)) {
2893 IR_REGSET_INCL(overlapped, reg);
2894 if (next < freeUntilPos[reg]) {
2895 freeUntilPos[reg] = next;
2896 }
2897 }
2898 }
2899 }
2900 other = other->list_next;
2901 }
2902
2903 available = IR_REGSET_DIFFERENCE(available, overlapped);
2904 if (available != IR_REGSET_EMPTY) {
2905
2906 if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
2907 /* Try to use hint */
2908 reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos);
2909 if (reg != IR_REG_NONE) {
2910 ival->reg = reg;
2911 IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (hint available without spilling)");
2912 if (*unhandled && ival->end > (*unhandled)->range.start) {
2913 ival->list_next = *active;
2914 *active = ival;
2915 }
2916 return reg;
2917 }
2918 }
2919
2920 if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) {
2921 			/* Try to reuse the register previously allocated for the split interval */
2922 reg = ctx->live_intervals[ival->vreg]->reg;
2923 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2924 ival->reg = reg;
2925 IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling)");
2926 if (*unhandled && ival->end > (*unhandled)->range.start) {
2927 ival->list_next = *active;
2928 *active = ival;
2929 }
2930 return reg;
2931 }
2932 }
2933
2934 /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2935 scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2936 if (scratch != IR_REGSET_EMPTY) {
2937 /* prefer registers that don't conflict with the hints for the following unhandled intervals */
2938 if (1) {
2939 ir_regset non_conflicting = scratch;
2940
2941 other = *unhandled;
2942 while (other && other->range.start < ival->range.end) {
2943 if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2944 reg = ir_get_first_reg_hint(ctx, other, non_conflicting);
2945
2946 if (reg >= 0) {
2947 IR_REGSET_EXCL(non_conflicting, reg);
2948 if (non_conflicting == IR_REGSET_EMPTY) {
2949 break;
2950 }
2951 }
2952 }
2953 other = other->list_next;
2954 }
2955 if (non_conflicting != IR_REGSET_EMPTY) {
2956 reg = IR_REGSET_FIRST(non_conflicting);
2957 } else {
2958 reg = IR_REGSET_FIRST(scratch);
2959 }
2960 } else {
2961 reg = IR_REGSET_FIRST(scratch);
2962 }
2963 } else {
2964 reg = IR_REGSET_FIRST(available);
2965 }
2966 ival->reg = reg;
2967 IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling)");
2968 if (*unhandled && ival->end > (*unhandled)->range.start) {
2969 ival->list_next = *active;
2970 *active = ival;
2971 }
2972 return reg;
2973 }
2974
2975 /* reg = register with highest freeUntilPos */
2976 reg = IR_REG_NONE;
2977 pos = 0;
2978 IR_REGSET_FOREACH(overlapped, i) {
2979 if (freeUntilPos[i] > pos) {
2980 pos = freeUntilPos[i];
2981 reg = i;
2982 } else if (freeUntilPos[i] == pos
2983 && !IR_REGSET_IN(IR_REGSET_SCRATCH, reg)
2984 && IR_REGSET_IN(IR_REGSET_SCRATCH, i)) {
2985 /* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
2986 pos = freeUntilPos[i];
2987 reg = i;
2988 }
2989 } IR_REGSET_FOREACH_END();
2990
2991 if (pos > ival->range.start) {
2992 /* register available for the first part of the interval */
2993 /* split current before freeUntilPos[reg] */
2994 ir_live_pos split_pos = ir_last_use_pos_before(ival, pos,
2995 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
2996 if (split_pos > ival->range.start) {
2997 split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0);
2998 other = ir_split_interval_at(ctx, ival, split_pos);
2999 if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3000 ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos);
3001
3002 if (pref_reg != IR_REG_NONE) {
3003 ival->reg = pref_reg;
3004 } else {
3005 ival->reg = reg;
3006 }
3007 } else {
3008 ival->reg = reg;
3009 }
3010 IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (available without spilling for the first part)");
3011 if (*unhandled && ival->end > (*unhandled)->range.start) {
3012 ival->list_next = *active;
3013 *active = ival;
3014 }
3015 ir_add_to_unhandled(unhandled, other);
3016 IR_LOG_LSRA(" ---- Queue", other, "");
3017 return reg;
3018 }
3019 }
3020 return IR_REG_NONE;
3021 }
3022
3023 static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled)
3024 {
3025 ir_live_pos nextUsePos[IR_REG_NUM];
3026 ir_live_pos blockPos[IR_REG_NUM];
3027 int i, reg;
3028 ir_live_pos pos, next_use_pos;
3029 ir_live_interval *other, *prev;
3030 ir_use_pos *use_pos;
3031 ir_regset available, tmp_regset;
3032
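	/* nextUsePos[reg] - the next position at which the interval currently occupying "reg"
	 * really needs a register; blockPos[reg] - the position from which "reg" is blocked by
	 * a fixed or temporary interval and cannot be taken over at all. */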
3033 if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3034 use_pos = ival->use_pos;
3035 while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) {
3036 use_pos = use_pos->next;
3037 }
3038 if (!use_pos) {
3039 /* spill */
3040 IR_LOG_LSRA(" ---- Spill", ival, " (no use pos that must be in reg)");
3041 ctx->flags2 |= IR_RA_HAVE_SPILLS;
3042 return IR_REG_NONE;
3043 }
3044 next_use_pos = use_pos->pos;
3045 } else {
3046 next_use_pos = ival->range.end;
3047 }
3048
3049 if (IR_IS_TYPE_FP(ival->type)) {
3050 available = IR_REGSET_FP;
3051 /* set nextUsePos of all physical registers to maxInt */
3052 for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
3053 nextUsePos[i] = 0x7fffffff;
3054 blockPos[i] = 0x7fffffff;
3055 }
3056 } else {
3057 available = IR_REGSET_GP;
3058 if (ctx->flags & IR_USE_FRAME_POINTER) {
3059 IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
3060 }
3061 #if defined(IR_TARGET_X86)
3062 if (ir_type_size[ival->type] == 1) {
3063 			/* TODO: if no registers are available, we may reuse one of these registers for an already allocated interval ??? */
3064 IR_REGSET_EXCL(available, IR_REG_RBP);
3065 IR_REGSET_EXCL(available, IR_REG_RSI);
3066 IR_REGSET_EXCL(available, IR_REG_RDI);
3067 }
3068 #endif
3069 /* set nextUsePos of all physical registers to maxInt */
3070 for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
3071 nextUsePos[i] = 0x7fffffff;
3072 blockPos[i] = 0x7fffffff;
3073 }
3074 }
3075
3076 available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
3077
3078 if (IR_REGSET_IS_EMPTY(available)) {
3079 fprintf(stderr, "LSRA Internal Error: No registers available. Allocation is not possible\n");
3080 IR_ASSERT(0);
3081 exit(-1);
3082 }
3083
3084 /* for each interval it in active */
3085 other = *active;
3086 while (other) {
3087 /* nextUsePos[it.reg] = next use of it after start of current */
3088 reg = other->reg;
3089 IR_ASSERT(reg >= 0);
3090 if (reg >= IR_REG_SCRATCH) {
3091 ir_regset regset;
3092
3093 if (reg == IR_REG_SCRATCH) {
3094 regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3095 } else {
3096 IR_ASSERT(reg == IR_REG_ALL);
3097 regset = available;
3098 }
3099 IR_REGSET_FOREACH(regset, reg) {
3100 blockPos[reg] = nextUsePos[reg] = 0;
3101 } IR_REGSET_FOREACH_END();
3102 } else if (IR_REGSET_IN(available, reg)) {
3103 if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3104 blockPos[reg] = nextUsePos[reg] = 0;
3105 } else {
3106 pos = ir_first_use_pos_after(other, ival->range.start,
3107 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3108 if (pos < nextUsePos[reg]) {
3109 nextUsePos[reg] = pos;
3110 }
3111 }
3112 }
3113 other = other->list_next;
3114 }
3115
3116 /* for each interval it in inactive intersecting with current */
3117 other = *inactive;
3118 while (other) {
3119 /* freeUntilPos[it.reg] = next intersection of it with current */
3120 reg = other->reg;
3121 IR_ASSERT(reg >= 0);
3122 if (reg >= IR_REG_SCRATCH) {
3123 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3124
3125 if (overlap) {
3126 ir_regset regset;
3127
3128 if (reg == IR_REG_SCRATCH) {
3129 regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3130 } else {
3131 IR_ASSERT(reg == IR_REG_ALL);
3132 regset = available;
3133 }
3134 IR_REGSET_FOREACH(regset, reg) {
3135 if (overlap < nextUsePos[reg]) {
3136 nextUsePos[reg] = overlap;
3137 }
3138 if (overlap < blockPos[reg]) {
3139 blockPos[reg] = overlap;
3140 }
3141 } IR_REGSET_FOREACH_END();
3142 }
3143 } else if (IR_REGSET_IN(available, reg)) {
3144 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3145
3146 if (overlap) {
3147 if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3148 if (overlap < nextUsePos[reg]) {
3149 nextUsePos[reg] = overlap;
3150 }
3151 if (overlap < blockPos[reg]) {
3152 blockPos[reg] = overlap;
3153 }
3154 } else {
3155 pos = ir_first_use_pos_after(other, ival->range.start,
3156 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3157 if (pos < nextUsePos[reg]) {
3158 nextUsePos[reg] = pos;
3159 }
3160 }
3161 }
3162 }
3163 other = other->list_next;
3164 }
3165
3166 /* register hinting */
3167 reg = IR_REG_NONE;
3168 if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3169 reg = ir_get_preferred_reg(ctx, ival, available);
3170 }
3171 if (reg == IR_REG_NONE) {
3172 select_register:
3173 reg = IR_REGSET_FIRST(available);
3174 }
3175
3176 /* reg = register with highest nextUsePos */
3177 pos = nextUsePos[reg];
3178 tmp_regset = available;
3179 IR_REGSET_EXCL(tmp_regset, reg);
3180 IR_REGSET_FOREACH(tmp_regset, i) {
3181 if (nextUsePos[i] > pos) {
3182 pos = nextUsePos[i];
3183 reg = i;
3184 }
3185 } IR_REGSET_FOREACH_END();
3186
3187 /* if first usage of current is after nextUsePos[reg] then */
3188 if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3189 /* all other intervals are used before current, so it is best to spill current itself */
3190 /* assign spill slot to current */
3191 /* split current before its first use position that requires a register */
3192 ir_live_pos split_pos;
3193
3194 spill_current:
3195 if (next_use_pos == ival->range.start) {
3196 IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3197 /* split right after definition */
3198 split_pos = next_use_pos + 1;
3199 } else {
3200 split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3201 }
3202
3203 if (split_pos > ival->range.start) {
3204 IR_LOG_LSRA(" ---- Conflict with others", ival, " (all others are used before)");
3205 other = ir_split_interval_at(ctx, ival, split_pos);
3206 IR_LOG_LSRA(" ---- Spill", ival, "");
3207 ir_add_to_unhandled(unhandled, other);
3208 IR_LOG_LSRA(" ---- Queue", other, "");
3209 return IR_REG_NONE;
3210 }
3211 }
3212
3213 if (ival->end > blockPos[reg]) {
3214 		/* spilling makes a register free only for the first part of current */
3215 		IR_LOG_LSRA("    ---- Conflict with others", ival, " (spilling makes a register free only for the first part)");
3216 /* split current at optimal position before block_pos[reg] */
3217 ir_live_pos split_pos = ir_last_use_pos_before(ival, blockPos[reg] + 1,
3218 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3219 if (split_pos == 0) {
3220 split_pos = ir_first_use_pos_after(ival, blockPos[reg],
3221 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1;
3222 other = ir_split_interval_at(ctx, ival, split_pos);
3223 ir_add_to_unhandled(unhandled, other);
3224 IR_LOG_LSRA(" ---- Queue", other, "");
3225 return IR_REG_NONE;
3226 }
3227 if (split_pos >= blockPos[reg]) {
3228 try_next_available_register:
3229 IR_REGSET_EXCL(available, reg);
3230 if (IR_REGSET_IS_EMPTY(available)) {
3231 fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");
3232 IR_ASSERT(0);
3233 exit(-1);
3234 }
3235 IR_LOG_LSRA(" ---- Restart", ival, "");
3236 goto select_register;
3237 }
3238 split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1);
3239 other = ir_split_interval_at(ctx, ival, split_pos);
3240 ir_add_to_unhandled(unhandled, other);
3241 IR_LOG_LSRA(" ---- Queue", other, "");
3242 }
3243
3244 /* spill intervals that currently block reg */
3245 prev = NULL;
3246 other = *active;
3247 while (other) {
3248 ir_live_pos split_pos;
3249
3250 if (reg == other->reg) {
3251 /* split active interval for reg at position */
3252 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3253
3254 if (overlap) {
3255 ir_live_interval *child, *child2;
3256
3257 IR_ASSERT(other->type != IR_VOID);
3258 IR_LOG_LSRA_CONFLICT(" ---- Conflict with active", other, overlap);
3259
3260 split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3261 if (split_pos == 0) {
3262 split_pos = ival->range.start;
3263 }
3264 split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1);
3265 if (split_pos > other->range.start) {
3266 child = ir_split_interval_at(ctx, other, split_pos);
3267 if (prev) {
3268 prev->list_next = other->list_next;
3269 } else {
3270 *active = other->list_next;
3271 }
3272 IR_LOG_LSRA(" ---- Finish", other, "");
3273 } else {
3274 if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) {
3275 if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3276 next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG);
3277 if (next_use_pos == ival->range.start) {
3278 IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3279 /* split right after definition */
3280 split_pos = next_use_pos + 1;
3281 } else {
3282 split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3283 }
3284
3285 if (split_pos > ival->range.start) {
3286 goto spill_current;
3287 }
3288 }
3289 goto try_next_available_register;
3290 }
3291 child = other;
3292 other->reg = IR_REG_NONE;
3293 if (prev) {
3294 prev->list_next = other->list_next;
3295 } else {
3296 *active = other->list_next;
3297 }
3298 IR_LOG_LSRA(" ---- Spill and Finish", other, " (it must not be in reg)");
3299 }
3300
3301 split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ???
3302 if (split_pos > child->range.start && split_pos < child->end) {
3303 ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1);
3304 if (opt_split_pos > child->range.start) {
3305 split_pos = opt_split_pos;
3306 }
3307 child2 = ir_split_interval_at(ctx, child, split_pos);
3308 IR_LOG_LSRA(" ---- Spill", child, "");
3309 ir_add_to_unhandled(unhandled, child2);
3310 IR_LOG_LSRA(" ---- Queue", child2, "");
3311 } else if (child != other) {
3312 					// TODO: this may cause an endless loop
3313 ir_add_to_unhandled(unhandled, child);
3314 IR_LOG_LSRA(" ---- Queue", child, "");
3315 }
3316 }
3317 break;
3318 }
3319 prev = other;
3320 other = other->list_next;
3321 }
3322
3323 /* split any inactive interval for reg at the end of its lifetime hole */
3324 other = *inactive;
3325 while (other) {
3326 /* freeUntilPos[it.reg] = next intersection of it with current */
3327 if (reg == other->reg) {
3328 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3329
3330 if (overlap) {
3331 ir_live_interval *child;
3332
3333 IR_ASSERT(other->type != IR_VOID);
3334 IR_LOG_LSRA_CONFLICT(" ---- Conflict with inactive", other, overlap);
3335 // TODO: optimal split position (this case is not tested)
3336 child = ir_split_interval_at(ctx, other, overlap);
3337 /* reset range cache */
3338 other->current_range = &other->range;
3339 ir_add_to_unhandled(unhandled, child);
3340 IR_LOG_LSRA(" ---- Queue", child, "");
3341 }
3342 }
3343 other = other->list_next;
3344 }
3345
3346 /* current.reg = reg */
3347 ival->reg = reg;
3348 IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (after splitting others)");
3349
3350 if (*unhandled && ival->end > (*unhandled)->range.start) {
3351 ival->list_next = *active;
3352 *active = ival;
3353 }
3354 return reg;
3355 }
3356
3357 static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
3358 {
3359 ir_block *bb = ctx->data;
3360 ir_tmp_reg tmp_reg;
3361
3362 if (to == 0) {
3363 if (IR_IS_TYPE_INT(type)) {
3364 tmp_reg.num = 0;
3365 tmp_reg.type = type;
3366 tmp_reg.start = IR_USE_SUB_REF;
3367 tmp_reg.end = IR_SAVE_SUB_REF;
3368 } else {
3369 IR_ASSERT(IR_IS_TYPE_FP(type));
3370 tmp_reg.num = 1;
3371 tmp_reg.type = type;
3372 tmp_reg.start = IR_USE_SUB_REF;
3373 tmp_reg.end = IR_SAVE_SUB_REF;
3374 }
3375 } else if (from != 0) {
3376 if (IR_IS_TYPE_INT(type)) {
3377 tmp_reg.num = 0;
3378 tmp_reg.type = type;
3379 tmp_reg.start = IR_USE_SUB_REF;
3380 tmp_reg.end = IR_SAVE_SUB_REF;
3381 } else {
3382 IR_ASSERT(IR_IS_TYPE_FP(type));
3383 tmp_reg.num = 1;
3384 tmp_reg.type = type;
3385 tmp_reg.start = IR_USE_SUB_REF;
3386 tmp_reg.end = IR_SAVE_SUB_REF;
3387 }
3388 } else {
3389 return 1;
3390 }
3391 if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) {
3392 ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg);
3393 }
3394 return 1;
3395 }
3396
3397 static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data)
3398 {
3399 ir_use_pos *use_pos = ival->use_pos;
3400 ir_insn *insn;
3401
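	/* A PARAM kept in memory or a VLOAD from a stack VAR may not need a register at all:
	 * if its only remaining use can fuse the load, bind the interval to the existing stack
	 * location and skip register allocation for it. */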
3402 if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
3403 IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0);
3404 insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
3405 IR_ASSERT(insn->op == IR_PARAM);
3406 use_pos = use_pos->next;
3407 if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
3408 return 0;
3409 }
3410
3411 if (use_pos) {
3412 ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
3413 if (bb->loop_depth) {
3414 return 0;
3415 }
3416 }
3417
3418 return 1;
3419 } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) {
3420 insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
3421 IR_ASSERT(insn->op == IR_VLOAD);
3422 IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR);
3423 use_pos = use_pos->next;
3424 if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
3425 return 0;
3426 }
3427
3428 if (use_pos) {
3429 ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
3430 if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) {
3431 return 0;
3432 }
3433 /* check if VAR may be clobbered between VLOAD and use */
3434 ir_use_list *use_list = &ctx->use_lists[insn->op2];
3435 ir_ref n = use_list->count;
3436 ir_ref *p = &ctx->use_edges[use_list->refs];
3437 for (; n > 0; p++, n--) {
3438 ir_ref use = *p;
3439 if (ctx->ir_base[use].op == IR_VSTORE) {
3440 if (use > IR_LIVE_POS_TO_REF(ival->use_pos->pos) && use < IR_LIVE_POS_TO_REF(use_pos->pos)) {
3441 return 0;
3442 }
3443 } else if (ctx->ir_base[use].op == IR_VADDR) {
3444 return 0;
3445 }
3446 }
3447 }
3448 ival->stack_spill_pos = ctx->ir_base[insn->op2].op3;
3449
3450 return 1;
3451 }
3452 return 0;
3453 }
3454
3455 static void ir_assign_bound_spill_slots(ir_ctx *ctx)
3456 {
3457 ir_hashtab_bucket *b = ctx->binding->data;
3458 uint32_t n = ctx->binding->count;
3459 uint32_t v;
3460 ir_live_interval *ival;
3461
3462 while (n > 0) {
3463 v = ctx->vregs[b->key];
3464 if (v) {
3465 ival = ctx->live_intervals[v];
3466 if (ival
3467 && ival->stack_spill_pos == -1
3468 && (ival->next || ival->reg == IR_REG_NONE)) {
3469 IR_ASSERT(b->val < 0);
3470 /* special spill slot */
3471 ival->stack_spill_pos = -b->val;
3472 ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
3473 }
3474 }
3475 b++;
3476 n--;
3477 }
3478 }
3479
3480 static int ir_linear_scan(ir_ctx *ctx)
3481 {
3482 uint32_t b;
3483 ir_block *bb;
3484 ir_live_interval *unhandled = NULL;
3485 ir_live_interval *active = NULL;
3486 ir_live_interval *inactive = NULL;
3487 ir_live_interval *ival, *other, *prev;
3488 int j;
3489 ir_live_pos position;
3490 ir_reg reg;
3491 ir_reg_alloc_data data;
3492 ir_ref vars = ctx->vars;
3493
3494 if (!ctx->live_intervals) {
3495 return 0;
3496 }
3497
3498 if (ctx->flags2 & IR_LR_HAVE_DESSA_MOVES) {
3499 /* Add fixed intervals for temporary registers used for DESSA moves */
3500 for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
3501 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
3502 if (bb->flags & IR_BB_DESSA_MOVES) {
3503 ctx->data = bb;
3504 ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
3505 }
3506 }
3507 }
3508
3509 ctx->data = &data;
3510 ctx->stack_frame_size = 0;
3511 data.unused_slot_4 = 0;
3512 data.unused_slot_2 = 0;
3513 data.unused_slot_1 = 0;
3514 data.handled = NULL;
3515
3516 while (vars) {
3517 ir_ref var = vars;
3518 ir_insn *insn = &ctx->ir_base[var];
3519
3520 IR_ASSERT(insn->op == IR_VAR || insn->op == IR_ALLOCA);
3521 vars = insn->op3; /* list next */
3522
3523 if (insn->op == IR_VAR) {
3524 			ir_ref slot = ir_allocate_spill_slot(ctx, insn->type, &data);
3525 ir_use_list *use_list;
3526 ir_ref n, *p;
3527
3528 insn->op3 = slot;
3529 use_list = &ctx->use_lists[var];
3530 n = use_list->count;
3531 p = &ctx->use_edges[use_list->refs];
3532 for (; n > 0; p++, n--) {
3533 insn = &ctx->ir_base[*p];
3534 if (insn->op == IR_VADDR) {
3535 insn->op3 = slot;
3536 }
3537 }
3538 } else {
3539 ir_insn *val = &ctx->ir_base[insn->op2];
3540
3541 IR_ASSERT(IR_IS_CONST_REF(insn->op2));
3542 IR_ASSERT(IR_IS_TYPE_INT(val->type));
3543 IR_ASSERT(!IR_IS_SYM_CONST(val->op));
3544 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
3545 IR_ASSERT(val->val.i64 < 0x7fffffff);
3546
3547 insn->op3 = ir_allocate_big_spill_slot(ctx, val->val.i32, &data);
3548 }
3549 }
3550
3551 for (j = ctx->vregs_count; j != 0; j--) {
3552 ival = ctx->live_intervals[j];
3553 if (ival) {
3554 if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD))
3555 || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
3556 ir_add_to_unhandled(&unhandled, ival);
3557 }
3558 }
3559 }
3560
3561 ival = ctx->live_intervals[0];
3562 if (ival) {
3563 ir_merge_to_unhandled(&unhandled, ival);
3564 }
3565
3566 	/* vregs + tmp + fixed + SCRATCH + ALL */
3567 for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
3568 ival = ctx->live_intervals[j];
3569 if (ival) {
3570 ival->current_range = &ival->range;
3571 ival->list_next = inactive;
3572 inactive = ival;
3573 }
3574 }
3575
3576 ctx->flags2 &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS);
3577
3578 #ifdef IR_DEBUG
3579 if (ctx->flags & IR_DEBUG_RA) {
3580 fprintf(stderr, "----\n");
3581 ir_dump_live_ranges(ctx, stderr);
3582 fprintf(stderr, "---- Start LSRA\n");
3583 }
3584 #endif
3585
3586 while (unhandled) {
3587 ival = unhandled;
3588 ival->current_range = &ival->range;
3589 unhandled = ival->list_next;
3590 position = ival->range.start;
3591
3592 IR_LOG_LSRA(" ---- Processing", ival, "...");
3593
3594 /* for each interval i in active */
3595 other = active;
3596 prev = NULL;
3597 while (other) {
3598 ir_live_range *r = other->current_range;
3599
3600 IR_ASSERT(r);
3601 if (r->end <= position) {
3602 do {
3603 r = r->next;
3604 } while (r && r->end <= position);
3605 if (!r) {
3606 /* move i from active to handled */
3607 other = other->list_next;
3608 if (prev) {
3609 prev->list_next = other;
3610 } else {
3611 active = other;
3612 }
3613 continue;
3614 }
3615 other->current_range = r;
3616 }
3617 if (position < r->start) {
3618 /* move i from active to inactive */
3619 if (prev) {
3620 prev->list_next = other->list_next;
3621 } else {
3622 active = other->list_next;
3623 }
3624 other->list_next = inactive;
3625 inactive = other;
3626 } else {
3627 prev = other;
3628 }
3629 other = prev ? prev->list_next : active;
3630 }
3631
3632 /* for each interval i in inactive */
3633 other = inactive;
3634 prev = NULL;
3635 while (other) {
3636 ir_live_range *r = other->current_range;
3637
3638 IR_ASSERT(r);
3639 if (r->end <= position) {
3640 do {
3641 r = r->next;
3642 } while (r && r->end <= position);
3643 if (!r) {
3644 /* move i from inactive to handled */
3645 other = other->list_next;
3646 if (prev) {
3647 prev->list_next = other;
3648 } else {
3649 inactive = other;
3650 }
3651 continue;
3652 }
3653 other->current_range = r;
3654 }
3655 if (position >= r->start) {
3656 /* move i from inactive to active */
3657 if (prev) {
3658 prev->list_next = other->list_next;
3659 } else {
3660 inactive = other->list_next;
3661 }
3662 other->list_next = active;
3663 active = other;
3664 } else {
3665 prev = other;
3666 }
3667 other = prev ? prev->list_next : inactive;
3668 }
3669
3670 reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled);
3671 if (reg == IR_REG_NONE) {
3672 reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled);
3673 }
3674 }
3675
3676 #if 0 //def IR_DEBUG
3677 /* all intervals must be processed */
3678 ival = active;
3679 while (ival) {
3680 IR_ASSERT(!ival->next);
3681 ival = ival->list_next;
3682 }
3683 ival = inactive;
3684 while (ival) {
3685 IR_ASSERT(!ival->next);
3686 ival = ival->list_next;
3687 }
3688 #endif
3689
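	/* Stack-slot assignment: needed only when the loop above split intervals or left
	 * some of them without a register. */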
3690 if (ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) {
3691
3692 if (ctx->binding) {
3693 ir_assign_bound_spill_slots(ctx);
3694 }
3695
3696 /* Use simple linear-scan (without holes) to allocate and reuse spill slots */
3697 unhandled = NULL;
3698 for (j = ctx->vregs_count; j != 0; j--) {
3699 ival = ctx->live_intervals[j];
3700 if (ival
3701 && (ival->next || ival->reg == IR_REG_NONE)
3702 && ival->stack_spill_pos == -1) {
3703 ival->flags |= IR_LIVE_INTERVAL_SPILLED;
3704 if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
3705 ir_live_range *r;
3706
3707 other = ival;
3708 while (other->next) {
3709 other = other->next;
3710 }
3711 r = &other->range;
3712 while (r->next) {
3713 r = r->next;
3714 }
3715 ival->end = r->end;
3716 ir_add_to_unhandled_spill(&unhandled, ival);
3717 }
3718 }
3719 }
3720
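		/* Second, much simpler linear scan over the spilled intervals (no lifetime
		 * holes): intervals that have ended are collected in "handled" by type size,
		 * so ir_allocate_spill_slot() can reuse their stack positions. */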
3721 if (unhandled) {
3722 uint8_t size;
3723 ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
3724 ir_live_interval *old;
3725
3726 data.handled = handled;
3727 active = NULL;
3728 while (unhandled) {
3729 ival = unhandled;
3730 ival->current_range = &ival->range;
3731 unhandled = ival->list_next;
3732 position = ival->range.start;
3733
3734 /* for each interval i in active */
3735 other = active;
3736 prev = NULL;
3737 while (other) {
3738 if (other->end <= position) {
3739 /* move i from active to handled */
3740 if (prev) {
3741 prev->list_next = other->list_next;
3742 } else {
3743 active = other->list_next;
3744 }
3745 size = ir_type_size[other->type];
3746 IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
3747 old = handled[size];
3748 while (old) {
3749 if (old->stack_spill_pos == other->stack_spill_pos) {
3750 break;
3751 }
3752 old = old->list_next;
3753 }
3754 if (!old) {
3755 other->list_next = handled[size];
3756 handled[size] = other;
3757 }
3758 } else {
3759 prev = other;
3760 }
3761 other = prev ? prev->list_next : active;
3762 }
3763
3764 ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data);
3765 if (unhandled && ival->end > unhandled->range.start) {
3766 ival->list_next = active;
3767 active = ival;
3768 } else {
3769 size = ir_type_size[ival->type];
3770 IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
3771 old = handled[size];
3772 while (old) {
3773 if (old->stack_spill_pos == ival->stack_spill_pos) {
3774 break;
3775 }
3776 old = old->list_next;
3777 }
3778 if (!old) {
3779 ival->list_next = handled[size];
3780 handled[size] = ival;
3781 }
3782 }
3783 }
3784 data.handled = NULL;
3785 }
3786 }
3787
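	/* 32-bit x86 returns FP values on the x87 stack, so reserve a stack slot for
	 * moving such return values between an SSE register and st(0). */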
3788 #ifdef IR_TARGET_X86
3789 if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
3790 ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data);
3791 } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
3792 ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data);
3793 } else {
3794 ctx->ret_slot = -1;
3795 }
3796 #endif
3797
3798 #ifdef IR_DEBUG
3799 if (ctx->flags & IR_DEBUG_RA) {
3800 fprintf(stderr, "---- Finish LSRA\n");
3801 ir_dump_live_ranges(ctx, stderr);
3802 fprintf(stderr, "----\n");
3803 }
3804 #endif
3805
3806 return 1;
3807 }
3808
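/* Check whether a use of a spilled interval in block "b0" really needs a reload
 * from the spill slot.  Walking backwards through the predecessors, a reload is
 * required once we reach the start (or a secondary entry) block, or a block whose
 * end is not covered by the interval; blocks already marked in "available" are
 * known to still hold the value in the register. */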
3809 static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available)
3810 {
3811 ir_worklist worklist;
3812 ir_block *bb;
3813 uint32_t b, *p, n;
3814
3815 ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1);
3816 ir_worklist_push(&worklist, b0);
3817 while (ir_worklist_len(&worklist) != 0) {
3818 b = ir_worklist_pop(&worklist);
3819 bb = &ctx->cfg_blocks[b];
3820 if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) {
3821 ir_worklist_free(&worklist);
3822 return 1;
3823 }
3824 n = bb->predecessors_count;
3825 for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
3826 b = *p;
3827 bb = &ctx->cfg_blocks[b];
3828
3829 if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) {
3830 ir_worklist_free(&worklist);
3831 return 1;
3832 } else if (!ir_bitset_in(available, b)) {
3833 ir_worklist_push(&worklist, b);
3834 }
3835 }
3836 }
3837 ir_worklist_free(&worklist);
3838 return 0;
3839 }
3840
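/* A spill load can be fused into the using instruction only for its last input
 * use at this position; if further input uses follow, the value has to be loaded
 * into the register explicitly. */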
3841 static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
3842 {
3843 if (use_pos->next
3844 && use_pos->op_num == 1
3845 && use_pos->next->pos == use_pos->pos
3846 && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
3847 /* Support for R2 = ADD(R1, R1) */
3848 use_pos = use_pos->next;
3849 }
3850 return use_pos->next && use_pos->next->op_num != 0;
3851 }
3852
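/* Record the register assigned to an operand of a fused instruction.  The
 * assignment is keyed by the (root instruction, fused ref and operand number)
 * pair in the lazily created "fused_regs" string table. */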
3853 static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t reg)
3854 {
3855 char key[10];
3856
3857 IR_ASSERT(reg != IR_REG_NONE);
3858 if (!ctx->fused_regs) {
3859 ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
3860 ir_strtab_init(ctx->fused_regs, 8, 128);
3861 }
3862 memcpy(key, &root, sizeof(ir_ref));
3863 memcpy(key + 4, &ref_and_op, sizeof(ir_ref));
3864 ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
3865 }
3866
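/* Translate the computed lifetime intervals into per-instruction register
 * assignments (ctx->regs).  For spilled intervals the register number is
 * combined with IR_REG_SPILL_LOAD/IR_REG_SPILL_STORE flags so that the code
 * generator emits the necessary reloads and stores. */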
3867 static void assign_regs(ir_ctx *ctx)
3868 {
3869 ir_ref i;
3870 ir_live_interval *ival, *top_ival;
3871 ir_use_pos *use_pos;
3872 int8_t reg, old_reg;
3873 ir_ref ref;
3874 ir_regset used_regs = 0;
3875
3876 if (!ctx->regs) {
3877 ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
3878 memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
3879 }
3880
3881 if (!(ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) {
3882 for (i = 1; i <= ctx->vregs_count; i++) {
3883 ival = ctx->live_intervals[i];
3884 if (ival) {
3885 do {
3886 if (ival->reg != IR_REG_NONE) {
3887 reg = ival->reg;
3888 IR_REGSET_INCL(used_regs, reg);
3889 use_pos = ival->use_pos;
3890 while (use_pos) {
3891 ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
3892 ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3893 use_pos = use_pos->next;
3894 }
3895 }
3896 ival = ival->next;
3897 } while (ival);
3898 }
3899 }
3900 } else {
3901 ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
3902
3903 for (i = 1; i <= ctx->vregs_count; i++) {
3904 top_ival = ival = ctx->live_intervals[i];
3905 if (ival) {
3906 if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) {
3907 do {
3908 if (ival->reg != IR_REG_NONE) {
3909 IR_REGSET_INCL(used_regs, ival->reg);
3910 use_pos = ival->use_pos;
3911 while (use_pos) {
3912 reg = ival->reg;
3913 ref = IR_LIVE_POS_TO_REF(use_pos->pos);
3914 if (use_pos->hint_ref < 0) {
3915 ref = -use_pos->hint_ref;
3916 }
3917 ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3918
3919 use_pos = use_pos->next;
3920 }
3921 }
3922 ival = ival->next;
3923 } while (ival);
3924 } else {
3925 do {
3926 if (ival->reg != IR_REG_NONE) {
3927 ir_ref prev_use_ref = IR_UNUSED;
3928
3929 ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
3930 IR_REGSET_INCL(used_regs, ival->reg);
3931 use_pos = ival->use_pos;
3932 while (use_pos) {
3933 reg = ival->reg;
3934 ref = IR_LIVE_POS_TO_REF(use_pos->pos);
3935 // TODO: Insert spill loads and stores in optimal positions (resolution)
3936 if (use_pos->op_num == 0) {
3937 if ((ctx->ir_base[ref].op == IR_COPY
3938 || ctx->ir_base[ref].op == IR_BITCAST
3939 || ctx->ir_base[ref].op == IR_TRUNC)
3940 && !IR_IS_CONST_REF(ctx->ir_base[ref].op1)
3941 && ctx->vregs[ctx->ir_base[ref].op1] == (uint32_t)i) {
3942 /* register reuse */
3943 ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3944 prev_use_ref = ref;
3945 use_pos = use_pos->next;
3946 continue;
3947 }
3948 ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
3949 if (ctx->ir_base[ref].op == IR_PHI) {
3950 /* Spilled PHI var is passed through memory */
3951 reg = IR_REG_NONE;
3952 prev_use_ref = IR_UNUSED;
3953 } else if (ctx->ir_base[ref].op == IR_PARAM
3954 && (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
3955 /* Stack PARAM var is passed through memory */
3956 reg = IR_REG_NONE;
3957 } else {
3958 uint32_t use_b = ctx->cfg_map[ref];
3959
3960 if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
3961 ir_bitset_incl(available, use_b);
3962 }
3963 if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
3964 reg |= IR_REG_SPILL_SPECIAL;
3965 } else {
3966 reg |= IR_REG_SPILL_STORE;
3967 }
3968 prev_use_ref = ref;
3969 }
3970 } else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
3971 && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
3972 if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
3973 && use_pos->hint != reg
3974 // && ctx->ir_base[ref].op != IR_CALL
3975 // && ctx->ir_base[ref].op != IR_TAILCALL) {
3976 && ctx->ir_base[ref].op != IR_SNAPSHOT
3977 && !needs_spill_load(ctx, ival, use_pos)) {
3978 /* fuse spill load (valid only when register is not reused) */
3979 reg = IR_REG_NONE;
3980 if (use_pos->next
3981 && use_pos->op_num == 1
3982 && use_pos->next->pos == use_pos->pos
3983 && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
3984 /* Support for R2 = BINOP(R1, R1) */
3985 if (use_pos->hint_ref < 0) {
3986 ref = -use_pos->hint_ref;
3987 }
3988 ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
3989 use_pos = use_pos->next;
3990 }
3991 } else {
3992 if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
3993 reg |= IR_REG_SPILL_SPECIAL;
3994 } else {
3995 reg |= IR_REG_SPILL_LOAD;
3996 }
3997 if (ctx->ir_base[ref].op != IR_SNAPSHOT && !(use_pos->flags & IR_PHI_USE)) {
3998 uint32_t use_b = ctx->cfg_map[ref];
3999
4000 if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
4001 ir_bitset_incl(available, use_b);
4002 }
4003 prev_use_ref = ref;
4004 }
4005 }
4006 if (use_pos->hint_ref < 0
4007 && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
4008 if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
4009 reg |= IR_REG_SPILL_SPECIAL;
4010 } else {
4011 reg |= IR_REG_SPILL_LOAD;
4012 }
4013 if (reg != old_reg) {
4014 IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
4015 ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
4016 ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
4017 use_pos = use_pos->next;
4018 continue;
4019 }
4020 }
4021 } else if (use_pos->flags & IR_PHI_USE) {
4022 IR_ASSERT(use_pos->hint_ref < 0);
4023 IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
4024 IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
4025 if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
4026 /* Spilled PHI var is passed through memory */
4027 reg = IR_REG_NONE;
4028 }
4029 } else if (use_pos->hint_ref < 0
4030 && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
4031 if (reg != old_reg) {
4032 IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
4033 ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
4034 ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
4035 use_pos = use_pos->next;
4036 continue;
4037 }
4038 } else {
4039 /* reuse register without spill load */
4040 }
4041 if (use_pos->hint_ref < 0) {
4042 ref = -use_pos->hint_ref;
4043 }
4044 ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
4045
4046 use_pos = use_pos->next;
4047 }
4048 } else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
4049 use_pos = ival->use_pos;
4050 while (use_pos) {
4051 ref = IR_LIVE_POS_TO_REF(use_pos->pos);
4052 if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
4053 IR_ASSERT(use_pos->hint_ref >= 0);
4054 /* A reference to a CPU spill slot */
4055 reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
4056 ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
4057 }
4058 use_pos = use_pos->next;
4059 }
4060 }
4061 ival = ival->next;
4062 } while (ival);
4063 }
4064 }
4065 }
4066 ir_mem_free(available);
4067 }
4068
4069 /* Temporary registers */
4070 ival = ctx->live_intervals[0];
4071 if (ival) {
4072 do {
4073 IR_ASSERT(ival->reg != IR_REG_NONE);
4074 IR_REGSET_INCL(used_regs, ival->reg);
4075 reg = ival->reg;
4076 if (ival->tmp_op_num > 0) {
4077 ir_insn *insn = &ctx->ir_base[ival->tmp_ref];
4078
4079 if (ival->tmp_op_num <= insn->inputs_count) {
4080 ir_ref *ops = insn->ops;
4081 if (IR_IS_CONST_REF(ops[ival->tmp_op_num])) {
4082 /* constant rematerialization */
4083 reg |= IR_REG_SPILL_LOAD;
4084 } else if (ctx->ir_base[ops[ival->tmp_op_num]].op == IR_ALLOCA
4085 || ctx->ir_base[ops[ival->tmp_op_num]].op == IR_VADDR) {
4086 /* local address rematerialization */
4087 reg |= IR_REG_SPILL_LOAD;
4088 }
4089 }
4090 }
4091 ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, reg);
4092 ival = ival->next;
4093 } while (ival);
4094 }
4095
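	/* Callee-saved registers that were actually used have to be preserved by the
	 * prologue/epilogue; with a fixed stack frame the save set is dictated by
	 * ctx->fixed_save_regset. */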
4096 if (ctx->fixed_stack_frame_size != -1) {
4097 ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
4098 if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
4099 ctx->used_preserved_regs)) {
4100 // TODO: Preserved reg and fixed frame conflict ???
4101 // IR_ASSERT(0 && "Preserved reg and fixed frame conflict");
4102 }
4103 } else {
4104 ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset,
4105 IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
4106 (ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED));
4107 }
4108
4109 ir_fix_stack_frame(ctx);
4110 }
4111
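/* Register-allocation entry point: run the linear scan and, on success, turn the
 * resulting intervals into concrete register assignments. */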
4112 int ir_reg_alloc(ir_ctx *ctx)
4113 {
4114 if (ir_linear_scan(ctx)) {
4115 assign_regs(ctx);
4116 return 1;
4117 }
4118 return 0;
4119 }
4120