1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (RA - Register Allocation, Liveness, Coalescing, SSA Deconstruction)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * See: "Linear Scan Register Allocation on SSA Form", Christian Wimmer and
8 * Michael Franz, CGO'10 (2010)
 * See: "Optimized Interval Splitting in a Linear Scan Register Allocator",
 * Christian Wimmer, VEE'05 (2005)
11 */
12
13 #ifndef _GNU_SOURCE
14 # define _GNU_SOURCE
15 #endif
16
17 #include <stdlib.h>
18 #include "ir.h"
19
20 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
21 # include "ir_x86.h"
22 #elif defined(IR_TARGET_AARCH64)
23 # include "ir_aarch64.h"
24 #else
25 # error "Unknown IR target"
26 #endif
27
28 #include "ir_private.h"
29
ir_regs_number(void)30 int ir_regs_number(void)
31 {
32 return IR_REG_NUM;
33 }
34
ir_reg_is_int(int32_t reg)35 bool ir_reg_is_int(int32_t reg)
36 {
37 IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
38 return reg >= IR_REG_GP_FIRST && reg <= IR_REG_GP_LAST;
39 }
40
/* Fallback virtual-register assignment, used before code selection has run
 * (ctx->rules == NULL): walk every reachable block and give each
 * value-producing instruction a fresh, dense vreg number.
 * Fills ctx->vregs (indexed by instruction ref; 0 = no vreg) and
 * ctx->vregs_count.  Always returns 1. */
static int ir_assign_virtual_registers_slow(ir_ctx *ctx)
{
	uint32_t *vregs;
	uint32_t vregs_count = 0;
	uint32_t b;
	ir_ref i, n;
	ir_block *bb;
	ir_insn *insn;
	uint32_t flags;

	/* Assign unique virtual register to each data node */
	/* calloc: instructions that get no vreg keep the 0 ("none") marker */
	vregs = ir_mem_calloc(ctx->insns_count, sizeof(ir_ref));
	n = 1;
	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		i = bb->start;

		/* skip first instruction */
		insn = ctx->ir_base + i;
		n = ir_insn_len(insn);
		i += n;
		insn += n;
		while (i < bb->end) {
			flags = ir_op_flags[insn->op];
			/* DATA nodes get a vreg except VARs and unused PARAMs;
			 * MEM nodes get one only when the loaded value has >1 use */
			if (((flags & IR_OP_FLAG_DATA) && insn->op != IR_VAR && (insn->op != IR_PARAM || ctx->use_lists[i].count > 0))
			 || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
				if (!ctx->rules || !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
					vregs[i] = ++vregs_count;
				}
			}
			n = ir_insn_len(insn);
			i += n;
			insn += n;
		}
	}
	ctx->vregs_count = vregs_count;
	ctx->vregs = vregs;

	return 1;
}
81
/* Assign a unique virtual register number to every instruction whose
 * selected rule produces a value that must be register-allocated.
 * Falls back to ir_assign_virtual_registers_slow() when code selection
 * has not run yet (ctx->rules == NULL).  Always returns 1. */
int ir_assign_virtual_registers(ir_ctx *ctx)
{
	uint32_t *vregs;
	uint32_t vregs_count = 0;
	ir_ref i;
	ir_insn *insn;

	if (!ctx->rules) {
		return ir_assign_virtual_registers_slow(ctx);
	}

	/* Assign unique virtual register to each rule that needs it */
	/* malloc (not calloc) is fine: every slot is written in the loop below */
	vregs = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));

	for (i = 1, insn = &ctx->ir_base[1]; i < ctx->insns_count; i++, insn++) {
		uint32_t v = 0;

		if (ctx->rules[i] && !(ctx->rules[i] & (IR_FUSED|IR_SKIPPED))) {
			uint32_t flags = ir_op_flags[insn->op];

			/* DATA nodes always get a vreg; MEM nodes only when used >1 time */
			if ((flags & IR_OP_FLAG_DATA)
			 || ((flags & IR_OP_FLAG_MEM) && ctx->use_lists[i].count > 1)) {
				v = ++vregs_count;
			}
		}
		vregs[i] = v;
	}

	ctx->vregs_count = vregs_count;
	ctx->vregs = vregs;

	return 1;
}
115
116 /* Lifetime intervals construction */
117
ir_new_live_range(ir_ctx * ctx,int v,ir_live_pos start,ir_live_pos end)118 static ir_live_interval *ir_new_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
119 {
120 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
121
122 ival->type = IR_VOID;
123 ival->reg = IR_REG_NONE;
124 ival->flags = 0;
125 ival->vreg = v;
126 ival->stack_spill_pos = -1; // not allocated
127 ival->range.start = start;
128 ival->range.end = ival->end = end;
129 ival->range.next = NULL;
130 ival->use_pos = NULL;
131 ival->next = NULL;
132
133 ctx->live_intervals[v] = ival;
134 return ival;
135 }
136
/* Add (or merge) the range start..end into the live interval of virtual
 * register 'v', creating the interval on first use.  Ranges inside an
 * interval are kept sorted by start position and disjoint; overlapping
 * insertions are merged, and range nodes swallowed by a merge are recycled
 * through the ctx->unused_ranges free list. */
static ir_live_interval *ir_add_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
{
	ir_live_interval *ival = ctx->live_intervals[v];
	ir_live_range *p, *q;

	if (!ival) {
		/* First range for this vreg: create the interval itself */
		return ir_new_live_range(ctx, v, start, end);
	}

	p = &ival->range;
	if (end >= p->start) {
		ir_live_range *prev = NULL;

		do {
			if (p->end >= start) {
				/* New range overlaps/touches 'p': widen 'p' in place */
				if (start < p->start) {
					p->start = start;
				}
				if (end > p->end) {
					/* merge with next */
					ir_live_range *next = p->next;

					p->end = end;
					while (next && p->end >= next->start) {
						if (next->end > p->end) {
							p->end = next->end;
						}
						p->next = next->next;
						/* remember in the "unused_ranges" list */
						next->next = ctx->unused_ranges;
						ctx->unused_ranges = next;
						next = p->next;
					}
					if (!p->next) {
						/* keep the cached overall end position up to date */
						ival->end = p->end;
					}
				}
				return ival;
			}
			prev = p;
			p = prev->next;
		} while (p && end >= p->start);
		if (!p) {
			ival->end = end;
		}
		if (prev) {
			/* Disjoint range between 'prev' and 'p': insert a fresh node */
			if (ctx->unused_ranges) {
				/* reuse */
				q = ctx->unused_ranges;
				ctx->unused_ranges = q->next;
			} else {
				q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
			}
			prev->next = q;
			q->start = start;
			q->end = end;
			q->next = p;
			return ival;
		}
	}

	/* New range precedes the interval's embedded head range: move the old
	 * head into a separate node and store the new range in ival->range */
	if (ctx->unused_ranges) {
		/* reuse */
		q = ctx->unused_ranges;
		ctx->unused_ranges = q->next;
	} else {
		q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
	}
	q->start = p->start;
	q->end = p->end;
	q->next = p->next;
	p->start = start;
	p->end = end;
	p->next = q;
	return ival;
}
213
ir_add_prev_live_range(ir_ctx * ctx,int v,ir_live_pos start,ir_live_pos end)214 IR_ALWAYS_INLINE ir_live_interval *ir_add_prev_live_range(ir_ctx *ctx, int v, ir_live_pos start, ir_live_pos end)
215 {
216 ir_live_interval *ival = ctx->live_intervals[v];
217
218 if (ival && ival->range.start == end) {
219 ival->range.start = start;
220 return ival;
221 }
222 return ir_add_live_range(ctx, v, start, end);
223 }
224
/* Add the range start..end to the FIXED interval of physical register 'reg'.
 * Fixed intervals are stored behind the regular vregs in ctx->live_intervals
 * (index vregs_count + 1 + reg) and block the allocator from using 'reg'
 * over that range.  Positions are visited in decreasing order, so the
 * common cases just create/prepend/extend the head range. */
static void ir_add_fixed_live_range(ir_ctx *ctx, ir_reg reg, ir_live_pos start, ir_live_pos end)
{
	int v = ctx->vregs_count + 1 + reg;
	ir_live_interval *ival = ctx->live_intervals[v];
	ir_live_range *q;

	if (!ival) {
		ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
		ival->type = IR_VOID;
		ival->reg = reg;
		ival->flags = IR_LIVE_INTERVAL_FIXED;
		ival->vreg = v;
		ival->stack_spill_pos = -1; // not allocated
		ival->range.start = start;
		ival->range.end = ival->end = end;
		ival->range.next = NULL;
		ival->use_pos = NULL;
		ival->next = NULL;

		ctx->live_intervals[v] = ival;
	} else if (EXPECTED(end < ival->range.start)) {
		/* Disjoint range strictly before the current head: prepend it */
		if (ctx->unused_ranges) {
			/* reuse */
			q = ctx->unused_ranges;
			ctx->unused_ranges = q->next;
		} else {
			q = ir_arena_alloc(&ctx->arena, sizeof(ir_live_range));
		}

		q->start = ival->range.start;
		q->end = ival->range.end;
		q->next = ival->range.next;
		ival->range.start = start;
		ival->range.end = end;
		ival->range.next = q;
	} else if (end == ival->range.start) {
		/* Adjacent to the head range: extend it backwards */
		ival->range.start = start;
	} else {
		/* Overlapping/out-of-order insertion: use the generic merge path */
		ir_add_live_range(ctx, v, start, end);
	}
}
266
/* Record a TEMPORARY register interval required by instruction 'tmp_ref'
 * (operand 'tmp_op_num') while executing 'ref'.  Temporary intervals are
 * kept in ctx->live_intervals[0] as a singly-linked list sorted by start
 * position (ties keep insertion order). */
static void ir_add_tmp(ir_ctx *ctx, ir_ref ref, ir_ref tmp_ref, int32_t tmp_op_num, ir_tmp_reg tmp_reg)
{
	ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));

	ival->type = tmp_reg.type;
	ival->reg = IR_REG_NONE;
	ival->flags = IR_LIVE_INTERVAL_TEMP;
	ival->tmp_ref = tmp_ref;
	ival->tmp_op_num = tmp_op_num;
	/* the temporary is live only over the sub-range of 'ref' given by tmp_reg */
	ival->range.start = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.start;
	ival->range.end = ival->end = IR_START_LIVE_POS_FROM_REF(ref) + tmp_reg.end;
	ival->range.next = NULL;
	ival->use_pos = NULL;

	if (!ctx->live_intervals[0]) {
		/* first temporary interval */
		ival->next = NULL;
		ctx->live_intervals[0] = ival;
	} else if (ival->range.start >= ctx->live_intervals[0]->range.start) {
		/* sorted insertion after the head */
		ir_live_interval *prev = ctx->live_intervals[0];

		while (prev->next && ival->range.start >= prev->next->range.start) {
			prev = prev->next;
		}
		ival->next = prev->next;
		prev->next = ival;
	} else {
		/* new head of the list */
		ir_live_interval *next = ctx->live_intervals[0];

		ival->next = next;
		ctx->live_intervals[0] = ival;
	}
	return;
}
300
ir_has_tmp(ir_ctx * ctx,ir_ref ref,int32_t op_num)301 static bool ir_has_tmp(ir_ctx *ctx, ir_ref ref, int32_t op_num)
302 {
303 ir_live_interval *ival = ctx->live_intervals[0];
304
305 if (ival) {
306 while (ival && IR_LIVE_POS_TO_REF(ival->range.start) <= ref) {
307 if (ival->tmp_ref == ref && ival->tmp_op_num == op_num) {
308 return 1;
309 }
310 ival = ival->next;
311 }
312 }
313 return 0;
314 }
315
ir_fix_live_range(ir_ctx * ctx,int v,ir_live_pos old_start,ir_live_pos new_start)316 static ir_live_interval *ir_fix_live_range(ir_ctx *ctx, int v, ir_live_pos old_start, ir_live_pos new_start)
317 {
318 ir_live_interval *ival = ctx->live_intervals[v];
319 ir_live_range *p = &ival->range;
320
321 #if 0
322 while (p && p->start < old_start) {
323 p = p->next;
324 }
325 #endif
326 IR_ASSERT(ival && p->start == old_start);
327 p->start = new_start;
328 return ival;
329 }
330
/* Insert 'use_pos' into the interval's use-position list, which is kept
 * sorted by position (new entries go before existing entries with an equal
 * or greater position, except that an equal head is kept first). */
static void ir_add_use_pos(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
{
	ir_use_pos *head = ival->use_pos;

	if (UNEXPECTED(head && head->pos <= use_pos->pos)) {
		/* Walk to the insertion point after the head */
		ir_use_pos *prev = head;

		while (prev->next && prev->next->pos < use_pos->pos) {
			prev = prev->next;
		}
		use_pos->next = prev->next;
		prev->next = use_pos;
	} else {
		/* Common case (backward scan): new head of the list */
		use_pos->next = head;
		ival->use_pos = use_pos;
	}
}
350
351
ir_add_use(ir_ctx * ctx,ir_live_interval * ival,int op_num,ir_live_pos pos,ir_reg hint,uint8_t use_flags,ir_ref hint_ref)352 IR_ALWAYS_INLINE void ir_add_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_reg hint, uint8_t use_flags, ir_ref hint_ref)
353 {
354 ir_use_pos *use_pos;
355
356 use_pos = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
357 use_pos->op_num = op_num;
358 use_pos->hint = hint;
359 use_pos->flags = use_flags;
360 use_pos->hint_ref = hint_ref;
361 use_pos->pos = pos;
362
363 if (hint != IR_REG_NONE) {
364 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
365 }
366 if (hint_ref > 0) {
367 ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
368 }
369
370 ir_add_use_pos(ctx, ival, use_pos);
371 }
372
/* Register a PHI-input use of 'ival' at 'pos' (end of the predecessor
 * block).  The PHI instruction itself is stored negated in hint_ref. */
static void ir_add_phi_use(ir_ctx *ctx, ir_live_interval *ival, int op_num, ir_live_pos pos, ir_ref phi_ref)
{
	ir_use_pos *use;

	IR_ASSERT(phi_ref > 0);
	use = ir_arena_alloc(&ctx->arena, sizeof(ir_use_pos));
	use->op_num = op_num;
	use->hint = IR_REG_NONE;
	use->flags = IR_PHI_USE | IR_USE_SHOULD_BE_IN_REG; // TODO: ???
	use->hint_ref = -phi_ref;
	use->pos = pos;

	ir_add_use_pos(ctx, ival, use);
}
387
/* Attach register hint 'hint' to the use position(s) of 'ref' at 'pos',
 * unless the interval already carries register hints of its own. */
static void ir_add_hint(ir_ctx *ctx, ir_ref ref, ir_live_pos pos, ir_reg hint)
{
	ir_live_interval *ival = ctx->live_intervals[ctx->vregs[ref]];
	ir_use_pos *use;

	if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
		/* Existing explicit hints take precedence */
		return;
	}
	for (use = ival->use_pos; use; use = use->next) {
		if (use->pos == pos && use->hint == IR_REG_NONE) {
			use->hint = hint;
			ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
		}
	}
}
406
ir_hint_propagation(ir_ctx * ctx)407 static void ir_hint_propagation(ir_ctx *ctx)
408 {
409 int i;
410 ir_live_interval *ival;
411 ir_use_pos *use_pos;
412 ir_use_pos *hint_use_pos;
413
414 for (i = ctx->vregs_count; i > 0; i--) {
415 ival = ctx->live_intervals[i];
416 if (ival
417 && (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) == (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
418 use_pos = ival->use_pos;
419 hint_use_pos = NULL;
420 while (use_pos) {
421 if (use_pos->op_num == 0) {
422 if (use_pos->hint_ref > 0) {
423 hint_use_pos = use_pos;
424 }
425 } else if (use_pos->hint != IR_REG_NONE) {
426 if (hint_use_pos) {
427 ir_add_hint(ctx, hint_use_pos->hint_ref, hint_use_pos->pos, use_pos->hint);
428 hint_use_pos = NULL;
429 }
430 }
431 use_pos = use_pos->next;
432 }
433 }
434 }
435 }
436
437 #ifdef IR_BITSET_LIVENESS
/* DFS + Loop-Forest liveness for SSA using bitset(s) */
/* For an OSR ENTRY block 'b', examine every virtual register live at the
 * entry: values bound to VAR slots are recorded in ctx->osr_entry_loads so
 * they can be re-loaded at the entry point; PARAMs are skipped; any other
 * live value is reported and triggers an assertion failure. */
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, ir_bitset live, uint32_t len, uint32_t b)
{
	bool ok = 1;
	int count = 0;
	ir_list *list = (ir_list*)ctx->osr_entry_loads;
	ir_ref i;

	IR_BITSET_FOREACH(live, len, i) {
		/* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
		ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
		ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);

		if (use_pos->op_num) {
			/* the first use is an operand, not a definition: resolve it */
			ir_ref *ops = ctx->ir_base[ref].ops;
			ref = ops[use_pos->op_num];
		}

		if (ctx->ir_base[ref].op == IR_PARAM) {
			continue;
		}
		if (ctx->binding) {
			ir_ref var = ir_binding_find(ctx, ref);
			if (var < 0) {
				/* We may load the value at OSR entry-point */
				if (!count) {
					bb->flags &= ~IR_BB_EMPTY;
					bb->flags |= IR_BB_OSR_ENTRY_LOADS;
					if (!ctx->osr_entry_loads) {
						list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
						ir_list_init(list, 16);
					}
					/* per-entry layout: block id, count placeholder, refs... */
					ir_list_push(list, b);
					ir_list_push(list, 0);
				}
				ir_list_push(list, ref);
				count++;
				continue;
			}
		}
		fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
		ok = 0;
	} IR_BITSET_FOREACH_END();

	if (!ok) {
		IR_ASSERT(0);
	}
	if (count) {
		/* patch the count placeholder pushed above */
		ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);

#if 0
		/* ENTRY "clobbers" all registers */
		ir_ref ref = ctx->ir_base[bb->start].op1;
		ir_add_fixed_live_range(ctx, IR_REG_ALL,
			IR_DEF_LIVE_POS_FROM_REF(ref),
			IR_SAVE_LIVE_POS_FROM_REF(ref));
#endif
	}
}
497
/* Walk the tree of fused (matched-into-parent) instructions rooted at
 * 'input' and record liveness for their non-fused operands.  'ref' is the
 * enclosing non-fused instruction; all use positions are anchored at 'ref'
 * because fused nodes have no position of their own.  'live' is the bitset
 * of vregs currently live in block 'bb'.
 * Fix: the stack-capacity assertion used sizeof(stack)/sizeof(stack_pos),
 * dividing by the size of the *counter* variable instead of an array
 * element; it only gave the right answer because sizeof(int) happens to
 * equal sizeof(ir_ref).  Now uses the canonical sizeof(stack)/sizeof(stack[0]). */
static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, ir_bitset live)
{
	ir_ref stack[4];
	int stack_pos = 0;
	ir_target_constraints constraints;
	ir_insn *insn;
	uint32_t j, n, flags, def_flags;
	ir_ref *p, child;
	uint8_t use_flags;
	ir_reg reg;
	ir_live_pos use_pos;
	ir_live_interval *ival;

	while (1) {
		IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);

		if (!(ctx->rules[input] & IR_SIMPLE)) {
			def_flags = ir_get_target_constraints(ctx, input, &constraints);
			n = constraints.tmps_count;
			while (n > 0) {
				n--;
				if (constraints.tmp_regs[n].type) {
					ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
				} else {
					/* CPU specific constraints */
					ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
						IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
						IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
				}
			}
		} else {
			def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			constraints.hints_count = 0;
		}

		insn = &ctx->ir_base[input];
		flags = ir_op_flags[insn->op];
		n = IR_INPUT_EDGES_COUNT(flags);
		j = 1;
		p = insn->ops + j;
		if (flags & IR_OP_FLAG_CONTROL) {
			/* op1 is the control link, not a data operand */
			j++;
			p++;
		}
		for (; j <= n; j++, p++) {
			IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
			child = *p;
			if (child > 0) {
				uint32_t v = ctx->vregs[child];

				if (v) {
					use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
					reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
					use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
					if (EXPECTED(reg == IR_REG_NONE)) {
						use_pos += IR_USE_SUB_REF;
					}

					if (!ir_bitset_in(live, v)) {
						/* live.add(opd) */
						ir_bitset_incl(live, v);
						/* intervals[opd].addRange(b.from, op.id) */
						ival = ir_add_live_range(ctx, v,
							IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
					} else {
						ival = ctx->live_intervals[v];
					}
					ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
				} else if (ctx->rules[child] & IR_FUSED) {
					/* defer nested fused children for a later iteration */
					IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack[0])));
					stack[stack_pos++] = child;
				} else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
					ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
				}
			}
		}
		if (!stack_pos) {
			break;
		}
		input = stack[--stack_pos];
	}
}
580
/* Build lifetime intervals (ranges, use positions and register hints) for
 * all virtual registers, scanning blocks and instructions backwards and
 * using per-block liveness bitsets (Wimmer/Franz style).  Requires a
 * linearized CFG (IR_LINEAR) and assigned vregs; returns 0 if those
 * prerequisites are missing, 1 on success. */
int ir_compute_live_ranges(ir_ctx *ctx)
{
	uint32_t b, i, j, k, n, succ, *p;
	ir_ref ref;
	uint32_t len;
	ir_insn *insn;
	ir_block *bb, *succ_bb;
#ifdef IR_DEBUG
	ir_bitset visited;
#endif
	ir_bitset live, bb_live;
	ir_bitset loops = NULL;
	ir_bitqueue queue;
	ir_live_interval *ival;

	if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
		return 0;
	}

	if (ctx->rules) {
		/* per-instruction register assignments, filled by the allocator */
		ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
		memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
	}

	/* Root of the list of IR_VARs */
	ctx->vars = IR_UNUSED;

	/* Compute Live Ranges */
	ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;
	len = ir_bitset_len(ctx->vregs_count + 1);
	/* one liveness bitset of 'len' words per block */
	bb_live = ir_mem_malloc((ctx->cfg_blocks_count + 1) * len * sizeof(ir_bitset_base_t));

	/* vregs + tmp + fixed + SCRATCH + ALL */
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

#ifdef IR_DEBUG
	visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
#endif

	if (!ctx->arena) {
		ctx->arena = ir_arena_create(16 * 1024);
	}

	/* for each basic block in reverse order */
	for (b = ctx->cfg_blocks_count; b > 0; b--) {
		bb = &ctx->cfg_blocks[b];
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		/* for each successor of b */

#ifdef IR_DEBUG
		ir_bitset_incl(visited, b);
#endif
		live = bb_live + (len * b);
		n = bb->successors_count;
		if (n == 0) {
			ir_bitset_clear(live, len);
		} else {
			p = &ctx->cfg_edges[bb->successors];
			succ = *p;

#ifdef IR_DEBUG
			/* blocks must be ordered where all dominators of a block are before this block */
			IR_ASSERT(ir_bitset_in(visited, succ) || bb->loop_header == succ);
#endif

			/* live = union of successors.liveIn */
			if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
				ir_bitset_copy(live, bb_live + (len * succ), len);
			} else {
				/* back edge or ENTRY successor: its live-in is not computed yet */
				IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
				ir_bitset_clear(live, len);
			}
			if (n > 1) {
				for (p++, n--; n > 0; p++, n--) {
					succ = *p;
					if (EXPECTED(succ > b) && EXPECTED(!(ctx->cfg_blocks[succ].flags & IR_BB_ENTRY))) {
						ir_bitset_union(live, bb_live + (len * succ), len);
					} else {
						IR_ASSERT(succ > b || (ctx->cfg_blocks[succ].flags & IR_BB_LOOP_HEADER));
					}
				}
			}

			/* for each opd in live */
			IR_BITSET_FOREACH(live, len, i) {
				/* intervals[opd].addRange(b.from, b.to) */
				ir_add_prev_live_range(ctx, i,
					IR_START_LIVE_POS_FROM_REF(bb->start),
					IR_END_LIVE_POS_FROM_REF(bb->end));
			} IR_BITSET_FOREACH_END();
		}

		if (bb->successors_count == 1) {
			/* for each phi function phi of successor */
			succ = ctx->cfg_edges[bb->successors];
			succ_bb = &ctx->cfg_blocks[succ];
			if (succ_bb->flags & IR_BB_HAS_PHI) {
				ir_use_list *use_list = &ctx->use_lists[succ_bb->start];

				k = ir_phi_input_number(ctx, succ_bb, b);
				IR_ASSERT(k != 0);
				for (ref = 0; ref < use_list->count; ref++) {
					ir_ref use = ctx->use_edges[use_list->refs + ref];
					insn = &ctx->ir_base[use];
					if (insn->op == IR_PHI) {
						ir_ref input = ir_insn_op(insn, k);
						if (input > 0) {
							uint32_t v = ctx->vregs[input];

							/* live.add(phi.inputOf(b)) */
							IR_ASSERT(v);
							ir_bitset_incl(live, v);
							/* intervals[phi.inputOf(b)].addRange(b.from, b.to) */
							ival = ir_add_prev_live_range(ctx, v,
								IR_START_LIVE_POS_FROM_REF(bb->start),
								IR_END_LIVE_POS_FROM_REF(bb->end));
							ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
						}
					}
				}
			}
		}

		/* for each operation op of b in reverse order */
		ref = bb->end;
		insn = &ctx->ir_base[ref];
		if (insn->op == IR_END || insn->op == IR_LOOP_END) {
			ref = ctx->prev_ref[ref];
		}
		for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
			uint32_t def_flags;
			uint32_t flags;
			ir_ref *p;
			ir_target_constraints constraints;
			uint32_t v;

			if (ctx->rules) {
				int n;

				if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
					if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) {
						/* chain used VARs into the ctx->vars list via op3 */
						insn = &ctx->ir_base[ref];
						insn->op3 = ctx->vars;
						ctx->vars = ref;
					}
					continue;
				}

				def_flags = ir_get_target_constraints(ctx, ref, &constraints);
				n = constraints.tmps_count;
				while (n > 0) {
					n--;
					if (constraints.tmp_regs[n].type) {
						ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
					} else {
						/* CPU specific constraints */
						ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
							IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
							IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
					}
				}
			} else {
				def_flags = 0;
				constraints.def_reg = IR_REG_NONE;
				constraints.hints_count = 0;
			}

			insn = &ctx->ir_base[ref];
			v = ctx->vregs[ref];
			if (v) {
				IR_ASSERT(ir_bitset_in(live, v));

				if (insn->op != IR_PHI) {
					ir_live_pos def_pos;
					ir_ref hint_ref = 0;
					ir_reg reg = constraints.def_reg;

					if (reg != IR_REG_NONE) {
						def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
						if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
							/* parameter register must be kept before it's copied */
							ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
						}
					} else if (def_flags & IR_DEF_REUSES_OP1_REG) {
						if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
							hint_ref = insn->op1;
						}
						def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
					} else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
						def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
					} else {
						if (insn->op == IR_PARAM) {
							/* We may reuse parameter stack slot for spilling */
							ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
						} else if (insn->op == IR_VLOAD) {
							/* Load may be fused into the usage instruction */
							ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
						}
						def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
					}
					/* live.remove(opd) */
					ir_bitset_excl(live, v);
					/* intervals[opd].setFrom(op.id) */
					ival = ir_fix_live_range(ctx, v,
						IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
					ival->type = insn->type;
					ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
				} else {
					/* live.remove(opd) */
					ir_bitset_excl(live, v);
					/* PHIs inputs must not be processed */
					ival = ctx->live_intervals[v];
					if (UNEXPECTED(!ival)) {
						/* Dead PHI */
						ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
					}
					ival->type = insn->type;
					ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
					continue;
				}
			}

			IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
			flags = ir_op_flags[insn->op];
			j = 1;
			p = insn->ops + 1;
			if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
				/* op1 is a control/memory link, not a data operand */
				j++;
				p++;
			}
			for (; j <= insn->inputs_count; j++, p++) {
				ir_ref input = *p;
				ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
				ir_live_pos use_pos;
				ir_ref hint_ref = 0;
				uint32_t v;

				if (input > 0) {
					v = ctx->vregs[input];
					if (v) {
						use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
						if (reg != IR_REG_NONE) {
							use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
							ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
						} else if (def_flags & IR_DEF_REUSES_OP1_REG) {
							if (j == 1) {
								use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
								IR_ASSERT(ctx->vregs[ref]);
								hint_ref = ref;
							} else if (input == insn->op1) {
								/* Input is the same as "op1" */
								use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
							}
						}
						if (!ir_bitset_in(live, v)) {
							/* live.add(opd) */
							ir_bitset_incl(live, v);
							/* intervals[opd].addRange(b.from, op.id) */
							ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
						} else {
							ival = ctx->live_intervals[v];
						}
						ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
					} else if (ctx->rules) {
						if (ctx->rules[input] & IR_FUSED) {
							ir_add_fusion_ranges(ctx, ref, input, bb, live);
						} else if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
							ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
						}
					}
				} else if (reg != IR_REG_NONE) {
					use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
					ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
				}
			}
		}

		/* if b is loop header */
		if ((bb->flags & IR_BB_LOOP_HEADER)
		 && !ir_bitset_empty(live, len)) {
			/* variables live at loop header are alive at the whole loop body */
			uint32_t bb_set_len = ir_bitset_len(ctx->cfg_blocks_count + 1);
			uint32_t child;
			ir_block *child_bb;
			ir_bitset child_live_in;

			if (!loops) {
				/* lazily allocated; reused across loop headers */
				loops = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
				ir_bitqueue_init(&queue, ctx->cfg_blocks_count + 1);
			} else {
				ir_bitset_clear(loops, bb_set_len);
				ir_bitqueue_clear(&queue);
			}
			ir_bitset_incl(loops, b);
			child = b;
			do {
				child_bb = &ctx->cfg_blocks[child];
				child_live_in = bb_live + (len * child);

				IR_BITSET_FOREACH(live, len, i) {
					ir_bitset_incl(child_live_in, i);
					ir_add_live_range(ctx, i,
						IR_START_LIVE_POS_FROM_REF(child_bb->start),
						IR_END_LIVE_POS_FROM_REF(child_bb->end));
				} IR_BITSET_FOREACH_END();

				/* enqueue dominated blocks that belong to loops rooted here */
				child = child_bb->dom_child;
				while (child) {
					child_bb = &ctx->cfg_blocks[child];
					if (child_bb->loop_header && ir_bitset_in(loops, child_bb->loop_header)) {
						ir_bitqueue_add(&queue, child);
						if (child_bb->flags & IR_BB_LOOP_HEADER) {
							ir_bitset_incl(loops, child);
						}
					}
					child = child_bb->dom_next_child;
				}
			} while ((child = ir_bitqueue_pop(&queue)) != (uint32_t)-1);
		}
	}

	if (ctx->entries) {
		/* record OSR entry loads for values live at each entry block */
		for (i = 0; i < ctx->entries_count; i++) {
			b = ctx->entries[i];
			bb = &ctx->cfg_blocks[b];
			live = bb_live + (len * b);
			ir_add_osr_entry_loads(ctx, bb, live, len, b);
		}
		if (ctx->osr_entry_loads) {
			ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
		}
	}

	if (loops) {
		ir_mem_free(loops);
		ir_bitqueue_free(&queue);
	}

	ir_mem_free(bb_live);
#ifdef IR_DEBUG
	ir_mem_free(visited);
#endif

	return 1;
}
926
927 #else
928 /* Path exploration by definition liveness for SSA using sets represented by linked lists */
929
/* Per-vreg cache of the block a virtual register was last seen live in —
 * presumably 'live_in_block' is a local array in the using function (not
 * visible in this chunk; confirm against the callers below). */
#define IS_LIVE_IN_BLOCK(v, b) \
	(live_in_block[v] == b)
#define SET_LIVE_IN_BLOCK(v, b) do { \
		live_in_block[v] = b; \
	} while (0)
935
/* Returns the last virtual register alive at the end of the block (it is used as an already-visited marker) */
IR_ALWAYS_INLINE uint32_t ir_live_out_top(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b)
{
#if 0
	return live_outs[b];
#else
	if (!live_outs[b]) {
		/* empty list: -1 wraps to UINT32_MAX, which never matches a vreg */
		return -1;
	}
	/* live_outs[b] is the index of the list head; the vreg is stored there */
	return ir_list_at(live_lists, live_outs[b]);
#endif
}
948
/* Remember a virtual register alive at the end of the block */
/* Each list node occupies two consecutive slots in 'live_lists':
 * [prev-head index, vreg]; live_outs[b] points at the vreg slot of the head. */
IR_ALWAYS_INLINE void ir_live_out_push(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists, uint32_t b, uint32_t v)
{
#if 0
	ir_block *bb = &ctx->cfg_blocks[b];
	live_outs[b] = v;
	ir_add_prev_live_range(ctx, v,
		IR_START_LIVE_POS_FROM_REF(bb->start),
		IR_END_LIVE_POS_FROM_REF(bb->end));
#else
	if (live_lists->len >= live_lists->a.size) {
		ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
	}
	/* Form a linked list of virtual register live at the end of the block */
	ir_list_push_unchecked(live_lists, live_outs[b]); /* push old root of the list (previous element of the list) */
	live_outs[b] = ir_list_len(live_lists); /* remember the new root */
	ir_list_push_unchecked(live_lists, v); /* push a virtual register */
#endif
}
968
/*
 * Computes live-out sets for each basic-block per variable using def-use chains.
 *
 * The implementation is based on algorithms 6 and 7 described in
 * "Computing Liveness Sets for SSA-Form Programs", Florian Brandner, Benoit Boissinot,
 * Alain Darte, Benoit Dupont de Dinechin, Fabrice Rastello. TR Inria RR-7503, 2011
 */
/* Compute, for every basic block, the set of virtual registers live at its
 * end.  Sets are stored as linked lists inside 'live_lists', with
 * 'live_outs[b]' holding the head index for block b (see ir_live_out_push). */
static void ir_compute_live_sets(ir_ctx *ctx, uint32_t *live_outs, ir_list *live_lists)
{
	ir_list block_queue, fuse_queue;
	ir_ref i;

	ir_list_init(&fuse_queue, 16);
	ir_list_init(&block_queue, 256);

	/* For each virtual register explore paths from all uses to definition */
	for (i = ctx->insns_count - 1; i > 0; i--) {
		uint32_t v = ctx->vregs[i];

		if (v) {
			uint32_t def_block = ctx->cfg_map[i];
			ir_use_list *use_list = &ctx->use_lists[i];
			ir_ref *p, n = use_list->count;

			/* Collect all blocks where 'v' is used into a 'block_queue' */
			for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
				ir_ref use = *p;
				ir_insn *insn = &ctx->ir_base[use];

				if (UNEXPECTED(insn->op == IR_PHI)) {
					ir_ref n = insn->inputs_count - 1;
					ir_ref *p = insn->ops + 2; /* PHI data inputs */
					ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */

					/* a PHI use is live-out of the matching predecessor,
					 * not of the PHI's own block */
					for (;n > 0; p++, q++, n--) {
						if (*p == i) {
							uint32_t pred_block = ctx->cfg_map[*q];

							if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
								ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
								if (pred_block != def_block) {
									ir_list_push(&block_queue, pred_block);
								}
							}
						}
					}
				} else if (ctx->rules && UNEXPECTED(ctx->rules[use] & IR_FUSED)) {
					/* a fused use has no block of its own: walk up through
					 * the chain of fused users to the real (non-fused) ones */
					while (1) {
						ir_use_list *use_list = &ctx->use_lists[use];
						ir_ref *p, n = use_list->count;

						for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
							ir_ref use = *p;

							if (ctx->rules[use] & IR_FUSED) {
								ir_list_push(&fuse_queue, use);
							} else {
								uint32_t use_block = ctx->cfg_map[use];

								if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
									ir_list_push(&block_queue, use_block);
								}
							}
						}
						if (!ir_list_len(&fuse_queue)) {
							break;
						}
						use = ir_list_pop(&fuse_queue);
					}
				} else {
					uint32_t use_block = ctx->cfg_map[use];

					/* Check if the virtual register is alive at the start of 'use_block' */
					if (def_block != use_block && ir_live_out_top(ctx, live_outs, live_lists, use_block) != v) {
						ir_list_push(&block_queue, use_block);
					}
				}
			}

			/* UP_AND_MARK: Traverse through predecessor blocks until we reach the block where 'v' is defined*/
			while (ir_list_len(&block_queue)) {
				uint32_t b = ir_list_pop(&block_queue);
				ir_block *bb = &ctx->cfg_blocks[b];
				uint32_t *p, n = bb->predecessors_count;

				if (bb->flags & IR_BB_ENTRY) {
					/* live_in_push(ENTRY, v) */
					ir_insn *insn = &ctx->ir_base[bb->start];

					IR_ASSERT(insn->op == IR_ENTRY);
					IR_ASSERT(insn->op3 >= 0 && insn->op3 < (ir_ref)ctx->entries_count);
					if (live_lists->len >= live_lists->a.size) {
						ir_array_grow(&live_lists->a, live_lists->a.size + 1024);
					}
					/* ENTRY live-in lists live after the per-block slots */
					ir_list_push_unchecked(live_lists, live_outs[ctx->cfg_blocks_count + 1 + insn->op3]);
					ir_list_push_unchecked(live_lists, v);
					live_outs[ctx->cfg_blocks_count + 1 + insn->op3] = ir_list_len(live_lists) - 1;
					continue;
				}
				for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
					uint32_t pred_block = *p;

					/* Check if 'pred_block' wasn't traversed before */
					if (ir_live_out_top(ctx, live_outs, live_lists, pred_block) != v) {
						/* Mark a virtual register 'v' alive at the end of 'pred_block' */
						ir_live_out_push(ctx, live_outs, live_lists, pred_block, v);
						if (pred_block != def_block) {
							ir_list_push(&block_queue, pred_block);
						}
					}
				}
			}
		}
	}

	ir_list_free(&block_queue);
	ir_list_free(&fuse_queue);
}
1087
/* Validate the set of virtual registers live at an OSR ENTRY and record the
 * values that must be (re)loaded at that entry point.
 *
 * 'pos' is the head of the linked live-in list for this ENTRY, stored inside
 * 'live_lists' as {prev_pos, vreg} pairs (pos==0 terminates the list).
 * 'bb'/'b' identify the ENTRY basic block.
 *
 * Live values that are PARAMs are skipped; values bound to stack variables
 * need no load; unbound values are appended to ctx->osr_entry_loads as a
 * {block, count, ref...} record.  Anything else is a hard error (assert).
 */
static void ir_add_osr_entry_loads(ir_ctx *ctx, ir_block *bb, uint32_t pos, ir_list *live_lists, uint32_t b)
{
	bool ok = 1;
	int count = 0;
	ir_list *list = (ir_list*)ctx->osr_entry_loads;
	ir_ref i;

	while (pos) {
		/* node layout: live_lists[pos] = vreg, live_lists[pos - 1] = next node */
		i = ir_list_at(live_lists, pos);
		pos = ir_list_at(live_lists, pos - 1);

		/* Skip live references from ENTRY to PARAM. TODO: duplicate PARAM in each ENTRY ??? */
		ir_use_pos *use_pos = ctx->live_intervals[i]->use_pos;
		ir_ref ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);

		if (use_pos->op_num) {
			/* the first use is through an input operand: follow it to the defining node */
			ir_ref *ops = ctx->ir_base[ref].ops;
			ref = ops[use_pos->op_num];
		}

		if (ctx->ir_base[ref].op == IR_PARAM) {
			continue;
		}
		if (ctx->binding) {
			ir_ref var = ir_binding_find(ctx, ref);
			if (var < 0) {
				/* We may load the value at OSR entry-point */
				if (!count) {
					/* first load for this block: open a {block, count} record */
					bb->flags &= ~IR_BB_EMPTY;
					bb->flags |= IR_BB_OSR_ENTRY_LOADS;
					if (!ctx->osr_entry_loads) {
						list = ctx->osr_entry_loads = ir_mem_malloc(sizeof(ir_list));
						ir_list_init(list, 16);
					}
					ir_list_push(list, b);
					ir_list_push(list, 0); /* count placeholder, patched below */
				}
				ir_list_push(list, ref);
				count++;
				continue;
			}
		}
		/* live value that can be neither skipped nor reloaded at the entry */
		fprintf(stderr, "ENTRY %d (block %d start %d) - live var %d\n", ctx->ir_base[bb->start].op2, b, bb->start, ref);
		ok = 0;
	}

	if (!ok) {
		IR_ASSERT(0);
	}
	if (count) {
		/* patch the recorded number of loads for this entry */
		ir_list_set(list, ir_list_len(ctx->osr_entry_loads) - (count + 1), count);

#if 0
		/* ENTRY "clobbers" all registers */
		ir_ref ref = ctx->ir_base[bb->start].op1;
		ir_add_fixed_live_range(ctx, IR_REG_ALL,
			IR_DEF_LIVE_POS_FROM_REF(ref),
			IR_SAVE_LIVE_POS_FROM_REF(ref));
#endif
	}
}
1149
/* Add live ranges and use positions for the operands of instructions fused
 * into 'ref'.  'input' is a fused instruction (IR_FUSED rule); its data
 * operands are treated as used at 'ref' itself.  Nested fused children are
 * processed iteratively through a small fixed-size work stack.
 *
 * All use positions are anchored at IR_START_LIVE_POS_FROM_REF(ref) because
 * the fused tree is emitted as part of the single instruction 'ref'.
 */
static void ir_add_fusion_ranges(ir_ctx *ctx, ir_ref ref, ir_ref input, ir_block *bb, uint32_t *live_in_block, uint32_t b)
{
	ir_ref stack[4];
	int stack_pos = 0;
	ir_target_constraints constraints;
	ir_insn *insn;
	uint32_t j, n, flags, def_flags;
	ir_ref *p, child;
	uint8_t use_flags;
	ir_reg reg;
	ir_live_pos pos = IR_START_LIVE_POS_FROM_REF(ref);
	ir_live_pos use_pos;
	ir_live_interval *ival;

	while (1) {
		IR_ASSERT(input > 0 && ctx->rules[input] & IR_FUSED);

		if (!(ctx->rules[input] & IR_SIMPLE)) {
			/* collect scratch registers and CPU-specific fixed ranges for this node */
			def_flags = ir_get_target_constraints(ctx, input, &constraints);
			n = constraints.tmps_count;
			while (n > 0) {
				n--;
				if (constraints.tmp_regs[n].type) {
					ir_add_tmp(ctx, ref, input, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
				} else {
					/* CPU specific constraints */
					ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
						pos + constraints.tmp_regs[n].start,
						pos + constraints.tmp_regs[n].end);
				}
			}
		} else {
			def_flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			constraints.hints_count = 0;
		}

		insn = &ctx->ir_base[input];
		flags = ir_op_flags[insn->op];
		IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
		n = IR_INPUT_EDGES_COUNT(flags);
		j = 1;
		p = insn->ops + j;
		if (flags & IR_OP_FLAG_CONTROL) {
			/* skip the control operand; only data operands get live ranges */
			j++;
			p++;
		}
		for (; j <= n; j++, p++) {
			IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
			child = *p;
			if (child > 0) {
				uint32_t v = ctx->vregs[child];

				if (v) {
					reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
					use_pos = pos;
					if (EXPECTED(reg == IR_REG_NONE)) {
						use_pos += IR_USE_SUB_REF;
					}

					if (!IS_LIVE_IN_BLOCK(v, b)) {
						/* live.add(opd) */
						SET_LIVE_IN_BLOCK(v, b);
						/* intervals[opd].addRange(b.from, op.id) */
						ival = ir_add_live_range(ctx, v,
							IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
					} else {
						ival = ctx->live_intervals[v];
					}
					use_flags = IR_FUSED_USE | IR_USE_FLAGS(def_flags, j);
					/* negative hint_ref (-input) marks the use as belonging to a fused node */
					ir_add_use(ctx, ival, j, use_pos, reg, use_flags, -input);
				} else if (ctx->rules[child] & IR_FUSED) {
					/* was sizeof(stack)/sizeof(stack_pos): correct only while
					 * sizeof(ir_ref) == sizeof(int); use the element size instead */
					IR_ASSERT(stack_pos < (int)(sizeof(stack)/sizeof(stack[0])));
					stack[stack_pos++] = child;
				} else if (ctx->rules[child] == (IR_SKIPPED|IR_RLOAD)) {
					ir_set_alocated_reg(ctx, input, j, ctx->ir_base[child].op2);
				}
			}
		}
		if (!stack_pos) {
			break;
		}
		input = stack[--stack_pos];
	}
}
1234
/* Build live intervals (live ranges + use positions + register hints) for all
 * virtual registers, walking blocks and instructions in reverse linear order.
 * Requires a linearized CFG (IR_LINEAR) and assigned vregs; returns 0 when
 * these prerequisites are missing, 1 on success.
 */
int ir_compute_live_ranges(ir_ctx *ctx)
{
	uint32_t b, i, j, k, n, succ;
	ir_ref ref;
	ir_insn *insn;
	ir_block *bb, *succ_bb;
	uint32_t *live_outs;
	uint32_t *live_in_block;
	ir_list live_lists;
	ir_live_interval *ival;

	if (!(ctx->flags2 & IR_LINEAR) || !ctx->vregs) {
		return 0;
	}

	if (ctx->rules) {
		/* per-instruction register assignment table, initialized to "no register" */
		ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
		memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
	}

	/* Root of the list of IR_VARs */
	ctx->vars = IR_UNUSED;

	/* Compute Live Ranges */
	ctx->flags2 &= ~IR_LR_HAVE_DESSA_MOVES;

	/* vregs + tmp + fixed + SCRATCH + ALL */
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

	if (!ctx->arena) {
		ctx->arena = ir_arena_create(16 * 1024);
	}

	/* live_outs[b] is the head of a linked list (in live_lists) of vregs live at the end of block b */
	live_outs = ir_mem_calloc(ctx->cfg_blocks_count + 1 + ctx->entries_count, sizeof(uint32_t));
	ir_list_init(&live_lists, 1024);
	ir_compute_live_sets(ctx, live_outs, &live_lists);
	live_in_block = ir_mem_calloc(ctx->vregs_count + 1, sizeof(uint32_t));

	/* for each basic block in reverse order */
	for (b = ctx->cfg_blocks_count; b > 0; b--) {
		bb = &ctx->cfg_blocks[b];
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));

		/* For all virtual register alive at the end of the block */
		n = live_outs[b];
		while (n != 0) {
			i = ir_list_at(&live_lists, n);
			SET_LIVE_IN_BLOCK(i, b);
			/* pre-extend the interval across the whole block; later fixed up at the definition */
			ir_add_prev_live_range(ctx, i,
				IR_START_LIVE_POS_FROM_REF(bb->start),
				IR_END_LIVE_POS_FROM_REF(bb->end));
			n = ir_list_at(&live_lists, n - 1);
		}

		if (bb->successors_count == 1) {
			/* for each phi function of the successor */
			succ = ctx->cfg_edges[bb->successors];
			succ_bb = &ctx->cfg_blocks[succ];
			if (succ_bb->flags & IR_BB_HAS_PHI) {
				ir_use_list *use_list = &ctx->use_lists[succ_bb->start];
				ir_ref n, *p;

				k = ir_phi_input_number(ctx, succ_bb, b);
				IR_ASSERT(k != 0);
				n = use_list->count;
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					ir_ref use = *p;
					insn = &ctx->ir_base[use];
					if (insn->op == IR_PHI) {
						ir_ref input = ir_insn_op(insn, k);
						if (input > 0) {
							uint32_t v = ctx->vregs[input];

							IR_ASSERT(v);
							ival = ctx->live_intervals[v];
							/* the PHI input is used at the end of this predecessor block */
							ir_add_phi_use(ctx, ival, k, IR_DEF_LIVE_POS_FROM_REF(bb->end), use);
						}
					}
				}
			}
		}

		/* for each operation of the block in reverse order */
		ref = bb->end;
		insn = &ctx->ir_base[ref];
		if (insn->op == IR_END || insn->op == IR_LOOP_END) {
			ref = ctx->prev_ref[ref];
		}
		for (; ref > bb->start; ref = ctx->prev_ref[ref]) {
			uint32_t def_flags;
			uint32_t flags;
			ir_ref *p;
			ir_target_constraints constraints;
			uint32_t v;

			if (ctx->rules) {
				int n;

				if (ctx->rules[ref] & (IR_FUSED|IR_SKIPPED)) {
					if (ctx->rules[ref] == (IR_SKIPPED|IR_VAR) && ctx->use_lists[ref].count > 0) {
						/* thread used IR_VARs onto the ctx->vars list via op3 */
						insn = &ctx->ir_base[ref];
						insn->op3 = ctx->vars;
						ctx->vars = ref;
					}
					continue;
				}

				def_flags = ir_get_target_constraints(ctx, ref, &constraints);
				n = constraints.tmps_count;
				while (n > 0) {
					n--;
					if (constraints.tmp_regs[n].type) {
						ir_add_tmp(ctx, ref, ref, constraints.tmp_regs[n].num, constraints.tmp_regs[n]);
					} else {
						/* CPU specific constraints */
						ir_add_fixed_live_range(ctx, constraints.tmp_regs[n].reg,
							IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].start,
							IR_START_LIVE_POS_FROM_REF(ref) + constraints.tmp_regs[n].end);
					}
				}
			} else {
				def_flags = 0;
				constraints.def_reg = IR_REG_NONE;
				constraints.hints_count = 0;
			}

			insn = &ctx->ir_base[ref];
			v = ctx->vregs[ref];
			if (v) {
				if (insn->op != IR_PHI) {
					ir_live_pos def_pos;
					ir_ref hint_ref = 0;
					ir_reg reg = constraints.def_reg;

					/* choose the definition position depending on register constraints */
					if (reg != IR_REG_NONE) {
						def_pos = IR_SAVE_LIVE_POS_FROM_REF(ref);
						if (insn->op == IR_PARAM || insn->op == IR_RLOAD) {
							/* parameter register must be kept before it's copied */
							ir_add_fixed_live_range(ctx, reg, IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
						}
					} else if (def_flags & IR_DEF_REUSES_OP1_REG) {
						if (!IR_IS_CONST_REF(insn->op1) && ctx->vregs[insn->op1]) {
							/* hint: prefer the same register as op1 */
							hint_ref = insn->op1;
						}
						if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
							def_pos = IR_USE_LIVE_POS_FROM_REF(ref);
						} else {
							def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
						}
					} else if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
						def_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
					} else {
						if (insn->op == IR_PARAM) {
							/* We may reuse parameter stack slot for spilling */
							ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
						} else if (insn->op == IR_VLOAD) {
							/* Load may be fused into the usage instruction */
							ctx->live_intervals[v]->flags |= IR_LIVE_INTERVAL_MEM_LOAD;
						}
						def_pos = IR_DEF_LIVE_POS_FROM_REF(ref);
					}
					/* intervals[opd].setFrom(op.id) */
					ival = ir_fix_live_range(ctx, v,
						IR_START_LIVE_POS_FROM_REF(bb->start), def_pos);
					ival->type = insn->type;
					ir_add_use(ctx, ival, 0, def_pos, reg, def_flags, hint_ref);
				} else {
					/* PHIs inputs must not be processed */
					ival = ctx->live_intervals[v];
					if (UNEXPECTED(!ival)) {
						/* Dead PHI */
						ival = ir_add_live_range(ctx, v, IR_DEF_LIVE_POS_FROM_REF(ref), IR_USE_LIVE_POS_FROM_REF(ref));
					}
					ival->type = insn->type;
					ir_add_use(ctx, ival, 0, IR_DEF_LIVE_POS_FROM_REF(ref), IR_REG_NONE, IR_USE_SHOULD_BE_IN_REG, 0);
					continue;
				}
			}

			IR_ASSERT(insn->op != IR_PHI && (!ctx->rules || !(ctx->rules[ref] & (IR_FUSED|IR_SKIPPED))));
			flags = ir_op_flags[insn->op];
			j = 1;
			p = insn->ops + 1;
			if (flags & (IR_OP_FLAG_CONTROL|IR_OP_FLAG_MEM|IR_OP_FLAG_PINNED)) {
				/* op1 is a control/memory dependency, not a data input */
				j++;
				p++;
			}
			for (; j <= insn->inputs_count; j++, p++) {
				ir_ref input = *p;
				ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
				ir_live_pos use_pos;
				ir_ref hint_ref = 0;
				uint32_t v;

				if (input > 0) {
					v = ctx->vregs[input];
					if (v) {
						use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
						if (reg != IR_REG_NONE) {
							/* a fixed register is required: pin it for the duration of the use */
							use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
							ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
						} else if (def_flags & IR_DEF_REUSES_OP1_REG) {
							if (j == 1) {
								if (def_flags & IR_DEF_CONFLICTS_WITH_INPUT_REGS) {
									use_pos = IR_USE_LIVE_POS_FROM_REF(ref);
								} else {
									use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
								}
								IR_ASSERT(ctx->vregs[ref]);
								hint_ref = ref;
							} else if (input == insn->op1) {
								/* Input is the same as "op1" */
								use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
							}
						}
						if (!IS_LIVE_IN_BLOCK(v, b)) {
							/* live.add(opd) */
							SET_LIVE_IN_BLOCK(v, b);
							/* intervals[opd].addRange(b.from, op.id) */
							ival = ir_add_live_range(ctx, v, IR_START_LIVE_POS_FROM_REF(bb->start), use_pos);
						} else {
							ival = ctx->live_intervals[v];
						}
						ir_add_use(ctx, ival, j, use_pos, reg, IR_USE_FLAGS(def_flags, j), hint_ref);
					} else if (ctx->rules) {
						if (ctx->rules[input] & IR_FUSED) {
							/* operands of fused instructions are used at 'ref' itself */
							ir_add_fusion_ranges(ctx, ref, input, bb, live_in_block, b);
						} else {
							if (ctx->rules[input] == (IR_SKIPPED|IR_RLOAD)) {
								ir_set_alocated_reg(ctx, ref, j, ctx->ir_base[input].op2);
							}
							if (reg != IR_REG_NONE) {
								use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
								ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
							}
						}
					}
				} else if (reg != IR_REG_NONE) {
					/* constant input with a fixed-register constraint */
					use_pos = IR_LOAD_LIVE_POS_FROM_REF(ref);
					ir_add_fixed_live_range(ctx, reg, use_pos, use_pos + IR_USE_SUB_REF);
				}
			}
		}
	}

	if (ctx->entries) {
		/* record values that must be reloaded at each OSR entry */
		for (i = 0; i < ctx->entries_count; i++) {
			b = ctx->entries[i];
			bb = &ctx->cfg_blocks[b];
			IR_ASSERT(bb->predecessors_count == 1);
			ir_add_osr_entry_loads(ctx, bb, live_outs[ctx->cfg_blocks_count + 1 + i], &live_lists, b);
		}
		if (ctx->osr_entry_loads) {
			ir_list_push((ir_list*)ctx->osr_entry_loads, 0);
		}
	}

	ir_list_free(&live_lists);
	ir_mem_free(live_outs);
	ir_mem_free(live_in_block);

	return 1;
}
1498
1499 #endif
1500
1501 /* Live Ranges coalescing */
1502
ir_ivals_overlap(ir_live_range * lrg1,ir_live_range * lrg2)1503 static ir_live_pos ir_ivals_overlap(ir_live_range *lrg1, ir_live_range *lrg2)
1504 {
1505 while (1) {
1506 if (lrg2->start < lrg1->end) {
1507 if (lrg1->start < lrg2->end) {
1508 return IR_MAX(lrg1->start, lrg2->start);
1509 } else {
1510 lrg2 = lrg2->next;
1511 if (!lrg2) {
1512 return 0;
1513 }
1514 }
1515 } else {
1516 lrg1 = lrg1->next;
1517 if (!lrg1) {
1518 return 0;
1519 }
1520 }
1521 }
1522 }
1523
ir_vregs_overlap(ir_ctx * ctx,uint32_t r1,uint32_t r2)1524 static ir_live_pos ir_vregs_overlap(ir_ctx *ctx, uint32_t r1, uint32_t r2)
1525 {
1526 ir_live_interval *ival1 = ctx->live_intervals[r1];
1527 ir_live_interval *ival2 = ctx->live_intervals[r2];
1528
1529 #if 0
1530 if (ival2->range.start >= ival1->end
1531 || ival1->range.start >= ival2->end) {
1532 return 0;
1533 }
1534 #endif
1535 return ir_ivals_overlap(&ival1->range, &ival2->range);
1536 }
1537
/* Merge the live interval of vreg r2 into r1 (r2 becomes dead).
 * All ranges of r2 are added to r1 (r2's range nodes are recycled through
 * ctx->unused_ranges), the two sorted use_pos lists are merged in place,
 * and hints that referred to the now-joined counterpart are cleared. */
static void ir_vregs_join(ir_ctx *ctx, uint32_t r1, uint32_t r2)
{
	ir_live_interval *ival = ctx->live_intervals[r2];
	ir_live_range *live_range = &ival->range;
	ir_live_range *next;
	ir_use_pos *use_pos, *next_pos, **prev;

#if 0
	fprintf(stderr, "COALESCE %d -> %d\n", r2, r1);
#endif

	/* the first range is embedded in the interval; only chained ranges are recycled */
	ir_add_live_range(ctx, r1, live_range->start, live_range->end);
	live_range = live_range->next;
	while (live_range) {
		next = live_range->next;
		live_range->next = ctx->unused_ranges;
		ctx->unused_ranges = live_range;
		ir_add_live_range(ctx, r1, live_range->start, live_range->end);
		live_range = next;
	}

	/* merge sorted use_pos lists */
	prev = &ctx->live_intervals[r1]->use_pos;
	use_pos = ival->use_pos;
	while (use_pos) {
		/* a hint pointing at the vreg we join with is now self-referential: drop it */
		if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r1) {
			use_pos->hint_ref = 0;
		}
		/* advance 'prev' past all r1 positions that sort before this r2 position */
		while (*prev && ((*prev)->pos < use_pos->pos ||
			((*prev)->pos == use_pos->pos &&
				(use_pos->op_num == 0 || (*prev)->op_num < use_pos->op_num)))) {
			if ((*prev)->hint_ref > 0 && ctx->vregs[(*prev)->hint_ref] == r2) {
				(*prev)->hint_ref = 0;
			}
			prev = &(*prev)->next;
		}
		next_pos = use_pos->next;
		use_pos->next = *prev;
		*prev = use_pos;
		prev = &use_pos->next;
		use_pos = next_pos;
	}
	/* clear stale r2 hints in the remaining tail of r1's list */
	use_pos = *prev;
	while (use_pos) {
		if (use_pos->hint_ref > 0 && ctx->vregs[use_pos->hint_ref] == r2) {
			use_pos->hint_ref = 0;
		}
		use_pos = use_pos->next;
	}

	ctx->live_intervals[r1]->flags |=
		IR_LIVE_INTERVAL_COALESCED | (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS));
	/* MEM_LOAD is only valid while the first use is still the VLOAD itself */
	if (ctx->ir_base[IR_LIVE_POS_TO_REF(ctx->live_intervals[r1]->use_pos->pos)].op != IR_VLOAD) {
		ctx->live_intervals[r1]->flags &= ~IR_LIVE_INTERVAL_MEM_LOAD;
	}
	ctx->live_intervals[r2] = NULL;

	// TODO: remember to reuse ???
	//ir_mem_free(ival);
}
1598
/* Coalesce vregs v1 (of 'from') and v2 (of 'to') into a single interval.
 * The survivor is chosen so that an already-coalesced interval keeps its
 * number; otherwise the vreg of the earlier instruction wins.  When the
 * absorbed vreg was itself coalesced, every ctx->vregs entry referring to it
 * must be rewritten (full scan); otherwise only the single ref is updated. */
static void ir_vregs_coalesce(ir_ctx *ctx, uint32_t v1, uint32_t v2, ir_ref from, ir_ref to)
{
	ir_ref i;
	uint16_t f1 = ctx->live_intervals[v1]->flags;
	uint16_t f2 = ctx->live_intervals[v2]->flags;

	if ((f1 & IR_LIVE_INTERVAL_COALESCED) && !(f2 & IR_LIVE_INTERVAL_COALESCED)) {
		ir_vregs_join(ctx, v1, v2);
		ctx->vregs[to] = v1;
	} else if ((f2 & IR_LIVE_INTERVAL_COALESCED) && !(f1 & IR_LIVE_INTERVAL_COALESCED)) {
		ir_vregs_join(ctx, v2, v1);
		ctx->vregs[from] = v2;
	} else if (from < to) {
		ir_vregs_join(ctx, v1, v2);
		if (f2 & IR_LIVE_INTERVAL_COALESCED) {
			/* v2 covered several refs: remap them all to v1 */
			for (i = 1; i < ctx->insns_count; i++) {
				if (ctx->vregs[i] == v2) {
					ctx->vregs[i] = v1;
				}
			}
		} else {
			ctx->vregs[to] = v1;
		}
	} else {
		ir_vregs_join(ctx, v2, v1);
		if (f1 & IR_LIVE_INTERVAL_COALESCED) {
			/* v1 covered several refs: remap them all to v2 */
			for (i = 1; i < ctx->insns_count; i++) {
				if (ctx->vregs[i] == v1) {
					ctx->vregs[i] = v2;
				}
			}
		} else {
			ctx->vregs[from] = v2;
		}
	}
}
1635
/* Mark block 'b' as needing a SSA-deconstruction move for the PHI edge
 * 'from' -> 'to'.  No move is needed when both already share a vreg. */
static void ir_add_phi_move(ir_ctx *ctx, uint32_t b, ir_ref from, ir_ref to)
{
	ir_block *bb;

	if (!IR_IS_CONST_REF(from) && ctx->vregs[from] == ctx->vregs[to]) {
		return; /* same virtual register: the move is a no-op */
	}
	bb = &ctx->cfg_blocks[b];
	bb->flags &= ~IR_BB_EMPTY;
	bb->flags |= IR_BB_DESSA_MOVES;
	ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
#if 0
	fprintf(stderr, "BB%d: MOV %d -> %d\n", b, from, to);
#endif
}
1647
typedef struct _ir_coalesce_block {
	uint32_t b;
	uint32_t loop_depth;
} ir_coalesce_block;

/* qsort() comparator: deeper loops sort first; at equal depth, lower block
 * numbers sort first.  Never returns 0 (block numbers are unique). */
static int ir_block_cmp(const void *b1, const void *b2)
{
	const ir_coalesce_block *d1 = (const ir_coalesce_block*)b1;
	const ir_coalesce_block *d2 = (const ir_coalesce_block*)b2;

	if (d1->loop_depth != d2->loop_depth) {
		return (d1->loop_depth > d2->loop_depth) ? -1 : 1;
	}
	return (d1->b < d2->b) ? -1 : 1;
}
1670
/* Swap op1/op2 of the commutative instruction 'insn' (at ref 'i') and patch
 * the affected live intervals accordingly: the former op2 becomes op1 (its
 * use moves to the LOAD sub-position), the former op1 becomes op2 (its use
 * and range end move to the USE sub-position), and the result's hint is
 * redirected to the new op1.  Use-position flags of the two swapped operands
 * are exchanged at the end. */
static void ir_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
{
	ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
	ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
	ir_live_interval *ival;
	ir_live_range *r;
	ir_use_pos *p, *p1 = NULL, *p2 = NULL;
	ir_ref tmp;

	tmp = insn->op1;
	insn->op1 = insn->op2;
	insn->op2 = tmp;

	/* new op1 (old op2): its use moves from USE to LOAD position */
	ival = ctx->live_intervals[ctx->vregs[insn->op1]];
	p = ival->use_pos;
	while (p) {
		if (p->pos == pos) {
			p->pos = load_pos;
			p->op_num = 1;
			p1 = p;
			break;
		}
		p = p->next;
	}

	/* result of 'i': re-target its op1-reuse hint to the new op1 */
	ival = ctx->live_intervals[ctx->vregs[i]];
	p = ival->use_pos;
	while (p) {
		if (p->pos == load_pos) {
			p->hint_ref = insn->op1;
			break;
		}
		p = p->next;
	}

	/* new op2 (old op1): extend its range end and use from LOAD to USE position */
	if (insn->op2 > 0 && ctx->vregs[insn->op2]) {
		ival = ctx->live_intervals[ctx->vregs[insn->op2]];
		r = &ival->range;
		while (r) {
			if (r->end == load_pos) {
				r->end = pos;
				if (!r->next) {
					ival->end = pos;
				}
				break;
			}
			r = r->next;
		}
		p = ival->use_pos;
		while (p) {
			if (p->pos == load_pos) {
				p->pos = pos;
				p->op_num = 2;
				p2 = p;
				break;
			}
			p = p->next;
		}
	}
	/* exchange the per-use flags of the two swapped operands */
	if (p1 && p2) {
		uint8_t tmp = p1->flags;
		p1->flags = p2->flags;
		p2->flags = tmp;
	}
}
1736
/* Return non-zero when coalescing vregs 'use' and 'def' around instruction
 * 'ref' would conflict: 'use' carries a register hint before 'ref' and 'def'
 * carries a different register hint after 'ref'. */
static int ir_hint_conflict(ir_ctx *ctx, ir_ref ref, int use, int def)
{
	ir_use_pos *p;
	ir_reg use_hint = IR_REG_NONE;
	ir_reg def_hint = IR_REG_NONE;

	/* last register hint of 'use' before reaching 'ref' */
	for (p = ctx->live_intervals[use]->use_pos; p; p = p->next) {
		if (IR_LIVE_POS_TO_REF(p->pos) == ref) {
			break;
		}
		if (p->hint != IR_REG_NONE) {
			use_hint = p->hint;
		}
	}

	/* first register hint of 'def' after 'ref' */
	for (p = ctx->live_intervals[def]->use_pos; p; p = p->next) {
		if (IR_LIVE_POS_TO_REF(p->pos) > ref && p->hint != IR_REG_NONE) {
			def_hint = p->hint;
			break;
		}
	}

	return use_hint != IR_REG_NONE && def_hint != IR_REG_NONE && use_hint != def_hint;
}
1766
/* Try to swap op1/op2 of the commutative instruction 'insn' (at ref 'i') so
 * the result can share op1's register.  If op1 is already a profitable,
 * conflict-free candidate nothing is done.  Otherwise op2's interval is
 * tentatively shortened to the LOAD position; if that removes the overlap and
 * causes no hint conflict the operands are swapped, else the change is rolled
 * back.  Returns 1 when a swap was performed. */
static int ir_try_swap_operands(ir_ctx *ctx, ir_ref i, ir_insn *insn)
{
	if (ctx->vregs[insn->op1]
	 && ctx->vregs[insn->op1] != ctx->vregs[i]
	 && !ir_vregs_overlap(ctx, ctx->vregs[insn->op1], ctx->vregs[i])
	 && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op1], ctx->vregs[i])) {
		/* pass */
	} else {
		if (ctx->vregs[insn->op2] && ctx->vregs[insn->op2] != ctx->vregs[i]) {
			ir_live_pos pos = IR_USE_LIVE_POS_FROM_REF(i);
			ir_live_pos load_pos = IR_LOAD_LIVE_POS_FROM_REF(i);
			ir_live_interval *ival = ctx->live_intervals[ctx->vregs[insn->op2]];
			ir_live_range *r = &ival->range;

			/* don't disturb a PARAM kept in memory with this single use */
			if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ctx->use_lists[insn->op2].count == 1) {
				return 0;
			}
			while (r) {
				if (r->end == pos) {
					/* tentatively shorten op2's interval to the LOAD position */
					r->end = load_pos;
					if (!r->next) {
						ival->end = load_pos;
					}
					if (!ir_vregs_overlap(ctx, ctx->vregs[insn->op2], ctx->vregs[i])
					 && !ir_hint_conflict(ctx, i, ctx->vregs[insn->op2], ctx->vregs[i])) {
						ir_swap_operands(ctx, i, insn);
						return 1;
					} else {
						/* rollback the tentative shortening */
						r->end = pos;
						if (!r->next) {
							ival->end = pos;
						}
					}
					break;
				}
				r = r->next;
			}
		}
	}
	return 0;
}
1808
/* Coalesce live intervals of PHI nodes with their inputs to remove as many
 * SSA-deconstruction moves as possible, then try operand swapping of
 * commutative instructions for better register reuse, and finally compact
 * the vreg numbering if any intervals were merged.  Always returns 1. */
int ir_coalesce(ir_ctx *ctx)
{
	uint32_t b, n, succ, pred_b, count = 0;
	ir_ref *p, use, input, k, j;
	ir_block *bb, *succ_bb;
	ir_use_list *use_list;
	ir_insn *insn;
	ir_bitset visited;
	ir_coalesce_block *list;
	bool compact = 0;

	/* Collect a list of blocks which are predecessors to blocks with phi functions */
	list = ir_mem_malloc(sizeof(ir_coalesce_block) * ctx->cfg_blocks_count);
	visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
	for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		if (bb->flags & IR_BB_HAS_PHI) {
			k = bb->predecessors_count;
			if (k > 1) {
				use_list = &ctx->use_lists[bb->start];
				n = use_list->count;
				IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					use = *p;
					insn = &ctx->ir_base[use];
					if (insn->op == IR_PHI) {
						/* one PHI is enough: enqueue every (unvisited) predecessor */
						do {
							k--;
							pred_b = ctx->cfg_edges[bb->predecessors + k];
							if (!ir_bitset_in(visited, pred_b)) {
								ir_bitset_incl(visited, pred_b);
								list[count].b = pred_b;
								list[count].loop_depth = ctx->cfg_blocks[pred_b].loop_depth;
								count++;
							}
						} while (k > 0);
						break;
					}
				}
			}
		}
	}
	ir_mem_free(visited);

	/* Sort blocks according to their loop depth */
	qsort(list, count, sizeof(ir_coalesce_block), ir_block_cmp);

	/* Process from the back of the sorted array: shallowest loops first */
	while (count > 0) {
		uint32_t i;

		count--;
		b = list[count].b;
		bb = &ctx->cfg_blocks[b];
		IR_ASSERT(bb->successors_count == 1);
		succ = ctx->cfg_edges[bb->successors];
		succ_bb = &ctx->cfg_blocks[succ];
		IR_ASSERT(succ_bb->predecessors_count > 1);
		k = ir_phi_input_number(ctx, succ_bb, b);
		use_list = &ctx->use_lists[succ_bb->start];
		n = use_list->count;
		for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
			use = *p;
			insn = &ctx->ir_base[use];
			if (insn->op == IR_PHI) {
				input = ir_insn_op(insn, k);
				if (input > 0) {
					uint32_t v1 = ctx->vregs[input];
					uint32_t v2 = ctx->vregs[use];

					if (v1 == v2) {
						/* already coalesced */
					} else {
						if (!ir_vregs_overlap(ctx, v1, v2)) {
							ir_vregs_coalesce(ctx, v1, v2, input, use);
							compact = 1;
						} else {
#if 1
							/* overlapping only through the commutative input's op2 == use:
							 * swapping operands may remove the overlap and allow coalescing */
							ir_insn *input_insn = &ctx->ir_base[input];

							if ((ir_op_flags[input_insn->op] & IR_OP_FLAG_COMMUTATIVE)
							 && input_insn->op2 == use
							 && input_insn->op1 != use
							 && (ctx->live_intervals[v1]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
							 && ctx->live_intervals[v2]->end == IR_USE_LIVE_POS_FROM_REF(input)) {
								ir_live_range *r = &ctx->live_intervals[v2]->range;

								while (r->next) {
									r = r->next;
								}
								r->end = IR_LOAD_LIVE_POS_FROM_REF(input);
								ctx->live_intervals[v2]->end = IR_LOAD_LIVE_POS_FROM_REF(input);
								ir_swap_operands(ctx, input, input_insn);
								IR_ASSERT(!ir_vregs_overlap(ctx, v1, v2));
								ir_vregs_coalesce(ctx, v1, v2, input, use);
								compact = 1;
								continue;
							}
#endif
							ir_add_phi_move(ctx, b, input, use);
						}
					}
				} else {
					/* Move for constant input */
					ir_add_phi_move(ctx, b, input, use);
				}
			}
		}
	}
	ir_mem_free(list);

	ir_hint_propagation(ctx);

	if (ctx->rules) {
		/* try to swap operands of commutative instructions for better register allocation */
		for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
			ir_ref i;

			IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
			i = bb->end;

			/* skip last instruction */
			i = ctx->prev_ref[i];

			while (i != bb->start) {
				insn = &ctx->ir_base[i];
				if ((ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE)
				 && ctx->vregs[i]
				 && ctx->live_intervals[ctx->vregs[i]]->use_pos
				 && (ctx->live_intervals[ctx->vregs[i]]->use_pos->flags & IR_DEF_REUSES_OP1_REG)
				 && insn->op2 > 0
				 && insn->op1 > 0
				 && insn->op1 != insn->op2) {
					ir_try_swap_operands(ctx, i, insn);
				}
				i = ctx->prev_ref[i];
			}
		}
	}

	if (compact) {
		/* renumber surviving vregs densely and remap ctx->vregs through 'xlat' */
		ir_ref i, n;
		uint32_t *xlat = ir_mem_malloc((ctx->vregs_count + 1) * sizeof(uint32_t));

		for (i = 1, n = 1; i <= ctx->vregs_count; i++) {
			if (ctx->live_intervals[i]) {
				xlat[i] = n;
				if (i != n) {
					ctx->live_intervals[n] = ctx->live_intervals[i];
					ctx->live_intervals[n]->vreg = n;
				}
				n++;
			}
		}
		n--;
		if (n != ctx->vregs_count) {
			j = ctx->vregs_count - n;
			/* vregs + tmp + fixed + SCRATCH + ALL */
			for (i = n + 1; i <= n + IR_REG_NUM + 2; i++) {
				ctx->live_intervals[i] = ctx->live_intervals[i + j];
				if (ctx->live_intervals[i]) {
					ctx->live_intervals[i]->vreg = i;
				}
			}
			for (j = 1; j < ctx->insns_count; j++) {
				if (ctx->vregs[j]) {
					ctx->vregs[j] = xlat[ctx->vregs[j]];
				}
			}
			ctx->vregs_count = n;
		}
		ir_mem_free(xlat);
	}

	return 1;
}
1984
1985 /* SSA Deconstruction */
1986
/* Mark every predecessor block that needs SSA-deconstruction moves: a move is
 * needed when a PHI input is a constant or has a vreg different from the PHI
 * result.  Sets IR_BB_DESSA_MOVES on the predecessor and the global
 * IR_LR_HAVE_DESSA_MOVES flag.  Always returns 1. */
int ir_compute_dessa_moves(ir_ctx *ctx)
{
	uint32_t b, i, n;
	ir_ref j, k, *p, use;
	ir_block *bb;
	ir_use_list *use_list;
	ir_insn *insn;

	for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		k = bb->predecessors_count;
		if (k > 1) {
			use_list = &ctx->use_lists[bb->start];
			n = use_list->count;
			if (n > 1) {
				IR_ASSERT(k == ctx->ir_base[bb->start].inputs_count);
				/* PHI data operands are ops 2..k+1 (op1 is the MERGE) */
				k++;
				for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
					use = *p;
					insn = &ctx->ir_base[use];
					if (insn->op == IR_PHI) {
						for (j = 2; j <= k; j++) {
							if (IR_IS_CONST_REF(ir_insn_op(insn, j)) || ctx->vregs[ir_insn_op(insn, j)] != ctx->vregs[use]) {
								/* operand j corresponds to predecessor j-2 */
								int pred = ctx->cfg_edges[bb->predecessors + (j-2)];
								ctx->cfg_blocks[pred].flags &= ~IR_BB_EMPTY;
								ctx->cfg_blocks[pred].flags |= IR_BB_DESSA_MOVES;
								ctx->flags2 |= IR_LR_HAVE_DESSA_MOVES;
							}
						}
					}
				}
			}
		}
	}
	return 1;
}
2023
2024 /*
2025 * Parallel copy sequentialization algorithm
2026 *
 * The implementation is based on algorithm 1 described in
2028 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
2029 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
2030 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
2031 * pp. 114-125, doi: 10.1109/CGO.2009.19.
2032 */
int ir_gen_dessa_moves(ir_ctx *ctx, uint32_t b, emit_copy_t emit_copy)
{
	uint32_t succ, k, n = 0;
	ir_block *bb, *succ_bb;
	ir_use_list *use_list;
	/* loc[v]  - current location of the value originally held in vreg v
	 * pred[d] - source vreg of the pending copy into vreg d
	 * src[v]/dst[v] - IR refs used for emitting (src[0]/dst[0] = temporary) */
	ir_ref *loc, *pred, *src, *dst, i, *p, ref, input;
	ir_ref s, d;
	ir_insn *insn;
	uint32_t len;
	ir_bitset todo, ready;
	bool have_constants = 0;

	bb = &ctx->cfg_blocks[b];
	if (!(bb->flags & IR_BB_DESSA_MOVES)) {
		return 0;
	}
	IR_ASSERT(bb->successors_count == 1);
	succ = ctx->cfg_edges[bb->successors];
	succ_bb = &ctx->cfg_blocks[succ];
	IR_ASSERT(succ_bb->predecessors_count > 1);
	use_list = &ctx->use_lists[succ_bb->start];

	/* which PHI operand corresponds to the edge from block 'b' */
	k = ir_phi_input_number(ctx, succ_bb, b);

	/* single allocation holding the four per-vreg arrays */
	loc = ir_mem_malloc((ctx->vregs_count + 1) * 4 * sizeof(ir_ref));
	pred = loc + ctx->vregs_count + 1;
	src = pred + ctx->vregs_count + 1;
	dst = src + ctx->vregs_count + 1;
	len = ir_bitset_len(ctx->vregs_count + 1);
	todo = ir_bitset_malloc(ctx->vregs_count + 1);

	/* collect the parallel copy: one pending move per PHI with a different vreg */
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
		ref = *p;
		insn = &ctx->ir_base[ref];
		if (insn->op == IR_PHI) {
			input = ir_insn_op(insn, k);
			if (IR_IS_CONST_REF(input)) {
				/* constants are emitted last; they cannot participate in cycles */
				have_constants = 1;
			} else if (ctx->vregs[input] != ctx->vregs[ref]) {
				s = ctx->vregs[input];
				d = ctx->vregs[ref];
				src[s] = input;
				dst[d] = ref;
				loc[d] = pred[s] = 0;
				ir_bitset_incl(todo, d);
				n++;
			}
		}
	}

	if (n > 0) {
		src[0] = dst[0] = 0;
		ready = ir_bitset_malloc(ctx->vregs_count + 1);
		/* initialize loc/pred for all pending destinations */
		IR_BITSET_FOREACH(todo, len, d) {
			ref = dst[d];
			insn = &ctx->ir_base[ref];
			IR_ASSERT(insn->op == IR_PHI);
			input = ir_insn_op(insn, k);
			s = ctx->vregs[input];
			loc[s] = s;
			pred[d] = s;
		} IR_BITSET_FOREACH_END();

		/* a destination that is not itself a source can be written immediately */
		IR_BITSET_FOREACH(todo, len, i) {
			if (!loc[i]) {
				ir_bitset_incl(ready, i);
			}
		} IR_BITSET_FOREACH_END();

		while (1) {
			ir_ref a, b, c;

			/* emit all copies whose destination is free */
			while ((b = ir_bitset_pop_first(ready, len)) >= 0) {
				a = pred[b];
				c = loc[a];
				emit_copy(ctx, ctx->ir_base[dst[b]].type, src[c], dst[b]);
				ir_bitset_excl(todo, b);
				loc[a] = b;
				src[b] = dst[b];
				if (a == c && pred[a]) {
					/* 'a' just became free and still awaits its own copy */
					ir_bitset_incl(ready, a);
				}
			}
			/* only cycles remain: break one by spilling to the temporary (0) */
			b = ir_bitset_pop_first(todo, len);
			if (b < 0) {
				break;
			}
			IR_ASSERT(b != loc[pred[b]]);
			emit_copy(ctx, ctx->ir_base[src[b]].type, src[b], 0);
			loc[b] = 0;
			ir_bitset_incl(ready, b);
		}

		ir_mem_free(ready);
	}

	ir_mem_free(todo);
	ir_mem_free(loc);

	/* constant inputs are loaded after all register shuffling is done */
	if (have_constants) {
		for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
			ref = *p;
			insn = &ctx->ir_base[ref];
			if (insn->op == IR_PHI) {
				input = ir_insn_op(insn, k);
				if (IR_IS_CONST_REF(input)) {
					emit_copy(ctx, insn->type, input, ref);
				}
			}
		}
	}

	return 1;
}
2147
2148 /* Linear Scan Register Allocation */
2149
#ifdef IR_DEBUG
/* Debug tracing of linear-scan decisions; active only when the context has
 * the IR_DEBUG_RA flag. Intervals are printed as R<vreg> [from...to). */
# define IR_LOG_LSRA(action, ival, comment) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			fprintf(stderr, action " R%d [%d.%d...%d.%d)" comment "\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end)); \
		} \
	} while (0)
# define IR_LOG_LSRA_ASSIGN(action, ival, comment) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			fprintf(stderr, action " R%d [%d.%d...%d.%d) to %s" comment "\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				ir_reg_name(_ival->reg, _ival->type)); \
		} \
	} while (0)
# define IR_LOG_LSRA_SPLIT(ival, pos) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			ir_live_pos _pos = (pos); \
			fprintf(stderr, "      ---- Split R%d [%d.%d...%d.%d) at %d.%d\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
		} \
	} while (0)
# define IR_LOG_LSRA_CONFLICT(action, ival, pos) do { \
		if (ctx->flags & IR_DEBUG_RA) { \
			ir_live_interval *_ival = (ival); \
			ir_live_pos _start = _ival->range.start; \
			ir_live_pos _end = _ival->end; \
			ir_live_pos _pos = (pos); \
			fprintf(stderr, action " R%d [%d.%d...%d.%d) assigned to %s at %d.%d\n", \
				(_ival->flags & IR_LIVE_INTERVAL_TEMP) ? 0 : _ival->vreg, \
				IR_LIVE_POS_TO_REF(_start), IR_LIVE_POS_TO_SUB_REF(_start), \
				IR_LIVE_POS_TO_REF(_end), IR_LIVE_POS_TO_SUB_REF(_end), \
				ir_reg_name(_ival->reg, _ival->type), \
				IR_LIVE_POS_TO_REF(_pos), IR_LIVE_POS_TO_SUB_REF(_pos)); \
		} \
	} while (0)
#else
/* Non-debug builds: the loggers expand to nothing. Note: no trailing
 * semicolon in the definitions — callers already supply one, and a stray
 * ";" here would turn "IR_LOG_LSRA_CONFLICT(...);" into two statements,
 * breaking unbraced if/else call sites. */
# define IR_LOG_LSRA(action, ival, comment)
# define IR_LOG_LSRA_ASSIGN(action, ival, comment)
# define IR_LOG_LSRA_SPLIT(ival, pos)
# define IR_LOG_LSRA_CONFLICT(action, ival, pos)
#endif
2207
/* Check whether "position" lies inside one of the interval's live ranges. */
static bool ir_ival_covers(ir_live_interval *ival, ir_live_pos position)
{
	ir_live_range *r;

	/* Ranges are ordered; the first range ending after "position" decides. */
	for (r = &ival->range; r; r = r->next) {
		if (position < r->end) {
			return position >= r->start;
		}
	}
	return 0;
}
2221
/* Check whether there is a lifetime hole inside [from, to] of the interval. */
static bool ir_ival_has_hole_between(ir_live_interval *ival, ir_live_pos from, ir_live_pos to)
{
	ir_live_range *range;

	for (range = &ival->range; range; range = range->next) {
		if (from < range->start) {
			/* "from" falls before this range => a gap precedes it */
			return 1;
		}
		if (to <= range->end) {
			/* [from, to] fits entirely inside this range */
			return 0;
		}
	}
	return 0;
}
2236
2237
/* Return the last use position at or before "pos" that has one of the given
 * flags, or 0 when there is no such use. */
static ir_live_pos ir_last_use_pos_before(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
{
	ir_use_pos *use;
	ir_live_pos found = 0;

	for (use = ival->use_pos; use && use->pos <= pos; use = use->next) {
		if (use->flags & flags) {
			found = use->pos;
		}
	}
	return found;
}
2251
/* Return the first use position strictly after "pos" that has one of the
 * given flags, or 0x7fffffff (maxInt) when there is no such use. */
static ir_live_pos ir_first_use_pos_after(ir_live_interval *ival, ir_live_pos pos, uint8_t flags)
{
	ir_use_pos *use = ival->use_pos;

	/* skip use positions at or before "pos" */
	for (; use && use->pos <= pos; use = use->next) {
	}
	/* then skip use positions without any of the requested flags */
	for (; use && !(use->flags & flags); use = use->next) {
	}
	return use ? use->pos : 0x7fffffff;
}
2264
ir_first_use_pos(ir_live_interval * ival,uint8_t flags)2265 static ir_live_pos ir_first_use_pos(ir_live_interval *ival, uint8_t flags)
2266 {
2267 ir_use_pos *p = ival->use_pos;
2268
2269 while (p && !(p->flags & flags)) {
2270 p = p->next;
2271 }
2272 return p ? p->pos : 0x7fffffff;
2273 }
2274
ir_block_from_live_pos(ir_ctx * ctx,ir_live_pos pos)2275 static ir_block *ir_block_from_live_pos(ir_ctx *ctx, ir_live_pos pos)
2276 {
2277 ir_ref ref = IR_LIVE_POS_TO_REF(pos);
2278 uint32_t b = ctx->cfg_map[ref];
2279
2280 while (!b) {
2281 ref--;
2282 IR_ASSERT(ref > 0);
2283 b = ctx->cfg_map[ref];
2284 }
2285 IR_ASSERT(b <= ctx->cfg_blocks_count);
2286 return &ctx->cfg_blocks[b];
2287 }
2288
/* Choose a split position in [min_pos, max_pos], trying to move the split
 * out of loops so the reload/store lands on colder code. Falls back to
 * min_pos or max_pos (per "prefer_max") when both ends are in one block or
 * the interval has a hole between them. */
static ir_live_pos ir_find_optimal_split_position(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos min_pos, ir_live_pos max_pos, bool prefer_max)
{
	ir_block *min_bb, *max_bb;

	if (min_pos == max_pos) {
		return max_pos;
	}

	IR_ASSERT(min_pos < max_pos);
	IR_ASSERT(min_pos >= ival->range.start);
	IR_ASSERT(max_pos < ival->end);

	min_bb = ir_block_from_live_pos(ctx, min_pos);
	max_bb = ir_block_from_live_pos(ctx, max_pos);

	if (min_bb == max_bb
	 || ir_ival_has_hole_between(ival, min_pos, max_pos)) { // TODO: ???
		return (prefer_max) ? max_pos : min_pos;
	}

	if (max_bb->loop_depth > 0) {
		/* Split at the end of the loop entry: walk up from max_bb's loop
		 * header to its dominator until we leave the loop nest or would
		 * cross below min_pos. */
		do {
			ir_block *bb;

			if (max_bb->flags & IR_BB_LOOP_HEADER) {
				bb = max_bb;
			} else {
				IR_ASSERT(max_bb->loop_header);
				bb = &ctx->cfg_blocks[max_bb->loop_header];
			}
			bb = &ctx->cfg_blocks[bb->idom];
			if (IR_DEF_LIVE_POS_FROM_REF(bb->end) < min_pos) {
				break;
			}
			max_bb = bb;
		} while (max_bb->loop_depth > 0);

		if (IR_DEF_LIVE_POS_FROM_REF(max_bb->end) < max_pos) {
			return IR_DEF_LIVE_POS_FROM_REF(max_bb->end);
		}
	}

	/* Otherwise split at the entry of max_bb (block boundary), if it is
	 * still inside the allowed window. */
	if (IR_LOAD_LIVE_POS_FROM_REF(max_bb->start) > min_pos) {
		return IR_LOAD_LIVE_POS_FROM_REF(max_bb->start);
	} else {
		// TODO: "min_bb" is in a deeper loop than "max_bb" ???
		return max_pos;
	}
}
2339
/* Split "ival" at "pos" into two intervals: "ival" keeps the part before the
 * split, and the returned child (linked via ival->next) receives the ranges
 * and use positions from the split onward. Hint flags are recomputed for
 * both halves. */
static ir_live_interval *ir_split_interval_at(ir_ctx *ctx, ir_live_interval *ival, ir_live_pos pos)
{
	ir_live_interval *child;
	ir_live_range *p, *prev;
	ir_use_pos *use_pos, *prev_use_pos;

	IR_LOG_LSRA_SPLIT(ival, pos);
	IR_ASSERT(pos > ival->range.start);
	ctx->flags2 |= IR_RA_HAVE_SPLITS;

	/* Find the range that contains (or follows) the split position */
	p = &ival->range;
	prev = NULL;
	while (p && pos >= p->end) {
		prev = p;
		p = prev->next;
	}
	IR_ASSERT(p);

	if (pos < p->start) {
		/* split between ranges */
		pos = p->start;
	}

	use_pos = ival->use_pos;
	prev_use_pos = NULL;

	/* Re-derive the parent's hint flags from the use positions it keeps */
	ival->flags &= ~(IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS);
	if (p->start == pos) {
		/* Split on a range boundary: a use exactly at "pos" goes to the child */
		while (use_pos && pos > use_pos->pos) {
			if (use_pos->hint != IR_REG_NONE) {
				ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
			}
			if (use_pos->hint_ref > 0) {
				ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
			}
			prev_use_pos = use_pos;
			use_pos = use_pos->next;
		}
	} else {
		/* Split inside a range: a use exactly at "pos" stays with the parent */
		while (use_pos && pos >= use_pos->pos) {
			if (use_pos->hint != IR_REG_NONE) {
				ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
			}
			if (use_pos->hint_ref > 0) {
				ival->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
			}
			prev_use_pos = use_pos;
			use_pos = use_pos->next;
		}
	}

	child = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
	child->type = ival->type;
	child->reg = IR_REG_NONE;
	child->flags = IR_LIVE_INTERVAL_SPLIT_CHILD;
	child->vreg = ival->vreg;
	child->stack_spill_pos = -1; // not allocated
	child->range.start = pos;
	child->range.end = p->end;
	child->range.next = p->next;
	child->end = ival->end;
	child->use_pos = prev_use_pos ? prev_use_pos->next : use_pos;

	child->next = ival->next;
	ival->next = child;

	if (pos == p->start) {
		/* The whole range "p" moved to the child (its data was copied into
		 * child->range above); the parent ends at the previous range. */
		prev->next = NULL;
		ival->end = prev->end;
		/* Cache to reuse */
		p->next = ctx->unused_ranges;
		ctx->unused_ranges = p;
	} else {
		/* Range "p" is cut in two; parent keeps the head */
		p->end = ival->end = pos;
		p->next = NULL;
	}
	if (prev_use_pos) {
		prev_use_pos->next = NULL;
	} else {
		ival->use_pos = NULL;
	}

	/* Derive the child's hint flags from the use positions it inherited */
	use_pos = child->use_pos;
	while (use_pos) {
		if (use_pos->hint != IR_REG_NONE) {
			child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REGS;
		}
		if (use_pos->hint_ref > 0) {
			child->flags |= IR_LIVE_INTERVAL_HAS_HINT_REFS;
		}
		use_pos = use_pos->next;
	}

	return child;
}
2435
/* Allocate a stack spill slot for a value of "type", reusing freed slots
 * where possible. data->handled[size] lists already-handled intervals whose
 * slots of that size may be recycled; data->unused_slot_{1,2,4} record free
 * sub-slots left over after carving a smaller slot out of a larger one.
 * On 64-bit targets the frame grows in 8-byte steps, so smaller requests
 * leave their remainder in the unused_slot_* fields. Returns the slot's
 * frame offset. */
int32_t ir_allocate_spill_slot(ir_ctx *ctx, ir_type type, ir_reg_alloc_data *data)
{
	int32_t ret;
	uint8_t size = ir_type_size[type];

	IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
	if (data->handled && data->handled[size]) {
		/* Exact-size recycled slot available */
		ret = data->handled[size]->stack_spill_pos;
		data->handled[size] = data->handled[size]->list_next;
	} else if (size == 8) {
		ret = ctx->stack_frame_size;
		ctx->stack_frame_size += 8;
	} else if (size == 4) {
		if (data->unused_slot_4) {
			ret = data->unused_slot_4;
			data->unused_slot_4 = 0;
		} else if (data->handled && data->handled[8]) {
			/* Split a recycled 8-byte slot into two 4-byte halves */
			ret = data->handled[8]->stack_spill_pos;
			data->handled[8] = data->handled[8]->list_next;
			data->unused_slot_4 = ret + 4;
		} else {
			ret = ctx->stack_frame_size;
			if (sizeof(void*) == 8) {
				/* 8-byte granular frame: remember the spare half */
				data->unused_slot_4 = ctx->stack_frame_size + 4;
				ctx->stack_frame_size += 8;
			} else {
				ctx->stack_frame_size += 4;
			}
		}
	} else if (size == 2) {
		if (data->unused_slot_2) {
			ret = data->unused_slot_2;
			data->unused_slot_2 = 0;
		} else if (data->unused_slot_4) {
			/* Carve a 2-byte slot out of a spare 4-byte slot */
			ret = data->unused_slot_4;
			data->unused_slot_2 = data->unused_slot_4 + 2;
			data->unused_slot_4 = 0;
		} else if (data->handled && data->handled[4]) {
			ret = data->handled[4]->stack_spill_pos;
			data->handled[4] = data->handled[4]->list_next;
			data->unused_slot_2 = ret + 2;
		} else if (data->handled && data->handled[8]) {
			ret = data->handled[8]->stack_spill_pos;
			data->handled[8] = data->handled[8]->list_next;
			data->unused_slot_2 = ret + 2;
			data->unused_slot_4 = ret + 4;
		} else {
			ret = ctx->stack_frame_size;
			data->unused_slot_2 = ctx->stack_frame_size + 2;
			if (sizeof(void*) == 8) {
				data->unused_slot_4 = ctx->stack_frame_size + 4;
				ctx->stack_frame_size += 8;
			} else {
				ctx->stack_frame_size += 4;
			}
		}
	} else {
		IR_ASSERT(size == 1);
		/* Prefer the smallest spare slot, then recycled slots (smallest
		 * first), and extend the frame only as a last resort. */
		if (data->unused_slot_1) {
			ret = data->unused_slot_1;
			data->unused_slot_1 = 0;
		} else if (data->unused_slot_2) {
			ret = data->unused_slot_2;
			data->unused_slot_1 = data->unused_slot_2 + 1;
			data->unused_slot_2 = 0;
		} else if (data->unused_slot_4) {
			ret = data->unused_slot_4;
			data->unused_slot_1 = data->unused_slot_4 + 1;
			data->unused_slot_2 = data->unused_slot_4 + 2;
			data->unused_slot_4 = 0;
		} else if (data->handled && data->handled[2]) {
			ret = data->handled[2]->stack_spill_pos;
			data->handled[2] = data->handled[2]->list_next;
			data->unused_slot_1 = ret + 1;
		} else if (data->handled && data->handled[4]) {
			ret = data->handled[4]->stack_spill_pos;
			data->handled[4] = data->handled[4]->list_next;
			data->unused_slot_1 = ret + 1;
			data->unused_slot_2 = ret + 2;
		} else if (data->handled && data->handled[8]) {
			ret = data->handled[8]->stack_spill_pos;
			data->handled[8] = data->handled[8]->list_next;
			data->unused_slot_1 = ret + 1;
			data->unused_slot_2 = ret + 2;
			data->unused_slot_4 = ret + 4;
		} else {
			ret = ctx->stack_frame_size;
			data->unused_slot_1 = ctx->stack_frame_size + 1;
			data->unused_slot_2 = ctx->stack_frame_size + 2;
			if (sizeof(void*) == 8) {
				data->unused_slot_4 = ctx->stack_frame_size + 4;
				ctx->stack_frame_size += 8;
			} else {
				ctx->stack_frame_size += 4;
			}
		}
	}
	return ret;
}
2535
ir_get_first_reg_hint(ir_ctx * ctx,ir_live_interval * ival,ir_regset available)2536 static ir_reg ir_get_first_reg_hint(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2537 {
2538 ir_use_pos *use_pos;
2539 ir_reg reg;
2540
2541 use_pos = ival->use_pos;
2542 while (use_pos) {
2543 reg = use_pos->hint;
2544 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2545 return reg;
2546 }
2547 use_pos = use_pos->next;
2548 }
2549
2550 return IR_REG_NONE;
2551 }
2552
ir_try_allocate_preferred_reg(ir_ctx * ctx,ir_live_interval * ival,ir_regset available,ir_live_pos * freeUntilPos)2553 static ir_reg ir_try_allocate_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available, ir_live_pos *freeUntilPos)
2554 {
2555 ir_use_pos *use_pos;
2556 ir_reg reg;
2557
2558 if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
2559 use_pos = ival->use_pos;
2560 while (use_pos) {
2561 reg = use_pos->hint;
2562 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2563 if (ival->end <= freeUntilPos[reg]) {
2564 /* register available for the whole interval */
2565 return reg;
2566 }
2567 }
2568 use_pos = use_pos->next;
2569 }
2570 }
2571
2572 if (ival->flags & IR_LIVE_INTERVAL_HAS_HINT_REFS) {
2573 use_pos = ival->use_pos;
2574 while (use_pos) {
2575 if (use_pos->hint_ref > 0) {
2576 reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2577 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2578 if (ival->end <= freeUntilPos[reg]) {
2579 /* register available for the whole interval */
2580 return reg;
2581 }
2582 }
2583 }
2584 use_pos = use_pos->next;
2585 }
2586 }
2587
2588 return IR_REG_NONE;
2589 }
2590
ir_get_preferred_reg(ir_ctx * ctx,ir_live_interval * ival,ir_regset available)2591 static ir_reg ir_get_preferred_reg(ir_ctx *ctx, ir_live_interval *ival, ir_regset available)
2592 {
2593 ir_use_pos *use_pos;
2594 ir_reg reg;
2595
2596 use_pos = ival->use_pos;
2597 while (use_pos) {
2598 reg = use_pos->hint;
2599 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2600 return reg;
2601 } else if (use_pos->hint_ref > 0) {
2602 reg = ctx->live_intervals[ctx->vregs[use_pos->hint_ref]]->reg;
2603 if (reg >= 0 && IR_REGSET_IN(available, reg)) {
2604 return reg;
2605 }
2606 }
2607 use_pos = use_pos->next;
2608 }
2609
2610 return IR_REG_NONE;
2611 }
2612
/* Insert "ival" into the "unhandled" list, kept sorted by range start.
 * Among intervals starting at the same position, hinted intervals come
 * first, then higher vreg numbers. */
static void ir_add_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
{
	ir_live_pos pos = ival->range.start;
	ir_live_interval **slot = unhandled;

	/* Advance to the first entry that must come after "ival" */
	while (*slot) {
		ir_live_interval *entry = *slot;

		if (pos < entry->range.start) {
			break;
		}
		if (pos == entry->range.start) {
			if ((ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))
			 && !(entry->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS))) {
				break;
			}
			if (ival->vreg > entry->vreg) {
				break;
			}
		}
		slot = &entry->list_next;
	}
	ival->list_next = *slot;
	*slot = ival;
}
2644
2645 /* merge sorted lists */
/* Merge the chain of split siblings of "ival" (linked via ->next) into the
 * "unhandled" list (linked via ->list_next), keeping it sorted by range
 * start. Both inputs are already sorted, so this is a list merge. */
static void ir_merge_to_unhandled(ir_live_interval **unhandled, ir_live_interval *ival)
{
	ir_live_interval **prev;
	ir_live_pos pos;

	if (*unhandled == NULL) {
		/* Target list is empty: just relink the ->next chain as list_next */
		*unhandled = ival;
		while (ival) {
			ival = ival->list_next = ival->next;
		}
	} else {
		prev = unhandled;
		while (ival) {
			pos = ival->range.start;
			/* Entries starting at the same position keep their existing
			 * order (>= keeps the new entry after equals) */
			while (*prev && pos >= (*prev)->range.start) {
				prev = &(*prev)->list_next;
			}
			ival->list_next = *prev;
			*prev = ival;
			prev = &ival->list_next;
			ival = ival->next;
		}
	}
#if IR_DEBUG
	/* Verify the merged list is sorted by range start */
	ival = *unhandled;
	pos = 0;

	while (ival) {
		IR_ASSERT(ival->range.start >= pos);
		pos = ival->range.start;
		ival = ival->list_next;
	}
#endif
}
2680
/* Insert "ival" into the "unhandled" list, sorted by range start; an entry
 * starting at the same position as an existing one is placed before it. */
static void ir_add_to_unhandled_spill(ir_live_interval **unhandled, ir_live_interval *ival)
{
	ir_live_pos pos = ival->range.start;
	ir_live_interval **slot = unhandled;

	while (*slot && pos > (*slot)->range.start) {
		slot = &(*slot)->list_next;
	}
	ival->list_next = *slot;
	*slot = ival;
}
2702
/* LSRA TryAllocateFreeReg (Wimmer/Franz): try to give "ival" a register
 * without evicting anyone. Computes freeUntilPos[] from the active and
 * inactive sets; if some register is free for the whole interval, assign it
 * (honoring hints and preferring caller-saved registers); if a register is
 * only free for a prefix, split the interval and assign the prefix.
 * Returns the assigned register or IR_REG_NONE. */
static ir_reg ir_try_allocate_free_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval *inactive, ir_live_interval **unhandled)
{
	ir_live_pos freeUntilPos[IR_REG_NUM];
	int i, reg;
	ir_live_pos pos, next;
	ir_live_interval *other;
	ir_regset available, overlapped, scratch;

	if (IR_IS_TYPE_FP(ival->type)) {
		available = IR_REGSET_FP;
		/* set freeUntilPos of all physical registers to maxInt */
		for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
			freeUntilPos[i] = 0x7fffffff;
		}
	} else {
		available = IR_REGSET_GP;
		if (ctx->flags & IR_USE_FRAME_POINTER) {
			IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
		}
#if defined(IR_TARGET_X86)
		/* 32-bit x86: only AL/BL/CL/DL are byte-addressable */
		if (ir_type_size[ival->type] == 1) {
			/* TODO: if no registers available, we may use one of these registers for an already allocated interval ??? */
			IR_REGSET_EXCL(available, IR_REG_RBP);
			IR_REGSET_EXCL(available, IR_REG_RSI);
			IR_REGSET_EXCL(available, IR_REG_RDI);
		}
#endif
		/* set freeUntilPos of all physical registers to maxInt */
		for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
			freeUntilPos[i] = 0x7fffffff;
		}
	}

	available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);

	/* for each interval it in active */
	other = *active;
	while (other) {
		/* freeUntilPos[it.reg] = 0 */
		reg = other->reg;
		IR_ASSERT(reg >= 0);
		/* IR_REG_SCRATCH / IR_REG_ALL are pseudo-registers standing for
		 * whole register classes (e.g. clobbers across a call) */
		if (reg >= IR_REG_SCRATCH) {
			if (reg == IR_REG_SCRATCH) {
				available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
			} else {
				IR_ASSERT(reg == IR_REG_ALL);
				available = IR_REGSET_EMPTY;
			}
		} else {
			IR_REGSET_EXCL(available, reg);
		}
		other = other->list_next;
	}

	/* for each interval it in inactive intersecting with current
	 *
	 * This loop is not necessary for program in SSA form (see LSRA on SSA fig. 6),
	 * but it is still necessary after coalescing and splitting
	 */
	overlapped = IR_REGSET_EMPTY;
	other = inactive;
	pos = ival->end;
	while (other) {
		/* freeUntilPos[it.reg] = next intersection of it with current */
		if (other->current_range->start < pos) {
			next = ir_ivals_overlap(&ival->range, other->current_range);
			if (next) {
				reg = other->reg;
				IR_ASSERT(reg >= 0);
				if (reg >= IR_REG_SCRATCH) {
					ir_regset regset;

					if (reg == IR_REG_SCRATCH) {
						regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
					} else {
						IR_ASSERT(reg == IR_REG_ALL);
						regset = available;
					}
					overlapped = IR_REGSET_UNION(overlapped, regset);
					IR_REGSET_FOREACH(regset, reg) {
						if (next < freeUntilPos[reg]) {
							freeUntilPos[reg] = next;
						}
					} IR_REGSET_FOREACH_END();
				} else if (IR_REGSET_IN(available, reg)) {
					IR_REGSET_INCL(overlapped, reg);
					if (next < freeUntilPos[reg]) {
						freeUntilPos[reg] = next;
					}
				}
			}
		}
		other = other->list_next;
	}

	/* Registers that stay free past ival->end */
	available = IR_REGSET_DIFFERENCE(available, overlapped);
	if (available != IR_REGSET_EMPTY) {

		if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
			/* Try to use hint */
			reg = ir_try_allocate_preferred_reg(ctx, ival, available, freeUntilPos);
			if (reg != IR_REG_NONE) {
				ival->reg = reg;
				IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (hint available without spilling)");
				/* Put on "active" only if a later unhandled interval can
				 * still overlap it */
				if (*unhandled && ival->end > (*unhandled)->range.start) {
					ival->list_next = *active;
					*active = ival;
				}
				return reg;
			}
		}

		if (ival->flags & IR_LIVE_INTERVAL_SPLIT_CHILD) {
			/* Try to reuse the register previously allocated for splited interval */
			reg = ctx->live_intervals[ival->vreg]->reg;
			if (reg >= 0 && IR_REGSET_IN(available, reg)) {
				ival->reg = reg;
				IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
				if (*unhandled && ival->end > (*unhandled)->range.start) {
					ival->list_next = *active;
					*active = ival;
				}
				return reg;
			}
		}

		/* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
		scratch = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
		if (scratch != IR_REGSET_EMPTY) {
			/* prefer registers that don't conflict with the hints for the following unhandled intervals */
			/* NOTE(review): "if (1)" makes the else arm below dead code —
			 * presumably kept to allow disabling the heuristic; confirm */
			if (1) {
				ir_regset non_conflicting = scratch;

				other = *unhandled;
				while (other && other->range.start < ival->range.end) {
					if (other->flags & IR_LIVE_INTERVAL_HAS_HINT_REGS) {
						reg = ir_get_first_reg_hint(ctx, other, non_conflicting);

						if (reg >= 0) {
							IR_REGSET_EXCL(non_conflicting, reg);
							if (non_conflicting == IR_REGSET_EMPTY) {
								break;
							}
						}
					}
					other = other->list_next;
				}
				if (non_conflicting != IR_REGSET_EMPTY) {
					reg = IR_REGSET_FIRST(non_conflicting);
				} else {
					reg = IR_REGSET_FIRST(scratch);
				}
			} else {
				reg = IR_REGSET_FIRST(scratch);
			}
		} else {
			reg = IR_REGSET_FIRST(available);
		}
		ival->reg = reg;
		IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling)");
		if (*unhandled && ival->end > (*unhandled)->range.start) {
			ival->list_next = *active;
			*active = ival;
		}
		return reg;
	}

	/* reg = register with highest freeUntilPos */
	reg = IR_REG_NONE;
	pos = 0;
	IR_REGSET_FOREACH(overlapped, i) {
		if (freeUntilPos[i] > pos) {
			pos = freeUntilPos[i];
			reg = i;
		} else if (freeUntilPos[i] == pos
				&& !IR_REGSET_IN(IR_REGSET_SCRATCH, reg)
				&& IR_REGSET_IN(IR_REGSET_SCRATCH, i)) {
			/* prefer caller-saved registers to avoid save/restore in prologue/epilogue */
			pos = freeUntilPos[i];
			reg = i;
		}
	} IR_REGSET_FOREACH_END();

	if (pos > ival->range.start) {
		/* register available for the first part of the interval */
		/* split current before freeUntilPos[reg] */
		ir_live_pos split_pos = ir_last_use_pos_before(ival, pos,
			IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
		if (split_pos > ival->range.start) {
			split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, pos, 0);
			other = ir_split_interval_at(ctx, ival, split_pos);
			/* After the split the parent's hint flags may have changed;
			 * try the hints again against all candidate registers */
			if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
				ir_reg pref_reg = ir_try_allocate_preferred_reg(ctx, ival, IR_REGSET_UNION(available, overlapped), freeUntilPos);

				if (pref_reg != IR_REG_NONE) {
					ival->reg = pref_reg;
				} else {
					ival->reg = reg;
				}
			} else {
				ival->reg = reg;
			}
			IR_LOG_LSRA_ASSIGN("    ---- Assign", ival, " (available without spilling for the first part)");
			if (*unhandled && ival->end > (*unhandled)->range.start) {
				ival->list_next = *active;
				*active = ival;
			}
			/* The tail becomes a new unhandled interval */
			ir_add_to_unhandled(unhandled, other);
			IR_LOG_LSRA("    ---- Queue", other, "");
			return reg;
		}
	}
	return IR_REG_NONE;
}
2917
ir_allocate_blocked_reg(ir_ctx * ctx,ir_live_interval * ival,ir_live_interval ** active,ir_live_interval ** inactive,ir_live_interval ** unhandled)2918 static ir_reg ir_allocate_blocked_reg(ir_ctx *ctx, ir_live_interval *ival, ir_live_interval **active, ir_live_interval **inactive, ir_live_interval **unhandled)
2919 {
2920 ir_live_pos nextUsePos[IR_REG_NUM];
2921 ir_live_pos blockPos[IR_REG_NUM];
2922 int i, reg;
2923 ir_live_pos pos, next_use_pos;
2924 ir_live_interval *other, *prev;
2925 ir_use_pos *use_pos;
2926 ir_regset available, tmp_regset;
2927
2928 if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
2929 use_pos = ival->use_pos;
2930 while (use_pos && !(use_pos->flags & IR_USE_MUST_BE_IN_REG)) {
2931 use_pos = use_pos->next;
2932 }
2933 if (!use_pos) {
2934 /* spill */
2935 IR_LOG_LSRA(" ---- Spill", ival, " (no use pos that must be in reg)");
2936 ctx->flags2 |= IR_RA_HAVE_SPILLS;
2937 return IR_REG_NONE;
2938 }
2939 next_use_pos = use_pos->pos;
2940 } else {
2941 next_use_pos = ival->range.end;
2942 }
2943
2944 if (IR_IS_TYPE_FP(ival->type)) {
2945 available = IR_REGSET_FP;
2946 /* set nextUsePos of all physical registers to maxInt */
2947 for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
2948 nextUsePos[i] = 0x7fffffff;
2949 blockPos[i] = 0x7fffffff;
2950 }
2951 } else {
2952 available = IR_REGSET_GP;
2953 if (ctx->flags & IR_USE_FRAME_POINTER) {
2954 IR_REGSET_EXCL(available, IR_REG_FRAME_POINTER);
2955 }
2956 #if defined(IR_TARGET_X86)
2957 if (ir_type_size[ival->type] == 1) {
2958 /* TODO: if no registers avialivle, we may use of one this register for already allocated interval ??? */
2959 IR_REGSET_EXCL(available, IR_REG_RBP);
2960 IR_REGSET_EXCL(available, IR_REG_RSI);
2961 IR_REGSET_EXCL(available, IR_REG_RDI);
2962 }
2963 #endif
2964 /* set nextUsePos of all physical registers to maxInt */
2965 for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
2966 nextUsePos[i] = 0x7fffffff;
2967 blockPos[i] = 0x7fffffff;
2968 }
2969 }
2970
2971 available = IR_REGSET_DIFFERENCE(available, (ir_regset)ctx->fixed_regset);
2972
2973 if (IR_REGSET_IS_EMPTY(available)) {
2974 fprintf(stderr, "LSRA Internal Error: No registers available. Allocation is not possible\n");
2975 IR_ASSERT(0);
2976 exit(-1);
2977 }
2978
2979 /* for each interval it in active */
2980 other = *active;
2981 while (other) {
2982 /* nextUsePos[it.reg] = next use of it after start of current */
2983 reg = other->reg;
2984 IR_ASSERT(reg >= 0);
2985 if (reg >= IR_REG_SCRATCH) {
2986 ir_regset regset;
2987
2988 if (reg == IR_REG_SCRATCH) {
2989 regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
2990 } else {
2991 IR_ASSERT(reg == IR_REG_ALL);
2992 regset = available;
2993 }
2994 IR_REGSET_FOREACH(regset, reg) {
2995 blockPos[reg] = nextUsePos[reg] = 0;
2996 } IR_REGSET_FOREACH_END();
2997 } else if (IR_REGSET_IN(available, reg)) {
2998 if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
2999 blockPos[reg] = nextUsePos[reg] = 0;
3000 } else {
3001 pos = ir_first_use_pos_after(other, ival->range.start,
3002 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3003 if (pos < nextUsePos[reg]) {
3004 nextUsePos[reg] = pos;
3005 }
3006 }
3007 }
3008 other = other->list_next;
3009 }
3010
3011 /* for each interval it in inactive intersecting with current */
3012 other = *inactive;
3013 while (other) {
3014 /* freeUntilPos[it.reg] = next intersection of it with current */
3015 reg = other->reg;
3016 IR_ASSERT(reg >= 0);
3017 if (reg >= IR_REG_SCRATCH) {
3018 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3019
3020 if (overlap) {
3021 ir_regset regset;
3022
3023 if (reg == IR_REG_SCRATCH) {
3024 regset = IR_REGSET_INTERSECTION(available, IR_REGSET_SCRATCH);
3025 } else {
3026 IR_ASSERT(reg == IR_REG_ALL);
3027 regset = available;
3028 }
3029 IR_REGSET_FOREACH(regset, reg) {
3030 if (overlap < nextUsePos[reg]) {
3031 nextUsePos[reg] = overlap;
3032 }
3033 if (overlap < blockPos[reg]) {
3034 blockPos[reg] = overlap;
3035 }
3036 } IR_REGSET_FOREACH_END();
3037 }
3038 } else if (IR_REGSET_IN(available, reg)) {
3039 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3040
3041 if (overlap) {
3042 if (other->flags & (IR_LIVE_INTERVAL_FIXED|IR_LIVE_INTERVAL_TEMP)) {
3043 if (overlap < nextUsePos[reg]) {
3044 nextUsePos[reg] = overlap;
3045 }
3046 if (overlap < blockPos[reg]) {
3047 blockPos[reg] = overlap;
3048 }
3049 } else {
3050 pos = ir_first_use_pos_after(other, ival->range.start,
3051 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3052 if (pos < nextUsePos[reg]) {
3053 nextUsePos[reg] = pos;
3054 }
3055 }
3056 }
3057 }
3058 other = other->list_next;
3059 }
3060
3061 /* register hinting */
3062 reg = IR_REG_NONE;
3063 if (ival->flags & (IR_LIVE_INTERVAL_HAS_HINT_REGS|IR_LIVE_INTERVAL_HAS_HINT_REFS)) {
3064 reg = ir_get_preferred_reg(ctx, ival, available);
3065 }
3066 if (reg == IR_REG_NONE) {
3067 select_register:
3068 reg = IR_REGSET_FIRST(available);
3069 }
3070
3071 /* reg = register with highest nextUsePos */
3072 pos = nextUsePos[reg];
3073 tmp_regset = available;
3074 IR_REGSET_EXCL(tmp_regset, reg);
3075 IR_REGSET_FOREACH(tmp_regset, i) {
3076 if (nextUsePos[i] > pos) {
3077 pos = nextUsePos[i];
3078 reg = i;
3079 }
3080 } IR_REGSET_FOREACH_END();
3081
3082 /* if first usage of current is after nextUsePos[reg] then */
3083 if (next_use_pos > pos && !(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3084 /* all other intervals are used before current, so it is best to spill current itself */
3085 /* assign spill slot to current */
3086 /* split current before its first use position that requires a register */
3087 ir_live_pos split_pos;
3088
3089 spill_current:
3090 if (next_use_pos == ival->range.start) {
3091 IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3092 /* split right after definition */
3093 split_pos = next_use_pos + 1;
3094 } else {
3095 split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3096 }
3097
3098 if (split_pos > ival->range.start) {
3099 IR_LOG_LSRA(" ---- Conflict with others", ival, " (all others are used before)");
3100 other = ir_split_interval_at(ctx, ival, split_pos);
3101 IR_LOG_LSRA(" ---- Spill", ival, "");
3102 ir_add_to_unhandled(unhandled, other);
3103 IR_LOG_LSRA(" ---- Queue", other, "");
3104 return IR_REG_NONE;
3105 }
3106 }
3107
3108 if (ival->end > blockPos[reg]) {
3109 /* spilling make a register free only for the first part of current */
3110 IR_LOG_LSRA(" ---- Conflict with others", ival, " (spilling make a register free only for the first part)");
3111 /* split current at optimal position before block_pos[reg] */
3112 ir_live_pos split_pos = ir_last_use_pos_before(ival, blockPos[reg] + 1,
3113 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3114 if (split_pos == 0) {
3115 split_pos = ir_first_use_pos_after(ival, blockPos[reg],
3116 IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1;
3117 other = ir_split_interval_at(ctx, ival, split_pos);
3118 ir_add_to_unhandled(unhandled, other);
3119 IR_LOG_LSRA(" ---- Queue", other, "");
3120 return IR_REG_NONE;
3121 }
3122 if (split_pos >= blockPos[reg]) {
3123 try_next_available_register:
3124 IR_REGSET_EXCL(available, reg);
3125 if (IR_REGSET_IS_EMPTY(available)) {
3126 fprintf(stderr, "LSRA Internal Error: Unsolvable conflict. Allocation is not possible\n");
3127 IR_ASSERT(0);
3128 exit(-1);
3129 }
3130 IR_LOG_LSRA(" ---- Restart", ival, "");
3131 goto select_register;
3132 }
3133 split_pos = ir_find_optimal_split_position(ctx, ival, split_pos, blockPos[reg], 1);
3134 other = ir_split_interval_at(ctx, ival, split_pos);
3135 ir_add_to_unhandled(unhandled, other);
3136 IR_LOG_LSRA(" ---- Queue", other, "");
3137 }
3138
3139 /* spill intervals that currently block reg */
3140 prev = NULL;
3141 other = *active;
3142 while (other) {
3143 ir_live_pos split_pos;
3144
3145 if (reg == other->reg) {
3146 /* split active interval for reg at position */
3147 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3148
3149 if (overlap) {
3150 ir_live_interval *child, *child2;
3151
3152 IR_ASSERT(other->type != IR_VOID);
3153 IR_LOG_LSRA_CONFLICT(" ---- Conflict with active", other, overlap);
3154
3155 split_pos = ir_last_use_pos_before(other, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG);
3156 if (split_pos == 0) {
3157 split_pos = ival->range.start;
3158 }
3159 split_pos = ir_find_optimal_split_position(ctx, other, split_pos, ival->range.start, 1);
3160 if (split_pos > other->range.start) {
3161 child = ir_split_interval_at(ctx, other, split_pos);
3162 if (prev) {
3163 prev->list_next = other->list_next;
3164 } else {
3165 *active = other->list_next;
3166 }
3167 IR_LOG_LSRA(" ---- Finish", other, "");
3168 } else {
3169 if (ir_first_use_pos(other, IR_USE_MUST_BE_IN_REG) <= other->end) {
3170 if (!(ival->flags & IR_LIVE_INTERVAL_TEMP)) {
3171 next_use_pos = ir_first_use_pos(ival, IR_USE_MUST_BE_IN_REG);
3172 if (next_use_pos == ival->range.start) {
3173 IR_ASSERT(ival->use_pos && ival->use_pos->op_num == 0);
3174 /* split right after definition */
3175 split_pos = next_use_pos + 1;
3176 } else {
3177 split_pos = ir_find_optimal_split_position(ctx, ival, ival->range.start, next_use_pos - 1, 1);
3178 }
3179
3180 if (split_pos > ival->range.start) {
3181 goto spill_current;
3182 }
3183 }
3184 goto try_next_available_register;
3185 }
3186 child = other;
3187 other->reg = IR_REG_NONE;
3188 if (prev) {
3189 prev->list_next = other->list_next;
3190 } else {
3191 *active = other->list_next;
3192 }
3193 IR_LOG_LSRA(" ---- Spill and Finish", other, " (it must not be in reg)");
3194 }
3195
3196 split_pos = ir_first_use_pos_after(child, ival->range.start, IR_USE_MUST_BE_IN_REG | IR_USE_SHOULD_BE_IN_REG) - 1; // TODO: ???
3197 if (split_pos > child->range.start && split_pos < child->end) {
3198 ir_live_pos opt_split_pos = ir_find_optimal_split_position(ctx, child, ival->range.start, split_pos, 1);
3199 if (opt_split_pos > child->range.start) {
3200 split_pos = opt_split_pos;
3201 }
3202 child2 = ir_split_interval_at(ctx, child, split_pos);
3203 IR_LOG_LSRA(" ---- Spill", child, "");
3204 ir_add_to_unhandled(unhandled, child2);
3205 IR_LOG_LSRA(" ---- Queue", child2, "");
3206 } else if (child != other) {
3207 // TODO: this may cause endless loop
3208 ir_add_to_unhandled(unhandled, child);
3209 IR_LOG_LSRA(" ---- Queue", child, "");
3210 }
3211 }
3212 break;
3213 }
3214 prev = other;
3215 other = other->list_next;
3216 }
3217
3218 /* split any inactive interval for reg at the end of its lifetime hole */
3219 other = *inactive;
3220 while (other) {
3221 /* freeUntilPos[it.reg] = next intersection of it with current */
3222 if (reg == other->reg) {
3223 ir_live_pos overlap = ir_ivals_overlap(&ival->range, other->current_range);
3224
3225 if (overlap) {
3226 ir_live_interval *child;
3227
3228 IR_ASSERT(other->type != IR_VOID);
3229 IR_LOG_LSRA_CONFLICT(" ---- Conflict with inactive", other, overlap);
3230 // TODO: optimal split position (this case is not tested)
3231 child = ir_split_interval_at(ctx, other, overlap);
3232 /* reset range cache */
3233 other->current_range = &other->range;
3234 ir_add_to_unhandled(unhandled, child);
3235 IR_LOG_LSRA(" ---- Queue", child, "");
3236 }
3237 }
3238 other = other->list_next;
3239 }
3240
3241 /* current.reg = reg */
3242 ival->reg = reg;
3243 IR_LOG_LSRA_ASSIGN(" ---- Assign", ival, " (after splitting others)");
3244
3245 if (*unhandled && ival->end > (*unhandled)->range.start) {
3246 ival->list_next = *active;
3247 *active = ival;
3248 }
3249 return reg;
3250 }
3251
/* Callback for ir_gen_dessa_moves(): reserve the temporary register(s)
 * needed by the DESSA move resolver in the block stored in ctx->data.
 * Temporary #0 is used for integer moves, #1 for floating-point moves.
 * Always returns 1 (the move generator expects a non-zero result). */
static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
	ir_block *bb = ctx->data;
	ir_tmp_reg tmp_reg;

	if (to != 0 && from == 0) {
		/* no temporary is required for this kind of move */
		return 1;
	}
	/* both the "to == 0" and the "from != 0" cases need the same temporary */
	if (IR_IS_TYPE_INT(type)) {
		tmp_reg.num = 0;
	} else {
		IR_ASSERT(IR_IS_TYPE_FP(type));
		tmp_reg.num = 1;
	}
	tmp_reg.type = type;
	tmp_reg.start = IR_DEF_SUB_REF;
	tmp_reg.end = IR_SAVE_SUB_REF;
	if (!ir_has_tmp(ctx, bb->end, tmp_reg.num)) {
		ir_add_tmp(ctx, bb->end, bb->end, tmp_reg.num, tmp_reg);
	}
	return 1;
}
3291
ir_ival_spill_for_fuse_load(ir_ctx * ctx,ir_live_interval * ival,ir_reg_alloc_data * data)3292 static bool ir_ival_spill_for_fuse_load(ir_ctx *ctx, ir_live_interval *ival, ir_reg_alloc_data *data)
3293 {
3294 ir_use_pos *use_pos = ival->use_pos;
3295 ir_insn *insn;
3296
3297 if (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) {
3298 IR_ASSERT(!ival->next && use_pos && use_pos->op_num == 0);
3299 insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
3300 IR_ASSERT(insn->op == IR_PARAM);
3301 use_pos = use_pos->next;
3302 if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
3303 return 0;
3304 }
3305
3306 if (use_pos) {
3307 ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
3308 if (bb->loop_depth) {
3309 return 0;
3310 }
3311 }
3312
3313 return 1;
3314 } else if (ival->flags & IR_LIVE_INTERVAL_MEM_LOAD) {
3315 insn = &ctx->ir_base[IR_LIVE_POS_TO_REF(use_pos->pos)];
3316 IR_ASSERT(insn->op == IR_VLOAD);
3317 IR_ASSERT(ctx->ir_base[insn->op2].op == IR_VAR);
3318 use_pos = use_pos->next;
3319 if (use_pos && (use_pos->next || (use_pos->flags & IR_USE_MUST_BE_IN_REG))) {
3320 return 0;
3321 }
3322
3323 if (use_pos) {
3324 ir_block *bb = ir_block_from_live_pos(ctx, use_pos->pos);
3325 if (bb->loop_depth && bb != ir_block_from_live_pos(ctx, ival->use_pos->pos)) {
3326 return 0;
3327 }
3328 /* check if VAR may be clobbered between VLOAD and use */
3329 ir_use_list *use_list = &ctx->use_lists[insn->op2];
3330 ir_ref n = use_list->count;
3331 ir_ref *p = &ctx->use_edges[use_list->refs];
3332 for (; n > 0; p++, n--) {
3333 ir_ref use = *p;
3334 if (ctx->ir_base[use].op == IR_VSTORE) {
3335 if (use > IR_LIVE_POS_TO_REF(ival->use_pos->pos) && use < IR_LIVE_POS_TO_REF(use_pos->pos)) {
3336 return 0;
3337 }
3338 } else if (ctx->ir_base[use].op == IR_VADDR) {
3339 return 0;
3340 }
3341 }
3342 }
3343 ival->stack_spill_pos = ctx->ir_base[insn->op2].op3;
3344
3345 return 1;
3346 }
3347 return 0;
3348 }
3349
/* Assign "special" spill slots to intervals of bound variables.
 * Bindings with negative values encode a pre-reserved slot; any spilled
 * (or split) interval of such a vreg gets that slot instead of a fresh one. */
static void ir_assign_bound_spill_slots(ir_ctx *ctx)
{
	ir_hashtab_bucket *bucket = ctx->binding->data;
	uint32_t remaining = ctx->binding->count;

	for (; remaining > 0; bucket++, remaining--) {
		uint32_t v = ctx->vregs[bucket->key];
		ir_live_interval *ival;

		if (!v) {
			continue;
		}
		ival = ctx->live_intervals[v];
		if (ival
		 && ival->stack_spill_pos == -1
		 && (ival->next || ival->reg == IR_REG_NONE)) {
			IR_ASSERT(bucket->val < 0);
			/* special spill slot (encoded as a negative binding value) */
			ival->stack_spill_pos = -bucket->val;
			ival->flags |= IR_LIVE_INTERVAL_SPILLED | IR_LIVE_INTERVAL_SPILL_SPECIAL;
		}
	}
}
3374
/* Main linear-scan register-allocation driver (see the Wimmer/Franz and
 * Wimmer papers cited in the file header).  Processes live intervals in
 * order of increasing start position, maintaining the classic "unhandled",
 * "active" and "inactive" interval lists, and assigns a physical register
 * to each interval (splitting/spilling via the allocate helpers).  After
 * the main loop, spill slots are assigned with a second, simpler linear
 * scan.  Returns 0 when there are no live intervals, 1 on success. */
static int ir_linear_scan(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_live_interval *unhandled = NULL;
	ir_live_interval *active = NULL;
	ir_live_interval *inactive = NULL;
	ir_live_interval *ival, *other, *prev;
	int j;
	ir_live_pos position;
	ir_reg reg;
	ir_reg_alloc_data data;
	ir_ref vars = ctx->vars;

	if (!ctx->live_intervals) {
		return 0;
	}

	if (ctx->flags2 & IR_LR_HAVE_DESSA_MOVES) {
		/* Add fixed intervals for temporary registers used for DESSA moves */
		for (b = 1, bb = &ctx->cfg_blocks[1]; b <= ctx->cfg_blocks_count; b++, bb++) {
			IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
			if (bb->flags & IR_BB_DESSA_MOVES) {
				ctx->data = bb;
				ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
			}
		}
	}

	ctx->data = &data;
	ctx->stack_frame_size = 0;
	data.unused_slot_4 = 0;
	data.unused_slot_2 = 0;
	data.unused_slot_1 = 0;
	data.handled = NULL;

	/* Allocate spill slots for explicit VARs (linked through insn->op3) */
	while (vars) {
		ir_insn *insn = &ctx->ir_base[vars];

		IR_ASSERT(insn->op == IR_VAR);
		vars = insn->op3; /* list next */

		insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data);
	}

	/* Queue all virtual-register intervals, except those that can be
	 * served directly from memory (fused PARAM/VLOAD accesses) */
	for (j = ctx->vregs_count; j != 0; j--) {
		ival = ctx->live_intervals[j];
		if (ival) {
			if (!(ival->flags & (IR_LIVE_INTERVAL_MEM_PARAM|IR_LIVE_INTERVAL_MEM_LOAD))
			 || !ir_ival_spill_for_fuse_load(ctx, ival, &data)) {
				ir_add_to_unhandled(&unhandled, ival);
			}
		}
	}

	/* Temporary-register intervals live at index 0 */
	ival = ctx->live_intervals[0];
	if (ival) {
		ir_merge_to_unhandled(&unhandled, ival);
	}

	/* vregs + tmp + fixed + SCRATCH + ALL: fixed intervals start inactive */
	for (j = ctx->vregs_count + 1; j <= ctx->vregs_count + IR_REG_NUM + 2; j++) {
		ival = ctx->live_intervals[j];
		if (ival) {
			ival->current_range = &ival->range;
			ival->list_next = inactive;
			inactive = ival;
		}
	}

	ctx->flags2 &= ~(IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS);

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_RA) {
		fprintf(stderr, "----\n");
		ir_dump_live_ranges(ctx, stderr);
		fprintf(stderr, "---- Start LSRA\n");
	}
#endif

	while (unhandled) {
		ival = unhandled;
		ival->current_range = &ival->range;
		unhandled = ival->list_next;
		position = ival->range.start;

		IR_LOG_LSRA(" ---- Processing", ival, "...");

		/* for each interval i in active: expire or move to inactive */
		other = active;
		prev = NULL;
		while (other) {
			ir_live_range *r = other->current_range;

			IR_ASSERT(r);
			if (r->end <= position) {
				/* advance the cached range past the current position */
				do {
					r = r->next;
				} while (r && r->end <= position);
				if (!r) {
					/* move i from active to handled */
					other = other->list_next;
					if (prev) {
						prev->list_next = other;
					} else {
						active = other;
					}
					continue;
				}
				other->current_range = r;
			}
			if (position < r->start) {
				/* move i from active to inactive (lifetime hole) */
				if (prev) {
					prev->list_next = other->list_next;
				} else {
					active = other->list_next;
				}
				other->list_next = inactive;
				inactive = other;
			} else {
				prev = other;
			}
			other = prev ? prev->list_next : active;
		}

		/* for each interval i in inactive: expire or re-activate */
		other = inactive;
		prev = NULL;
		while (other) {
			ir_live_range *r = other->current_range;

			IR_ASSERT(r);
			if (r->end <= position) {
				do {
					r = r->next;
				} while (r && r->end <= position);
				if (!r) {
					/* move i from inactive to handled */
					other = other->list_next;
					if (prev) {
						prev->list_next = other;
					} else {
						inactive = other;
					}
					continue;
				}
				other->current_range = r;
			}
			if (position >= r->start) {
				/* move i from inactive to active */
				if (prev) {
					prev->list_next = other->list_next;
				} else {
					inactive = other->list_next;
				}
				other->list_next = active;
				active = other;
			} else {
				prev = other;
			}
			other = prev ? prev->list_next : inactive;
		}

		/* Try a free register first; otherwise evict/split blocking intervals */
		reg = ir_try_allocate_free_reg(ctx, ival, &active, inactive, &unhandled);
		if (reg == IR_REG_NONE) {
			reg = ir_allocate_blocked_reg(ctx, ival, &active, &inactive, &unhandled);
		}
	}

#if 0 //def IR_DEBUG
	/* all intervals must be processed */
	ival = active;
	while (ival) {
		IR_ASSERT(!ival->next);
		ival = ival->list_next;
	}
	ival = inactive;
	while (ival) {
		IR_ASSERT(!ival->next);
		ival = ival->list_next;
	}
#endif

	if (ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS)) {

		if (ctx->binding) {
			ir_assign_bound_spill_slots(ctx);
		}

		/* Use simple linear-scan (without holes) to allocate and reuse spill slots */
		unhandled = NULL;
		for (j = ctx->vregs_count; j != 0; j--) {
			ival = ctx->live_intervals[j];
			if (ival
			 && (ival->next || ival->reg == IR_REG_NONE)
			 && ival->stack_spill_pos == -1) {
				ival->flags |= IR_LIVE_INTERVAL_SPILLED;
				if (!(ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
					ir_live_range *r;

					/* extend "end" to cover the last range of the last split child */
					other = ival;
					while (other->next) {
						other = other->next;
					}
					r = &other->range;
					while (r->next) {
						r = r->next;
					}
					ival->end = r->end;
					ir_add_to_unhandled_spill(&unhandled, ival);
				}
			}
		}

		if (unhandled) {
			uint8_t size;
			/* per-type-size lists of expired intervals whose slots may be reused */
			ir_live_interval *handled[9] = {NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL, NULL};
			ir_live_interval *old;

			data.handled = handled;
			active = NULL;
			while (unhandled) {
				ival = unhandled;
				ival->current_range = &ival->range;
				unhandled = ival->list_next;
				position = ival->range.start;

				/* for each interval i in active */
				other = active;
				prev = NULL;
				while (other) {
					if (other->end <= position) {
						/* move i from active to handled */
						if (prev) {
							prev->list_next = other->list_next;
						} else {
							active = other->list_next;
						}
						size = ir_type_size[other->type];
						IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
						/* avoid recording the same spill slot twice */
						old = handled[size];
						while (old) {
							if (old->stack_spill_pos == other->stack_spill_pos) {
								break;
							}
							old = old->list_next;
						}
						if (!old) {
							other->list_next = handled[size];
							handled[size] = other;
						}
					} else {
						prev = other;
					}
					other = prev ? prev->list_next : active;
				}

				ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data);
				if (unhandled && ival->end > unhandled->range.start) {
					ival->list_next = active;
					active = ival;
				} else {
					/* expires before the next interval starts: immediately reusable */
					size = ir_type_size[ival->type];
					IR_ASSERT(size == 1 || size == 2 || size == 4 || size == 8);
					old = handled[size];
					while (old) {
						if (old->stack_spill_pos == ival->stack_spill_pos) {
							break;
						}
						old = old->list_next;
					}
					if (!old) {
						ival->list_next = handled[size];
						handled[size] = ival;
					}
				}
			}
			data.handled = NULL;
		}
	}

#ifdef IR_TARGET_X86
	/* NOTE(review): reserves a stack slot apparently used to pass FP return
	 * values through memory on x86 — confirm against the backend */
	if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data);
	} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data);
	} else {
		ctx->ret_slot = -1;
	}
#endif

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_RA) {
		fprintf(stderr, "---- Finish LSRA\n");
		ir_dump_live_ranges(ctx, stderr);
		fprintf(stderr, "----\n");
	}
#endif

	return 1;
}
3677
needs_spill_reload(ir_ctx * ctx,ir_live_interval * ival,uint32_t b0,ir_bitset available)3678 static bool needs_spill_reload(ir_ctx *ctx, ir_live_interval *ival, uint32_t b0, ir_bitset available)
3679 {
3680 ir_worklist worklist;
3681 ir_block *bb;
3682 uint32_t b, *p, n;
3683
3684 ir_worklist_init(&worklist, ctx->cfg_blocks_count + 1);
3685 ir_worklist_push(&worklist, b0);
3686 while (ir_worklist_len(&worklist) != 0) {
3687 b = ir_worklist_pop(&worklist);
3688 bb = &ctx->cfg_blocks[b];
3689 if (bb->flags & (IR_BB_ENTRY|IR_BB_START)) {
3690 ir_worklist_free(&worklist);
3691 return 1;
3692 }
3693 n = bb->predecessors_count;
3694 for (p = &ctx->cfg_edges[bb->predecessors]; n > 0; p++, n--) {
3695 b = *p;
3696 bb = &ctx->cfg_blocks[b];
3697
3698 if (!ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(bb->end))) {
3699 ir_worklist_free(&worklist);
3700 return 1;
3701 } else if (!ir_bitset_in(available, b)) {
3702 ir_worklist_push(&worklist, b);
3703 }
3704 }
3705 }
3706 ir_worklist_free(&worklist);
3707 return 0;
3708 }
3709
needs_spill_load(ir_ctx * ctx,ir_live_interval * ival,ir_use_pos * use_pos)3710 static bool needs_spill_load(ir_ctx *ctx, ir_live_interval *ival, ir_use_pos *use_pos)
3711 {
3712 if (use_pos->next
3713 && use_pos->op_num == 1
3714 && use_pos->next->pos == use_pos->pos
3715 && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
3716 /* Support for R2 = ADD(R1, R1) */
3717 use_pos = use_pos->next;
3718 }
3719 return use_pos->next && use_pos->next->op_num != 0;
3720 }
3721
/* Record the register assigned to an operand of a fused instruction.
 * The (root, ref_and_op) pair forms an 8-byte key into the lazily created
 * ctx->fused_regs string table; the value tags the register with 0x10000000. */
static void ir_set_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op, int8_t reg)
{
	char key[10];

	IR_ASSERT(reg != IR_REG_NONE);
	if (ctx->fused_regs == NULL) {
		/* create the table on first use */
		ctx->fused_regs = ir_mem_malloc(sizeof(ir_strtab));
		ir_strtab_init(ctx->fused_regs, 8, 128);
	}
	memcpy(key, &root, sizeof(root));
	memcpy(key + 4, &ref_and_op, sizeof(ref_and_op));
	ir_strtab_lookup(ctx->fused_regs, key, 8, 0x10000000 | reg);
}
3735
/* Materialize the allocation results: for every use position of every live
 * interval, write the assigned physical register (possibly tagged with
 * spill-load/store flags) into ctx->regs via ir_set_alocated_reg(), collect
 * the set of used registers, compute the preserved-register set, and fix up
 * the stack frame.  The slow path (splits/spills present) additionally
 * decides per use where spill stores/reloads are needed and which reloads
 * can be fused into the consuming instruction. */
static void assign_regs(ir_ctx *ctx)
{
	ir_ref i;
	ir_live_interval *ival, *top_ival;
	ir_use_pos *use_pos;
	int8_t reg, old_reg;
	ir_ref ref;
	ir_regset used_regs = 0;

	if (!ctx->regs) {
		ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
		memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
	}

	if (!(ctx->flags2 & (IR_RA_HAVE_SPLITS|IR_RA_HAVE_SPILLS))) {
		/* fast path: no interval was split or spilled, just copy registers out */
		for (i = 1; i <= ctx->vregs_count; i++) {
			ival = ctx->live_intervals[i];
			if (ival) {
				do {
					if (ival->reg != IR_REG_NONE) {
						reg = ival->reg;
						IR_REGSET_INCL(used_regs, reg);
						use_pos = ival->use_pos;
						while (use_pos) {
							/* negative hint_ref redirects the write to a fused insn */
							ref = (use_pos->hint_ref < 0) ? -use_pos->hint_ref : IR_LIVE_POS_TO_REF(use_pos->pos);
							ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
							use_pos = use_pos->next;
						}
					}
					ival = ival->next;
				} while (ival);
			}
		}
	} else {
		/* per-block bitset: blocks at whose end the spilled value is covered
		 * by this interval (used to suppress redundant reloads) */
		ir_bitset available = ir_bitset_malloc(ctx->cfg_blocks_count + 1);

		for (i = 1; i <= ctx->vregs_count; i++) {
			top_ival = ival = ctx->live_intervals[i];
			if (ival) {
				if (!(ival->flags & IR_LIVE_INTERVAL_SPILLED)) {
					/* non-spilled interval (possibly split): plain register writes */
					do {
						if (ival->reg != IR_REG_NONE) {
							IR_REGSET_INCL(used_regs, ival->reg);
							use_pos = ival->use_pos;
							while (use_pos) {
								reg = ival->reg;
								ref = IR_LIVE_POS_TO_REF(use_pos->pos);
								if (use_pos->hint_ref < 0) {
									ref = -use_pos->hint_ref;
								}
								ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

								use_pos = use_pos->next;
							}
						}
						ival = ival->next;
					} while (ival);
				} else {
					/* spilled interval: decide spill stores/reloads per use */
					do {
						if (ival->reg != IR_REG_NONE) {
							/* last use ref known to leave the value in the register */
							ir_ref prev_use_ref = IR_UNUSED;

							ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
							IR_REGSET_INCL(used_regs, ival->reg);
							use_pos = ival->use_pos;
							while (use_pos) {
								reg = ival->reg;
								ref = IR_LIVE_POS_TO_REF(use_pos->pos);
								// TODO: Insert spill loads and stores in optimal positions (resolution)
								if (use_pos->op_num == 0) {
									/* definition: store the value to the spill slot */
									ir_bitset_clear(available, ir_bitset_len(ctx->cfg_blocks_count + 1));
									if (ctx->ir_base[ref].op == IR_PHI) {
										/* Spilled PHI var is passed through memory */
										reg = IR_REG_NONE;
										prev_use_ref = IR_UNUSED;
									} else if (ctx->ir_base[ref].op == IR_PARAM
									 && (ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)) {
										/* Stack PARAM var is passed through memory */
										reg = IR_REG_NONE;
									} else {
										uint32_t use_b = ctx->cfg_map[ref];

										if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
											ir_bitset_incl(available, use_b);
										}
										if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
											reg |= IR_REG_SPILL_SPECIAL;
										} else {
											reg |= IR_REG_SPILL_STORE;
										}
										prev_use_ref = ref;
									}
								} else if ((!prev_use_ref || ctx->cfg_map[prev_use_ref] != ctx->cfg_map[ref])
								 && needs_spill_reload(ctx, ival, ctx->cfg_map[ref], available)) {
									/* a reload is needed before this use */
									if (!(use_pos->flags & IR_USE_MUST_BE_IN_REG)
									 && use_pos->hint != reg
//									 && ctx->ir_base[ref].op != IR_CALL
//									 && ctx->ir_base[ref].op != IR_TAILCALL) {
									 && ctx->ir_base[ref].op != IR_SNAPSHOT
									 && !needs_spill_load(ctx, ival, use_pos)) {
										/* fuse spill load (valid only when register is not reused) */
										reg = IR_REG_NONE;
										if (use_pos->next
										 && use_pos->op_num == 1
										 && use_pos->next->pos == use_pos->pos
										 && !(use_pos->next->flags & IR_USE_MUST_BE_IN_REG)) {
											/* Support for R2 = BINOP(R1, R1) */
											if (use_pos->hint_ref < 0) {
												ref = -use_pos->hint_ref;
											}
											ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
											use_pos = use_pos->next;
										}
									} else {
										/* emit an explicit spill load before the use */
										if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
											reg |= IR_REG_SPILL_SPECIAL;
										} else {
											reg |= IR_REG_SPILL_LOAD;
										}
										if (ctx->ir_base[ref].op != IR_SNAPSHOT) {
											uint32_t use_b = ctx->cfg_map[ref];

											if (ir_ival_covers(ival, IR_SAVE_LIVE_POS_FROM_REF(ctx->cfg_blocks[use_b].end))) {
												ir_bitset_incl(available, use_b);
											}
											prev_use_ref = ref;
										}
									}
									if (use_pos->hint_ref < 0
									 && ctx->use_lists[-use_pos->hint_ref].count > 1
									 && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
										/* fused operand already carries a different register:
										 * record ours in the fused-regs side table instead */
										if (top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
											reg |= IR_REG_SPILL_SPECIAL;
										} else {
											reg |= IR_REG_SPILL_LOAD;
										}
										if (reg != old_reg) {
											IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
											ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
											ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
											use_pos = use_pos->next;
											continue;
										}
									}
								} else if (use_pos->flags & IR_PHI_USE) {
									IR_ASSERT(use_pos->hint_ref < 0);
									IR_ASSERT(ctx->vregs[-use_pos->hint_ref]);
									IR_ASSERT(ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]);
									if (ctx->live_intervals[ctx->vregs[-use_pos->hint_ref]]->flags & IR_LIVE_INTERVAL_SPILLED) {
										/* Spilled PHI var is passed through memory */
										reg = IR_REG_NONE;
									}
								} else if (use_pos->hint_ref < 0
								 && ctx->use_lists[-use_pos->hint_ref].count > 1
								 && (old_reg = ir_get_alocated_reg(ctx, -use_pos->hint_ref, use_pos->op_num)) != IR_REG_NONE) {
									if (reg != old_reg) {
										IR_ASSERT(ctx->rules[-use_pos->hint_ref] & IR_FUSED);
										ctx->rules[-use_pos->hint_ref] |= IR_FUSED_REG;
										ir_set_fused_reg(ctx, ref, -use_pos->hint_ref * sizeof(ir_ref) + use_pos->op_num, reg);
										use_pos = use_pos->next;
										continue;
									}
								} else {
									/* reuse register without spill load */
								}
								if (use_pos->hint_ref < 0) {
									ref = -use_pos->hint_ref;
								}
								ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);

								use_pos = use_pos->next;
							}
						} else if (!(top_ival->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL)) {
							/* no register at all: only SNAPSHOT uses reference the slot */
							use_pos = ival->use_pos;
							while (use_pos) {
								ref = IR_LIVE_POS_TO_REF(use_pos->pos);
								if (ctx->ir_base[ref].op == IR_SNAPSHOT) {
									IR_ASSERT(use_pos->hint_ref >= 0);
									/* A reference to a CPU spill slot */
									reg = IR_REG_SPILL_STORE | IR_REG_STACK_POINTER;
									ir_set_alocated_reg(ctx, ref, use_pos->op_num, reg);
								}
								use_pos = use_pos->next;
							}
						}
						ival = ival->next;
					} while (ival);
				}
			}
		}
		ir_mem_free(available);
	}

	/* Temporary registers */
	ival = ctx->live_intervals[0];
	if (ival) {
		do {
			IR_ASSERT(ival->reg != IR_REG_NONE);
			IR_REGSET_INCL(used_regs, ival->reg);
			ir_set_alocated_reg(ctx, ival->tmp_ref, ival->tmp_op_num, ival->reg);
			ival = ival->next;
		} while (ival);
	}

	/* Determine which callee-preserved registers must be saved */
	if (ctx->fixed_stack_frame_size != -1) {
		ctx->used_preserved_regs = (ir_regset)ctx->fixed_save_regset;
		if (IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
			ctx->used_preserved_regs)) {
			// TODO: Preserved reg and fixed frame conflict ???
			// IR_ASSERT(0 && "Preserved reg and fixed frame conflict");
		}
	} else {
		ctx->used_preserved_regs = IR_REGSET_UNION((ir_regset)ctx->fixed_save_regset,
			IR_REGSET_DIFFERENCE(IR_REGSET_INTERSECTION(used_regs, IR_REGSET_PRESERVED),
				(ctx->flags & IR_FUNCTION) ? (ir_regset)ctx->fixed_regset : IR_REGSET_PRESERVED));
	}

	ir_fix_stack_frame(ctx);
}
3955
/* Public entry point: run linear-scan allocation and, on success,
 * write the assigned registers back into the IR.  Returns 1 on success,
 * 0 when there was nothing to allocate. */
int ir_reg_alloc(ir_ctx *ctx)
{
	if (!ir_linear_scan(ctx)) {
		return 0;
	}
	assign_regs(ctx);
	return 1;
}
3964