/*
 * IR - Lightweight JIT Compilation Framework
 * (GCM - Global Code Motion and Scheduler)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 *
 * The GCM algorithm is based on Cliff Click's publication
 * See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95.
 */

#include "ir.h"
#include "ir_private.h"

#define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0)
#define IR_GCM_EARLY_BLOCK(b)        ((uint32_t)-((int32_t)(b)))

#define IR_GCM_SPLIT 1
#define IR_SCHEDULE_SWAP_OPS 1

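/*
 * Schedule-"early" pass (informal sketch added for exposition): each data
 * node is placed into the earliest block where all of its inputs are
 * available, i.e. the input block with the maximal dom_depth. The result
 * is stored negated in ctx->cfg_map (see IR_GCM_EARLY_BLOCK) so the late
 * pass can tell provisional placements from final ones.
 */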
static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late)
{
	ir_ref n, *p, input;
	ir_insn *insn;
	uint32_t dom_depth;
	uint32_t b, result;

	insn = &ctx->ir_base[ref];

	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

	result = 1;
	dom_depth = 0;

	n = insn->inputs_count;
	for (p = insn->ops + 1; n > 0; p++, n--) {
		input = *p;
		if (input > 0) {
			b = ctx->cfg_map[input];
			if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
				b = IR_GCM_EARLY_BLOCK(b);
			} else if (!b) {
				b = ir_gcm_schedule_early(ctx, input, queue_late);
			}
			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
				dom_depth = ctx->cfg_blocks[b].dom_depth;
				result = b;
			}
		}
	}

	ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result);
	ir_list_push_unchecked(queue_late, ref);
	return result;
}

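/*
 * LCA over the dominator tree (informal note): first the deeper of b1/b2
 * is raised to the depth of the other, then both climb in lock-step until
 * they meet. The meeting point dominates both input blocks.
 */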
/* Least Common Ancestor */
static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2)
{
	uint32_t dom_depth;

	dom_depth = ctx->cfg_blocks[b2].dom_depth;
	while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
		b1 = ctx->cfg_blocks[b1].dom_parent;
	}
	dom_depth = ctx->cfg_blocks[b1].dom_depth;
	while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
		b2 = ctx->cfg_blocks[b2].dom_parent;
	}
	while (b1 != b2) {
		b1 = ctx->cfg_blocks[b1].dom_parent;
		b2 = ctx->cfg_blocks[b2].dom_parent;
	}
	return b2;
}

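/*
 * Given the legal placement range [early block .. LCA of the uses] on a
 * dominator-tree path, pick the block with the smallest loop depth so
 * that loop-invariant code is hoisted out of loops. Code is never moved
 * out of a loop containing an OSR entry unless the value can be restored
 * from a binding (a descriptive note; the heuristic is implemented below).
 */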
static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
{
	ir_block *bb = &ctx->cfg_blocks[lca];
	uint32_t loop_depth = bb->loop_depth;
	uint32_t flags, best, b;

	if (!loop_depth) {
		return lca;
	}

#if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */
	if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
		ir_use_list *use_list = &ctx->use_lists[ref];

		if (use_list->count == 1) {
			ir_ref use = ctx->use_edges[use_list->refs];
			ir_insn *insn = &ctx->ir_base[use];
			if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
				/* Don't hoist invariant comparison */
				return lca;
			}
		}
	}
#endif

	flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
	if ((flags & IR_BB_LOOP_WITH_ENTRY)
	 && !(ctx->binding && ir_binding_find(ctx, ref))) {
		/* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
		return lca;
	}

	best = b = lca;
	do {
		b = bb->dom_parent;
		bb = &ctx->cfg_blocks[b];
		if (bb->loop_depth < loop_depth) {
			if (!bb->loop_depth) {
				best = b;
				break;
			}
			flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
			if ((flags & IR_BB_LOOP_WITH_ENTRY)
			 && !(ctx->binding && ir_binding_find(ctx, ref))) {
				break;
			}
			loop_depth = bb->loop_depth;
			best = b;
		}
	} while (b != ctx->cfg_map[ref]);

	return best;
}

#if IR_GCM_SPLIT
/* Partially Dead Code Elimination through splitting the node and sinking the clones
 *
 * This code is based on Benedikt Meurer's idea first implemented in V8.
 * See: https://codereview.chromium.org/899433005
 */
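/*
 * Rough plan (informal sketch of the code below):
 *   1. compute the set of blocks where the node is TOTALLY_USEFUL, i.e.
 *      needed on every path leaving the block;
 *   2. for every use, climb from the use's block up the dominator tree
 *      while the immediate dominator is TOTALLY_USEFUL; uses ending at the
 *      same block form one partition;
 *   3. emit one clone of the node per partition and rewire the uses.
 * If the block chosen by GCM turns out to be TOTALLY_USEFUL itself, the
 * split is abandoned and the node stays where it is.
 */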

typedef struct _ir_gcm_split_data {
	ir_sparse_set totally_useful;
	ir_list       worklist;
} ir_gcm_split_data;

static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
{
	uint32_t *p, i, n = bb->predecessors_count;

	IR_ASSERT(n > 0);
	p = ctx->cfg_edges + bb->predecessors;
	do {
		i = *p;
		if (!ir_sparse_set_in(&data->totally_useful, i)) {
			ir_list_push(&data->worklist, i);
		}
		p++;
		n--;
	} while (n > 0);
}

static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
{
	uint32_t *p, i, n = bb->successors_count;

	if (n <= 1) {
		IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors]));
		return 1;
	}

	p = ctx->cfg_edges + bb->successors;
	do {
		i = *p;
		if (!ir_sparse_set_in(&data->totally_useful, i)) {
			return 0;
		}
		p++;
		n--;
	} while (n > 0);

	return 1;
}

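/* Contract (informal note): returns 1 if the node was split and its uses
 * rewired to clones, 0 if the node is TOTALLY_USEFUL in the scheduled
 * block "b" and must stay there unsplit. */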
static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
{
	ir_use_list *use_list;
	ir_insn *insn;
	ir_ref n, *p, use;
	uint32_t i;
	ir_gcm_split_data *data = ctx->data;

	IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count);

	/* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD)
	 * 1.1. Collect the blocks where the node is really USED.
	 */
	ir_sparse_set_clear(&data->totally_useful);

	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PHI) {
			ir_ref *p = insn->ops + 2; /* PHI data inputs */
			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
			ir_ref n = insn->inputs_count - 1;

			for (;n > 0; p++, q++, n--) {
				if (*p == ref) {
					i = ctx->cfg_map[*q];
					IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
					if (!ir_sparse_set_in(&data->totally_useful, i)) {
						if (i == b) return 0; /* node is totally-useful in the scheduled block */
						ir_sparse_set_add(&data->totally_useful, i);
					}
				}
			}
		} else {
			i = ctx->cfg_map[use];
			if (!i) {
				continue;
			}
			IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
			if (!ir_sparse_set_in(&data->totally_useful, i)) {
				if (i == b) return 0; /* node is totally-useful in the scheduled block */
				ir_sparse_set_add(&data->totally_useful, i);
			}
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
		bool first = 1;
		fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b);
		IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
			if (first) {
				fprintf(stderr, "\td_%d is USED in [BB%d", ref, i);
				first = 0;
			} else {
				fprintf(stderr, ", BB%d", i);
			}
		} IR_SPARSE_SET_FOREACH_END();
		fprintf(stderr, "]\n");
	}
#endif

	/* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
	 * add them into the TOTALLY_USEFUL set if all of their successors are already there.
	 */
	IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
		_push_predecessors(ctx, &ctx->cfg_blocks[i], data);
	} IR_SPARSE_SET_FOREACH_END();

	while (ir_list_len(&data->worklist)) {
		i = ir_list_pop(&data->worklist);
		if (!ir_sparse_set_in(&data->totally_useful, i)) {
			ir_block *bb = &ctx->cfg_blocks[i];

			if (_check_successors(ctx, bb, data)) {
				if (i == b) {
					/* node is TOTALLY_USEFUL in the scheduled block */
					ir_list_clear(&data->worklist);
					return 0;
				}
				ir_sparse_set_add(&data->totally_useful, i);
				_push_predecessors(ctx, bb, data);
			}
		}
	}

	IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b));

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
		bool first = 1;
		IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
			if (first) {
				fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i);
				first = 0;
			} else {
				fprintf(stderr, ", BB%d", i);
			}
		} IR_SPARSE_SET_FOREACH_END();
		fprintf(stderr, "]\n");
	}
#endif

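	/* For every use we climb from the use's block up the dominator tree as
	 * long as the immediate dominator is TOTALLY_USEFUL; uses that stop at
	 * the same block are served by a single clone placed in that block
	 * (an informal restatement of the loops below).
	 */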
	/* 2. Split the USEs into partitions */
	use_list = &ctx->use_lists[ref];
	ir_hashtab hash;
	uint32_t j, clone, clones_count = 0, uses_count = 0;
	struct {
		ir_ref   ref;
		uint32_t block;
		uint32_t use_count;
		uint32_t use;
	} *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
	struct {
		ir_ref   ref;
		uint32_t block;
		uint32_t next;
	} *uses = ir_mem_malloc(sizeof(*uses) * use_list->count);

	ir_hashtab_init(&hash, use_list->count);
	n = use_list->count;
	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PHI) {
			ir_ref *p = insn->ops + 2; /* PHI data inputs */
			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
			ir_ref n = insn->inputs_count - 1;

			/* PHIs must be processed once */
			if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) {
				continue;
			}
			ir_hashtab_add(&hash, -use, IR_NULL);
			for (;n > 0; p++, q++, n--) {
				if (*p == ref) {
					j = i = ctx->cfg_map[*q];
					while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
						j = ctx->cfg_blocks[j].idom;
					}
					clone = ir_hashtab_find(&hash, j);
					if (clone == IR_INVALID_VAL) {
						clone = clones_count++;
						ir_hashtab_add(&hash, j, clone);
						clones[clone].block = j;
						clones[clone].use_count = 0;
						clones[clone].use = (uint32_t)-1;
					}
					uses[uses_count].ref = use;
					uses[uses_count].block = i;
					uses[uses_count].next = clones[clone].use;
					clones[clone].use_count++;
					clones[clone].use = uses_count++;
				}
			}
		} else {
			j = i = ctx->cfg_map[use];
			IR_ASSERT(i > 0);
			while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
				j = ctx->cfg_blocks[j].idom;
			}
			clone = ir_hashtab_find(&hash, j);
			if (clone == IR_INVALID_VAL) {
				clone = clones_count++;
				ir_hashtab_add(&hash, j, clone);
				clones[clone].block = j;
				clones[clone].use_count = 0;
				clones[clone].use = -1;
			}
			uses[uses_count].ref = use;
			uses[uses_count].block = i;
			uses[uses_count].next = clones[clone].use;
			clones[clone].use_count++;
			clones[clone].use = uses_count++;
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
		for (i = 0; i < clones_count; i++) {
			uint32_t u = clones[i].use;

			fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d",
				i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block);
			u = uses[u].next;
			while (u != (uint32_t)-1) {
				fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block);
				u = uses[u].next;
			}
			fprintf(stderr, "]\n");
		}
	}
#endif

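	/* Note (added): ir_emit() may grow and reallocate ctx->ir_base, which
	 * is why "insn" is re-fetched after every emission below. */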
	/* Create Clones */
	insn = &ctx->ir_base[ref];
	clones[0].ref = ref;
	for (i = 1; i < clones_count; i++) {
		clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3);
		insn = &ctx->ir_base[ref];
		if (insn->op1 > 0) ir_use_list_add(ctx, insn->op1, clone);
		if (insn->op2 > 0) ir_use_list_add(ctx, insn->op2, clone);
		if (insn->op3 > 0) ir_use_list_add(ctx, insn->op3, clone);
	}

	/* Reconstruct IR: update DEF->USE lists, CFG mapping, etc. */
	ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
	ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t));
	n = ctx->use_lists[ref].refs;
	for (i = 0; i < clones_count; i++) {
		clone = clones[i].ref;
		if (clones[i].use_count == 1) {
			/* TOTALLY_USEFUL block may be the head of a diamond above the real usage.
			 * Sink a single-use clone down to the real usage block.
			 * Clones with multiple uses will be sunk into the LCA block.
			 */
			clones[i].block = uses[clones[i].use].block;
		}
		ctx->cfg_map[clone] = clones[i].block;
		ctx->use_lists[clone].count = clones[i].use_count;
		ctx->use_lists[clone].refs = n;

		uint32_t u = clones[i].use;
		while (u != (uint32_t)-1) {
			use = uses[u].ref;
			ctx->use_edges[n++] = use;
			u = uses[u].next;
			if (i > 0) {
				/* replace inputs */
				ir_insn *insn = &ctx->ir_base[use];
				ir_ref k, l = insn->inputs_count;

				if (insn->op == IR_PHI) {
					for (k = 1; k <= l; k++) {
						if (ir_insn_op(insn, k) == ref) {
							j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)];
							if (j != clones[i].block) {
								uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth;
								while (ctx->cfg_blocks[j].dom_depth > dom_depth) {
									j = ctx->cfg_blocks[j].dom_parent;
								}
								if (j != clones[i].block) {
									continue;
								}
							}
							ir_insn_set_op(insn, k, clone);
							break;
						}
					}
				} else {
					for (k = 1; k <= l; k++) {
						if (ir_insn_op(insn, k) == ref) {
							ir_insn_set_op(insn, k, clone);
							break;
						}
					}
				}
			}
		}
	}

	ir_mem_free(uses);
	ir_mem_free(clones);
	ir_hashtab_free(&hash);

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
		ir_check(ctx);
	}
#endif

	return 1;
}
#endif

#ifdef IR_DEBUG
static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2)
{
	uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth;
	const ir_block *bb2 = &ctx->cfg_blocks[b2];

	while (bb2->dom_depth > b1_depth) {
		b2 = bb2->dom_parent;
		bb2 = &ctx->cfg_blocks[b2];
	}
	return b1 == b2;
}
#endif

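/*
 * Schedule-"late" pass (informal sketch): for a node placed early into
 * block E, compute the LCA of all its uses (a use in a PHI happens at the
 * end of the corresponding MERGE predecessor). E is asserted to dominate
 * the LCA, so the dominator path E..LCA is the legal placement range and
 * ir_gcm_select_best_block() picks the cheapest block on it.
 */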
static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
{
	ir_ref n, use;
	uint32_t lca = 0;

	IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR);
	IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI);

	IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b));
	b = IR_GCM_EARLY_BLOCK(b);
	ctx->cfg_map[ref] = b;

	for (n = 0; n < ctx->use_lists[ref].count; n++) {
		use = ctx->use_edges[ctx->use_lists[ref].refs + n];
		b = ctx->cfg_map[use];
		if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
			ir_gcm_schedule_late(ctx, use, b);
			b = ctx->cfg_map[use];
			IR_ASSERT(b != 0);
		} else if (!b) {
			continue;
		} else if (ctx->ir_base[use].op == IR_PHI) {
			ir_insn *insn = &ctx->ir_base[use];
			ir_ref *p = insn->ops + 2; /* PHI data inputs */
			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
			ir_ref n = insn->inputs_count - 1;

			for (;n > 0; p++, q++, n--) {
				if (*p == ref) {
					b = ctx->cfg_map[*q];
					lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
				}
			}
			continue;
		}
		lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
	}

	IR_ASSERT(lca != 0 && "No Common Ancestor");
	IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late");

#if IR_GCM_SPLIT
	if (ctx->use_lists[ref].count > 1
	 && ir_split_partially_dead_node(ctx, ref, lca)) {
		return;
	}
#endif

	if (lca != ctx->cfg_map[ref]) {
		b = ir_gcm_select_best_block(ctx, ref, lca);

		ctx->cfg_map[ref] = b;

		/* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */
		if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) {
			ir_use_list *use_list = &ctx->use_lists[ref];
			ir_ref n, *p, use;

			for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
				use = *p;
				if (ctx->ir_base[use].op == IR_OVERFLOW) {
					ctx->cfg_map[use] = b;
					break;
				}
			}
		}
	}
}

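/*
 * GCM driver (informal sketch): control nodes, PARAM/VAR and PHI/PI are
 * pinned to their blocks first; the remaining data nodes reachable from
 * the pinned ones are scheduled early (dominated by their inputs) and
 * then late (dominating all their uses). A function with a single
 * reachable block takes a fast path that pins every live node to block 1.
 */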
int ir_gcm(ir_ctx *ctx)
{
	ir_ref k, n, *p, ref;
	ir_block *bb;
	ir_list queue_early;
	ir_list queue_late;
	uint32_t *_blocks, b;
	ir_insn *insn, *use_insn;
	ir_use_list *use_list;

	IR_ASSERT(ctx->cfg_map);
	_blocks = ctx->cfg_map;

	ir_list_init(&queue_early, ctx->insns_count);

	if (ctx->cfg_blocks_count == 1) {
		ref = ctx->cfg_blocks[1].end;
		do {
			insn = &ctx->ir_base[ref];
			_blocks[ref] = 1; /* pin to block */
			if (insn->inputs_count > 1) {
				/* insn has input data edges */
				ir_list_push_unchecked(&queue_early, ref);
			}
			ref = insn->op1; /* control predecessor */
		} while (ref != 1); /* IR_START */
		_blocks[1] = 1; /* pin to block */

		use_list = &ctx->use_lists[1];
		n = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
			ref = *p;
			use_insn = &ctx->ir_base[ref];
			if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
				ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
				_blocks[ref] = 1; /* pin to block */
			}
		}

		/* Place all live nodes to the first block */
		while (ir_list_len(&queue_early)) {
			ref = ir_list_pop(&queue_early);
			insn = &ctx->ir_base[ref];
			n = insn->inputs_count;
			for (p = insn->ops + 1; n > 0; p++, n--) {
				ref = *p;
				if (ref > 0 && _blocks[ref] == 0) {
					_blocks[ref] = 1;
					ir_list_push_unchecked(&queue_early, ref);
				}
			}
		}

		ir_list_free(&queue_early);

		return 1;
	}

	ir_list_init(&queue_late, ctx->insns_count);

	/* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
	b = ctx->cfg_blocks_count;
	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		ref = bb->end;

		/* process the last instruction of the block */
		insn = &ctx->ir_base[ref];
		_blocks[ref] = b; /* pin to block */
		if (insn->inputs_count > 1) {
			/* insn has input data edges */
			ir_list_push_unchecked(&queue_early, ref);
		}
		ref = insn->op1; /* control predecessor */

		while (ref != bb->start) {
			insn = &ctx->ir_base[ref];
			_blocks[ref] = b; /* pin to block */
			if (insn->inputs_count > 1) {
				/* insn has input data edges */
				ir_list_push_unchecked(&queue_early, ref);
			}
			if (insn->type != IR_VOID) {
				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
			}
			ref = insn->op1; /* control predecessor */
		}

		/* process the first instruction of the block */
		_blocks[ref] = b; /* pin to block */

		use_list = &ctx->use_lists[ref];
		n = use_list->count;
		if (n > 1) {
			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
				ref = *p;
				use_insn = &ctx->ir_base[ref];
				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
					bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						_blocks[ref] = b; /* pin to block */
						ir_list_push_unchecked(&queue_early, ref);
					}
				} else if (use_insn->op == IR_PARAM) {
					bb->flags |= IR_BB_HAS_PARAM;
					_blocks[ref] = b; /* pin to block */
				} else if (use_insn->op == IR_VAR) {
					bb->flags |= IR_BB_HAS_VAR;
					_blocks[ref] = b; /* pin to block */
				}
			}
		}
	}

	n = ir_list_len(&queue_early);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_early, n);
		insn = &ctx->ir_base[ref];
		k = insn->inputs_count - 1;
		for (p = insn->ops + 2; k > 0; p++, k--) {
			ref = *p;
			if (ref > 0 && _blocks[ref] == 0) {
				ir_gcm_schedule_early(ctx, ref, &queue_late);
			}
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM) {
		fprintf(stderr, "GCM Schedule Early\n");
		for (n = 1; n < ctx->insns_count; n++) {
			fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
		}
	}
#endif

#if IR_GCM_SPLIT
	ir_gcm_split_data data;

	ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1);
	ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1);
	ctx->data = &data;
#endif

	n = ir_list_len(&queue_late);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_late, n);
		b = ctx->cfg_map[ref];
		if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
			ir_gcm_schedule_late(ctx, ref, b);
		}
	}

#if IR_GCM_SPLIT
	ir_list_free(&data.worklist);
	ir_sparse_set_free(&data.totally_useful);
	ctx->data = NULL;
#endif

	ir_list_free(&queue_early);
	ir_list_free(&queue_late);

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM) {
		fprintf(stderr, "GCM Schedule Late\n");
		for (n = 1; n < ctx->insns_count; n++) {
			fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
		}
	}
#endif

	return 1;
}

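/*
 * Rebuild the binding table after renumbering (descriptive note added
 * here): keys and positive values are translated through _xlat, the hash
 * chains are re-built in place, and entries whose key did not survive
 * scheduling are dropped.
 */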
static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
{
	uint32_t n1, n2, pos;
	ir_ref key;
	ir_hashtab_bucket *b1, *b2;
	ir_hashtab *binding = ctx->binding;
	uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);

	memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
	n1 = binding->count;
	n2 = 0;
	pos = 0;
	b1 = binding->data;
	b2 = binding->data;
	while (n1 > 0) {
		key = b1->key;
		IR_ASSERT(key < ctx->insns_count);
		if (_xlat[key]) {
			key = _xlat[key];
			b2->key = key;
			if (b1->val > 0) {
				IR_ASSERT(_xlat[b1->val]);
				b2->val = _xlat[b1->val];
			} else {
				b2->val = b1->val;
			}
			key |= binding->mask;
			b2->next = ((uint32_t*)binding->data)[key];
			((uint32_t*)binding->data)[key] = pos;
			pos += sizeof(ir_hashtab_bucket);
			b2++;
			n2++;
		}
		b1++;
		n1--;
	}
	binding->count = n2;
}

IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
{
	if (!_xlat[ref]) {
		_xlat[ref] = ref; /* this is only a "used constant" marker */
		return 1;
	}
	return 0;
}

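/*
 * Linearization (informal sketch): nodes are first threaded into a
 * doubly-linked list ordered by basic block, then topologically sorted
 * inside each block, and finally copied into a freshly numbered context
 * together with the surviving constants, use lists and use edges. The
 * _xlat array maps old refs (negative for constants) to new ones.
 */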
int ir_schedule(ir_ctx *ctx)
{
	ir_ctx new_ctx;
	ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
	ir_ref *_xlat;
	ir_ref *edges;
	ir_ref prev_b_end;
	uint32_t b, prev_b;
	uint32_t *_blocks = ctx->cfg_map;
	ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	ir_ref _move_down = 0;
	ir_block *bb;
	ir_insn *insn, *new_insn;
	ir_use_list *lists, *use_list, *new_list;

	/* Create a double-linked list of nodes ordered by BB, respecting BB->start and BB->end */
	IR_ASSERT(_blocks[1] == 1);
	prev_b = 1;
	prev_b_end = ctx->cfg_blocks[1].end;
	_prev[1] = 0;
	_prev[prev_b_end] = 0;
	for (i = 2, j = 1; i < ctx->insns_count; i++) {
		b = _blocks[i];
		IR_ASSERT((int32_t)b >= 0);
		if (b == prev_b && i <= prev_b_end) {
			/* add to the end of the list */
			_next[j] = i;
			_prev[i] = j;
			j = i;
		} else if (b > prev_b) {
			bb = &ctx->cfg_blocks[b];
			if (i == bb->start) {
				IR_ASSERT(bb->end > bb->start);
				prev_b = b;
				prev_b_end = bb->end;
				_prev[bb->end] = 0;
				/* add to the end of the list */
				_next[j] = i;
				_prev[i] = j;
				j = i;
			} else {
				IR_ASSERT(i != bb->end);
				/* move down late (see the following loop) */
				_next[i] = _move_down;
				_move_down = i;
			}
		} else if (b) {
			bb = &ctx->cfg_blocks[b];
			IR_ASSERT(i != bb->start);
			if (_prev[bb->end]) {
				/* move up, insert before the end of the already scheduled BB */
				k = bb->end;
			} else {
				/* move up, insert at the end of the block */
				k = ctx->cfg_blocks[b + 1].start;
			}
			/* insert before "k" */
			_prev[i] = _prev[k];
			_next[i] = k;
			_next[_prev[k]] = i;
			_prev[k] = i;
		}
	}
	_next[j] = 0;

	while (_move_down) {
		i = _move_down;
		_move_down = _next[i];
		b = _blocks[i];
		bb = &ctx->cfg_blocks[b];
		k = _next[bb->start];

		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
			/* insert after the start of the block and all PARAM, VAR, PI, PHI */
			insn = &ctx->ir_base[k];
			while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
				k = _next[k];
				insn = &ctx->ir_base[k];
			}
		}

		/* insert before "k" */
		_prev[i] = _prev[k];
		_next[i] = k;
		_next[_prev[k]] = i;
		_prev[k] = i;
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_SCHEDULE) {
		fprintf(stderr, "Before Schedule\n");
		for (i = 1; i != 0; i = _next[i]) {
			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
		}
	}
#endif

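	/* Note (added): _xlat is biased so that it can be indexed by signed
	 * refs; constants occupy the negative side. IR_TRUE/IR_FALSE/IR_NULL/
	 * IR_UNUSED are pre-seeded because they are always preserved. */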
	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
	_xlat += ctx->consts_count;
	_xlat[IR_TRUE] = IR_TRUE;
	_xlat[IR_FALSE] = IR_FALSE;
	_xlat[IR_NULL] = IR_NULL;
	_xlat[IR_UNUSED] = IR_UNUSED;
	insns_count = 1;
	consts_count = -(IR_TRUE - 1);

	/* Topological sort according to the dependencies inside each basic block */
	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		/* Schedule BB start */
		i = bb->start;
		_xlat[i] = bb->start = insns_count;
		insn = &ctx->ir_base[i];
		if (insn->op == IR_CASE_VAL) {
			IR_ASSERT(insn->op2 < IR_TRUE);
			consts_count += ir_count_constant(_xlat, insn->op2);
		}
		n = insn->inputs_count;
		insns_count += ir_insn_inputs_to_len(n);
		i = _next[i];
		insn = &ctx->ir_base[i];
		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
			/* Schedule PARAM, VAR, PI */
			while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
				_xlat[i] = insns_count;
				insns_count += 1;
				i = _next[i];
				insn = &ctx->ir_base[i];
			}
			/* Schedule PHIs */
			while (insn->op == IR_PHI) {
				ir_ref j, *p, input;

				_xlat[i] = insns_count;
				/* Reuse "n" from MERGE and skip first input */
				insns_count += ir_insn_inputs_to_len(n + 1);
				for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
					input = *p;
					if (input < IR_TRUE) {
						consts_count += ir_count_constant(_xlat, input);
					}
				}
				i = _next[i];
				insn = &ctx->ir_base[i];
			}
		}
		if (bb->successors_count > 1) {
			ir_ref input, j = bb->end;
			ir_insn *end = &ctx->ir_base[j];

			if (end->op == IR_IF) {
				/* Move condition closer to IF */
				input = end->op2;
				if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) {
					if (input == i) {
						i = _next[i];
						insn = &ctx->ir_base[i];
					}
					/* remove "input" */
					_prev[_next[input]] = _prev[input];
					_next[_prev[input]] = _next[input];
					/* insert before "j" */
					_prev[input] = _prev[j];
					_next[input] = j;
					_next[_prev[j]] = input;
					_prev[j] = input;
				}
			}
		}
		while (i != bb->end) {
			ir_ref n, j, *p, input;

restart:
			n = insn->inputs_count;
			for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
				input = *p;
				if (!_xlat[input]) {
					/* input is not scheduled yet */
					if (input > 0) {
						if (_blocks[input] == b) {
							/* "input" should be before "i" to satisfy dependency */
#ifdef IR_DEBUG
							if (ctx->flags & IR_DEBUG_SCHEDULE) {
								fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
							}
#endif
							/* remove "input" */
							_prev[_next[input]] = _prev[input];
							_next[_prev[input]] = _next[input];
							/* insert before "i" */
							_prev[input] = _prev[i];
							_next[input] = i;
							_next[_prev[i]] = input;
							_prev[i] = input;
							/* restart from "input" */
							i = input;
							insn = &ctx->ir_base[i];
							goto restart;
						}
					} else if (input < IR_TRUE) {
						consts_count += ir_count_constant(_xlat, input);
					}
				}
			}
			_xlat[i] = insns_count;
			insns_count += ir_insn_inputs_to_len(n);
			i = _next[i];
			insn = &ctx->ir_base[i];
		}
		/* Schedule BB end */
		_xlat[i] = bb->end = insns_count;
		insns_count++;
		if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
			if (insn->op2 < IR_TRUE) {
				consts_count += ir_count_constant(_xlat, insn->op2);
			}
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_SCHEDULE) {
		fprintf(stderr, "After Schedule\n");
		for (i = 1; i != 0; i = _next[i]) {
			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
		}
	}
#endif

#if 1
	/* Check if scheduling didn't make any modifications */
	if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
		bool changed = 0;

		for (i = 1; i != 0; i = _next[i]) {
			if (_xlat[i] != i) {
				changed = 1;
				break;
			}
		}
		if (!changed) {
			_xlat -= ctx->consts_count;
			ir_mem_free(_xlat);
			ir_mem_free(_next);

			ctx->prev_ref = _prev;
			ctx->flags2 |= IR_LINEAR;
			ir_truncate(ctx);

			return 1;
		}
	}
#endif

	ir_mem_free(_prev);

	ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
	new_ctx.insns_count = insns_count;
	new_ctx.flags2 = ctx->flags2;
	new_ctx.ret_type = ctx->ret_type;
	new_ctx.mflags = ctx->mflags;
	new_ctx.spill_base = ctx->spill_base;
	new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
	new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
	new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
	new_ctx.fixed_regset = ctx->fixed_regset;
	new_ctx.fixed_save_regset = ctx->fixed_save_regset;
	new_ctx.entries_count = ctx->entries_count;
#if defined(IR_TARGET_AARCH64)
	new_ctx.deoptimization_exits = ctx->deoptimization_exits;
	new_ctx.get_exit_addr = ctx->get_exit_addr;
	new_ctx.get_veneer = ctx->get_veneer;
	new_ctx.set_veneer = ctx->set_veneer;
#endif
	new_ctx.loader = ctx->loader;

	/* Copy constants */
	if (consts_count == ctx->consts_count) {
		new_ctx.consts_count = consts_count;
		ref = 1 - consts_count;
		insn = &ctx->ir_base[ref];
		new_insn = &new_ctx.ir_base[ref];

		memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
		if (ctx->strtab.data) {
			while (ref != IR_TRUE) {
				if (new_insn->op == IR_FUNC_ADDR) {
					if (new_insn->proto) {
						size_t len;
						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
						new_insn->proto = ir_strl(&new_ctx, proto, len);
					}
				} else if (new_insn->op == IR_FUNC) {
					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
					if (new_insn->proto) {
						size_t len;
						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
						new_insn->proto = ir_strl(&new_ctx, proto, len);
					}
				} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
				}
				new_insn++;
				ref++;
			}
		}
	} else {
		new_ref = -new_ctx.consts_count;
		new_insn = &new_ctx.ir_base[new_ref];
		for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
			if (!_xlat[ref]) {
				continue;
			}
			new_insn->optx = insn->optx;
			new_insn->prev_const = 0;
			if (insn->op == IR_FUNC_ADDR) {
				new_insn->val.u64 = insn->val.u64;
				if (insn->proto) {
					size_t len;
					const char *proto = ir_get_strl(ctx, insn->proto, &len);
					new_insn->proto = ir_strl(&new_ctx, proto, len);
				} else {
					new_insn->proto = 0;
				}
			} else if (insn->op == IR_FUNC) {
				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
				if (insn->proto) {
					size_t len;
					const char *proto = ir_get_strl(ctx, insn->proto, &len);
					new_insn->proto = ir_strl(&new_ctx, proto, len);
				} else {
					new_insn->proto = 0;
				}
			} else if (insn->op == IR_SYM || insn->op == IR_STR) {
				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
			} else {
				new_insn->val.u64 = insn->val.u64;
			}
			_xlat[ref] = new_ref;
			new_ref--;
			new_insn--;
		}
		new_ctx.consts_count = -new_ref;
	}

	new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
	new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
	new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
	new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));

	/* Copy instructions, use lists and use edges */
	prev_ref = 0;
	use_edges_count = 0;
	for (i = 1; i != 0; i = _next[i]) {
		new_ref = _xlat[i];
		new_ctx.cfg_map[new_ref] = _blocks[i];
		_prev[new_ref] = prev_ref;
		prev_ref = new_ref;

		use_list = &ctx->use_lists[i];
		n = use_list->count;
		k = 0;
		if (n == 1) {
			ref = ctx->use_edges[use_list->refs];
			if (_xlat[ref]) {
				*edges = _xlat[ref];
				edges++;
				k = 1;
			}
		} else {
			p = &ctx->use_edges[use_list->refs];
			while (n--) {
				ref = *p;
				if (_xlat[ref]) {
					*edges = _xlat[ref];
					edges++;
					k++;
				}
				p++;
			}
		}
		new_list = &lists[new_ref];
		new_list->refs = use_edges_count;
		use_edges_count += k;
		new_list->count = k;

		insn = &ctx->ir_base[i];
		new_insn = &new_ctx.ir_base[new_ref];

		new_insn->optx = insn->optx;
		n = new_insn->inputs_count;
		switch (n) {
			case 0:
				new_insn->op1 = insn->op1;
				new_insn->op2 = insn->op2;
				new_insn->op3 = insn->op3;
				break;
			case 1:
				new_insn->op1 = _xlat[insn->op1];
				if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
					new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
				} else if (new_insn->op == IR_PROTO) {
					size_t len;
					const char *proto = ir_get_strl(ctx, insn->op2, &len);
					new_insn->op2 = ir_strl(&new_ctx, proto, len);
				} else {
					new_insn->op2 = insn->op2;
				}
				new_insn->op3 = insn->op3;
				break;
			case 2:
				new_insn->op1 = _xlat[insn->op1];
				new_insn->op2 = _xlat[insn->op2];
				new_insn->op3 = insn->op3;
#if IR_SCHEDULE_SWAP_OPS
				/* Swap operands according to folding rules */
				if (new_insn->op1 < new_insn->op2) {
					switch (new_insn->op) {
						case IR_EQ:
						case IR_NE:
						case IR_ADD:
						case IR_MUL:
						case IR_ADD_OV:
						case IR_MUL_OV:
						case IR_OR:
						case IR_AND:
						case IR_XOR:
						case IR_MIN:
						case IR_MAX:
							SWAP_REFS(new_insn->op1, new_insn->op2);
							break;
						case IR_LT:
						case IR_GE:
						case IR_LE:
						case IR_GT:
						case IR_ULT:
						case IR_UGE:
						case IR_ULE:
						case IR_UGT:
							SWAP_REFS(new_insn->op1, new_insn->op2);
							new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
							break;
					}
				}
#endif
				break;
			case 3:
				new_insn->op1 = _xlat[insn->op1];
				new_insn->op2 = _xlat[insn->op2];
				new_insn->op3 = _xlat[insn->op3];
				break;
			default:
				for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
					*q = _xlat[*p];
				}
				break;
		}
	}

	/* Update list of terminators (IR_OPND_CONTROL_REF) */
	insn = &new_ctx.ir_base[1];
	ref = insn->op1;
	if (ref) {
		insn->op1 = ref = _xlat[ref];
		while (1) {
			insn = &new_ctx.ir_base[ref];
			ref = insn->op3;
			if (!ref) {
				break;
			}
			insn->op3 = ref = _xlat[ref];
		}
	}

	IR_ASSERT(ctx->use_edges_count >= use_edges_count);
	new_ctx.use_edges_count = use_edges_count;
	new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));

	if (ctx->binding) {
		ir_xlat_binding(ctx, _xlat);
		new_ctx.binding = ctx->binding;
		ctx->binding = NULL;
	}

	_xlat -= ctx->consts_count;
	ir_mem_free(_xlat);

	new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
	new_ctx.cfg_edges_count = ctx->cfg_edges_count;
	new_ctx.cfg_blocks = ctx->cfg_blocks;
	new_ctx.cfg_edges = ctx->cfg_edges;
	ctx->cfg_blocks = NULL;
	ctx->cfg_edges = NULL;

	ir_free(ctx);
	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
	ctx->flags2 |= IR_LINEAR;

	ir_mem_free(_next);

	return 1;
}

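/*
 * Build the prev_ref links for an already linearized context by walking
 * each block's instructions in layout order (descriptive note added here;
 * the original carries no comment for this helper).
 */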
void ir_build_prev_refs(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_ref i, n, prev;
	ir_insn *insn;

	ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	prev = 0;
	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
			ctx->prev_ref[i] = prev;
			n = ir_insn_len(insn);
			prev = i;
			i += n;
			insn += n;
		}
		ctx->prev_ref[i] = prev;
	}
}
