1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (CFG - Control Flow Graph)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8 #include "ir.h"
9 #include "ir_private.h"
10
_ir_add_successors(const ir_ctx * ctx,ir_ref ref,ir_worklist * worklist)11 IR_ALWAYS_INLINE void _ir_add_successors(const ir_ctx *ctx, ir_ref ref, ir_worklist *worklist)
12 {
13 ir_use_list *use_list = &ctx->use_lists[ref];
14 ir_ref *p, use, n = use_list->count;
15
16 if (n < 2) {
17 if (n == 1) {
18 use = ctx->use_edges[use_list->refs];
19 IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL);
20 ir_worklist_push(worklist, use);
21 }
22 } else {
23 p = &ctx->use_edges[use_list->refs];
24 if (n == 2) {
25 use = *p;
26 IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL);
27 ir_worklist_push(worklist, use);
28 use = *(p + 1);
29 IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL);
30 ir_worklist_push(worklist, use);
31 } else {
32 for (; n > 0; p++, n--) {
33 use = *p;
34 IR_ASSERT(ir_op_flags[ctx->ir_base[use].op] & IR_OP_FLAG_CONTROL);
35 ir_worklist_push(worklist, use);
36 }
37 }
38 }
39 }
40
_ir_add_predecessors(const ir_insn * insn,ir_worklist * worklist)41 IR_ALWAYS_INLINE void _ir_add_predecessors(const ir_insn *insn, ir_worklist *worklist)
42 {
43 ir_ref n, ref;
44 const ir_ref *p;
45
46 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
47 n = insn->inputs_count;
48 for (p = insn->ops + 1; n > 0; p++, n--) {
49 ref = *p;
50 IR_ASSERT(ref);
51 ir_worklist_push(worklist, ref);
52 }
53 } else if (insn->op != IR_START) {
54 if (EXPECTED(insn->op1)) {
55 ir_worklist_push(worklist, insn->op1);
56 }
57 }
58 }
59
/* Build the Control Flow Graph: partition the control nodes into basic blocks,
 * number them, and record successor/predecessor edges.
 * On success fills ctx->cfg_blocks, ctx->cfg_edges and ctx->cfg_map (a map from
 * each BB-start/BB-end instruction to its block number) and returns 1. */
int ir_build_cfg(ir_ctx *ctx)
{
	ir_ref n, *p, ref, start, end, next;
	uint32_t b;
	ir_insn *insn;
	ir_worklist worklist;
	uint32_t bb_init_falgs;
	uint32_t count, bb_count = 0;
	uint32_t edges_count = 0;
	ir_block *blocks, *bb;
	uint32_t *_blocks, *edges;
	ir_use_list *use_list;
	uint32_t len = ir_bitset_len(ctx->insns_count);
	/* One allocation holds two bitsets: BB starts and possible "leaked" nodes */
	ir_bitset bb_starts = ir_mem_calloc(len * 2, IR_BITSET_BITS / 8);
	ir_bitset bb_leaks = bb_starts + len;
	_blocks = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
	ir_worklist_init(&worklist, ctx->insns_count);

	/* First try to perform backward DFS search starting from "stop" nodes */

	/* Add all "stop" nodes (terminators are linked through START.op1 / op3) */
	ref = ctx->ir_base[1].op1;
	while (ref) {
		ir_worklist_push(&worklist, ref);
		ref = ctx->ir_base[ref].op3;
	}

	while (ir_worklist_len(&worklist)) {
		ref = ir_worklist_pop(&worklist);
		insn = &ctx->ir_base[ref];

		if (insn->op == IR_NOP) {
			continue;
		}
		IR_ASSERT(IR_IS_BB_END(insn->op));
		/* Remember BB end */
		end = ref;
		/* Some successors of IF and SWITCH nodes may be inaccessible by backward DFS */
		use_list = &ctx->use_lists[end];
		n = use_list->count;
		if (n > 1) {
			for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
				/* Remember possibly inaccessible successors */
				ir_bitset_incl(bb_leaks, *p);
			}
		}
		/* Skip control nodes until BB start */
		ref = insn->op1;
		while (1) {
			insn = &ctx->ir_base[ref];
			if (IR_IS_BB_START(insn->op)) {
				break;
			}
			ref = insn->op1; /* follow the chain of control nodes until BB start */
		}
		/* Mark BB Start */
		bb_count++;
		_blocks[ref] = end; /* temporarily map BB start -> its BB end ref */
		ir_bitset_incl(bb_starts, ref);
		/* Add predecessors */
		_ir_add_predecessors(insn, &worklist);
	}

	/* Backward DFS may miss branches that end in infinite loops. */
	/* Try forward DFS. (in most cases all nodes are already processed). */

	/* START node may be inaccessible from "stop" nodes */
	ir_bitset_incl(bb_leaks, 1);

	/* Add not-yet-processed START and successors of IF and SWITCH */
	IR_BITSET_FOREACH_DIFFERENCE(bb_leaks, bb_starts, len, start) {
		ir_worklist_push(&worklist, start);
	} IR_BITSET_FOREACH_END();

	if (ir_worklist_len(&worklist)) {
		/* Don't re-visit blocks discovered by the backward pass */
		ir_bitset_union(worklist.visited, bb_starts, len);
		do {
			ref = ir_worklist_pop(&worklist);
			insn = &ctx->ir_base[ref];

			if (insn->op == IR_NOP) {
				continue;
			}
			IR_ASSERT(IR_IS_BB_START(insn->op));
			/* Remember BB start */
			start = ref;
			/* Skip control nodes until BB end */
			while (1) {
				use_list = &ctx->use_lists[ref];
				n = use_list->count;
				next = IR_UNUSED;
				/* Find the (single) control user that continues this block */
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					next = *p;
					insn = &ctx->ir_base[next];
					if ((ir_op_flags[insn->op] & IR_OP_FLAG_CONTROL) && insn->op1 == ref) {
						break;
					}
				}
				IR_ASSERT(next != IR_UNUSED);
				ref = next;
				if (IR_IS_BB_END(insn->op)) {
					break;
				}
			}
			/* Mark BB Start */
			bb_count++;
			_blocks[start] = ref;
			ir_bitset_incl(bb_starts, start);
			/* Add successors */
			_ir_add_successors(ctx, ref, &worklist);
		} while (ir_worklist_len(&worklist));
	}

	IR_ASSERT(bb_count > 0);

	/* Create array of basic blocks and count successor/predecessor edges for each BB */
	blocks = ir_mem_malloc((bb_count + 1) * sizeof(ir_block));
	b = 1;
	bb = blocks + 1;
	count = 0;
	/* SCCP already removed UNREACHABLE blocks, otherwise all blocks are marked as UNREACHABLE first */
	bb_init_falgs = (ctx->flags2 & IR_CFG_REACHABLE) ? 0 : IR_BB_UNREACHABLE;
	IR_BITSET_FOREACH(bb_starts, len, start) {
		insn = &ctx->ir_base[start];
		if (insn->op == IR_NOP) {
			_blocks[start] = 0;
			continue;
		}
		end = _blocks[start];
		/* Replace the temporary start->end mapping by the block number */
		_blocks[start] = b;
		_blocks[end] = b;
		IR_ASSERT(IR_IS_BB_START(insn->op));
		IR_ASSERT(end > start);
		bb->start = start;
		bb->end = end;
		/* "count" allocates slots in the shared edges array:
		 * successors first, predecessors directly after them */
		bb->successors = count;
		count += ctx->use_lists[end].count;
		bb->successors_count = 0;
		bb->predecessors = count;
		bb->dom_parent = 0;
		bb->dom_depth = 0;
		bb->dom_child = 0;
		bb->dom_next_child = 0;
		bb->loop_header = 0;
		bb->loop_depth = 0;
		if (insn->op == IR_START) {
			bb->flags = IR_BB_START;
			bb->predecessors_count = 0;
		} else {
			bb->flags = bb_init_falgs;
			if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
				n = insn->inputs_count;
				bb->predecessors_count = n;
				edges_count += n;
				count += n;
			} else if (EXPECTED(insn->op1)) {
				if (insn->op == IR_ENTRY) {
					bb->flags |= IR_BB_ENTRY;
					ctx->entries_count++;
				}
				bb->predecessors_count = 1;
				edges_count++;
				count++;
			} else {
				IR_ASSERT(insn->op == IR_BEGIN); /* start of unreachable block */
				bb->predecessors_count = 0;
			}
		}
		b++;
		bb++;
	} IR_BITSET_FOREACH_END();
	bb_count = b - 1;
	/* Each CFG edge occupies one successor slot and one predecessor slot */
	IR_ASSERT(count == edges_count * 2);
	ir_mem_free(bb_starts);

	/* Create an array of successor/predecessor control edges */
	edges = ir_mem_malloc(edges_count * 2 * sizeof(uint32_t));
	bb = blocks + 1;
	for (b = 1; b <= bb_count; b++, bb++) {
		insn = &ctx->ir_base[bb->start];
		if (bb->predecessors_count > 1) {
			uint32_t *q = edges + bb->predecessors;
			n = insn->inputs_count;
			for (p = insn->ops + 1; n > 0; p++, q++, n--) {
				ref = *p;
				IR_ASSERT(ref);
				ir_ref pred_b = _blocks[ref];
				ir_block *pred_bb = &blocks[pred_b];
				*q = pred_b;
				/* Register the reverse (successor) edge in the predecessor */
				edges[pred_bb->successors + pred_bb->successors_count++] = b;
			}
		} else if (bb->predecessors_count == 1) {
			ref = insn->op1;
			IR_ASSERT(ref);
			IR_ASSERT(IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL);
			ir_ref pred_b = _blocks[ref];
			ir_block *pred_bb = &blocks[pred_b];
			edges[bb->predecessors] = pred_b;
			edges[pred_bb->successors + pred_bb->successors_count++] = b;
		}
	}

	ctx->cfg_blocks_count = bb_count;
	ctx->cfg_edges_count = edges_count * 2;
	ctx->cfg_blocks = blocks;
	ctx->cfg_edges = edges;
	ctx->cfg_map = _blocks;

	if (!(ctx->flags2 & IR_CFG_REACHABLE)) {
		uint32_t reachable_count = 0;

		/* Mark reachable blocks by a forward BFS/DFS from block 1 */
		ir_worklist_clear(&worklist);
		ir_worklist_push(&worklist, 1);
		while (ir_worklist_len(&worklist) != 0) {
			uint32_t *p;

			reachable_count++;
			b = ir_worklist_pop(&worklist);
			bb = &blocks[b];
			bb->flags &= ~IR_BB_UNREACHABLE;
			n = bb->successors_count;
			if (n > 1) {
				for (p = edges + bb->successors; n > 0; p++, n--) {
					ir_worklist_push(&worklist, *p);
				}
			} else if (n == 1) {
				ir_worklist_push(&worklist, edges[bb->successors]);
			}
		}
		if (reachable_count != ctx->cfg_blocks_count) {
			ir_remove_unreachable_blocks(ctx);
		}
	}

	ir_worklist_free(&worklist);

	return 1;
}
299
/* Delete the edge coming from block "from" out of bb's predecessor list,
 * compacting the list in place and updating bb->predecessors_count.
 * At least one entry must be removed. */
static void ir_remove_predecessor(ir_ctx *ctx, ir_block *bb, uint32_t from)
{
	uint32_t *base = &ctx->cfg_edges[bb->predecessors];
	uint32_t *src = base;
	uint32_t *dst = base;
	uint32_t i, kept;

	for (i = 0; i < bb->predecessors_count; i++, src++) {
		uint32_t pred = *src;

		if (pred != from) {
			if (dst != src) {
				*dst = pred;
			}
			dst++;
		}
	}
	kept = (uint32_t)(dst - base);
	IR_ASSERT(kept != bb->predecessors_count);
	bb->predecessors_count = kept;
}
317
/* Remove the control input "from" out of the MERGE/LOOP_BEGIN node "merge",
 * compacting the remaining inputs and fixing up the dependent PHI nodes.
 * When only one input remains, the MERGE is degraded to a plain BEGIN and its
 * PHIs are converted into COPYs of the surviving value. */
static void ir_remove_merge_input(ir_ctx *ctx, ir_ref merge, ir_ref from)
{
	ir_ref i, j, n, k, *p, use;
	ir_insn *use_insn;
	ir_use_list *use_list;
	ir_bitset life_inputs; /* bit j set => merge input j survives */
	ir_insn *insn = &ctx->ir_base[merge];

	IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
	n = insn->inputs_count;
	i = 1;
	life_inputs = ir_bitset_malloc(n + 1);
	/* Compact the merge inputs, dropping every occurrence of "from" */
	for (j = 1; j <= n; j++) {
		ir_ref input = ir_insn_op(insn, j);

		if (input != from) {
			if (i != j) {
				ir_insn_set_op(insn, i, input);
			}
			ir_bitset_incl(life_inputs, j);
			i++;
		}
	}
	i--; /* i = number of surviving inputs */
	if (i == 1) {
		/* Single remaining input: the MERGE degenerates into a BEGIN */
		insn->op = IR_BEGIN;
		insn->inputs_count = 1;
		use_list = &ctx->use_lists[merge];
		if (use_list->count > 1) {
			for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
				use = *p;
				use_insn = &ctx->ir_base[use];
				if (use_insn->op == IR_PHI) {
					/* Convert PHI to COPY.
					 * PHI operand j (j >= 2) corresponds to merge input j-1. */
					i = 2;
					for (j = 2; j <= n; j++) {
						ir_ref input = ir_insn_op(use_insn, j);

						if (ir_bitset_in(life_inputs, j - 1)) {
							/* The surviving value becomes the COPY source */
							use_insn->op1 = ir_insn_op(use_insn, j);
						} else if (input > 0) {
							ir_use_list_remove_all(ctx, input, use);
						}
					}
					use_insn->op = IR_COPY;
					use_insn->op2 = IR_UNUSED;
					use_insn->op3 = IR_UNUSED;
					ir_use_list_remove_all(ctx, merge, use);
				}
			}
		}
	} else {
		insn->inputs_count = i;

		/* PHIs have one extra operand (the control ref), so their operand
		 * range is 2..n+1, mapping to merge inputs 1..n */
		n++;
		use_list = &ctx->use_lists[merge];
		if (use_list->count > 1) {
			for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
				use = *p;
				use_insn = &ctx->ir_base[use];
				if (use_insn->op == IR_PHI) {
					/* Compact PHI operands to match the compacted merge inputs */
					i = 2;
					for (j = 2; j <= n; j++) {
						ir_ref input = ir_insn_op(use_insn, j);

						if (ir_bitset_in(life_inputs, j - 1)) {
							IR_ASSERT(input);
							if (i != j) {
								ir_insn_set_op(use_insn, i, input);
							}
							i++;
						} else if (input > 0) {
							ir_use_list_remove_all(ctx, input, use);
						}
					}
				}
			}
		}
	}
	ir_mem_free(life_inputs);
	ir_use_list_remove_all(ctx, from, merge);
}
400
401 /* CFG constructed after SCCP pass doesn't have unreachable BBs, otherwise they should be removed */
/* Remove all blocks marked IR_BB_UNREACHABLE: detach them from reachable
 * successors (fixing MERGE inputs and PHIs), unlink dead terminators, then
 * compact the block array, renumber the blocks and rebuild the CFG edges.
 * Always returns 1. */
int ir_remove_unreachable_blocks(ir_ctx *ctx)
{
	uint32_t b, *p, i;
	uint32_t unreachable_count = 0;
	uint32_t bb_count = ctx->cfg_blocks_count;
	ir_block *bb = ctx->cfg_blocks + 1;

	/* Pass 1: detach unreachable blocks from the graph */
	for (b = 1; b <= bb_count; b++, bb++) {
		if (bb->flags & IR_BB_UNREACHABLE) {
#if 0
			do {if (!unreachable_count) ir_dump_cfg(ctx, stderr);} while(0);
#endif
			if (bb->successors_count) {
				/* Drop edges into still-reachable successors */
				for (i = 0, p = &ctx->cfg_edges[bb->successors]; i < bb->successors_count; i++, p++) {
					ir_block *succ_bb = &ctx->cfg_blocks[*p];

					if (!(succ_bb->flags & IR_BB_UNREACHABLE)) {
						ir_remove_predecessor(ctx, succ_bb, b);
						ir_remove_merge_input(ctx, succ_bb->start, bb->end);
					}
				}
			} else {
				ir_ref prev, ref = bb->end;
				ir_insn *insn = &ctx->ir_base[ref];

				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR);
				/* remove from terminators list (linked via START.op1 / op3) */
				prev = ctx->ir_base[1].op1;
				if (prev == ref) {
					ctx->ir_base[1].op1 = insn->op3;
				} else {
					while (prev) {
						if (ctx->ir_base[prev].op3 == ref) {
							ctx->ir_base[prev].op3 = insn->op3;
							break;
						}
						prev = ctx->ir_base[prev].op3;
					}
				}
			}
			/* Clear the instruction -> block mapping for the dead block */
			ctx->cfg_map[bb->start] = 0;
			ctx->cfg_map[bb->end] = 0;
			unreachable_count++;
		}
	}

	if (unreachable_count) {
		ir_block *dst_bb;
		uint32_t n = 1;
		uint32_t *edges;

		/* Pass 2: compact the block array and renumber the survivors */
		dst_bb = bb = ctx->cfg_blocks + 1;
		for (b = 1; b <= bb_count; b++, bb++) {
			if (!(bb->flags & IR_BB_UNREACHABLE)) {
				if (dst_bb != bb) {
					memcpy(dst_bb, bb, sizeof(ir_block));
					ctx->cfg_map[dst_bb->start] = n;
					ctx->cfg_map[dst_bb->end] = n;
				}
				dst_bb->successors_count = 0;
				dst_bb++;
				n++;
			}
		}
		ctx->cfg_blocks_count = bb_count = n - 1;

		/* Rebuild successor/predecessor control edges */
		edges = ctx->cfg_edges;
		bb = ctx->cfg_blocks + 1;
		for (b = 1; b <= bb_count; b++, bb++) {
			ir_insn *insn = &ctx->ir_base[bb->start];
			ir_ref *p, ref;

			n = bb->predecessors_count;
			if (n > 1) {
				uint32_t *q = edges + bb->predecessors;

				IR_ASSERT(n == insn->inputs_count);
				for (p = insn->ops + 1; n > 0; p++, q++, n--) {
					ref = *p;
					IR_ASSERT(ref);
					ir_ref pred_b = ctx->cfg_map[ref];
					ir_block *pred_bb = &ctx->cfg_blocks[pred_b];
					*q = pred_b;
					edges[pred_bb->successors + pred_bb->successors_count++] = b;
				}
			} else if (n == 1) {
				ref = insn->op1;
				IR_ASSERT(ref);
				IR_ASSERT(IR_OPND_KIND(ir_op_flags[insn->op], 1) == IR_OPND_CONTROL);
				ir_ref pred_b = ctx->cfg_map[ref];
				ir_block *pred_bb = &ctx->cfg_blocks[pred_b];
				edges[bb->predecessors] = pred_b;
				edges[pred_bb->successors + pred_bb->successors_count++] = b;
			}
		}
	}

	return 1;
}
502
503 #if 0
/* (Disabled legacy code, see #if 0.)
 * Recursively assign DFS post-order numbers to the blocks reachable from b. */
static void compute_postnum(const ir_ctx *ctx, uint32_t *cur, uint32_t b)
{
	uint32_t i, *p;
	ir_block *bb = &ctx->cfg_blocks[b];

	if (bb->postnum != 0) {
		return;
	}

	if (bb->successors_count) {
		bb->postnum = -1; /* Marker for "currently visiting" */
		p = ctx->cfg_edges + bb->successors;
		i = bb->successors_count;
		do {
			compute_postnum(ctx, cur, *p);
			p++;
		} while (--i);
	}
	bb->postnum = (*cur)++;
}
524
525 /* Computes dominator tree using algorithm from "A Simple, Fast Dominance Algorithm" by
526 * Cooper, Harvey and Kennedy. */
/* (Disabled legacy code, see #if 0 — kept as a reference implementation.)
 * Computes dominator tree using algorithm from "A Simple, Fast Dominance
 * Algorithm" by Cooper, Harvey and Kennedy: iterate to a fixed point,
 * intersecting predecessor dominator chains via DFS post-order numbers. */
int ir_build_dominators_tree(ir_ctx *ctx)
{
	uint32_t blocks_count, b, postnum;
	ir_block *blocks, *bb;
	uint32_t *edges;
	bool changed;

	ctx->flags2 &= ~IR_NO_LOOPS;

	postnum = 1;
	compute_postnum(ctx, &postnum, 1);

	/* Find immediate dominators */
	blocks = ctx->cfg_blocks;
	edges = ctx->cfg_edges;
	blocks_count = ctx->cfg_blocks_count;
	blocks[1].idom = 1; /* the entry block dominates itself */
	do {
		changed = 0;
		/* Iterating in Reverse Post Order */
		for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
			IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
			if (bb->predecessors_count == 1) {
				uint32_t pred_b = edges[bb->predecessors];

				if (blocks[pred_b].idom <= 0) {
					//IR_ASSERT("Wrong blocks order: BB is before its single predecessor");
				} else if (bb->idom != pred_b) {
					bb->idom = pred_b;
					changed = 1;
				}
			} else if (bb->predecessors_count) {
				uint32_t idom = 0;
				uint32_t k = bb->predecessors_count;
				uint32_t *p = edges + bb->predecessors;

				do {
					uint32_t pred_b = *p;
					ir_block *pred_bb = &blocks[pred_b];
					ir_block *idom_bb;

					/* Start from the first predecessor that already has an idom */
					if (pred_bb->idom > 0) {
						idom = pred_b;
						idom_bb = &blocks[idom];

						/* "Intersect" with the remaining processed predecessors:
						 * climb both idom chains until they meet */
						while (--k > 0) {
							pred_b = *(++p);
							pred_bb = &blocks[pred_b];
							if (pred_bb->idom > 0) {
								while (idom != pred_b) {
									while (pred_bb->postnum < idom_bb->postnum) {
										pred_b = pred_bb->idom;
										pred_bb = &blocks[pred_b];
									}
									while (idom_bb->postnum < pred_bb->postnum) {
										idom = idom_bb->idom;
										idom_bb = &blocks[idom];
									}
								}
							}
						}

						if (bb->idom != idom) {
							bb->idom = idom;
							changed = 1;
						}
						break;
					}
					p++;
				} while (--k > 0);
			}
		}
	} while (changed);
	blocks[1].idom = 0;
	blocks[1].dom_depth = 0;

	/* Construct dominators tree */
	for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		if (bb->idom > 0) {
			ir_block *idom_bb = &blocks[bb->idom];

			bb->dom_depth = idom_bb->dom_depth + 1;
			/* Sort by block number to traverse children in pre-order */
			if (idom_bb->dom_child == 0) {
				idom_bb->dom_child = b;
			} else if (b < idom_bb->dom_child) {
				bb->dom_next_child = idom_bb->dom_child;
				idom_bb->dom_child = b;
			} else {
				int child = idom_bb->dom_child;
				ir_block *child_bb = &blocks[child];

				while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
					child = child_bb->dom_next_child;
					child_bb = &blocks[child];
				}
				bb->dom_next_child = child_bb->dom_next_child;
				child_bb->dom_next_child = b;
			}
		}
	}

	return 1;
}
632 #else
633 /* A single pass modification of "A Simple, Fast Dominance Algorithm" by
634 * Cooper, Harvey and Kennedy, that relays on IR block ordering.
635 * It may fallback to the general slow fixed-point algorithm. */
636 static int ir_build_dominators_tree_iterative(ir_ctx *ctx);
/* Build the dominator tree in a single RPO pass (see the comment above).
 * Back-edges found along the way are collected on a worklist; afterwards we
 * verify each back-edge targets a dominator of its source — if not, fall back
 * to the general fixed-point algorithm. Returns 1 on success. */
int ir_build_dominators_tree(ir_ctx *ctx)
{
	uint32_t blocks_count, b;
	ir_block *blocks, *bb;
	uint32_t *edges;
	ir_list worklist;

	ir_list_init(&worklist, ctx->cfg_blocks_count / 2);

	ctx->flags2 |= IR_NO_LOOPS; /* cleared below if any back-edge is seen */

	/* Find immediate dominators */
	blocks = ctx->cfg_blocks;
	edges = ctx->cfg_edges;
	blocks_count = ctx->cfg_blocks_count;
	blocks[1].idom = 1;
	blocks[1].dom_depth = 0;

	/* Iterating in Reverse Post Order */
	for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		IR_ASSERT(bb->predecessors_count > 0);
		uint32_t k = bb->predecessors_count;
		uint32_t *p = edges + bb->predecessors;
		uint32_t idom = *p;
		ir_block *idom_bb;

		if (UNEXPECTED(idom > b)) {
			/* In rare cases, LOOP_BEGIN.op1 may be a back-edge. Skip back-edges. */
			ctx->flags2 &= ~IR_NO_LOOPS;
			IR_ASSERT(k > 1 && "Wrong blocks order: BB is before its single predecessor");
			ir_list_push(&worklist, idom);
			while (1) {
				k--;
				p++;
				idom = *p;
				if (idom < b) {
					break;
				}
				IR_ASSERT(k > 0);
				ir_list_push(&worklist, idom);
			}
		}
		IR_ASSERT(blocks[idom].idom > 0);
		IR_ASSERT(k != 0);

		while (--k > 0) {
			uint32_t pred_b = *(++p);

			if (pred_b < b) {
				/* Forward edge: intersect the two dominator chains.
				 * Block numbers are RPO, so the smaller number is higher
				 * in the dominator tree. */
				IR_ASSERT(blocks[pred_b].idom > 0);
				while (idom != pred_b) {
					while (pred_b > idom) {
						pred_b = blocks[pred_b].idom;
					}
					while (idom > pred_b) {
						idom = blocks[idom].idom;
					}
				}
			} else {
				/* Back-edge: remember it for the validation pass below */
				ctx->flags2 &= ~IR_NO_LOOPS;
				IR_ASSERT(bb->predecessors_count > 1);
				ir_list_push(&worklist, pred_b);
			}
		}
		bb->idom = idom;
		idom_bb = &blocks[idom];

		bb->dom_depth = idom_bb->dom_depth + 1;
		/* Sort by block number to traverse children in pre-order */
		if (idom_bb->dom_child == 0) {
			idom_bb->dom_child = b;
		} else if (b < idom_bb->dom_child) {
			bb->dom_next_child = idom_bb->dom_child;
			idom_bb->dom_child = b;
		} else {
			int child = idom_bb->dom_child;
			ir_block *child_bb = &blocks[child];

			while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
				child = child_bb->dom_next_child;
				child_bb = &blocks[child];
			}
			bb->dom_next_child = child_bb->dom_next_child;
			child_bb->dom_next_child = b;
		}
	}

	blocks[1].idom = 0;

	if (ir_list_len(&worklist) != 0) {
		uint32_t dom_depth;
		uint32_t succ_b;
		bool complete = 1;

		/* Check if all the back-edges lead to the loop headers */
		do {
			b = ir_list_pop(&worklist);
			bb = &blocks[b];
			succ_b = ctx->cfg_edges[bb->successors];
			if (bb->successors_count != 1) {
				/* LOOP_END/END may be linked with the following ENTRY by a fake edge */
				IR_ASSERT(bb->successors_count == 2);
				if (blocks[succ_b].flags & IR_BB_ENTRY) {
					succ_b = ctx->cfg_edges[bb->successors + 1];
				} else {
					IR_ASSERT(blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
				}
			}
			dom_depth = blocks[succ_b].dom_depth; /* fixed: stray ';;' removed */
			/* Climb the dominator tree from the back-edge source; the walk
			 * must end exactly at the back-edge target */
			while (bb->dom_depth > dom_depth) {
				b = bb->dom_parent;
				bb = &blocks[b];
			}
			if (UNEXPECTED(b != succ_b)) {
				complete = 0;
				break;
			}
		} while (ir_list_len(&worklist) != 0);

		if (UNEXPECTED(!complete)) {
			ir_list_free(&worklist);
			return ir_build_dominators_tree_iterative(ctx);
		}
	}

	ir_list_free(&worklist);

	return 1;
}
767
/* Slow-path fallback: compute immediate dominators by the general iterative
 * fixed-point algorithm (Cooper/Harvey/Kennedy) when the single-pass variant
 * detects an ordering it cannot handle. Dominators already found by the fast
 * pass are kept as the initial approximation. Returns 1 on success. */
static int ir_build_dominators_tree_iterative(ir_ctx *ctx)
{
	bool changed;
	uint32_t blocks_count, b;
	ir_block *blocks, *bb;
	uint32_t *edges;

	/* Find immediate dominators */
	blocks = ctx->cfg_blocks;
	edges = ctx->cfg_edges;
	blocks_count = ctx->cfg_blocks_count;

	/* Clear the dominators tree, but keep already found dominators */
	for (b = 0, bb = &blocks[0]; b <= blocks_count; b++, bb++) {
		bb->dom_depth = 0;
		bb->dom_child = 0;
		bb->dom_next_child = 0;
	}

	/* Find immediate dominators by iterative fixed-point algorithm */
	blocks[1].idom = 1;
	do {
		changed = 0;

		for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
			IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
			IR_ASSERT(bb->predecessors_count > 0);
			uint32_t k = bb->predecessors_count;
			uint32_t *p = edges + bb->predecessors;
			uint32_t idom = *p;

			/* Start from the first predecessor that already has an idom */
			if (blocks[idom].idom == 0) {
				while (1) {
					k--;
					p++;
					idom = *p;
					if (blocks[idom].idom > 0) {
						break;
					}
					IR_ASSERT(k > 0);
				}
			}
			IR_ASSERT(k != 0);
			while (--k > 0) {
				uint32_t pred_b = *(++p);

				if (blocks[pred_b].idom > 0) {
					/* Intersect: block numbers are RPO, so the smaller
					 * number is higher in the dominator tree.
					 * (A redundant IR_ASSERT duplicating the condition
					 * above was removed here.) */
					while (idom != pred_b) {
						while (pred_b > idom) {
							pred_b = blocks[pred_b].idom;
						}
						while (idom > pred_b) {
							idom = blocks[idom].idom;
						}
					}
				}
			}
			if (bb->idom != idom) {
				bb->idom = idom;
				changed = 1;
			}
		}
	} while (changed);

	/* Build dominators tree */
	blocks[1].idom = 0;
	blocks[1].dom_depth = 0;
	for (b = 2, bb = &blocks[2]; b <= blocks_count; b++, bb++) {
		uint32_t idom = bb->idom;
		ir_block *idom_bb = &blocks[idom];

		bb->dom_depth = idom_bb->dom_depth + 1;
		/* Sort by block number to traverse children in pre-order */
		if (idom_bb->dom_child == 0) {
			idom_bb->dom_child = b;
		} else if (b < idom_bb->dom_child) {
			bb->dom_next_child = idom_bb->dom_child;
			idom_bb->dom_child = b;
		} else {
			int child = idom_bb->dom_child;
			ir_block *child_bb = &blocks[child];

			while (child_bb->dom_next_child > 0 && b > child_bb->dom_next_child) {
				child = child_bb->dom_next_child;
				child_bb = &blocks[child];
			}
			bb->dom_next_child = child_bb->dom_next_child;
			child_bb->dom_next_child = b;
		}
	}

	return 1;
}
862 #endif
863
ir_dominates(const ir_block * blocks,uint32_t b1,uint32_t b2)864 static bool ir_dominates(const ir_block *blocks, uint32_t b1, uint32_t b2)
865 {
866 uint32_t b1_depth = blocks[b1].dom_depth;
867 const ir_block *bb2 = &blocks[b2];
868
869 while (bb2->dom_depth > b1_depth) {
870 b2 = bb2->dom_parent;
871 bb2 = &blocks[b2];
872 }
873 return b1 == b2;
874 }
875
/* Identify natural loops, mark loop headers and compute per-block loop depth.
 * Uses the DJ-graph method (Sreedhar et al, "Identifying Loops Using DJ
 * Graphs"); requires the dominator tree to be already built. Returns 1. */
int ir_find_loops(ir_ctx *ctx)
{
	uint32_t i, j, n, count;
	uint32_t *entry_times, *exit_times, *sorted_blocks, time = 1;
	ir_block *blocks = ctx->cfg_blocks;
	uint32_t *edges = ctx->cfg_edges;
	ir_worklist work;

	/* Fast path: the dominator pass proved there are no back-edges */
	if (ctx->flags2 & IR_NO_LOOPS) {
		return 1;
	}

	/* We don't materialize the DJ spanning tree explicitly, as we are only interested in ancestor
	 * queries. These are implemented by checking entry/exit times of the DFS search. */
	ir_worklist_init(&work, ctx->cfg_blocks_count + 1);
	/* One allocation holds three arrays of (blocks_count + 1) entries */
	entry_times = ir_mem_malloc((ctx->cfg_blocks_count + 1) * 3 * sizeof(uint32_t));
	exit_times = entry_times + ctx->cfg_blocks_count + 1;
	sorted_blocks = exit_times + ctx->cfg_blocks_count + 1;

	memset(entry_times, 0, (ctx->cfg_blocks_count + 1) * sizeof(uint32_t));

	/* Iterative DFS over the DJ graph (dominator-tree edges + join edges),
	 * recording entry/exit times for later ancestor queries */
	ir_worklist_push(&work, 1);
	while (ir_worklist_len(&work)) {
		ir_block *bb;
		int child;

next:
		i = ir_worklist_peek(&work);
		if (!entry_times[i]) {
			entry_times[i] = time++;
		}

		/* Visit blocks immediately dominated by i. */
		bb = &blocks[i];
		for (child = bb->dom_child; child > 0; child = blocks[child].dom_next_child) {
			if (ir_worklist_push(&work, child)) {
				goto next;
			}
		}

		/* Visit join edges. */
		if (bb->successors_count) {
			uint32_t *p = edges + bb->successors;
			for (j = 0; j < bb->successors_count; j++,p++) {
				uint32_t succ = *p;

				if (blocks[succ].idom == i) {
					continue;
				} else if (ir_worklist_push(&work, succ)) {
					goto next;
				}
			}
		}
		exit_times[i] = time++;
		ir_worklist_pop(&work);
	}

	/* Sort blocks by level, which is the opposite order in which we want to process them */
	sorted_blocks[1] = 1;
	j = 1;
	n = 2;
	/* BFS over the dominator tree: each round appends the next depth level */
	while (j != n) {
		i = j;
		j = n;
		for (; i < j; i++) {
			int child;
			for (child = blocks[sorted_blocks[i]].dom_child; child > 0; child = blocks[child].dom_next_child) {
				sorted_blocks[n++] = child;
			}
		}
	}
	count = n;

	/* Identify loops. See Sreedhar et al, "Identifying Loops Using DJ Graphs". */
	/* Process blocks from the deepest dominator-tree level upwards */
	while (n > 1) {
		i = sorted_blocks[--n];
		ir_block *bb = &blocks[i];

		if (bb->predecessors_count > 1) {
			bool irreducible = 0;
			uint32_t *p = &edges[bb->predecessors];

			j = bb->predecessors_count;
			do {
				uint32_t pred = *p;

				/* A join edge is one for which the predecessor does not
				   immediately dominate the successor. */
				if (bb->idom != pred) {
					/* In a loop back-edge (back-join edge), the successor dominates
					   the predecessor. */
					if (ir_dominates(blocks, i, pred)) {
						if (!ir_worklist_len(&work)) {
							/* Reuse the worklist: reset its visited set */
							ir_bitset_clear(work.visited, ir_bitset_len(ir_worklist_capasity(&work)));
						}
						blocks[pred].loop_header = 0; /* support for merged loops */
						ir_worklist_push(&work, pred);
					} else {
						/* Otherwise it's a cross-join edge. See if it's a branch
						   to an ancestor on the DJ spanning tree. */
						if (entry_times[pred] > entry_times[i] && exit_times[pred] < exit_times[i]) {
							irreducible = 1;
						}
					}
				}
				p++;
			} while (--j);

			if (UNEXPECTED(irreducible)) {
				// TODO: Support for irreducible loops ???
				bb->flags |= IR_BB_IRREDUCIBLE_LOOP;
				ctx->flags2 |= IR_IRREDUCIBLE_CFG;
				while (ir_worklist_len(&work)) {
					ir_worklist_pop(&work);
				}
			} else if (ir_worklist_len(&work)) {
				/* Flood backwards from the back-edge sources to collect the
				 * loop body and point each member at its header */
				bb->flags |= IR_BB_LOOP_HEADER;
				ctx->flags2 |= IR_CFG_HAS_LOOPS;
				bb->loop_depth = 1;
				while (ir_worklist_len(&work)) {
					j = ir_worklist_pop(&work);
					while (blocks[j].loop_header > 0) {
						j = blocks[j].loop_header; /* inner loops propagate to their header */
					}
					if (j != i) {
						ir_block *bb = &blocks[j];
						if (bb->idom == 0 && j != 1) {
							/* Ignore blocks that are unreachable or only abnormally reachable. */
							continue;
						}
						bb->loop_header = i;
						if (bb->predecessors_count) {
							uint32_t *p = &edges[bb->predecessors];
							j = bb->predecessors_count;
							do {
								ir_worklist_push(&work, *p);
								p++;
							} while (--j);
						}
					}
				}
			}
		}
	}

	if (ctx->flags2 & IR_CFG_HAS_LOOPS) {
		/* Second pass (top-down): compute loop depths and propagate
		 * the "loop contains an ENTRY" flag to the loop header */
		for (n = 1; n < count; n++) {
			i = sorted_blocks[n];
			ir_block *bb = &blocks[i];
			if (bb->loop_header > 0) {
				ir_block *loop = &blocks[bb->loop_header];
				uint32_t loop_depth = loop->loop_depth;

				if (bb->flags & IR_BB_LOOP_HEADER) {
					loop_depth++;
				}
				bb->loop_depth = loop_depth;
				if (bb->flags & (IR_BB_ENTRY|IR_BB_LOOP_WITH_ENTRY)) {
					loop->flags |= IR_BB_LOOP_WITH_ENTRY;
				}
			}
		}
	}

	ir_mem_free(entry_times);
	ir_worklist_free(&work);

	return 1;
}
1045
/* Follow the single-successor chain starting at block b, skipping blocks
 * flagged IR_BB_EMPTY (but never START/ENTRY blocks), and return the first
 * non-empty block number. */
static uint32_t _ir_skip_empty_blocks(const ir_ctx *ctx, uint32_t b)
{
	const ir_block *bb = &ctx->cfg_blocks[b];

	while ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
		IR_ASSERT(bb->successors_count == 1);
		b = ctx->cfg_edges[bb->successors];
		bb = &ctx->cfg_blocks[b];
	}
	return b;
}
1059
1060 /* A variation of "Bottom-up Positioning" algorithm described by
1061 * Karl Pettis and Robert C. Hansen "Profile Guided Code Positioning"
1062 */
/* A CFG edge with its (estimated) execution frequency, used by the
 * Pettis-Hansen style block scheduler below. */
typedef struct _ir_edge_info {
	uint32_t from;
	uint32_t to;
	float    freq;
} ir_edge_info;

/* A node of a block chain: "head" links to the chain representative,
 * "next" to the following block; "prev"/"tail" share storage. */
typedef struct _ir_chain {
	uint32_t head;
	uint32_t next;
	union {
		uint32_t prev;
		uint32_t tail;
	};
} ir_chain;

/* qsort() comparator: order edges by decreasing frequency. */
static int ir_edge_info_cmp(const void *b1, const void *b2)
{
	const ir_edge_info *e1 = (const ir_edge_info*)b1;
	const ir_edge_info *e2 = (const ir_edge_info*)b2;

	if (e1->freq != e2->freq) {
		return e1->freq < e2->freq ? 1 : -1;
	}

	/* In case of equal frequencies, try to avoid penalization of one of the "equal" paths by
	 * preferring the first RPO successor (in conditional branches) and the last RPO predecessor
	 * (in merge points).
	 *
	 * See "Static Basic Block Reordering Heuristics for Implicit Control Flow in Baseline JITs"
	 * Polito Guillermo, Ducasse Stephane, and Tesone Pablo (2021)
	 */
	if (e1->from == e2->from) {
		return e1->to - e2->to;
	}
	return e2->from - e1->from;
}
1099
/* Slow path of ir_chain_head(): resolve the real representative of the chain
 * containing "src" and compress the "head" links along the walked path so
 * subsequent lookups take O(1). "head" is src's (possibly stale) head link. */
static IR_NEVER_INLINE uint32_t ir_chain_head_path_compress(ir_chain *chains, uint32_t src, uint32_t head)
{
	IR_ASSERT(head != 0);
	/* First walk: find the true head (the node whose head links to itself) */
	do {
		head = chains[head].head;
	} while (chains[head].head != head);
	/* Second walk: repoint every node on the path directly at the true head */
	do {
		uint32_t next = chains[src].head;
		chains[src].head = head;
		src = next;
	} while (chains[src].head != head);
	return head;
}
1113
ir_chain_head(ir_chain * chains,uint32_t src)1114 IR_ALWAYS_INLINE uint32_t ir_chain_head(ir_chain *chains, uint32_t src)
1115 {
1116 uint32_t head = chains[src].head;
1117 if (chains[head].head == head) {
1118 return head;
1119 } else {
1120 return ir_chain_head_path_compress(chains, src, head);
1121 }
1122 }
1123
/* Concatenate two chains: the chain starting at "dst" is appended after the
 * chain starting at "src"; "src" becomes the head of the combined chain.
 * NOTE: both tails are read before any write, because "prev" and "tail"
 * share a union — the store to chains[dst].prev clobbers chains[dst].tail. */
static void ir_join_chains(ir_chain *chains, uint32_t src, uint32_t dst)
{
	uint32_t tail_of_dst = chains[dst].tail;
	uint32_t tail_of_src = chains[src].tail;

	chains[tail_of_dst].next = src;
	chains[dst].prev = tail_of_src;
	chains[tail_of_src].next = dst;
	chains[src].tail = tail_of_dst;
	chains[dst].head = src;
}
1135
/* Insert single-node chain "c" immediately before node "before" in that
 * node's chain, updating head links so c joins the existing chain (or, when
 * "before" is itself a chain head, c becomes the new head). */
static void ir_insert_chain_before(ir_chain *chains, uint32_t c, uint32_t before)
{
	ir_chain *node = &chains[c];
	ir_chain *pos = &chains[before];

	IR_ASSERT(chains[c].head == c);
	if (pos->head == before) {
		pos->head = c;
	} else {
		node->head = pos->head;
	}
	node->next = before;
	node->prev = pos->prev;
	pos->prev = c;
	chains[node->prev].next = c;
}
1152
1153 #ifndef IR_DEBUG_BB_SCHEDULE_GRAPH
1154 # define IR_DEBUG_BB_SCHEDULE_GRAPH 0
1155 #endif
1156 #ifndef IR_DEBUG_BB_SCHEDULE_EDGES
1157 # define IR_DEBUG_BB_SCHEDULE_EDGES 0
1158 #endif
1159 #ifndef IR_DEBUG_BB_SCHEDULE_CHAINS
1160 # define IR_DEBUG_BB_SCHEDULE_CHAINS 0
1161 #endif
1162
1163 #if IR_DEBUG_BB_SCHEDULE_GRAPH
/* Debugging aid: dump the CFG with the estimated block frequencies and the
 * current chain assignment to stderr in the Graphviz "dot" format.
 * Blocks of one chain share a fill color; chain heads get a thick border,
 * empty blocks a dotted one. */
static void ir_dump_cfg_freq_graph(ir_ctx *ctx, float *bb_freq, uint32_t edges_count, ir_edge_info *edges, ir_chain *chains)
{
	uint32_t b, i;
	ir_block *bb;
	uint8_t c, *colors;
	bool is_head, is_empty;
	uint32_t max_colors = 12; /* size of the Graphviz "set312" color scheme used below */

	/* Assign a distinct (mod 12) color to every chain head */
	colors = alloca(sizeof(uint8_t) * (ctx->cfg_blocks_count + 1));
	memset(colors, 0, sizeof(uint8_t) * (ctx->cfg_blocks_count + 1));
	i = 0;
	for (b = 1; b < ctx->cfg_blocks_count + 1; b++) {
		if (chains[b].head == b) {
			colors[b] = (i % max_colors) + 1;
			i++;
		}
	}

	fprintf(stderr, "digraph {\n");
	fprintf(stderr, "\trankdir=TB;\n");
	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
		bb = &ctx->cfg_blocks[b];
		/* Blocks removed from scheduling (head == 0) stay uncolored */
		c = (chains[b].head) ? colors[ir_chain_head(chains, b)] : 0;
		is_head = chains[b].head == b;
		is_empty = (bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY;
		if (c) {
			fprintf(stderr, "\tBB%d [label=\"BB%d: (%d),%0.3f\\n%s\\n%s\",colorscheme=set312,fillcolor=%d%s%s]\n", b, b,
				bb->loop_depth, bb_freq[b],
				ir_op_name[ctx->ir_base[bb->start].op], ir_op_name[ctx->ir_base[bb->end].op],
				c,
				is_head ? ",penwidth=3" : "",
				is_empty ? ",style=\"dotted,filled\"" : ",style=\"filled\"");
		} else {
			fprintf(stderr, "\tBB%d [label=\"BB%d: (%d),%0.3f\\n%s\\n%s\"%s%s]\n", b, b,
				bb->loop_depth, bb_freq[b],
				ir_op_name[ctx->ir_base[bb->start].op], ir_op_name[ctx->ir_base[bb->end].op],
				is_head ? ",penwidth=3" : "",
				is_empty ? ",style=\"dotted\"" : "");
		}
	}
	fprintf(stderr, "\n");

	for (i = 0; i < edges_count; i++) {
		fprintf(stderr, "\tBB%d -> BB%d [label=\"%0.3f\"]\n", edges[i].from, edges[i].to, edges[i].freq);
	}
	fprintf(stderr, "}\n");
}
1211 #endif
1212
1213 #if IR_DEBUG_BB_SCHEDULE_EDGES
/* Debugging aid: dump the collected CFG edges with their estimated
 * frequencies to stderr as a plain list.
 * NOTE: "ctx" is kept for signature symmetry with the other dump helpers
 * and is currently unused. */
static void ir_dump_edges(ir_ctx *ctx, uint32_t edges_count, ir_edge_info *edges)
{
	uint32_t i;

	fprintf(stderr, "Edges:\n");
	for (i = 0; i < edges_count; i++) {
		fprintf(stderr, "\tBB%d -> BB%d [label=\"%0.3f\"]\n", edges[i].from, edges[i].to, edges[i].freq);
	}
	/* Fix: the old code printed a closing "}" here (copy-paste from the
	 * "dot" graph dumper) although no "{" was ever opened. */
}
1224 #endif
1225
1226 #if IR_DEBUG_BB_SCHEDULE_CHAINS
/* Debugging aid: print every chain as a parenthesized list of its block
 * ids, in chain-head order, to stderr. */
static void ir_dump_chains(ir_ctx *ctx, ir_chain *chains)
{
	uint32_t b;

	fprintf(stderr, "Chains:\n");
	for (b = 1; b < ctx->cfg_blocks_count + 1; b++) {
		uint32_t last, cur;

		if (chains[b].head != b) {
			continue; /* only chain heads start an output line */
		}
		last = chains[b].tail;
		cur = b;
		fprintf(stderr, "(BB%d", cur);
		while (cur != last) {
			cur = chains[cur].next;
			fprintf(stderr, ", BB%d", cur);
		}
		fprintf(stderr, ")\n");
	}
}
1245 #endif
1246
/* Bottom-up basic block scheduling - a variation of the Pettis-Hansen
 * "bottom-up positioning" algorithm.
 *
 * Estimates basic block and edge execution frequencies, greedily joins
 * blocks into chains along the most frequent edges, and records the final
 * linear order in ctx->cfg_schedule (1-based, 0-terminated).
 * Always returns 1. */
static int ir_schedule_blocks_bottom_up(ir_ctx *ctx)
{
	uint32_t max_edges_count = ctx->cfg_edges_count / 2;
	uint32_t edges_count = 0;
	uint32_t b, i, loop_depth;
	float *bb_freq, freq;
	ir_block *bb;
	ir_edge_info *edges, *e;
	ir_chain *chains;
	ir_bitqueue worklist;
	ir_bitset visited;
	uint32_t *schedule_end, count;

	/* The schedule is filled from both ends: hot blocks go to the front
	 * (slots 1..count), blocks moved "to the end" are written backwards
	 * through schedule_end; slot cfg_blocks_count+1 holds the 0 terminator. */
	ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
	schedule_end = ctx->cfg_schedule + ctx->cfg_blocks_count;

	/* 1. Create initial chains for each BB */
	chains = ir_mem_malloc(sizeof(ir_chain) * (ctx->cfg_blocks_count + 1));
	chains[0].head = 0;
	chains[0].next = 0;
	chains[0].prev = 0;
	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
		/* each block starts as a singleton circular chain */
		chains[b].head = b;
		chains[b].next = b;
		chains[b].prev = b;
	}

	/* 2. Collect information about BBs and EDGEs frequencies */
	edges = ir_mem_malloc(sizeof(ir_edge_info) * max_edges_count);
	bb_freq = ir_mem_calloc(ctx->cfg_blocks_count + 1, sizeof(float));
	bb_freq[1] = 1.0f; /* the entry block executes with frequency 1.0 */
	visited = ir_bitset_malloc(ctx->cfg_blocks_count + 1);
	ir_bitqueue_init(&worklist, ctx->cfg_blocks_count + 1);
	ir_bitqueue_add(&worklist, 1);
	while ((b = ir_bitqueue_pop(&worklist)) != (uint32_t)-1) {
restart:
		bb = &ctx->cfg_blocks[b];
		if (bb->predecessors_count) {
			/* Make sure all forward predecessors were processed first, so
			 * bb_freq[b] is complete before it is propagated further */
			uint32_t n = bb->predecessors_count;
			uint32_t *p = ctx->cfg_edges + bb->predecessors;
			for (; n > 0; p++, n--) {
				uint32_t predecessor = *p;
				/* Basic Blocks are ordered in a way that usual predecessors ids are less than successors.
				 * So we may compare blocks ids (predecessor < b) instead of a more expensive check for back edge
				 * (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b)
				 */
				if (predecessor < b) {
					if (!ir_bitset_in(visited, predecessor)) {
						/* process the unvisited predecessor first */
						b = predecessor;
						ir_bitqueue_del(&worklist, b);
						goto restart;
					}
				} else if (b != predecessor && ctx->cfg_blocks[predecessor].loop_header != b) {
					/* Unexpected CFG shape: dump the graph for diagnostics.
					 * The assertion below necessarily fails here - its
					 * condition is the negation of this branch's guard. */
					ir_dump_cfg(ctx, stderr);
					IR_ASSERT(b == predecessor || ctx->cfg_blocks[predecessor].loop_header == b);
				}
			}
		}

		ir_bitset_incl(visited, b);

		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
			uint32_t successor = ctx->cfg_edges[bb->successors];

			/* move empty blocks to the end */
			IR_ASSERT(chains[b].head == b);
			chains[b].head = 0;
			*schedule_end = b;
			schedule_end--;

			if (successor > b) {
				/* propagate the frequency and process the successor immediately */
				bb_freq[successor] += bb_freq[b];
				b = successor;
				ir_bitqueue_del(&worklist, b);
				goto restart;
			} else {
				continue;
			}
		}

		loop_depth = bb->loop_depth;
		if (bb->flags & IR_BB_LOOP_HEADER) {
			// TODO: Estimate the loop iterations count
			bb_freq[b] *= 10;
		}

		if (bb->successors_count) {
			uint32_t n = bb->successors_count;
			uint32_t *p = ctx->cfg_edges + bb->successors;

			if (n == 1) {
				uint32_t successor = *p;

				IR_ASSERT(edges_count < max_edges_count);
				freq = bb_freq[b];
				if (successor > b) {
					IR_ASSERT(!ir_bitset_in(visited, successor));
					bb_freq[successor] += freq;
					ir_bitqueue_add(&worklist, successor);
				}
				/* edges are recorded against the first non-empty target */
				successor = _ir_skip_empty_blocks(ctx, successor);
				edges[edges_count].from = b;
				edges[edges_count].to = successor;
				edges[edges_count].freq = freq;
				edges_count++;
			} else if (n == 2 && ctx->ir_base[bb->end].op == IR_IF) {
				uint32_t successor1 = *p;
				ir_block *successor1_bb = &ctx->cfg_blocks[successor1];
				ir_insn *insn1 = &ctx->ir_base[successor1_bb->start];
				uint32_t successor2 = *(p + 1);
				ir_block *successor2_bb = &ctx->cfg_blocks[successor2];
				ir_insn *insn2 = &ctx->ir_base[successor2_bb->start];
				int prob1, prob2, probN = 100;

				/* Derive the branch probabilities from the explicit hints
				 * stored in op2 of IF_TRUE/IF_FALSE, falling back to
				 * loop-depth and "empty block" heuristics without hints */
				if (insn1->op2) {
					prob1 = insn1->op2;
					if (insn2->op2) {
						prob2 = insn2->op2;
						probN = prob1 + prob2;
					} else {
						if (prob1 > 100) {
							prob1 = 100;
						}
						prob2 = 100 - prob1;
					}

				} else if (insn2->op2) {
					prob2 = insn2->op2;
					if (prob2 > 100) {
						prob2 = 100;
					}
					prob1 = 100 - prob2;
				} else if (successor1_bb->loop_depth >= loop_depth
						&& successor2_bb->loop_depth < loop_depth) {
					/* staying in the loop is assumed more likely than leaving it */
					prob1 = 90;
					prob2 = 10;
				} else if (successor1_bb->loop_depth < loop_depth
						&& successor2_bb->loop_depth >= loop_depth) {
					prob1 = 10;
					prob2 = 90;
				} else if (successor2_bb->flags & IR_BB_EMPTY) {
					prob1 = 51;
					prob2 = 49;
				} else if (successor1_bb->flags & IR_BB_EMPTY) {
					prob1 = 49;
					prob2 = 51;
				} else {
					prob1 = prob2 = 50;
				}
				do {
					freq = bb_freq[b] * (float)prob1 / (float)probN;
					if (successor1 > b) {
						IR_ASSERT(!ir_bitset_in(visited, successor1));
						bb_freq[successor1] += freq;
						if (successor1_bb->successors_count == 0 && insn1->op2 == 1) {
							/* move cold block without successors to the end */
							IR_ASSERT(chains[successor1].head == successor1);
							chains[successor1].head = 0;
							*schedule_end = successor1;
							schedule_end--;
							break;
						} else {
							ir_bitqueue_add(&worklist, successor1);
						}
					}
					/* try to join edges early to reduce number of edges and the cost of their sorting */
					if (prob1 > prob2
					 && (successor1_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
						uint32_t src = chains[b].next;
						IR_ASSERT(chains[src].head == src);
						IR_ASSERT(src == ir_chain_head(chains, b));
						IR_ASSERT(chains[successor1].head == successor1);
						ir_join_chains(chains, src, successor1);
						if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
					}
					successor1 = _ir_skip_empty_blocks(ctx, successor1);
					IR_ASSERT(edges_count < max_edges_count);
					edges[edges_count].from = b;
					edges[edges_count].to = successor1;
					edges[edges_count].freq = freq;
					edges_count++;
				} while (0);
				do {
					/* the same processing for the second successor */
					freq = bb_freq[b] * (float)prob2 / (float)probN;
					if (successor2 > b) {
						IR_ASSERT(!ir_bitset_in(visited, successor2));
						bb_freq[successor2] += freq;
						if (successor2_bb->successors_count == 0 && insn2->op2 == 1) {
							/* move cold block without successors to the end */
							IR_ASSERT(chains[successor2].head == successor2);
							chains[successor2].head = 0;
							*schedule_end = successor2;
							schedule_end--;
							break;
						} else {
							ir_bitqueue_add(&worklist, successor2);
						}
					}
					if (prob2 > prob1
					 && (successor2_bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) != IR_BB_EMPTY) {
						uint32_t src = chains[b].next;
						IR_ASSERT(chains[src].head == src);
						IR_ASSERT(src == ir_chain_head(chains, b));
						IR_ASSERT(chains[successor2].head == successor2);
						ir_join_chains(chains, src, successor2);
						if (!IR_DEBUG_BB_SCHEDULE_GRAPH) break;
					}
					successor2 = _ir_skip_empty_blocks(ctx, successor2);
					IR_ASSERT(edges_count < max_edges_count);
					edges[edges_count].from = b;
					edges[edges_count].to = successor2;
					edges[edges_count].freq = freq;
					edges_count++;
				} while (0);
			} else {
				/* SWITCH-like multi-way branch */
				int prob;

				for (; n > 0; p++, n--) {
					uint32_t successor = *p;
					ir_block *successor_bb = &ctx->cfg_blocks[successor];
					ir_insn *insn = &ctx->ir_base[successor_bb->start];

					if (insn->op == IR_CASE_DEFAULT) {
						prob = insn->op2;
						if (!prob) {
							prob = 100 / bb->successors_count;
						}
					} else if (insn->op == IR_CASE_VAL) {
						prob = insn->op3;
						if (!prob) {
							prob = 100 / bb->successors_count;
						}
					} else if (insn->op == IR_ENTRY) {
						if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) {
							prob = 99; /* prefer empty ENTRY block to go first */
						} else {
							prob = 1;
						}
					} else {
						prob = 100 / bb->successors_count;
					}
					freq = bb_freq[b] * (float)prob / 100.0f;
					if (successor > b) {
						IR_ASSERT(!ir_bitset_in(visited, successor));
						bb_freq[successor] += freq;
						ir_bitqueue_add(&worklist, successor);
					}
					successor = _ir_skip_empty_blocks(ctx, successor);
					IR_ASSERT(edges_count < max_edges_count);
					edges[edges_count].from = b;
					edges[edges_count].to = successor;
					edges[edges_count].freq = freq;
					edges_count++;
				}
			}
		}
	}
	ir_bitqueue_free(&worklist);
	ir_mem_free(visited);

	/* 3. Sort EDGEs according to their frequencies */
	qsort(edges, edges_count, sizeof(ir_edge_info), ir_edge_info_cmp);

#if IR_DEBUG_BB_SCHEDULE_EDGES
	ir_dump_edges(ctx, edges_count, edges);
#endif

	/* 4. Process EDGEs in the decreasing frequency order and join the connected chains */
	for (e = edges, i = edges_count; i > 0; e++, i--) {
		uint32_t dst = chains[e->to].head;
		if (dst == e->to) {
			uint32_t src = chains[e->from].next;
			if (chains[src].head == src) {
				IR_ASSERT(src == ir_chain_head(chains, e->from) && chains[src].tail == e->from);
				if (src != dst) {
					ir_join_chains(chains, src, dst);
				} else if (ctx->cfg_blocks[e->from].successors_count < 2) {
					/* Try to rotate loop chain to finish it with a conditional branch */
					uint32_t tail = e->from;
					uint32_t prev = src;
					uint32_t next = chains[prev].next;
					uint32_t best = 0;

					while (prev != tail) {
						if (ctx->ir_base[ctx->cfg_blocks[prev].end].op == IR_IF) {
							if (ctx->ir_base[ctx->cfg_blocks[prev].start].op == IR_LOOP_BEGIN
							 && ctx->cfg_blocks[prev].loop_depth > 1) {
								/* a nested loop header is the best rotation point */
								best = next;
								break;
							} else if (!best || bb_freq[next] < bb_freq[best]) {
								/* Find the successor of IF with the least frequency */
								best = next;
							}
						}
						prev = next;
						next = chains[next].next;
					}
					if (best) {
						/* change the head of the chain */
						chains[src].head = best;
						chains[best].head = best;
					}
				}
#if !IR_DEBUG_BB_SCHEDULE_GRAPH
				e->from = 0; /* reset "from" to avoid check on step #7 */
#endif
			}
		}
	}

#if IR_DEBUG_BB_SCHEDULE_GRAPH
	ir_dump_cfg_freq_graph(ctx, bb_freq, edges_count, edges, chains);
#endif

	ir_mem_free(bb_freq);

#if IR_DEBUG_BB_SCHEDULE_CHAINS
	ir_dump_chains(ctx, chains);
#endif

	/* 5. Merge empty entry blocks */
	if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && ctx->entries_count) {
		for (i = 0; i < ctx->entries_count; i++) {
			b = ctx->entries[i];
			IR_ASSERT(ctx->cfg_blocks[b].flags & IR_BB_ENTRY);
			if (b && chains[b].head == b && chains[b].tail == b) {
				bb = &ctx->cfg_blocks[b];
				if (bb->flags & IR_BB_EMPTY) {
					uint32_t successor;

					/* find the first non-empty successor and insert the
					 * empty ENTRY block right before it */
					do {
						IR_ASSERT(bb->successors_count == 1);
						successor = ctx->cfg_edges[bb->successors];
						bb = &ctx->cfg_blocks[successor];
					} while ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY);
					IR_ASSERT(chains[successor].head);
					ir_insert_chain_before(chains, b, successor);
				}
			}
		}

#if IR_DEBUG_BB_SCHEDULE_CHAINS
		ir_dump_chains(ctx, chains);
#endif
	}

	/* 6. Align loop headers */
	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
		if (chains[b].head == b) {
			bb = &ctx->cfg_blocks[b];
			if (bb->loop_depth) {
				if ((bb->flags & IR_BB_LOOP_HEADER) || ir_chain_head(chains, bb->loop_header) == b) {
					bb->flags |= IR_BB_ALIGN_LOOP;
				}
			}
		}
	}

	/* 7. Group chains according to the most frequent edge between them */
	// TODO: Try to find a better heuristic
	for (e = edges, i = edges_count; i > 0; e++, i--) {
#if !IR_DEBUG_BB_SCHEDULE_GRAPH
		if (!e->from) continue;
#endif
		uint32_t src = ir_chain_head(chains, e->from);
		uint32_t dst = ir_chain_head(chains, e->to);
		if (src != dst) {
			if (dst == 1) {
				/* the chain of the entry block always stays first */
				ir_join_chains(chains, dst, src);
			} else {
				ir_join_chains(chains, src, dst);
			}
		}
	}

#if IR_DEBUG_BB_SCHEDULE_CHAINS
	ir_dump_chains(ctx, chains);
#endif

	/* 8. Form a final BB order */
	count = 0;
	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
		if (chains[b].head == b) {
			uint32_t tail = chains[b].tail;
			uint32_t i = b;
			while (1) {
				count++;
				ctx->cfg_schedule[count] = i;
				if (i == tail) break;
				i = chains[i].next;
			}
		}
	}

	/* the forward-filled part must exactly meet the backward-filled part */
	IR_ASSERT(ctx->cfg_schedule + count == schedule_end);
	ctx->cfg_schedule[ctx->cfg_blocks_count + 1] = 0;

	ir_mem_free(edges);
	ir_mem_free(chains);

	return 1;
}
1649
1650 /* A variation of "Top-down Positioning" algorithm described by
1651 * Karl Pettis and Robert C. Hansen "Profile Guided Code Positioning"
1652 */
static int ir_schedule_blocks_top_down(ir_ctx *ctx)
{
	ir_bitqueue blocks;            /* set of still unscheduled blocks */
	uint32_t b, best_successor, last_non_empty;
	ir_block *bb, *best_successor_bb;
	ir_insn *insn;
	uint32_t *list, *schedule_end;
	uint32_t count = 0;

	ir_bitqueue_init(&blocks, ctx->cfg_blocks_count + 1);
	blocks.pos = 0; /* scan the bit-queue from the beginning */
	/* list[] is 1-based: hot blocks fill slots 1..count, empty blocks are
	 * written backwards through schedule_end, slot cfg_blocks_count+1 is
	 * the 0 terminator */
	list = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
	list[ctx->cfg_blocks_count + 1] = 0;
	schedule_end = list + ctx->cfg_blocks_count;
	for (b = 1; b <= ctx->cfg_blocks_count; b++) {
		ir_bitset_incl(blocks.set, b);
	}

	while ((b = ir_bitqueue_pop(&blocks)) != (uint32_t)-1) {
		bb = &ctx->cfg_blocks[b];
		/* Start trace */
		last_non_empty = 0;
		do {
			if (UNEXPECTED(bb->flags & IR_BB_PREV_EMPTY_ENTRY) && ir_bitqueue_in(&blocks, b - 1)) {
				/* Schedule the previous empty ENTRY block before this one */
				uint32_t predecessor = b - 1;

				ir_bitqueue_del(&blocks, predecessor);
				count++;
				list[count] = predecessor;
			}
			if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
				/* move empty blocks to the end */
				*schedule_end = b;
				schedule_end--;
			} else {
				count++;
				list[count] = b;
				last_non_empty = b;
			}
			/* Select the most likely not-yet-scheduled successor to
			 * continue the trace with */
			best_successor_bb = NULL;
			if (bb->successors_count == 1) {
				best_successor = ctx->cfg_edges[bb->successors];
				if (ir_bitqueue_in(&blocks, best_successor)) {
					best_successor_bb = &ctx->cfg_blocks[best_successor];
				}
			} else if (bb->successors_count > 1) {
				uint32_t prob, best_successor_prob;
				uint32_t *p, successor;
				ir_block *successor_bb;

				/* NOTE: "b" is reused as a loop counter here; it is always
				 * reassigned below before being used as a block id again */
				for (b = 0, p = &ctx->cfg_edges[bb->successors]; b < bb->successors_count; b++, p++) {
					successor = *p;
					if (ir_bitqueue_in(&blocks, successor)) {
						successor_bb = &ctx->cfg_blocks[successor];
						insn = &ctx->ir_base[successor_bb->start];
						/* Take the probability from the explicit hint
						 * (op2/op3) or spread it evenly among successors */
						if (insn->op == IR_IF_TRUE || insn->op == IR_IF_FALSE) {
							prob = insn->op2;
							if (!prob) {
								prob = 100 / bb->successors_count;
								if (!(successor_bb->flags & IR_BB_EMPTY)) {
									/* slight preference for non-empty targets */
									prob++;
								}
							}
						} else if (insn->op == IR_CASE_DEFAULT) {
							prob = insn->op2;
							if (!prob) {
								prob = 100 / bb->successors_count;
							}
						} else if (insn->op == IR_CASE_VAL) {
							prob = insn->op3;
							if (!prob) {
								prob = 100 / bb->successors_count;
							}
						} else if (insn->op == IR_ENTRY) {
							if ((ctx->flags & IR_MERGE_EMPTY_ENTRIES) && (successor_bb->flags & IR_BB_EMPTY)) {
								prob = 99; /* prefer empty ENTRY block to go first */
							} else {
								prob = 1;
							}
						} else {
							prob = 100 / bb->successors_count;
						}
						/* prefer deeper loop nesting, then higher probability;
						 * best_successor_prob is only read after it was set
						 * together with best_successor_bb */
						if (!best_successor_bb
						 || successor_bb->loop_depth > best_successor_bb->loop_depth
						 || prob > best_successor_prob) {
							best_successor = successor;
							best_successor_bb = successor_bb;
							best_successor_prob = prob;
						}
					}
				}
			}
			if (!best_successor_bb) {
				/* Try to continue trace using the other successor of the last IF */
				if ((bb->flags & IR_BB_EMPTY) && last_non_empty) {
					bb = &ctx->cfg_blocks[last_non_empty];
					if (bb->successors_count == 2 && ctx->ir_base[bb->end].op == IR_IF) {
						b = ctx->cfg_edges[bb->successors];

						if (!ir_bitqueue_in(&blocks, b)) {
							b = ctx->cfg_edges[bb->successors + 1];
						}
						if (ir_bitqueue_in(&blocks, b)) {
							bb = &ctx->cfg_blocks[b];
							ir_bitqueue_del(&blocks, b);
							continue;
						}
					}
				}
				/* End trace */
				break;
			}
			b = best_successor;
			bb = best_successor_bb;
			ir_bitqueue_del(&blocks, b);
		} while (1);
	}

	/* the forward-filled part must exactly meet the backward-filled part */
	IR_ASSERT(list + count == schedule_end);
	ctx->cfg_schedule = list;
	ir_bitqueue_free(&blocks);

	return 1;
}
1778
/* Compute a basic block linearization for code emission.
 * Does nothing for trivial CFGs; otherwise marks branches leading to
 * UNREACHABLE as unlikely and dispatches to one of the two scheduling
 * algorithms.  Always returns 1. */
int ir_schedule_blocks(ir_ctx *ctx)
{
	ir_ref ref;

	/* nothing to reorder with at most two blocks */
	if (ctx->cfg_blocks_count <= 2) {
		return 1;
	}

	/* Mark "stop" blocks terminated with UNREACHABLE as "unexpected" */
	ref = ctx->ir_base[1].op1;
	while (ref) {
		ir_insn *insn = &ctx->ir_base[ref];

		if (insn->op == IR_UNREACHABLE && ctx->ir_base[insn->op1].op != IR_TAILCALL) {
			ir_block *bb = &ctx->cfg_blocks[ctx->cfg_map[ref]];
			uint32_t n = bb->predecessors_count;

			if (n == 1) {
				/* set the minimal probability hint on the branch that
				 * enters the UNREACHABLE block (only if no hint is present) */
				ir_insn *start_insn = &ctx->ir_base[bb->start];
				if (start_insn->op == IR_IF_TRUE
				 || start_insn->op == IR_IF_FALSE
				 || start_insn->op == IR_CASE_DEFAULT) {
					if (!start_insn->op2) start_insn->op2 = 1;
				} else if (start_insn->op == IR_CASE_VAL) {
					if (!start_insn->op3) start_insn->op3 = 1;
				}
			} else if (n > 1) {
				uint32_t *p = &ctx->cfg_edges[bb->predecessors];

				/* mark every single-predecessor path leading to it instead */
				for (; n > 0; p++, n--) {
					bb = &ctx->cfg_blocks[*p];
					if (bb->predecessors_count == 1) {
						ir_insn *start_insn = &ctx->ir_base[bb->start];
						if (start_insn->op == IR_IF_TRUE
						 || start_insn->op == IR_IF_FALSE
						 || start_insn->op == IR_CASE_DEFAULT) {
							if (!start_insn->op2) start_insn->op2 = 1;
						} else if (start_insn->op == IR_CASE_VAL) {
							if (!start_insn->op3) start_insn->op3 = 1;
						}
					}
				}
			}
		}
		/* op3 links the next instruction in this list */
		ref = insn->op3;
	}

	/* The bottom-up Pettis-Hansen algorithm is expensive - O(n^3),
	 * use it only for relatively small functions.
	 *
	 * TODO: make the choice between top-down and bottom-up algorithm configurable
	 */
	if (UNEXPECTED(ctx->flags2 & IR_IRREDUCIBLE_CFG) || ctx->cfg_blocks_count > 256) {
		return ir_schedule_blocks_top_down(ctx);
	} else {
		return ir_schedule_blocks_bottom_up(ctx);
	}
}
1837
1838 /* JMP target optimisation */
/* Public wrapper around the internal _ir_skip_empty_blocks() helper
 * (presumably resolves "b" to the first non-empty block a jump to it
 * would actually reach -- see the helper's definition for the exact
 * contract). */
uint32_t ir_skip_empty_target_blocks(const ir_ctx *ctx, uint32_t b)
{
	return _ir_skip_empty_blocks(ctx, b);
}
1843
ir_next_block(const ir_ctx * ctx,uint32_t b)1844 uint32_t ir_next_block(const ir_ctx *ctx, uint32_t b)
1845 {
1846 ir_block *bb;
1847
1848 if (ctx->cfg_schedule) {
1849 uint32_t ret = ctx->cfg_schedule[++b];
1850
1851 /* Check for empty ENTRY block */
1852 while (ret && ((ctx->cfg_blocks[ret].flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY)) {
1853 ret = ctx->cfg_schedule[++b];
1854 }
1855 return ret;
1856 }
1857
1858 b++;
1859 while (1) {
1860 if (b > ctx->cfg_blocks_count) {
1861 return 0;
1862 }
1863
1864 bb = &ctx->cfg_blocks[b];
1865
1866 if ((bb->flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY) {
1867 b++;
1868 } else {
1869 break;
1870 }
1871 }
1872 return b;
1873 }
1874
ir_get_true_false_blocks(const ir_ctx * ctx,uint32_t b,uint32_t * true_block,uint32_t * false_block)1875 void ir_get_true_false_blocks(const ir_ctx *ctx, uint32_t b, uint32_t *true_block, uint32_t *false_block)
1876 {
1877 ir_block *bb;
1878 uint32_t *p, use_block;
1879
1880 *true_block = 0;
1881 *false_block = 0;
1882 bb = &ctx->cfg_blocks[b];
1883 IR_ASSERT(ctx->ir_base[bb->end].op == IR_IF);
1884 IR_ASSERT(bb->successors_count == 2);
1885 p = &ctx->cfg_edges[bb->successors];
1886 use_block = *p;
1887 if (ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE) {
1888 *true_block = ir_skip_empty_target_blocks(ctx, use_block);
1889 use_block = *(p+1);
1890 IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE);
1891 *false_block = ir_skip_empty_target_blocks(ctx, use_block);
1892 } else {
1893 IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_FALSE);
1894 *false_block = ir_skip_empty_target_blocks(ctx, use_block);
1895 use_block = *(p+1);
1896 IR_ASSERT(ctx->ir_base[ctx->cfg_blocks[use_block].start].op == IR_IF_TRUE);
1897 *true_block = ir_skip_empty_target_blocks(ctx, use_block);
1898 }
1899 IR_ASSERT(*true_block && *false_block);
1900 }
1901