Lines Matching refs:ctx
20 static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late) in ir_gcm_schedule_early() argument
27 insn = &ctx->ir_base[ref]; in ir_gcm_schedule_early()
39 b = ctx->cfg_map[input]; in ir_gcm_schedule_early()
43 b = ir_gcm_schedule_early(ctx, input, queue_late); in ir_gcm_schedule_early()
45 if (dom_depth < ctx->cfg_blocks[b].dom_depth) { in ir_gcm_schedule_early()
46 dom_depth = ctx->cfg_blocks[b].dom_depth; in ir_gcm_schedule_early()
52 ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result); in ir_gcm_schedule_early()
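
The ir_gcm_schedule_early() matches above show the "schedule early" rule: each input is scheduled first, and the instruction's early block is the input block with the greatest dominator-tree depth. A minimal standalone sketch of that rule follows; blk_t and its fields are simplified, hypothetical stand-ins for the real CFG records, not the ir_ctx layout.

    #include <stdint.h>

    /* Simplified block record (hypothetical): only the dominator depth is needed. */
    typedef struct { uint32_t dom_depth; } blk_t;

    /* Early placement: the deepest block, in dominator-tree terms, among the
     * blocks already assigned to the instruction's inputs. */
    static uint32_t schedule_early(const blk_t *blocks, const uint32_t *input_blocks, uint32_t n)
    {
        uint32_t best  = 1;                      /* entry block as the default */
        uint32_t depth = blocks[1].dom_depth;

        for (uint32_t i = 0; i < n; i++) {
            uint32_t b = input_blocks[i];
            if (blocks[b].dom_depth > depth) {   /* keep the deepest input block */
                depth = blocks[b].dom_depth;
                best  = b;
            }
        }
        return best;
    }
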
58 static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2) in ir_gcm_find_lca() argument
62 dom_depth = ctx->cfg_blocks[b2].dom_depth; in ir_gcm_find_lca()
63 while (ctx->cfg_blocks[b1].dom_depth > dom_depth) { in ir_gcm_find_lca()
64 b1 = ctx->cfg_blocks[b1].dom_parent; in ir_gcm_find_lca()
66 dom_depth = ctx->cfg_blocks[b1].dom_depth; in ir_gcm_find_lca()
67 while (ctx->cfg_blocks[b2].dom_depth > dom_depth) { in ir_gcm_find_lca()
68 b2 = ctx->cfg_blocks[b2].dom_parent; in ir_gcm_find_lca()
71 b1 = ctx->cfg_blocks[b1].dom_parent; in ir_gcm_find_lca()
72 b2 = ctx->cfg_blocks[b2].dom_parent; in ir_gcm_find_lca()
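
ir_gcm_find_lca() computes the lowest common ancestor of two blocks in the dominator tree by first lifting the deeper block to the shallower one's depth and then climbing both in lock-step. A self-contained approximation, again over a simplified block record rather than the real ir_block:

    #include <stdint.h>

    typedef struct { uint32_t dom_depth, dom_parent; } blk_t;

    static uint32_t find_lca(const blk_t *blocks, uint32_t b1, uint32_t b2)
    {
        /* Lift whichever block is deeper until both sit at the same depth... */
        while (blocks[b1].dom_depth > blocks[b2].dom_depth) {
            b1 = blocks[b1].dom_parent;
        }
        while (blocks[b2].dom_depth > blocks[b1].dom_depth) {
            b2 = blocks[b2].dom_parent;
        }
        /* ...then climb both chains in lock-step until they meet. */
        while (b1 != b2) {
            b1 = blocks[b1].dom_parent;
            b2 = blocks[b2].dom_parent;
        }
        return b1;
    }
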
77 static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) in ir_gcm_select_best_block() argument
79 ir_block *bb = &ctx->cfg_blocks[lca]; in ir_gcm_select_best_block()
88 if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) { in ir_gcm_select_best_block()
89 ir_use_list *use_list = &ctx->use_lists[ref]; in ir_gcm_select_best_block()
92 ir_ref use = ctx->use_edges[use_list->refs]; in ir_gcm_select_best_block()
93 ir_insn *insn = &ctx->ir_base[use]; in ir_gcm_select_best_block()
102 flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; in ir_gcm_select_best_block()
104 && !(ctx->binding && ir_binding_find(ctx, ref))) { in ir_gcm_select_best_block()
112 bb = &ctx->cfg_blocks[b]; in ir_gcm_select_best_block()
117 ir_block *loop_bb = &ctx->cfg_blocks[best]; in ir_gcm_select_best_block()
120 loop_bb = &ctx->cfg_blocks[loop_bb->loop_header]; in ir_gcm_select_best_block()
124 uint32_t *p = ctx->cfg_edges + loop_bb->predecessors; in ir_gcm_select_best_block()
137 flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; in ir_gcm_select_best_block()
139 && !(ctx->binding && ir_binding_find(ctx, ref))) { in ir_gcm_select_best_block()
145 } while (b != ctx->cfg_map[ref]); in ir_gcm_select_best_block()
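
The ir_gcm_select_best_block() matches walk the dominator tree from the late block (the LCA of the uses) back toward the early block, preferring blocks outside of loops; compares feeding a branch, bound nodes, and irreducible loop headers are special-cased. A hedged sketch of just the loop-depth minimization, assuming the early block dominates the late one (which GCM guarantees); loop_depth and dom_parent live in a hypothetical simplified record here:

    #include <stdint.h>

    typedef struct { uint32_t dom_parent, loop_depth; } blk_t;

    /* Pick the least deeply nested block on the dominator path lca..early. */
    static uint32_t select_best_block(const blk_t *blocks, uint32_t early, uint32_t lca)
    {
        uint32_t best = lca;
        uint32_t b    = lca;

        while (b != early) {
            b = blocks[b].dom_parent;            /* step toward the early block */
            if (blocks[b].loop_depth < blocks[best].loop_depth) {
                best = b;                        /* found a shallower loop nest */
            }
        }
        return best;
    }
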
162 static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) in _push_predecessors() argument
167 p = ctx->cfg_edges + bb->predecessors; in _push_predecessors()
178 static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) in _check_successors() argument
183 IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors])); in _check_successors()
187 p = ctx->cfg_edges + bb->successors; in _check_successors()
200 static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) in ir_split_partially_dead_node() argument
206 ir_gcm_split_data *data = ctx->data; in ir_split_partially_dead_node()
208 IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count); in ir_split_partially_dead_node()
215 use_list = &ctx->use_lists[ref]; in ir_split_partially_dead_node()
217 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { in ir_split_partially_dead_node()
219 insn = &ctx->ir_base[use]; in ir_split_partially_dead_node()
222 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ in ir_split_partially_dead_node()
227 i = ctx->cfg_map[*q]; in ir_split_partially_dead_node()
228 IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); in ir_split_partially_dead_node()
236 i = ctx->cfg_map[use]; in ir_split_partially_dead_node()
240 IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); in ir_split_partially_dead_node()
249 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
268 _push_predecessors(ctx, &ctx->cfg_blocks[i], data); in ir_split_partially_dead_node()
274 ir_block *bb = &ctx->cfg_blocks[i]; in ir_split_partially_dead_node()
276 if (_check_successors(ctx, bb, data)) { in ir_split_partially_dead_node()
283 _push_predecessors(ctx, bb, data); in ir_split_partially_dead_node()
291 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
306 use_list = &ctx->use_lists[ref]; in ir_split_partially_dead_node()
323 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { in ir_split_partially_dead_node()
325 insn = &ctx->ir_base[use]; in ir_split_partially_dead_node()
328 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ in ir_split_partially_dead_node()
338 j = i = ctx->cfg_map[*q]; in ir_split_partially_dead_node()
339 while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { in ir_split_partially_dead_node()
340 j = ctx->cfg_blocks[j].idom; in ir_split_partially_dead_node()
358 j = i = ctx->cfg_map[use]; in ir_split_partially_dead_node()
360 while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { in ir_split_partially_dead_node()
361 j = ctx->cfg_blocks[j].idom; in ir_split_partially_dead_node()
380 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
397 insn = &ctx->ir_base[ref]; in ir_split_partially_dead_node()
400 clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3); in ir_split_partially_dead_node()
401 insn = &ctx->ir_base[ref]; in ir_split_partially_dead_node()
402 if (insn->op1 > 0) ir_use_list_add(ctx, insn->op1, clone); in ir_split_partially_dead_node()
403 if (insn->op2 > 0) ir_use_list_add(ctx, insn->op2, clone); in ir_split_partially_dead_node()
404 if (insn->op3 > 0) ir_use_list_add(ctx, insn->op3, clone); in ir_split_partially_dead_node()
408 ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list)); in ir_split_partially_dead_node()
409 ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t)); in ir_split_partially_dead_node()
410 n = ctx->use_lists[ref].refs; in ir_split_partially_dead_node()
420 ctx->cfg_map[clone] = clones[i].block; in ir_split_partially_dead_node()
421 ctx->use_lists[clone].count = clones[i].use_count; in ir_split_partially_dead_node()
422 ctx->use_lists[clone].refs = n; in ir_split_partially_dead_node()
427 ctx->use_edges[n++] = use; in ir_split_partially_dead_node()
431 ir_insn *insn = &ctx->ir_base[use]; in ir_split_partially_dead_node()
437 j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)]; in ir_split_partially_dead_node()
439 uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth; in ir_split_partially_dead_node()
440 while (ctx->cfg_blocks[j].dom_depth > dom_depth) { in ir_split_partially_dead_node()
441 j = ctx->cfg_blocks[j].dom_parent; in ir_split_partially_dead_node()
468 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
469 ir_check(ctx); in ir_split_partially_dead_node()
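
The ir_split_partially_dead_node() matches outline a two-phase scheme: first mark the set of blocks in which the value is "totally useful" (seeded from the use blocks, closed over predecessors via _push_predecessors() whenever _check_successors() confirms that every successor is already in the set), then clone the instruction for the use regions that fall outside that set and re-point the uses, use lists, and cfg_map. Below is a compact, hypothetical sketch of the marking phase only; the bounded array worklist and bool set stand in for the real ir_list and ir_sparse_set:

    #include <stdbool.h>
    #include <stdint.h>

    #define MAX_BLOCKS 64

    /* Hypothetical, simplified CFG: 0-terminated predecessor/successor lists. */
    typedef struct {
        const uint32_t *preds;
        const uint32_t *succs;
    } blk_t;

    static void mark_totally_useful(const blk_t *blocks,
                                    const uint32_t *use_blocks, uint32_t nuses,
                                    bool *in_set)
    {
        uint32_t worklist[MAX_BLOCKS];
        uint32_t top = 0;

        /* Seed: every block that contains a use is totally useful. */
        for (uint32_t i = 0; i < nuses; i++) {
            uint32_t b = use_blocks[i];
            if (!in_set[b]) {
                in_set[b] = true;
                for (const uint32_t *p = blocks[b].preds; *p && top < MAX_BLOCKS; p++) {
                    worklist[top++] = *p;
                }
            }
        }
        /* Fixed point: a block joins once all of its successors are in the set;
         * its predecessors are then re-examined. */
        while (top > 0) {
            uint32_t b = worklist[--top];
            if (in_set[b]) continue;

            bool all_succs_useful = true;
            for (const uint32_t *s = blocks[b].succs; *s; s++) {
                if (!in_set[*s]) { all_succs_useful = false; break; }
            }
            if (all_succs_useful) {
                in_set[b] = true;
                for (const uint32_t *p = blocks[b].preds; *p && top < MAX_BLOCKS; p++) {
                    worklist[top++] = *p;
                }
            }
        }
    }
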
478 static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2) in ir_gcm_dominates() argument
480 uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth; in ir_gcm_dominates()
481 const ir_block *bb2 = &ctx->cfg_blocks[b2]; in ir_gcm_dominates()
485 bb2 = &ctx->cfg_blocks[b2]; in ir_gcm_dominates()
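
ir_gcm_dominates() tests dominance by walking b2 up its dominator-parent chain until its depth is no greater than b1's, then comparing the two blocks. A standalone version over the same kind of simplified record used in the sketches above:

    #include <stdbool.h>
    #include <stdint.h>

    typedef struct { uint32_t dom_depth, dom_parent; } blk_t;

    /* b1 dominates b2 iff b1 appears on b2's dominator chain at b1's depth. */
    static bool dominates(const blk_t *blocks, uint32_t b1, uint32_t b2)
    {
        uint32_t b1_depth = blocks[b1].dom_depth;

        while (blocks[b2].dom_depth > b1_depth) {
            b2 = blocks[b2].dom_parent;
        }
        return b1 == b2;
    }
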
491 static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b) in ir_gcm_schedule_late() argument
496 IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR); in ir_gcm_schedule_late()
497 IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI); in ir_gcm_schedule_late()
501 ctx->cfg_map[ref] = b; in ir_gcm_schedule_late()
503 for (n = 0; n < ctx->use_lists[ref].count; n++) { in ir_gcm_schedule_late()
504 use = ctx->use_edges[ctx->use_lists[ref].refs + n]; in ir_gcm_schedule_late()
505 b = ctx->cfg_map[use]; in ir_gcm_schedule_late()
507 ir_gcm_schedule_late(ctx, use, b); in ir_gcm_schedule_late()
508 b = ctx->cfg_map[use]; in ir_gcm_schedule_late()
512 } else if (ctx->ir_base[use].op == IR_PHI) { in ir_gcm_schedule_late()
513 ir_insn *insn = &ctx->ir_base[use]; in ir_gcm_schedule_late()
515 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ in ir_gcm_schedule_late()
520 b = ctx->cfg_map[*q]; in ir_gcm_schedule_late()
521 lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); in ir_gcm_schedule_late()
526 lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); in ir_gcm_schedule_late()
…IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late one"); in ir_gcm_schedule_late()
533 if (ctx->use_lists[ref].count > 1 in ir_gcm_schedule_late()
534 && ir_split_partially_dead_node(ctx, ref, lca)) { in ir_gcm_schedule_late()
539 if (lca != ctx->cfg_map[ref]) { in ir_gcm_schedule_late()
540 b = ir_gcm_select_best_block(ctx, ref, lca); in ir_gcm_schedule_late()
542 ctx->cfg_map[ref] = b; in ir_gcm_schedule_late()
545 if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) { in ir_gcm_schedule_late()
546 ir_use_list *use_list = &ctx->use_lists[ref]; in ir_gcm_schedule_late()
549 for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { in ir_gcm_schedule_late()
551 if (ctx->ir_base[use].op == IR_OVERFLOW) { in ir_gcm_schedule_late()
552 ctx->cfg_map[use] = b; in ir_gcm_schedule_late()
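
ir_gcm_schedule_late() derives the late placement as the LCA of the blocks of all uses, where a PHI use contributes the predecessor block selected from the controlling MERGE operand rather than the PHI's own block, and OVERFLOW readers are kept in the same block as the arithmetic that produced the flag. A hedged sketch of the LCA-of-uses accumulation, assuming the use blocks have already been resolved that way:

    #include <stdint.h>

    typedef struct { uint32_t dom_depth, dom_parent; } blk_t;

    static uint32_t find_lca(const blk_t *blocks, uint32_t b1, uint32_t b2)
    {
        while (blocks[b1].dom_depth > blocks[b2].dom_depth) b1 = blocks[b1].dom_parent;
        while (blocks[b2].dom_depth > blocks[b1].dom_depth) b2 = blocks[b2].dom_parent;
        while (b1 != b2) { b1 = blocks[b1].dom_parent; b2 = blocks[b2].dom_parent; }
        return b1;
    }

    /* Late placement: lowest common ancestor of every use's block. */
    static uint32_t late_block(const blk_t *blocks, const uint32_t *use_blocks, uint32_t nuses)
    {
        uint32_t lca = 0;                        /* 0 means "no block yet" */

        for (uint32_t i = 0; i < nuses; i++) {
            uint32_t b = use_blocks[i];
            lca = !lca ? b : find_lca(blocks, lca, b);
        }
        return lca;
    }
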
560 int ir_gcm(ir_ctx *ctx) in ir_gcm() argument
570 IR_ASSERT(ctx->cfg_map); in ir_gcm()
571 _blocks = ctx->cfg_map; in ir_gcm()
573 ir_list_init(&queue_early, ctx->insns_count); in ir_gcm()
575 if (ctx->cfg_blocks_count == 1) { in ir_gcm()
576 ref = ctx->cfg_blocks[1].end; in ir_gcm()
578 insn = &ctx->ir_base[ref]; in ir_gcm()
588 use_list = &ctx->use_lists[1]; in ir_gcm()
590 for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { in ir_gcm()
592 use_insn = &ctx->ir_base[ref]; in ir_gcm()
594 ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR; in ir_gcm()
602 insn = &ctx->ir_base[ref]; in ir_gcm()
618 ir_list_init(&queue_late, ctx->insns_count); in ir_gcm()
621 b = ctx->cfg_blocks_count; in ir_gcm()
622 for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) { in ir_gcm()
627 insn = &ctx->ir_base[ref]; in ir_gcm()
636 insn = &ctx->ir_base[ref]; in ir_gcm()
651 use_list = &ctx->use_lists[ref]; in ir_gcm()
654 for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { in ir_gcm()
656 use_insn = &ctx->ir_base[ref]; in ir_gcm()
659 if (EXPECTED(ctx->use_lists[ref].count != 0)) { in ir_gcm()
678 insn = &ctx->ir_base[ref]; in ir_gcm()
683 ir_gcm_schedule_early(ctx, ref, &queue_late); in ir_gcm()
689 if (ctx->flags & IR_DEBUG_GCM) { in ir_gcm()
691 for (n = 1; n < ctx->insns_count; n++) { in ir_gcm()
692 fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); in ir_gcm()
700 ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1); in ir_gcm()
701 ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1); in ir_gcm()
702 ctx->data = &data; in ir_gcm()
709 b = ctx->cfg_map[ref]; in ir_gcm()
711 ir_gcm_schedule_late(ctx, ref, b); in ir_gcm()
718 ctx->data = NULL; in ir_gcm()
725 if (ctx->flags & IR_DEBUG_GCM) { in ir_gcm()
727 for (n = 1; n < ctx->insns_count; n++) { in ir_gcm()
728 fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); in ir_gcm()
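
Taken together, the ir_gcm() matches show the driver's contract: ctx->cfg_map must already exist, blocks and dominator depths must be computed, pinned instructions are assigned first, and every remaining instruction then gets its early and late placement. A hedged usage outline follows; ir_build_cfg(), ir_build_dominators_tree() and ir_find_loops() are assumptions about the surrounding pipeline (they are not part of this listing), and the int success-flag convention for them is likewise assumed, while the int ir_gcm() and int ir_schedule() signatures do appear above:

    #include "ir.h"   /* assumed project header providing ir_ctx and the passes */

    /* Sketch: run global code motion, then linearize the result. */
    static int schedule_function(ir_ctx *ctx)
    {
        if (!ir_build_cfg(ctx)                  /* basic blocks + cfg_map      */
         || !ir_build_dominators_tree(ctx)      /* dom_parent / dom_depth      */
         || !ir_find_loops(ctx)) {              /* loop headers / loop depths  */
            return 0;
        }
        if (!ir_gcm(ctx)) {                     /* assign each insn to a block */
            return 0;
        }
        return ir_schedule(ctx);                /* rebuild ctx in linear order */
    }
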
736 static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat) in ir_xlat_binding() argument
741 ir_hashtab *binding = ctx->binding; in ir_xlat_binding()
752 IR_ASSERT(key < ctx->insns_count); in ir_xlat_binding()
784 int ir_schedule(ir_ctx *ctx) in ir_schedule() argument
792 uint32_t *_blocks = ctx->cfg_map; in ir_schedule()
793 ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); in ir_schedule()
794 ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); in ir_schedule()
803 prev_b_end = ctx->cfg_blocks[1].end; in ir_schedule()
806 for (i = 2, j = 1; i < ctx->insns_count; i++) { in ir_schedule()
815 bb = &ctx->cfg_blocks[b]; in ir_schedule()
832 bb = &ctx->cfg_blocks[b]; in ir_schedule()
839 k = ctx->cfg_blocks[b + 1].start; in ir_schedule()
854 bb = &ctx->cfg_blocks[b]; in ir_schedule()
859 insn = &ctx->ir_base[k]; in ir_schedule()
862 insn = &ctx->ir_base[k]; in ir_schedule()
874 if (ctx->flags & IR_DEBUG_SCHEDULE) { in ir_schedule()
882 _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); in ir_schedule()
883 _xlat += ctx->consts_count; in ir_schedule()
892 for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { in ir_schedule()
897 insn = &ctx->ir_base[i]; in ir_schedule()
905 insn = &ctx->ir_base[i]; in ir_schedule()
912 insn = &ctx->ir_base[i]; in ir_schedule()
928 insn = &ctx->ir_base[i]; in ir_schedule()
933 ir_insn *end = &ctx->ir_base[j]; in ir_schedule()
941 insn = &ctx->ir_base[i]; in ir_schedule()
967 if (ctx->flags & IR_DEBUG_SCHEDULE) { in ir_schedule()
981 insn = &ctx->ir_base[i]; in ir_schedule()
992 insn = &ctx->ir_base[i]; in ir_schedule()
1005 if (ctx->flags & IR_DEBUG_SCHEDULE) { in ir_schedule()
1015 if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) { in ir_schedule()
1025 _xlat -= ctx->consts_count; in ir_schedule()
1029 ctx->prev_ref = _prev; in ir_schedule()
1030 ctx->flags2 |= IR_LINEAR; in ir_schedule()
1031 ir_truncate(ctx); in ir_schedule()
1040 ir_init(&new_ctx, ctx->flags, consts_count, insns_count); in ir_schedule()
1042 new_ctx.flags2 = ctx->flags2; in ir_schedule()
1043 new_ctx.ret_type = ctx->ret_type; in ir_schedule()
1044 new_ctx.mflags = ctx->mflags; in ir_schedule()
1045 new_ctx.spill_base = ctx->spill_base; in ir_schedule()
1046 new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone; in ir_schedule()
1047 new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size; in ir_schedule()
1048 new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size; in ir_schedule()
1049 new_ctx.fixed_regset = ctx->fixed_regset; in ir_schedule()
1050 new_ctx.fixed_save_regset = ctx->fixed_save_regset; in ir_schedule()
1051 new_ctx.entries_count = ctx->entries_count; in ir_schedule()
1053 new_ctx.deoptimization_exits = ctx->deoptimization_exits; in ir_schedule()
1054 new_ctx.get_exit_addr = ctx->get_exit_addr; in ir_schedule()
1055 new_ctx.get_veneer = ctx->get_veneer; in ir_schedule()
1056 new_ctx.set_veneer = ctx->set_veneer; in ir_schedule()
1058 new_ctx.loader = ctx->loader; in ir_schedule()
1061 if (consts_count == ctx->consts_count) { in ir_schedule()
1064 insn = &ctx->ir_base[ref]; in ir_schedule()
1068 if (ctx->strtab.data) { in ir_schedule()
1073 const char *proto = ir_get_strl(ctx, new_insn->proto, &len); in ir_schedule()
1077 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name)); in ir_schedule()
1080 const char *proto = ir_get_strl(ctx, new_insn->proto, &len); in ir_schedule()
1084 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name)); in ir_schedule()
1093 for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) { in ir_schedule()
1103 const char *proto = ir_get_strl(ctx, insn->proto, &len); in ir_schedule()
1109 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name)); in ir_schedule()
1112 const char *proto = ir_get_strl(ctx, insn->proto, &len); in ir_schedule()
1118 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name)); in ir_schedule()
1129 new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); in ir_schedule()
1132 new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref)); in ir_schedule()
1143 use_list = &ctx->use_lists[i]; in ir_schedule()
1147 ref = ctx->use_edges[use_list->refs]; in ir_schedule()
1154 p = &ctx->use_edges[use_list->refs]; in ir_schedule()
1170 insn = &ctx->ir_base[i]; in ir_schedule()
1184 new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2)); in ir_schedule()
1187 const char *proto = ir_get_strl(ctx, insn->op2, &len); in ir_schedule()
1258 IR_ASSERT(ctx->use_edges_count >= use_edges_count); in ir_schedule()
1262 if (ctx->binding) { in ir_schedule()
1263 ir_xlat_binding(ctx, _xlat); in ir_schedule()
1264 new_ctx.binding = ctx->binding; in ir_schedule()
1265 ctx->binding = NULL; in ir_schedule()
1268 _xlat -= ctx->consts_count; in ir_schedule()
1271 new_ctx.cfg_blocks_count = ctx->cfg_blocks_count; in ir_schedule()
1272 new_ctx.cfg_edges_count = ctx->cfg_edges_count; in ir_schedule()
1273 new_ctx.cfg_blocks = ctx->cfg_blocks; in ir_schedule()
1274 new_ctx.cfg_edges = ctx->cfg_edges; in ir_schedule()
1275 ctx->cfg_blocks = NULL; in ir_schedule()
1276 ctx->cfg_edges = NULL; in ir_schedule()
1278 ir_free(ctx); in ir_schedule()
1281 memcpy(ctx, &new_ctx, sizeof(ir_ctx)); in ir_schedule()
1282 ctx->flags2 |= IR_LINEAR; in ir_schedule()
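
The tail of the ir_schedule() matches (transplanting cfg_blocks/cfg_edges into new_ctx, clearing the old pointers, then ir_free(ctx) followed by memcpy over the old context) is a rebuild-and-swap idiom: move the arrays that stay valid, make sure the old free won't release them, then replace the old object in place. A generic, hypothetical illustration with a made-up context_t, not the real ir_ctx:

    #include <stdlib.h>
    #include <string.h>

    typedef struct { int *blocks; int *edges; int *scratch; } context_t;

    static void context_free(context_t *c)
    {
        free(c->blocks);
        free(c->edges);
        free(c->scratch);
    }

    static void rebuild_and_swap(context_t *old_ctx, context_t *new_ctx)
    {
        /* Transfer ownership of the arrays that survive the rebuild and clear
         * the old pointers so context_free() does not release them. */
        new_ctx->blocks = old_ctx->blocks;  old_ctx->blocks = NULL;
        new_ctx->edges  = old_ctx->edges;   old_ctx->edges  = NULL;

        context_free(old_ctx);                      /* frees only what was not moved */
        memcpy(old_ctx, new_ctx, sizeof(*old_ctx)); /* new context replaces the old  */
    }
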
1289 void ir_build_prev_refs(ir_ctx *ctx) in ir_build_prev_refs() argument
1296 ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); in ir_build_prev_refs()
1298 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { in ir_build_prev_refs()
1300 for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) { in ir_build_prev_refs()
1301 ctx->prev_ref[i] = prev; in ir_build_prev_refs()
1307 ctx->prev_ref[i] = prev; in ir_build_prev_refs()