Lines Matching refs:ctx
20 static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late) in ir_gcm_schedule_early() argument
27 insn = &ctx->ir_base[ref]; in ir_gcm_schedule_early()
39 b = ctx->cfg_map[input]; in ir_gcm_schedule_early()
43 b = ir_gcm_schedule_early(ctx, input, queue_late); in ir_gcm_schedule_early()
45 if (dom_depth < ctx->cfg_blocks[b].dom_depth) { in ir_gcm_schedule_early()
46 dom_depth = ctx->cfg_blocks[b].dom_depth; in ir_gcm_schedule_early()
52 ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result); in ir_gcm_schedule_early()
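The matches above cover the "schedule early" step: a node's earliest legal block is the deepest block (by dom_depth) among the early blocks of its inputs. Below is a minimal sketch of that recursion; toy_block, toy_node and sched_early are illustrative stand-ins, not the IR API, and a pinned node is modelled simply as a node whose block is already set.

    /* toy dominator depths and a two-input node graph; block 0 means "not placed yet" */
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t dom_depth; } toy_block;
    typedef struct { int inputs[2]; int n_inputs; uint32_t block; } toy_node;

    static toy_block blocks[4];
    static toy_node  nodes[4];

    /* earliest legal block = deepest-dominating block among the inputs' early blocks */
    static uint32_t sched_early(int ref)
    {
        uint32_t best = 1;                      /* the entry block dominates everything */
        uint32_t depth = blocks[1].dom_depth;
        int i;

        if (nodes[ref].block) {
            return nodes[ref].block;            /* already placed (or pinned) */
        }
        for (i = 0; i < nodes[ref].n_inputs; i++) {
            uint32_t b = sched_early(nodes[ref].inputs[i]);

            if (blocks[b].dom_depth > depth) {  /* keep the deepest input block */
                depth = blocks[b].dom_depth;
                best = b;
            }
        }
        nodes[ref].block = best;
        return best;
    }

    int main(void)
    {
        blocks[1].dom_depth = 0;
        blocks[2].dom_depth = 1;
        blocks[3].dom_depth = 2;
        nodes[1] = (toy_node){{0, 0}, 0, 2};    /* operand pinned to block 2 */
        nodes[2] = (toy_node){{0, 0}, 0, 3};    /* operand pinned to block 3 */
        nodes[3] = (toy_node){{1, 2}, 2, 0};    /* floating node: lands in block 3 */
        printf("early block of node 3: %u\n", sched_early(3));
        return 0;
    }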
58 static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2) in ir_gcm_find_lca() argument
62 dom_depth = ctx->cfg_blocks[b2].dom_depth; in ir_gcm_find_lca()
63 while (ctx->cfg_blocks[b1].dom_depth > dom_depth) { in ir_gcm_find_lca()
64 b1 = ctx->cfg_blocks[b1].dom_parent; in ir_gcm_find_lca()
66 dom_depth = ctx->cfg_blocks[b1].dom_depth; in ir_gcm_find_lca()
67 while (ctx->cfg_blocks[b2].dom_depth > dom_depth) { in ir_gcm_find_lca()
68 b2 = ctx->cfg_blocks[b2].dom_parent; in ir_gcm_find_lca()
71 b1 = ctx->cfg_blocks[b1].dom_parent; in ir_gcm_find_lca()
72 b2 = ctx->cfg_blocks[b2].dom_parent; in ir_gcm_find_lca()
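These lines are essentially the whole LCA walk over the dominator tree: lift the deeper block until both sit at the same dom_depth, then climb both in lockstep until they meet. A self-contained sketch of that walk follows; toy_block and find_lca are illustrative names that read dom_depth/dom_parent the way the real code reads ctx->cfg_blocks.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t dom_parent; uint32_t dom_depth; } toy_block;

    static uint32_t find_lca(const toy_block *bb, uint32_t b1, uint32_t b2)
    {
        while (bb[b1].dom_depth > bb[b2].dom_depth) {
            b1 = bb[b1].dom_parent;       /* lift the deeper block first */
        }
        while (bb[b2].dom_depth > bb[b1].dom_depth) {
            b2 = bb[b2].dom_parent;
        }
        while (b1 != b2) {                /* climb together until they meet */
            b1 = bb[b1].dom_parent;
            b2 = bb[b2].dom_parent;
        }
        return b1;
    }

    int main(void)
    {
        /* dominator tree: 1 -> {2, 3}, 3 -> 4 */
        toy_block bb[5] = {{0,0}, {0,0}, {1,1}, {1,1}, {3,2}};
        printf("lca(2, 4) = %u\n", find_lca(bb, 2, 4));  /* prints 1 */
        return 0;
    }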
77 static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca) in ir_gcm_select_best_block() argument
79 ir_block *bb = &ctx->cfg_blocks[lca]; in ir_gcm_select_best_block()
88 if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) { in ir_gcm_select_best_block()
89 ir_use_list *use_list = &ctx->use_lists[ref]; in ir_gcm_select_best_block()
92 ir_ref use = ctx->use_edges[use_list->refs]; in ir_gcm_select_best_block()
93 ir_insn *insn = &ctx->ir_base[use]; in ir_gcm_select_best_block()
102 flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; in ir_gcm_select_best_block()
104 && !(ctx->binding && ir_binding_find(ctx, ref))) { in ir_gcm_select_best_block()
112 bb = &ctx->cfg_blocks[b]; in ir_gcm_select_best_block()
117 ir_block *loop_bb = &ctx->cfg_blocks[best]; in ir_gcm_select_best_block()
120 loop_bb = &ctx->cfg_blocks[loop_bb->loop_header]; in ir_gcm_select_best_block()
124 uint32_t *p = ctx->cfg_edges + loop_bb->predecessors; in ir_gcm_select_best_block()
137 flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags; in ir_gcm_select_best_block()
139 && !(ctx->binding && ir_binding_find(ctx, ref))) { in ir_gcm_select_best_block()
145 } while (b != ctx->cfg_map[ref]); in ir_gcm_select_best_block()
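The select-best-block matches implement a hoisting heuristic: starting at the LCA and walking the dominator chain toward the early block, keep the candidate with the shallowest loop nesting, subject to the flag and binding checks visible above. A simplified sketch that keeps only the loop-depth part is below; toy_block, select_best and the loop_depth field are stand-ins, and the sketch assumes the early block dominates the LCA.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t dom_parent; uint32_t loop_depth; } toy_block;

    static uint32_t select_best(const toy_block *bb, uint32_t early, uint32_t lca)
    {
        uint32_t b = lca, best = lca;

        /* assumes "early" dominates "lca", so the walk terminates at "early" */
        while (1) {
            if (bb[b].loop_depth < bb[best].loop_depth) {
                best = b;                 /* shallower loop nesting wins */
            }
            if (b == early) {
                break;
            }
            b = bb[b].dom_parent;         /* step toward the early block */
        }
        return best;
    }

    int main(void)
    {
        /* 1 -> 2 -> 3; block 3 is a loop body (depth 1), block 2 is outside it */
        toy_block bb[4] = {{0,0}, {0,0}, {1,0}, {2,1}};
        printf("best block = %u\n", select_best(bb, 2, 3));  /* prints 2 */
        return 0;
    }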
162 static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) in _push_predecessors() argument
167 p = ctx->cfg_edges + bb->predecessors; in _push_predecessors()
178 static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data) in _check_successors() argument
183 IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors])); in _check_successors()
187 p = ctx->cfg_edges + bb->successors; in _check_successors()
200 static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b) in ir_split_partially_dead_node() argument
206 ir_gcm_split_data *data = ctx->data; in ir_split_partially_dead_node()
208 IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count); in ir_split_partially_dead_node()
215 use_list = &ctx->use_lists[ref]; in ir_split_partially_dead_node()
217 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { in ir_split_partially_dead_node()
219 insn = &ctx->ir_base[use]; in ir_split_partially_dead_node()
222 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ in ir_split_partially_dead_node()
227 i = ctx->cfg_map[*q]; in ir_split_partially_dead_node()
228 IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); in ir_split_partially_dead_node()
236 i = ctx->cfg_map[use]; in ir_split_partially_dead_node()
240 IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count); in ir_split_partially_dead_node()
249 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
268 _push_predecessors(ctx, &ctx->cfg_blocks[i], data); in ir_split_partially_dead_node()
274 ir_block *bb = &ctx->cfg_blocks[i]; in ir_split_partially_dead_node()
276 if (_check_successors(ctx, bb, data)) { in ir_split_partially_dead_node()
283 _push_predecessors(ctx, bb, data); in ir_split_partially_dead_node()
291 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
306 use_list = &ctx->use_lists[ref]; in ir_split_partially_dead_node()
323 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { in ir_split_partially_dead_node()
325 insn = &ctx->ir_base[use]; in ir_split_partially_dead_node()
328 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ in ir_split_partially_dead_node()
338 j = i = ctx->cfg_map[*q]; in ir_split_partially_dead_node()
339 while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { in ir_split_partially_dead_node()
340 j = ctx->cfg_blocks[j].idom; in ir_split_partially_dead_node()
358 j = i = ctx->cfg_map[use]; in ir_split_partially_dead_node()
361 while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) { in ir_split_partially_dead_node()
362 j = ctx->cfg_blocks[j].idom; in ir_split_partially_dead_node()
382 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
399 insn = &ctx->ir_base[ref]; in ir_split_partially_dead_node()
402 clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3); in ir_split_partially_dead_node()
403 insn = &ctx->ir_base[ref]; in ir_split_partially_dead_node()
404 if (insn->op1 > 0) ir_use_list_add(ctx, insn->op1, clone); in ir_split_partially_dead_node()
405 if (insn->op2 > 0) ir_use_list_add(ctx, insn->op2, clone); in ir_split_partially_dead_node()
406 if (insn->op3 > 0) ir_use_list_add(ctx, insn->op3, clone); in ir_split_partially_dead_node()
410 ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list)); in ir_split_partially_dead_node()
411 ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t)); in ir_split_partially_dead_node()
412 n = ctx->use_lists[ref].refs; in ir_split_partially_dead_node()
422 ctx->cfg_map[clone] = clones[i].block; in ir_split_partially_dead_node()
423 ctx->use_lists[clone].count = clones[i].use_count; in ir_split_partially_dead_node()
424 ctx->use_lists[clone].refs = n; in ir_split_partially_dead_node()
429 ctx->use_edges[n++] = use; in ir_split_partially_dead_node()
433 ir_insn *insn = &ctx->ir_base[use]; in ir_split_partially_dead_node()
439 j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)]; in ir_split_partially_dead_node()
441 uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth; in ir_split_partially_dead_node()
442 while (ctx->cfg_blocks[j].dom_depth > dom_depth) { in ir_split_partially_dead_node()
443 j = ctx->cfg_blocks[j].dom_parent; in ir_split_partially_dead_node()
470 if (ctx->flags & IR_DEBUG_GCM_SPLIT) { in ir_split_partially_dead_node()
471 ir_check(ctx); in ir_split_partially_dead_node()
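The splitting code above is driven by a set of "totally useful" blocks: the set is seeded with the blocks that contain uses of the node (a PHI use charged to the matching MERGE predecessor), and a predecessor joins the set once all of its successors are already in it; the node is then cloned per region that actually needs the value. Below is a small sketch of just that backward propagation on a hand-written toy CFG; succ/pred/totally_useful are illustrative, while the real code uses ir_sparse_set and ir_list worklists.

    #include <stdbool.h>
    #include <stdio.h>

    #define NB 6  /* blocks 1..5: 1 -> {2, 3}, 2 -> 4, 3 -> 4, 4 -> 5 */

    static int succ[NB][2] = {{0}, {2,3}, {4,0}, {4,0}, {5,0}, {0,0}};
    static int n_succ[NB]  = {0, 2, 1, 1, 1, 0};
    static int pred[NB][2] = {{0}, {0,0}, {1,0}, {1,0}, {2,3}, {4,0}};
    static int n_pred[NB]  = {0, 0, 1, 1, 2, 1};

    int main(void)
    {
        bool totally_useful[NB] = {false};
        int worklist[16], top = 0, i, j;

        totally_useful[4] = true;              /* block 4 contains a use */
        for (j = 0; j < n_pred[4]; j++) {
            worklist[top++] = pred[4][j];      /* try its predecessors next */
        }
        while (top > 0) {
            int b = worklist[--top];
            bool all = true;

            if (totally_useful[b]) {
                continue;
            }
            for (i = 0; i < n_succ[b]; i++) {  /* all successors already useful? */
                all = all && totally_useful[succ[b][i]];
            }
            if (all) {
                totally_useful[b] = true;
                for (j = 0; j < n_pred[b]; j++) {
                    worklist[top++] = pred[b][j];
                }
            }
        }
        for (i = 1; i < NB; i++) {
            printf("block %d: %s\n", i, totally_useful[i] ? "totally useful" : "not");
        }
        return 0;
    }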
480 static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2) in ir_gcm_dominates() argument
482 uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth; in ir_gcm_dominates()
483 const ir_block *bb2 = &ctx->cfg_blocks[b2]; in ir_gcm_dominates()
487 bb2 = &ctx->cfg_blocks[b2]; in ir_gcm_dominates()
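The dominance test reduces to climbing b2's dominator chain until its depth is no greater than b1's, then checking whether the walk landed on b1. A tiny sketch, with toy_block/dominates as stand-in names:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t dom_parent; uint32_t dom_depth; } toy_block;

    static bool dominates(const toy_block *bb, uint32_t b1, uint32_t b2)
    {
        while (bb[b2].dom_depth > bb[b1].dom_depth) {
            b2 = bb[b2].dom_parent;       /* climb until b2 is no deeper than b1 */
        }
        return b1 == b2;
    }

    int main(void)
    {
        /* dominator tree: 1 -> 2 -> 3 */
        toy_block bb[4] = {{0,0}, {0,0}, {1,1}, {2,2}};
        printf("%d %d\n", dominates(bb, 1, 3), dominates(bb, 3, 2));  /* 1 0 */
        return 0;
    }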
493 static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b) in ir_gcm_schedule_late() argument
498 IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR); in ir_gcm_schedule_late()
499 IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI); in ir_gcm_schedule_late()
503 ctx->cfg_map[ref] = b; in ir_gcm_schedule_late()
505 for (n = 0; n < ctx->use_lists[ref].count; n++) { in ir_gcm_schedule_late()
506 use = ctx->use_edges[ctx->use_lists[ref].refs + n]; in ir_gcm_schedule_late()
507 b = ctx->cfg_map[use]; in ir_gcm_schedule_late()
509 ir_gcm_schedule_late(ctx, use, b); in ir_gcm_schedule_late()
510 b = ctx->cfg_map[use]; in ir_gcm_schedule_late()
514 } else if (ctx->ir_base[use].op == IR_PHI) { in ir_gcm_schedule_late()
515 ir_insn *insn = &ctx->ir_base[use]; in ir_gcm_schedule_late()
517 ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */ in ir_gcm_schedule_late()
522 b = ctx->cfg_map[*q]; in ir_gcm_schedule_late()
523 lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); in ir_gcm_schedule_late()
528 lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b); in ir_gcm_schedule_late()
532 …IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the l… in ir_gcm_schedule_late()
535 if (ctx->use_lists[ref].count > 1 in ir_gcm_schedule_late()
536 && ir_split_partially_dead_node(ctx, ref, lca)) { in ir_gcm_schedule_late()
541 if (lca != ctx->cfg_map[ref]) { in ir_gcm_schedule_late()
542 b = ir_gcm_select_best_block(ctx, ref, lca); in ir_gcm_schedule_late()
544 ctx->cfg_map[ref] = b; in ir_gcm_schedule_late()
547 if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) { in ir_gcm_schedule_late()
548 ir_use_list *use_list = &ctx->use_lists[ref]; in ir_gcm_schedule_late()
551			for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { in ir_gcm_schedule_late()
553 if (ctx->ir_base[use].op == IR_OVERFLOW) { in ir_gcm_schedule_late()
554 ctx->cfg_map[use] = b; in ir_gcm_schedule_late()
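Scheduling late computes the dominator-tree LCA of all blocks that use the node, with a PHI use counted in the corresponding MERGE predecessor (hence the walk over the MERGE inputs above); the final block is then chosen on the dominator path between the early block and that LCA. A sketch of the LCA-of-uses fold is below; lca2 repeats the earlier two-pointer walk only to keep the example self-contained, and all names are illustrative.

    #include <stdint.h>
    #include <stdio.h>

    typedef struct { uint32_t dom_parent; uint32_t dom_depth; } toy_block;

    static uint32_t lca2(const toy_block *bb, uint32_t b1, uint32_t b2)
    {
        while (bb[b1].dom_depth > bb[b2].dom_depth) {
            b1 = bb[b1].dom_parent;
        }
        while (bb[b2].dom_depth > bb[b1].dom_depth) {
            b2 = bb[b2].dom_parent;
        }
        while (b1 != b2) {
            b1 = bb[b1].dom_parent;
            b2 = bb[b2].dom_parent;
        }
        return b1;
    }

    /* fold the LCA over every block that uses the node (0 = "no LCA yet") */
    static uint32_t lca_of_uses(const toy_block *bb, const uint32_t *use_blocks, int n)
    {
        uint32_t lca = 0;
        int i;

        for (i = 0; i < n; i++) {
            lca = lca ? lca2(bb, lca, use_blocks[i]) : use_blocks[i];
        }
        return lca;
    }

    int main(void)
    {
        /* dominator tree: 1 -> {2, 3}; uses in blocks 2 and 3 => late block is 1 */
        toy_block bb[4] = {{0,0}, {0,0}, {1,1}, {1,1}};
        uint32_t uses[2] = {2, 3};
        printf("late block = %u\n", lca_of_uses(bb, uses, 2));
        return 0;
    }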
562 int ir_gcm(ir_ctx *ctx) in ir_gcm() argument
572 IR_ASSERT(ctx->cfg_map); in ir_gcm()
573 _blocks = ctx->cfg_map; in ir_gcm()
575 ir_list_init(&queue_early, ctx->insns_count); in ir_gcm()
577 if (ctx->cfg_blocks_count == 1) { in ir_gcm()
578 ref = ctx->cfg_blocks[1].end; in ir_gcm()
580 insn = &ctx->ir_base[ref]; in ir_gcm()
590 use_list = &ctx->use_lists[1]; in ir_gcm()
592 for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { in ir_gcm()
594 use_insn = &ctx->ir_base[ref]; in ir_gcm()
596 ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR; in ir_gcm()
604 insn = &ctx->ir_base[ref]; in ir_gcm()
620 ir_list_init(&queue_late, ctx->insns_count); in ir_gcm()
623 b = ctx->cfg_blocks_count; in ir_gcm()
624 for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) { in ir_gcm()
629 insn = &ctx->ir_base[ref]; in ir_gcm()
638 insn = &ctx->ir_base[ref]; in ir_gcm()
653 use_list = &ctx->use_lists[ref]; in ir_gcm()
656 for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) { in ir_gcm()
658 use_insn = &ctx->ir_base[ref]; in ir_gcm()
661 if (EXPECTED(ctx->use_lists[ref].count != 0)) { in ir_gcm()
680 insn = &ctx->ir_base[ref]; in ir_gcm()
685 ir_gcm_schedule_early(ctx, ref, &queue_late); in ir_gcm()
691 if (ctx->flags & IR_DEBUG_GCM) { in ir_gcm()
693 for (n = 1; n < ctx->insns_count; n++) { in ir_gcm()
694 fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); in ir_gcm()
702 ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1); in ir_gcm()
703 ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1); in ir_gcm()
704 ctx->data = &data; in ir_gcm()
711 b = ctx->cfg_map[ref]; in ir_gcm()
713 ir_gcm_schedule_late(ctx, ref, b); in ir_gcm()
720 ctx->data = NULL; in ir_gcm()
727 if (ctx->flags & IR_DEBUG_GCM) { in ir_gcm()
729 for (n = 1; n < ctx->insns_count; n++) { in ir_gcm()
730 fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]); in ir_gcm()
738 static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat) in ir_xlat_binding() argument
743 ir_hashtab *binding = ctx->binding; in ir_xlat_binding()
754 IR_ASSERT(key < ctx->insns_count); in ir_xlat_binding()
786 int ir_schedule(ir_ctx *ctx) in ir_schedule() argument
794 uint32_t *_blocks = ctx->cfg_map; in ir_schedule()
795 ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); in ir_schedule()
796 ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); in ir_schedule()
805 prev_b_end = ctx->cfg_blocks[1].end; in ir_schedule()
808 for (i = 2, j = 1; i < ctx->insns_count; i++) { in ir_schedule()
817 bb = &ctx->cfg_blocks[b]; in ir_schedule()
834 bb = &ctx->cfg_blocks[b]; in ir_schedule()
841 k = ctx->cfg_blocks[b + 1].start; in ir_schedule()
856 bb = &ctx->cfg_blocks[b]; in ir_schedule()
861 insn = &ctx->ir_base[k]; in ir_schedule()
864 insn = &ctx->ir_base[k]; in ir_schedule()
876 if (ctx->flags & IR_DEBUG_SCHEDULE) { in ir_schedule()
884 _xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref)); in ir_schedule()
885 _xlat += ctx->consts_count; in ir_schedule()
894 for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) { in ir_schedule()
901 insn = &ctx->ir_base[i]; in ir_schedule()
909 insn = &ctx->ir_base[i]; in ir_schedule()
918 insn = &ctx->ir_base[i]; in ir_schedule()
935 insn = &ctx->ir_base[i]; in ir_schedule()
939 if (UNEXPECTED(count < ctx->use_lists[start].count - 1)) { in ir_schedule()
940 ir_use_list *use_list = &ctx->use_lists[start]; in ir_schedule()
944 for (p = &ctx->use_edges[use_list->refs]; count > 0; p++, count--) { in ir_schedule()
946 ir_insn *use_insn = &ctx->ir_base[use]; in ir_schedule()
982 insn = &ctx->ir_base[i]; in ir_schedule()
987 ir_insn *end = &ctx->ir_base[j]; in ir_schedule()
995 insn = &ctx->ir_base[i]; in ir_schedule()
1021 if (ctx->flags & IR_DEBUG_SCHEDULE) { in ir_schedule()
1035 insn = &ctx->ir_base[i]; in ir_schedule()
1046 insn = &ctx->ir_base[i]; in ir_schedule()
1059 if (ctx->flags & IR_DEBUG_SCHEDULE) { in ir_schedule()
1069 if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) { in ir_schedule()
1079 _xlat -= ctx->consts_count; in ir_schedule()
1083 ctx->prev_ref = _prev; in ir_schedule()
1084 ctx->flags2 |= IR_LINEAR; in ir_schedule()
1085 ir_truncate(ctx); in ir_schedule()
1094 ir_init(&new_ctx, ctx->flags, consts_count, insns_count); in ir_schedule()
1096 new_ctx.flags2 = ctx->flags2; in ir_schedule()
1097 new_ctx.ret_type = ctx->ret_type; in ir_schedule()
1098 new_ctx.mflags = ctx->mflags; in ir_schedule()
1099 new_ctx.spill_base = ctx->spill_base; in ir_schedule()
1100 new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone; in ir_schedule()
1101 new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size; in ir_schedule()
1102 new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size; in ir_schedule()
1103 new_ctx.fixed_regset = ctx->fixed_regset; in ir_schedule()
1104 new_ctx.fixed_save_regset = ctx->fixed_save_regset; in ir_schedule()
1105 new_ctx.entries_count = ctx->entries_count; in ir_schedule()
1107 new_ctx.deoptimization_exits = ctx->deoptimization_exits; in ir_schedule()
1108 new_ctx.get_exit_addr = ctx->get_exit_addr; in ir_schedule()
1109 new_ctx.get_veneer = ctx->get_veneer; in ir_schedule()
1110 new_ctx.set_veneer = ctx->set_veneer; in ir_schedule()
1112 new_ctx.loader = ctx->loader; in ir_schedule()
1115 if (consts_count == ctx->consts_count) { in ir_schedule()
1118 insn = &ctx->ir_base[ref]; in ir_schedule()
1122 if (ctx->strtab.data) { in ir_schedule()
1127 const char *proto = ir_get_strl(ctx, new_insn->proto, &len); in ir_schedule()
1131 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name)); in ir_schedule()
1134 const char *proto = ir_get_strl(ctx, new_insn->proto, &len); in ir_schedule()
1138 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name)); in ir_schedule()
1147 for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) { in ir_schedule()
1157 const char *proto = ir_get_strl(ctx, insn->proto, &len); in ir_schedule()
1163 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name)); in ir_schedule()
1166 const char *proto = ir_get_strl(ctx, insn->proto, &len); in ir_schedule()
1172 new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name)); in ir_schedule()
1183 new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t)); in ir_schedule()
1186 new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref)); in ir_schedule()
1197 use_list = &ctx->use_lists[i]; in ir_schedule()
1201 ref = ctx->use_edges[use_list->refs]; in ir_schedule()
1208 p = &ctx->use_edges[use_list->refs]; in ir_schedule()
1224 insn = &ctx->ir_base[i]; in ir_schedule()
1238 new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2)); in ir_schedule()
1241 const char *proto = ir_get_strl(ctx, insn->op2, &len); in ir_schedule()
1312 IR_ASSERT(ctx->use_edges_count >= use_edges_count); in ir_schedule()
1316 if (ctx->binding) { in ir_schedule()
1317 ir_xlat_binding(ctx, _xlat); in ir_schedule()
1318 new_ctx.binding = ctx->binding; in ir_schedule()
1319 ctx->binding = NULL; in ir_schedule()
1322 _xlat -= ctx->consts_count; in ir_schedule()
1325 new_ctx.cfg_blocks_count = ctx->cfg_blocks_count; in ir_schedule()
1326 new_ctx.cfg_edges_count = ctx->cfg_edges_count; in ir_schedule()
1327 new_ctx.cfg_blocks = ctx->cfg_blocks; in ir_schedule()
1328 new_ctx.cfg_edges = ctx->cfg_edges; in ir_schedule()
1329 ctx->cfg_blocks = NULL; in ir_schedule()
1330 ctx->cfg_edges = NULL; in ir_schedule()
1332 ir_free(ctx); in ir_schedule()
1335 memcpy(ctx, &new_ctx, sizeof(ir_ctx)); in ir_schedule()
1336 ctx->flags2 |= IR_LINEAR; in ir_schedule()
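ir_schedule rebuilds the function into a fresh context in linear block order, and the matches above show the mechanism it relies on: a translation table (_xlat, biased by consts_count so negative constant refs can be looked up through the same array) maps every old reference to its new number, and operands are rewritten through that table as instructions are copied. A toy sketch of renumbering through such a table follows; the instruction layout and names below are hypothetical, and the two-pass structure is a simplification of what the real single pass does.

    #include <stdio.h>

    #define N 6  /* old refs 1..5 */

    typedef struct { int op1, op2; } toy_insn;

    int main(void)
    {
        /* operands always reference earlier refs; 0 means "no operand" */
        toy_insn old_ir[N] = {{0,0}, {0,0}, {1,0}, {1,0}, {2,3}, {4,0}};
        int order[N - 1]   = {1, 3, 2, 4, 5};   /* new linear (block) order of old refs */
        toy_insn new_ir[N];
        int xlat[N] = {0};                      /* old ref -> new ref */
        int i, next = 1;

        /* pass 1: assign new numbers in emission order */
        for (i = 0; i < N - 1; i++) {
            xlat[order[i]] = next++;
        }
        /* pass 2: copy each instruction, rewriting its operands through the table */
        for (i = 0; i < N - 1; i++) {
            toy_insn insn = old_ir[order[i]];

            insn.op1 = xlat[insn.op1];
            insn.op2 = xlat[insn.op2];
            new_ir[xlat[order[i]]] = insn;
        }
        for (i = 1; i < N; i++) {
            printf("new %d: op1=%d op2=%d\n", i, new_ir[i].op1, new_ir[i].op2);
        }
        return 0;
    }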
1343 void ir_build_prev_refs(ir_ctx *ctx) in ir_build_prev_refs() argument
1350 ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref)); in ir_build_prev_refs()
1352 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { in ir_build_prev_refs()
1354 for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) { in ir_build_prev_refs()
1355 ctx->prev_ref[i] = prev; in ir_build_prev_refs()
1361 ctx->prev_ref[i] = prev; in ir_build_prev_refs()
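ir_build_prev_refs records, for each instruction, the reference of the instruction that precedes it in the same block, carrying the previous ref through one forward walk. A trivial sketch of that walk; the real code advances by ir_insn_len(insn) through ctx->ir_base rather than by 1, and the block bounds here are made up.

    #include <stdio.h>

    #define N 8

    int main(void)
    {
        int block_start = 2, block_end = 6;     /* refs 2..6 form one block */
        int prev_ref[N] = {0};
        int i, prev = 0;                        /* 0 = no predecessor */

        for (i = block_start; i <= block_end; i++) {
            prev_ref[i] = prev;                 /* remember who came before */
            prev = i;
        }
        for (i = block_start; i <= block_end; i++) {
            printf("prev_ref[%d] = %d\n", i, prev_ref[i]);
        }
        return 0;
    }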