1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (SCCP - Sparse Conditional Constant Propagation)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * The SCCP algorithm is based on the publication by M. N. Wegman and F. K. Zadeck.
8 * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches"
9 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
10 */
11
12 #include "ir.h"
13 #include "ir_private.h"
14
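/*
 * Lattice encoding used by this pass: every ref has a slot in the "_values"
 * array.  TOP is encoded as IR_UNUSED, BOTTOM as IR_LAST_OP, a constant as an
 * ordinary constant instruction (op/type/val), and, when copy propagation is
 * combined in (IR_COMBO_COPY_PROPAGATION), an IR_COPY instruction whose op1
 * names the value the ref is identical to.
 */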
15 #define IR_TOP IR_UNUSED
16 #define IR_BOTTOM IR_LAST_OP
17
18 #define IR_MAKE_TOP(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_TOP;} while (0)
19 #define IR_MAKE_BOTTOM(ref) do {IR_ASSERT(ref > 0); _values[ref].optx = IR_BOTTOM;} while (0)
20
21 #define IR_IS_TOP(ref) (ref >= 0 && _values[ref].optx == IR_TOP)
22 #define IR_IS_BOTTOM(ref) (ref >= 0 && _values[ref].optx == IR_BOTTOM)
23 #define IR_IS_FEASIBLE(ref) (ref >= 0 && _values[ref].optx != IR_TOP)
24
25 #define IR_COMBO_COPY_PROPAGATION 1
26
27 #if IR_COMBO_COPY_PROPAGATION
28 IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a)
29 {
30 if (a > 0 && _values[a].op == IR_COPY) {
31 a = _values[a].op1;
32 IR_ASSERT(a < 0 || _values[a].op != IR_COPY); /* this may be a copy of a symbolic constant */
33 }
34 return a;
35 }
36 #endif
37
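/*
 * Fold an instruction over the current lattice values of its operands and
 * update _values[res] accordingly (constant, COPY, or BOTTOM).  Returns 1
 * when the lattice value of "res" changed and 0 otherwise, so the caller
 * knows whether the users of "res" must be rescheduled.
 */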
38 static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
39 {
40 ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
41
42 #if IR_COMBO_COPY_PROPAGATION
43 op1 = ir_sccp_identity(_values, op1);
44 op2 = ir_sccp_identity(_values, op2);
45 op3 = ir_sccp_identity(_values, op3);
46 #endif
47
48 restart:
49 op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
50 op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
51 op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;
52
53 switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
54 case IR_FOLD_DO_RESTART:
55 opt = ctx->fold_insn.optx;
56 op1 = ctx->fold_insn.op1;
57 op2 = ctx->fold_insn.op2;
58 op3 = ctx->fold_insn.op3;
59 goto restart;
60 case IR_FOLD_DO_EMIT:
61 IR_MAKE_BOTTOM(res);
62 return 1;
63 case IR_FOLD_DO_COPY:
64 op1 = ctx->fold_insn.op1;
65 #if IR_COMBO_COPY_PROPAGATION
66 op1 = ir_sccp_identity(_values, op1);
67 #endif
68 insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
69 if (IR_IS_CONST_OP(insn->op)) {
70 /* pass */
71 #if IR_COMBO_COPY_PROPAGATION
72 } else if (_values[res].optx == IR_TOP) {
73 _values[res].optx = IR_OPT(IR_COPY, insn->type);
74 _values[res].op1 = op1;
75 return 1;
76 } else if (_values[res].op == IR_COPY && _values[res].op1 == op1) {
77 return 0; /* not changed */
78 } else {
79 IR_ASSERT(_values[res].optx != IR_BOTTOM);
80 /* we don't check for widening */
81 _values[res].optx = IR_OPT(IR_COPY, insn->type);
82 _values[res].op1 = op1;
83 return 1;
84 #else
85 } else {
86 IR_MAKE_BOTTOM(res);
87 return 1;
88 #endif
89 }
90 break;
91 case IR_FOLD_DO_CONST:
92 insn = &ctx->fold_insn;
93 break;
94 default:
95 IR_ASSERT(0);
96 return 0;
97 }
98
99 if (IR_IS_TOP(res)) {
100 _values[res].optx = IR_OPT(insn->type, insn->type);
101 _values[res].val.u64 = insn->val.u64;
102 return 1;
103 } else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) {
104 IR_MAKE_BOTTOM(res);
105 return 1;
106 }
107 return 0; /* not changed */
108 }
109
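/*
 * Meet operation for PHI nodes: combine the inputs arriving over feasible
 * (non-TOP) MERGE edges.  The result stays TOP while nothing is known,
 * becomes a constant (or, with IR_COMBO_COPY_PROPAGATION, a COPY of a single
 * non-constant source) when all feasible inputs agree, and falls to BOTTOM
 * as soon as two feasible inputs disagree.
 */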
110 static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *insn, ir_bitqueue *worklist)
111 {
112 ir_ref j, n, input, *merge_input, *p;
113 ir_insn *v, *new_const = NULL;
114 #if IR_COMBO_COPY_PROPAGATION
115 ir_ref new_copy;
116 #endif
117
118 if (!IR_IS_FEASIBLE(insn->op1)) {
119 return 0;
120 }
121 n = insn->inputs_count;
122 if (n > 3 && _values[i].optx == IR_TOP) {
123 for (j = 0; j < (n>>2); j++) {
124 _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
125 }
126 }
127
128 p = insn->ops + 2;
129 merge_input = ctx->ir_base[insn->op1].ops + 1;
130 for (; --n > 0; p++, merge_input++) {
131 IR_ASSERT(*merge_input > 0);
132 if (_values[*merge_input].optx == IR_TOP) {
133 continue;
134 }
135
136 input = *p;
137 if (IR_IS_CONST_REF(input)) {
138 v = &ctx->ir_base[input];
139 } else if (input == i) {
140 continue;
141 } else {
142 v = &_values[input];
143 if (v->op == IR_TOP) {
144 /* do backward propagation only once */
145 if (!v->op1) {
146 v->op1 = 1;
147 ir_bitqueue_add(worklist, input);
148 }
149 continue;
150 #if IR_COMBO_COPY_PROPAGATION
151 } else if (v->op == IR_COPY) {
152 input = v->op1;
153 IR_ASSERT(input < 0 || _values[input].op != IR_COPY);
154 new_copy = input;
155 goto next;
156 } else if (v->op == IR_BOTTOM) {
157 new_copy = input;
158 goto next;
159 #else
160 } else if (v->op == IR_BOTTOM) {
161 IR_MAKE_BOTTOM(i);
162 return 1;
163 #endif
164 }
165 }
166 new_copy = IR_UNUSED;
167 new_const = v;
168 goto next;
169 }
170
171 IR_ASSERT(_values[i].optx == IR_TOP);
172 return 0;
173
174 next:
175 p++;
176 merge_input++;
177 /* for all live merge inputs */
178 for (; --n > 0; p++, merge_input++) {
179 IR_ASSERT(*merge_input > 0);
180 if (_values[*merge_input].optx == IR_TOP) {
181 continue;
182 }
183
184 input = *p;
185 if (IR_IS_CONST_REF(input)) {
186 v = &ctx->ir_base[input];
187 } else if (input == i) {
188 continue;
189 } else {
190 v = &_values[input];
191 if (v->op == IR_TOP) {
192 /* do backward propagation only once */
193 if (!v->op1) {
194 v->op1 = 1;
195 ir_bitqueue_add(worklist, input);
196 }
197 continue;
198 #if IR_COMBO_COPY_PROPAGATION
199 } else if (v->op == IR_COPY) {
200 input = v->op1;
201 IR_ASSERT(input < 0 || _values[input].op != IR_COPY);
202 if (new_copy == input) {
203 continue;
204 } else {
205 IR_MAKE_BOTTOM(i);
206 return 1;
207 }
208 } else if (v->op == IR_BOTTOM) {
209 if (new_copy == input) {
210 continue;
211 } else {
212 IR_MAKE_BOTTOM(i);
213 return 1;
214 }
215 #else
216 } else if (v->op == IR_BOTTOM) {
217 IR_MAKE_BOTTOM(i);
218 return 1;
219 #endif
220 }
221 }
222 if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
223 IR_MAKE_BOTTOM(i);
224 return 1;
225 }
226 }
227
228 #if IR_COMBO_COPY_PROPAGATION
229 if (new_copy) {
230 if (_values[i].op == IR_COPY && _values[i].op1 == new_copy) {
231 return 0; /* not changed */
232 } else {
233 IR_ASSERT(_values[i].optx != IR_BOTTOM);
234 /* we don't check for widening */
235 _values[i].optx = IR_OPT(IR_COPY, ctx->ir_base[new_copy].type);
236 _values[i].op1 = new_copy;
237 return 1;
238 }
239 }
240 #endif
241
242 if (_values[i].optx == IR_TOP) {
243 _values[i].optx = new_const->opt;
244 _values[i].val.u64 = new_const->val.u64;
245 return 1;
246 } else if (_values[i].opt == new_const->opt && _values[i].val.u64 == new_const->val.u64) {
247 return 0;
248 } else {
249 IR_MAKE_BOTTOM(i);
250 return 1;
251 }
252 }
253
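/*
 * Dead-load check: a LOAD-like instruction (or an ALLOCA) with a single
 * remaining use produces no consumed value -- that one use is typically just
 * the next node in the control/memory chain -- so it may be queued for DCE.
 */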
254 static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
255 {
256 if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
257 return ctx->use_lists[ref].count == 1;
258 } else if (insn->op == IR_ALLOCA) {
259 return ctx->use_lists[ref].count == 1;
260 }
261 return 0;
262 }
263
264 static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
265 {
266 if (ctx->use_lists[ref].count == 1) {
267 uint32_t flags = ir_op_flags[ctx->ir_base[ref].op];
268
269 if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
270 return 1;
271 } else if (ctx->ir_base[ref].op == IR_ALLOCA) {
272 return 1;
273 }
274 }
275 return 0;
276 }
277
278 static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
279 {
280 if (ctx->use_lists[ref].count == 0) {
281 return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
282 } else {
283 return ir_is_dead_load(ctx, ref);
284 }
285 return 0;
286 }
287
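/*
 * Find an already existing instruction with the given "optx" whose first
 * operand is "op1" by scanning the use list of "op1".  This is a small CSE
 * lookup used by the retyping helpers below (e.g. ir_promote_f2d/ir_ext_ref).
 */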
288 static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
289 {
290 IR_ASSERT(!IR_IS_CONST_REF(op1));
291
292 ir_use_list *use_list = &ctx->use_lists[op1];
293 ir_ref *p, n = use_list->count;
294
295 for (p = ctx->use_edges + use_list->refs; n > 0; p++, n--) {
296 ir_ref use = *p;
297 ir_insn *use_insn = &ctx->ir_base[use];
298
299 if (use_insn->optx == optx) {
300 IR_ASSERT(use_insn->op1 == op1);
301 return use;
302 }
303 }
304 return IR_UNUSED;
305 }
306
307 static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
308 {
309 ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
310
311 return ir_const_is_true(v);
312 }
313
314 static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
315 {
316 ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
317 ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
318
319 IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
320 IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
321 return v1->val.u64 == v2->val.u64;
322 }
323
324 static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
325 {
326 ir_ref j, n, *p;
327 ir_insn *insn;
328
329 CLEAR_USES(ref);
330 insn = &ctx->ir_base[ref];
331 n = insn->inputs_count;
332 insn->opt = IR_NOP; /* keep "inputs_count" */
333 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
334 *p = IR_UNUSED;
335 }
336 }
337
338 static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
339 {
340 ir_ref j, n, *p;
341 ir_insn *insn;
342
343 CLEAR_USES(ref);
344 insn = &ctx->ir_base[ref];
345 n = insn->inputs_count;
346 insn->opt = IR_NOP; /* keep "inputs_count" */
347 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
348 ir_ref input = *p;
349 *p = IR_UNUSED;
350 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
351 if (input > 0 && _values[input].op > IR_COPY) {
352 ir_use_list_remove_all(ctx, input, ref);
353 if (ir_is_dead(ctx, input)) {
354 /* schedule DCE */
355 ir_bitqueue_add(worklist, input);
356 }
357 }
358 }
359 }
360
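/*
 * Variant of ir_sccp_remove_insn() for the later passes that run without the
 * "_values" lattice: every positive input is unlinked from its use list, and
 * when an input is a single-use PHI its controlling MERGE is rescheduled so
 * the PHI can be re-examined (e.g. turned into ABS/MIN/MAX/COND).
 */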
361 static void ir_sccp_remove_insn2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
362 {
363 ir_ref j, n, *p;
364 ir_insn *insn;
365
366 CLEAR_USES(ref);
367 insn = &ctx->ir_base[ref];
368 n = insn->inputs_count;
369 insn->opt = IR_NOP; /* keep "inputs_count" */
370 for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
371 ir_ref input = *p;
372 *p = IR_UNUSED;
373 if (input > 0) {
374 ir_use_list_remove_all(ctx, input, ref);
375 if (ir_is_dead(ctx, input)) {
376 /* schedule DCE */
377 ir_bitqueue_add(worklist, input);
378 } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
379 /* try to optimize PHI into ABS/MIN/MAX/COND */
380 ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
381 }
382 }
383 }
384 }
385
386 static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
387 {
388 ir_ref j, n, *p, use, k, l;
389 ir_insn *insn;
390 ir_use_list *use_list;
391
392 IR_ASSERT(ref != new_ref);
393
394 insn = &ctx->ir_base[ref];
395 n = insn->inputs_count;
396 insn->opt = IR_NOP; /* keep "inputs_count" */
397 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
398 ir_ref input = *p;
399 *p = IR_UNUSED;
400 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
401 if (input > 0 && _values[input].op > IR_COPY) {
402 ir_use_list_remove_all(ctx, input, ref);
403 if (ir_is_dead(ctx, input)) {
404 /* schedule DCE */
405 ir_bitqueue_add(worklist, input);
406 }
407 }
408 }
409
410 use_list = &ctx->use_lists[ref];
411 n = use_list->count;
412 for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
413 use = *p;
414 if (IR_IS_FEASIBLE(use)) {
415 insn = &ctx->ir_base[use];
416 l = insn->inputs_count;
417 for (k = 1; k <= l; k++) {
418 if (ir_insn_op(insn, k) == ref) {
419 ir_insn_set_op(insn, k, new_ref);
420 }
421 }
422 #if IR_COMBO_COPY_PROPAGATION
423 if (new_ref > 0 && IR_IS_BOTTOM(use)) {
424 if (ir_use_list_add(ctx, new_ref, use)) {
425 /* restore after reallocation */
426 use_list = &ctx->use_lists[ref];
427 n = use_list->count;
428 p = &ctx->use_edges[use_list->refs + j];
429 }
430 }
431 #endif
432 /* we may skip nodes that are going to be removed by SCCP (TOP, CONST and COPY) */
433 if (worklist && _values[use].op > IR_COPY) {
434 /* schedule folding */
435 ir_bitqueue_add(worklist, use);
436 }
437 }
438 }
439
440 CLEAR_USES(ref);
441 }
442
443 static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
444 {
445 ir_ref j, n, *p, use, k, l;
446 ir_insn *insn;
447 ir_use_list *use_list;
448
449 IR_ASSERT(ref != new_ref);
450
451 insn = &ctx->ir_base[ref];
452 n = insn->inputs_count;
453 insn->opt = IR_NOP; /* keep "inputs_count" */
454 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
455 ir_ref input = *p;
456 *p = IR_UNUSED;
457 if (input > 0) {
458 ir_use_list_remove_all(ctx, input, ref);
459 if (ir_is_dead(ctx, input)) {
460 /* schedule DCE */
461 ir_bitqueue_add(worklist, input);
462 } else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
463 /* try to optimize PHI into ABS/MIN/MAX/COND */
464 ir_bitqueue_add(worklist, input);
465 }
466 }
467 }
468
469 use_list = &ctx->use_lists[ref];
470 n = use_list->count;
471 for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
472 use = *p;
473 insn = &ctx->ir_base[use];
474 l = insn->inputs_count;
475 for (k = 1; k <= l; k++) {
476 if (ir_insn_op(insn, k) == ref) {
477 ir_insn_set_op(insn, k, new_ref);
478 }
479 }
480 #if IR_COMBO_COPY_PROPAGATION
481 if (new_ref > 0) {
482 if (ir_use_list_add(ctx, new_ref, use)) {
483 /* restore after reallocation */
484 use_list = &ctx->use_lists[ref];
485 n = use_list->count;
486 p = &ctx->use_edges[use_list->refs + j];
487 }
488 }
489 #endif
490 /* schedule folding */
491 ir_bitqueue_add(worklist, use);
492 }
493
494 CLEAR_USES(ref);
495 }
496
497 static void ir_sccp_fold2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
498 {
499 uint32_t opt;
500 ir_ref op1, op2, op3;
501 ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;
502
503 insn = &ctx->ir_base[ref];
504 opt = insn->opt;
505 op1 = insn->op1;
506 op2 = insn->op2;
507 op3 = insn->op3;
508
509 restart:
510 op1_insn = ctx->ir_base + op1;
511 op2_insn = ctx->ir_base + op2;
512 op3_insn = ctx->ir_base + op3;
513
514 switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
515 case IR_FOLD_DO_RESTART:
516 opt = ctx->fold_insn.optx;
517 op1 = ctx->fold_insn.op1;
518 op2 = ctx->fold_insn.op2;
519 op3 = ctx->fold_insn.op3;
520 goto restart;
521 case IR_FOLD_DO_EMIT:
522 insn = &ctx->ir_base[ref];
523 if (insn->opt != ctx->fold_insn.opt
524 || insn->op1 != ctx->fold_insn.op1
525 || insn->op2 != ctx->fold_insn.op2
526 || insn->op3 != ctx->fold_insn.op3) {
527
528 ir_use_list *use_list;
529 ir_ref n, j, *p, use;
530
531 insn->optx = ctx->fold_insn.opt;
532 IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
533 insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
534 if (insn->op1 != ctx->fold_insn.op1) {
535 if (insn->op1 > 0) {
536 ir_use_list_remove_one(ctx, insn->op1, ref);
537 }
538 if (ctx->fold_insn.op1 > 0) {
539 ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
540 }
541 }
542 if (insn->op2 != ctx->fold_insn.op2) {
543 if (insn->op2 > 0) {
544 ir_use_list_remove_one(ctx, insn->op2, ref);
545 }
546 if (ctx->fold_insn.op2 > 0) {
547 ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
548 }
549 }
550 if (insn->op3 != ctx->fold_insn.op3) {
551 if (insn->op3 > 0) {
552 ir_use_list_remove_one(ctx, insn->op3, ref);
553 }
554 if (ctx->fold_insn.op3 > 0) {
555 ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
556 }
557 }
558 insn->op1 = ctx->fold_insn.op1;
559 insn->op2 = ctx->fold_insn.op2;
560 insn->op3 = ctx->fold_insn.op3;
561
562 use_list = &ctx->use_lists[ref];
563 n = use_list->count;
564 for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
565 use = *p;
566 ir_bitqueue_add(worklist, use);
567 }
568 }
569 break;
570 case IR_FOLD_DO_COPY:
571 op1 = ctx->fold_insn.op1;
572 ir_sccp_replace_insn2(ctx, ref, op1, worklist);
573 break;
574 case IR_FOLD_DO_CONST:
575 op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
576 ir_sccp_replace_insn2(ctx, ref, op1, worklist);
577 break;
578 default:
579 IR_ASSERT(0);
580 break;
581 }
582 }
583
584 static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
585 {
586 ir_ref next;
587 ir_insn *insn, *next_insn;
588
589 insn = &ctx->ir_base[ref];
590 if (ctx->use_lists[dst].count == 1) {
591 next = ctx->use_edges[ctx->use_lists[dst].refs];
592 next_insn = &ctx->ir_base[next];
593 /* remove IF and IF_TRUE/FALSE from the doubly-linked control list */
594 next_insn->op1 = insn->op1;
595 ir_use_list_replace_one(ctx, insn->op1, ref, next);
596 /* remove IF and IF_TRUE/FALSE instructions */
597 ir_sccp_make_nop(ctx, ref);
598 ir_sccp_make_nop(ctx, dst);
599 } else {
600 insn->op2 = IR_UNUSED;
601 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
602 next_insn = &ctx->ir_base[dst];
603 next_insn->op = IR_BEGIN;
604 }
605 }
606
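/*
 * Drop the MERGE inputs that were proven unreachable.  When only one feasible
 * input remains the MERGE is removed completely (or degraded to a plain
 * BEGIN); otherwise the surviving inputs are compacted and every user PHI is
 * compacted in exactly the same way.
 */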
607 static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref unfeasible_inputs)
608 {
609 ir_ref i, j, n, k, *p, use;
610 ir_insn *insn, *use_insn;
611 ir_use_list *use_list;
612 ir_bitset life_inputs;
613
614 insn = &ctx->ir_base[ref];
615 IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
616 n = insn->inputs_count;
617 if (n - unfeasible_inputs == 1) {
618 /* remove MERGE completely */
619 for (j = 1; j <= n; j++) {
620 ir_ref input = ir_insn_op(insn, j);
621 if (input && IR_IS_FEASIBLE(input)) {
622 ir_insn *input_insn = &ctx->ir_base[input];
623
624 IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_LOOP_END||
625 input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE);
626 if (input_insn->op == IR_END || input_insn->op == IR_LOOP_END) {
627 if (input < ref) {
628 ir_ref prev, next = IR_UNUSED;
629 ir_insn *next_insn = NULL;
630
631 prev = input_insn->op1;
632 use_list = &ctx->use_lists[ref];
633 if (use_list->count == 1) {
634 next = ctx->use_edges[use_list->refs];
635 next_insn = &ctx->ir_base[next];
636 } else {
637 for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
638 use = *p;
639 use_insn = &ctx->ir_base[use];
640 IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed");
641 if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
642 IR_ASSERT(!next);
643 next = use;
644 next_insn = use_insn;
645 } else if (use_insn->op != IR_NOP) {
646 IR_ASSERT(use_insn->op1 == ref);
647 use_insn->op1 = prev;
648 ir_use_list_add(ctx, prev, use);
649 p = &ctx->use_edges[use_list->refs + k];
650 }
651 }
652 }
653 IR_ASSERT(prev && next);
654 /* remove MERGE and input END from the doubly-linked control list */
655 next_insn->op1 = prev;
656 ir_use_list_replace_one(ctx, prev, input, next);
657 /* remove MERGE and input END instructions */
658 ir_sccp_make_nop(ctx, ref);
659 ir_sccp_make_nop(ctx, input);
660 } else {
661 for (i = 2; i <= n; i++) {
662 ir_insn_set_op(insn, i, IR_UNUSED);
663 }
664 insn->op = IR_BEGIN;
665 insn->op1 = input;
666 input_insn->op = IR_END;
667 }
668 break;
669 } else {
670 for (i = 2; i <= n; i++) {
671 ir_insn_set_op(insn, i, IR_UNUSED);
672 }
673 insn->op = IR_BEGIN;
674 insn->op1 = input;
675 }
676 }
677 }
678 } else {
679 n = insn->inputs_count;
680 i = 1;
681 life_inputs = ir_bitset_malloc(n + 1);
682 for (j = 1; j <= n; j++) {
683 ir_ref input = ir_insn_op(insn, j);
684
685 if (input) {
686 if (i != j) {
687 ir_insn_set_op(insn, i, input);
688 }
689 ir_bitset_incl(life_inputs, j);
690 i++;
691 }
692 }
693 j = i;
694 while (j <= n) {
695 ir_insn_set_op(insn, j, IR_UNUSED);
696 j++;
697 }
698 i--;
699 insn->inputs_count = i;
700
701 n++;
702 use_list = &ctx->use_lists[ref];
703 if (use_list->count > 1) {
704 for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
705 use = *p;
706 use_insn = &ctx->ir_base[use];
707 if (use_insn->op == IR_PHI) {
708 i = 2;
709 for (j = 2; j <= n; j++) {
710 ir_ref input = ir_insn_op(use_insn, j);
711
712 if (ir_bitset_in(life_inputs, j - 1)) {
713 IR_ASSERT(input);
714 if (i != j) {
715 ir_insn_set_op(use_insn, i, input);
716 }
717 i++;
718 } else if (!IR_IS_CONST_REF(input)) {
719 ir_use_list_remove_one(ctx, input, use);
720 }
721 }
722 while (i <= n) {
723 ir_insn_set_op(use_insn, i, IR_UNUSED);
724 i++;
725 }
726 use_insn->inputs_count = insn->inputs_count + 1;
727 }
728 }
729 }
730 ir_mem_free(life_inputs);
731 }
732 }
733
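/*
 * Check whether a DOUBLE expression tree that is only consumed as a FLOAT
 * (through FP2FP) may be evaluated entirely in single precision: constants
 * must round-trip exactly through float, FP2FP conversions qualify directly,
 * and NEG/ABS/ADD/SUB/MUL/DIV/MIN/MAX qualify when they are single-use and
 * their operands qualify as well.
 */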
734 static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
735 {
736 ir_insn *insn = &ctx->ir_base[ref];
737
738 IR_ASSERT(insn->type == IR_DOUBLE);
739 if (IR_IS_CONST_REF(ref)) {
740 return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
741 } else {
742 switch (insn->op) {
743 case IR_FP2FP:
744 return 1;
745 // case IR_INT2FP:
746 // return ctx->use_lists[ref].count == 1;
747 case IR_NEG:
748 case IR_ABS:
749 return ctx->use_lists[ref].count == 1 &&
750 ir_may_promote_d2f(ctx, insn->op1);
751 case IR_ADD:
752 case IR_SUB:
753 case IR_MUL:
754 case IR_DIV:
755 case IR_MIN:
756 case IR_MAX:
757 return ctx->use_lists[ref].count == 1 &&
758 ir_may_promote_d2f(ctx, insn->op1) &&
759 ir_may_promote_d2f(ctx, insn->op2);
760 default:
761 break;
762 }
763 }
764 return 0;
765 }
766
767 static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
768 {
769 ir_insn *insn = &ctx->ir_base[ref];
770
771 IR_ASSERT(insn->type == IR_FLOAT);
772 if (IR_IS_CONST_REF(ref)) {
773 return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
774 } else {
775 switch (insn->op) {
776 case IR_FP2FP:
777 return 1;
778 case IR_INT2FP:
779 return ctx->use_lists[ref].count == 1;
780 case IR_NEG:
781 case IR_ABS:
782 return ctx->use_lists[ref].count == 1 &&
783 ir_may_promote_f2d(ctx, insn->op1);
784 case IR_ADD:
785 case IR_SUB:
786 case IR_MUL:
787 // case IR_DIV:
788 case IR_MIN:
789 case IR_MAX:
790 return ctx->use_lists[ref].count == 1 &&
791 ir_may_promote_f2d(ctx, insn->op1) &&
792 ir_may_promote_f2d(ctx, insn->op2);
793 default:
794 break;
795 }
796 }
797 return 0;
798 }
799
800 static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use)
801 {
802 ir_insn *insn = &ctx->ir_base[ref];
803 uint32_t count;
804
805 IR_ASSERT(insn->type == IR_DOUBLE);
806 if (IR_IS_CONST_REF(ref)) {
807 return ir_const_float(ctx, (float)insn->val.d);
808 } else {
809 switch (insn->op) {
810 case IR_FP2FP:
811 count = ctx->use_lists[ref].count;
812 ir_use_list_remove_all(ctx, ref, use);
813 if (ctx->use_lists[ref].count == 0) {
814 ir_use_list_replace_one(ctx, insn->op1, ref, use);
815 if (count > 1) {
816 do {
817 ir_use_list_add(ctx, insn->op1, use);
818 } while (--count > 1);
819 }
820 ref = insn->op1;
821 MAKE_NOP(insn);
822 return ref;
823 } else {
824 ir_use_list_add(ctx, insn->op1, use);
825 count -= ctx->use_lists[ref].count;
826 if (count > 1) {
827 do {
828 ir_use_list_add(ctx, insn->op1, use);
829 } while (--count > 1);
830 }
831 }
832 return insn->op1;
833 // case IR_INT2FP:
834 // insn->type = IR_FLOAT;
835 // return ref;
836 case IR_NEG:
837 case IR_ABS:
838 insn->op1 = ir_promote_d2f(ctx, insn->op1, ref);
839 insn->type = IR_FLOAT;
840 return ref;
841 case IR_ADD:
842 case IR_SUB:
843 case IR_MUL:
844 case IR_DIV:
845 case IR_MIN:
846 case IR_MAX:
847 if (insn->op1 == insn->op2) {
848 insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref);
849 } else {
850 insn->op1 = ir_promote_d2f(ctx, insn->op1, ref);
851 insn->op2 = ir_promote_d2f(ctx, insn->op2, ref);
852 }
853 insn->type = IR_FLOAT;
854 return ref;
855 default:
856 break;
857 }
858 }
859 IR_ASSERT(0);
860 return ref;
861 }
862
863 static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use)
864 {
865 ir_insn *insn = &ctx->ir_base[ref];
866 uint32_t count;
867 ir_ref old_ref;
868
869 IR_ASSERT(insn->type == IR_FLOAT);
870 if (IR_IS_CONST_REF(ref)) {
871 return ir_const_double(ctx, (double)insn->val.f);
872 } else {
873 switch (insn->op) {
874 case IR_FP2FP:
875 count = ctx->use_lists[ref].count;
876 ir_use_list_remove_all(ctx, ref, use);
877 if (ctx->use_lists[ref].count == 0) {
878 ir_use_list_replace_one(ctx, insn->op1, ref, use);
879 if (count > 1) {
880 do {
881 ir_use_list_add(ctx, insn->op1, use);
882 } while (--count > 1);
883 }
884 ref = insn->op1;
885 MAKE_NOP(insn);
886 return ref;
887 } else {
888 ir_use_list_add(ctx, insn->op1, use);
889 count -= ctx->use_lists[ref].count;
890 if (count > 1) {
891 do {
892 ir_use_list_add(ctx, insn->op1, use);
893 } while (--count > 1);
894 }
895 }
896 return insn->op1;
897 case IR_INT2FP:
898 old_ref = ir_find1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
899 if (old_ref) {
900 IR_ASSERT(ctx->use_lists[ref].count == 1);
901 ir_use_list_remove_one(ctx, insn->op1, ref);
902 CLEAR_USES(ref);
903 MAKE_NOP(insn);
904 ir_use_list_add(ctx, old_ref, use);
905 return old_ref;
906 }
907 insn->type = IR_DOUBLE;
908 return ref;
909 case IR_NEG:
910 case IR_ABS:
911 insn->op1 = ir_promote_f2d(ctx, insn->op1, ref);
912 insn->type = IR_DOUBLE;
913 return ref;
914 case IR_ADD:
915 case IR_SUB:
916 case IR_MUL:
917 // case IR_DIV:
918 case IR_MIN:
919 case IR_MAX:
920 if (insn->op1 == insn->op2) {
921 insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref);
922 } else {
923 insn->op1 = ir_promote_f2d(ctx, insn->op1, ref);
924 insn->op2 = ir_promote_f2d(ctx, insn->op2, ref);
925 }
926 insn->type = IR_DOUBLE;
927 return ref;
928 default:
929 break;
930 }
931 }
932 IR_ASSERT(0);
933 return ref;
934 }
935
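/*
 * Integer variant of the promotion check: "ref" may be evaluated directly in
 * the integer "type" when it is a non-symbolic constant, a ZEXT/SEXT whose
 * source already has that type, or a single-use arithmetic/bitwise chain
 * built from such values.
 */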
936 static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref)
937 {
938 ir_insn *insn = &ctx->ir_base[ref];
939
940 if (IR_IS_CONST_REF(ref)) {
941 return !IR_IS_SYM_CONST(insn->op);
942 } else {
943 switch (insn->op) {
944 case IR_ZEXT:
945 case IR_SEXT:
946 return ctx->ir_base[insn->op1].type == type;
947 case IR_NEG:
948 case IR_ABS:
949 case IR_NOT:
950 return ctx->use_lists[ref].count == 1 &&
951 ir_may_promote_i2i(ctx, type, insn->op1);
952 case IR_ADD:
953 case IR_SUB:
954 case IR_MUL:
955 // case IR_DIV:
956 case IR_MIN:
957 case IR_MAX:
958 case IR_OR:
959 case IR_AND:
960 case IR_XOR:
961 return ctx->use_lists[ref].count == 1 &&
962 ir_may_promote_i2i(ctx, type, insn->op1) &&
963 ir_may_promote_i2i(ctx, type, insn->op2);
964 default:
965 break;
966 }
967 }
968 return 0;
969 }
970
971 static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
972 {
973 ir_insn *insn = &ctx->ir_base[ref];
974 uint32_t count;
975
976 if (IR_IS_CONST_REF(ref)) {
977 return ir_const(ctx, insn->val, type);
978 } else {
979 switch (insn->op) {
980 case IR_ZEXT:
981 case IR_SEXT:
982 count = ctx->use_lists[ref].count;
983 ir_use_list_remove_all(ctx, ref, use);
984 if (ctx->use_lists[ref].count == 0) {
985 ir_use_list_replace_one(ctx, insn->op1, ref, use);
986 if (count > 1) {
987 do {
988 ir_use_list_add(ctx, insn->op1, use);
989 } while (--count > 1);
990 }
991 ref = insn->op1;
992 MAKE_NOP(insn);
993 return ref;
994 } else {
995 ir_use_list_add(ctx, insn->op1, use);
996 count -= ctx->use_lists[ref].count;
997 if (count > 1) {
998 do {
999 ir_use_list_add(ctx, insn->op1, use);
1000 } while (--count > 1);
1001 }
1002 }
1003 return insn->op1;
1004 case IR_NEG:
1005 case IR_ABS:
1006 case IR_NOT:
1007 insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref);
1008 insn->type = type;
1009 return ref;
1010 case IR_ADD:
1011 case IR_SUB:
1012 case IR_MUL:
1013 // case IR_DIV:
1014 case IR_MIN:
1015 case IR_MAX:
1016 case IR_OR:
1017 case IR_AND:
1018 case IR_XOR:
1019 if (insn->op1 == insn->op2) {
1020 insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref);
1021 } else {
1022 insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref);
1023 insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref);
1024 }
1025 insn->type = type;
1026 return ref;
1027 default:
1028 break;
1029 }
1030 }
1031 IR_ASSERT(0);
1032 return ref;
1033 }
1034
1035 static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
1036 {
1037 ir_val new_val;
1038
1039 switch (val_insn->type) {
1040 default:
1041 IR_ASSERT(0);
1042 case IR_I8:
1043 case IR_U8:
1044 case IR_BOOL:
1045 if (op == IR_SEXT) {
1046 new_val.i64 = (int64_t)val_insn->val.i8;
1047 } else {
1048 new_val.u64 = (uint64_t)val_insn->val.u8;
1049 }
1050 break;
1051 case IR_I16:
1052 case IR_U16:
1053 if (op == IR_SEXT) {
1054 new_val.i64 = (int64_t)val_insn->val.i16;
1055 } else {
1056 new_val.u64 = (uint64_t)val_insn->val.u16;
1057 }
1058 break;
1059 case IR_I32:
1060 case IR_U32:
1061 if (op == IR_SEXT) {
1062 new_val.i64 = (int64_t)val_insn->val.i32;
1063 } else {
1064 new_val.u64 = (uint64_t)val_insn->val.u32;
1065 }
1066 break;
1067 }
1068 return ir_const(ctx, new_val, type);
1069 }
1070
1071 static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type, ir_bitqueue *worklist)
1072 {
1073 uint32_t optx = IR_OPTX(op, type, 1);
1074 ir_ref ref;
1075
1076 if (!IR_IS_CONST_REF(src_ref)) {
1077 ref = ir_find1(ctx, optx, src_ref);
1078 if (ref) {
1079 ir_use_list_add(ctx, ref, var_ref);
1080 if (!IR_IS_CONST_REF(src_ref)) {
1081 ir_use_list_remove_one(ctx, src_ref, var_ref);
1082 }
1083 ir_bitqueue_add(worklist, ref);
1084 return ref;
1085 }
1086 }
1087
1088 ref = ir_emit1(ctx, optx, src_ref);
1089 ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
1090 ctx->use_lists[ref].count = 0;
1091 ctx->use_lists[ref].refs = IR_UNUSED;
1092 ir_use_list_add(ctx, ref, var_ref);
1093 if (!IR_IS_CONST_REF(src_ref)) {
1094 ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
1095 }
1096 ir_bitqueue_grow(worklist, ref + 1);
1097 ir_bitqueue_add(worklist, ref);
1098 return ref;
1099 }
1100
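/*
 * Try to remove a ZEXT/SEXT of a simple induction variable by widening the
 * variable itself.  An illustrative source pattern (names are only for the
 * sake of the example) is:
 *
 *     int32_t i = start;            // x2 = PHI(loop, x1, x3)
 *     do {
 *         use((int64_t)i);          // ext_ref = SEXT(x2)
 *         i = i + step;             // x3 = ADD(x2, step)
 *     } while (...);
 *
 * If every other use of the PHI is a comparison or an IF, the PHI and the
 * ADD/SUB/MUL are retyped to the wider type, their constant/foreign operands
 * are extended, and the original extension is replaced by the PHI itself.
 */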
1101 static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
1102 {
1103 ir_type type = insn->type;
1104 ir_op op = insn->op;
1105 ir_ref ref = insn->op1;
1106 ir_insn *phi_insn = &ctx->ir_base[ref];
1107 ir_insn *op_insn;
1108 ir_use_list *use_list;
1109 ir_ref n, *p, use, op_ref;
1110
1111 /* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
1112 if (phi_insn->op != IR_PHI
1113 || phi_insn->inputs_count != 3 /* (2 values) */
1114 || ctx->ir_base[phi_insn->op1].op != IR_LOOP_BEGIN) {
1115 return 0;
1116 }
1117
1118 op_ref = phi_insn->op3;
1119 op_insn = &ctx->ir_base[op_ref];
1120 if ((op_insn->op != IR_ADD && op_insn->op != IR_SUB && op_insn->op != IR_MUL)
1121 || (op_insn->op1 != ref && op_insn->op2 != ref)
1122 || ctx->use_lists[op_ref].count != 1) {
1123 return 0;
1124 }
1125
1126 /* Check if we may change the type of the induction variable */
1127 use_list = &ctx->use_lists[ref];
1128 n = use_list->count;
1129 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
1130 use = *p;
1131 if (use == op_ref || use == ext_ref) {
1132 continue;
1133 } else {
1134 ir_insn *use_insn = &ctx->ir_base[use];
1135
1136 if ((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
1137 && (use_insn->op1 == ref || use_insn->op2 == ref)) {
1138 continue;
1139 } else if (use_insn->op == IR_IF) {
1140 continue;
1141 } else {
1142 return 0;
1143 }
1144 }
1145 }
1146
1147 phi_insn->type = insn->type;
1148 op_insn->type = insn->type;
1149
1150 for (n = 0; n < ctx->use_lists[ref].count; n++) {
1151 /* "use_lists" may be reallocated by ir_ext_ref() */
1152 use = ctx->use_edges[ctx->use_lists[ref].refs + n];
1153 if (use == ext_ref) {
1154 continue;
1155 } else {
1156 ir_insn *use_insn = &ctx->ir_base[use];
1157
1158 if (use_insn->op == IR_IF) {
1159 continue;
1160 }
1161 IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
1162 || use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
1163 && (use_insn->op1 == ref || use_insn->op2 == ref));
1164 if (use_insn->op1 != ref) {
1165 if (IR_IS_CONST_REF(use_insn->op1)
1166 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
1167 ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
1168 } else {
1169 ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type, worklist);
1170 }
1171 }
1172 if (use_insn->op2 != ref) {
1173 if (IR_IS_CONST_REF(use_insn->op2)
1174 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
1175 ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
1176 } else {
1177 ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type, worklist);
1178 }
1179 }
1180 }
1181 }
1182
1183 ir_sccp_replace_insn2(ctx, ext_ref, ref, worklist);
1184
1185 phi_insn = &ctx->ir_base[ref];
1186 if (IR_IS_CONST_REF(phi_insn->op2)
1187 && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
1188 ctx->ir_base[ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
1189 } else {
1190 ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type, worklist);
1191 }
1192
1193 return 1;
1194 }
1195
1196 static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
1197 {
1198 ir_use_list *use_list = &ctx->use_lists[if_ref];
1199 ir_ref *p = &ctx->use_edges[use_list->refs];
1200
1201 IR_ASSERT(use_list->count == 2);
1202 if (ctx->ir_base[*p].op == IR_IF_TRUE) {
1203 IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_FALSE);
1204 *if_true_ref = *p;
1205 *if_false_ref = *(p + 1);
1206 } else {
1207 IR_ASSERT(ctx->ir_base[*p].op == IR_IF_FALSE);
1208 IR_ASSERT(ctx->ir_base[*(p + 1)].op == IR_IF_TRUE);
1209 *if_false_ref = *p;
1210 *if_true_ref = *(p + 1);
1211 }
1212 }
1213
1214 static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist2)
1215 {
1216 ir_ref prev, next;
1217 ir_use_list *use_list;
1218
1219 IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
1220 IR_ASSERT(ctx->ir_base[end].op == IR_END);
1221 IR_ASSERT(ctx->ir_base[begin].op1 == end);
1222 IR_ASSERT(ctx->use_lists[end].count == 1);
1223
1224 prev = ctx->ir_base[end].op1;
1225
1226 use_list = &ctx->use_lists[begin];
1227 IR_ASSERT(use_list->count == 1);
1228 next = ctx->use_edges[use_list->refs];
1229
1230 /* remove BEGIN and END */
1231 MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);
1232 MAKE_NOP(&ctx->ir_base[end]); CLEAR_USES(end);
1233
1234 /* connect their predecessor and successor */
1235 ctx->ir_base[next].op1 = prev;
1236 ir_use_list_replace_all(ctx, prev, end, next);
1237
1238 if (ctx->ir_base[prev].op == IR_BEGIN || ctx->ir_base[prev].op == IR_MERGE) {
1239 ir_bitqueue_add(worklist2, prev);
1240 }
1241 }
1242
1243 static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
1244 {
1245 if (insn->inputs_count == 2) {
1246 ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
1247 ir_insn *end1 = &ctx->ir_base[end1_ref];
1248 ir_insn *end2 = &ctx->ir_base[end2_ref];
1249
1250 if (end1->op != IR_END || end2->op != IR_END) {
1251 return 0;
1252 }
1253
1254 ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
1255 ir_insn *start1 = &ctx->ir_base[start1_ref];
1256 ir_insn *start2 = &ctx->ir_base[start2_ref];
1257
1258 if (start1->op1 != start2->op1) {
1259 return 0;
1260 }
1261
1262 ir_ref root_ref = start1->op1;
1263 ir_insn *root = &ctx->ir_base[root_ref];
1264
1265 if (root->op != IR_IF
1266 && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
1267 return 0;
1268 }
1269
1270 /* Empty Diamond
1271 *
1272 * prev prev
1273 * | condition | condition
1274 * | / |
1275 * IF |
1276 * | \ |
1277 * | +-----+ |
1278 * | IF_FALSE |
1279 * IF_TRUE | => |
1280 * | END |
1281 * END / |
1282 * | +---+ |
1283 * | / |
1284 * MERGE |
1285 * | |
1286 * next next
1287 */
1288
1289 ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
1290 ir_insn *next = &ctx->ir_base[next_ref];
1291
1292 IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
1293 IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
1294
1295 next->op1 = root->op1;
1296 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
1297 if (!IR_IS_CONST_REF(root->op2)) {
1298 ir_use_list_remove_all(ctx, root->op2, root_ref);
1299 if (ir_is_dead(ctx, root->op2)) {
1300 ir_bitqueue_add(worklist, root->op2);
1301 }
1302 }
1303
1304 MAKE_NOP(root); CLEAR_USES(root_ref);
1305 MAKE_NOP(start1); CLEAR_USES(start1_ref);
1306 MAKE_NOP(start2); CLEAR_USES(start2_ref);
1307 MAKE_NOP(end1); CLEAR_USES(end1_ref);
1308 MAKE_NOP(end2); CLEAR_USES(end2_ref);
1309 MAKE_NOP(insn); CLEAR_USES(ref);
1310
1311 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
1312 ir_bitqueue_add(worklist, next->op1);
1313 }
1314
1315 return 1;
1316 } else {
1317 ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
1318 ir_ref root_ref = IR_UNUSED;
1319
1320 for (i = 0; i < count; i++) {
1321 ir_ref end_ref, start_ref;
1322 ir_insn *end, *start;
1323
1324 end_ref = ops[i];
1325 end = &ctx->ir_base[end_ref];
1326 if (end->op != IR_END) {
1327 return 0;
1328 }
1329 start_ref = end->op1;
1330 start = &ctx->ir_base[start_ref];
1331 if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) {
1332 return 0;
1333 }
1334 IR_ASSERT(ctx->use_lists[start_ref].count == 1);
1335 if (!root_ref) {
1336 root_ref = start->op1;
1337 if (ctx->use_lists[root_ref].count != count) {
1338 return 0;
1339 }
1340 } else if (start->op1 != root_ref) {
1341 return 0;
1342 }
1343 }
1344
1345 /* Empty N-Diamond */
1346 ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
1347 ir_insn *next = &ctx->ir_base[next_ref];
1348 ir_insn *root = &ctx->ir_base[root_ref];
1349
1350 next->op1 = root->op1;
1351 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
1352
1353 if (!IR_IS_CONST_REF(root->op2)) {
1354 ir_use_list_remove_all(ctx, root->op2, root_ref);
1355 if (ir_is_dead(ctx, root->op2)) {
1356 ir_bitqueue_add(worklist, root->op2);
1357 }
1358 }
1359
1360 MAKE_NOP(root); CLEAR_USES(root_ref);
1361
1362 for (i = 0; i < count; i++) {
1363 ir_ref end_ref = ops[i];
1364 ir_insn *end = &ctx->ir_base[end_ref];
1365 ir_ref start_ref = end->op1;
1366 ir_insn *start = &ctx->ir_base[start_ref];
1367
1368 MAKE_NOP(start); CLEAR_USES(start_ref);
1369 MAKE_NOP(end); CLEAR_USES(end_ref);
1370 }
1371
1372 MAKE_NOP(insn); CLEAR_USES(ref);
1373
1374 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
1375 ir_bitqueue_add(worklist, next->op1);
1376 }
1377
1378 return 1;
1379 }
1380 }
1381
1382 static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
1383 {
1384 return IR_IS_CONST_REF(ref)
1385 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
1386 && ctx->ir_base[ref].val.u32 == 0;
1387 }
1388
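/*
 * Recognize diamond-shaped control flow that merely selects between two
 * values and collapse it into a single data instruction: PHI(A, B) guarded
 * by a comparison of A and B becomes MIN/MAX, and PHI(A, NEG(A)) guarded by
 * a comparison against zero becomes ABS (the COND form is currently
 * disabled).  The IF/IF_TRUE/IF_FALSE/END/END/MERGE skeleton is removed.
 */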
1389 static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
1390 {
1391 IR_ASSERT(insn->inputs_count == 3);
1392 IR_ASSERT(ctx->use_lists[merge_ref].count == 2);
1393
1394 ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
1395 ir_insn *end1 = &ctx->ir_base[end1_ref];
1396 ir_insn *end2 = &ctx->ir_base[end2_ref];
1397
1398 if (end1->op == IR_END && end2->op == IR_END) {
1399 ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
1400 ir_insn *start1 = &ctx->ir_base[start1_ref];
1401 ir_insn *start2 = &ctx->ir_base[start2_ref];
1402
1403 if (start1->op1 == start2->op1) {
1404 ir_ref root_ref = start1->op1;
1405 ir_insn *root = &ctx->ir_base[root_ref];
1406
1407 if (root->op == IR_IF && ctx->use_lists[root->op2].count == 1) {
1408 ir_ref cond_ref = root->op2;
1409 ir_insn *cond = &ctx->ir_base[cond_ref];
1410 ir_type type = insn->type;
1411 bool is_cmp, is_less;
1412
1413 if (IR_IS_TYPE_FP(type)) {
1414 is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
1415 cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
1416 is_less = (cond->op == IR_LT || cond->op == IR_LE ||
1417 cond->op == IR_ULT || cond->op == IR_ULE);
1418 } else if (IR_IS_TYPE_SIGNED(type)) {
1419 is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
1420 is_less = (cond->op == IR_LT || cond->op == IR_LE);
1421 } else {
1422 IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
1423 is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
1424 is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
1425 }
1426
1427 if (is_cmp
1428 && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
1429 || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
1430 /* MAX/MIN
1431 *
1432 * prev prev
1433 * | LT(A, B) |
1434 * | / |
1435 * IF |
1436 * | \ |
1437 * | +-----+ |
1438 * | IF_FALSE |
1439 * IF_TRUE | => |
1440 * | END |
1441 * END / |
1442 * | +---+ |
1443 * | / |
1444 * MERGE |
1445 * | \ |
1446 * | PHI(A, B) | MIN(A, B)
1447 * next next
1448 */
1449 ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
1450 ir_insn *next;
1451
1452 if (next_ref == ref) {
1453 next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
1454 }
1455 next = &ctx->ir_base[next_ref];
1456
1457 IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
1458 IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
1459
1460 insn->op = (
1461 (is_less ? cond->op1 : cond->op2)
1462 ==
1463 ((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
1464 ) ? IR_MIN : IR_MAX;
1465 insn->inputs_count = 2;
1466 if (insn->op2 > insn->op3) {
1467 insn->op1 = insn->op2;
1468 insn->op2 = insn->op3;
1469 } else {
1470 insn->op1 = insn->op3;
1471 }
1472 insn->op3 = IR_UNUSED;
1473
1474 next->op1 = root->op1;
1475 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
1476 if (!IR_IS_CONST_REF(insn->op1)) {
1477 ir_use_list_remove_all(ctx, insn->op1, cond_ref);
1478 }
1479 if (!IR_IS_CONST_REF(insn->op2)) {
1480 ir_use_list_remove_all(ctx, insn->op2, cond_ref);
1481 }
1482
1483 MAKE_NOP(cond); CLEAR_USES(cond_ref);
1484 MAKE_NOP(root); CLEAR_USES(root_ref);
1485 MAKE_NOP(start1); CLEAR_USES(start1_ref);
1486 MAKE_NOP(start2); CLEAR_USES(start2_ref);
1487 MAKE_NOP(end1); CLEAR_USES(end1_ref);
1488 MAKE_NOP(end2); CLEAR_USES(end2_ref);
1489 MAKE_NOP(merge); CLEAR_USES(merge_ref);
1490
1491 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
1492 ir_bitqueue_add(worklist, next->op1);
1493 }
1494
1495 return 1;
1496 } else if (is_cmp
1497 && ((ctx->ir_base[insn->op2].op == IR_NEG
1498 && ctx->use_lists[insn->op2].count == 1
1499 && ctx->ir_base[insn->op2].op1 == insn->op3
1500 && ((cond->op1 == insn->op3
1501 && ir_is_zero(ctx, cond->op2)
1502 && is_less == (start1->op == IR_IF_TRUE))
1503 || (cond->op2 == insn->op3
1504 && ir_is_zero(ctx, cond->op1)
1505 && is_less != (start1->op == IR_IF_TRUE))))
1506 || (ctx->ir_base[insn->op3].op == IR_NEG
1507 && ctx->use_lists[insn->op3].count == 1
1508 && ctx->ir_base[insn->op3].op1 == insn->op2
1509 && ((cond->op1 == insn->op2
1510 && ir_is_zero(ctx, cond->op2)
1511 && is_less != (start1->op == IR_IF_TRUE))
1512 || (cond->op2 == insn->op2
1513 && ir_is_zero(ctx, cond->op1)
1514 && is_less == (start1->op == IR_IF_TRUE)))))) {
1515 /* ABS
1516 *
1517 * prev prev
1518 * | LT(A, 0) |
1519 * | / |
1520 * IF |
1521 * | \ |
1522 * | +-----+ |
1523 * | IF_FALSE |
1524 * IF_TRUE | => |
1525 * | END |
1526 * END / |
1527 * | +---+ |
1528 * | / |
1529 * MERGE |
1530 * | \ |
1531 * | PHI(A, NEG(A)) | ABS(A)
1532 * next next
1533 */
1534 ir_ref neg_ref;
1535 ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
1536 ir_insn *next;
1537
1538 if (next_ref == ref) {
1539 next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
1540 }
1541 next = &ctx->ir_base[next_ref];
1542
1543 IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
1544 IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
1545
1546 insn->op = IR_ABS;
1547 insn->inputs_count = 1;
1548 if (ctx->ir_base[insn->op2].op == IR_NEG) {
1549 neg_ref = insn->op2;
1550 insn->op1 = insn->op3;
1551 } else {
1552 neg_ref = insn->op3;
1553 insn->op1 = insn->op2;
1554 }
1555 insn->op2 = IR_UNUSED;
1556 insn->op3 = IR_UNUSED;
1557
1558 next->op1 = root->op1;
1559 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
1560 ir_use_list_remove_one(ctx, insn->op1, neg_ref);
1561 if (!IR_IS_CONST_REF(insn->op1)) {
1562 ir_use_list_remove_all(ctx, insn->op1, cond_ref);
1563 }
1564
1565 MAKE_NOP(cond); CLEAR_USES(cond_ref);
1566 MAKE_NOP(root); CLEAR_USES(root_ref);
1567 MAKE_NOP(start1); CLEAR_USES(start1_ref);
1568 MAKE_NOP(start2); CLEAR_USES(start2_ref);
1569 MAKE_NOP(end1); CLEAR_USES(end1_ref);
1570 MAKE_NOP(end2); CLEAR_USES(end2_ref);
1571 MAKE_NOP(merge); CLEAR_USES(merge_ref);
1572 MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);
1573
1574 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
1575 ir_bitqueue_add(worklist, next->op1);
1576 }
1577
1578 return 1;
1579 #if 0
1580 } else {
1581 /* COND
1582 *
1583 * prev prev
1584 * | cond |
1585 * | / |
1586 * IF |
1587 * | \ |
1588 * | +-----+ |
1589 * | IF_FALSE |
1590 * IF_TRUE | => |
1591 * | END |
1592 * END / |
1593 * | +---+ |
1594 * | / |
1595 * MERGE |
1596 * | \ |
1597 * | PHI(A, B) | COND(cond, A, B)
1598 * next next
1599 */
1600 ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
1601 ir_insn *next;
1602
1603 if (next_ref == ref) {
1604 next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
1605 }
1606 next = &ctx->ir_base[next_ref];
1607
1608 IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
1609 IR_ASSERT(ctx->use_lists[start2_ref].count == 1);
1610
1611 insn->op = IR_COND;
1612 insn->inputs_count = 3;
1613 insn->op1 = cond_ref;
1614 if (start1->op == IR_IF_FALSE) {
1615 SWAP_REFS(insn->op2, insn->op3);
1616 }
1617
1618 next->op1 = root->op1;
1619 ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
1620 ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
1621 ir_use_list_remove_all(ctx, root->op2, root_ref);
1622
1623 MAKE_NOP(root); CLEAR_USES(root_ref);
1624 MAKE_NOP(start1); CLEAR_USES(start1_ref);
1625 MAKE_NOP(start2); CLEAR_USES(start2_ref);
1626 MAKE_NOP(end1); CLEAR_USES(end1_ref);
1627 MAKE_NOP(end2); CLEAR_USES(end2_ref);
1628 MAKE_NOP(merge); CLEAR_USES(merge_ref);
1629
1630 if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
1631 ir_bitqueue_add(worklist, next->op1);
1632 }
1633
1634 return 1;
1635 #endif
1636 }
1637 }
1638 }
1639 }
1640
1641 return 0;
1642 }
1643
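/*
 * Evaluate a comparison over two constant operands.  For floating-point
 * operands the unordered forms (IR_ULT, IR_UGE, ...) are computed as the
 * negation of the opposite ordered comparison, so they yield true whenever
 * one of the operands is NaN.
 */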
1644 static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
1645 {
1646 IR_ASSERT(op1->type == op2->type);
1647 if (IR_IS_TYPE_INT(op1->type)) {
1648 if (op == IR_EQ) {
1649 return op1->val.u64 == op2->val.u64;
1650 } else if (op == IR_NE) {
1651 return op1->val.u64 != op2->val.u64;
1652 } else if (op == IR_LT) {
1653 if (IR_IS_TYPE_SIGNED(op1->type)) {
1654 return op1->val.i64 < op2->val.i64;
1655 } else {
1656 return op1->val.u64 < op2->val.u64;
1657 }
1658 } else if (op == IR_GE) {
1659 if (IR_IS_TYPE_SIGNED(op1->type)) {
1660 return op1->val.i64 >= op2->val.i64;
1661 } else {
1662 return op1->val.u64 >= op2->val.u64;
1663 }
1664 } else if (op == IR_LE) {
1665 if (IR_IS_TYPE_SIGNED(op1->type)) {
1666 return op1->val.i64 <= op2->val.i64;
1667 } else {
1668 return op1->val.u64 <= op2->val.u64;
1669 }
1670 } else if (op == IR_GT) {
1671 if (IR_IS_TYPE_SIGNED(op1->type)) {
1672 return op1->val.i64 > op2->val.i64;
1673 } else {
1674 return op1->val.u64 > op2->val.u64;
1675 }
1676 } else if (op == IR_ULT) {
1677 return op1->val.u64 < op2->val.u64;
1678 } else if (op == IR_UGE) {
1679 return op1->val.u64 >= op2->val.u64;
1680 } else if (op == IR_ULE) {
1681 return op1->val.u64 <= op2->val.u64;
1682 } else if (op == IR_UGT) {
1683 return op1->val.u64 > op2->val.u64;
1684 } else {
1685 IR_ASSERT(0);
1686 return 0;
1687 }
1688 } else if (op1->type == IR_DOUBLE) {
1689 if (op == IR_EQ) {
1690 return op1->val.d == op2->val.d;
1691 } else if (op == IR_NE) {
1692 return op1->val.d != op2->val.d;
1693 } else if (op == IR_LT) {
1694 return op1->val.d < op2->val.d;
1695 } else if (op == IR_GE) {
1696 return op1->val.d >= op2->val.d;
1697 } else if (op == IR_LE) {
1698 return op1->val.d <= op2->val.d;
1699 } else if (op == IR_GT) {
1700 return op1->val.d > op2->val.d;
1701 } else if (op == IR_ULT) {
1702 return !(op1->val.d >= op2->val.d);
1703 } else if (op == IR_UGE) {
1704 return !(op1->val.d < op2->val.d);
1705 } else if (op == IR_ULE) {
1706 return !(op1->val.d > op2->val.d);
1707 } else if (op == IR_UGT) {
1708 return !(op1->val.d <= op2->val.d);
1709 } else {
1710 IR_ASSERT(0);
1711 return 0;
1712 }
1713 } else {
1714 IR_ASSERT(op1->type == IR_FLOAT);
1715 if (op == IR_EQ) {
1716 return op1->val.f == op2->val.f;
1717 } else if (op == IR_NE) {
1718 return op1->val.f != op2->val.f;
1719 } else if (op == IR_LT) {
1720 return op1->val.f < op2->val.f;
1721 } else if (op == IR_GE) {
1722 return op1->val.f >= op2->val.f;
1723 } else if (op == IR_LE) {
1724 return op1->val.f <= op2->val.f;
1725 } else if (op == IR_GT) {
1726 return op1->val.f > op2->val.f;
1727 } else if (op == IR_ULT) {
1728 return !(op1->val.f >= op2->val.f);
1729 } else if (op == IR_UGE) {
1730 return !(op1->val.f < op2->val.f);
1731 } else if (op == IR_ULE) {
1732 return !(op1->val.f > op2->val.f);
1733 } else if (op == IR_UGT) {
1734 return !(op1->val.f <= op2->val.f);
1735 } else {
1736 IR_ASSERT(0);
1737 return 0;
1738 }
1739 }
1740 }
1741
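/*
 * When the condition of an IF is a PHI defined at the preceding two-way
 * MERGE and at least one PHI input is a (non-symbolic) constant, the branch
 * outcome is already known on that incoming edge.  The IF is split: the edge
 * with the constant input is routed directly to the matching IF_TRUE/IF_FALSE
 * successor, and a real IF is kept only for the remaining input.
 */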
1742 static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
1743 {
1744 ir_ref cond_ref = insn->op2;
1745 ir_insn *cond = &ctx->ir_base[cond_ref];
1746
1747 if (cond->op == IR_PHI
1748 && cond->inputs_count == 3
1749 && cond->op1 == insn->op1
1750 && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
1751 || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
1752 ir_ref merge_ref = insn->op1;
1753 ir_insn *merge = &ctx->ir_base[merge_ref];
1754
1755 if (ctx->use_lists[merge_ref].count == 2) {
1756 ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
1757 ir_insn *end1 = &ctx->ir_base[end1_ref];
1758 ir_insn *end2 = &ctx->ir_base[end2_ref];
1759
1760 if (end1->op == IR_END && end2->op == IR_END) {
1761 ir_ref if_true_ref, if_false_ref;
1762 ir_insn *if_true, *if_false;
1763 ir_op op = IR_IF_FALSE;
1764
1765 ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
1766
1767 if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
1768 IR_ASSERT(IR_IS_CONST_REF(cond->op3));
1769 SWAP_REFS(cond->op2, cond->op3);
1770 SWAP_REFS(merge->op1, merge->op2);
1771 SWAP_REFS(end1_ref, end2_ref);
1772 SWAP_INSNS(end1, end2);
1773 }
1774 if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
1775 SWAP_REFS(if_true_ref, if_false_ref);
1776 op = IR_IF_TRUE;
1777 }
1778 if_true = &ctx->ir_base[if_true_ref];
1779 if_false = &ctx->ir_base[if_false_ref];
1780
1781 if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
1782 if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
1783 /* Simple IF Split
1784 *
1785 * | | | |
1786 * | END | END
1787 * END / END \
1788 * | +---+ | +
1789 * | / | |
1790 * MERGE | |
1791 * | \ | |
1792 * | PHI(false, true) | |
1793 * | / | |
1794 * IF => | |
1795 * | \ | |
1796 * | +------+ | |
1797 * | IF_TRUE | BEGIN
1798 * IF_FALSE | BEGIN
1799 * | |
1800 */
1801 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
1802 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
1803
1804 MAKE_NOP(merge); CLEAR_USES(merge_ref);
1805 MAKE_NOP(cond); CLEAR_USES(cond_ref);
1806 MAKE_NOP(insn); CLEAR_USES(ref);
1807
1808 if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
1809 if_false->op1 = end1_ref;
1810
1811 if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
1812 if_true->op1 = end2_ref;
1813
1814 ir_bitqueue_add(worklist, if_false_ref);
1815 ir_bitqueue_add(worklist, if_true_ref);
1816
1817 return 1;
1818 } else {
1819 /* Simple IF Split
1820 *
1821 * | | | |
1822 * | END | END
1823 * END / END |
1824 * | +---+ | |
1825 * | / | |
1826 * MERGE | +
1827 * | \ | /
1828 * | PHI(false, false) | /
1829 * | / | /
1830 * IF => | /
1831 * | \ | /
1832 * | +------+ | /
1833 * | IF_TRUE | / BEGIN(unreachable)
1834 * IF_FALSE | MERGE
1835 * | |
1836 */
1837 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
1838 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
1839
1840 MAKE_NOP(merge); CLEAR_USES(merge_ref);
1841 MAKE_NOP(cond); CLEAR_USES(cond_ref);
1842 MAKE_NOP(insn); CLEAR_USES(ref);
1843
1844 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
1845 if_false->op1 = end1_ref;
1846 if_false->op2 = end2_ref;
1847
1848 if_true->optx = IR_BEGIN;
1849 if_true->op1 = IR_UNUSED;
1850
1851 ctx->flags2 &= ~IR_CFG_REACHABLE;
1852
1853 ir_bitqueue_add(worklist, if_false_ref);
1854
1855 return 1;
1856 }
1857 }
1858
1859 /* Simple IF Split
1860 *
1861 * | | | |
1862 * | END | IF(X)
1863 * END / END / \
1864 * | +---+ | +--+ +
1865 * | / | / |
1866 * MERGE | IF_FALSE |
1867 * | \ | | |
1868 * | PHI(false, X) | | |
1869 * | / | | |
1870 * IF => | END |
1871 * | \ | | |
1872 * | +------+ | | |
1873 * | IF_TRUE | | IF_TRUE
1874 * IF_FALSE | MERGE
1875 * | |
1876 */
1877 ir_use_list_remove_all(ctx, merge_ref, cond_ref);
1878 ir_use_list_remove_all(ctx, ref, if_true_ref);
1879 if (!IR_IS_CONST_REF(cond->op3)) {
1880 ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
1881 }
1882 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
1883 ir_use_list_add(ctx, end2_ref, if_true_ref);
1884
1885 end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
1886 end2->op2 = cond->op3;
1887
1888 merge->optx = IR_OPTX(op, IR_VOID, 1);
1889 merge->op1 = end2_ref;
1890 merge->op2 = IR_UNUSED;
1891
1892 MAKE_NOP(cond);
1893 CLEAR_USES(cond_ref);
1894
1895 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
1896 insn->op1 = merge_ref;
1897 insn->op2 = IR_UNUSED;
1898
1899 if_true->op1 = end2_ref;
1900
1901 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
1902 if_false->op1 = end1_ref;
1903 if_false->op2 = ref;
1904
1905 ir_bitqueue_add(worklist, if_false_ref);
1906 if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
1907 ir_bitqueue_add(worklist, end2->op1);
1908 }
1909
1910 return 1;
1911 }
1912 }
1913 }
1914
1915 return 0;
1916 }
1917
1918 static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
1919 {
1920 ir_ref cond_ref = insn->op2;
1921 ir_insn *cond = &ctx->ir_base[cond_ref];
1922
1923 if (cond->op >= IR_EQ && cond->op <= IR_UGT
1924 && IR_IS_CONST_REF(cond->op2)
1925 && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
1926 && ctx->use_lists[insn->op2].count == 1) {
1927 ir_ref phi_ref = cond->op1;
1928 ir_insn *phi = &ctx->ir_base[phi_ref];
1929
1930 if (phi->op == IR_PHI
1931 && phi->inputs_count == 3
1932 && phi->op1 == insn->op1
1933 && ctx->use_lists[phi_ref].count == 1
1934 && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
1935 || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
1936 ir_ref merge_ref = insn->op1;
1937 ir_insn *merge = &ctx->ir_base[merge_ref];
1938
1939 if (ctx->use_lists[merge_ref].count == 2) {
1940 ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
1941 ir_insn *end1 = &ctx->ir_base[end1_ref];
1942 ir_insn *end2 = &ctx->ir_base[end2_ref];
1943
1944 if (end1->op == IR_END && end2->op == IR_END) {
1945 ir_ref if_true_ref, if_false_ref;
1946 ir_insn *if_true, *if_false;
1947 ir_op op = IR_IF_FALSE;
1948
1949 ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);
1950
1951 if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
1952 IR_ASSERT(IR_IS_CONST_REF(phi->op3));
1953 SWAP_REFS(phi->op2, phi->op3);
1954 SWAP_REFS(merge->op1, merge->op2);
1955 SWAP_REFS(end1_ref, end2_ref);
1956 SWAP_INSNS(end1, end2);
1957 }
1958 if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
1959 SWAP_REFS(if_true_ref, if_false_ref);
1960 op = IR_IF_TRUE;
1961 }
1962 if_true = &ctx->ir_base[if_true_ref];
1963 if_false = &ctx->ir_base[if_false_ref];
1964
1965 if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
1966 if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
1967 /* IF Split
1968 *
1969 * | | | |
1970 * | END | END
1971 * END / END |
1972 * | +---+ | |
1973 * | / | |
1974 * MERGE | |
1975 * | \ | |
1976 * | PHI(C1, X) | |
1977 * | | | |
1978 * | CMP(_, C2) | |
1979 * | / | |
1980 * IF => | |
1981 * | \ | |
1982 * | +------+ | |
1983 * | IF_TRUE | BEGIN
1984 * IF_FALSE | BEGIN |
1985 * | |
1986 */
1987
1988 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
1989 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);
1990
1991 MAKE_NOP(merge); CLEAR_USES(merge_ref);
1992 MAKE_NOP(phi); CLEAR_USES(phi_ref);
1993 MAKE_NOP(cond); CLEAR_USES(cond_ref);
1994 MAKE_NOP(insn); CLEAR_USES(ref);
1995
1996 if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
1997 if_false->op1 = end1_ref;
1998
1999 if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
2000 if_true->op1 = end2_ref;
2001
2002 ir_bitqueue_add(worklist, if_false_ref);
2003 ir_bitqueue_add(worklist, if_true_ref);
2004
2005 return 1;
2006 } else {
2007 /* IF Split
2008 *
2009 * | | | |
2010 * | END | END
2011 * END / END |
2012 * | +---+ | |
2013 * | / | |
2014 * MERGE | |
2015 * | \ | |
2016 * | PHI(C1, X) | |
2017 * | | | +
2018 * | CMP(_, C2) | /
2019 * | / | /
2020 * IF => | /
2021 * | \ | /
2022 * | +------+ | /
2023 * | IF_TRUE | / BEGIN(unreachable)
2024 * IF_FALSE | MERGE |
2025 * | |
2026 */
2027
2028 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2029 ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);
2030
2031 MAKE_NOP(merge); CLEAR_USES(merge_ref);
2032 MAKE_NOP(phi); CLEAR_USES(phi_ref);
2033 MAKE_NOP(cond); CLEAR_USES(cond_ref);
2034 MAKE_NOP(insn); CLEAR_USES(ref);
2035
2036 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
2037 if_false->op1 = end1_ref;
2038 if_false->op2 = end2_ref;
2039
2040 if_true->optx = IR_BEGIN;
2041 if_true->op1 = IR_UNUSED;
2042
2043 ctx->flags2 &= ~IR_CFG_REACHABLE;
2044
2045 ir_bitqueue_add(worklist, if_false_ref);
2046
2047 return 1;
2048 }
2049 } else {
2050 /* IF Split
2051 *
2052 * | | | |
2053 * | END | IF<----+
2054 * END / END / \ |
2055 * | +---+ | +--+ + |
2056 * | / | / | |
2057 * MERGE | IF_FALSE | |
2058 * | \ | | | |
2059 * | PHI(C1, X) | | | |
2060 * | | | | | |
2061 * | CMP(_, C2) | | | CMP(X, C2)
2062 * | / | | |
2063 * IF => | END |
2064 * | \ | | |
2065 * | +------+ | | |
2066 * | IF_TRUE | | IF_TRUE
2067 * IF_FALSE | MERGE
2068 * | |
2069 */
2070
2071 ir_use_list_remove_all(ctx, merge_ref, phi_ref);
2072 ir_use_list_remove_all(ctx, ref, if_true_ref);
2073 if (!IR_IS_CONST_REF(phi->op3)) {
2074 ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
2075 }
2076 ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
2077 ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
2078 ir_use_list_add(ctx, end2_ref, if_true_ref);
2079
2080 end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
2081 end2->op2 = insn->op2;
2082
2083 merge->optx = IR_OPTX(op, IR_VOID, 1);
2084 merge->op1 = end2_ref;
2085 merge->op2 = IR_UNUSED;
2086
2087 cond->op1 = phi->op3;
2088 MAKE_NOP(phi);
2089 CLEAR_USES(phi_ref);
2090
2091 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
2092 insn->op1 = merge_ref;
2093 insn->op2 = IR_UNUSED;
2094
2095 if_true->op1 = end2_ref;
2096
2097 if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
2098 if_false->op1 = end1_ref;
2099 if_false->op2 = ref;
2100
2101 ir_bitqueue_add(worklist, if_false_ref);
2102 if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
2103 ir_bitqueue_add(worklist, end2->op1);
2104 }
2105
2106 return 1;
2107 }
2108 }
2109 }
2110 }
2111 }
2112
2113 return 0;
2114 }
2115
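/*
 * ir_optimize_merge() is called for reachable MERGE nodes by the pass below.
 * A MERGE with a single use (its successor control node) is a candidate for
 * empty-diamond removal.  A two-input MERGE whose only uses are a PHI and a
 * following IF is a candidate for the IF-split transformations above;
 * otherwise ir_optimize_phi() gets a chance to simplify the PHI itself.
 */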
2116 static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
2117 {
2118 ir_use_list *use_list = &ctx->use_lists[merge_ref];
2119
2120 if (use_list->count == 1) {
2121 ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
2122 } else if (use_list->count == 2) {
2123 if (merge->inputs_count == 2) {
2124 ir_ref phi_ref = ctx->use_edges[use_list->refs];
2125 ir_insn *phi = &ctx->ir_base[phi_ref];
2126
2127 ir_ref next_ref = ctx->use_edges[use_list->refs + 1];
2128 ir_insn *next = &ctx->ir_base[next_ref];
2129 IR_ASSERT(next->op != IR_PHI);
2130
2131 if (phi->op == IR_PHI) {
2132 if (next->op == IR_IF && next->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
2133 if (next->op2 == phi_ref) {
2134 if (ir_try_split_if(ctx, next_ref, next, worklist)) {
2135 return;
2136 }
2137 } else {
2138 ir_insn *cmp = &ctx->ir_base[next->op2];
2139
2140 if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
2141 && cmp->op1 == phi_ref
2142 && IR_IS_CONST_REF(cmp->op2)
2143 && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
2144 && ctx->use_lists[next->op2].count == 1) {
2145 if (ir_try_split_if_cmp(ctx, next_ref, next, worklist)) {
2146 return;
2147 }
2148 }
2149 }
2150 }
2151 ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
2152 }
2153 }
2154 }
2155 }
2156
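/*
 * ir_sccp() runs the combined SCCP / copy-propagation pass in three steps:
 *
 *   1. Propagation: interpret all reachable instructions over the lattice
 *      (TOP -> constant/copy -> BOTTOM) using "worklist", recording the
 *      single feasible successor of IF/SWITCH nodes and counting the
 *      unfeasible inputs of MERGE/LOOP_BEGIN nodes.
 *   2. Rewrite: replace instructions by the discovered constants and copies,
 *      delete unreachable code, collapse one-way IF/SWITCH nodes and strip
 *      dead MERGE inputs.
 *   3. Cleanup: drain "worklist2" to re-fold affected instructions, promote
 *      conversions, merge straight-line blocks, simplify MERGEs and remove
 *      dead loads.
 *
 * A minimal usage sketch (an assumption for illustration, not a fixed API
 * contract): the pass relies on ctx->use_lists/ctx->use_edges and is
 * normally driven by the optimizer when folding is enabled, e.g.
 *
 *     ir_build_def_use_lists(ctx);
 *     if (ctx->flags & IR_OPT_FOLDING) {
 *         ir_sccp(ctx);
 *     }
 */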
2157 int ir_sccp(ir_ctx *ctx)
2158 {
2159 ir_ref i, j, n, *p, use;
2160 ir_use_list *use_list;
2161 ir_insn *insn, *use_insn, *value;
2162 uint32_t flags;
2163 ir_bitqueue worklist, worklist2;
2164 ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));
2165
2166 ctx->flags2 |= IR_OPT_IN_SCCP;
2167
2168 	/* A slightly modified version of the SCCP algorithm of M. N. Wegman and F. K. Zadeck */
2169 ir_bitqueue_init(&worklist2, ctx->insns_count);
2170 ir_bitqueue_init(&worklist, ctx->insns_count);
2171 worklist.pos = 0;
2172 ir_bitset_incl(worklist.set, 1);
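	/* Propagation phase: seed the worklist with the entry node (ref 1) and
	 * process instructions until a fixed point is reached.  Lattice values
	 * only move downwards (TOP -> constant/copy -> BOTTOM), so every
	 * instruction is revisited a bounded number of times.
	 */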
2173 while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
2174 insn = &ctx->ir_base[i];
2175 flags = ir_op_flags[insn->op];
2176 if (flags & IR_OP_FLAG_DATA) {
2177 if (ctx->use_lists[i].count == 0) {
2178 /* dead code */
2179 continue;
2180 } else if (insn->op == IR_PHI) {
2181 if (!ir_sccp_meet_phi(ctx, _values, i, insn, &worklist)) {
2182 continue;
2183 }
2184 } else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
2185 bool may_benefit = 0;
2186 bool has_top = 0;
2187
2188 IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
2189 n = IR_INPUT_EDGES_COUNT(flags);
2190 for (p = insn->ops + 1; n > 0; p++, n--) {
2191 ir_ref input = *p;
2192 if (input > 0) {
2193 if (_values[input].optx == IR_TOP) {
2194 has_top = 1;
2195 					/* do backward propagation only once */
2196 if (!_values[input].op1) {
2197 _values[input].op1 = 1;
2198 ir_bitqueue_add(&worklist, input);
2199 }
2200 } else if (_values[input].optx != IR_BOTTOM) {
2201 					/* Perform folding only if some of the direct inputs
2202 					 * are going to be replaced by a constant or a copy.
2203 					 * This approach may miss folding optimizations that
2204 					 * depend on indirect inputs, e.g. reassociation.
2205 					 */
2206 may_benefit = 1;
2207 }
2208 }
2209 }
2210 if (has_top) {
2211 continue;
2212 }
2213 if (!may_benefit) {
2214 IR_MAKE_BOTTOM(i);
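				/* Conversions are still queued on worklist2 so the post-SCCP
				 * cleanup pass below may try to promote them even though
				 * nothing was folded here.
				 */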
2215 if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC
2216 || insn->op == IR_ZEXT || insn->op == IR_SEXT) {
2217 ir_bitqueue_add(&worklist2, i);
2218 }
2219 } else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
2220 /* not changed */
2221 continue;
2222 } else if (_values[i].optx == IR_BOTTOM) {
2223 insn = &ctx->ir_base[i];
2224 if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC
2225 || insn->op == IR_ZEXT || insn->op == IR_SEXT) {
2226 ir_bitqueue_add(&worklist2, i);
2227 }
2228 }
2229 } else {
2230 IR_MAKE_BOTTOM(i);
2231 }
2232 } else if (flags & IR_OP_FLAG_BB_START) {
2233 if (insn->op == IR_MERGE || insn->op == IR_BEGIN) {
2234 ir_bitqueue_add(&worklist2, i);
2235 }
2236 if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
2237 ir_ref unfeasible_inputs = 0;
2238
2239 n = insn->inputs_count;
2240 if (n > 3 && _values[i].optx == IR_TOP) {
2241 for (j = 0; j < (n>>2); j++) {
2242 _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
2243 }
2244 }
2245 for (p = insn->ops + 1; n > 0; p++, n--) {
2246 ir_ref input = *p;
2247 IR_ASSERT(input > 0);
2248 if (_values[input].optx == IR_TOP) {
2249 unfeasible_inputs++;
2250 }
2251 }
2252 if (unfeasible_inputs == 0) {
2253 IR_MAKE_BOTTOM(i);
2254 } else if (_values[i].op1 != unfeasible_inputs) {
2255 _values[i].optx = IR_MERGE;
2256 _values[i].op1 = unfeasible_inputs;
2257 } else {
2258 continue;
2259 }
2260 } else {
2261 IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1));
2262 IR_MAKE_BOTTOM(i);
2263 }
2264 } else {
2265 IR_ASSERT(insn->op1 > 0);
2266 if (_values[insn->op1].optx == IR_TOP) {
2267 				/* control input is not feasible */
2268 continue;
2269 }
2270 if (insn->op == IR_IF) {
2271 if (IR_IS_TOP(insn->op2)) {
2272 					/* do backward propagation only once */
2273 if (!_values[insn->op2].op1) {
2274 _values[insn->op2].op1 = 1;
2275 ir_bitqueue_add(&worklist, insn->op2);
2276 }
2277 continue;
2278 }
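				/* If the condition is already a known (non-copy) constant,
				 * only one successor edge is feasible.  Remember it as IR_IF
				 * with op1 = the taken target so the rewrite phase can drop
				 * the other branch.
				 */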
2279 if (!IR_IS_BOTTOM(insn->op2)
2280 #if IR_COMBO_COPY_PROPAGATION
2281 && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY)
2282 #endif
2283 ) {
2284 bool b = ir_sccp_is_true(ctx, _values, insn->op2);
2285 use_list = &ctx->use_lists[i];
2286 IR_ASSERT(use_list->count == 2);
2287 p = &ctx->use_edges[use_list->refs];
2288 use = *p;
2289 use_insn = &ctx->ir_base[use];
2290 IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
2291 if ((use_insn->op == IR_IF_TRUE) != b) {
2292 use = *(p+1);
2293 IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
2294 }
2295 if (_values[i].optx == IR_TOP) {
2296 _values[i].optx = IR_IF;
2297 _values[i].op1 = use;
2298 } else if (_values[i].optx != IR_IF || _values[i].op1 != use) {
2299 IR_MAKE_BOTTOM(i);
2300 }
2301 if (!IR_IS_BOTTOM(use)) {
2302 ir_bitqueue_add(&worklist, use);
2303 }
2304 continue;
2305 }
2306 IR_MAKE_BOTTOM(i);
2307 } else if (insn->op == IR_SWITCH) {
2308 if (IR_IS_TOP(insn->op2)) {
2309 					/* do backward propagation only once */
2310 if (!_values[insn->op2].op1) {
2311 _values[insn->op2].op1 = 1;
2312 ir_bitqueue_add(&worklist, insn->op2);
2313 }
2314 continue;
2315 }
2316 if (!IR_IS_BOTTOM(insn->op2)
2317 #if IR_COMBO_COPY_PROPAGATION
2318 && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY)
2319 #endif
2320 ) {
2321 ir_ref use_case = IR_UNUSED;
2322
2323 use_list = &ctx->use_lists[i];
2324 n = use_list->count;
2325 for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
2326 use = *p;
2327 IR_ASSERT(use > 0);
2328 use_insn = &ctx->ir_base[use];
2329 if (use_insn->op == IR_CASE_VAL) {
2330 if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
2331 use_case = use;
2332 break;
2333 }
2334 } else if (use_insn->op == IR_CASE_DEFAULT) {
2335 use_case = use;
2336 }
2337 }
2338 if (use_case) {
2339 use_insn = &ctx->ir_base[use_case];
2340 if (_values[i].optx == IR_TOP) {
2341 _values[i].optx = IR_IF;
2342 _values[i].op1 = use_case;
2343 } else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) {
2344 IR_MAKE_BOTTOM(i);
2345 }
2346 if (!IR_IS_BOTTOM(use_case)) {
2347 ir_bitqueue_add(&worklist, use_case);
2348 }
2349 }
2350 if (!IR_IS_BOTTOM(i)) {
2351 continue;
2352 }
2353 }
2354 IR_MAKE_BOTTOM(i);
2355 } else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
2356 /* dead load */
2357 _values[i].optx = IR_LOAD;
2358 } else {
2359 IR_MAKE_BOTTOM(i);
2360
2361 /* control, call, load and store instructions may have unprocessed inputs */
2362 n = IR_INPUT_EDGES_COUNT(flags);
2363 if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
2364 for (j = 0; j < (n>>2); j++) {
2365 _values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
2366 }
2367 for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
2368 IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
2369 use = *p;
2370 if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
2371 ir_bitqueue_add(&worklist, use);
2372 }
2373 }
2374 } else if (n >= 2) {
2375 IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
2376 use = insn->op2;
2377 if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
2378 ir_bitqueue_add(&worklist, use);
2379 }
2380 if (n > 2) {
2381 IR_ASSERT(n == 3);
2382 IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
2383 use = insn->op3;
2384 if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
2385 ir_bitqueue_add(&worklist, use);
2386 }
2387 }
2388 }
2389 }
2390 }
2391 use_list = &ctx->use_lists[i];
2392 n = use_list->count;
2393 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
2394 use = *p;
2395 if (_values[use].optx != IR_BOTTOM) {
2396 ir_bitqueue_add(&worklist, use);
2397 }
2398 }
2399 }
2400
2401 #ifdef IR_DEBUG
2402 if (ctx->flags & IR_DEBUG_SCCP) {
2403 for (i = 1; i < ctx->insns_count; i++) {
2404 if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
2405 fprintf(stderr, "%d. CONST(", i);
2406 ir_print_const(ctx, &_values[i], stderr, true);
2407 fprintf(stderr, ")\n");
2408 #if IR_COMBO_COPY_PROPAGATION
2409 } else if (_values[i].op == IR_COPY) {
2410 fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
2411 #endif
2412 } else if (IR_IS_TOP(i)) {
2413 fprintf(stderr, "%d. TOP\n", i);
2414 } else if (_values[i].op == IR_IF) {
2415 fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
2416 } else if (_values[i].op == IR_MERGE) {
2417 fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
2418 } else if (!IR_IS_BOTTOM(i)) {
2419 fprintf(stderr, "%d. %d\n", i, _values[i].op);
2420 }
2421 }
2422 }
2423 #endif
2424
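	/* Rewrite phase: materialize the lattice results computed above - fold
	 * instructions to constants, forward copies, delete instructions proven
	 * unreachable, collapse one-way IF/SWITCH nodes, and queue MERGEs with
	 * unfeasible inputs as well as dead loads for the follow-up passes.
	 */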
2425 for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
2426 if (value->op == IR_BOTTOM) {
2427 continue;
2428 } else if (IR_IS_CONST_OP(value->op)) {
2429 /* replace instruction by constant */
2430 j = ir_const(ctx, value->val, value->type);
2431 ir_sccp_replace_insn(ctx, _values, i, j, &worklist2);
2432 } else if (IR_IS_SYM_CONST(value->op)) {
2433 /* replace instruction by constant */
2434 j = ir_const_ex(ctx, value->val, value->type, value->optx);
2435 ir_sccp_replace_insn(ctx, _values, i, j, &worklist2);
2436 #if IR_COMBO_COPY_PROPAGATION
2437 } else if (value->op == IR_COPY) {
2438 ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist2);
2439 #endif
2440 } else if (value->op == IR_TOP) {
2441 /* remove unreachable instruction */
2442 insn = &ctx->ir_base[i];
2443 if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
2444 if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) {
2445 ir_sccp_remove_insn(ctx, _values, i, &worklist2);
2446 }
2447 } else {
2448 if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
2449 /* remove from terminators list */
2450 ir_ref prev = ctx->ir_base[1].op1;
2451 if (prev == i) {
2452 ctx->ir_base[1].op1 = insn->op3;
2453 } else {
2454 while (prev) {
2455 if (ctx->ir_base[prev].op3 == i) {
2456 ctx->ir_base[prev].op3 = insn->op3;
2457 break;
2458 }
2459 prev = ctx->ir_base[prev].op3;
2460 }
2461 }
2462 }
2463 ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, &worklist2);
2464 }
2465 } else if (value->op == IR_IF) {
2466 			/* remove one-way IF/SWITCH */
2467 ir_sccp_remove_if(ctx, _values, i, value->op1);
2468 } else if (value->op == IR_MERGE) {
2469 /* schedule merge to remove unfeasible MERGE inputs */
2470 ir_bitqueue_add(&worklist, i);
2471 } else if (value->op == IR_LOAD) {
2472 /* schedule dead load elimination */
2473 ir_bitqueue_add(&worklist2, i);
2474 }
2475 }
2476
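	/* Strip the inputs of MERGE/LOOP_BEGIN nodes that were proven
	 * unreachable; a MERGE left with a single feasible input can degenerate
	 * into a plain BEGIN.
	 */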
2477 while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
2478 IR_ASSERT(_values[i].op == IR_MERGE);
2479 ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1);
2480 }
2481
2482 ctx->flags2 |= IR_CFG_REACHABLE;
2483
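	/* Cleanup phase: iterate until worklist2 drains, re-folding instructions
	 * whose inputs changed, promoting conversions, merging straight-line
	 * blocks, simplifying MERGE diamonds and eliminating dead loads.
	 */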
2484 while ((i = ir_bitqueue_pop(&worklist2)) >= 0) {
2485 insn = &ctx->ir_base[i];
2486 if (IR_IS_FOLDABLE_OP(insn->op)) {
2487 if (ctx->use_lists[i].count == 0) {
2488 if (insn->op == IR_PHI) {
2489 ir_bitqueue_add(&worklist2, insn->op1);
2490 }
2491 ir_sccp_remove_insn2(ctx, i, &worklist2);
2492 } else {
2493 insn = &ctx->ir_base[i];
2494 switch (insn->op) {
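					/* For conversions, first try to narrow the operand chain
					 * so the conversion becomes redundant or cheaper (the
					 * ir_may_promote_*() helpers decide whether this is
					 * legal); only if that fails, fall back to ordinary
					 * folding.
					 */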
2495 case IR_FP2FP:
2496 if (insn->type == IR_FLOAT) {
2497 if (ir_may_promote_d2f(ctx, insn->op1)) {
2498 ir_ref ref = ir_promote_d2f(ctx, insn->op1, i);
2499 insn->op1 = ref;
2500 ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
2501 break;
2502 }
2503 } else {
2504 if (ir_may_promote_f2d(ctx, insn->op1)) {
2505 ir_ref ref = ir_promote_f2d(ctx, insn->op1, i);
2506 insn->op1 = ref;
2507 ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
2508 break;
2509 }
2510 }
2511 goto folding;
2512 case IR_FP2INT:
2513 if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
2514 if (ir_may_promote_d2f(ctx, insn->op1)) {
2515 insn->op1 = ir_promote_d2f(ctx, insn->op1, i);
2516 }
2517 } else {
2518 if (ir_may_promote_f2d(ctx, insn->op1)) {
2519 insn->op1 = ir_promote_f2d(ctx, insn->op1, i);
2520 }
2521 }
2522 goto folding;
2523 case IR_TRUNC:
2524 if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) {
2525 ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i);
2526 insn->op1 = ref;
2527 ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
2528 break;
2529 }
2530 goto folding;
2531 case IR_SEXT:
2532 case IR_ZEXT:
2533 if (ir_try_promote_ext(ctx, i, insn, &worklist2)) {
2534 break;
2535 }
2536 goto folding;
2537 case IR_PHI:
2538 break;
2539 default:
2540 folding:
2541 ir_sccp_fold2(ctx, i, &worklist2);
2542 break;
2543 }
2544 }
2545 } else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
2546 if (!(ctx->flags & IR_OPT_CFG)) {
2547 /* pass */
2548 } else if (insn->op == IR_BEGIN) {
2549 if (ctx->ir_base[insn->op1].op == IR_END
2550 && ctx->use_lists[i].count == 1) {
2551 ir_merge_blocks(ctx, insn->op1, i, &worklist2);
2552 }
2553 } else if (insn->op == IR_MERGE) {
2554 ir_optimize_merge(ctx, i, insn, &worklist2);
2555 }
2556 } else if (ir_is_dead_load(ctx, i)) {
2557 ir_ref next = ctx->use_edges[ctx->use_lists[i].refs];
2558
2559 			/* remove the LOAD from the doubly linked control list */
2560 ctx->ir_base[next].op1 = insn->op1;
2561 ir_use_list_replace_one(ctx, insn->op1, i, next);
2562 insn->op1 = IR_UNUSED;
2563 ir_sccp_remove_insn2(ctx, i, &worklist2);
2564 }
2565 }
2566
2567 ir_mem_free(_values);
2568 ir_bitqueue_free(&worklist);
2569 ir_bitqueue_free(&worklist2);
2570
2571 ctx->flags2 &= ~IR_OPT_IN_SCCP;
2572 ctx->flags2 |= IR_SCCP_DONE;
2573
2574 return 1;
2575 }
2576