1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (SCCP - Sparse Conditional Constant Propagation)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 *
7 * The SCCP algorithm is based on M. N. Wegman and F. K. Zadeck publication
8 * See: M. N. Wegman and F. K. Zadeck. "Constant propagation with conditional branches"
9 * ACM Transactions on Programming Languages and Systems, 13(2):181-210, April 1991
10 */
11
12 #include "ir.h"
13 #include "ir_private.h"
14
/* Lattice values: TOP = "not yet known", BOTTOM = "not a constant".
 * Any other value in _values[] is a constant (or an IR_COPY alias).
 * Macro parameters are parenthesized to avoid operator-precedence
 * surprises when expressions are passed in (CERT PRE01-C). */
#define IR_TOP                  IR_UNUSED
#define IR_BOTTOM               IR_LAST_OP

#define IR_MAKE_TOP(ref)        do {IR_ASSERT((ref) > 0); _values[ref].optx = IR_TOP;} while (0)
#define IR_MAKE_BOTTOM(ref)     do {IR_ASSERT((ref) > 0); _values[ref].optx = IR_BOTTOM;} while (0)

#define IR_IS_TOP(ref)          ((ref) >= 0 && _values[ref].optx == IR_TOP)
#define IR_IS_BOTTOM(ref)       ((ref) >= 0 && _values[ref].optx == IR_BOTTOM)
#define IR_IS_FEASIBLE(ref)     ((ref) >= 0 && _values[ref].optx != IR_TOP)

/* Combine SCCP with copy propagation (track IR_COPY aliases in the lattice) */
#define IR_COMBO_COPY_PROPAGATION 1
26
#if IR_COMBO_COPY_PROPAGATION
/* Resolve "a" through a single IR_COPY indirection in the SCCP lattice,
 * returning the value the copy refers to.  COPY chains are kept at most
 * one level deep, which the assertion below checks. */
IR_ALWAYS_INLINE ir_ref ir_sccp_identity(ir_insn *_values, ir_ref a)
{
	if (a > 0 && _values[a].op == IR_COPY) {
		a = _values[a].op1;
		IR_ASSERT(a < 0 || _values[a].op != IR_COPY); /* this may be a copy of symbolic constant */
	}
	return a;
}
#endif
37
/* Try to fold instruction (opt, op1, op2, op3) over the SCCP lattice and
 * update the lattice cell of "res" with the outcome (constant, COPY alias
 * or BOTTOM).
 *
 * Returns 1 when the lattice value of "res" changed (its uses must be
 * re-visited) and 0 when it stayed the same.
 */
static ir_ref ir_sccp_fold(ir_ctx *ctx, ir_insn *_values, ir_ref res, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3)
{
	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;

#if IR_COMBO_COPY_PROPAGATION
	/* look through COPY aliases so the folding engine sees real operands */
	op1 = ir_sccp_identity(_values, op1);
	op2 = ir_sccp_identity(_values, op2);
	op3 = ir_sccp_identity(_values, op3);
#endif

restart:
	/* for each operand use its lattice constant when one is known,
	 * otherwise the real instruction */
	op1_insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
	op2_insn = (op2 > 0 && IR_IS_CONST_OP(_values[op2].op)) ? _values + op2 : ctx->ir_base + op2;
	op3_insn = (op3 > 0 && IR_IS_CONST_OP(_values[op3].op)) ? _values + op3 : ctx->ir_base + op3;

	switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
		case IR_FOLD_DO_RESTART:
			/* folding produced a new instruction shape — retry with it */
			opt = ctx->fold_insn.optx;
			op1 = ctx->fold_insn.op1;
			op2 = ctx->fold_insn.op2;
			op3 = ctx->fold_insn.op3;
			goto restart;
		case IR_FOLD_DO_EMIT:
			/* not foldable to a constant — the value stays a real instruction */
			IR_MAKE_BOTTOM(res);
			return 1;
		case IR_FOLD_DO_COPY:
			op1 = ctx->fold_insn.op1;
#if IR_COMBO_COPY_PROPAGATION
			op1 = ir_sccp_identity(_values, op1);
#endif
			insn = (op1 > 0 && IR_IS_CONST_OP(_values[op1].op)) ? _values + op1 : ctx->ir_base + op1;
			if (IR_IS_CONST_OP(insn->op)) {
				/* pass: handled by the constant-meet code below */
#if IR_COMBO_COPY_PROPAGATION
			} else if (_values[res].optx == IR_TOP) {
				/* first visit: record the COPY alias */
				_values[res].optx = IR_OPT(IR_COPY, insn->type);
				_values[res].op1 = op1;
				return 1;
			} else if (_values[res].op == IR_COPY && _values[res].op1 == op1) {
				return 0; /* not changed */
			} else {
				IR_ASSERT(_values[res].optx != IR_BOTTOM);
				/* we don't check for widening */
				_values[res].optx = IR_OPT(IR_COPY, insn->type);
				_values[res].op1 = op1;
				return 1;
#else
			} else {
				IR_MAKE_BOTTOM(res);
				return 1;
#endif
			}
			break;
		case IR_FOLD_DO_CONST:
			insn = &ctx->fold_insn;
			break;
		default:
			IR_ASSERT(0);
			return 0;
	}

	/* meet the folded constant with the current lattice value of "res" */
	if (IR_IS_TOP(res)) {
		_values[res].optx = IR_OPT(insn->type, insn->type);
		_values[res].val.u64 = insn->val.u64;
		return 1;
	} else if (_values[res].opt != IR_OPT(insn->type, insn->type) || _values[res].val.u64 != insn->val.u64) {
		/* conflicting constants meet to BOTTOM */
		IR_MAKE_BOTTOM(res);
		return 1;
	}
	return 0; /* not changed */
}
109
/* Compute the lattice meet over the live inputs of PHI "i" (instruction
 * "insn").  Only inputs whose controlling MERGE edge is feasible are
 * considered.  The result is a constant, a COPY alias of a single input,
 * or BOTTOM.  Returns 1 when the lattice value of "i" changed. */
static bool ir_sccp_meet_phi(ir_ctx *ctx, ir_insn *_values, ir_ref i, ir_insn *insn, ir_bitqueue *worklist)
{
	ir_ref j, n, input, *merge_input, *p;
	ir_insn *v, *new_const = NULL;
#if IR_COMBO_COPY_PROPAGATION
	ir_ref new_copy;
#endif

	/* nothing to do until the controlling MERGE is reachable */
	if (!IR_IS_FEASIBLE(insn->op1)) {
		return 0;
	}
	n = insn->inputs_count;
	if (n > 3 && _values[i].optx == IR_TOP) {
		for (j = 0; j < (n>>2); j++) {
			_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
		}
	}

	/* Phase 1: find the first live input with a known lattice value */
	p = insn->ops + 2;
	merge_input = ctx->ir_base[insn->op1].ops + 1;
	for (; --n > 0; p++, merge_input++) {
		IR_ASSERT(*merge_input > 0);
		if (_values[*merge_input].optx == IR_TOP) {
			/* this control edge is (still) unreachable — skip its input */
			continue;
		}

		input = *p;
		if (IR_IS_CONST_REF(input)) {
			v = &ctx->ir_base[input];
		} else if (input == i) {
			/* self-reference does not constrain the meet */
			continue;
		} else {
			v = &_values[input];
			if (v->op == IR_TOP) {
				/* do backward propagation only once (op1 marks "queued") */
				if (!v->op1) {
					v->op1 = 1;
					ir_bitqueue_add(worklist, input);
				}
				continue;
#if IR_COMBO_COPY_PROPAGATION
			} else if (v->op == IR_COPY) {
				input = v->op1;
				IR_ASSERT(input < 0 || _values[input].op != IR_COPY);
				new_copy = input;
				goto next;
			} else if (v->op == IR_BOTTOM) {
				/* treat the non-constant input as a COPY candidate */
				new_copy = input;
				goto next;
#else
			} else if (v->op == IR_BOTTOM) {
				IR_MAKE_BOTTOM(i);
				return 1;
#endif
			}
		}
		/* first value found is a constant */
		new_copy = IR_UNUSED;
		new_const = v;
		goto next;
	}

	/* no live input has a known value yet — stay TOP */
	IR_ASSERT(_values[i].optx == IR_TOP);
	return 0;

next:
	p++;
	merge_input++;
	/* Phase 2: check that all remaining live inputs agree with the candidate */
	for (; --n > 0; p++, merge_input++) {
		IR_ASSERT(*merge_input > 0);
		if (_values[*merge_input].optx == IR_TOP) {
			continue;
		}

		input = *p;
		if (IR_IS_CONST_REF(input)) {
			v = &ctx->ir_base[input];
		} else if (input == i) {
			continue;
		} else {
			v = &_values[input];
			if (v->op == IR_TOP) {
				/* do backward propagation only once */
				if (!v->op1) {
					v->op1 = 1;
					ir_bitqueue_add(worklist, input);
				}
				continue;
#if IR_COMBO_COPY_PROPAGATION
			} else if (v->op == IR_COPY) {
				input = v->op1;
				IR_ASSERT(input < 0 || _values[input].op != IR_COPY);
				if (new_copy == input) {
					continue;
				} else {
					IR_MAKE_BOTTOM(i);
					return 1;
				}
			} else if (v->op == IR_BOTTOM) {
				if (new_copy == input) {
					continue;
				} else {
					IR_MAKE_BOTTOM(i);
					return 1;
				}
#else
			} else if (v->op == IR_BOTTOM) {
				IR_MAKE_BOTTOM(i);
				return 1;
#endif
			}
		}
		/* a constant input must match the candidate constant exactly */
		if (!new_const || new_const->opt != v->opt || new_const->val.u64 != v->val.u64) {
			IR_MAKE_BOTTOM(i);
			return 1;
		}
	}

#if IR_COMBO_COPY_PROPAGATION
	if (new_copy) {
		/* all live inputs are the same non-constant value: PHI is a COPY */
		if (_values[i].op == IR_COPY && _values[i].op1 == new_copy) {
			return 0; /* not changed */
		} else {
			IR_ASSERT(_values[i].optx != IR_BOTTOM);
			/* we don't check for widening */
			_values[i].optx = IR_OPT(IR_COPY, ctx->ir_base[new_copy].type);
			_values[i].op1 = new_copy;
			return 1;
		}
	}
#endif

	/* all live inputs are the same constant */
	if (_values[i].optx == IR_TOP) {
		_values[i].optx = new_const->opt;
		_values[i].val.u64 = new_const->val.u64;
		return 1;
	} else if (_values[i].opt == new_const->opt && _values[i].val.u64 == new_const->val.u64) {
		return 0;
	} else {
		IR_MAKE_BOTTOM(i);
		return 1;
	}
}
253
ir_is_dead_load_ex(ir_ctx * ctx,ir_ref ref,uint32_t flags,ir_insn * insn)254 static bool ir_is_dead_load_ex(ir_ctx *ctx, ir_ref ref, uint32_t flags, ir_insn *insn)
255 {
256 if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
257 return ctx->use_lists[ref].count == 1;
258 } else if (insn->op == IR_ALLOCA) {
259 return ctx->use_lists[ref].count == 1;
260 }
261 return 0;
262 }
263
ir_is_dead_load(ir_ctx * ctx,ir_ref ref)264 static bool ir_is_dead_load(ir_ctx *ctx, ir_ref ref)
265 {
266 if (ctx->use_lists[ref].count == 1) {
267 uint32_t flags = ir_op_flags[ctx->ir_base[ref].op];
268
269 if ((flags & (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_MASK)) == (IR_OP_FLAG_MEM|IR_OP_FLAG_MEM_LOAD)) {
270 return 1;
271 } else if (ctx->ir_base[ref].op == IR_ALLOCA) {
272 return 1;
273 }
274 }
275 return 0;
276 }
277
ir_is_dead(ir_ctx * ctx,ir_ref ref)278 static bool ir_is_dead(ir_ctx *ctx, ir_ref ref)
279 {
280 if (ctx->use_lists[ref].count == 0) {
281 return IR_IS_FOLDABLE_OP(ctx->ir_base[ref].op);
282 } else {
283 return ir_is_dead_load(ctx, ref);
284 }
285 return 0;
286 }
287
/* Search the users of "op1" for an existing single-operand instruction
 * with exactly the given optx (op + type + operand count).  Returns the
 * matching ref, or IR_UNUSED when none exists. */
static ir_ref ir_find1(ir_ctx *ctx, uint32_t optx, ir_ref op1)
{
	ir_use_list *use_list;
	ir_ref count, *q;

	IR_ASSERT(!IR_IS_CONST_REF(op1));

	use_list = &ctx->use_lists[op1];
	for (count = use_list->count, q = ctx->use_edges + use_list->refs; count > 0; count--, q++) {
		ir_ref candidate = *q;

		if (ctx->ir_base[candidate].optx == optx) {
			IR_ASSERT(ctx->ir_base[candidate].op1 == op1);
			return candidate;
		}
	}
	return IR_UNUSED;
}
306
ir_sccp_is_true(ir_ctx * ctx,ir_insn * _values,ir_ref a)307 static bool ir_sccp_is_true(ir_ctx *ctx, ir_insn *_values, ir_ref a)
308 {
309 ir_insn *v = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
310
311 return ir_const_is_true(v);
312 }
313
ir_sccp_is_equal(ir_ctx * ctx,ir_insn * _values,ir_ref a,ir_ref b)314 static bool ir_sccp_is_equal(ir_ctx *ctx, ir_insn *_values, ir_ref a, ir_ref b)
315 {
316 ir_insn *v1 = IR_IS_CONST_REF(a) ? &ctx->ir_base[a] : &_values[a];
317 ir_insn *v2 = IR_IS_CONST_REF(b) ? &ctx->ir_base[b] : &_values[b];
318
319 IR_ASSERT(!IR_IS_SYM_CONST(v1->op));
320 IR_ASSERT(!IR_IS_SYM_CONST(v2->op));
321 return v1->val.u64 == v2->val.u64;
322 }
323
/* Turn "ref" into a NOP: drop its use-list and clear every input slot.
 * The inputs' use-lists are NOT touched — callers handle that. */
static void ir_sccp_make_nop(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *insn;
	ir_ref k, count;

	CLEAR_USES(ref);
	insn = &ctx->ir_base[ref];
	count = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (k = 1; k <= count; k++) {
		ir_insn_set_op(insn, k, IR_UNUSED);
	}
}
337
/* Remove instruction "ref" (SCCP-aware variant): clear its uses and
 * inputs, detach it from the use-lists of inputs that will survive SCCP
 * (lattice BOTTOM), and queue inputs that thereby became dead for DCE. */
static void ir_sccp_remove_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_bitqueue *worklist)
{
	ir_ref j, n, *p;
	ir_insn *insn;

	CLEAR_USES(ref);
	insn = &ctx->ir_base[ref];
	n = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
		ir_ref input = *p;
		*p = IR_UNUSED;
		/* only BOTTOM inputs remain real instructions after SCCP */
		if (input > 0 && _values[input].op == IR_BOTTOM) {
			ir_use_list_remove_all(ctx, input, ref);
			if (ir_is_dead(ctx, input)) {
				/* schedule DCE */
				ir_bitqueue_add(worklist, input);
			}
		}
	}
}
359
/* Remove instruction "ref" (post-SCCP variant, no lattice available):
 * clear its uses and inputs, detach it from every input's use-list, and
 * queue newly dead inputs for DCE.  A PHI input left with a single use
 * is re-queued via its MERGE to try the PHI->ABS/MIN/MAX/COND rewrite. */
static void ir_sccp_remove_insn2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
	ir_ref j, n, *p;
	ir_insn *insn;

	CLEAR_USES(ref);
	insn = &ctx->ir_base[ref];
	n = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (j = 1, p = insn->ops + j; j <= n; j++, p++) {
		ir_ref input = *p;
		*p = IR_UNUSED;
		if (input > 0) {
			ir_use_list_remove_all(ctx, input, ref);
			if (ir_is_dead(ctx, input)) {
				/* schedule DCE */
				ir_bitqueue_add(worklist, input);
			} else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
				/* try to optimize PHI into ABS/MIN/MAX/COND */
				ir_bitqueue_add(worklist, ctx->ir_base[input].op1);
			}
		}
	}
}
384
/* Replace every feasible use of "ref" by "new_ref", then remove "ref".
 * Inputs that became dead are queued for DCE; surviving (BOTTOM) users
 * are re-queued for folding.  Note the reallocation-restore dance:
 * ir_use_list_add() may realloc ctx->use_edges, invalidating "p". */
static void ir_sccp_replace_insn(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
	ir_ref j, n, *p, use, k, l;
	ir_insn *insn;
	ir_use_list *use_list;

	IR_ASSERT(ref != new_ref);

	/* detach "ref" from its inputs */
	insn = &ctx->ir_base[ref];
	n = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
		ir_ref input = *p;
		*p = IR_UNUSED;
		if (input > 0) {
			ir_use_list_remove_all(ctx, input, ref);
			if (_values[input].op == IR_BOTTOM) {
				if (ir_is_dead(ctx, input)) {
					/* schedule DCE */
					ir_bitqueue_add(worklist, input);
				}
			}
		}
	}

	/* rewrite the users of "ref" */
	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
		use = *p;
		if (IR_IS_FEASIBLE(use)) {
			insn = &ctx->ir_base[use];
			l = insn->inputs_count;
			for (k = 1; k <= l; k++) {
				if (ir_insn_op(insn, k) == ref) {
					ir_insn_set_op(insn, k, new_ref);
				}
			}
#if IR_COMBO_COPY_PROPAGATION
			if (new_ref > 0 && IR_IS_BOTTOM(use)) {
				if (ir_use_list_add(ctx, new_ref, use)) {
					/* restore after reallocation */
					use_list = &ctx->use_lists[ref];
					n = use_list->count;
					p = &ctx->use_edges[use_list->refs + j];
				}
			}
#endif
			/* schedule folding */
			if (worklist && _values[use].op == IR_BOTTOM) {
				ir_bitqueue_add(worklist, use);
			}
		}
	}

	CLEAR_USES(ref);
}
441
/* Post-SCCP variant of ir_sccp_replace_insn(): no lattice is consulted,
 * every user is rewritten and re-queued.  Single-use PHI inputs are
 * queued for the PHI->ABS/MIN/MAX/COND rewrite attempt. */
static void ir_sccp_replace_insn2(ir_ctx *ctx, ir_ref ref, ir_ref new_ref, ir_bitqueue *worklist)
{
	ir_ref j, n, *p, use, k, l;
	ir_insn *insn;
	ir_use_list *use_list;

	IR_ASSERT(ref != new_ref);

	/* detach "ref" from its inputs */
	insn = &ctx->ir_base[ref];
	n = insn->inputs_count;
	insn->opt = IR_NOP; /* keep "inputs_count" */
	for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
		ir_ref input = *p;
		*p = IR_UNUSED;
		if (input > 0) {
			ir_use_list_remove_all(ctx, input, ref);
			if (ir_is_dead(ctx, input)) {
				/* schedule DCE */
				ir_bitqueue_add(worklist, input);
			} else if (ctx->ir_base[input].op == IR_PHI && ctx->use_lists[input].count == 1) {
				/* try to optimize PHI into ABS/MIN/MAX/COND */
				ir_bitqueue_add(worklist, input);
			}
		}
	}

	/* rewrite the users of "ref" */
	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		l = insn->inputs_count;
		for (k = 1; k <= l; k++) {
			if (ir_insn_op(insn, k) == ref) {
				ir_insn_set_op(insn, k, new_ref);
			}
		}
#if IR_COMBO_COPY_PROPAGATION
		if (new_ref > 0) {
			if (ir_use_list_add(ctx, new_ref, use)) {
				/* restore after reallocation (use_edges may have moved) */
				use_list = &ctx->use_lists[ref];
				n = use_list->count;
				p = &ctx->use_edges[use_list->refs + j];
			}
		}
#endif
		/* schedule folding */
		ir_bitqueue_add(worklist, use);
	}

	CLEAR_USES(ref);
}
495
/* Post-SCCP in-place folding of instruction "ref".  Depending on the
 * folding outcome the instruction is rewritten in place (EMIT), replaced
 * by another ref (COPY) or by a constant (CONST); affected users are
 * re-queued on the worklist. */
static void ir_sccp_fold2(ir_ctx *ctx, ir_ref ref, ir_bitqueue *worklist)
{
	uint32_t opt;
	ir_ref op1, op2, op3;
	ir_insn *op1_insn, *op2_insn, *op3_insn, *insn;

	insn = &ctx->ir_base[ref];
	opt = insn->opt;
	op1 = insn->op1;
	op2 = insn->op2;
	op3 = insn->op3;

restart:
	op1_insn = ctx->ir_base + op1;
	op2_insn = ctx->ir_base + op2;
	op3_insn = ctx->ir_base + op3;

	switch (ir_folding(ctx, opt, op1, op2, op3, op1_insn, op2_insn, op3_insn)) {
		case IR_FOLD_DO_RESTART:
			/* folding produced a new instruction shape — retry with it */
			opt = ctx->fold_insn.optx;
			op1 = ctx->fold_insn.op1;
			op2 = ctx->fold_insn.op2;
			op3 = ctx->fold_insn.op3;
			goto restart;
		case IR_FOLD_DO_EMIT:
			insn = &ctx->ir_base[ref];
			/* rewrite in place only when something actually changed */
			if (insn->opt != ctx->fold_insn.opt
			 || insn->op1 != ctx->fold_insn.op1
			 || insn->op2 != ctx->fold_insn.op2
			 || insn->op3 != ctx->fold_insn.op3) {

				ir_use_list *use_list;
				ir_ref n, j, *p, use;

				insn->optx = ctx->fold_insn.opt;
				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(ir_op_flags[opt & IR_OPT_OP_MASK]));
				insn->inputs_count = IR_INPUT_EDGES_COUNT(ir_op_flags[opt & IR_OPT_OP_MASK]);
				/* keep use-lists consistent for every operand that changed */
				if (insn->op1 != ctx->fold_insn.op1) {
					if (insn->op1 > 0) {
						ir_use_list_remove_one(ctx, insn->op1, ref);
					}
					if (ctx->fold_insn.op1 > 0) {
						ir_use_list_add(ctx, ctx->fold_insn.op1, ref);
					}
				}
				if (insn->op2 != ctx->fold_insn.op2) {
					if (insn->op2 > 0) {
						ir_use_list_remove_one(ctx, insn->op2, ref);
					}
					if (ctx->fold_insn.op2 > 0) {
						ir_use_list_add(ctx, ctx->fold_insn.op2, ref);
					}
				}
				if (insn->op3 != ctx->fold_insn.op3) {
					if (insn->op3 > 0) {
						ir_use_list_remove_one(ctx, insn->op3, ref);
					}
					if (ctx->fold_insn.op3 > 0) {
						ir_use_list_add(ctx, ctx->fold_insn.op3, ref);
					}
				}
				insn->op1 = ctx->fold_insn.op1;
				insn->op2 = ctx->fold_insn.op2;
				insn->op3 = ctx->fold_insn.op3;

				/* the instruction changed — revisit all of its users */
				use_list = &ctx->use_lists[ref];
				n = use_list->count;
				for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
					use = *p;
					ir_bitqueue_add(worklist, use);
				}
			}
			break;
		case IR_FOLD_DO_COPY:
			op1 = ctx->fold_insn.op1;
			ir_sccp_replace_insn2(ctx, ref, op1, worklist);
			break;
		case IR_FOLD_DO_CONST:
			op1 = ir_const(ctx, ctx->fold_insn.val, ctx->fold_insn.type);
			ir_sccp_replace_insn2(ctx, ref, op1, worklist);
			break;
		default:
			IR_ASSERT(0);
			break;
	}
}
582
ir_sccp_remove_if(ir_ctx * ctx,ir_insn * _values,ir_ref ref,ir_ref dst)583 static void ir_sccp_remove_if(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref dst)
584 {
585 ir_ref next;
586 ir_insn *insn, *next_insn;
587
588 insn = &ctx->ir_base[ref];
589 if (ctx->use_lists[dst].count == 1) {
590 next = ctx->use_edges[ctx->use_lists[dst].refs];
591 next_insn = &ctx->ir_base[next];
592 /* remove IF and IF_TRUE/FALSE from double linked control list */
593 next_insn->op1 = insn->op1;
594 ir_use_list_replace_one(ctx, insn->op1, ref, next);
595 /* remove IF and IF_TRUE/FALSE instructions */
596 ir_sccp_make_nop(ctx, ref);
597 ir_sccp_make_nop(ctx, dst);
598 } else {
599 insn->op2 = IR_UNUSED;
600 insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
601 next_insn = &ctx->ir_base[dst];
602 next_insn->op = IR_BEGIN;
603 }
604 }
605
/* Drop the unreachable inputs of a MERGE/LOOP_BEGIN.
 *
 * If exactly one feasible input remains, the MERGE is removed entirely
 * (or degraded to BEGIN when it cannot be spliced out).  Otherwise the
 * surviving inputs are compacted in place, and the operands of every
 * dependent PHI are compacted to match. */
static void ir_sccp_remove_unfeasible_merge_inputs(ir_ctx *ctx, ir_insn *_values, ir_ref ref, ir_ref unfeasible_inputs)
{
	ir_ref i, j, n, k, *p, use;
	ir_insn *insn, *use_insn;
	ir_use_list *use_list;
	ir_bitset life_inputs;

	insn = &ctx->ir_base[ref];
	IR_ASSERT(insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN);
	n = insn->inputs_count;
	if (n - unfeasible_inputs == 1) {
		/* remove MERGE completely */
		for (j = 1; j <= n; j++) {
			ir_ref input = ir_insn_op(insn, j);
			if (input && IR_IS_FEASIBLE(input)) {
				/* found the single live input */
				ir_insn *input_insn = &ctx->ir_base[input];

				IR_ASSERT(input_insn->op == IR_END || input_insn->op == IR_LOOP_END||
					input_insn->op == IR_IJMP || input_insn->op == IR_UNREACHABLE);
				if (input_insn->op == IR_END || input_insn->op == IR_LOOP_END) {
					if (input < ref) {
						/* forward edge: splice MERGE and the input END
						 * out of the control chain */
						ir_ref prev, next = IR_UNUSED;
						ir_insn *next_insn = NULL;

						prev = input_insn->op1;
						use_list = &ctx->use_lists[ref];
						if (use_list->count == 1) {
							next = ctx->use_edges[use_list->refs];
							next_insn = &ctx->ir_base[next];
						} else {
							/* separate the control successor from data
							 * users; re-hang data users on "prev" */
							for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
								use = *p;
								use_insn = &ctx->ir_base[use];
								IR_ASSERT((use_insn->op != IR_PHI) && "PHI must be already removed");
								if (ir_op_flags[use_insn->op] & IR_OP_FLAG_CONTROL) {
									IR_ASSERT(!next);
									next = use;
									next_insn = use_insn;
								} else if (use_insn->op != IR_NOP) {
									IR_ASSERT(use_insn->op1 == ref);
									use_insn->op1 = prev;
									ir_use_list_add(ctx, prev, use);
									/* ir_use_list_add() may realloc use_edges */
									p = &ctx->use_edges[use_list->refs + k];
								}
							}
						}
						IR_ASSERT(prev && next);
						/* remove MERGE and input END from double linked control list */
						next_insn->op1 = prev;
						ir_use_list_replace_one(ctx, prev, input, next);
						/* remove MERGE and input END instructions */
						ir_sccp_make_nop(ctx, ref);
						ir_sccp_make_nop(ctx, input);
					} else {
						/* back edge: keep both nodes, degrade MERGE to BEGIN */
						for (i = 2; i <= n; i++) {
							ir_insn_set_op(insn, i, IR_UNUSED);
						}
						insn->op = IR_BEGIN;
						insn->op1 = input;
						input_insn->op = IR_END;
					}
					break;
				} else {
					/* IJMP/UNREACHABLE input: degrade MERGE to BEGIN */
					for (i = 2; i <= n; i++) {
						ir_insn_set_op(insn, i, IR_UNUSED);
					}
					insn->op = IR_BEGIN;
					insn->op1 = input;
				}
			}
		}
	} else {
		/* more than one live input: compact them and remember which
		 * positions survived in "life_inputs" */
		n = insn->inputs_count;
		i = 1;
		life_inputs = ir_bitset_malloc(n + 1);
		for (j = 1; j <= n; j++) {
			ir_ref input = ir_insn_op(insn, j);

			if (input) {
				if (i != j) {
					ir_insn_set_op(insn, i, input);
				}
				ir_bitset_incl(life_inputs, j);
				i++;
			}
		}
		j = i;
		while (j <= n) {
			ir_insn_set_op(insn, j, IR_UNUSED);
			j++;
		}
		i--;
		insn->inputs_count = i;

		/* PHI operands are shifted by one relative to MERGE inputs */
		n++;
		use_list = &ctx->use_lists[ref];
		if (use_list->count > 1) {
			for (k = 0, p = &ctx->use_edges[use_list->refs]; k < use_list->count; k++, p++) {
				use = *p;
				use_insn = &ctx->ir_base[use];
				if (use_insn->op == IR_PHI) {
					/* compact the PHI operands to match the compacted MERGE */
					i = 2;
					for (j = 2; j <= n; j++) {
						ir_ref input = ir_insn_op(use_insn, j);

						if (ir_bitset_in(life_inputs, j - 1)) {
							IR_ASSERT(input);
							if (i != j) {
								ir_insn_set_op(use_insn, i, input);
							}
							i++;
						} else if (!IR_IS_CONST_REF(input)) {
							/* dropped operand: detach its use edge */
							ir_use_list_remove_one(ctx, input, use);
						}
					}
					while (i <= n) {
						ir_insn_set_op(use_insn, i, IR_UNUSED);
						i++;
					}
					use_insn->inputs_count = insn->inputs_count + 1;
				}
			}
		}
		ir_mem_free(life_inputs);
	}
}
732
ir_may_promote_d2f(ir_ctx * ctx,ir_ref ref)733 static bool ir_may_promote_d2f(ir_ctx *ctx, ir_ref ref)
734 {
735 ir_insn *insn = &ctx->ir_base[ref];
736
737 IR_ASSERT(insn->type == IR_DOUBLE);
738 if (IR_IS_CONST_REF(ref)) {
739 return !IR_IS_SYM_CONST(insn->op) && insn->val.d == (double)(float)insn->val.d;
740 } else {
741 switch (insn->op) {
742 case IR_FP2FP:
743 return 1;
744 // case IR_INT2FP:
745 // return ctx->use_lists[ref].count == 1;
746 case IR_NEG:
747 case IR_ABS:
748 return ctx->use_lists[ref].count == 1 &&
749 ir_may_promote_d2f(ctx, insn->op1);
750 case IR_ADD:
751 case IR_SUB:
752 case IR_MUL:
753 case IR_DIV:
754 case IR_MIN:
755 case IR_MAX:
756 return ctx->use_lists[ref].count == 1 &&
757 ir_may_promote_d2f(ctx, insn->op1) &&
758 ir_may_promote_d2f(ctx, insn->op2);
759 default:
760 break;
761 }
762 }
763 return 0;
764 }
765
ir_may_promote_f2d(ir_ctx * ctx,ir_ref ref)766 static bool ir_may_promote_f2d(ir_ctx *ctx, ir_ref ref)
767 {
768 ir_insn *insn = &ctx->ir_base[ref];
769
770 IR_ASSERT(insn->type == IR_FLOAT);
771 if (IR_IS_CONST_REF(ref)) {
772 return !IR_IS_SYM_CONST(insn->op) && insn->val.f == (float)(double)insn->val.f;
773 } else {
774 switch (insn->op) {
775 case IR_FP2FP:
776 return 1;
777 case IR_INT2FP:
778 return ctx->use_lists[ref].count == 1;
779 case IR_NEG:
780 case IR_ABS:
781 return ctx->use_lists[ref].count == 1 &&
782 ir_may_promote_f2d(ctx, insn->op1);
783 case IR_ADD:
784 case IR_SUB:
785 case IR_MUL:
786 // case IR_DIV:
787 case IR_MIN:
788 case IR_MAX:
789 return ctx->use_lists[ref].count == 1 &&
790 ir_may_promote_f2d(ctx, insn->op1) &&
791 ir_may_promote_f2d(ctx, insn->op2);
792 default:
793 break;
794 }
795 }
796 return 0;
797 }
798
/* Rewrite the DOUBLE expression tree rooted at "ref" to FLOAT
 * (caller must have checked ir_may_promote_d2f() first).  "use" is the
 * consumer on whose behalf the rewrite happens; use-lists are updated
 * accordingly.  Returns the replacement ref. */
static ir_ref ir_promote_d2f(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_insn *insn = &ctx->ir_base[ref];
	uint32_t count;

	IR_ASSERT(insn->type == IR_DOUBLE);
	if (IR_IS_CONST_REF(ref)) {
		return ir_const_float(ctx, (float)insn->val.d);
	} else {
		switch (insn->op) {
			case IR_FP2FP:
				/* drop the conversion and forward its operand to "use" */
				count = ctx->use_lists[ref].count;
				ir_use_list_remove_all(ctx, ref, use);
				if (ctx->use_lists[ref].count == 0) {
					/* conversion became dead: replace it by a NOP and
					 * transfer all removed use edges to the operand */
					ir_use_list_replace_one(ctx, insn->op1, ref, use);
					if (count > 1) {
						do {
							ir_use_list_add(ctx, insn->op1, use);
						} while (--count > 1);
					}
					ref = insn->op1;
					MAKE_NOP(insn);
					return ref;
				} else {
					/* conversion still has other users: only add edges
					 * for the uses that moved over to the operand */
					ir_use_list_add(ctx, insn->op1, use);
					count -= ctx->use_lists[ref].count;
					if (count > 1) {
						do {
							ir_use_list_add(ctx, insn->op1, use);
						} while (--count > 1);
					}
				}
				return insn->op1;
			// case IR_INT2FP:
			//	insn->type = IR_FLOAT;
			//	return ref;
			case IR_NEG:
			case IR_ABS:
				/* retype the node after promoting its operand */
				insn->op1 = ir_promote_d2f(ctx, insn->op1, ref);
				insn->type = IR_FLOAT;
				return ref;
			case IR_ADD:
			case IR_SUB:
			case IR_MUL:
			case IR_DIV:
			case IR_MIN:
			case IR_MAX:
				if (insn->op1 == insn->op2) {
					/* promote the shared operand only once */
					insn->op2 = insn->op1 = ir_promote_d2f(ctx, insn->op1, ref);
				} else {
					insn->op1 = ir_promote_d2f(ctx, insn->op1, ref);
					insn->op2 = ir_promote_d2f(ctx, insn->op2, ref);
				}
				insn->type = IR_FLOAT;
				return ref;
			default:
				break;
		}
	}
	IR_ASSERT(0);
	return ref;
}
861
/* Rewrite the FLOAT expression tree rooted at "ref" to DOUBLE
 * (caller must have checked ir_may_promote_f2d() first).  "use" is the
 * consumer on whose behalf the rewrite happens.  Returns the
 * replacement ref. */
static ir_ref ir_promote_f2d(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_insn *insn = &ctx->ir_base[ref];
	uint32_t count;
	ir_ref old_ref;

	IR_ASSERT(insn->type == IR_FLOAT);
	if (IR_IS_CONST_REF(ref)) {
		return ir_const_double(ctx, (double)insn->val.f);
	} else {
		switch (insn->op) {
			case IR_FP2FP:
				/* drop the conversion and forward its operand to "use" */
				count = ctx->use_lists[ref].count;
				ir_use_list_remove_all(ctx, ref, use);
				if (ctx->use_lists[ref].count == 0) {
					/* conversion became dead: replace it by a NOP and
					 * transfer all removed use edges to the operand */
					ir_use_list_replace_one(ctx, insn->op1, ref, use);
					if (count > 1) {
						do {
							ir_use_list_add(ctx, insn->op1, use);
						} while (--count > 1);
					}
					ref = insn->op1;
					MAKE_NOP(insn);
					return ref;
				} else {
					/* conversion still has other users: only add edges
					 * for the uses that moved over to the operand */
					ir_use_list_add(ctx, insn->op1, use);
					count -= ctx->use_lists[ref].count;
					if (count > 1) {
						do {
							ir_use_list_add(ctx, insn->op1, use);
						} while (--count > 1);
					}
				}
				return insn->op1;
			case IR_INT2FP:
				/* reuse an existing int->double conversion of the same
				 * operand instead of retyping this one */
				old_ref = ir_find1(ctx, IR_OPTX(IR_INT2FP, IR_DOUBLE, 1), insn->op1);
				if (old_ref) {
					IR_ASSERT(ctx->use_lists[ref].count == 1);
					ir_use_list_remove_one(ctx, insn->op1, ref);
					CLEAR_USES(ref);
					MAKE_NOP(insn);
					ir_use_list_add(ctx, old_ref, use);
					return old_ref;
				}
				insn->type = IR_DOUBLE;
				return ref;
			case IR_NEG:
			case IR_ABS:
				insn->op1 = ir_promote_f2d(ctx, insn->op1, ref);
				insn->type = IR_DOUBLE;
				return ref;
			case IR_ADD:
			case IR_SUB:
			case IR_MUL:
			// case IR_DIV:
			case IR_MIN:
			case IR_MAX:
				if (insn->op1 == insn->op2) {
					/* promote the shared operand only once */
					insn->op2 = insn->op1 = ir_promote_f2d(ctx, insn->op1, ref);
				} else {
					insn->op1 = ir_promote_f2d(ctx, insn->op1, ref);
					insn->op2 = ir_promote_f2d(ctx, insn->op2, ref);
				}
				insn->type = IR_DOUBLE;
				return ref;
			default:
				break;
		}
	}
	IR_ASSERT(0);
	return ref;
}
934
ir_may_promote_i2i(ir_ctx * ctx,ir_type type,ir_ref ref)935 static bool ir_may_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref)
936 {
937 ir_insn *insn = &ctx->ir_base[ref];
938
939 if (IR_IS_CONST_REF(ref)) {
940 return !IR_IS_SYM_CONST(insn->op);
941 } else {
942 switch (insn->op) {
943 case IR_ZEXT:
944 case IR_SEXT:
945 return ctx->ir_base[insn->op1].type == type;
946 case IR_NEG:
947 case IR_ABS:
948 case IR_NOT:
949 return ctx->use_lists[ref].count == 1 &&
950 ir_may_promote_i2i(ctx, type, insn->op1);
951 case IR_ADD:
952 case IR_SUB:
953 case IR_MUL:
954 // case IR_DIV:
955 case IR_MIN:
956 case IR_MAX:
957 case IR_OR:
958 case IR_AND:
959 case IR_XOR:
960 return ctx->use_lists[ref].count == 1 &&
961 ir_may_promote_i2i(ctx, type, insn->op1) &&
962 ir_may_promote_i2i(ctx, type, insn->op2);
963 default:
964 break;
965 }
966 }
967 return 0;
968 }
969
/* Rewrite the integer expression tree rooted at "ref" to "type"
 * (caller must have checked ir_may_promote_i2i() first).  "use" is the
 * consumer on whose behalf the rewrite happens.  Returns the
 * replacement ref. */
static ir_ref ir_promote_i2i(ir_ctx *ctx, ir_type type, ir_ref ref, ir_ref use)
{
	ir_insn *insn = &ctx->ir_base[ref];
	uint32_t count;

	if (IR_IS_CONST_REF(ref)) {
		return ir_const(ctx, insn->val, type);
	} else {
		switch (insn->op) {
			case IR_ZEXT:
			case IR_SEXT:
				/* drop the extension and forward its operand to "use" */
				count = ctx->use_lists[ref].count;
				ir_use_list_remove_all(ctx, ref, use);
				if (ctx->use_lists[ref].count == 0) {
					/* extension became dead: replace it by a NOP and
					 * transfer all removed use edges to the operand */
					ir_use_list_replace_one(ctx, insn->op1, ref, use);
					if (count > 1) {
						do {
							ir_use_list_add(ctx, insn->op1, use);
						} while (--count > 1);
					}
					ref = insn->op1;
					MAKE_NOP(insn);
					return ref;
				} else {
					/* extension still has other users: only add edges
					 * for the uses that moved over to the operand */
					ir_use_list_add(ctx, insn->op1, use);
					count -= ctx->use_lists[ref].count;
					if (count > 1) {
						do {
							ir_use_list_add(ctx, insn->op1, use);
						} while (--count > 1);
					}
				}
				return insn->op1;
			case IR_NEG:
			case IR_ABS:
			case IR_NOT:
				insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref);
				insn->type = type;
				return ref;
			case IR_ADD:
			case IR_SUB:
			case IR_MUL:
			// case IR_DIV:
			case IR_MIN:
			case IR_MAX:
			case IR_OR:
			case IR_AND:
			case IR_XOR:
				if (insn->op1 == insn->op2) {
					/* promote the shared operand only once */
					insn->op2 = insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref);
				} else {
					insn->op1 = ir_promote_i2i(ctx, type, insn->op1, ref);
					insn->op2 = ir_promote_i2i(ctx, type, insn->op2, ref);
				}
				insn->type = type;
				return ref;
			default:
				break;
		}
	}
	IR_ASSERT(0);
	return ref;
}
1033
/* Evaluate SEXT/ZEXT ("op") of the integer constant "val_insn" at
 * compile time and return a constant reference of the wider "type". */
static ir_ref ir_ext_const(ir_ctx *ctx, ir_insn *val_insn, ir_op op, ir_type type)
{
	ir_val new_val;

	switch (val_insn->type) {
		default:
			IR_ASSERT(0);
			/* fallthrough — assertion-only guard, extend as 8-bit */
		case IR_I8:
		case IR_U8:
		case IR_BOOL:
			if (op == IR_SEXT) {
				new_val.i64 = (int64_t)val_insn->val.i8;
			} else {
				new_val.u64 = (uint64_t)val_insn->val.u8;
			}
			break;
		case IR_I16:
		case IR_U16:
			if (op == IR_SEXT) {
				new_val.i64 = (int64_t)val_insn->val.i16;
			} else {
				new_val.u64 = (uint64_t)val_insn->val.u16;
			}
			break;
		case IR_I32:
		case IR_U32:
			if (op == IR_SEXT) {
				new_val.i64 = (int64_t)val_insn->val.i32;
			} else {
				new_val.u64 = (uint64_t)val_insn->val.u32;
			}
			break;
	}
	return ir_const(ctx, new_val, type);
}
1069
/* Make "var_ref" consume an extension (SEXT/ZEXT of "type") of
 * "src_ref": reuse an existing identical extension when one exists,
 * otherwise emit a new one.  Use-lists are updated so that "var_ref"
 * uses the extension instead of "src_ref".  Returns the extension ref.
 *
 * (Fix: the inner IR_IS_CONST_REF(src_ref) re-check in the reuse path
 * was redundant — that branch is only reachable when the identical
 * outer condition already held.) */
static ir_ref ir_ext_ref(ir_ctx *ctx, ir_ref var_ref, ir_ref src_ref, ir_op op, ir_type type)
{
	uint32_t optx = IR_OPTX(op, type, 1);
	ir_ref ref;

	if (!IR_IS_CONST_REF(src_ref)) {
		/* reuse an existing identical extension of src_ref */
		ref = ir_find1(ctx, optx, src_ref);
		if (ref) {
			ir_use_list_add(ctx, ref, var_ref);
			/* src_ref is known to be non-const here */
			ir_use_list_remove_one(ctx, src_ref, var_ref);
			return ref;
		}
	}

	ref = ir_emit1(ctx, optx, src_ref);
	/* ir_emit1() may grow the instruction buffer — grow use_lists too */
	ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
	ctx->use_lists[ref].count = 0;
	ctx->use_lists[ref].refs = IR_UNUSED;
	ir_use_list_add(ctx, ref, var_ref);
	if (!IR_IS_CONST_REF(src_ref)) {
		ir_use_list_replace_one(ctx, src_ref, var_ref, ref);
	}
	return ref;
}
1096
/*
 * Try to eliminate a ZEXT/SEXT ("ext_ref"/"insn") of a simple loop induction
 * variable by promoting the induction variable itself - the PHI and its
 * update ADD/SUB/MUL - to the wider result type of the extension.
 *
 * Returns 1 when the promotion was performed (the extension is replaced by
 * the widened PHI), 0 when the pattern does not match.
 */
static bool ir_try_promote_ext(ir_ctx *ctx, ir_ref ext_ref, ir_insn *insn, ir_bitqueue *worklist)
{
	ir_type type = insn->type;  /* the wider (promoted) type */
	ir_op op = insn->op;        /* IR_ZEXT or IR_SEXT */
	ir_ref ref = insn->op1;     /* candidate induction variable (the PHI) */
	ir_insn *phi_insn = &ctx->ir_base[ref];
	ir_insn *op_insn;
	ir_use_list *use_list;
	ir_ref n, *p, use, op_ref;

	/* Check for simple induction variable in the form: x2 = PHI(loop, x1, x3); x3 = ADD(x2, _); */
	if (phi_insn->op != IR_PHI
	 || phi_insn->inputs_count != 3 /* (2 values) */
	 || ctx->ir_base[phi_insn->op1].op != IR_LOOP_BEGIN) {
		return 0;
	}

	/* the back-edge value x3 must be a single-use ADD/SUB/MUL over the PHI */
	op_ref = phi_insn->op3;
	op_insn = &ctx->ir_base[op_ref];
	if ((op_insn->op != IR_ADD && op_insn->op != IR_SUB && op_insn->op != IR_MUL)
	 || (op_insn->op1 != ref && op_insn->op2 != ref)
	 || ctx->use_lists[op_ref].count != 1) {
		return 0;
	}

	/* Check if we may change the type of the induction variable:
	 * besides the update op and the extension itself, only comparisons and
	 * IF nodes may consume it (their semantics survive the widening) */
	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
		use = *p;
		if (use == op_ref || use == ext_ref) {
			continue;
		} else {
			ir_insn *use_insn = &ctx->ir_base[use];

			if ((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
			 && (use_insn->op1 == ref || use_insn->op2 == ref)) {
				continue;
			} else if (use_insn->op == IR_IF) {
				continue;
			} else {
				return 0;
			}
		}
	}

	/* commit: widen the PHI and its update operation */
	phi_insn->type = insn->type;
	op_insn->type = insn->type;

	for (n = 0; n < ctx->use_lists[ref].count; n++) {
		/* "use_lists" may be reallocated by ir_ext_ref() */
		use = ctx->use_edges[ctx->use_lists[ref].refs + n];
		if (use == ext_ref) {
			continue;
		} else {
			ir_insn *use_insn = &ctx->ir_base[use];

			if (use_insn->op == IR_IF) {
				continue;
			}
			IR_ASSERT(((use_insn->op >= IR_EQ && use_insn->op <= IR_UGT)
				|| use_insn->op == IR_ADD || use_insn->op == IR_SUB || use_insn->op == IR_MUL)
			 && (use_insn->op1 == ref || use_insn->op2 == ref));
			/* widen the "other" operand of each consumer: constants are
			 * extended in place, non-constants get an explicit ZEXT/SEXT */
			if (use_insn->op1 != ref) {
				if (IR_IS_CONST_REF(use_insn->op1)
				 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op1].op)) {
					ctx->ir_base[use].op1 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op1], op, type);
				} else {
					ctx->ir_base[use].op1 = ir_ext_ref(ctx, use, use_insn->op1, op, type);
					ir_bitqueue_add(worklist, ctx->ir_base[use].op1);
				}
			}
			if (use_insn->op2 != ref) {
				if (IR_IS_CONST_REF(use_insn->op2)
				 && !IR_IS_SYM_CONST(ctx->ir_base[use_insn->op2].op)) {
					ctx->ir_base[use].op2 = ir_ext_const(ctx, &ctx->ir_base[use_insn->op2], op, type);
				} else {
					ctx->ir_base[use].op2 = ir_ext_ref(ctx, use, use_insn->op2, op, type);
					ir_bitqueue_add(worklist, ctx->ir_base[use].op2);
				}
			}
		}
	}

	/* the extension is now a no-op: replace all its uses by the PHI */
	ir_sccp_replace_insn2(ctx, ext_ref, ref, worklist);

	/* finally widen the loop-entry value (x1) of the PHI
	 * (re-load phi_insn: ir_ext_ref() may have reallocated nothing here,
	 * but ir_sccp_replace_insn2() may have touched the worklist/uses) */
	phi_insn = &ctx->ir_base[ref];
	if (IR_IS_CONST_REF(phi_insn->op2)
	 && !IR_IS_SYM_CONST(ctx->ir_base[phi_insn->op2].op)) {
		ctx->ir_base[ref].op2 = ir_ext_const(ctx, &ctx->ir_base[phi_insn->op2], op, type);
	} else {
		ctx->ir_base[ref].op2 = ir_ext_ref(ctx, ref, phi_insn->op2, op, type);
		ir_bitqueue_add(worklist, ctx->ir_base[ref].op2);
	}

	return 1;
}
1194
/*
 * Return the IF_TRUE and IF_FALSE successors of the IF node "if_ref"
 * through the two output parameters.  The IF must have exactly two users,
 * one IF_TRUE and one IF_FALSE, in either order.
 */
static void ir_get_true_false_refs(const ir_ctx *ctx, ir_ref if_ref, ir_ref *if_true_ref, ir_ref *if_false_ref)
{
	ir_use_list *uses = &ctx->use_lists[if_ref];
	ir_ref first = ctx->use_edges[uses->refs];
	ir_ref second = ctx->use_edges[uses->refs + 1];

	IR_ASSERT(uses->count == 2);
	if (ctx->ir_base[first].op == IR_IF_TRUE) {
		IR_ASSERT(ctx->ir_base[second].op == IR_IF_FALSE);
		*if_true_ref = first;
		*if_false_ref = second;
	} else {
		IR_ASSERT(ctx->ir_base[first].op == IR_IF_FALSE);
		IR_ASSERT(ctx->ir_base[second].op == IR_IF_TRUE);
		*if_true_ref = second;
		*if_false_ref = first;
	}
}
1212
/*
 * Fuse two basic blocks connected by a trivial END -> BEGIN edge:
 * the pair is deleted and the END's predecessor is wired directly to the
 * BEGIN's single successor.  If the new predecessor is itself a BEGIN/MERGE,
 * it is queued on "worklist2" for further control-flow simplification.
 */
static void ir_merge_blocks(ir_ctx *ctx, ir_ref end, ir_ref begin, ir_bitqueue *worklist2)
{
	ir_ref pred, succ;
	ir_use_list *begin_uses;

	IR_ASSERT(ctx->ir_base[begin].op == IR_BEGIN);
	IR_ASSERT(ctx->ir_base[end].op == IR_END);
	IR_ASSERT(ctx->ir_base[begin].op1 == end);
	IR_ASSERT(ctx->use_lists[end].count == 1);

	/* capture both neighbours before the pair is wiped out */
	begin_uses = &ctx->use_lists[begin];
	IR_ASSERT(begin_uses->count == 1);
	succ = ctx->use_edges[begin_uses->refs];
	pred = ctx->ir_base[end].op1;

	/* drop the END/BEGIN pair */
	MAKE_NOP(&ctx->ir_base[end]); CLEAR_USES(end);
	MAKE_NOP(&ctx->ir_base[begin]); CLEAR_USES(begin);

	/* wire predecessor straight to successor */
	ctx->ir_base[succ].op1 = pred;
	ir_use_list_replace_all(ctx, pred, end, succ);

	if (ctx->ir_base[pred].op == IR_BEGIN || ctx->ir_base[pred].op == IR_MERGE) {
		ir_bitqueue_add(worklist2, pred);
	}
}
1241
/*
 * Remove an "empty diamond": an IF (or a SWITCH) whose arms contain no code
 * (IF_TRUE/IF_FALSE or CASE_VAL/CASE_DEFAULT immediately followed by END)
 * and rejoin at the MERGE "ref"/"insn".  The whole diamond is replaced by a
 * direct edge from the branch's predecessor to the MERGE's successor.
 *
 * Returns 1 on success, 0 when the pattern does not match.
 */
static bool ir_try_remove_empty_diamond(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
	if (insn->inputs_count == 2) {
		/* two-way diamond (IF, or SWITCH with exactly two targets) */
		ir_ref end1_ref = insn->op1, end2_ref = insn->op2;
		ir_insn *end1 = &ctx->ir_base[end1_ref];
		ir_insn *end2 = &ctx->ir_base[end2_ref];

		if (end1->op != IR_END || end2->op != IR_END) {
			return 0;
		}

		ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
		ir_insn *start1 = &ctx->ir_base[start1_ref];
		ir_insn *start2 = &ctx->ir_base[start2_ref];

		/* both arms must come from the same branch node */
		if (start1->op1 != start2->op1) {
			return 0;
		}

		ir_ref root_ref = start1->op1;
		ir_insn *root = &ctx->ir_base[root_ref];

		if (root->op != IR_IF
		 && !(root->op == IR_SWITCH && ctx->use_lists[root_ref].count == 2)) {
			return 0;
		}

		/* Empty Diamond
		 *
		 *    prev                     prev
		 *    |  condition             |  condition
		 *    | /                      |
		 *    IF                       |
		 *    | \                      |
		 *    |  +-----+               |
		 *    |        IF_FALSE        |
		 *    IF_TRUE  |           =>  |
		 *    |        END             |
		 *    END     /                |
		 *    |  +---+                 |
		 *    | /                      |
		 *    MERGE                    |
		 *    |                        |
		 *    next                     next
		 */

		ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
		ir_insn *next = &ctx->ir_base[next_ref];

		IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
		IR_ASSERT(ctx->use_lists[start2_ref].count == 1);

		/* bypass the diamond: next now follows the branch's predecessor */
		next->op1 = root->op1;
		ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
		if (!IR_IS_CONST_REF(root->op2)) {
			ir_use_list_remove_all(ctx, root->op2, root_ref);
			if (ir_is_dead(ctx, root->op2)) {
				/* the condition lost its last user - queue it for DCE */
				ir_bitqueue_add(worklist, root->op2);
			}
		}

		MAKE_NOP(root); CLEAR_USES(root_ref);
		MAKE_NOP(start1); CLEAR_USES(start1_ref);
		MAKE_NOP(start2); CLEAR_USES(start2_ref);
		MAKE_NOP(end1); CLEAR_USES(end1_ref);
		MAKE_NOP(end2); CLEAR_USES(end2_ref);
		MAKE_NOP(insn); CLEAR_USES(ref);

		if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
			/* the new predecessor may now be foldable itself */
			ir_bitqueue_add(worklist, next->op1);
		}

		return 1;
	} else {
		/* N-way diamond: a SWITCH whose every case is empty */
		ir_ref i, count = insn->inputs_count, *ops = insn->ops + 1;
		ir_ref root_ref = IR_UNUSED;

		for (i = 0; i < count; i++) {
			ir_ref end_ref, start_ref;
			ir_insn *end, *start;

			end_ref = ops[i];
			end = &ctx->ir_base[end_ref];
			if (end->op != IR_END) {
				return 0;
			}
			start_ref = end->op1;
			start = &ctx->ir_base[start_ref];
			if (start->op != IR_CASE_VAL && start->op != IR_CASE_DEFAULT) {
				return 0;
			}
			IR_ASSERT(ctx->use_lists[start_ref].count == 1);
			if (!root_ref) {
				root_ref = start->op1;
				/* the SWITCH must feed only these cases (no other users) */
				if (ctx->use_lists[root_ref].count != count) {
					return 0;
				}
			} else if (start->op1 != root_ref) {
				/* all cases must belong to the same SWITCH */
				return 0;
			}
		}

		/* Empty N-Diamond */
		ir_ref next_ref = ctx->use_edges[ctx->use_lists[ref].refs];
		ir_insn *next = &ctx->ir_base[next_ref];
		ir_insn *root = &ctx->ir_base[root_ref];

		next->op1 = root->op1;
		ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);

		if (!IR_IS_CONST_REF(root->op2)) {
			ir_use_list_remove_all(ctx, root->op2, root_ref);
			if (ir_is_dead(ctx, root->op2)) {
				ir_bitqueue_add(worklist, root->op2);
			}
		}

		MAKE_NOP(root); CLEAR_USES(root_ref);

		for (i = 0; i < count; i++) {
			ir_ref end_ref = ops[i];
			ir_insn *end = &ctx->ir_base[end_ref];
			ir_ref start_ref = end->op1;
			ir_insn *start = &ctx->ir_base[start_ref];

			MAKE_NOP(start); CLEAR_USES(start_ref);
			MAKE_NOP(end); CLEAR_USES(end_ref);
		}

		MAKE_NOP(insn); CLEAR_USES(ref);

		if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
			ir_bitqueue_add(worklist, next->op1);
		}

		return 1;
	}
}
1380
ir_is_zero(ir_ctx * ctx,ir_ref ref)1381 static bool ir_is_zero(ir_ctx *ctx, ir_ref ref)
1382 {
1383 return IR_IS_CONST_REF(ref)
1384 && !IR_IS_SYM_CONST(ctx->ir_base[ref].op)
1385 && ctx->ir_base[ref].val.u32 == 0;
1386 }
1387
/*
 * Pattern-match a two-input PHI over an IF/MERGE diamond and rewrite it into
 * a single data instruction:
 *   - PHI over a comparison of its own operands     -> MIN or MAX
 *   - PHI(A, NEG(A)) over a comparison with zero    -> ABS
 * On success the whole diamond (IF, its arms, the MERGE and the condition)
 * is deleted.  Returns 1 when a rewrite happened, 0 otherwise.
 */
static bool ir_optimize_phi(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
	IR_ASSERT(insn->inputs_count == 3);
	IR_ASSERT(ctx->use_lists[merge_ref].count == 2);

	ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
	ir_insn *end1 = &ctx->ir_base[end1_ref];
	ir_insn *end2 = &ctx->ir_base[end2_ref];

	if (end1->op == IR_END && end2->op == IR_END) {
		ir_ref start1_ref = end1->op1, start2_ref = end2->op1;
		ir_insn *start1 = &ctx->ir_base[start1_ref];
		ir_insn *start2 = &ctx->ir_base[start2_ref];

		if (start1->op1 == start2->op1) {
			ir_ref root_ref = start1->op1;
			ir_insn *root = &ctx->ir_base[root_ref];

			/* the condition must be used only by this IF */
			if (root->op == IR_IF && ctx->use_lists[root->op2].count == 1) {
				ir_ref cond_ref = root->op2;
				ir_insn *cond = &ctx->ir_base[cond_ref];
				ir_type type = insn->type;
				bool is_cmp, is_less;

				/* classify the condition: is it an order comparison valid
				 * for this PHI's type, and does it test "less than"? */
				if (IR_IS_TYPE_FP(type)) {
					is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE ||
						cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
					is_less = (cond->op == IR_LT || cond->op == IR_LE ||
						cond->op == IR_ULT || cond->op == IR_ULE);
				} else if (IR_IS_TYPE_SIGNED(type)) {
					is_cmp = (cond->op == IR_LT || cond->op == IR_LE || cond->op == IR_GT || cond->op == IR_GE);
					is_less = (cond->op == IR_LT || cond->op == IR_LE);
				} else {
					IR_ASSERT(IR_IS_TYPE_UNSIGNED(type));
					is_cmp = (cond->op == IR_ULT || cond->op == IR_ULE || cond->op == IR_UGT || cond->op == IR_UGE);
					is_less = (cond->op == IR_ULT || cond->op == IR_ULE);
				}

				if (is_cmp
				 && ((insn->op2 == cond->op1 && insn->op3 == cond->op2)
				  || (insn->op2 == cond->op2 && insn->op3 == cond->op1))) {
					/* MAX/MIN
					 *
					 *    prev                     prev
					 *    |  LT(A, B)              |
					 *    | /                      |
					 *    IF                       |
					 *    | \                      |
					 *    |  +-----+               |
					 *    |        IF_FALSE        |
					 *    IF_TRUE  |           =>  |
					 *    |        END             |
					 *    END     /                |
					 *    |  +---+                 |
					 *    | /                      |
					 *    MERGE                    |
					 *    | \                      |
					 *    |  PHI(A, B)             |  MIN(A, B)
					 *    next                     next
					 */
					ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
					ir_insn *next;

					/* the MERGE's other user (besides the PHI) is "next" */
					if (next_ref == ref) {
						next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
					}
					next = &ctx->ir_base[next_ref];

					IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
					IR_ASSERT(ctx->use_lists[start2_ref].count == 1);

					/* MIN when the value taken on the "condition true" path
					 * is the smaller side of the comparison, MAX otherwise */
					insn->op = (
						(is_less ? cond->op1 : cond->op2)
						==
						((start1->op == IR_IF_TRUE) ? insn->op2 : insn->op3)
						) ? IR_MIN : IR_MAX;
					insn->inputs_count = 2;
					/* canonicalize: keep the larger ref in op1 */
					if (insn->op2 > insn->op3) {
						insn->op1 = insn->op2;
						insn->op2 = insn->op3;
					} else {
						insn->op1 = insn->op3;
					}
					insn->op3 = IR_UNUSED;

					/* bypass and delete the diamond */
					next->op1 = root->op1;
					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
					if (!IR_IS_CONST_REF(insn->op1)) {
						ir_use_list_remove_all(ctx, insn->op1, cond_ref);
					}
					if (!IR_IS_CONST_REF(insn->op2)) {
						ir_use_list_remove_all(ctx, insn->op2, cond_ref);
					}

					MAKE_NOP(cond);   CLEAR_USES(cond_ref);
					MAKE_NOP(root);   CLEAR_USES(root_ref);
					MAKE_NOP(start1); CLEAR_USES(start1_ref);
					MAKE_NOP(start2); CLEAR_USES(start2_ref);
					MAKE_NOP(end1);   CLEAR_USES(end1_ref);
					MAKE_NOP(end2);   CLEAR_USES(end2_ref);
					MAKE_NOP(merge);  CLEAR_USES(merge_ref);

					if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
						ir_bitqueue_add(worklist, next->op1);
					}

					return 1;
				} else if (is_cmp
						/* one PHI input must be a single-use NEG of the other,
						 * and the condition a sign test (compare with zero)
						 * whose polarity matches the arm the NEG flows from */
						&& ((ctx->ir_base[insn->op2].op == IR_NEG
						  && ctx->use_lists[insn->op2].count == 1
						  && ctx->ir_base[insn->op2].op1 == insn->op3
						  && ((cond->op1 == insn->op3
						    && ir_is_zero(ctx, cond->op2)
						    && is_less == (start1->op == IR_IF_TRUE))
						   || (cond->op2 == insn->op3
						    && ir_is_zero(ctx, cond->op1)
						    && is_less != (start1->op == IR_IF_TRUE))))
						 || (ctx->ir_base[insn->op3].op == IR_NEG
						  && ctx->use_lists[insn->op3].count == 1
						  && ctx->ir_base[insn->op3].op1 == insn->op2
						  && ((cond->op1 == insn->op2
						    && ir_is_zero(ctx, cond->op2)
						    && is_less != (start1->op == IR_IF_TRUE))
						   || (cond->op2 == insn->op2
						    && ir_is_zero(ctx, cond->op1)
						    && is_less == (start1->op == IR_IF_TRUE)))))) {
					/* ABS
					 *
					 *    prev                     prev
					 *    |  LT(A, 0)              |
					 *    | /                      |
					 *    IF                       |
					 *    | \                      |
					 *    |  +-----+               |
					 *    |        IF_FALSE        |
					 *    IF_TRUE  |           =>  |
					 *    |        END             |
					 *    END     /                |
					 *    |  +---+                 |
					 *    | /                      |
					 *    MERGE                    |
					 *    | \                      |
					 *    |  PHI(A, NEG(A))        |  ABS(A)
					 *    next                     next
					 */
					ir_ref neg_ref;
					ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
					ir_insn *next;

					if (next_ref == ref) {
						next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
					}
					next = &ctx->ir_base[next_ref];

					IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
					IR_ASSERT(ctx->use_lists[start2_ref].count == 1);

					/* turn the PHI into ABS of the non-negated input */
					insn->op = IR_ABS;
					insn->inputs_count = 1;
					if (ctx->ir_base[insn->op2].op == IR_NEG) {
						neg_ref = insn->op2;
						insn->op1 = insn->op3;
					} else {
						neg_ref = insn->op3;
						insn->op1 = insn->op2;
					}
					insn->op2 = IR_UNUSED;
					insn->op3 = IR_UNUSED;

					/* bypass and delete the diamond plus the dead NEG */
					next->op1 = root->op1;
					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
					ir_use_list_remove_one(ctx, insn->op1, neg_ref);
					if (!IR_IS_CONST_REF(insn->op1)) {
						ir_use_list_remove_all(ctx, insn->op1, cond_ref);
					}

					MAKE_NOP(cond);   CLEAR_USES(cond_ref);
					MAKE_NOP(root);   CLEAR_USES(root_ref);
					MAKE_NOP(start1); CLEAR_USES(start1_ref);
					MAKE_NOP(start2); CLEAR_USES(start2_ref);
					MAKE_NOP(end1);   CLEAR_USES(end1_ref);
					MAKE_NOP(end2);   CLEAR_USES(end2_ref);
					MAKE_NOP(merge);  CLEAR_USES(merge_ref);
					MAKE_NOP(&ctx->ir_base[neg_ref]); CLEAR_USES(neg_ref);

					if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
						ir_bitqueue_add(worklist, next->op1);
					}

					return 1;
#if 0
				} else {
					/* COND
					 *
					 *    prev                     prev
					 *    |  cond                  |
					 *    | /                      |
					 *    IF                       |
					 *    | \                      |
					 *    |  +-----+               |
					 *    |        IF_FALSE        |
					 *    IF_TRUE  |           =>  |
					 *    |        END             |
					 *    END     /                |
					 *    |  +---+                 |
					 *    | /                      |
					 *    MERGE                    |
					 *    | \                      |
					 *    |  PHI(A, B)             |  COND(cond, A, B)
					 *    next                     next
					 */
					ir_ref next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs];
					ir_insn *next;

					if (next_ref == ref) {
						next_ref = ctx->use_edges[ctx->use_lists[merge_ref].refs + 1];
					}
					next = &ctx->ir_base[next_ref];

					IR_ASSERT(ctx->use_lists[start1_ref].count == 1);
					IR_ASSERT(ctx->use_lists[start2_ref].count == 1);

					insn->op = IR_COND;
					insn->inputs_count = 3;
					insn->op1 = cond_ref;
					if (start1->op == IR_IF_FALSE) {
						SWAP_REFS(insn->op2, insn->op3);
					}

					next->op1 = root->op1;
					ir_use_list_replace_one(ctx, cond_ref, root_ref, ref);
					ir_use_list_replace_one(ctx, root->op1, root_ref, next_ref);
					ir_use_list_remove_all(ctx, root->op2, root_ref);

					MAKE_NOP(root);   CLEAR_USES(root_ref);
					MAKE_NOP(start1); CLEAR_USES(start1_ref);
					MAKE_NOP(start2); CLEAR_USES(start2_ref);
					MAKE_NOP(end1);   CLEAR_USES(end1_ref);
					MAKE_NOP(end2);   CLEAR_USES(end2_ref);
					MAKE_NOP(merge);  CLEAR_USES(merge_ref);

					if (ctx->ir_base[next->op1].op == IR_BEGIN || ctx->ir_base[next->op1].op == IR_MERGE) {
						ir_bitqueue_add(worklist, next->op1);
					}

					return 1;
#endif
				}
			}
		}
	}

	return 0;
}
1642
ir_cmp_is_true(ir_op op,ir_insn * op1,ir_insn * op2)1643 static bool ir_cmp_is_true(ir_op op, ir_insn *op1, ir_insn *op2)
1644 {
1645 IR_ASSERT(op1->type == op2->type);
1646 if (IR_IS_TYPE_INT(op1->type)) {
1647 if (op == IR_EQ) {
1648 return op1->val.u64 == op2->val.u64;
1649 } else if (op == IR_NE) {
1650 return op1->val.u64 != op2->val.u64;
1651 } else if (op == IR_LT) {
1652 if (IR_IS_TYPE_SIGNED(op1->type)) {
1653 return op1->val.i64 < op2->val.i64;
1654 } else {
1655 return op1->val.u64 < op2->val.u64;
1656 }
1657 } else if (op == IR_GE) {
1658 if (IR_IS_TYPE_SIGNED(op1->type)) {
1659 return op1->val.i64 >= op2->val.i64;
1660 } else {
1661 return op1->val.u64 >= op2->val.u64;
1662 }
1663 } else if (op == IR_LE) {
1664 if (IR_IS_TYPE_SIGNED(op1->type)) {
1665 return op1->val.i64 <= op2->val.i64;
1666 } else {
1667 return op1->val.u64 <= op2->val.u64;
1668 }
1669 } else if (op == IR_GT) {
1670 if (IR_IS_TYPE_SIGNED(op1->type)) {
1671 return op1->val.i64 > op2->val.i64;
1672 } else {
1673 return op1->val.u64 > op2->val.u64;
1674 }
1675 } else if (op == IR_ULT) {
1676 return op1->val.u64 < op2->val.u64;
1677 } else if (op == IR_UGE) {
1678 return op1->val.u64 >= op2->val.u64;
1679 } else if (op == IR_ULE) {
1680 return op1->val.u64 <= op2->val.u64;
1681 } else if (op == IR_UGT) {
1682 return op1->val.u64 > op2->val.u64;
1683 } else {
1684 IR_ASSERT(0);
1685 return 0;
1686 }
1687 } else if (op1->type == IR_DOUBLE) {
1688 if (op == IR_EQ) {
1689 return op1->val.d == op2->val.d;
1690 } else if (op == IR_NE) {
1691 return op1->val.d != op2->val.d;
1692 } else if (op == IR_LT) {
1693 return op1->val.d < op2->val.d;
1694 } else if (op == IR_GE) {
1695 return op1->val.d >= op2->val.d;
1696 } else if (op == IR_LE) {
1697 return op1->val.d <= op2->val.d;
1698 } else if (op == IR_GT) {
1699 return op1->val.d > op2->val.d;
1700 } else if (op == IR_ULT) {
1701 return !(op1->val.d >= op2->val.d);
1702 } else if (op == IR_UGE) {
1703 return !(op1->val.d < op2->val.d);
1704 } else if (op == IR_ULE) {
1705 return !(op1->val.d > op2->val.d);
1706 } else if (op == IR_UGT) {
1707 return !(op1->val.d <= op2->val.d);
1708 } else {
1709 IR_ASSERT(0);
1710 return 0;
1711 }
1712 } else {
1713 IR_ASSERT(op1->type == IR_FLOAT);
1714 if (op == IR_EQ) {
1715 return op1->val.f == op2->val.f;
1716 } else if (op == IR_NE) {
1717 return op1->val.f != op2->val.f;
1718 } else if (op == IR_LT) {
1719 return op1->val.f < op2->val.f;
1720 } else if (op == IR_GE) {
1721 return op1->val.f >= op2->val.f;
1722 } else if (op == IR_LE) {
1723 return op1->val.f <= op2->val.f;
1724 } else if (op == IR_GT) {
1725 return op1->val.f > op2->val.f;
1726 } else if (op == IR_ULT) {
1727 return !(op1->val.f >= op2->val.f);
1728 } else if (op == IR_UGE) {
1729 return !(op1->val.f < op2->val.f);
1730 } else if (op == IR_ULE) {
1731 return !(op1->val.f > op2->val.f);
1732 } else if (op == IR_UGT) {
1733 return !(op1->val.f <= op2->val.f);
1734 } else {
1735 IR_ASSERT(0);
1736 return 0;
1737 }
1738 }
1739 }
1740
/*
 * Split an IF whose condition is a PHI over the same MERGE the IF follows,
 * where at least one PHI input is a (non-symbolic) constant.  The constant
 * path's branch outcome is known, so the corresponding predecessor can jump
 * straight to the right target; the non-constant path (if any) gets its own
 * IF placed before the former MERGE.
 *
 * "ref"/"insn" is the IF node.  Returns 1 when the split was performed.
 */
static bool ir_try_split_if(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
	ir_ref cond_ref = insn->op2;
	ir_insn *cond = &ctx->ir_base[cond_ref];

	if (cond->op == IR_PHI
	 && cond->inputs_count == 3
	 && cond->op1 == insn->op1
	 && ((IR_IS_CONST_REF(cond->op2) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op))
	  || (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)))) {
		ir_ref merge_ref = insn->op1;
		ir_insn *merge = &ctx->ir_base[merge_ref];

		if (ctx->use_lists[merge_ref].count == 2) {
			ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
			ir_insn *end1 = &ctx->ir_base[end1_ref];
			ir_insn *end2 = &ctx->ir_base[end2_ref];

			if (end1->op == IR_END && end2->op == IR_END) {
				ir_ref if_true_ref, if_false_ref;
				ir_insn *if_true, *if_false;
				ir_op op = IR_IF_FALSE;

				ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);

				/* canonicalize so that the constant PHI input is op2 and
				 * flows in through end1 */
				if (!IR_IS_CONST_REF(cond->op2) || IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)) {
					IR_ASSERT(IR_IS_CONST_REF(cond->op3));
					SWAP_REFS(cond->op2, cond->op3);
					SWAP_REFS(merge->op1, merge->op2);
					SWAP_REFS(end1_ref, end2_ref);
					SWAP_INSNS(end1, end2);
				}
				/* after this, "if_false_ref" is the target the constant
				 * path takes and "op" is the successor kind it needs */
				if (ir_const_is_true(&ctx->ir_base[cond->op2])) {
					SWAP_REFS(if_true_ref, if_false_ref);
					op = IR_IF_TRUE;
				}
				if_true = &ctx->ir_base[if_true_ref];
				if_false = &ctx->ir_base[if_false_ref];

				if (IR_IS_CONST_REF(cond->op3) && !IR_IS_SYM_CONST(ctx->ir_base[cond->op3].op)) {
					/* both PHI inputs are constants - no IF remains at all */
					if (ir_const_is_true(&ctx->ir_base[cond->op3]) ^ (op == IR_IF_TRUE)) {
						/* Simple IF Split: the two constants disagree, each
						 * predecessor goes to its own target
						 *
						 *    |        |               |        |
						 *    |        END             |        END
						 *    END     /                END      \
						 *    |  +---+                 |         +
						 *    | /                      |         |
						 *    MERGE                    |         |
						 *    | \                      |         |
						 *    |  PHI(false, true)      |         |
						 *    | /                      |         |
						 *    IF                   =>  |         |
						 *    | \                      |         |
						 *    |  +------+              |         |
						 *    |         IF_TRUE        |         BEGIN
						 *    IF_FALSE  |              BEGIN
						 *    |                        |
						 */
						ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
						ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);

						MAKE_NOP(merge); CLEAR_USES(merge_ref);
						MAKE_NOP(cond); CLEAR_USES(cond_ref);
						MAKE_NOP(insn); CLEAR_USES(ref);

						if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
						if_false->op1 = end1_ref;

						if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
						if_true->op1 = end2_ref;

						ir_bitqueue_add(worklist, if_false_ref);
						ir_bitqueue_add(worklist, if_true_ref);

						return 1;
					} else {
						/* Simple IF Split: both constants agree, both
						 * predecessors go to the same target; the other
						 * target becomes unreachable
						 *
						 *    |        |               |        |
						 *    |        END             |        END
						 *    END     /                END      |
						 *    |  +---+                 |        |
						 *    | /                      |        |
						 *    MERGE                    |        +
						 *    | \                      |       /
						 *    |  PHI(false, false)     |      /
						 *    | /                      |     /
						 *    IF                   =>  |    /
						 *    | \                      |   /
						 *    |  +------+              |  /
						 *    |         IF_TRUE        | /        BEGIN(unreachable)
						 *    IF_FALSE  |              MERGE
						 *    |                        |
						 */
						ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
						ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);

						MAKE_NOP(merge); CLEAR_USES(merge_ref);
						MAKE_NOP(cond); CLEAR_USES(cond_ref);
						MAKE_NOP(insn); CLEAR_USES(ref);

						if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
						if_false->op1 = end1_ref;
						if_false->op2 = end2_ref;

						if_true->optx = IR_BEGIN;
						if_true->op1 = IR_UNUSED;

						/* we created an unreachable block - CFG info is stale */
						ctx->flags2 &= ~IR_CFG_REACHABLE;

						ir_bitqueue_add(worklist, if_false_ref);

						return 1;
					}
				}

				/* Simple IF Split: one constant, one unknown input X;
				 * the constant path jumps directly, the X path keeps an IF
				 *
				 *    |        |               |        |
				 *    |        END             |        IF(X)
				 *    END     /                END     / \
				 *    |  +---+                 |  +--+   +
				 *    | /                      | /       |
				 *    MERGE                    | IF_FALSE |
				 *    | \                      | |        |
				 *    |  PHI(false, X)         | |        |
				 *    | /                      | |        |
				 *    IF                   =>  | END      |
				 *    | \                      | |        |
				 *    |  +------+              | |        |
				 *    |         IF_TRUE        | |        IF_TRUE
				 *    IF_FALSE  |              MERGE
				 *    |                        |
				 */
				ir_use_list_remove_all(ctx, merge_ref, cond_ref);
				ir_use_list_remove_all(ctx, ref, if_true_ref);
				if (!IR_IS_CONST_REF(cond->op3)) {
					ir_use_list_replace_one(ctx, cond->op3, cond_ref, end2_ref);
				}
				ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
				ir_use_list_add(ctx, end2_ref, if_true_ref);

				/* end2 (the X path) is repurposed as the new IF */
				end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
				end2->op2 = cond->op3;

				/* the former MERGE becomes the new IF's taken successor */
				merge->optx = IR_OPTX(op, IR_VOID, 1);
				merge->op1 = end2_ref;
				merge->op2 = IR_UNUSED;

				MAKE_NOP(cond);
				CLEAR_USES(cond_ref);

				/* the old IF becomes the END feeding the new MERGE below */
				insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
				insn->op1 = merge_ref;
				insn->op2 = IR_UNUSED;

				if_true->op1 = end2_ref;

				if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
				if_false->op1 = end1_ref;
				if_false->op2 = ref;

				ir_bitqueue_add(worklist, if_false_ref);
				if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
					ir_bitqueue_add(worklist, end2->op1);
				}

				return 1;
			}
		}
	}

	return 0;
}
1916
/*
 * Like ir_try_split_if(), but for an IF whose condition is a comparison
 * CMP(PHI, C2) with a constant right operand, where the PHI merges a
 * constant C1 and an unknown X over the same MERGE the IF follows.
 * CMP(C1, C2) folds at compile time, so the C1 predecessor can jump
 * directly; the X path keeps an IF over CMP(X, C2).
 *
 * "ref"/"insn" is the IF node.  Returns 1 when the split was performed.
 */
static bool ir_try_split_if_cmp(ir_ctx *ctx, ir_ref ref, ir_insn *insn, ir_bitqueue *worklist)
{
	ir_ref cond_ref = insn->op2;
	ir_insn *cond = &ctx->ir_base[cond_ref];

	if (cond->op >= IR_EQ && cond->op <= IR_UGT
	 && IR_IS_CONST_REF(cond->op2)
	 && !IR_IS_SYM_CONST(ctx->ir_base[cond->op2].op)
	 && ctx->use_lists[insn->op2].count == 1) {
		ir_ref phi_ref = cond->op1;
		ir_insn *phi = &ctx->ir_base[phi_ref];

		if (phi->op == IR_PHI
		 && phi->inputs_count == 3
		 && phi->op1 == insn->op1
		 && ctx->use_lists[phi_ref].count == 1
		 && ((IR_IS_CONST_REF(phi->op2) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op))
		  || (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)))) {
			ir_ref merge_ref = insn->op1;
			ir_insn *merge = &ctx->ir_base[merge_ref];

			if (ctx->use_lists[merge_ref].count == 2) {
				ir_ref end1_ref = merge->op1, end2_ref = merge->op2;
				ir_insn *end1 = &ctx->ir_base[end1_ref];
				ir_insn *end2 = &ctx->ir_base[end2_ref];

				if (end1->op == IR_END && end2->op == IR_END) {
					ir_ref if_true_ref, if_false_ref;
					ir_insn *if_true, *if_false;
					ir_op op = IR_IF_FALSE;

					ir_get_true_false_refs(ctx, ref, &if_true_ref, &if_false_ref);

					/* canonicalize so that the constant PHI input C1 is
					 * phi->op2 and flows in through end1 */
					if (!IR_IS_CONST_REF(phi->op2) || IR_IS_SYM_CONST(ctx->ir_base[phi->op2].op)) {
						IR_ASSERT(IR_IS_CONST_REF(phi->op3));
						SWAP_REFS(phi->op2, phi->op3);
						SWAP_REFS(merge->op1, merge->op2);
						SWAP_REFS(end1_ref, end2_ref);
						SWAP_INSNS(end1, end2);
					}
					/* fold CMP(C1, C2) to pick the constant path's target */
					if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op2], &ctx->ir_base[cond->op2])) {
						SWAP_REFS(if_true_ref, if_false_ref);
						op = IR_IF_TRUE;
					}
					if_true = &ctx->ir_base[if_true_ref];
					if_false = &ctx->ir_base[if_false_ref];

					if (IR_IS_CONST_REF(phi->op3) && !IR_IS_SYM_CONST(ctx->ir_base[phi->op3].op)) {
						/* both PHI inputs are constants - fold the IF away */
						if (ir_cmp_is_true(cond->op, &ctx->ir_base[phi->op3], &ctx->ir_base[cond->op2]) ^ (op == IR_IF_TRUE)) {
							/* IF Split: the folded results disagree, each
							 * predecessor goes to its own target
							 *
							 *    |        |               |        |
							 *    |        END             |        END
							 *    END     /                END      |
							 *    |  +---+                 |        |
							 *    | /                      |        |
							 *    MERGE                    |        |
							 *    | \                      |        |
							 *    |  PHI(C1, X)            |        |
							 *    |  |                     |        |
							 *    |  CMP(_, C2)            |        |
							 *    | /                      |        |
							 *    IF                   =>  |        |
							 *    | \                      |        |
							 *    |  +------+              |        |
							 *    |         IF_TRUE        |        BEGIN
							 *    IF_FALSE  |              BEGIN    |
							 *    |                        |
							 */

							ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
							ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_true_ref);

							MAKE_NOP(merge); CLEAR_USES(merge_ref);
							MAKE_NOP(phi); CLEAR_USES(phi_ref);
							MAKE_NOP(cond); CLEAR_USES(cond_ref);
							MAKE_NOP(insn); CLEAR_USES(ref);

							if_false->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
							if_false->op1 = end1_ref;

							if_true->optx = IR_OPTX(IR_BEGIN, IR_VOID, 1);
							if_true->op1 = end2_ref;

							ir_bitqueue_add(worklist, if_false_ref);
							ir_bitqueue_add(worklist, if_true_ref);

							return 1;
						} else {
							/* IF Split: both folded results agree, both
							 * predecessors go to the same target; the other
							 * target becomes unreachable
							 *
							 *    |        |               |        |
							 *    |        END             |        END
							 *    END     /                END      |
							 *    |  +---+                 |        |
							 *    | /                      |        |
							 *    MERGE                    |        |
							 *    | \                      |        |
							 *    |  PHI(C1, X)            |        |
							 *    |  |                     |        +
							 *    |  CMP(_, C2)            |       /
							 *    | /                      |      /
							 *    IF                   =>  |     /
							 *    | \                      |    /
							 *    |  +------+              |   /
							 *    |         IF_TRUE        |  /       BEGIN(unreachable)
							 *    IF_FALSE  |              MERGE      |
							 *    |                        |
							 */

							ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
							ir_use_list_replace_one(ctx, end2_ref, merge_ref, if_false_ref);

							MAKE_NOP(merge); CLEAR_USES(merge_ref);
							MAKE_NOP(phi); CLEAR_USES(phi_ref);
							MAKE_NOP(cond); CLEAR_USES(cond_ref);
							MAKE_NOP(insn); CLEAR_USES(ref);

							if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
							if_false->op1 = end1_ref;
							if_false->op2 = end2_ref;

							if_true->optx = IR_BEGIN;
							if_true->op1 = IR_UNUSED;

							/* we created an unreachable block - CFG info is stale */
							ctx->flags2 &= ~IR_CFG_REACHABLE;

							ir_bitqueue_add(worklist, if_false_ref);

							return 1;
						}
					} else {
						/* IF Split: the C1 path jumps directly, the X path
						 * keeps an IF over CMP(X, C2)
						 *
						 *    |        |               |        |
						 *    |        END             |        IF<----+
						 *    END     /                END     / \     |
						 *    |  +---+                 |  +--+   +     |
						 *    | /                      | /       |     |
						 *    MERGE                    | IF_FALSE |    |
						 *    | \                      | |        |    |
						 *    |  PHI(C1, X)            | |        |    |
						 *    |  |                     | |        |    |
						 *    |  CMP(_, C2)            | |        |    CMP(X, C2)
						 *    | /                      | |        |
						 *    IF                   =>  | END      |
						 *    | \                      | |        |
						 *    |  +------+              | |        |
						 *    |         IF_TRUE        | |        IF_TRUE
						 *    IF_FALSE  |              MERGE
						 *    |                        |
						 */

						ir_use_list_remove_all(ctx, merge_ref, phi_ref);
						ir_use_list_remove_all(ctx, ref, if_true_ref);
						if (!IR_IS_CONST_REF(phi->op3)) {
							ir_use_list_replace_one(ctx, phi->op3, phi_ref, insn->op2);
						}
						ir_use_list_replace_one(ctx, end1_ref, merge_ref, if_false_ref);
						ir_use_list_replace_one(ctx, cond_ref, ref, end2_ref);
						ir_use_list_add(ctx, end2_ref, if_true_ref);

						/* end2 (the X path) is repurposed as the new IF */
						end2->optx = IR_OPTX(IR_IF, IR_VOID, 2);
						end2->op2 = insn->op2;

						/* the former MERGE becomes the new IF's taken successor */
						merge->optx = IR_OPTX(op, IR_VOID, 1);
						merge->op1 = end2_ref;
						merge->op2 = IR_UNUSED;

						/* the CMP now compares X directly; the PHI dies */
						cond->op1 = phi->op3;
						MAKE_NOP(phi);
						CLEAR_USES(phi_ref);

						/* the old IF becomes the END feeding the new MERGE */
						insn->optx = IR_OPTX(IR_END, IR_VOID, 1);
						insn->op1 = merge_ref;
						insn->op2 = IR_UNUSED;

						if_true->op1 = end2_ref;

						if_false->optx = IR_OPTX(IR_MERGE, IR_VOID, 2);
						if_false->op1 = end1_ref;
						if_false->op2 = ref;

						ir_bitqueue_add(worklist, if_false_ref);
						if (ctx->ir_base[end2->op1].op == IR_BEGIN || ctx->ir_base[end2->op1].op == IR_MERGE) {
							ir_bitqueue_add(worklist, end2->op1);
						}

						return 1;
					}
				}
			}
		}
	}

	return 0;
}
2114
/*
 * Dispatch control-flow optimizations for a MERGE node:
 *  - single user            -> try to delete an empty diamond;
 *  - PHI + one control user -> try IF-splitting (plain or over a CMP),
 *                              falling back to MIN/MAX/ABS PHI rewriting.
 */
static void ir_optimize_merge(ir_ctx *ctx, ir_ref merge_ref, ir_insn *merge, ir_bitqueue *worklist)
{
	ir_use_list *use_list = &ctx->use_lists[merge_ref];

	if (use_list->count == 1) {
		ir_try_remove_empty_diamond(ctx, merge_ref, merge, worklist);
		return;
	}
	if (use_list->count != 2 || merge->inputs_count != 2) {
		return;
	}

	/* with two users the first must be the PHI, the second a control node */
	ir_ref phi_ref = ctx->use_edges[use_list->refs];
	ir_insn *phi = &ctx->ir_base[phi_ref];
	ir_ref ctl_ref = ctx->use_edges[use_list->refs + 1];
	ir_insn *ctl = &ctx->ir_base[ctl_ref];

	IR_ASSERT(ctl->op != IR_PHI);

	if (phi->op != IR_PHI) {
		return;
	}

	if (ctl->op == IR_IF && ctl->op1 == merge_ref && ctx->use_lists[phi_ref].count == 1) {
		if (ctl->op2 == phi_ref) {
			/* IF directly over the PHI */
			if (ir_try_split_if(ctx, ctl_ref, ctl, worklist)) {
				return;
			}
		} else {
			/* IF over a single-use CMP(PHI, const) */
			ir_insn *cmp = &ctx->ir_base[ctl->op2];

			if (cmp->op >= IR_EQ && cmp->op <= IR_UGT
			 && cmp->op1 == phi_ref
			 && IR_IS_CONST_REF(cmp->op2)
			 && !IR_IS_SYM_CONST(ctx->ir_base[cmp->op2].op)
			 && ctx->use_lists[ctl->op2].count == 1
			 && ir_try_split_if_cmp(ctx, ctl_ref, ctl, worklist)) {
				return;
			}
		}
	}
	ir_optimize_phi(ctx, merge_ref, merge, phi_ref, phi, worklist);
}
2155
/* Sparse Conditional Constant Propagation (combined with copy propagation
 * and a few follow-up clean-ups).
 *
 * The pass runs in three phases over a shadow array "_values" parallel to
 * ctx->ir_base; _values[i].optx encodes the lattice state of instruction i:
 *   - IR_TOP    : not (yet) proven reachable/evaluated,
 *   - IR_BOTTOM : varying (no optimization opportunity),
 *   - a constant op : instruction folds to that constant value,
 *   - IR_COPY   : instruction is identical to _values[i].op1 (copy propagation),
 *   - IR_IF     : a one-way IF/SWITCH; _values[i].op1 is the single taken target,
 *   - IR_MERGE  : a MERGE/LOOP_BEGIN with _values[i].op1 unfeasible inputs,
 *   - IR_LOAD   : a dead load scheduled for elimination.
 *
 * Phase 1: worklist-driven lattice propagation (the modified Wegman/Zadeck
 *          algorithm).  Phase 2: rewrite the IR according to the final
 *          lattice values.  Phase 3: iterative peephole/CFG clean-up driven
 *          by "worklist2".
 *
 * Returns 1 (the pass always completes).
 */
int ir_sccp(ir_ctx *ctx)
{
	ir_ref i, j, n, *p, use;
	ir_use_list *use_list;
	ir_insn *insn, *use_insn, *value;
	uint32_t flags;
	ir_bitqueue worklist, worklist2;
	/* calloc => every slot starts as IR_TOP (IR_UNUSED == 0) */
	ir_insn *_values = ir_mem_calloc(ctx->insns_count, sizeof(ir_insn));

	ctx->flags2 |= IR_OPT_IN_SCCP;

	/* A bit modified SCCP algorithm of M. N. Wegman and F. K. Zadeck */
	ir_bitqueue_init(&worklist2, ctx->insns_count);
	ir_bitqueue_init(&worklist, ctx->insns_count);
	/* seed the worklist with ref 1 (the entry instruction; asserted to be
	 * IR_START below) */
	worklist.pos = 0;
	ir_bitset_incl(worklist.set, 1);
	while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
		insn = &ctx->ir_base[i];
		flags = ir_op_flags[insn->op];
		if (flags & IR_OP_FLAG_DATA) {
			if (ctx->use_lists[i].count == 0) {
				/* dead code */
				continue;
			} else if (insn->op == IR_PHI) {
				/* meet over the feasible PHI inputs; skip uses if unchanged */
				if (!ir_sccp_meet_phi(ctx, _values, i, insn, &worklist)) {
					continue;
				}
			} else if (EXPECTED(IR_IS_FOLDABLE_OP(insn->op))) {
				bool may_benefit = 0;
				bool has_top = 0;

				IR_ASSERT(!IR_OP_HAS_VAR_INPUTS(flags));
				n = IR_INPUT_EDGES_COUNT(flags);
				for (p = insn->ops + 1; n > 0; p++, n--) {
					ir_ref input = *p;
					if (input > 0) {
						if (_values[input].optx == IR_TOP) {
							has_top = 1;
							/* do backward propagation only once;
							 * op1 of a TOP slot doubles as the "already
							 * scheduled" flag */
							if (!_values[input].op1) {
								_values[input].op1 = 1;
								ir_bitqueue_add(&worklist, input);
							}
						} else if (_values[input].optx != IR_BOTTOM) {
							/* Perform folding only if some of direct inputs
							 * is going to be replaced by a constant or copy.
							 * This approach may miss some folding optimizations
							 * dependent on indirect inputs. e.g. reassociation.
							 */
							may_benefit = 1;
						}
					}
				}
				if (has_top) {
					/* retry once all inputs are evaluated */
					continue;
				}
				if (!may_benefit) {
					IR_MAKE_BOTTOM(i);
					/* conversions may still be shrunk by the promotion
					 * clean-up in phase 3 */
					if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC
					 || insn->op == IR_ZEXT || insn->op == IR_SEXT) {
						ir_bitqueue_add(&worklist2, i);
					}
				} else if (!ir_sccp_fold(ctx, _values, i, insn->opt, insn->op1, insn->op2, insn->op3)) {
					/* not changed */
					continue;
				} else if (_values[i].optx == IR_BOTTOM) {
					/* folding may reallocate/modify the IR - reload insn */
					insn = &ctx->ir_base[i];
					if (insn->op == IR_FP2FP || insn->op == IR_FP2INT || insn->op == IR_TRUNC
					 || insn->op == IR_ZEXT || insn->op == IR_SEXT) {
						ir_bitqueue_add(&worklist2, i);
					}
				}
			} else {
				/* non-foldable data op: nothing to learn */
				IR_MAKE_BOTTOM(i);
			}
		} else if (flags & IR_OP_FLAG_BB_START) {
			if (insn->op == IR_MERGE || insn->op == IR_BEGIN) {
				/* candidates for CFG simplification in phase 3 */
				ir_bitqueue_add(&worklist2, i);
			}
			if (insn->op == IR_MERGE || insn->op == IR_LOOP_BEGIN) {
				ir_ref unfeasible_inputs = 0;

				n = insn->inputs_count;
				if (n > 3 && _values[i].optx == IR_TOP) {
					for (j = 0; j < (n>>2); j++) {
						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
					}
				}
				for (p = insn->ops + 1; n > 0; p++, n--) {
					ir_ref input = *p;
					IR_ASSERT(input > 0);
					if (_values[input].optx == IR_TOP) {
						unfeasible_inputs++;
					}
				}
				if (unfeasible_inputs == 0) {
					/* all predecessors reachable */
					IR_MAKE_BOTTOM(i);
				} else if (_values[i].op1 != unfeasible_inputs) {
					/* record the (changed) number of unfeasible inputs so
					 * phase 2 can prune them */
					_values[i].optx = IR_MERGE;
					_values[i].op1 = unfeasible_inputs;
				} else {
					/* unchanged - don't reschedule uses */
					continue;
				}
			} else {
				IR_ASSERT(insn->op == IR_START || IR_IS_FEASIBLE(insn->op1));
				IR_MAKE_BOTTOM(i);
			}
		} else {
			IR_ASSERT(insn->op1 > 0);
			if (_values[insn->op1].optx == IR_TOP) {
				/* control input is not feasible */
				continue;
			}
			if (insn->op == IR_IF) {
				if (IR_IS_TOP(insn->op2)) {
					/* do backward propagation only once */
					if (!_values[insn->op2].op1) {
						_values[insn->op2].op1 = 1;
						ir_bitqueue_add(&worklist, insn->op2);
					}
					continue;
				}
				if (!IR_IS_BOTTOM(insn->op2)
#if IR_COMBO_COPY_PROPAGATION
				 && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY)
#endif
				) {
					/* condition is a known constant: only one successor is
					 * feasible */
					bool b = ir_sccp_is_true(ctx, _values, insn->op2);
					use_list = &ctx->use_lists[i];
					IR_ASSERT(use_list->count == 2);
					p = &ctx->use_edges[use_list->refs];
					use = *p;
					use_insn = &ctx->ir_base[use];
					IR_ASSERT(use_insn->op == IR_IF_TRUE || use_insn->op == IR_IF_FALSE);
					if ((use_insn->op == IR_IF_TRUE) != b) {
						use = *(p+1);
						IR_ASSERT(ctx->ir_base[use].op == IR_IF_TRUE || ctx->ir_base[use].op == IR_IF_FALSE);
					}
					if (_values[i].optx == IR_TOP) {
						/* one-way IF: remember the taken target */
						_values[i].optx = IR_IF;
						_values[i].op1 = use;
					} else if (_values[i].optx != IR_IF || _values[i].op1 != use) {
						/* target changed between visits - give up */
						IR_MAKE_BOTTOM(i);
					}
					if (!IR_IS_BOTTOM(use)) {
						ir_bitqueue_add(&worklist, use);
					}
					continue;
				}
				IR_MAKE_BOTTOM(i);
			} else if (insn->op == IR_SWITCH) {
				if (IR_IS_TOP(insn->op2)) {
					/* do backward propagation only once */
					if (!_values[insn->op2].op1) {
						_values[insn->op2].op1 = 1;
						ir_bitqueue_add(&worklist, insn->op2);
					}
					continue;
				}
				if (!IR_IS_BOTTOM(insn->op2)
#if IR_COMBO_COPY_PROPAGATION
				 && (IR_IS_CONST_REF(insn->op2) || _values[insn->op2].op != IR_COPY)
#endif
				) {
					/* selector is a known constant: pick the matching CASE_VAL,
					 * falling back to CASE_DEFAULT */
					ir_ref use_case = IR_UNUSED;

					use_list = &ctx->use_lists[i];
					n = use_list->count;
					for (j = 0, p = &ctx->use_edges[use_list->refs]; j < n; j++, p++) {
						use = *p;
						IR_ASSERT(use > 0);
						use_insn = &ctx->ir_base[use];
						if (use_insn->op == IR_CASE_VAL) {
							if (ir_sccp_is_equal(ctx, _values, insn->op2, use_insn->op2)) {
								use_case = use;
								break;
							}
						} else if (use_insn->op == IR_CASE_DEFAULT) {
							use_case = use;
						}
					}
					if (use_case) {
						use_insn = &ctx->ir_base[use_case];
						if (_values[i].optx == IR_TOP) {
							/* one-way SWITCH: reuse the IR_IF lattice encoding */
							_values[i].optx = IR_IF;
							_values[i].op1 = use_case;
						} else if (_values[i].optx != IR_IF || _values[i].op1 != use_case) {
							IR_MAKE_BOTTOM(i);
						}
						if (!IR_IS_BOTTOM(use_case)) {
							ir_bitqueue_add(&worklist, use_case);
						}
					}
					if (!IR_IS_BOTTOM(i)) {
						continue;
					}
				}
				IR_MAKE_BOTTOM(i);
			} else if (ir_is_dead_load_ex(ctx, i, flags, insn)) {
				/* dead load */
				_values[i].optx = IR_LOAD;
			} else {
				IR_MAKE_BOTTOM(i);

				/* control, call, load and store instructions may have unprocessed inputs */
				n = IR_INPUT_EDGES_COUNT(flags);
				if (IR_OP_HAS_VAR_INPUTS(flags) && (n = insn->inputs_count) > 3) {
					for (j = 0; j < (n>>2); j++) {
						_values[i+j+1].optx = IR_BOTTOM; /* keep the tail of a long multislot instruction */
					}
					for (j = 2, p = insn->ops + j; j <= n; j++, p++) {
						IR_ASSERT(IR_OPND_KIND(flags, j) == IR_OPND_DATA);
						use = *p;
						if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
							ir_bitqueue_add(&worklist, use);
						}
					}
				} else if (n >= 2) {
					IR_ASSERT(IR_OPND_KIND(flags, 2) == IR_OPND_DATA);
					use = insn->op2;
					if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
						ir_bitqueue_add(&worklist, use);
					}
					if (n > 2) {
						IR_ASSERT(n == 3);
						IR_ASSERT(IR_OPND_KIND(flags, 3) == IR_OPND_DATA);
						use = insn->op3;
						if (use > 0 && UNEXPECTED(_values[use].optx == IR_TOP)) {
							ir_bitqueue_add(&worklist, use);
						}
					}
				}
			}
		}
		/* this instruction's lattice value changed - re-examine its uses */
		use_list = &ctx->use_lists[i];
		n = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
			use = *p;
			if (_values[use].optx != IR_BOTTOM) {
				ir_bitqueue_add(&worklist, use);
			}
		}
	}

#ifdef IR_DEBUG
	/* dump the final lattice (everything except BOTTOM) */
	if (ctx->flags & IR_DEBUG_SCCP) {
		for (i = 1; i < ctx->insns_count; i++) {
			if (IR_IS_CONST_OP(_values[i].op) || IR_IS_SYM_CONST(_values[i].op)) {
				fprintf(stderr, "%d. CONST(", i);
				ir_print_const(ctx, &_values[i], stderr, true);
				fprintf(stderr, ")\n");
#if IR_COMBO_COPY_PROPAGATION
			} else if (_values[i].op == IR_COPY) {
				fprintf(stderr, "%d. COPY(%d)\n", i, _values[i].op1);
#endif
			} else if (IR_IS_TOP(i)) {
				fprintf(stderr, "%d. TOP\n", i);
			} else if (_values[i].op == IR_IF) {
				fprintf(stderr, "%d. IF(%d)\n", i, _values[i].op1);
			} else if (_values[i].op == IR_MERGE) {
				fprintf(stderr, "%d. MERGE(%d)\n", i, _values[i].op1);
			} else if (!IR_IS_BOTTOM(i)) {
				fprintf(stderr, "%d. %d\n", i, _values[i].op);
			}
		}
	}
#endif

	/* Phase 2: rewrite the IR according to the final lattice values */
	for (i = 1, value = _values + i; i < ctx->insns_count; value++, i++) {
		if (value->op == IR_BOTTOM) {
			continue;
		} else if (IR_IS_CONST_OP(value->op)) {
			/* replace instruction by constant */
			j = ir_const(ctx, value->val, value->type);
			ir_sccp_replace_insn(ctx, _values, i, j, &worklist2);
		} else if (IR_IS_SYM_CONST(value->op)) {
			/* replace instruction by constant */
			j = ir_const_ex(ctx, value->val, value->type, value->optx);
			ir_sccp_replace_insn(ctx, _values, i, j, &worklist2);
#if IR_COMBO_COPY_PROPAGATION
		} else if (value->op == IR_COPY) {
			ir_sccp_replace_insn(ctx, _values, i, value->op1, &worklist2);
#endif
		} else if (value->op == IR_TOP) {
			/* remove unreachable instruction */
			insn = &ctx->ir_base[i];
			if (ir_op_flags[insn->op] & (IR_OP_FLAG_DATA|IR_OP_FLAG_MEM)) {
				/* keep PARAMs, and VARs whose defining block stays reachable */
				if (insn->op != IR_PARAM && (insn->op != IR_VAR || _values[insn->op1].op == IR_TOP)) {
					ir_sccp_remove_insn(ctx, _values, i, &worklist2);
				}
			} else {
				if (ir_op_flags[insn->op] & IR_OP_FLAG_TERMINATOR) {
					/* remove from terminators list
					 * (singly linked through op3, headed at ir_base[1].op1) */
					ir_ref prev = ctx->ir_base[1].op1;
					if (prev == i) {
						ctx->ir_base[1].op1 = insn->op3;
					} else {
						while (prev) {
							if (ctx->ir_base[prev].op3 == i) {
								ctx->ir_base[prev].op3 = insn->op3;
								break;
							}
							prev = ctx->ir_base[prev].op3;
						}
					}
				}
				ir_sccp_replace_insn(ctx, _values, i, IR_UNUSED, &worklist2);
			}
		} else if (value->op == IR_IF) {
			/* remove one way IF/SWITCH */
			ir_sccp_remove_if(ctx, _values, i, value->op1);
		} else if (value->op == IR_MERGE) {
			/* schedule merge to remove unfeasible MERGE inputs */
			ir_bitqueue_add(&worklist, i);
		} else if (value->op == IR_LOAD) {
			/* schedule dead load elimination */
			ir_bitqueue_add(&worklist2, i);
		}
	}

	/* prune unfeasible MERGE inputs (deferred so that all one-way IFs are
	 * removed first) */
	while ((i = ir_bitqueue_pop(&worklist)) >= 0) {
		IR_ASSERT(_values[i].op == IR_MERGE);
		ir_sccp_remove_unfeasible_merge_inputs(ctx, _values, i, _values[i].op1);
	}

	ctx->flags2 |= IR_CFG_REACHABLE;

	/* Phase 3: iterative clean-up - dead code elimination, conversion
	 * promotion/narrowing, re-folding, block merging and CFG simplification */
	while ((i = ir_bitqueue_pop(&worklist2)) >= 0) {
		insn = &ctx->ir_base[i];
		if (IR_IS_FOLDABLE_OP(insn->op)) {
			if (ctx->use_lists[i].count == 0) {
				if (insn->op == IR_PHI) {
					/* revisit the controlling MERGE - it may become removable */
					ir_bitqueue_add(&worklist2, insn->op1);
				}
				ir_sccp_remove_insn2(ctx, i, &worklist2);
			} else {
				insn = &ctx->ir_base[i];
				switch (insn->op) {
					case IR_FP2FP:
						if (insn->type == IR_FLOAT) {
							/* double->float: try to compute in float directly */
							if (ir_may_promote_d2f(ctx, insn->op1)) {
								ir_ref ref = ir_promote_d2f(ctx, insn->op1, i);
								insn->op1 = ref;
								ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
								break;
							}
						} else {
							/* float->double: try to compute in double directly */
							if (ir_may_promote_f2d(ctx, insn->op1)) {
								ir_ref ref = ir_promote_f2d(ctx, insn->op1, i);
								insn->op1 = ref;
								ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
								break;
							}
						}
						goto folding;
					case IR_FP2INT:
						if (ctx->ir_base[insn->op1].type == IR_DOUBLE) {
							if (ir_may_promote_d2f(ctx, insn->op1)) {
								insn->op1 = ir_promote_d2f(ctx, insn->op1, i);
							}
						} else {
							if (ir_may_promote_f2d(ctx, insn->op1)) {
								insn->op1 = ir_promote_f2d(ctx, insn->op1, i);
							}
						}
						goto folding;
					case IR_TRUNC:
						/* try to compute in the narrower type and drop the TRUNC */
						if (ir_may_promote_i2i(ctx, insn->type, insn->op1)) {
							ir_ref ref = ir_promote_i2i(ctx, insn->type, insn->op1, i);
							insn->op1 = ref;
							ir_sccp_replace_insn2(ctx, i, ref, &worklist2);
							break;
						}
						goto folding;
					case IR_SEXT:
					case IR_ZEXT:
						if (ir_try_promote_ext(ctx, i, insn, &worklist2)) {
							break;
						}
						goto folding;
					case IR_PHI:
						break;
					default:
folding:
						ir_sccp_fold2(ctx, i, &worklist2);
						break;
				}
			}
		} else if (ir_op_flags[insn->op] & IR_OP_FLAG_BB_START) {
			if (!(ctx->flags & IR_OPT_CFG)) {
				/* pass */
			} else if (insn->op == IR_BEGIN) {
				/* merge a straight-line END->BEGIN pair into one block */
				if (ctx->ir_base[insn->op1].op == IR_END
				 && ctx->use_lists[i].count == 1) {
					ir_merge_blocks(ctx, insn->op1, i, &worklist2);
				}
			} else if (insn->op == IR_MERGE) {
				ir_optimize_merge(ctx, i, insn, &worklist2);
			}
		} else if (ir_is_dead_load(ctx, i)) {
			ir_ref next = ctx->use_edges[ctx->use_lists[i].refs];

			/* remove LOAD from double linked control list */
			ctx->ir_base[next].op1 = insn->op1;
			ir_use_list_replace_one(ctx, insn->op1, i, next);
			insn->op1 = IR_UNUSED;
			ir_sccp_remove_insn2(ctx, i, &worklist2);
		}
	}

	ir_mem_free(_values);
	ir_bitqueue_free(&worklist);
	ir_bitqueue_free(&worklist2);

	ctx->flags2 &= ~IR_OPT_IN_SCCP;
	ctx->flags2 |= IR_SCCP_DONE;

	return 1;
}
2575