/*
 * IR - Lightweight JIT Compilation Framework
 * (GCM - Global Code Motion and Scheduler)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 *
 * The GCM algorithm is based on Cliff Click's publication
 * See: C. Click. "Global Code Motion, Global Value Numbering", PLDI '95.
 */

#include "ir.h"
#include "ir_private.h"

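/* Schedule "ref" as early as possible: into the block of its most deeply
 * nested (in the dominator tree) input, i.e. the first block where all of
 * its inputs are already available. The chosen block is stored negated in
 * _blocks[] to mark the node as "scheduled early but not yet placed".
 */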
static int32_t ir_gcm_schedule_early(ir_ctx *ctx, int32_t *_blocks, ir_ref ref, ir_list *queue_rest)
{
	ir_ref n, *p, input;
	ir_insn *insn;
	uint32_t dom_depth;
	int32_t b, result;
	bool reschedule_late = 1;

	insn = &ctx->ir_base[ref];

	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

	result = 1;
	dom_depth = 0;

	n = insn->inputs_count;
	for (p = insn->ops + 1; n > 0; p++, n--) {
		input = *p;
		if (input > 0) {
			b = _blocks[input];
			if (b == 0) {
				b = ir_gcm_schedule_early(ctx, _blocks, input, queue_rest);
			} else if (b < 0) {
				b = -b;
			}
			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
				dom_depth = ctx->cfg_blocks[b].dom_depth;
				result = b;
			}
			reschedule_late = 0;
		}
	}
	_blocks[ref] = -result;

	if (UNEXPECTED(reschedule_late)) {
		/* Floating nodes that don't depend on other nodes
		 * (e.g. they depend only on constants) have to be scheduled
		 * to the least common ancestor of their uses. Otherwise they
		 * would always end up in the first block.
		 */
		ir_list_push_unchecked(queue_rest, ref);
	}
	return result;
}

/* Find the least common ancestor (LCA) of two blocks in the dominator tree:
 * first raise the deeper block to the depth of the shallower one, then
 * walk both up in lockstep until they meet.
 */
static int32_t ir_gcm_find_lca(ir_ctx *ctx, int32_t b1, int32_t b2)
{
	uint32_t dom_depth;

	dom_depth = ctx->cfg_blocks[b2].dom_depth;
	while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
		b1 = ctx->cfg_blocks[b1].dom_parent;
	}
	dom_depth = ctx->cfg_blocks[b1].dom_depth;
	while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
		b2 = ctx->cfg_blocks[b2].dom_parent;
	}
	while (b1 != b2) {
		b1 = ctx->cfg_blocks[b1].dom_parent;
		b2 = ctx->cfg_blocks[b2].dom_parent;
	}
	return b2;
}

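/* Schedule "ref" as late as possible and fix its final position: compute the
 * least common ancestor (in the dominator tree) of all use blocks (for a PHI
 * use the relevant block is the corresponding MERGE predecessor), then walk
 * up from the LCA towards the early position, preferring the block with the
 * smallest loop depth. Flipping _blocks[ref] positive marks the node placed.
 */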
static void ir_gcm_schedule_late(ir_ctx *ctx, int32_t *_blocks, ir_ref ref)
{
	ir_ref n, *p, use;
	ir_insn *insn;
	ir_use_list *use_list;

	IR_ASSERT(_blocks[ref] < 0);
	_blocks[ref] = -_blocks[ref];
	use_list = &ctx->use_lists[ref];
	n = use_list->count;
	if (n) {
		int32_t lca, b;

		insn = &ctx->ir_base[ref];
		IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
		IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

		lca = 0;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
			use = *p;
			b = _blocks[use];
			if (!b) {
				continue;
			} else if (b < 0) {
				ir_gcm_schedule_late(ctx, _blocks, use);
				b = _blocks[use];
				IR_ASSERT(b != 0);
			}
			insn = &ctx->ir_base[use];
			if (insn->op == IR_PHI) {
				ir_ref *p = insn->ops + 2; /* PHI data inputs */
				ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
				ir_ref n = insn->inputs_count - 1;

				for (; n > 0; p++, q++, n--) {
					if (*p == ref) {
						b = _blocks[*q];
						lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
					}
				}
			} else {
				lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
			}
		}
		IR_ASSERT(lca != 0 && "No Common Ancestor");
		b = lca;

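		/* If the LCA differs from the early position, try to hoist the node
		 * out of loops: walk the dominator path from the LCA up towards the
		 * early block and pick the candidate with the smallest loop depth.
		 */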
		if (b != _blocks[ref]) {
			ir_block *bb = &ctx->cfg_blocks[b];
			uint32_t loop_depth = bb->loop_depth;

			if (loop_depth) {
				uint32_t flags;

				use_list = &ctx->use_lists[ref];
				if (use_list->count == 1) {
					use = ctx->use_edges[use_list->refs];
					insn = &ctx->ir_base[use];
					if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
						_blocks[ref] = b;
						return;
					}
				}

				flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
				if ((flags & IR_BB_LOOP_WITH_ENTRY)
				 && !(ctx->binding && ir_binding_find(ctx, ref))) {
					/* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
				} else {
					do {
						lca = bb->dom_parent;
						bb = &ctx->cfg_blocks[lca];
						if (bb->loop_depth < loop_depth) {
							if (!bb->loop_depth) {
								b = lca;
								break;
							}
							flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
							if ((flags & IR_BB_LOOP_WITH_ENTRY)
							 && !(ctx->binding && ir_binding_find(ctx, ref))) {
								break;
							}
							loop_depth = bb->loop_depth;
							b = lca;
						}
					} while (lca != _blocks[ref]);
				}
			}
			_blocks[ref] = b;
			if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
				/* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */
				_blocks[ref + 1] = b;
			}
		}
	}
}

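/* Place a floating node queued by ir_gcm_schedule_early() (one that depends
 * only on constants) directly at the least common ancestor of its uses.
 */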
static void ir_gcm_schedule_rest(ir_ctx *ctx, int32_t *_blocks, ir_ref ref)
{
	ir_ref n, *p, use;
	ir_insn *insn;

	IR_ASSERT(_blocks[ref] < 0);
	_blocks[ref] = -_blocks[ref];
	n = ctx->use_lists[ref].count;
	if (n) {
		int32_t lca, b;

		insn = &ctx->ir_base[ref];
		IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
		IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);

		lca = 0;
		for (p = &ctx->use_edges[ctx->use_lists[ref].refs]; n > 0; p++, n--) {
			use = *p;
			b = _blocks[use];
			if (!b) {
				continue;
			} else if (b < 0) {
				ir_gcm_schedule_late(ctx, _blocks, use);
				b = _blocks[use];
				IR_ASSERT(b != 0);
			}
			insn = &ctx->ir_base[use];
			if (insn->op == IR_PHI) {
				ir_ref *p = insn->ops + 2; /* PHI data inputs */
				ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
				ir_ref n = insn->inputs_count - 1;

				for (; n > 0; p++, q++, n--) {
					if (*p == ref) {
						b = _blocks[*q];
						lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
					}
				}
			} else {
				lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
			}
		}
		IR_ASSERT(lca != 0 && "No Common Ancestor");
		b = lca;
		_blocks[ref] = b;
		if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
			/* OVERFLOW is a projection and must be scheduled together with previous ADD/SUB/MUL_OV */
			_blocks[ref + 1] = b;
		}
	}
}

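/* GCM entry point. Requires a CFG with a dominator tree and loop info (in
 * this framework, typically built beforehand by ir_build_cfg(),
 * ir_build_dominators_tree() and ir_find_loops()). On success, ctx->cfg_map
 * holds the assigned block number for every pinned and floating instruction.
 */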
int ir_gcm(ir_ctx *ctx)
{
	ir_ref k, n, *p, ref;
	ir_block *bb;
	ir_list queue_early;
	ir_list queue_late;
	ir_list queue_rest;
	int32_t *_blocks, b;
	ir_insn *insn, *use_insn;
	ir_use_list *use_list;

	IR_ASSERT(ctx->cfg_map);
	_blocks = (int32_t*)ctx->cfg_map;

	ir_list_init(&queue_early, ctx->insns_count);

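	/* Fast path: with a single basic block every node trivially belongs
	 * to block 1, so just pin all live nodes there. */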
	if (ctx->cfg_blocks_count == 1) {
		ref = ctx->cfg_blocks[1].end;
		do {
			insn = &ctx->ir_base[ref];
			_blocks[ref] = 1; /* pin to block */
			if (insn->inputs_count > 1) {
				/* insn has input data edges */
				ir_list_push_unchecked(&queue_early, ref);
			}
			ref = insn->op1; /* control predecessor */
		} while (ref != 1); /* IR_START */
		_blocks[1] = 1; /* pin to block */

		use_list = &ctx->use_lists[1];
		n = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
			ref = *p;
			use_insn = &ctx->ir_base[ref];
			if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
				ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
				_blocks[ref] = 1; /* pin to block */
			}
		}

		/* Place all live nodes into the first block */
		while (ir_list_len(&queue_early)) {
			ref = ir_list_pop(&queue_early);
			insn = &ctx->ir_base[ref];
			n = insn->inputs_count;
			for (p = insn->ops + 1; n > 0; p++, n--) {
				ref = *p;
				if (ref > 0 && _blocks[ref] == 0) {
					_blocks[ref] = 1;
					ir_list_push_unchecked(&queue_early, ref);
				}
			}
		}

		ir_list_free(&queue_early);

		return 1;
	}

	ir_list_init(&queue_late, ctx->insns_count);

	/* Pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
	b = ctx->cfg_blocks_count;
	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		ref = bb->end;

		/* process the last instruction of the block */
		insn = &ctx->ir_base[ref];
		_blocks[ref] = b; /* pin to block */
		if (insn->inputs_count > 1) {
			/* insn has input data edges */
			ir_list_push_unchecked(&queue_early, ref);
		}
		ref = insn->op1; /* control predecessor */

		while (ref != bb->start) {
			insn = &ctx->ir_base[ref];
			_blocks[ref] = b; /* pin to block */
			if (insn->inputs_count > 1) {
				/* insn has input data edges */
				ir_list_push_unchecked(&queue_early, ref);
			}
			if (insn->type != IR_VOID) {
				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
				ir_list_push_unchecked(&queue_late, ref);
			}
			ref = insn->op1; /* control predecessor */
		}

		/* process the first instruction of the block */
		_blocks[ref] = b; /* pin to block */

		use_list = &ctx->use_lists[ref];
		n = use_list->count;
		if (n > 1) {
			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
				ref = *p;
				use_insn = &ctx->ir_base[ref];
				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
					bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						_blocks[ref] = b; /* pin to block */
						ir_list_push_unchecked(&queue_early, ref);
						ir_list_push_unchecked(&queue_late, ref);
					}
				} else if (use_insn->op == IR_PARAM) {
					bb->flags |= IR_BB_HAS_PARAM;
					_blocks[ref] = b; /* pin to block */
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						ir_list_push_unchecked(&queue_late, ref);
					}
				} else if (use_insn->op == IR_VAR) {
					bb->flags |= IR_BB_HAS_VAR;
					_blocks[ref] = b; /* pin to block */
					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
						/* This is necessary only for VADDR */
						ir_list_push_unchecked(&queue_late, ref);
					}
				}
			}
		}
	}

	ir_list_init(&queue_rest, ctx->insns_count);

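	/* Pass 1: schedule the data inputs of all pinned instructions as early
	 * as possible (the control input in ops[1] is skipped). */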
	n = ir_list_len(&queue_early);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_early, n);
		insn = &ctx->ir_base[ref];
		k = insn->inputs_count - 1;
		for (p = insn->ops + 2; k > 0; p++, k--) {
			ref = *p;
			if (ref > 0 && _blocks[ref] == 0) {
				ir_gcm_schedule_early(ctx, _blocks, ref, &queue_rest);
			}
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM) {
		fprintf(stderr, "GCM Schedule Early\n");
		for (n = 1; n < ctx->insns_count; n++) {
			fprintf(stderr, "%d -> %d\n", n, _blocks[n]);
		}
	}
#endif

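	/* Pass 2: starting from the uses of the pinned instructions, fix the
	 * final position of every node that was scheduled early. */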
	n = ir_list_len(&queue_late);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_late, n);
		use_list = &ctx->use_lists[ref];
		k = use_list->count;
		for (p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
			ref = *p;
			if (_blocks[ref] < 0) {
				ir_gcm_schedule_late(ctx, _blocks, ref);
			}
		}
	}

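	/* Pass 3: place the floating nodes that depend only on constants. */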
	n = ir_list_len(&queue_rest);
	while (n > 0) {
		n--;
		ref = ir_list_at(&queue_rest, n);
		ir_gcm_schedule_rest(ctx, _blocks, ref);
	}

	ir_list_free(&queue_early);
	ir_list_free(&queue_late);
	ir_list_free(&queue_rest);

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_GCM) {
		fprintf(stderr, "GCM Schedule Late\n");
		for (n = 1; n < ctx->insns_count; n++) {
			fprintf(stderr, "%d -> %d\n", n, _blocks[n]);
		}
	}
#endif

	return 1;
}

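/* Rewrite the binding hash table through the _xlat[] translation map:
 * entries whose key survived scheduling get new keys/values and are
 * re-chained; entries for removed nodes are dropped.
 */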
static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
{
	uint32_t n1, n2, pos;
	ir_ref key;
	ir_hashtab_bucket *b1, *b2;
	ir_hashtab *binding = ctx->binding;
	uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);

	memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
	n1 = binding->count;
	n2 = 0;
	pos = 0;
	b1 = binding->data;
	b2 = binding->data;
	while (n1 > 0) {
		key = b1->key;
		IR_ASSERT(key < ctx->insns_count);
		if (_xlat[key]) {
			key = _xlat[key];
			b2->key = key;
			if (b1->val > 0) {
				IR_ASSERT(_xlat[b1->val]);
				b2->val = _xlat[b1->val];
			} else {
				b2->val = b1->val;
			}
			key |= binding->mask;
			b2->next = ((uint32_t*)binding->data)[key];
			((uint32_t*)binding->data)[key] = pos;
			pos += sizeof(ir_hashtab_bucket);
			b2++;
			n2++;
		}
		b1++;
		n1--;
	}
	binding->count = n2;
}

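/* Count a constant only the first time it is seen. _xlat[ref] is set to a
 * non-zero dummy value as a "used constant" marker; the real translated
 * reference is assigned later, if the constants get renumbered.
 */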
IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
{
	if (!_xlat[ref]) {
		_xlat[ref] = ref; /* this is only a "used constant" marker */
		return 1;
	}
	return 0;
}

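/* Produce the final linear instruction order: topologically sort the nodes
 * inside each basic block, renumber all references, drop unused constants
 * and instructions, and rebuild the context (ir_base, use lists, cfg_map,
 * prev_ref) in the new, compact numbering.
 */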
int ir_schedule(ir_ctx *ctx)
{
	ir_ctx new_ctx;
	ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
	ir_ref *_xlat;
	ir_ref *edges;
	uint32_t b, prev_b;
	uint32_t *_blocks = ctx->cfg_map;
	ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	ir_ref _move_down = 0;
	ir_block *bb;
	ir_insn *insn, *new_insn;
	ir_use_list *lists, *use_list, *new_list;

	/* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */
	prev_b = _blocks[1];
	IR_ASSERT(prev_b);
	_prev[1] = 0;
	_prev[ctx->cfg_blocks[1].end] = 0;
	for (i = 2, j = 1; i < ctx->insns_count; i++) {
		b = _blocks[i];
		IR_ASSERT((int32_t)b >= 0);
		if (b == prev_b) {
			/* add to the end of the list */
			_next[j] = i;
			_prev[i] = j;
			j = i;
		} else if (b > prev_b) {
			bb = &ctx->cfg_blocks[b];
			if (i == bb->start) {
				IR_ASSERT(bb->end > bb->start);
				prev_b = b;
				_prev[bb->end] = 0;
				/* add to the end of the list */
				_next[j] = i;
				_prev[i] = j;
				j = i;
			} else {
				IR_ASSERT(i != bb->end);
				/* move down late (see the following loop) */
				_next[i] = _move_down;
				_move_down = i;
			}
		} else if (b) {
			bb = &ctx->cfg_blocks[b];
			IR_ASSERT(i != bb->start);
			if (_prev[bb->end]) {
				/* move up, insert before the end of the already scheduled BB */
				k = bb->end;
			} else {
				/* move up, insert at the end of the block */
				k = ctx->cfg_blocks[b + 1].start;
			}
			/* insert before "k" */
			_prev[i] = _prev[k];
			_next[i] = k;
			_next[_prev[k]] = i;
			_prev[k] = i;
		}
	}
	_next[j] = 0;

	while (_move_down) {
		i = _move_down;
		_move_down = _next[i];
		b = _blocks[i];
		bb = &ctx->cfg_blocks[b];
		k = _next[bb->start];

		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
			/* insert after the start of the block and all PARAM, VAR, PI, PHI */
			insn = &ctx->ir_base[k];
			while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
				k = _next[k];
				insn = &ctx->ir_base[k];
			}
		}

		/* insert before "k" */
		_prev[i] = _prev[k];
		_next[i] = k;
		_next[_prev[k]] = i;
		_prev[k] = i;
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_SCHEDULE) {
		fprintf(stderr, "Before Schedule\n");
		for (i = 1; i != 0; i = _next[i]) {
			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
		}
	}
#endif

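	/* _xlat[] translates old refs to new ones. It is indexed by signed refs
	 * (negative for constants, positive for instructions), so bias the
	 * allocated pointer by consts_count. */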
	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
	_xlat += ctx->consts_count;
	_xlat[IR_TRUE] = IR_TRUE;
	_xlat[IR_FALSE] = IR_FALSE;
	_xlat[IR_NULL] = IR_NULL;
	_xlat[IR_UNUSED] = IR_UNUSED;
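	/* Start the new numbering: ref 1 is the first instruction, and the
	 * reserved constant slots (up to IR_TRUE) are always counted. */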
	insns_count = 1;
	consts_count = -(IR_TRUE - 1);

	/* Topological sort according to the dependencies inside each basic block */
	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		/* Schedule BB start */
		i = bb->start;
		_xlat[i] = bb->start = insns_count;
		insn = &ctx->ir_base[i];
		if (insn->op == IR_CASE_VAL) {
			IR_ASSERT(insn->op2 < IR_TRUE);
			consts_count += ir_count_constant(_xlat, insn->op2);
		}
		n = insn->inputs_count;
		insns_count += ir_insn_inputs_to_len(n);
		i = _next[i];
		insn = &ctx->ir_base[i];
		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
			/* Schedule PARAM, VAR, PI */
			while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
				_xlat[i] = insns_count;
				insns_count += 1;
				i = _next[i];
				insn = &ctx->ir_base[i];
			}
			/* Schedule PHIs */
			while (insn->op == IR_PHI) {
				ir_ref j, *p, input;

				_xlat[i] = insns_count;
				/* Reuse "n" from MERGE and skip the first input */
				insns_count += ir_insn_inputs_to_len(n + 1);
				for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
					input = *p;
					if (input < IR_TRUE) {
						consts_count += ir_count_constant(_xlat, input);
					}
				}
				i = _next[i];
				insn = &ctx->ir_base[i];
			}
		}
		while (i != bb->end) {
			ir_ref n, j, *p, input;

restart:
			n = insn->inputs_count;
			for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
				input = *p;
				if (!_xlat[input]) {
					/* input is not scheduled yet */
					if (input > 0) {
						if (_blocks[input] == b) {
							/* "input" should be before "i" to satisfy the dependency */
#ifdef IR_DEBUG
							if (ctx->flags & IR_DEBUG_SCHEDULE) {
								fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
							}
#endif
							/* remove "input" */
							_prev[_next[input]] = _prev[input];
							_next[_prev[input]] = _next[input];
							/* insert before "i" */
							_prev[input] = _prev[i];
							_next[input] = i;
							_next[_prev[i]] = input;
							_prev[i] = input;
							/* restart from "input" */
							i = input;
							insn = &ctx->ir_base[i];
							goto restart;
						}
					} else if (input < IR_TRUE) {
						consts_count += ir_count_constant(_xlat, input);
					}
				}
			}
			_xlat[i] = insns_count;
			insns_count += ir_insn_inputs_to_len(n);
			i = _next[i];
			insn = &ctx->ir_base[i];
		}
		/* Schedule BB end */
		_xlat[i] = bb->end = insns_count;
		insns_count++;
		if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
			if (insn->op2 < IR_TRUE) {
				consts_count += ir_count_constant(_xlat, insn->op2);
			}
		}
	}

#ifdef IR_DEBUG
	if (ctx->flags & IR_DEBUG_SCHEDULE) {
		fprintf(stderr, "After Schedule\n");
		for (i = 1; i != 0; i = _next[i]) {
			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
		}
	}
#endif

#if 1
	/* Check if scheduling didn't make any modifications */
	if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
		bool changed = 0;

		for (i = 1; i != 0; i = _next[i]) {
			if (_xlat[i] != i) {
				changed = 1;
				break;
			}
		}
		if (!changed) {
			_xlat -= ctx->consts_count;
			ir_mem_free(_xlat);
			ir_mem_free(_next);

			ctx->prev_ref = _prev;
			ctx->flags2 |= IR_LINEAR;
			ir_truncate(ctx);

			return 1;
		}
	}
#endif

	ir_mem_free(_prev);

	ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
	new_ctx.insns_count = insns_count;
	new_ctx.flags2 = ctx->flags2;
	new_ctx.ret_type = ctx->ret_type;
	new_ctx.mflags = ctx->mflags;
	new_ctx.spill_base = ctx->spill_base;
	new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
	new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
	new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
	new_ctx.fixed_regset = ctx->fixed_regset;
	new_ctx.fixed_save_regset = ctx->fixed_save_regset;
	new_ctx.entries_count = ctx->entries_count;
#if defined(IR_TARGET_AARCH64)
	new_ctx.deoptimization_exits = ctx->deoptimization_exits;
	new_ctx.get_exit_addr = ctx->get_exit_addr;
	new_ctx.get_veneer = ctx->get_veneer;
	new_ctx.set_veneer = ctx->set_veneer;
#endif
	new_ctx.loader = ctx->loader;

	/* Copy constants */
	if (consts_count == ctx->consts_count) {
		new_ctx.consts_count = consts_count;
		ref = 1 - consts_count;
		insn = &ctx->ir_base[ref];
		new_insn = &new_ctx.ir_base[ref];

		memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
		if (ctx->strtab.data) {
			while (ref != IR_TRUE) {
				if (new_insn->op == IR_FUNC_ADDR) {
					if (new_insn->proto) {
						size_t len;
						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
						new_insn->proto = ir_strl(&new_ctx, proto, len);
					}
				} else if (new_insn->op == IR_FUNC) {
					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
					if (new_insn->proto) {
						size_t len;
						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
						new_insn->proto = ir_strl(&new_ctx, proto, len);
					}
				} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
				}
				new_insn++;
				ref++;
			}
		}
	} else {
		new_ref = -new_ctx.consts_count;
		new_insn = &new_ctx.ir_base[new_ref];
		for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
			if (!_xlat[ref]) {
				continue;
			}
			new_insn->optx = insn->optx;
			new_insn->prev_const = 0;
			if (insn->op == IR_FUNC_ADDR) {
				new_insn->val.u64 = insn->val.u64;
				if (insn->proto) {
					size_t len;
					const char *proto = ir_get_strl(ctx, insn->proto, &len);
					new_insn->proto = ir_strl(&new_ctx, proto, len);
				} else {
					new_insn->proto = 0;
				}
			} else if (insn->op == IR_FUNC) {
				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
				if (insn->proto) {
					size_t len;
					const char *proto = ir_get_strl(ctx, insn->proto, &len);
					new_insn->proto = ir_strl(&new_ctx, proto, len);
				} else {
					new_insn->proto = 0;
				}
			} else if (insn->op == IR_SYM || insn->op == IR_STR) {
				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
			} else {
				new_insn->val.u64 = insn->val.u64;
			}
			_xlat[ref] = new_ref;
			new_ref--;
			new_insn--;
		}
		new_ctx.consts_count = -new_ref;
	}

	new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
	new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
	new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
	new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));

	/* Copy instructions, use lists and use edges */
	prev_ref = 0;
	use_edges_count = 0;
	for (i = 1; i != 0; i = _next[i]) {
		new_ref = _xlat[i];
		new_ctx.cfg_map[new_ref] = _blocks[i];
		_prev[new_ref] = prev_ref;
		prev_ref = new_ref;

		use_list = &ctx->use_lists[i];
		n = use_list->count;
		k = 0;
		if (n == 1) {
			ref = ctx->use_edges[use_list->refs];
			if (_xlat[ref]) {
				*edges = _xlat[ref];
				edges++;
				k = 1;
			}
		} else {
			p = &ctx->use_edges[use_list->refs];
			while (n--) {
				ref = *p;
				if (_xlat[ref]) {
					*edges = _xlat[ref];
					edges++;
					k++;
				}
				p++;
			}
		}
		new_list = &lists[new_ref];
		new_list->refs = use_edges_count;
		use_edges_count += k;
		new_list->count = k;

		insn = &ctx->ir_base[i];
		new_insn = &new_ctx.ir_base[new_ref];

		new_insn->optx = insn->optx;
		n = new_insn->inputs_count;
		switch (n) {
			case 0:
				new_insn->op1 = insn->op1;
				new_insn->op2 = insn->op2;
				new_insn->op3 = insn->op3;
				break;
			case 1:
				new_insn->op1 = _xlat[insn->op1];
				if (new_insn->op == IR_PARAM || new_insn->op == IR_VAR) {
					new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
				} else if (new_insn->op == IR_PROTO) {
					size_t len;
					const char *proto = ir_get_strl(ctx, insn->op2, &len);
					new_insn->op2 = ir_strl(&new_ctx, proto, len);
				} else {
					new_insn->op2 = insn->op2;
				}
				new_insn->op3 = insn->op3;
				break;
			case 2:
				new_insn->op1 = _xlat[insn->op1];
				new_insn->op2 = _xlat[insn->op2];
				new_insn->op3 = insn->op3;
				break;
			case 3:
				new_insn->op1 = _xlat[insn->op1];
				new_insn->op2 = _xlat[insn->op2];
				new_insn->op3 = _xlat[insn->op3];
				break;
			default:
				for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
					*q = _xlat[*p];
				}
				break;
		}
	}

	/* Update the list of terminators (IR_OPND_CONTROL_REF) */
	insn = &new_ctx.ir_base[1];
	ref = insn->op1;
	if (ref) {
		insn->op1 = ref = _xlat[ref];
		while (1) {
			insn = &new_ctx.ir_base[ref];
			ref = insn->op3;
			if (!ref) {
				break;
			}
			insn->op3 = ref = _xlat[ref];
		}
	}

	IR_ASSERT(ctx->use_edges_count >= use_edges_count);
	new_ctx.use_edges_count = use_edges_count;
	new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));

	if (ctx->binding) {
		ir_xlat_binding(ctx, _xlat);
		new_ctx.binding = ctx->binding;
		ctx->binding = NULL;
	}

	_xlat -= ctx->consts_count;
	ir_mem_free(_xlat);

	new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
	new_ctx.cfg_edges_count = ctx->cfg_edges_count;
	new_ctx.cfg_blocks = ctx->cfg_blocks;
	new_ctx.cfg_edges = ctx->cfg_edges;
	ctx->cfg_blocks = NULL;
	ctx->cfg_edges = NULL;

	ir_free(ctx);
	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
	ctx->flags2 |= IR_LINEAR;

	ir_mem_free(_next);

	return 1;
}

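/* Build ctx->prev_ref[] (a back-reference to the previous instruction in the
 * linear order) for an already scheduled context, walking the instructions
 * block by block with ir_insn_len().
 */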
void ir_build_prev_refs(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_ref i, n, prev;
	ir_insn *insn;

	ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
	prev = 0;
	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
			ctx->prev_ref[i] = prev;
			n = ir_insn_len(insn);
			prev = i;
			i += n;
			insn += n;
		}
		ctx->prev_ref[i] = prev;
	}
}
934