xref: /php-src/ext/opcache/jit/ir/ir_gcm.c (revision bad5d2c7)
1 /*
2  * IR - Lightweight JIT Compilation Framework
3  * (GCM - Global Code Motion and Scheduler)
4  * Copyright (C) 2022 Zend by Perforce.
5  * Authors: Dmitry Stogov <dmitry@php.net>
6  *
7  * The GCM algorithm is based on Cliff Click's publication
8  * See: C. Click. "Global Code Motion / Global Value Numbering", PLDI'95.
9  */
10 
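/*
 * Overview: GCM first schedules every floating (un-pinned) node "early", into
 * the deepest block (by dominator depth) among the blocks of its inputs, and
 * then "late", into the lowest common ancestor (in the dominator tree) of all
 * of its uses.  On the dominator path between the two placements it finally
 * picks the block with the smallest loop depth (ir_gcm_select_best_block),
 * hoisting loop-invariant code where that is legal.
 *
 * A minimal usage sketch (the passes around ir_gcm()/ir_schedule() live outside
 * this file and their exact names are an assumption, not something defined here):
 *
 *     ir_build_cfg(ctx);              // CFG: ctx->cfg_blocks / ctx->cfg_edges
 *     ir_build_dominators_tree(ctx);  // dom_parent / dom_depth
 *     ir_find_loops(ctx);             // loop_header / loop_depth
 *     ir_gcm(ctx);                    // assign every node to a block (ctx->cfg_map)
 *     ir_schedule(ctx);               // linearize and renumber the IR
 */
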
11 #include "ir.h"
12 #include "ir_private.h"
13 
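/*
 * While GCM is running, ctx->cfg_map[ref] temporarily holds a negated block
 * number for nodes that have been scheduled "early" but not yet "late";
 * the following macros encode, decode and test that marker.
 */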
14 #define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0)
15 #define IR_GCM_EARLY_BLOCK(b)        ((uint32_t)-((int32_t)(b)))
16 
17 #define IR_GCM_SPLIT 1
18 #define IR_SCHEDULE_SWAP_OPS 1
19 
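/*
 * Schedule "ref" as early as possible: the earliest legal block is the block
 * with the greatest dominator-tree depth among the blocks of its inputs.
 * The result is stored negated in ctx->cfg_map[] (see the macros above) and
 * the node is queued for the subsequent "schedule late" pass.
 */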
20 static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late)
21 {
22 	ir_ref n, *p, input;
23 	ir_insn *insn;
24 	uint32_t dom_depth;
25 	uint32_t b, result;
26 
27 	insn = &ctx->ir_base[ref];
28 
29 	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
30 	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
31 
32 	result = 1;
33 	dom_depth = 0;
34 
35 	n = insn->inputs_count;
36 	for (p = insn->ops + 1; n > 0; p++, n--) {
37 		input = *p;
38 		if (input > 0) {
39 			b = ctx->cfg_map[input];
40 			if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
41 				b = IR_GCM_EARLY_BLOCK(b);
42 			} else if (!b) {
43 				b = ir_gcm_schedule_early(ctx, input, queue_late);
44 			}
45 			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
46 				dom_depth = ctx->cfg_blocks[b].dom_depth;
47 				result = b;
48 			}
49 		}
50 	}
51 
52 	ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result);
53 	ir_list_push_unchecked(queue_late, ref);
54 	return result;
55 }
56 
57 /* Lowest Common Ancestor */
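/*
 * Walk the deeper block up the dominator tree until both blocks are at the
 * same depth, then climb both in lock-step until they meet.
 */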
58 static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2)
59 {
60 	uint32_t dom_depth;
61 
62 	dom_depth = ctx->cfg_blocks[b2].dom_depth;
63 	while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
64 		b1 = ctx->cfg_blocks[b1].dom_parent;
65 	}
66 	dom_depth = ctx->cfg_blocks[b1].dom_depth;
67 	while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
68 		b2 = ctx->cfg_blocks[b2].dom_parent;
69 	}
70 	while (b1 != b2) {
71 		b1 = ctx->cfg_blocks[b1].dom_parent;
72 		b2 = ctx->cfg_blocks[b2].dom_parent;
73 	}
74 	return b2;
75 }
76 
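/*
 * Pick the final block for "ref" on the dominator path between the "late"
 * block (lca) and the "early" block (ctx->cfg_map[ref]): prefer the candidate
 * with the smallest loop depth, but keep a single-use comparison next to the
 * IF/GUARD it feeds, and don't move code out of a loop with an OSR entry
 * unless it can be restored from a binding.
 */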
77 static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
78 {
79 	ir_block *bb = &ctx->cfg_blocks[lca];
80 	uint32_t loop_depth = bb->loop_depth;
81 	uint32_t flags, best, b;
82 
83 	if (!loop_depth) {
84 		return lca;
85 	}
86 
87 	if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
88 		ir_use_list *use_list = &ctx->use_lists[ref];
89 
90 		if (use_list->count == 1) {
91 			ir_ref use = ctx->use_edges[use_list->refs];
92 			ir_insn *insn = &ctx->ir_base[use];
93 			if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
94 				/* Don't hoist invariant comparison */
95 				return lca;
96 			}
97 		}
98 	}
99 
100 	flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
101 	if ((flags & IR_BB_LOOP_WITH_ENTRY)
102 	 && !(ctx->binding && ir_binding_find(ctx, ref))) {
103 		/* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
104 		return lca;
105 	}
106 
107 	best = b = lca;
108 	do {
109 		b = bb->dom_parent;
110 		bb = &ctx->cfg_blocks[b];
111 		if (bb->loop_depth < loop_depth) {
112 			if (!bb->loop_depth) {
113 				best = b;
114 				break;
115 			}
116 			flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
117 			if ((flags & IR_BB_LOOP_WITH_ENTRY)
118 			 && !(ctx->binding && ir_binding_find(ctx, ref))) {
119 				break;
120 			}
121 			loop_depth = bb->loop_depth;
122 			best = b;
123 		}
124 	} while (b != ctx->cfg_map[ref]);
125 
126 	return best;
127 }
128 
129 #if IR_GCM_SPLIT
130 /* Partially Dead Code Elimination through splitting the node and sinking the clones
131  *
132  * This code is based on Benedikt Meurer's idea, first implemented in V8.
133  * See: https://codereview.chromium.org/899433005
134  */
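/*
 * The splitting works in three steps: (1) compute the set of blocks where the
 * node is TOTALLY_USEFUL (blocks that use the node or whose every successor is
 * already in the set), (2) for each use, climb the dominator tree while the
 * dominator stays TOTALLY_USEFUL and take the resulting block as a partition
 * key, and (3) emit one clone of the node per partition and rewire the uses of
 * that partition to the clone.
 */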
135 
136 typedef struct _ir_gcm_split_data {
137 	ir_sparse_set totally_useful;
138 	ir_list       worklist;
139 } ir_gcm_split_data;
140 
141 static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
142 {
143 	uint32_t *p, i, n = bb->predecessors_count;
144 
145 	IR_ASSERT(n > 0);
146 	p = ctx->cfg_edges + bb->predecessors;
147 	do {
148 		i = *p;
149 		if (!ir_sparse_set_in(&data->totally_useful, i)) {
150 			ir_list_push(&data->worklist, i);
151 		}
152 		p++;
153 		n--;
154 	} while (n > 0);
155 }
156 
157 static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
158 {
159 	uint32_t *p, i, n = bb->successors_count;
160 
161 	if (n <= 1) {
162 		IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors]));
163 		return 1;
164 	}
165 
166 	p = ctx->cfg_edges + bb->successors;
167 	do {
168 		i = *p;
169 		if (!ir_sparse_set_in(&data->totally_useful, i)) {
170 			return 0;
171 		}
172 		p++;
173 		n--;
174 	} while (n > 0);
175 
176 	return 1;
177 }
178 
179 static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
180 {
181 	ir_use_list *use_list;
182 	ir_insn *insn;
183 	ir_ref n, *p, use;
184 	uint32_t i;
185 	ir_gcm_split_data *data = ctx->data;
186 
187 	IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count);
188 
189 	/* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD)
190 	 * 1.1. Collect the blocks where the node is really USED.
191 	 */
192 	ir_sparse_set_clear(&data->totally_useful);
193 
194 	use_list = &ctx->use_lists[ref];
195 	n = use_list->count;
196 	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
197 		use = *p;
198 		insn = &ctx->ir_base[use];
199 		if (insn->op == IR_PHI) {
200 			ir_ref *p = insn->ops + 2; /* PHI data inputs */
201 			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
202 			ir_ref n = insn->inputs_count - 1;
203 
204 			for (;n > 0; p++, q++, n--) {
205 				if (*p == ref) {
206 					i = ctx->cfg_map[*q];
207 					IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
208 					if (!ir_sparse_set_in(&data->totally_useful, i)) {
209 						if (i == b) return 0; /* node is totally-useful in the scheduled block */
210 						ir_sparse_set_add(&data->totally_useful, i);
211 					}
212 				}
213 			}
214 		} else {
215 			i = ctx->cfg_map[use];
216 			if (!i) {
217 				continue;
218 			}
219 			IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
220 			if (!ir_sparse_set_in(&data->totally_useful, i)) {
221 				if (i == b) return 0; /* node is totally-useful in the scheduled block */
222 				ir_sparse_set_add(&data->totally_useful, i);
223 			}
224 		}
225 	}
226 
227 #ifdef IR_DEBUG
228 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
229 		bool first = 1;
230 		fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b);
231 		IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
232 			if (first) {
233 				fprintf(stderr, "\td_%d is USED in [BB%d", ref, i);
234 				first = 0;
235 			} else {
236 				fprintf(stderr, ", BB%d", i);
237 			}
238 		} IR_SPARSE_SET_FOREACH_END();
239 		fprintf(stderr, "]\n");
240 	}
241 #endif
242 
243 	/* 1.2. Iteratively check the predecessors of the already found TOTALLY_USEFUL blocks and
244 	 *      add them to the TOTALLY_USEFUL set if all of their successors are already there.
245 	 */
246 	IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
247 		_push_predecessors(ctx, &ctx->cfg_blocks[i], data);
248 	} IR_SPARSE_SET_FOREACH_END();
249 
250 	while (ir_list_len(&data->worklist)) {
251 		i = ir_list_pop(&data->worklist);
252 		if (!ir_sparse_set_in(&data->totally_useful, i)) {
253 			ir_block *bb = &ctx->cfg_blocks[i];
254 
255 			if (_check_successors(ctx, bb, data)) {
256 				if (i == b) {
257 					/* node is TOTALLY_USEFUL in the scheduled block */
258 					ir_list_clear(&data->worklist);
259 					return 0;
260 				}
261 				ir_sparse_set_add(&data->totally_useful, i);
262 				_push_predecessors(ctx, bb, data);
263 			}
264 		}
265 	}
266 
267 	IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b));
268 
269 #ifdef IR_DEBUG
270 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
271 		bool first = 1;
272 		IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
273 			if (first) {
274 				fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i);
275 				first = 0;
276 			} else {
277 				fprintf(stderr, ", BB%d", i);
278 			}
279 		} IR_SPARSE_SET_FOREACH_END();
280 		fprintf(stderr, "]\n");
281 	}
282 #endif
283 
284 	/* 2. Split the USEs into partitions */
285 	use_list = &ctx->use_lists[ref];
286 	ir_hashtab hash;
287 	uint32_t j, clone, clones_count = 0, uses_count = 0;
288 	struct {
289 		ir_ref   ref;
290 		uint32_t block;
291 		uint32_t use_count;
292 		uint32_t use;
293 	} *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
294 	struct {
295 		ir_ref   ref;
296 		uint32_t block;
297 		uint32_t next;
298 	} *uses = ir_mem_malloc(sizeof(*uses) * use_list->count);
299 
300 	ir_hashtab_init(&hash, use_list->count);
301 	n = use_list->count;
302 	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
303 		use = *p;
304 		insn = &ctx->ir_base[use];
305 		if (insn->op == IR_PHI) {
306 			ir_ref *p = insn->ops + 2; /* PHI data inputs */
307 			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
308 			ir_ref n = insn->inputs_count - 1;
309 
310 			/* PHIs must be processed once */
311 			if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) {
312 				continue;
313 			}
314 			ir_hashtab_add(&hash, -use, IR_NULL);
315 			for (;n > 0; p++, q++, n--) {
316 				if (*p == ref) {
317 					j = i = ctx->cfg_map[*q];
318 					while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
319 						j = ctx->cfg_blocks[j].idom;
320 					}
321 					clone = ir_hashtab_find(&hash, j);
322 					if (clone == IR_INVALID_VAL) {
323 						clone = clones_count++;
324 						ir_hashtab_add(&hash, j, clone);
325 						clones[clone].block = j;
326 						clones[clone].use_count = 0;
327 						clones[clone].use = (uint32_t)-1;
328 					}
329 					uses[uses_count].ref = use;
330 					uses[uses_count].block = i;
331 					uses[uses_count].next = clones[clone].use;
332 					clones[clone].use_count++;
333 					clones[clone].use = uses_count++;
334 				}
335 			}
336 		} else {
337 			j = i = ctx->cfg_map[use];
338 			IR_ASSERT(i > 0);
339 			while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
340 				j = ctx->cfg_blocks[j].idom;
341 			}
342 			clone = ir_hashtab_find(&hash, j);
343 			if (clone == IR_INVALID_VAL) {
344 				clone = clones_count++;
345 				ir_hashtab_add(&hash, j, clone);
346 				clones[clone].block = j;
347 				clones[clone].use_count = 0;
348 				clones[clone].use = -1;
349 			}
350 			uses[uses_count].ref = use;
351 			uses[uses_count].block = i;
352 			uses[uses_count].next = clones[clone].use;
353 			clones[clone].use_count++;
354 			clones[clone].use = uses_count++;
355 		}
356 	}
357 
358 #ifdef IR_DEBUG
359 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
360 		for (i = 0; i < clones_count; i++) {
361 			uint32_t u = clones[i].use;
362 
363 			fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d",
364 				i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block);
365 			u = uses[u].next;
366 			while (u != (uint32_t)-1) {
367 				fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block);
368 				u = uses[u].next;
369 			}
370 			fprintf(stderr, "]\n");
371 		}
372 	}
373 #endif
374 
375 	/* Create Clones */
376 	insn = &ctx->ir_base[ref];
377 	clones[0].ref = ref;
378 	for (i = 1; i < clones_count; i++) {
379 		clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3);
380 		insn = &ctx->ir_base[ref];
381 		if (insn->op1 > 0) ir_use_list_add(ctx, insn->op1, clone);
382 		if (insn->op2 > 0) ir_use_list_add(ctx, insn->op2, clone);
383 		if (insn->op3 > 0) ir_use_list_add(ctx, insn->op3, clone);
384 	}
385 
386 	/* Reconstruct IR: update DEF->USE lists, CFG mapping, etc. */
387 	ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
388 	ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t));
389 	n = ctx->use_lists[ref].refs;
390 	for (i = 0; i < clones_count; i++) {
391 		clone = clones[i].ref;
392 		ctx->cfg_map[clone] = clones[i].block;
393 		ctx->use_lists[clone].count = clones[i].use_count;
394 		ctx->use_lists[clone].refs = n;
395 
396 		uint32_t u = clones[i].use;
397 		while (u != (uint32_t)-1) {
398 			use = uses[u].ref;
399 			ctx->use_edges[n++] = use;
400 			u = uses[u].next;
401 			if (i > 0) {
402 				/* replace inputs */
403 				ir_insn *insn = &ctx->ir_base[use];
404 				ir_ref k, l = insn->inputs_count;
405 
406 				for (k = 1; k <= l; k++) {
407 					if (ir_insn_op(insn, k) == ref) {
408 						if (insn->op == IR_PHI) {
409 							j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)];
410 							while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
411 								j = ctx->cfg_blocks[j].idom;
412 							}
413 							if (j != clones[i].block) {
414 								continue;
415 							}
416 						}
417 						ir_insn_set_op(insn, k, clone);
418 						break;
419 					}
420 				}
421 			}
422 		}
423 	}
424 
425 	ir_mem_free(uses);
426 	ir_mem_free(clones);
427 	ir_hashtab_free(&hash);
428 
429 #ifdef IR_DEBUG
430 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
431 		ir_check(ctx);
432 	}
433 #endif
434 
435 	return 1;
436 }
437 #endif
438 
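/*
 * Schedule "ref" as late as possible: compute the lowest common ancestor (in
 * the dominator tree) of all use blocks (for a PHI use, the block of the
 * corresponding MERGE predecessor) and then, unless the node gets split,
 * place it into the best block on the dominator path between that LCA and the
 * early placement.
 */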
439 static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
440 {
441 	ir_ref n, use;
442 	uint32_t lca = 0;
443 
444 	IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR);
445 	IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI);
446 
447 	IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b));
448 	b = IR_GCM_EARLY_BLOCK(b);
449 	ctx->cfg_map[ref] = b;
450 
451 	for (n = 0; n < ctx->use_lists[ref].count; n++) {
452 		use = ctx->use_edges[ctx->use_lists[ref].refs + n];
453 		b = ctx->cfg_map[use];
454 		if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
455 			ir_gcm_schedule_late(ctx, use, b);
456 			b = ctx->cfg_map[use];
457 			IR_ASSERT(b != 0);
458 		} else if (!b) {
459 			continue;
460 		} else if (ctx->ir_base[use].op == IR_PHI) {
461 			ir_insn *insn = &ctx->ir_base[use];
462 			ir_ref *p = insn->ops + 2; /* PHI data inputs */
463 			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
464 			ir_ref n = insn->inputs_count - 1;
465 
466 			for (;n > 0; p++, q++, n--) {
467 				if (*p == ref) {
468 					b = ctx->cfg_map[*q];
469 					lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
470 				}
471 			}
472 			continue;
473 		}
474 		lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
475 	}
476 
477 	IR_ASSERT(lca != 0 && "No Common Ancestor");
478 
479 #if IR_GCM_SPLIT
480 	if (ctx->use_lists[ref].count > 1
481 	 && ir_split_partially_dead_node(ctx, ref, lca)) {
482 		return;
483 	}
484 #endif
485 
486 	if (lca != ctx->cfg_map[ref]) {
487 		b = ir_gcm_select_best_block(ctx, ref, lca);
488 
489 		ctx->cfg_map[ref] = b;
490 		if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
491 			/* OVERFLOW is a projection and must be scheduled together with the previous ADD/SUB/MUL_OV */
492 			ctx->cfg_map[ref + 1] = b;
493 		}
494 	}
495 }
496 
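/*
 * Main GCM driver: pin control nodes and control-dependent nodes (PARAM, VAR,
 * PHI, PI) to their blocks, then run the early and late scheduling passes over
 * the remaining floating data nodes.  The single-block case is handled
 * separately and simply pins every live node to block 1.
 */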
497 int ir_gcm(ir_ctx *ctx)
498 {
499 	ir_ref k, n, *p, ref;
500 	ir_block *bb;
501 	ir_list queue_early;
502 	ir_list queue_late;
503 	uint32_t *_blocks, b;
504 	ir_insn *insn, *use_insn;
505 	ir_use_list *use_list;
506 
507 	IR_ASSERT(ctx->cfg_map);
508 	_blocks = ctx->cfg_map;
509 
510 	ir_list_init(&queue_early, ctx->insns_count);
511 
512 	if (ctx->cfg_blocks_count == 1) {
513 		ref = ctx->cfg_blocks[1].end;
514 		do {
515 			insn = &ctx->ir_base[ref];
516 			_blocks[ref] = 1; /* pin to block */
517 			if (insn->inputs_count > 1) {
518 				/* insn has input data edges */
519 				ir_list_push_unchecked(&queue_early, ref);
520 			}
521 			ref = insn->op1; /* control predecessor */
522 		} while (ref != 1); /* IR_START */
523 		_blocks[1] = 1; /* pin to block */
524 
525 		use_list = &ctx->use_lists[1];
526 		n = use_list->count;
527 		for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
528 			ref = *p;
529 			use_insn = &ctx->ir_base[ref];
530 			if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
531 				ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
532 				_blocks[ref] = 1; /* pin to block */
533 			}
534 		}
535 
536 		/* Place all live nodes into the first block */
537 		while (ir_list_len(&queue_early)) {
538 			ref = ir_list_pop(&queue_early);
539 			insn = &ctx->ir_base[ref];
540 			n = insn->inputs_count;
541 			for (p = insn->ops + 1; n > 0; p++, n--) {
542 				ref = *p;
543 				if (ref > 0 && _blocks[ref] == 0) {
544 					_blocks[ref] = 1;
545 					ir_list_push_unchecked(&queue_early, ref);
546 				}
547 			}
548 		}
549 
550 		ir_list_free(&queue_early);
551 
552 		return 1;
553 	}
554 
555 	ir_list_init(&queue_late, ctx->insns_count);
556 
557 	/* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
558 	b = ctx->cfg_blocks_count;
559 	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
560 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
561 		ref = bb->end;
562 
563 		/* process the last instruction of the block */
564 		insn = &ctx->ir_base[ref];
565 		_blocks[ref] = b; /* pin to block */
566 		if (insn->inputs_count > 1) {
567 			/* insn has input data edges */
568 			ir_list_push_unchecked(&queue_early, ref);
569 		}
570 		ref = insn->op1; /* control predecessor */
571 
572 		while (ref != bb->start) {
573 			insn = &ctx->ir_base[ref];
574 			_blocks[ref] = b; /* pin to block */
575 			if (insn->inputs_count > 1) {
576 				/* insn has input data edges */
577 				ir_list_push_unchecked(&queue_early, ref);
578 			}
579 			if (insn->type != IR_VOID) {
580 				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
581 			}
582 			ref = insn->op1; /* control predecessor */
583 		}
584 
585 		/* process the first instruction of the block */
586 		_blocks[ref] = b; /* pin to block */
587 
588 		use_list = &ctx->use_lists[ref];
589 		n = use_list->count;
590 		if (n > 1) {
591 			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
592 				ref = *p;
593 				use_insn = &ctx->ir_base[ref];
594 				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
595 					bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
596 					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
597 						_blocks[ref] = b; /* pin to block */
598 						ir_list_push_unchecked(&queue_early, ref);
599 					}
600 				} else if (use_insn->op == IR_PARAM) {
601 					bb->flags |= IR_BB_HAS_PARAM;
602 					_blocks[ref] = b; /* pin to block */
603 				} else if (use_insn->op == IR_VAR) {
604 					bb->flags |= IR_BB_HAS_VAR;
605 					_blocks[ref] = b; /* pin to block */
606 				}
607 			}
608 		}
609 	}
610 
611 	n = ir_list_len(&queue_early);
612 	while (n > 0) {
613 		n--;
614 		ref = ir_list_at(&queue_early, n);
615 		insn = &ctx->ir_base[ref];
616 		k = insn->inputs_count - 1;
617 		for (p = insn->ops + 2; k > 0; p++, k--) {
618 			ref = *p;
619 			if (ref > 0 && _blocks[ref] == 0) {
620 				ir_gcm_schedule_early(ctx, ref, &queue_late);
621 			}
622 		}
623 	}
624 
625 #ifdef IR_DEBUG
626 	if (ctx->flags & IR_DEBUG_GCM) {
627 		fprintf(stderr, "GCM Schedule Early\n");
628 		for (n = 1; n < ctx->insns_count; n++) {
629 			fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
630 		}
631 	}
632 #endif
633 
634 #if IR_GCM_SPLIT
635 	ir_gcm_split_data data;
636 
637 	ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1);
638 	ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1);
639 	ctx->data = &data;
640 #endif
641 
642 	n = ir_list_len(&queue_late);
643 	while (n > 0) {
644 		n--;
645 		ref = ir_list_at(&queue_late, n);
646 		b = ctx->cfg_map[ref];
647 		if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
648 			ir_gcm_schedule_late(ctx, ref, b);
649 		}
650 	}
651 
652 #if IR_GCM_SPLIT
653 	ir_list_free(&data.worklist);
654 	ir_sparse_set_free(&data.totally_useful);
655 	ctx->data = NULL;
656 #endif
657 
658 	ir_list_free(&queue_early);
659 	ir_list_free(&queue_late);
660 
661 #ifdef IR_DEBUG
662 	if (ctx->flags & IR_DEBUG_GCM) {
663 		fprintf(stderr, "GCM Schedule Late\n");
664 		for (n = 1; n < ctx->insns_count; n++) {
665 			fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
666 		}
667 	}
668 #endif
669 
670 	return 1;
671 }
672 
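/*
 * After renumbering, rewrite the keys (and the positive values) of the binding
 * hashtable through _xlat[] and rebuild its hash chains in place, dropping
 * entries whose key was eliminated.
 */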
673 static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
674 {
675 	uint32_t n1, n2, pos;
676 	ir_ref key;
677 	ir_hashtab_bucket *b1, *b2;
678 	ir_hashtab *binding = ctx->binding;
679 	uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);
680 
681 	memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
682 	n1 = binding->count;
683 	n2 = 0;
684 	pos = 0;
685 	b1 = binding->data;
686 	b2 = binding->data;
687 	while (n1 > 0) {
688 		key = b1->key;
689 		IR_ASSERT(key < ctx->insns_count);
690 		if (_xlat[key]) {
691 			key = _xlat[key];
692 			b2->key = key;
693 			if (b1->val > 0) {
694 				IR_ASSERT(_xlat[b1->val]);
695 				b2->val = _xlat[b1->val];
696 			} else {
697 				b2->val = b1->val;
698 			}
699 			key |= binding->mask;
700 			b2->next = ((uint32_t*)binding->data)[key];
701 			((uint32_t*)binding->data)[key] = pos;
702 			pos += sizeof(ir_hashtab_bucket);
703 			b2++;
704 			n2++;
705 		}
706 		b1++;
707 		n1--;
708 	}
709 	binding->count = n2;
710 }
711 
712 IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
713 {
714 	if (!_xlat[ref]) {
715 		_xlat[ref] = ref; /* this is only a "used constant" marker */
716 		return 1;
717 	}
718 	return 0;
719 }
720 
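/*
 * Linearize the IR: chain the nodes of every block into a doubly-linked list,
 * topologically sort each block by data dependencies, renumber instructions
 * and surviving constants through _xlat[], and rebuild the context with the
 * instructions in their final order.  If scheduling changed nothing, the
 * original context is kept and only prev_ref is attached.
 */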
721 int ir_schedule(ir_ctx *ctx)
722 {
723 	ir_ctx new_ctx;
724 	ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
725 	ir_ref *_xlat;
726 	ir_ref *edges;
727 	ir_ref prev_b_end;
728 	uint32_t b, prev_b;
729 	uint32_t *_blocks = ctx->cfg_map;
730 	ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
731 	ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
732 	ir_ref _move_down = 0;
733 	ir_block *bb;
734 	ir_insn *insn, *new_insn;
735 	ir_use_list *lists, *use_list, *new_list;
736 
737 	/* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */
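	/* Three cases below: a node that belongs to the block currently being
	 * emitted (or that starts the next one) is appended to the list; a node
	 * scheduled into a later block is deferred via the _move_down list; a node
	 * scheduled into an earlier block is inserted before that block's end. */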
738 	IR_ASSERT(_blocks[1] == 1);
739 	prev_b = 1;
740 	prev_b_end = ctx->cfg_blocks[1].end;
741 	_prev[1] = 0;
742 	_prev[prev_b_end] = 0;
743 	for (i = 2, j = 1; i < ctx->insns_count; i++) {
744 		b = _blocks[i];
745 		IR_ASSERT((int32_t)b >= 0);
746 		if (b == prev_b && i <= prev_b_end) {
747 			/* add to the end of the list */
748 			_next[j] = i;
749 			_prev[i] = j;
750 			j = i;
751 		} else if (b > prev_b) {
752 			bb = &ctx->cfg_blocks[b];
753 			if (i == bb->start) {
754 				IR_ASSERT(bb->end > bb->start);
755 				prev_b = b;
756 				prev_b_end = bb->end;
757 				_prev[bb->end] = 0;
758 				/* add to the end of the list */
759 				_next[j] = i;
760 				_prev[i] = j;
761 				j = i;
762 			} else {
763 				IR_ASSERT(i != bb->end);
764 				/* move down late (see the following loop) */
765 				_next[i] = _move_down;
766 				_move_down = i;
767 			}
768 		} else if (b) {
769 			bb = &ctx->cfg_blocks[b];
770 			IR_ASSERT(i != bb->start);
771 			if (_prev[bb->end]) {
772 				/* move up, insert before the end of the already scheduled BB */
773 				k = bb->end;
774 			} else {
775 				/* move up, insert at the end of the block */
776 				k = ctx->cfg_blocks[b + 1].start;
777 			}
778 			/* insert before "k" */
779 			_prev[i] = _prev[k];
780 			_next[i] = k;
781 			_next[_prev[k]] = i;
782 			_prev[k] = i;
783 		}
784 	}
785 	_next[j] = 0;
786 
787 	while (_move_down) {
788 		i = _move_down;
789 		_move_down = _next[i];
790 		b = _blocks[i];
791 		bb = &ctx->cfg_blocks[b];
792 		k = _next[bb->start];
793 
794 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
795 			/* insert after the start of the block and all PARAM, VAR, PI, PHI */
796 			insn = &ctx->ir_base[k];
797 			while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
798 				k = _next[k];
799 				insn = &ctx->ir_base[k];
800 			}
801 		}
802 
803 		/* insert before "k" */
804 		_prev[i] = _prev[k];
805 		_next[i] = k;
806 		_next[_prev[k]] = i;
807 		_prev[k] = i;
808 	}
809 
810 #ifdef IR_DEBUG
811 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
812 		fprintf(stderr, "Before Schedule\n");
813 		for (i = 1; i != 0; i = _next[i]) {
814 			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
815 		}
816 	}
817 #endif
818 
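	/* _xlat[] maps old references to new ones for both instructions (positive
	 * refs) and constants (negative refs); the pointer is biased by
	 * consts_count so it can be indexed directly with negative refs. */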
819 	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
820 	_xlat += ctx->consts_count;
821 	_xlat[IR_TRUE] = IR_TRUE;
822 	_xlat[IR_FALSE] = IR_FALSE;
823 	_xlat[IR_NULL] = IR_NULL;
824 	_xlat[IR_UNUSED] = IR_UNUSED;
825 	insns_count = 1;
826 	consts_count = -(IR_TRUE - 1);
827 
828 	/* Topological sort according to dependencies inside each basic block */
829 	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
830 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
831 		/* Schedule BB start */
832 		i = bb->start;
833 		_xlat[i] = bb->start = insns_count;
834 		insn = &ctx->ir_base[i];
835 		if (insn->op == IR_CASE_VAL) {
836 			IR_ASSERT(insn->op2 < IR_TRUE);
837 			consts_count += ir_count_constant(_xlat, insn->op2);
838 		}
839 		n = insn->inputs_count;
840 		insns_count += ir_insn_inputs_to_len(n);
841 		i = _next[i];
842 		insn = &ctx->ir_base[i];
843 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
844 			/* Schedule PARAM, VAR, PI */
845 			while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
846 				_xlat[i] = insns_count;
847 				insns_count += 1;
848 				i = _next[i];
849 				insn = &ctx->ir_base[i];
850 			}
851 			/* Schedule PHIs */
852 			while (insn->op == IR_PHI) {
853 				ir_ref j, *p, input;
854 
855 				_xlat[i] = insns_count;
856 				/* Reuse "n" from MERGE and skip first input */
857 				insns_count += ir_insn_inputs_to_len(n + 1);
858 				for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
859 					input = *p;
860 					if (input < IR_TRUE) {
861 						consts_count += ir_count_constant(_xlat, input);
862 					}
863 				}
864 				i = _next[i];
865 				insn = &ctx->ir_base[i];
866 			}
867 		}
868 		if (bb->successors_count > 1) {
869 			ir_ref input, j = bb->end;
870 			ir_insn *end = &ctx->ir_base[j];
871 
872 			if (end->op == IR_IF) {
873 				/* Move condition closer to IF */
874 				input = end->op2;
875 				if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) {
876 					if (input == i) {
877 						i = _next[i];
878 						insn = &ctx->ir_base[i];
879 					}
880 					/* remove "input" */
881 					_prev[_next[input]] = _prev[input];
882 					_next[_prev[input]] = _next[input];
883 					/* insert before "j" */
884 					_prev[input] = _prev[j];
885 					_next[input] = j;
886 					_next[_prev[j]] = input;
887 					_prev[j] = input;
888 				}
889 			}
890 		}
891 		while (i != bb->end) {
892 			ir_ref n, j, *p, input;
893 
894 restart:
895 			n = insn->inputs_count;
896 			for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
897 				input = *p;
898 				if (!_xlat[input]) {
899 					/* input is not scheduled yet */
900 					if (input > 0) {
901 						if (_blocks[input] == b) {
902 							/* "input" should be before "i" to satisfy dependency */
903 #ifdef IR_DEBUG
904 							if (ctx->flags & IR_DEBUG_SCHEDULE) {
905 								fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
906 							}
907 #endif
908 							/* remove "input" */
909 							_prev[_next[input]] = _prev[input];
910 							_next[_prev[input]] = _next[input];
911 							/* insert before "i" */
912 							_prev[input] = _prev[i];
913 							_next[input] = i;
914 							_next[_prev[i]] = input;
915 							_prev[i] = input;
916 							/* restart from "input" */
917 							i = input;
918 							insn = &ctx->ir_base[i];
919 							goto restart;
920 						}
921 					} else if (input < IR_TRUE) {
922 						consts_count += ir_count_constant(_xlat, input);
923 					}
924 				}
925 			}
926 			_xlat[i] = insns_count;
927 			insns_count += ir_insn_inputs_to_len(n);
928 			i = _next[i];
929 			insn = &ctx->ir_base[i];
930 		}
931 		/* Schedule BB end */
932 		_xlat[i] = bb->end = insns_count;
933 		insns_count++;
934 		if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
935 			if (insn->op2 < IR_TRUE) {
936 				consts_count += ir_count_constant(_xlat, insn->op2);
937 			}
938 		}
939 	}
940 
941 #ifdef IR_DEBUG
942 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
943 		fprintf(stderr, "After Schedule\n");
944 		for (i = 1; i != 0; i = _next[i]) {
945 			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
946 		}
947 	}
948 #endif
949 
950 #if 1
951 	/* If scheduling didn't change anything, keep the original IR */
952 	if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
953 		bool changed = 0;
954 
955 		for (i = 1; i != 0; i = _next[i]) {
956 			if (_xlat[i] != i) {
957 				changed = 1;
958 				break;
959 			}
960 		}
961 		if (!changed) {
962 			_xlat -= ctx->consts_count;
963 			ir_mem_free(_xlat);
964 			ir_mem_free(_next);
965 
966 			ctx->prev_ref = _prev;
967 			ctx->flags2 |= IR_LINEAR;
968 			ir_truncate(ctx);
969 
970 			return 1;
971 		}
972 	}
973 #endif
974 
975 	ir_mem_free(_prev);
976 
977 	ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
978 	new_ctx.insns_count = insns_count;
979 	new_ctx.flags2 = ctx->flags2;
980 	new_ctx.ret_type = ctx->ret_type;
981 	new_ctx.mflags = ctx->mflags;
982 	new_ctx.spill_base = ctx->spill_base;
983 	new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
984 	new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
985 	new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
986 	new_ctx.fixed_regset = ctx->fixed_regset;
987 	new_ctx.fixed_save_regset = ctx->fixed_save_regset;
988 	new_ctx.entries_count = ctx->entries_count;
989 #if defined(IR_TARGET_AARCH64)
990 	new_ctx.deoptimization_exits = ctx->deoptimization_exits;
991 	new_ctx.get_exit_addr = ctx->get_exit_addr;
992 	new_ctx.get_veneer = ctx->get_veneer;
993 	new_ctx.set_veneer = ctx->set_veneer;
994 #endif
995 	new_ctx.loader = ctx->loader;
996 
997 	/* Copy constants */
998 	if (consts_count == ctx->consts_count) {
999 		new_ctx.consts_count = consts_count;
1000 		ref = 1 - consts_count;
1001 		insn = &ctx->ir_base[ref];
1002 		new_insn = &new_ctx.ir_base[ref];
1003 
1004 		memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
1005 		if (ctx->strtab.data) {
1006 			while (ref != IR_TRUE) {
1007 				if (new_insn->op == IR_FUNC_ADDR) {
1008 					if (new_insn->proto) {
1009 						size_t len;
1010 						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1011 						new_insn->proto = ir_strl(&new_ctx, proto, len);
1012 					}
1013 				} else if (new_insn->op == IR_FUNC) {
1014 					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
1015 					if (new_insn->proto) {
1016 						size_t len;
1017 						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1018 						new_insn->proto = ir_strl(&new_ctx, proto, len);
1019 					}
1020 				} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
1021 					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
1022 				}
1023 				new_insn++;
1024 				ref++;
1025 			}
1026 		}
1027 	} else {
1028 		new_ref = -new_ctx.consts_count;
1029 		new_insn = &new_ctx.ir_base[new_ref];
1030 		for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
1031 			if (!_xlat[ref]) {
1032 				continue;
1033 			}
1034 			new_insn->optx = insn->optx;
1035 			new_insn->prev_const = 0;
1036 			if (insn->op == IR_FUNC_ADDR) {
1037 				new_insn->val.u64 = insn->val.u64;
1038 				if (insn->proto) {
1039 					size_t len;
1040 					const char *proto = ir_get_strl(ctx, insn->proto, &len);
1041 					new_insn->proto = ir_strl(&new_ctx, proto, len);
1042 				} else {
1043 					new_insn->proto = 0;
1044 				}
1045 			} else if (insn->op == IR_FUNC) {
1046 				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
1047 				if (insn->proto) {
1048 					size_t len;
1049 					const char *proto = ir_get_strl(ctx, insn->proto, &len);
1050 					new_insn->proto = ir_strl(&new_ctx, proto, len);
1051 				} else {
1052 					new_insn->proto = 0;
1053 				}
1054 			} else if (insn->op == IR_SYM || insn->op == IR_STR) {
1055 				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
1056 			} else {
1057 				new_insn->val.u64 = insn->val.u64;
1058 			}
1059 			_xlat[ref] = new_ref;
1060 			new_ref--;
1061 			new_insn--;
1062 		}
1063 		new_ctx.consts_count = -new_ref;
1064 	}
1065 
1066 	new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
1067 	new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
1068 	new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
1069 	new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
1070 
1071 	/* Copy instructions, use lists and use edges */
1072 	prev_ref = 0;
1073 	use_edges_count = 0;
1074 	for (i = 1; i != 0; i = _next[i]) {
1075 		new_ref = _xlat[i];
1076 		new_ctx.cfg_map[new_ref] = _blocks[i];
1077 		_prev[new_ref] = prev_ref;
1078 		prev_ref = new_ref;
1079 
1080 		use_list = &ctx->use_lists[i];
1081 		n = use_list->count;
1082 		k = 0;
1083 		if (n == 1) {
1084 			ref = ctx->use_edges[use_list->refs];
1085 			if (_xlat[ref]) {
1086 				*edges = _xlat[ref];
1087 				edges++;
1088 				k = 1;
1089 			}
1090 		} else {
1091 			p = &ctx->use_edges[use_list->refs];
1092 			while (n--) {
1093 				ref = *p;
1094 				if (_xlat[ref]) {
1095 					*edges = _xlat[ref];
1096 					edges++;
1097 					k++;
1098 				}
1099 				p++;
1100 			}
1101 		}
1102 		new_list = &lists[new_ref];
1103 		new_list->refs = use_edges_count;
1104 		use_edges_count += k;
1105 		new_list->count = k;
1106 
1107 		insn = &ctx->ir_base[i];
1108 		new_insn = &new_ctx.ir_base[new_ref];
1109 
1110 		new_insn->optx = insn->optx;
1111 		n = new_insn->inputs_count;
1112 		switch (n) {
1113 			case 0:
1114 				new_insn->op1 = insn->op1;
1115 				new_insn->op2 = insn->op2;
1116 				new_insn->op3 = insn->op3;
1117 				break;
1118 			case 1:
1119 				new_insn->op1 = _xlat[insn->op1];
1120 				if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
1121 					new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
1122 				} else if (new_insn->op == IR_PROTO) {
1123 					size_t len;
1124 					const char *proto = ir_get_strl(ctx, insn->op2, &len);
1125 					new_insn->op2 = ir_strl(&new_ctx, proto, len);
1126 				} else {
1127 					new_insn->op2 = insn->op2;
1128 				}
1129 				new_insn->op3 = insn->op3;
1130 				break;
1131 			case 2:
1132 				new_insn->op1 = _xlat[insn->op1];
1133 				new_insn->op2 = _xlat[insn->op2];
1134 				new_insn->op3 = insn->op3;
1135 #if IR_SCHEDULE_SWAP_OPS
1136 				/* Swap operands according to folding rules */
1137 				if (new_insn->op1 < new_insn->op2) {
1138 					switch (new_insn->op) {
1139 						case IR_EQ:
1140 						case IR_NE:
1141 						case IR_ADD:
1142 						case IR_MUL:
1143 						case IR_ADD_OV:
1144 						case IR_MUL_OV:
1145 						case IR_OR:
1146 						case IR_AND:
1147 						case IR_XOR:
1148 						case IR_MIN:
1149 						case IR_MAX:
1150 							SWAP_REFS(new_insn->op1, new_insn->op2);
1151 							break;
1152 						case IR_LT:
1153 						case IR_GE:
1154 						case IR_LE:
1155 						case IR_GT:
1156 						case IR_ULT:
1157 						case IR_UGE:
1158 						case IR_ULE:
1159 						case IR_UGT:
1160 							SWAP_REFS(new_insn->op1, new_insn->op2);
1161 							new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
1162 							break;
1163 					}
1164 				}
1165 #endif
1166 				break;
1167 			case 3:
1168 				new_insn->op1 = _xlat[insn->op1];
1169 				new_insn->op2 = _xlat[insn->op2];
1170 				new_insn->op3 = _xlat[insn->op3];
1171 				break;
1172 			default:
1173 				for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
1174 					*q = _xlat[*p];
1175 				}
1176 				break;
1177 		}
1178 	}
1179 
1180 	/* Update list of terminators (IR_OPND_CONTROL_REF) */
1181 	insn = &new_ctx.ir_base[1];
1182 	ref = insn->op1;
1183 	if (ref) {
1184 		insn->op1 = ref = _xlat[ref];
1185 		while (1) {
1186 			insn = &new_ctx.ir_base[ref];
1187 			ref = insn->op3;
1188 			if (!ref) {
1189 				break;
1190 			}
1191 			insn->op3 = ref = _xlat[ref];
1192 		}
1193 	}
1194 
1195 	IR_ASSERT(ctx->use_edges_count >= use_edges_count);
1196 	new_ctx.use_edges_count = use_edges_count;
1197 	new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
1198 
1199 	if (ctx->binding) {
1200 		ir_xlat_binding(ctx, _xlat);
1201 		new_ctx.binding = ctx->binding;
1202 		ctx->binding = NULL;
1203 	}
1204 
1205 	_xlat -= ctx->consts_count;
1206 	ir_mem_free(_xlat);
1207 
1208 	new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
1209 	new_ctx.cfg_edges_count = ctx->cfg_edges_count;
1210 	new_ctx.cfg_blocks = ctx->cfg_blocks;
1211 	new_ctx.cfg_edges = ctx->cfg_edges;
1212 	ctx->cfg_blocks = NULL;
1213 	ctx->cfg_edges = NULL;
1214 
1215 	ir_free(ctx);
1216 	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
1217 	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
1218 	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
1219 	ctx->flags2 |= IR_LINEAR;
1220 
1221 	ir_mem_free(_next);
1222 
1223 	return 1;
1224 }
1225 
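/*
 * Build the ctx->prev_ref[] side table that records, for each instruction, the
 * reference of the previously emitted instruction (used by later passes that
 * need to walk instructions backwards).
 */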
1226 void ir_build_prev_refs(ir_ctx *ctx)
1227 {
1228 	uint32_t b;
1229 	ir_block *bb;
1230 	ir_ref i, n, prev;
1231 	ir_insn *insn;
1232 
1233 	ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
1234 	prev = 0;
1235 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
1236 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1237 		for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
1238 			ctx->prev_ref[i] = prev;
1239 			n = ir_insn_len(insn);
1240 			prev = i;
1241 			i += n;
1242 			insn += n;
1243 		}
1244 		ctx->prev_ref[i] = prev;
1245 	}
1246 }
1247