xref: /php-src/ext/opcache/jit/ir/ir_gcm.c (revision 93c20de2)
1 /*
2  * IR - Lightweight JIT Compilation Framework
3  * (GCM - Global Code Motion and Scheduler)
4  * Copyright (C) 2022 Zend by Perforce.
5  * Authors: Dmitry Stogov <dmitry@php.net>
6  *
7  * The GCM algorithm is based on Cliff Click's publication
8  * See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95.
9  */
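/*
 * Pipeline sketch (illustrative only; the calls outside this file are an
 * assumption about how the surrounding IR framework is typically driven,
 * not something ir_gcm.c itself defines):
 *
 *     ir_ctx ctx;
 *     ir_init(&ctx, IR_FUNCTION, IR_CONSTS_LIMIT_MIN, IR_INSNS_LIMIT_MIN);
 *     // ... build the IR graph ...
 *     ir_build_cfg(&ctx);              // cfg_blocks/cfg_edges/cfg_map
 *     ir_build_dominators_tree(&ctx);  // dom_parent/dom_depth used below
 *     ir_find_loops(&ctx);             // loop_depth/loop_header used below
 *     ir_gcm(&ctx);                    // pick a basic block for every node
 *     ir_schedule(&ctx);               // linearize the nodes inside each block
 */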
10 
11 #include "ir.h"
12 #include "ir_private.h"
13 
14 #define IR_GCM_IS_SCHEDULED_EARLY(b) (((int32_t)(b)) < 0)
15 #define IR_GCM_EARLY_BLOCK(b)        ((uint32_t)-((int32_t)(b)))
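/* While a node is only "early"-scheduled, its block number is kept in
 * ctx->cfg_map[ref] as a negated value; the late pass recognizes such entries
 * via IR_GCM_IS_SCHEDULED_EARLY() and overwrites them with the final
 * (positive) block number. */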
16 
17 #define IR_GCM_SPLIT 1
18 #define IR_SCHEDULE_SWAP_OPS 1
19 
20 static uint32_t ir_gcm_schedule_early(ir_ctx *ctx, ir_ref ref, ir_list *queue_late)
21 {
22 	ir_ref n, *p, input;
23 	ir_insn *insn;
24 	uint32_t dom_depth;
25 	uint32_t b, result;
26 
27 	insn = &ctx->ir_base[ref];
28 
29 	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
30 	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
31 
32 	result = 1;
33 	dom_depth = 0;
34 
35 	n = insn->inputs_count;
36 	for (p = insn->ops + 1; n > 0; p++, n--) {
37 		input = *p;
38 		if (input > 0) {
39 			b = ctx->cfg_map[input];
40 			if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
41 				b = IR_GCM_EARLY_BLOCK(b);
42 			} else if (!b) {
43 				b = ir_gcm_schedule_early(ctx, input, queue_late);
44 			}
45 			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
46 				dom_depth = ctx->cfg_blocks[b].dom_depth;
47 				result = b;
48 			}
49 		}
50 	}
51 
52 	ctx->cfg_map[ref] = IR_GCM_EARLY_BLOCK(result);
53 	ir_list_push_unchecked(queue_late, ref);
54 	return result;
55 }
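/* Illustrative trace (hypothetical graph): if a node's data inputs were pinned
 * to BB2 (dom_depth 1) and BB5 (dom_depth 3), the earliest legal placement is
 * BB5, the deepest of the two in the dominator tree; BB5 is stored negated in
 * cfg_map[] and the node is queued for the late pass. */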
56 
57 /* Lowest Common Ancestor (in the dominator tree) */
58 static uint32_t ir_gcm_find_lca(ir_ctx *ctx, uint32_t b1, uint32_t b2)
59 {
60 	uint32_t dom_depth;
61 
62 	dom_depth = ctx->cfg_blocks[b2].dom_depth;
63 	while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
64 		b1 = ctx->cfg_blocks[b1].dom_parent;
65 	}
66 	dom_depth = ctx->cfg_blocks[b1].dom_depth;
67 	while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
68 		b2 = ctx->cfg_blocks[b2].dom_parent;
69 	}
70 	while (b1 != b2) {
71 		b1 = ctx->cfg_blocks[b1].dom_parent;
72 		b2 = ctx->cfg_blocks[b2].dom_parent;
73 	}
74 	return b2;
75 }
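/* Illustrative trace (hypothetical dominator tree): with b1 at dom_depth 4 and
 * b2 at dom_depth 2, b1 is first raised to depth 2 through dom_parent, then
 * both blocks are raised in lock-step until they meet; the meeting block is
 * the deepest block that dominates both uses. */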
76 
77 static uint32_t ir_gcm_select_best_block(ir_ctx *ctx, ir_ref ref, uint32_t lca)
78 {
79 	ir_block *bb = &ctx->cfg_blocks[lca];
80 	uint32_t loop_depth = bb->loop_depth;
81 	uint32_t flags, best, b;
82 
83 	if (!loop_depth) {
84 		return lca;
85 	}
86 
87 #if 0 /* This is not necessary anymore. Conditions may be fused with IF across BBs. */
88 	if (ctx->ir_base[ref].op >= IR_EQ && ctx->ir_base[ref].op <= IR_UGT) {
89 		ir_use_list *use_list = &ctx->use_lists[ref];
90 
91 		if (use_list->count == 1) {
92 			ir_ref use = ctx->use_edges[use_list->refs];
93 			ir_insn *insn = &ctx->ir_base[use];
94 			if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
95 				/* Don't hoist invariant comparison */
96 				return lca;
97 			}
98 		}
99 	}
100 #endif
101 
102 	flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
103 	if ((flags & IR_BB_LOOP_WITH_ENTRY)
104 	 && !(ctx->binding && ir_binding_find(ctx, ref))) {
105 		/* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
106 		return lca;
107 	}
108 
109 	best = b = lca;
110 	do {
111 		b = bb->dom_parent;
112 		bb = &ctx->cfg_blocks[b];
113 		if (bb->loop_depth < loop_depth) {
114 			if (!bb->loop_depth) {
115 				best = b;
116 				break;
117 			}
118 			flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
119 			if ((flags & IR_BB_LOOP_WITH_ENTRY)
120 			 && !(ctx->binding && ir_binding_find(ctx, ref))) {
121 				break;
122 			}
123 			loop_depth = bb->loop_depth;
124 			best = b;
125 		}
126 	} while (b != ctx->cfg_map[ref]);
127 
128 	return best;
129 }
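/* Illustrative trace (hypothetical CFG): if the LCA sits in a loop nest with
 * loop_depth 2 while the early block is outside of any loop, the dom_parent
 * walk stops at the first dominating block with loop_depth 0 and hoists the
 * node out of both loops, unless a loop with an OSR entry
 * (IR_BB_LOOP_WITH_ENTRY) and no binding stops the motion earlier. */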
130 
131 #if IR_GCM_SPLIT
132 /* Partially Dead Code Elimination by splitting the node and sinking the clones
133  *
134  * This code is based on Benedikt Meurer's idea, first implemented in V8.
135  * See: https://codereview.chromium.org/899433005
136  */
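/* Illustrative example (hypothetical graph): a value computed before an "if"
 * but used only on its "then" path is partially dead on the "else" path.
 * Rather than keeping it above the branch, the node is cloned per use
 * partition and each clone is sunk into the blocks where it is TOTALLY_USEFUL,
 * so the "else" path no longer computes it. */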
137 
138 typedef struct _ir_gcm_split_data {
139 	ir_sparse_set totally_useful;
140 	ir_list       worklist;
141 } ir_gcm_split_data;
142 
143 static void _push_predecessors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
144 {
145 	uint32_t *p, i, n = bb->predecessors_count;
146 
147 	IR_ASSERT(n > 0);
148 	p = ctx->cfg_edges + bb->predecessors;
149 	do {
150 		i = *p;
151 		if (!ir_sparse_set_in(&data->totally_useful, i)) {
152 			ir_list_push(&data->worklist, i);
153 		}
154 		p++;
155 		n--;
156 	} while (n > 0);
157 }
158 
159 static bool _check_successors(ir_ctx *ctx, ir_block *bb, ir_gcm_split_data *data)
160 {
161 	uint32_t *p, i, n = bb->successors_count;
162 
163 	if (n <= 1) {
164 		IR_ASSERT(ir_sparse_set_in(&data->totally_useful, ctx->cfg_edges[bb->successors]));
165 		return 1;
166 	}
167 
168 	p = ctx->cfg_edges + bb->successors;
169 	do {
170 		i = *p;
171 		if (!ir_sparse_set_in(&data->totally_useful, i)) {
172 			return 0;
173 		}
174 		p++;
175 		n--;
176 	} while (n > 0);
177 
178 	return 1;
179 }
180 
181 static bool ir_split_partially_dead_node(ir_ctx *ctx, ir_ref ref, uint32_t b)
182 {
183 	ir_use_list *use_list;
184 	ir_insn *insn;
185 	ir_ref n, *p, use;
186 	uint32_t i;
187 	ir_gcm_split_data *data = ctx->data;
188 
189 	IR_ASSERT(b > 0 && b <= ctx->cfg_blocks_count);
190 
191 	/* 1. Find a set of blocks where the node is TOTALLY_USEFUL (not PARTIALLY_DEAD)
192 	 * 1.1. Collect the blocks where the node is really USED.
193 	 */
194 	ir_sparse_set_clear(&data->totally_useful);
195 
196 	use_list = &ctx->use_lists[ref];
197 	n = use_list->count;
198 	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
199 		use = *p;
200 		insn = &ctx->ir_base[use];
201 		if (insn->op == IR_PHI) {
202 			ir_ref *p = insn->ops + 2; /* PHI data inputs */
203 			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
204 			ir_ref n = insn->inputs_count - 1;
205 
206 			for (;n > 0; p++, q++, n--) {
207 				if (*p == ref) {
208 					i = ctx->cfg_map[*q];
209 					IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
210 					if (!ir_sparse_set_in(&data->totally_useful, i)) {
211 						if (i == b) return 0; /* node is totally-useful in the scheduled block */
212 						ir_sparse_set_add(&data->totally_useful, i);
213 					}
214 				}
215 			}
216 		} else {
217 			i = ctx->cfg_map[use];
218 			if (!i) {
219 				continue;
220 			}
221 			IR_ASSERT(i > 0 && i <= ctx->cfg_blocks_count);
222 			if (!ir_sparse_set_in(&data->totally_useful, i)) {
223 				if (i == b) return 0; /* node is totally-useful in the scheduled block */
224 				ir_sparse_set_add(&data->totally_useful, i);
225 			}
226 		}
227 	}
228 
229 #ifdef IR_DEBUG
230 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
231 		bool first = 1;
232 		fprintf(stderr, "*** Split partially dead node d_%d scheduled to BB%d\n", ref, b);
233 		IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
234 			if (first) {
235 				fprintf(stderr, "\td_%d is USED in [BB%d", ref, i);
236 				first = 0;
237 			} else {
238 				fprintf(stderr, ", BB%d", i);
239 			}
240 		} IR_SPARSE_SET_FOREACH_END();
241 		fprintf(stderr, "]\n");
242 	}
243 #endif
244 
245 	/* 1.2. Iteratively check the predecessors of already found TOTALLY_USEFUL blocks and
246 	 *      add them into the TOTALLY_USEFUL set if all of their successors are already there.
247 	 */
248 	IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
249 		_push_predecessors(ctx, &ctx->cfg_blocks[i], data);
250 	} IR_SPARSE_SET_FOREACH_END();
251 
252 	while (ir_list_len(&data->worklist)) {
253 		i = ir_list_pop(&data->worklist);
254 		if (!ir_sparse_set_in(&data->totally_useful, i)) {
255 			ir_block *bb = &ctx->cfg_blocks[i];
256 
257 			if (_check_successors(ctx, bb, data)) {
258 				if (i == b) {
259 					/* node is TOTALLY_USEFUL in the scheduled block */
260 					ir_list_clear(&data->worklist);
261 					return 0;
262 				}
263 				ir_sparse_set_add(&data->totally_useful, i);
264 				_push_predecessors(ctx, bb, data);
265 			}
266 		}
267 	}
268 
269 	IR_ASSERT(!ir_sparse_set_in(&data->totally_useful, b));
270 
271 #ifdef IR_DEBUG
272 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
273 		bool first = 1;
274 		IR_SPARSE_SET_FOREACH(&data->totally_useful, i) {
275 			if (first) {
276 				fprintf(stderr, "\td_%d is TOTALLY_USEFUL in [BB%d", ref, i);
277 				first = 0;
278 			} else {
279 				fprintf(stderr, ", BB%d", i);
280 			}
281 		} IR_SPARSE_SET_FOREACH_END();
282 		fprintf(stderr, "]\n");
283 	}
284 #endif
285 
286 	/* 2. Split the USEs into partitions */
287 	use_list = &ctx->use_lists[ref];
288 	ir_hashtab hash;
289 	uint32_t j, clone, clones_count = 0, uses_count = 0;
290 	struct {
291 		ir_ref   ref;
292 		uint32_t block;
293 		uint32_t use_count;
294 		uint32_t use;
295 	} *clones = ir_mem_malloc(sizeof(*clones) * use_list->count);
296 	struct {
297 		ir_ref   ref;
298 		uint32_t block;
299 		uint32_t next;
300 	} *uses = ir_mem_malloc(sizeof(*uses) * use_list->count);
301 
302 	ir_hashtab_init(&hash, use_list->count);
303 	n = use_list->count;
304 	for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
305 		use = *p;
306 		insn = &ctx->ir_base[use];
307 		if (insn->op == IR_PHI) {
308 			ir_ref *p = insn->ops + 2; /* PHI data inputs */
309 			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
310 			ir_ref n = insn->inputs_count - 1;
311 
312 			/* PHIs must be processed only once */
313 			if (ir_hashtab_find(&hash, -use) != (ir_ref)IR_INVALID_VAL) {
314 				continue;
315 			}
316 			ir_hashtab_add(&hash, -use, IR_NULL);
317 			for (;n > 0; p++, q++, n--) {
318 				if (*p == ref) {
319 					j = i = ctx->cfg_map[*q];
320 					while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
321 						j = ctx->cfg_blocks[j].idom;
322 					}
323 					clone = ir_hashtab_find(&hash, j);
324 					if (clone == IR_INVALID_VAL) {
325 						clone = clones_count++;
326 						ir_hashtab_add(&hash, j, clone);
327 						clones[clone].block = j;
328 						clones[clone].use_count = 0;
329 						clones[clone].use = (uint32_t)-1;
330 					}
331 					uses[uses_count].ref = use;
332 					uses[uses_count].block = i;
333 					uses[uses_count].next = clones[clone].use;
334 					clones[clone].use_count++;
335 					clones[clone].use = uses_count++;
336 				}
337 			}
338 		} else {
339 			j = i = ctx->cfg_map[use];
340 			IR_ASSERT(i > 0);
341 			while (ir_sparse_set_in(&data->totally_useful, ctx->cfg_blocks[j].idom)) {
342 				j = ctx->cfg_blocks[j].idom;
343 			}
344 			clone = ir_hashtab_find(&hash, j);
345 			if (clone == IR_INVALID_VAL) {
346 				clone = clones_count++;
347 				ir_hashtab_add(&hash, j, clone);
348 				clones[clone].block = j;
349 				clones[clone].use_count = 0;
350 				clones[clone].use = -1;
351 			}
352 			uses[uses_count].ref = use;
353 			uses[uses_count].block = i;
354 			uses[uses_count].next = clones[clone].use;
355 			clones[clone].use_count++;
356 			clones[clone].use = uses_count++;
357 		}
358 	}
359 
360 #ifdef IR_DEBUG
361 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
362 		for (i = 0; i < clones_count; i++) {
363 			uint32_t u = clones[i].use;
364 
365 			fprintf(stderr, "\tCLONE #%d in BB%d USES(%d)=[d_%d/BB%d",
366 				i, clones[i].block, clones[i].use_count, uses[u].ref, uses[u].block);
367 			u = uses[u].next;
368 			while (u != (uint32_t)-1) {
369 				fprintf(stderr, ", d_%d/BB%d", uses[u].ref, uses[u].block);
370 				u = uses[u].next;
371 			}
372 			fprintf(stderr, "]\n");
373 		}
374 	}
375 #endif
376 
377 	/* Create Clones */
378 	insn = &ctx->ir_base[ref];
379 	clones[0].ref = ref;
380 	for (i = 1; i < clones_count; i++) {
381 		clones[i].ref = clone = ir_emit(ctx, insn->optx, insn->op1, insn->op2, insn->op3);
382 		insn = &ctx->ir_base[ref];
383 		if (insn->op1 > 0) ir_use_list_add(ctx, insn->op1, clone);
384 		if (insn->op2 > 0) ir_use_list_add(ctx, insn->op2, clone);
385 		if (insn->op3 > 0) ir_use_list_add(ctx, insn->op3, clone);
386 	}
387 
388 	/* Reconstruct IR: update DEF->USE lists, CFG mapping, etc. */
389 	ctx->use_lists = ir_mem_realloc(ctx->use_lists, ctx->insns_count * sizeof(ir_use_list));
390 	ctx->cfg_map = ir_mem_realloc(ctx->cfg_map, ctx->insns_count * sizeof(uint32_t));
391 	n = ctx->use_lists[ref].refs;
392 	for (i = 0; i < clones_count; i++) {
393 		clone = clones[i].ref;
394 		if (clones[i].use_count == 1) {
395 			/* TOTALLY_USEFUL block may be a head of a diamond above the real usage.
396 			 * Sink it down to the real usage block.
397 			 * Clones with more than one use will be sunk into the LCA block.
398 			 */
399 			clones[i].block = uses[clones[i].use].block;
400 		}
401 		ctx->cfg_map[clone] = clones[i].block;
402 		ctx->use_lists[clone].count = clones[i].use_count;
403 		ctx->use_lists[clone].refs = n;
404 
405 		uint32_t u = clones[i].use;
406 		while (u != (uint32_t)-1) {
407 			use = uses[u].ref;
408 			ctx->use_edges[n++] = use;
409 			u = uses[u].next;
410 			if (i > 0) {
411 				/* replace inputs */
412 				ir_insn *insn = &ctx->ir_base[use];
413 				ir_ref k, l = insn->inputs_count;
414 
415 				if (insn->op == IR_PHI) {
416 					for (k = 1; k <= l; k++) {
417 						if (ir_insn_op(insn, k) == ref) {
418 							j = ctx->cfg_map[ir_insn_op(&ctx->ir_base[insn->op1], k - 1)];
419 							if (j != clones[i].block) {
420 								uint32_t dom_depth = ctx->cfg_blocks[clones[i].block].dom_depth;
421 								while (ctx->cfg_blocks[j].dom_depth > dom_depth) {
422 									j = ctx->cfg_blocks[j].dom_parent;
423 								}
424 								if (j != clones[i].block) {
425 									continue;
426 								}
427 							}
428 							ir_insn_set_op(insn, k, clone);
429 							break;
430 						}
431 					}
432 				} else {
433 					for (k = 1; k <= l; k++) {
434 						if (ir_insn_op(insn, k) == ref) {
435 							ir_insn_set_op(insn, k, clone);
436 							break;
437 						}
438 					}
439 				}
440 			}
441 		}
442 	}
443 
444 	ir_mem_free(uses);
445 	ir_mem_free(clones);
446 	ir_hashtab_free(&hash);
447 
448 #ifdef IR_DEBUG
449 	if (ctx->flags & IR_DEBUG_GCM_SPLIT) {
450 		ir_check(ctx);
451 	}
452 #endif
453 
454 	return 1;
455 }
456 #endif
457 
458 #ifdef IR_DEBUG
459 static bool ir_gcm_dominates(ir_ctx *ctx, uint32_t b1, uint32_t b2)
460 {
461 	uint32_t b1_depth = ctx->cfg_blocks[b1].dom_depth;
462 	const ir_block *bb2 = &ctx->cfg_blocks[b2];
463 
464 	while (bb2->dom_depth > b1_depth) {
465 		b2 = bb2->dom_parent;
466 		bb2 = &ctx->cfg_blocks[b2];
467 	}
468 	return b1 == b2;
469 }
470 #endif
471 
472 static void ir_gcm_schedule_late(ir_ctx *ctx, ir_ref ref, uint32_t b)
473 {
474 	ir_ref n, use;
475 	uint32_t lca = 0;
476 
477 	IR_ASSERT(ctx->ir_base[ref].op != IR_PARAM && ctx->ir_base[ref].op != IR_VAR);
478 	IR_ASSERT(ctx->ir_base[ref].op != IR_PHI && ctx->ir_base[ref].op != IR_PI);
479 
480 	IR_ASSERT(IR_GCM_IS_SCHEDULED_EARLY(b));
481 	b = IR_GCM_EARLY_BLOCK(b);
482 	ctx->cfg_map[ref] = b;
483 
484 	for (n = 0; n < ctx->use_lists[ref].count; n++) {
485 		use = ctx->use_edges[ctx->use_lists[ref].refs + n];
486 		b = ctx->cfg_map[use];
487 		if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
488 			ir_gcm_schedule_late(ctx, use, b);
489 			b = ctx->cfg_map[use];
490 			IR_ASSERT(b != 0);
491 		} else if (!b) {
492 			continue;
493 		} else if (ctx->ir_base[use].op == IR_PHI) {
494 			ir_insn *insn = &ctx->ir_base[use];
495 			ir_ref *p = insn->ops + 2; /* PHI data inputs */
496 			ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
497 			ir_ref n = insn->inputs_count - 1;
498 
499 			for (;n > 0; p++, q++, n--) {
500 				if (*p == ref) {
501 					b = ctx->cfg_map[*q];
502 					lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
503 				}
504 			}
505 			continue;
506 		}
507 		lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
508 	}
509 
510 	IR_ASSERT(lca != 0 && "No Common Ancestor");
511 	IR_ASSERT(ir_gcm_dominates(ctx, ctx->cfg_map[ref], lca) && "Early placement doesn't dominate the late");
512 
513 #if IR_GCM_SPLIT
514 	if (ctx->use_lists[ref].count > 1
515 	 && ir_split_partially_dead_node(ctx, ref, lca)) {
516 		return;
517 	}
518 #endif
519 
520 	if (lca != ctx->cfg_map[ref]) {
521 		b = ir_gcm_select_best_block(ctx, ref, lca);
522 
523 		ctx->cfg_map[ref] = b;
524 
525 		/* OVERFLOW is a projection of ADD/SUB/MUL_OV and must be scheduled into the same block */
526 		if (ctx->ir_base[ref].op >= IR_ADD_OV && ctx->ir_base[ref].op <= IR_MUL_OV) {
527 			ir_use_list *use_list = &ctx->use_lists[ref];
528 			ir_ref n, *p, use;
529 
530 			for (n = use_list->count, p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
531 				use = *p;
532 				if (ctx->ir_base[use].op == IR_OVERFLOW) {
533 					ctx->cfg_map[use] = b;
534 					break;
535 				}
536 			}
537 		}
538 	}
539 }
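/* Illustrative summary: after the early pass a node may legally be placed
 * anywhere on the dominator-tree path between its early block and the LCA of
 * its uses.  ir_gcm_schedule_late() computes that LCA (a PHI use counts as a
 * use in the corresponding MERGE predecessor) and ir_gcm_select_best_block()
 * then picks the least-nested block on that path. */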
540 
541 int ir_gcm(ir_ctx *ctx)
542 {
543 	ir_ref k, n, *p, ref;
544 	ir_block *bb;
545 	ir_list queue_early;
546 	ir_list queue_late;
547 	uint32_t *_blocks, b;
548 	ir_insn *insn, *use_insn;
549 	ir_use_list *use_list;
550 
551 	IR_ASSERT(ctx->cfg_map);
552 	_blocks = ctx->cfg_map;
553 
554 	ir_list_init(&queue_early, ctx->insns_count);
555 
556 	if (ctx->cfg_blocks_count == 1) {
557 		ref = ctx->cfg_blocks[1].end;
558 		do {
559 			insn = &ctx->ir_base[ref];
560 			_blocks[ref] = 1; /* pin to block */
561 			if (insn->inputs_count > 1) {
562 				/* insn has input data edges */
563 				ir_list_push_unchecked(&queue_early, ref);
564 			}
565 			ref = insn->op1; /* control predecessor */
566 		} while (ref != 1); /* IR_START */
567 		_blocks[1] = 1; /* pin to block */
568 
569 		use_list = &ctx->use_lists[1];
570 		n = use_list->count;
571 		for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
572 			ref = *p;
573 			use_insn = &ctx->ir_base[ref];
574 			if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
575 				ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
576 				_blocks[ref] = 1; /* pin to block */
577 			}
578 		}
579 
580 		/* Place all live nodes into the first block */
581 		while (ir_list_len(&queue_early)) {
582 			ref = ir_list_pop(&queue_early);
583 			insn = &ctx->ir_base[ref];
584 			n = insn->inputs_count;
585 			for (p = insn->ops + 1; n > 0; p++, n--) {
586 				ref = *p;
587 				if (ref > 0 && _blocks[ref] == 0) {
588 					_blocks[ref] = 1;
589 					ir_list_push_unchecked(&queue_early, ref);
590 				}
591 			}
592 		}
593 
594 		ir_list_free(&queue_early);
595 
596 		return 1;
597 	}
598 
599 	ir_list_init(&queue_late, ctx->insns_count);
600 
601 	/* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
602 	b = ctx->cfg_blocks_count;
603 	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
604 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
605 		ref = bb->end;
606 
607 		/* process the last instruction of the block */
608 		insn = &ctx->ir_base[ref];
609 		_blocks[ref] = b; /* pin to block */
610 		if (insn->inputs_count > 1) {
611 			/* insn has input data edges */
612 			ir_list_push_unchecked(&queue_early, ref);
613 		}
614 		ref = insn->op1; /* control predecessor */
615 
616 		while (ref != bb->start) {
617 			insn = &ctx->ir_base[ref];
618 			_blocks[ref] = b; /* pin to block */
619 			if (insn->inputs_count > 1) {
620 				/* insn has input data edges */
621 				ir_list_push_unchecked(&queue_early, ref);
622 			}
623 			if (insn->type != IR_VOID) {
624 				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
625 			}
626 			ref = insn->op1; /* control predecessor */
627 		}
628 
629 		/* process the first instruction of the block */
630 		_blocks[ref] = b; /* pin to block */
631 
632 		use_list = &ctx->use_lists[ref];
633 		n = use_list->count;
634 		if (n > 1) {
635 			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
636 				ref = *p;
637 				use_insn = &ctx->ir_base[ref];
638 				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
639 					bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
640 					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
641 						_blocks[ref] = b; /* pin to block */
642 						ir_list_push_unchecked(&queue_early, ref);
643 					}
644 				} else if (use_insn->op == IR_PARAM) {
645 					bb->flags |= IR_BB_HAS_PARAM;
646 					_blocks[ref] = b; /* pin to block */
647 				} else if (use_insn->op == IR_VAR) {
648 					bb->flags |= IR_BB_HAS_VAR;
649 					_blocks[ref] = b; /* pin to block */
650 				}
651 			}
652 		}
653 	}
654 
655 	n = ir_list_len(&queue_early);
656 	while (n > 0) {
657 		n--;
658 		ref = ir_list_at(&queue_early, n);
659 		insn = &ctx->ir_base[ref];
660 		k = insn->inputs_count - 1;
661 		for (p = insn->ops + 2; k > 0; p++, k--) {
662 			ref = *p;
663 			if (ref > 0 && _blocks[ref] == 0) {
664 				ir_gcm_schedule_early(ctx, ref, &queue_late);
665 			}
666 		}
667 	}
668 
669 #ifdef IR_DEBUG
670 	if (ctx->flags & IR_DEBUG_GCM) {
671 		fprintf(stderr, "GCM Schedule Early\n");
672 		for (n = 1; n < ctx->insns_count; n++) {
673 			fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
674 		}
675 	}
676 #endif
677 
678 #if IR_GCM_SPLIT
679 	ir_gcm_split_data data;
680 
681 	ir_sparse_set_init(&data.totally_useful, ctx->cfg_blocks_count + 1);
682 	ir_list_init(&data.worklist, ctx->cfg_blocks_count + 1);
683 	ctx->data = &data;
684 #endif
685 
686 	n = ir_list_len(&queue_late);
687 	while (n > 0) {
688 		n--;
689 		ref = ir_list_at(&queue_late, n);
690 		b = ctx->cfg_map[ref];
691 		if (IR_GCM_IS_SCHEDULED_EARLY(b)) {
692 			ir_gcm_schedule_late(ctx, ref, b);
693 		}
694 	}
695 
696 #if IR_GCM_SPLIT
697 	ir_list_free(&data.worklist);
698 	ir_sparse_set_free(&data.totally_useful);
699 	ctx->data = NULL;
700 #endif
701 
702 	ir_list_free(&queue_early);
703 	ir_list_free(&queue_late);
704 
705 #ifdef IR_DEBUG
706 	if (ctx->flags & IR_DEBUG_GCM) {
707 		fprintf(stderr, "GCM Schedule Late\n");
708 		for (n = 1; n < ctx->insns_count; n++) {
709 			fprintf(stderr, "%d -> %d\n", n, ctx->cfg_map[n]);
710 		}
711 	}
712 #endif
713 
714 	return 1;
715 }
716 
717 static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
718 {
719 	uint32_t n1, n2, pos;
720 	ir_ref key;
721 	ir_hashtab_bucket *b1, *b2;
722 	ir_hashtab *binding = ctx->binding;
723 	uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);
724 
725 	memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
726 	n1 = binding->count;
727 	n2 = 0;
728 	pos = 0;
729 	b1 = binding->data;
730 	b2 = binding->data;
731 	while (n1 > 0) {
732 		key = b1->key;
733 		IR_ASSERT(key < ctx->insns_count);
734 		if (_xlat[key]) {
735 			key = _xlat[key];
736 			b2->key = key;
737 			if (b1->val > 0) {
738 				IR_ASSERT(_xlat[b1->val]);
739 				b2->val = _xlat[b1->val];
740 			} else {
741 				b2->val = b1->val;
742 			}
743 			key |= binding->mask;
744 			b2->next = ((uint32_t*)binding->data)[key];
745 			((uint32_t*)binding->data)[key] = pos;
746 			pos += sizeof(ir_hashtab_bucket);
747 			b2++;
748 			n2++;
749 		}
750 		b1++;
751 		n1--;
752 	}
753 	binding->count = n2;
754 }
755 
756 IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
757 {
758 	if (!_xlat[ref]) {
759 		_xlat[ref] = ref; /* this is only a "used constant" marker */
760 		return 1;
761 	}
762 	return 0;
763 }
764 
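/* ir_schedule() rebuilds the context in linear order: nodes are chained into a
 * per-block list, topologically sorted inside each block, renumbered through
 * _xlat[], and finally copied (together with the referenced constants and the
 * use lists) into a fresh ir_ctx that replaces the old one.  If nothing moved
 * and no constant became unused, the original context is kept as-is. */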
765 int ir_schedule(ir_ctx *ctx)
766 {
767 	ir_ctx new_ctx;
768 	ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
769 	ir_ref *_xlat;
770 	ir_ref *edges;
771 	ir_ref prev_b_end;
772 	uint32_t b, prev_b;
773 	uint32_t *_blocks = ctx->cfg_map;
774 	ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
775 	ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
776 	ir_ref _move_down = 0;
777 	ir_block *bb;
778 	ir_insn *insn, *new_insn;
779 	ir_use_list *lists, *use_list, *new_list;
780 
781 	/* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */
782 	IR_ASSERT(_blocks[1] == 1);
783 	prev_b = 1;
784 	prev_b_end = ctx->cfg_blocks[1].end;
785 	_prev[1] = 0;
786 	_prev[prev_b_end] = 0;
787 	for (i = 2, j = 1; i < ctx->insns_count; i++) {
788 		b = _blocks[i];
789 		IR_ASSERT((int32_t)b >= 0);
790 		if (b == prev_b && i <= prev_b_end) {
791 			/* add to the end of the list */
792 			_next[j] = i;
793 			_prev[i] = j;
794 			j = i;
795 		} else if (b > prev_b) {
796 			bb = &ctx->cfg_blocks[b];
797 			if (i == bb->start) {
798 				IR_ASSERT(bb->end > bb->start);
799 				prev_b = b;
800 				prev_b_end = bb->end;
801 				_prev[bb->end] = 0;
802 				/* add to the end of the list */
803 				_next[j] = i;
804 				_prev[i] = j;
805 				j = i;
806 			} else {
807 				IR_ASSERT(i != bb->end);
808 				/* move down late (see the following loop) */
809 				_next[i] = _move_down;
810 				_move_down = i;
811 			}
812 		} else if (b) {
813 			bb = &ctx->cfg_blocks[b];
814 			IR_ASSERT(i != bb->start);
815 			if (_prev[bb->end]) {
816 				/* move up, insert before the end of the already scheduled BB */
817 				k = bb->end;
818 			} else {
819 				/* move up, insert at the end of the block */
820 				k = ctx->cfg_blocks[b + 1].start;
821 			}
822 			/* insert before "k" */
823 			_prev[i] = _prev[k];
824 			_next[i] = k;
825 			_next[_prev[k]] = i;
826 			_prev[k] = i;
827 		}
828 	}
829 	_next[j] = 0;
830 
831 	while (_move_down) {
832 		i = _move_down;
833 		_move_down = _next[i];
834 		b = _blocks[i];
835 		bb = &ctx->cfg_blocks[b];
836 		k = _next[bb->start];
837 
838 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
839 			/* insert after the start of the block and all PARAM, VAR, PI, PHI */
840 			insn = &ctx->ir_base[k];
841 			while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
842 				k = _next[k];
843 				insn = &ctx->ir_base[k];
844 			}
845 		}
846 
847 		/* insert before "k" */
848 		_prev[i] = _prev[k];
849 		_next[i] = k;
850 		_next[_prev[k]] = i;
851 		_prev[k] = i;
852 	}
853 
854 #ifdef IR_DEBUG
855 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
856 		fprintf(stderr, "Before Schedule\n");
857 		for (i = 1; i != 0; i = _next[i]) {
858 			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
859 		}
860 	}
861 #endif
862 
863 	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
864 	_xlat += ctx->consts_count;
865 	_xlat[IR_TRUE] = IR_TRUE;
866 	_xlat[IR_FALSE] = IR_FALSE;
867 	_xlat[IR_NULL] = IR_NULL;
868 	_xlat[IR_UNUSED] = IR_UNUSED;
869 	insns_count = 1;
870 	consts_count = -(IR_TRUE - 1);
871 
872 	/* Topological sort according to dependencies inside each basic block */
873 	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
874 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
875 		/* Schedule BB start */
876 		i = bb->start;
877 		_xlat[i] = bb->start = insns_count;
878 		insn = &ctx->ir_base[i];
879 		if (insn->op == IR_CASE_VAL) {
880 			IR_ASSERT(insn->op2 < IR_TRUE);
881 			consts_count += ir_count_constant(_xlat, insn->op2);
882 		}
883 		n = insn->inputs_count;
884 		insns_count += ir_insn_inputs_to_len(n);
885 		i = _next[i];
886 		insn = &ctx->ir_base[i];
887 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
888 			/* Schedule PARAM, VAR, PI */
889 			while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
890 				_xlat[i] = insns_count;
891 				insns_count += 1;
892 				i = _next[i];
893 				insn = &ctx->ir_base[i];
894 			}
895 			/* Schedule PHIs */
896 			while (insn->op == IR_PHI) {
897 				ir_ref j, *p, input;
898 
899 				_xlat[i] = insns_count;
900 				/* Reuse "n" from MERGE and skip first input */
901 				insns_count += ir_insn_inputs_to_len(n + 1);
902 				for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
903 					input = *p;
904 					if (input < IR_TRUE) {
905 						consts_count += ir_count_constant(_xlat, input);
906 					}
907 				}
908 				i = _next[i];
909 				insn = &ctx->ir_base[i];
910 			}
911 		}
912 		if (bb->successors_count > 1) {
913 			ir_ref input, j = bb->end;
914 			ir_insn *end = &ctx->ir_base[j];
915 
916 			if (end->op == IR_IF) {
917 				/* Move condition closer to IF */
918 				input = end->op2;
919 				if (input > 0 && _blocks[input] == b && !_xlat[input] && _prev[j] != input) {
920 					if (input == i) {
921 						i = _next[i];
922 						insn = &ctx->ir_base[i];
923 					}
924 					/* remove "input" */
925 					_prev[_next[input]] = _prev[input];
926 					_next[_prev[input]] = _next[input];
927 					/* insert before "j" */
928 					_prev[input] = _prev[j];
929 					_next[input] = j;
930 					_next[_prev[j]] = input;
931 					_prev[j] = input;
932 				}
933 			}
934 		}
935 		while (i != bb->end) {
936 			ir_ref n, j, *p, input;
937 
938 restart:
939 			n = insn->inputs_count;
940 			for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
941 				input = *p;
942 				if (!_xlat[input]) {
943 					/* input is not scheduled yet */
944 					if (input > 0) {
945 						if (_blocks[input] == b) {
946 							/* "input" should be before "i" to satisfy dependency */
947 #ifdef IR_DEBUG
948 							if (ctx->flags & IR_DEBUG_SCHEDULE) {
949 								fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
950 							}
951 #endif
952 							/* remove "input" */
953 							_prev[_next[input]] = _prev[input];
954 							_next[_prev[input]] = _next[input];
955 							/* insert before "i" */
956 							_prev[input] = _prev[i];
957 							_next[input] = i;
958 							_next[_prev[i]] = input;
959 							_prev[i] = input;
960 							/* restart from "input" */
961 							i = input;
962 							insn = &ctx->ir_base[i];
963 							goto restart;
964 						}
965 					} else if (input < IR_TRUE) {
966 						consts_count += ir_count_constant(_xlat, input);
967 					}
968 				}
969 			}
970 			_xlat[i] = insns_count;
971 			insns_count += ir_insn_inputs_to_len(n);
972 			i = _next[i];
973 			insn = &ctx->ir_base[i];
974 		}
975 		/* Schedule BB end */
976 		_xlat[i] = bb->end = insns_count;
977 		insns_count++;
978 		if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
979 			if (insn->op2 < IR_TRUE) {
980 				consts_count += ir_count_constant(_xlat, insn->op2);
981 			}
982 		}
983 	}
984 
985 #ifdef IR_DEBUG
986 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
987 		fprintf(stderr, "After Schedule\n");
988 		for (i = 1; i != 0; i = _next[i]) {
989 			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
990 		}
991 	}
992 #endif
993 
994 #if 1
995 	/* Check if scheduling didn't make any modifications */
996 	if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
997 		bool changed = 0;
998 
999 		for (i = 1; i != 0; i = _next[i]) {
1000 			if (_xlat[i] != i) {
1001 				changed = 1;
1002 				break;
1003 			}
1004 		}
1005 		if (!changed) {
1006 			_xlat -= ctx->consts_count;
1007 			ir_mem_free(_xlat);
1008 			ir_mem_free(_next);
1009 
1010 			ctx->prev_ref = _prev;
1011 			ctx->flags2 |= IR_LINEAR;
1012 			ir_truncate(ctx);
1013 
1014 			return 1;
1015 		}
1016 	}
1017 #endif
1018 
1019 	ir_mem_free(_prev);
1020 
1021 	ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
1022 	new_ctx.insns_count = insns_count;
1023 	new_ctx.flags2 = ctx->flags2;
1024 	new_ctx.ret_type = ctx->ret_type;
1025 	new_ctx.mflags = ctx->mflags;
1026 	new_ctx.spill_base = ctx->spill_base;
1027 	new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
1028 	new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
1029 	new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
1030 	new_ctx.fixed_regset = ctx->fixed_regset;
1031 	new_ctx.fixed_save_regset = ctx->fixed_save_regset;
1032 	new_ctx.entries_count = ctx->entries_count;
1033 #if defined(IR_TARGET_AARCH64)
1034 	new_ctx.deoptimization_exits = ctx->deoptimization_exits;
1035 	new_ctx.get_exit_addr = ctx->get_exit_addr;
1036 	new_ctx.get_veneer = ctx->get_veneer;
1037 	new_ctx.set_veneer = ctx->set_veneer;
1038 #endif
1039 	new_ctx.loader = ctx->loader;
1040 
1041 	/* Copy constants */
1042 	if (consts_count == ctx->consts_count) {
1043 		new_ctx.consts_count = consts_count;
1044 		ref = 1 - consts_count;
1045 		insn = &ctx->ir_base[ref];
1046 		new_insn = &new_ctx.ir_base[ref];
1047 
1048 		memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
1049 		if (ctx->strtab.data) {
1050 			while (ref != IR_TRUE) {
1051 				if (new_insn->op == IR_FUNC_ADDR) {
1052 					if (new_insn->proto) {
1053 						size_t len;
1054 						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1055 						new_insn->proto = ir_strl(&new_ctx, proto, len);
1056 					}
1057 				} else if (new_insn->op == IR_FUNC) {
1058 					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
1059 					if (new_insn->proto) {
1060 						size_t len;
1061 						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
1062 						new_insn->proto = ir_strl(&new_ctx, proto, len);
1063 					}
1064 				} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
1065 					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
1066 				}
1067 				new_insn++;
1068 				ref++;
1069 			}
1070 		}
1071 	} else {
1072 		new_ref = -new_ctx.consts_count;
1073 		new_insn = &new_ctx.ir_base[new_ref];
1074 		for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
1075 			if (!_xlat[ref]) {
1076 				continue;
1077 			}
1078 			new_insn->optx = insn->optx;
1079 			new_insn->prev_const = 0;
1080 			if (insn->op == IR_FUNC_ADDR) {
1081 				new_insn->val.u64 = insn->val.u64;
1082 				if (insn->proto) {
1083 					size_t len;
1084 					const char *proto = ir_get_strl(ctx, insn->proto, &len);
1085 					new_insn->proto = ir_strl(&new_ctx, proto, len);
1086 				} else {
1087 					new_insn->proto = 0;
1088 				}
1089 			} else if (insn->op == IR_FUNC) {
1090 				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
1091 				if (insn->proto) {
1092 					size_t len;
1093 					const char *proto = ir_get_strl(ctx, insn->proto, &len);
1094 					new_insn->proto = ir_strl(&new_ctx, proto, len);
1095 				} else {
1096 					new_insn->proto = 0;
1097 				}
1098 			} else if (insn->op == IR_SYM || insn->op == IR_STR) {
1099 				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
1100 			} else {
1101 				new_insn->val.u64 = insn->val.u64;
1102 			}
1103 			_xlat[ref] = new_ref;
1104 			new_ref--;
1105 			new_insn--;
1106 		}
1107 		new_ctx.consts_count = -new_ref;
1108 	}
1109 
1110 	new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
1111 	new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
1112 	new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
1113 	new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
1114 
1115 	/* Copy instructions, use lists and use edges */
1116 	prev_ref = 0;
1117 	use_edges_count = 0;
1118 	for (i = 1; i != 0; i = _next[i]) {
1119 		new_ref = _xlat[i];
1120 		new_ctx.cfg_map[new_ref] = _blocks[i];
1121 		_prev[new_ref] = prev_ref;
1122 		prev_ref = new_ref;
1123 
1124 		use_list = &ctx->use_lists[i];
1125 		n = use_list->count;
1126 		k = 0;
1127 		if (n == 1) {
1128 			ref = ctx->use_edges[use_list->refs];
1129 			if (_xlat[ref]) {
1130 				*edges = _xlat[ref];
1131 				edges++;
1132 				k = 1;
1133 			}
1134 		} else {
1135 			p = &ctx->use_edges[use_list->refs];
1136 			while (n--) {
1137 				ref = *p;
1138 				if (_xlat[ref]) {
1139 					*edges = _xlat[ref];
1140 					edges++;
1141 					k++;
1142 				}
1143 				p++;
1144 			}
1145 		}
1146 		new_list = &lists[new_ref];
1147 		new_list->refs = use_edges_count;
1148 		use_edges_count += k;
1149 		new_list->count = k;
1150 
1151 		insn = &ctx->ir_base[i];
1152 		new_insn = &new_ctx.ir_base[new_ref];
1153 
1154 		new_insn->optx = insn->optx;
1155 		n = new_insn->inputs_count;
1156 		switch (n) {
1157 			case 0:
1158 				new_insn->op1 = insn->op1;
1159 				new_insn->op2 = insn->op2;
1160 				new_insn->op3 = insn->op3;
1161 				break;
1162 			case 1:
1163 				new_insn->op1 = _xlat[insn->op1];
1164 				if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
1165 					new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
1166 				} else if (new_insn->op == IR_PROTO) {
1167 					size_t len;
1168 					const char *proto = ir_get_strl(ctx, insn->op2, &len);
1169 					new_insn->op2 = ir_strl(&new_ctx, proto, len);
1170 				} else {
1171 					new_insn->op2 = insn->op2;
1172 				}
1173 				new_insn->op3 = insn->op3;
1174 				break;
1175 			case 2:
1176 				new_insn->op1 = _xlat[insn->op1];
1177 				new_insn->op2 = _xlat[insn->op2];
1178 				new_insn->op3 = insn->op3;
1179 #if IR_SCHEDULE_SWAP_OPS
1180 				/* Swap operands according to folding rules */
1181 				if (new_insn->op1 < new_insn->op2) {
1182 					switch (new_insn->op) {
1183 						case IR_EQ:
1184 						case IR_NE:
1185 						case IR_ADD:
1186 						case IR_MUL:
1187 						case IR_ADD_OV:
1188 						case IR_MUL_OV:
1189 						case IR_OR:
1190 						case IR_AND:
1191 						case IR_XOR:
1192 						case IR_MIN:
1193 						case IR_MAX:
1194 							SWAP_REFS(new_insn->op1, new_insn->op2);
1195 							break;
1196 						case IR_LT:
1197 						case IR_GE:
1198 						case IR_LE:
1199 						case IR_GT:
1200 						case IR_ULT:
1201 						case IR_UGE:
1202 						case IR_ULE:
1203 						case IR_UGT:
1204 							SWAP_REFS(new_insn->op1, new_insn->op2);
1205 							new_insn->op ^= 3; /* [U]LT <-> [U]GT, [U]LE <-> [U]GE */
1206 							break;
1207 					}
1208 				}
1209 #endif
1210 				break;
1211 			case 3:
1212 				new_insn->op1 = _xlat[insn->op1];
1213 				new_insn->op2 = _xlat[insn->op2];
1214 				new_insn->op3 = _xlat[insn->op3];
1215 				break;
1216 			default:
1217 				for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
1218 					*q = _xlat[*p];
1219 				}
1220 				break;
1221 		}
1222 	}
1223 
1224 	/* Update list of terminators (IR_OPND_CONTROL_REF) */
1225 	insn = &new_ctx.ir_base[1];
1226 	ref = insn->op1;
1227 	if (ref) {
1228 		insn->op1 = ref = _xlat[ref];
1229 		while (1) {
1230 			insn = &new_ctx.ir_base[ref];
1231 			ref = insn->op3;
1232 			if (!ref) {
1233 				break;
1234 			}
1235 			insn->op3 = ref = _xlat[ref];
1236 		}
1237 	}
1238 
1239 	IR_ASSERT(ctx->use_edges_count >= use_edges_count);
1240 	new_ctx.use_edges_count = use_edges_count;
1241 	new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
1242 
1243 	if (ctx->binding) {
1244 		ir_xlat_binding(ctx, _xlat);
1245 		new_ctx.binding = ctx->binding;
1246 		ctx->binding = NULL;
1247 	}
1248 
1249 	_xlat -= ctx->consts_count;
1250 	ir_mem_free(_xlat);
1251 
1252 	new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
1253 	new_ctx.cfg_edges_count = ctx->cfg_edges_count;
1254 	new_ctx.cfg_blocks = ctx->cfg_blocks;
1255 	new_ctx.cfg_edges = ctx->cfg_edges;
1256 	ctx->cfg_blocks = NULL;
1257 	ctx->cfg_edges = NULL;
1258 
1259 	ir_free(ctx);
1260 	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
1261 	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
1262 	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
1263 	ctx->flags2 |= IR_LINEAR;
1264 
1265 	ir_mem_free(_next);
1266 
1267 	return 1;
1268 }
1269 
1270 void ir_build_prev_refs(ir_ctx *ctx)
1271 {
1272 	uint32_t b;
1273 	ir_block *bb;
1274 	ir_ref i, n, prev;
1275 	ir_insn *insn;
1276 
1277 	ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
1278 	prev = 0;
1279 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
1280 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
1281 		for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
1282 			ctx->prev_ref[i] = prev;
1283 			n = ir_insn_len(insn);
1284 			prev = i;
1285 			i += n;
1286 			insn += n;
1287 		}
1288 		ctx->prev_ref[i] = prev;
1289 	}
1290 }
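/* Illustrative use of the prev_ref[] chain (hypothetical loop, not part of
 * this file): the scheduled IR can be walked backwards with
 *
 *     for (ref = ctx->cfg_blocks[ctx->cfg_blocks_count].end;
 *          ref;
 *          ref = ctx->prev_ref[ref]) {
 *         // visit instructions in reverse linear order
 *     }
 */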
1291