xref: /PHP-8.2/ext/opcache/jit/ir/ir_gcm.c (revision 4630f77e)
1 /*
2  * IR - Lightweight JIT Compilation Framework
3  * (GCM - Global Code Motion and Scheduler)
4  * Copyright (C) 2022 Zend by Perforce.
5  * Authors: Dmitry Stogov <dmitry@php.net>
6  *
7  * The GCM algorithm is based on Cliff Click's publication
8  * See: C. Click. "Global code motion, global value numbering" Submitted to PLDI'95.
9  */
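/*
 * In short: every node that is not pinned to a block is placed between its
 * earliest legal position (the deepest dominator-tree block among the blocks
 * of its inputs) and its latest one (the last common ancestor of the blocks
 * of its uses), preferring the candidate block with the smallest loop depth.
 */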
10 
11 #include "ir.h"
12 #include "ir_private.h"
13 
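/*
 * Schedule "ref" as early as possible: pick the block of the input with the
 * greatest dominator-tree depth (recursively scheduling inputs that have not
 * been placed yet). The result is stored negated in _blocks[] to mark the
 * node as "scheduled early but not yet finalized". Nodes without non-constant
 * inputs are queued in "queue_rest" and handled by ir_gcm_schedule_rest().
 */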
14 static int32_t ir_gcm_schedule_early(ir_ctx *ctx, int32_t *_blocks, ir_ref ref, ir_list *queue_rest)
15 {
16 	ir_ref n, *p, input;
17 	ir_insn *insn;
18 	uint32_t dom_depth;
19 	int32_t b, result;
20 	bool reschedule_late = 1;
21 
22 	insn = &ctx->ir_base[ref];
23 
24 	IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
25 	IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
26 
27 	result = 1;
28 	dom_depth = 0;
29 
30 	n = insn->inputs_count;
31 	for (p = insn->ops + 1; n > 0; p++, n--) {
32 		input = *p;
33 		if (input > 0) {
34 			b = _blocks[input];
35 			if (b == 0) {
36 				b = ir_gcm_schedule_early(ctx, _blocks, input, queue_rest);
37 			} else if (b < 0) {
38 				b = -b;
39 			}
40 			if (dom_depth < ctx->cfg_blocks[b].dom_depth) {
41 				dom_depth = ctx->cfg_blocks[b].dom_depth;
42 				result = b;
43 			}
44 			reschedule_late = 0;
45 		}
46 	}
47 	_blocks[ref] = -result;
48 
49 	if (UNEXPECTED(reschedule_late)) {
50 		/* Floating nodes that don't depend on other nodes
51 		 * (e.g. they depend only on constants) have to be scheduled
52 		 * to the last common ancestor of their uses. Otherwise they
53 		 * would always end up in the first block.
54 		 */
55 		ir_list_push_unchecked(queue_rest, ref);
56 	}
57 	return result;
58 }
59 
60 /* Last Common Ancestor */
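/*
 * Bring b1 and b2 to the same dominator-tree depth, then walk both up in
 * lock-step until they meet. For example, in a dominator tree where block 2
 * dominates blocks 3 and 4, the LCA of 3 and 4 is 2 (block numbers are
 * illustrative only).
 */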
61 static int32_t ir_gcm_find_lca(ir_ctx *ctx, int32_t b1, int32_t b2)
62 {
63 	uint32_t dom_depth;
64 
65 	dom_depth = ctx->cfg_blocks[b2].dom_depth;
66 	while (ctx->cfg_blocks[b1].dom_depth > dom_depth) {
67 		b1 = ctx->cfg_blocks[b1].dom_parent;
68 	}
69 	dom_depth = ctx->cfg_blocks[b1].dom_depth;
70 	while (ctx->cfg_blocks[b2].dom_depth > dom_depth) {
71 		b2 = ctx->cfg_blocks[b2].dom_parent;
72 	}
73 	while (b1 != b2) {
74 		b1 = ctx->cfg_blocks[b1].dom_parent;
75 		b2 = ctx->cfg_blocks[b2].dom_parent;
76 	}
77 	return b2;
78 }
79 
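/*
 * Schedule "ref" as late as possible: compute the last common ancestor of the
 * blocks of all its uses (for a PHI use, the corresponding MERGE predecessor
 * block is taken instead), then walk up the dominator tree towards the
 * "early" block and prefer a block with a smaller loop depth, so that loop
 * invariant code is hoisted out of loops. Flipping _blocks[ref] back to a
 * positive value marks the node as finally placed.
 */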
80 static void ir_gcm_schedule_late(ir_ctx *ctx, int32_t *_blocks, ir_ref ref)
81 {
82 	ir_ref n, *p, use;
83 	ir_insn *insn;
84 	ir_use_list *use_list;
85 
86 	IR_ASSERT(_blocks[ref] < 0);
87 	_blocks[ref] = -_blocks[ref];
88 	use_list = &ctx->use_lists[ref];
89 	n = use_list->count;
90 	if (n) {
91 		int32_t lca, b;
92 
93 		insn = &ctx->ir_base[ref];
94 		IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
95 		IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
96 
97 		lca = 0;
98 		for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
99 			use = *p;
100 			b = _blocks[use];
101 			if (!b) {
102 				continue;
103 			} else if (b < 0) {
104 				ir_gcm_schedule_late(ctx, _blocks, use);
105 				b = _blocks[use];
106 				IR_ASSERT(b != 0);
107 			}
108 			insn = &ctx->ir_base[use];
109 			if (insn->op == IR_PHI) {
110 				ir_ref *p = insn->ops + 2; /* PHI data inputs */
111 				ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
112 				ir_ref n = insn->inputs_count - 1;
113 
114 				for (;n > 0; p++, q++, n--) {
115 					if (*p == ref) {
116 						b = _blocks[*q];
117 						lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
118 					}
119 				}
120 			} else {
121 				lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
122 			}
123 		}
124 		IR_ASSERT(lca != 0 && "No Common Ancestor");
125 		b = lca;
126 
127 		if (b != _blocks[ref]) {
128 			ir_block *bb = &ctx->cfg_blocks[b];
129 			uint32_t loop_depth = bb->loop_depth;
130 
131 			if (loop_depth) {
132 				uint32_t flags;
133 
134 				use_list = &ctx->use_lists[ref];
135 				if (use_list->count == 1) {
136 					use = ctx->use_edges[use_list->refs];
137 					insn = &ctx->ir_base[use];
138 					if (insn->op == IR_IF || insn->op == IR_GUARD || insn->op == IR_GUARD_NOT) {
139 						_blocks[ref] = b;
140 						return;
141 					}
142 				}
143 
144 				flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
145 				if ((flags & IR_BB_LOOP_WITH_ENTRY)
146 				 && !(ctx->binding && ir_binding_find(ctx, ref))) {
147 					/* Don't move loop invariant code across an OSR ENTRY if we can't restore it */
148 				} else {
149 					do {
150 						lca = bb->dom_parent;
151 						bb = &ctx->cfg_blocks[lca];
152 						if (bb->loop_depth < loop_depth) {
153 							if (!bb->loop_depth) {
154 								b = lca;
155 								break;
156 							}
157 							flags = (bb->flags & IR_BB_LOOP_HEADER) ? bb->flags : ctx->cfg_blocks[bb->loop_header].flags;
158 							if ((flags & IR_BB_LOOP_WITH_ENTRY)
159 							 && !(ctx->binding && ir_binding_find(ctx, ref))) {
160 								break;
161 							}
162 							loop_depth = bb->loop_depth;
163 							b = lca;
164 						}
165 					} while (lca != _blocks[ref]);
166 				}
167 			}
168 			_blocks[ref] = b;
169 			if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
170 			/* OVERFLOW is a projection and must be scheduled together with the preceding ADD/SUB/MUL_OV */
171 				_blocks[ref + 1] = b;
172 			}
173 		}
174 	}
175 }
176 
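/*
 * Finalize the nodes collected in "queue_rest" (nodes that depend only on
 * constants): they are simply placed at the last common ancestor of the
 * blocks of their uses, without the loop-depth adjustment performed by
 * ir_gcm_schedule_late().
 */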
177 static void ir_gcm_schedule_rest(ir_ctx *ctx, int32_t *_blocks, ir_ref ref)
178 {
179 	ir_ref n, *p, use;
180 	ir_insn *insn;
181 
182 	IR_ASSERT(_blocks[ref] < 0);
183 	_blocks[ref] = -_blocks[ref];
184 	n = ctx->use_lists[ref].count;
185 	if (n) {
186 		uint32_t lca;
187 		int32_t b;
188 
189 		insn = &ctx->ir_base[ref];
190 		IR_ASSERT(insn->op != IR_PARAM && insn->op != IR_VAR);
191 		IR_ASSERT(insn->op != IR_PHI && insn->op != IR_PI);
192 
193 		lca = 0;
194 		for (p = &ctx->use_edges[ctx->use_lists[ref].refs]; n > 0; p++, n--) {
195 			use = *p;
196 			b = _blocks[use];
197 			if (!b) {
198 				continue;
199 			} else if (b < 0) {
200 				ir_gcm_schedule_late(ctx, _blocks, use);
201 				b = _blocks[use];
202 				IR_ASSERT(b != 0);
203 			}
204 			insn = &ctx->ir_base[use];
205 			if (insn->op == IR_PHI) {
206 				ir_ref *p = insn->ops + 2; /* PHI data inputs */
207 				ir_ref *q = ctx->ir_base[insn->op1].ops + 1; /* MERGE inputs */
208 
209 				ir_ref n = insn->inputs_count - 1;
210 
211 				for (;n > 0; p++, q++, n--) {
212 					if (*p == ref) {
213 						b = _blocks[*q];
214 						lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
215 					}
216 				}
217 			} else {
218 				lca = !lca ? b : ir_gcm_find_lca(ctx, lca, b);
219 			}
220 		}
221 		IR_ASSERT(lca != 0 && "No Common Ancestor");
222 		b = lca;
223 		_blocks[ref] = b;
224 		if (ctx->ir_base[ref + 1].op == IR_OVERFLOW) {
225 		/* OVERFLOW is a projection and must be scheduled together with the preceding ADD/SUB/MUL_OV */
226 			_blocks[ref + 1] = b;
227 		}
228 	}
229 }
230 
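/*
 * GCM driver: pin every control instruction and the control-dependent PARAM,
 * VAR, PHI and PI instructions to their blocks, derive the early placement of
 * the floating nodes from the data inputs of the pinned instructions and the
 * late placement from the uses of the pinned data roots, and finally place
 * the nodes that depend only on constants. On return ctx->cfg_map holds the
 * selected block for every scheduled node.
 */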
231 int ir_gcm(ir_ctx *ctx)
232 {
233 	ir_ref k, n, *p, ref;
234 	ir_block *bb;
235 	ir_list queue_early;
236 	ir_list queue_late;
237 	ir_list queue_rest;
238 	int32_t *_blocks, b;
239 	ir_insn *insn, *use_insn;
240 	ir_use_list *use_list;
241 
242 	IR_ASSERT(ctx->cfg_map);
243 	_blocks = (int32_t*)ctx->cfg_map;
244 
245 	ir_list_init(&queue_early, ctx->insns_count);
246 
247 	if (ctx->cfg_blocks_count == 1) {
248 		ref = ctx->cfg_blocks[1].end;
249 		do {
250 			insn = &ctx->ir_base[ref];
251 			_blocks[ref] = 1; /* pin to block */
252 			if (insn->inputs_count > 1) {
253 				/* insn has input data edges */
254 				ir_list_push_unchecked(&queue_early, ref);
255 			}
256 			ref = insn->op1; /* control predecessor */
257 		} while (ref != 1); /* IR_START */
258 		_blocks[1] = 1; /* pin to block */
259 
260 		use_list = &ctx->use_lists[1];
261 		n = use_list->count;
262 		for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
263 			ref = *p;
264 			use_insn = &ctx->ir_base[ref];
265 			if (use_insn->op == IR_PARAM || use_insn->op == IR_VAR) {
266 				ctx->cfg_blocks[1].flags |= (use_insn->op == IR_PARAM) ? IR_BB_HAS_PARAM : IR_BB_HAS_VAR;
267 				_blocks[ref] = 1; /* pin to block */
268 			}
269 		}
270 
271 		/* Place all live nodes into the first block */
272 		while (ir_list_len(&queue_early)) {
273 			ref = ir_list_pop(&queue_early);
274 			insn = &ctx->ir_base[ref];
275 			n = insn->inputs_count;
276 			for (p = insn->ops + 1; n > 0; p++, n--) {
277 				ref = *p;
278 				if (ref > 0 && _blocks[ref] == 0) {
279 					_blocks[ref] = 1;
280 					ir_list_push_unchecked(&queue_early, ref);
281 				}
282 			}
283 		}
284 
285 		ir_list_free(&queue_early);
286 
287 		return 1;
288 	}
289 
290 	ir_list_init(&queue_late, ctx->insns_count);
291 
292 	/* pin and collect control and control-dependent (PARAM, VAR, PHI, PI) instructions */
293 	b = ctx->cfg_blocks_count;
294 	for (bb = ctx->cfg_blocks + b; b > 0; bb--, b--) {
295 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
296 		ref = bb->end;
297 
298 		/* process the last instruction of the block */
299 		insn = &ctx->ir_base[ref];
300 		_blocks[ref] = b; /* pin to block */
301 		if (insn->inputs_count > 1) {
302 			/* insn has input data edges */
303 			ir_list_push_unchecked(&queue_early, ref);
304 		}
305 		ref = insn->op1; /* control predecessor */
306 
307 		while (ref != bb->start) {
308 			insn = &ctx->ir_base[ref];
309 			_blocks[ref] = b; /* pin to block */
310 			if (insn->inputs_count > 1) {
311 				/* insn has input data edges */
312 				ir_list_push_unchecked(&queue_early, ref);
313 			}
314 			if (insn->type != IR_VOID) {
315 				IR_ASSERT(ir_op_flags[insn->op] & IR_OP_FLAG_MEM);
316 				ir_list_push_unchecked(&queue_late, ref);
317 			}
318 			ref = insn->op1; /* control predecessor */
319 		}
320 
321 		/* process the first instruction of the block */
322 		_blocks[ref] = b; /* pin to block */
323 
324 		use_list = &ctx->use_lists[ref];
325 		n = use_list->count;
326 		if (n > 1) {
327 			for (p = &ctx->use_edges[use_list->refs]; n > 0; n--, p++) {
328 				ref = *p;
329 				use_insn = &ctx->ir_base[ref];
330 				if (use_insn->op == IR_PHI || use_insn->op == IR_PI) {
331 					bb->flags |= (use_insn->op == IR_PHI) ? IR_BB_HAS_PHI : IR_BB_HAS_PI;
332 					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
333 						_blocks[ref] = b; /* pin to block */
334 						ir_list_push_unchecked(&queue_early, ref);
335 						ir_list_push_unchecked(&queue_late, ref);
336 					}
337 				} else if (use_insn->op == IR_PARAM) {
338 					bb->flags |= IR_BB_HAS_PARAM;
339 					_blocks[ref] = b; /* pin to block */
340 					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
341 						ir_list_push_unchecked(&queue_late, ref);
342 					}
343 				} else if (use_insn->op == IR_VAR) {
344 					bb->flags |= IR_BB_HAS_VAR;
345 					_blocks[ref] = b; /* pin to block */
346 					if (EXPECTED(ctx->use_lists[ref].count != 0)) {
347 						/* This is necessary only for VADDR */
348 						ir_list_push_unchecked(&queue_late, ref);
349 					}
350 				}
351 			}
352 		}
353 	}
354 
355 	ir_list_init(&queue_rest, ctx->insns_count);
356 
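	/* Schedule early: process the data inputs of every pinned instruction
	 * (op1 is the control edge and is skipped). */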
357 	n = ir_list_len(&queue_early);
358 	while (n > 0) {
359 		n--;
360 		ref = ir_list_at(&queue_early, n);
361 		insn = &ctx->ir_base[ref];
362 		k = insn->inputs_count - 1;
363 		for (p = insn->ops + 2; k > 0; p++, k--) {
364 			ref = *p;
365 			if (ref > 0 && _blocks[ref] == 0) {
366 				ir_gcm_schedule_early(ctx, _blocks, ref, &queue_rest);
367 			}
368 		}
369 	}
370 
371 #ifdef IR_DEBUG
372 	if (ctx->flags & IR_DEBUG_GCM) {
373 		fprintf(stderr, "GCM Schedule Early\n");
374 		for (n = 1; n < ctx->insns_count; n++) {
375 			fprintf(stderr, "%d -> %d\n", n, _blocks[n]);
376 		}
377 	}
378 #endif
379 
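	/* Schedule late: walk the users of the pinned data roots and finalize
	 * every node that is still marked with a negative (early-only) block. */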
380 	n = ir_list_len(&queue_late);
381 	while (n > 0) {
382 		n--;
383 		ref = ir_list_at(&queue_late, n);
384 		use_list = &ctx->use_lists[ref];
385 		k = use_list->count;
386 		for (p = &ctx->use_edges[use_list->refs]; k > 0; p++, k--) {
387 			ref = *p;
388 			if (_blocks[ref] < 0) {
389 				ir_gcm_schedule_late(ctx, _blocks, ref);
390 			}
391 		}
392 	}
393 
394 	n = ir_list_len(&queue_rest);
395 	while (n > 0) {
396 		n--;
397 		ref = ir_list_at(&queue_rest, n);
398 		ir_gcm_schedule_rest(ctx, _blocks, ref);
399 	}
400 
401 	ir_list_free(&queue_early);
402 	ir_list_free(&queue_late);
403 	ir_list_free(&queue_rest);
404 
405 #ifdef IR_DEBUG
406 	if (ctx->flags & IR_DEBUG_GCM) {
407 		fprintf(stderr, "GCM Schedule Late\n");
408 		for (n = 1; n < ctx->insns_count; n++) {
409 			fprintf(stderr, "%d -> %d\n", n, _blocks[n]);
410 		}
411 	}
412 #endif
413 
414 	return 1;
415 }
416 
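/*
 * Remap the variable binding table after renumbering: reset the hash slots,
 * translate every surviving key (and positive value) through _xlat, compact
 * the bucket array and relink the hash chains.
 */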
417 static void ir_xlat_binding(ir_ctx *ctx, ir_ref *_xlat)
418 {
419 	uint32_t n1, n2, pos;
420 	ir_ref key;
421 	ir_hashtab_bucket *b1, *b2;
422 	ir_hashtab *binding = ctx->binding;
423 	uint32_t hash_size = (uint32_t)(-(int32_t)binding->mask);
424 
425 	memset((char*)binding->data - (hash_size * sizeof(uint32_t)), -1, hash_size * sizeof(uint32_t));
426 	n1 = binding->count;
427 	n2 = 0;
428 	pos = 0;
429 	b1 = binding->data;
430 	b2 = binding->data;
431 	while (n1 > 0) {
432 		key = b1->key;
433 		IR_ASSERT(key < ctx->insns_count);
434 		if (_xlat[key]) {
435 			key = _xlat[key];
436 			b2->key = key;
437 			if (b1->val > 0) {
438 				IR_ASSERT(_xlat[b1->val]);
439 				b2->val = _xlat[b1->val];
440 			} else {
441 				b2->val = b1->val;
442 			}
443 			key |= binding->mask;
444 			b2->next = ((uint32_t*)binding->data)[key];
445 			((uint32_t*)binding->data)[key] = pos;
446 			pos += sizeof(ir_hashtab_bucket);
447 			b2++;
448 			n2++;
449 		}
450 		b1++;
451 		n1--;
452 	}
453 	binding->count = n2;
454 }
455 
456 IR_ALWAYS_INLINE ir_ref ir_count_constant(ir_ref *_xlat, ir_ref ref)
457 {
458 	if (!_xlat[ref]) {
459 		_xlat[ref] = ref; /* this is only a "used constant" marker */
460 		return 1;
461 	}
462 	return 0;
463 }
464 
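/*
 * Linearize the IR according to the block assignment in ctx->cfg_map: link
 * the nodes of each block into a doubly-linked list (_next/_prev), complete a
 * topological sort inside every block, renumber the nodes through _xlat and
 * rebuild constants, instructions, use lists and use edges in a fresh context
 * that replaces the original one. If scheduling turns out to be a no-op, the
 * original context is kept and only ctx->prev_ref is built.
 */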
465 int ir_schedule(ir_ctx *ctx)
466 {
467 	ir_ctx new_ctx;
468 	ir_ref i, j, k, n, *p, *q, ref, new_ref, prev_ref, insns_count, consts_count, use_edges_count;
469 	ir_ref *_xlat;
470 	ir_ref *edges;
471 	uint32_t b, prev_b;
472 	uint32_t *_blocks = ctx->cfg_map;
473 	ir_ref *_next = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
474 	ir_ref *_prev = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
475 	ir_ref _move_down = 0;
476 	ir_block *bb;
477 	ir_insn *insn, *new_insn;
478 	ir_use_list *lists, *use_list, *new_list;
479 
480 	/* Create a doubly-linked list of nodes ordered by BB, respecting BB->start and BB->end */
481 	prev_b = _blocks[1];
482 	IR_ASSERT(prev_b);
483 	_prev[1] = 0;
484 	_prev[ctx->cfg_blocks[1].end] = 0;
485 	for (i = 2, j = 1; i < ctx->insns_count; i++) {
486 		b = _blocks[i];
487 		IR_ASSERT((int32_t)b >= 0);
488 		if (b == prev_b) {
489 			/* add to the end of the list */
490 			_next[j] = i;
491 			_prev[i] = j;
492 			j = i;
493 		} else if (b > prev_b) {
494 			bb = &ctx->cfg_blocks[b];
495 			if (i == bb->start) {
496 				IR_ASSERT(bb->end > bb->start);
497 				prev_b = b;
498 				_prev[bb->end] = 0;
499 				/* add to the end of the list */
500 				_next[j] = i;
501 				_prev[i] = j;
502 				j = i;
503 			} else {
504 				IR_ASSERT(i != bb->end);
505 				/* move down later (see the following loop) */
506 				_next[i] = _move_down;
507 				_move_down = i;
508 			}
509 		} else if (b) {
510 			bb = &ctx->cfg_blocks[b];
511 			IR_ASSERT(i != bb->start);
512 			if (_prev[bb->end]) {
513 				/* move up, insert before the end of the already scheduled BB */
514 				k = bb->end;
515 			} else {
516 				/* move up, insert at the end of the block */
517 				k = ctx->cfg_blocks[b + 1].start;
518 			}
519 			/* insert before "k" */
520 			_prev[i] = _prev[k];
521 			_next[i] = k;
522 			_next[_prev[k]] = i;
523 			_prev[k] = i;
524 		}
525 	}
526 	_next[j] = 0;
527 
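	/* Nodes that were reached before the start of their block was linked were
	 * deferred on the "_move_down" list; insert each of them right after the
	 * block start and its PARAM/VAR/PI/PHI group. */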
528 	while (_move_down) {
529 		i = _move_down;
530 		_move_down = _next[i];
531 		b = _blocks[i];
532 		bb = &ctx->cfg_blocks[b];
533 		k = _next[bb->start];
534 
535 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
536 			/* insert after the start of the block and all PARAM, VAR, PI, PHI */
537 			insn = &ctx->ir_base[k];
538 			while (insn->op == IR_PHI || insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
539 				k = _next[k];
540 				insn = &ctx->ir_base[k];
541 			}
542 		}
543 
544 		/* insert before "k" */
545 		_prev[i] = _prev[k];
546 		_next[i] = k;
547 		_next[_prev[k]] = i;
548 		_prev[k] = i;
549 	}
550 
551 #ifdef IR_DEBUG
552 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
553 		fprintf(stderr, "Before Schedule\n");
554 		for (i = 1; i != 0; i = _next[i]) {
555 			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
556 		}
557 	}
558 #endif
559 
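	/* "_xlat" translates old references to new ones. It covers constants and
	 * instructions, and is biased by consts_count so that negative (constant)
	 * references can be used as indexes; constants that remain in use are
	 * marked through ir_count_constant(). */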
560 	_xlat = ir_mem_calloc((ctx->consts_count + ctx->insns_count), sizeof(ir_ref));
561 	_xlat += ctx->consts_count;
562 	_xlat[IR_TRUE] = IR_TRUE;
563 	_xlat[IR_FALSE] = IR_FALSE;
564 	_xlat[IR_NULL] = IR_NULL;
565 	_xlat[IR_UNUSED] = IR_UNUSED;
566 	insns_count = 1;
567 	consts_count = -(IR_TRUE - 1);
568 
569 	/* Topological sort according to dependencies inside each basic block */
570 	for (b = 1, bb = ctx->cfg_blocks + 1; b <= ctx->cfg_blocks_count; b++, bb++) {
571 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
572 		/* Schedule BB start */
573 		i = bb->start;
574 		_xlat[i] = bb->start = insns_count;
575 		insn = &ctx->ir_base[i];
576 		if (insn->op == IR_CASE_VAL) {
577 			IR_ASSERT(insn->op2 < IR_TRUE);
578 			consts_count += ir_count_constant(_xlat, insn->op2);
579 		}
580 		n = insn->inputs_count;
581 		insns_count += ir_insn_inputs_to_len(n);
582 		i = _next[i];
583 		insn = &ctx->ir_base[i];
584 		if (bb->flags & (IR_BB_HAS_PHI|IR_BB_HAS_PI|IR_BB_HAS_PARAM|IR_BB_HAS_VAR)) {
585 			/* Schedule PARAM, VAR, PI */
586 			while (insn->op == IR_PARAM || insn->op == IR_VAR || insn->op == IR_PI) {
587 				_xlat[i] = insns_count;
588 				insns_count += 1;
589 				i = _next[i];
590 				insn = &ctx->ir_base[i];
591 			}
592 			/* Schedule PHIs */
593 			while (insn->op == IR_PHI) {
594 				ir_ref j, *p, input;
595 
596 				_xlat[i] = insns_count;
597 				/* Reuse "n" from MERGE and skip first input */
598 				insns_count += ir_insn_inputs_to_len(n + 1);
599 				for (j = n, p = insn->ops + 2; j > 0; p++, j--) {
600 					input = *p;
601 					if (input < IR_TRUE) {
602 						consts_count += ir_count_constant(_xlat, input);
603 					}
604 				}
605 				i = _next[i];
606 				insn = &ctx->ir_base[i];
607 			}
608 		}
609 		while (i != bb->end) {
610 			ir_ref n, j, *p, input;
611 
612 restart:
613 			n = insn->inputs_count;
614 			for (j = n, p = insn->ops + 1; j > 0; p++, j--) {
615 				input = *p;
616 				if (!_xlat[input]) {
617 					/* input is not scheduled yet */
618 					if (input > 0) {
619 						if (_blocks[input] == b) {
620 							/* "input" should be before "i" to satisfy dependency */
621 #ifdef IR_DEBUG
622 							if (ctx->flags & IR_DEBUG_SCHEDULE) {
623 								fprintf(stderr, "Wrong dependency %d:%d -> %d\n", b, input, i);
624 							}
625 #endif
626 							/* remove "input" */
627 							_prev[_next[input]] = _prev[input];
628 							_next[_prev[input]] = _next[input];
629 							/* insert before "i" */
630 							_prev[input] = _prev[i];
631 							_next[input] = i;
632 							_next[_prev[i]] = input;
633 							_prev[i] = input;
634 							/* restart from "input" */
635 							i = input;
636 							insn = &ctx->ir_base[i];
637 							goto restart;
638 						}
639 					} else if (input < IR_TRUE) {
640 						consts_count += ir_count_constant(_xlat, input);
641 					}
642 				}
643 			}
644 			_xlat[i] = insns_count;
645 			insns_count += ir_insn_inputs_to_len(n);
646 			i = _next[i];
647 			insn = &ctx->ir_base[i];
648 		}
649 		/* Schedule BB end */
650 		_xlat[i] = bb->end = insns_count;
651 		insns_count++;
652 		if (IR_INPUT_EDGES_COUNT(ir_op_flags[insn->op]) == 2) {
653 			if (insn->op2 < IR_TRUE) {
654 				consts_count += ir_count_constant(_xlat, insn->op2);
655 			}
656 		}
657 	}
658 
659 #ifdef IR_DEBUG
660 	if (ctx->flags & IR_DEBUG_SCHEDULE) {
661 		fprintf(stderr, "After Schedule\n");
662 		for (i = 1; i != 0; i = _next[i]) {
663 			fprintf(stderr, "%d -> %d\n", i, _blocks[i]);
664 		}
665 	}
666 #endif
667 
668 #if 1
669 	/* Check whether scheduling left the IR unchanged; if so, keep the original context */
670 	if (consts_count == ctx->consts_count && insns_count == ctx->insns_count) {
671 		bool changed = 0;
672 
673 		for (i = 1; i != 0; i = _next[i]) {
674 			if (_xlat[i] != i) {
675 				changed = 1;
676 				break;
677 			}
678 		}
679 		if (!changed) {
680 			_xlat -= ctx->consts_count;
681 			ir_mem_free(_xlat);
682 			ir_mem_free(_next);
683 
684 			ctx->prev_ref = _prev;
685 			ctx->flags2 |= IR_LINEAR;
686 			ir_truncate(ctx);
687 
688 			return 1;
689 		}
690 	}
691 #endif
692 
693 	ir_mem_free(_prev);
694 
695 	ir_init(&new_ctx, ctx->flags, consts_count, insns_count);
696 	new_ctx.insns_count = insns_count;
697 	new_ctx.flags2 = ctx->flags2;
698 	new_ctx.ret_type = ctx->ret_type;
699 	new_ctx.mflags = ctx->mflags;
700 	new_ctx.spill_base = ctx->spill_base;
701 	new_ctx.fixed_stack_red_zone = ctx->fixed_stack_red_zone;
702 	new_ctx.fixed_stack_frame_size = ctx->fixed_stack_frame_size;
703 	new_ctx.fixed_call_stack_size = ctx->fixed_call_stack_size;
704 	new_ctx.fixed_regset = ctx->fixed_regset;
705 	new_ctx.fixed_save_regset = ctx->fixed_save_regset;
706 	new_ctx.entries_count = ctx->entries_count;
707 #if defined(IR_TARGET_AARCH64)
708 	new_ctx.deoptimization_exits = ctx->deoptimization_exits;
709 	new_ctx.get_exit_addr = ctx->get_exit_addr;
710 	new_ctx.get_veneer = ctx->get_veneer;
711 	new_ctx.set_veneer = ctx->set_veneer;
712 #endif
713 	new_ctx.loader = ctx->loader;
714 
715 	/* Copy constants */
716 	if (consts_count == ctx->consts_count) {
717 		new_ctx.consts_count = consts_count;
718 		ref = 1 - consts_count;
719 		insn = &ctx->ir_base[ref];
720 		new_insn = &new_ctx.ir_base[ref];
721 
722 		memcpy(new_insn, insn, sizeof(ir_insn) * (IR_TRUE - ref));
723 		if (ctx->strtab.data) {
724 			while (ref != IR_TRUE) {
725 				if (new_insn->op == IR_FUNC_ADDR) {
726 					if (new_insn->proto) {
727 						size_t len;
728 						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
729 						new_insn->proto = ir_strl(&new_ctx, proto, len);
730 					}
731 				} else if (new_insn->op == IR_FUNC) {
732 					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
733 					if (new_insn->proto) {
734 						size_t len;
735 						const char *proto = ir_get_strl(ctx, new_insn->proto, &len);
736 						new_insn->proto = ir_strl(&new_ctx, proto, len);
737 					}
738 				} else if (new_insn->op == IR_SYM || new_insn->op == IR_STR) {
739 					new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, new_insn->val.name));
740 				}
741 				new_insn++;
742 				ref++;
743 			}
744 		}
745 	} else {
746 		new_ref = -new_ctx.consts_count;
747 		new_insn = &new_ctx.ir_base[new_ref];
748 		for (ref = IR_TRUE - 1, insn = &ctx->ir_base[ref]; ref > -ctx->consts_count; insn--, ref--) {
749 			if (!_xlat[ref]) {
750 				continue;
751 			}
752 			new_insn->optx = insn->optx;
753 			new_insn->prev_const = 0;
754 			if (insn->op == IR_FUNC_ADDR) {
755 				new_insn->val.u64 = insn->val.u64;
756 				if (insn->proto) {
757 					size_t len;
758 					const char *proto = ir_get_strl(ctx, insn->proto, &len);
759 					new_insn->proto = ir_strl(&new_ctx, proto, len);
760 				} else {
761 					new_insn->proto = 0;
762 				}
763 			} else if (insn->op == IR_FUNC) {
764 				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
765 				if (insn->proto) {
766 					size_t len;
767 					const char *proto = ir_get_strl(ctx, insn->proto, &len);
768 					new_insn->proto = ir_strl(&new_ctx, proto, len);
769 				} else {
770 					new_insn->proto = 0;
771 				}
772 			} else if (insn->op == IR_SYM || insn->op == IR_STR) {
773 				new_insn->val.u64 = ir_str(&new_ctx, ir_get_str(ctx, insn->val.name));
774 			} else {
775 				new_insn->val.u64 = insn->val.u64;
776 			}
777 			_xlat[ref] = new_ref;
778 			new_ref--;
779 			new_insn--;
780 		}
781 		new_ctx.consts_count = -new_ref;
782 	}
783 
784 	new_ctx.cfg_map = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));
785 	new_ctx.prev_ref = _prev = ir_mem_malloc(insns_count * sizeof(ir_ref));
786 	new_ctx.use_lists = lists = ir_mem_malloc(insns_count * sizeof(ir_use_list));
787 	new_ctx.use_edges = edges = ir_mem_malloc(ctx->use_edges_count * sizeof(ir_ref));
788 
789 	/* Copy instructions, use lists and use edges */
790 	prev_ref = 0;
791 	use_edges_count = 0;
792 	for (i = 1; i != 0; i = _next[i]) {
793 		new_ref = _xlat[i];
794 		new_ctx.cfg_map[new_ref] = _blocks[i];
795 		_prev[new_ref] = prev_ref;
796 		prev_ref = new_ref;
797 
798 		use_list = &ctx->use_lists[i];
799 		n = use_list->count;
800 		k = 0;
801 		if (n == 1) {
802 			ref = ctx->use_edges[use_list->refs];
803 			if (_xlat[ref]) {
804 				*edges = _xlat[ref];
805 				edges++;
806 				k = 1;
807 			}
808 		} else {
809 			p = &ctx->use_edges[use_list->refs];
810 			while (n--) {
811 				ref = *p;
812 				if (_xlat[ref]) {
813 					*edges = _xlat[ref];
814 					edges++;
815 					k++;
816 				}
817 				p++;
818 			}
819 		}
820 		new_list = &lists[new_ref];
821 		new_list->refs = use_edges_count;
822 		use_edges_count += k;
823 		new_list->count = k;
824 
825 		insn = &ctx->ir_base[i];
826 		new_insn = &new_ctx.ir_base[new_ref];
827 
828 		new_insn->optx = insn->optx;
829 		n = new_insn->inputs_count;
830 		switch (n) {
831 			case 0:
832 				new_insn->op1 = insn->op1;
833 				new_insn->op2 = insn->op2;
834 				new_insn->op3 = insn->op3;
835 				break;
836 			case 1:
837 				new_insn->op1 = _xlat[insn->op1];
838 				if (new_insn->op == IR_PARAM || insn->op == IR_VAR) {
839 					new_insn->op2 = ir_str(&new_ctx, ir_get_str(ctx, insn->op2));
840 				} else if (new_insn->op == IR_PROTO) {
841 					size_t len;
842 					const char *proto = ir_get_strl(ctx, insn->op2, &len);
843 					new_insn->op2 = ir_strl(&new_ctx, proto, len);
844 				} else {
845 					new_insn->op2 = insn->op2;
846 				}
847 				new_insn->op3 = insn->op3;
848 				break;
849 			case 2:
850 				new_insn->op1 = _xlat[insn->op1];
851 				new_insn->op2 = _xlat[insn->op2];
852 				new_insn->op3 = insn->op3;
853 				break;
854 			case 3:
855 				new_insn->op1 = _xlat[insn->op1];
856 				new_insn->op2 = _xlat[insn->op2];
857 				new_insn->op3 = _xlat[insn->op3];
858 				break;
859 			default:
860 				for (j = n, p = insn->ops + 1, q = new_insn->ops + 1; j > 0; p++, q++, j--) {
861 					*q = _xlat[*p];
862 				}
863 				break;
864 		}
865 	}
866 
867 	/* Update the list of terminators (IR_OPND_CONTROL_REF) */
868 	insn = &new_ctx.ir_base[1];
869 	ref = insn->op1;
870 	if (ref) {
871 		insn->op1 = ref = _xlat[ref];
872 		while (1) {
873 			insn = &new_ctx.ir_base[ref];
874 			ref = insn->op3;
875 			if (!ref) {
876 				break;
877 			}
878 			insn->op3 = ref = _xlat[ref];
879 		}
880 	}
881 
882 	IR_ASSERT(ctx->use_edges_count >= use_edges_count);
883 	new_ctx.use_edges_count = use_edges_count;
884 	new_ctx.use_edges = ir_mem_realloc(new_ctx.use_edges, use_edges_count * sizeof(ir_ref));
885 
886 	if (ctx->binding) {
887 		ir_xlat_binding(ctx, _xlat);
888 		new_ctx.binding = ctx->binding;
889 		ctx->binding = NULL;
890 	}
891 
892 	_xlat -= ctx->consts_count;
893 	ir_mem_free(_xlat);
894 
895 	new_ctx.cfg_blocks_count = ctx->cfg_blocks_count;
896 	new_ctx.cfg_edges_count = ctx->cfg_edges_count;
897 	new_ctx.cfg_blocks = ctx->cfg_blocks;
898 	new_ctx.cfg_edges = ctx->cfg_edges;
899 	ctx->cfg_blocks = NULL;
900 	ctx->cfg_edges = NULL;
901 
902 	ir_free(ctx);
903 	IR_ASSERT(new_ctx.consts_count == new_ctx.consts_limit);
904 	IR_ASSERT(new_ctx.insns_count == new_ctx.insns_limit);
905 	memcpy(ctx, &new_ctx, sizeof(ir_ctx));
906 	ctx->flags2 |= IR_LINEAR;
907 
908 	ir_mem_free(_next);
909 
910 	return 1;
911 }
912 
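/*
 * Build ctx->prev_ref for an IR that is already in linear order: walk the
 * instructions of every block, stepping over multi-slot instructions with
 * ir_insn_len(), and record for each one the previously visited instruction.
 */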
913 void ir_build_prev_refs(ir_ctx *ctx)
914 {
915 	uint32_t b;
916 	ir_block *bb;
917 	ir_ref i, n, prev;
918 	ir_insn *insn;
919 
920 	ctx->prev_ref = ir_mem_malloc(ctx->insns_count * sizeof(ir_ref));
921 	prev = 0;
922 	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
923 		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
924 		for (i = bb->start, insn = ctx->ir_base + i; i < bb->end;) {
925 			ctx->prev_ref[i] = prev;
926 			n = ir_insn_len(insn);
927 			prev = i;
928 			i += n;
929 			insn += n;
930 		}
931 		ctx->prev_ref[i] = prev;
932 	}
933 }
934