/*
 * IR - Lightweight JIT Compilation Framework
 * (Aarch64 native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

|.arch arm64

|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

# define IR_MEM_VAL(loc)            ((loc).v)
#else
typedef uint64_t ir_mem;

# define IR_MEM_VAL(loc)            (loc)
#endif

#define IR_MEM_OFFSET(loc)          ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff))
#define IR_MEM_BASE(loc)            ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff))
#define IR_MEM_INDEX(loc)           ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff))
#define IR_MEM_SHIFT(loc)           ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff))
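
/* An ir_mem value packs a whole memory operand into 64 bits:
 * bits 0..31 hold the signed offset, bits 32..39 the base register,
 * bits 40..47 the index register and bits 48..55 the shift amount. */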

#define IR_MEM_O(addr)            IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 0)
#define IR_MEM_B(base)            IR_MEM(base, 0, IR_REG_NONE, 0)
#define IR_MEM_BO(base, offset)   IR_MEM(base, offset, IR_REG_NONE, 0)

IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t shift)
{
	ir_mem mem;
	IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || offset == 0);
	IR_ASSERT(shift == 0); // TODO: ???
#ifdef IR_DEBUG
	mem.v =
#else
	mem =
#endif
		((uint64_t)(uint32_t)offset |
		((uint64_t)(uint8_t)base << 32) |
		((uint64_t)(uint8_t)index << 40) |
		((uint64_t)(uint8_t)shift << 48));
	return mem;
}

#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ? \
		((offset) + (int32_t)sizeof(void*) * 2) : \
		((offset) + ctx->call_stack_size))
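
/* Spill positions are biased differently depending on the frame layout:
 * with a frame pointer the slots live just above the saved x29/x30 pair
 * (hence the sizeof(void*) * 2 adjustment); without one they are addressed
 * from sp, past the area reserved for outgoing call arguments. */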

#define B_IMM           (1<<27)        // signed imm26 * 4
#define ADR_IMM         (1<<20)        // signed imm21
#define ADRP_IMM        (1LL<<32)      // signed imm21 * 4096

static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < B_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < B_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < B_IMM);
		}
	}
	return 0;
}
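
/* The check is conservative: rather than the exact distance between the
 * branch and its target, it bounds the span of the whole code buffer, so
 * any B/BL (range +/-128MB, signed imm26 * 4) within the buffer is
 * guaranteed to reach. */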

#if 0
static bool aarch64_may_use_adr(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < ADR_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < ADR_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < ADR_IMM);
		}
	}
	return 0;
}

static bool aarch64_may_use_adrp(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < ADRP_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < ADRP_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < ADRP_IMM);
		}
	}
	return 0;
}
#endif

/* Determine whether "val" falls into one of two allowed ranges:
 *   Range 1: [0, 0xfff]
 *   Range 2: Range 1 shifted left by 12 bits (LSL #12)
 * Used to guard the immediate encoding for add/adds/sub/subs/cmp/cmn instructions. */
static bool aarch64_may_encode_imm12(const int64_t val)
{
	return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff)));
}
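
/* e.g. 0xfff and 0x123000 are encodable, while 0x1001 is not: it has bits
 * set both below and above the 12-bit boundary. */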

/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions. */
static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size)
{
	/* fast path: power of two */
	if (value > 0 && !(value & (value - 1))) {
		return 1;
	}

	if (type_size == 8) {
		if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) {
			return 1;
		}
	} else {
		if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) {
			return 1;
		}
	}

	return 0;
}
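
/* Logical immediates are rotated runs of consecutive ones replicated across
 * the operand width: e.g. 0xff and 0x0f0f0f0f are encodable, 0x1234 is not. */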

static bool aarch64_may_encode_imm7_addr_offset(const int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0
		&& offset < 63 * (int32_t)type_size
		&& offset >= -64 * (int32_t)type_size;
}
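
/* Guards the scaled signed 7-bit offset of ldp/stp: for 8-byte accesses
 * this accepts multiples of 8 in [-512, 496]. */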

static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size;
}
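
/* Guards the scaled unsigned 12-bit offset of single-register ldr/str:
 * for 8-byte accesses this accepts multiples of 8 in [0, 32752]. */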

|.macro ASM_REG_REG_OP, op, type, dst, src
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src)
||	} else {
|		op Rw(dst), Rw(src)
||	}
|.endmacro

|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2)
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2)
||	}
|.endmacro

|.macro ASM_REG_REG_REG_TXT_OP, op, type, dst, src1, src2, txt
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2), txt
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2), txt
||	}
|.endmacro

|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2), Rx(src3)
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2), Rw(src3)
||	}
|.endmacro

|.macro ASM_REG_REG_IMM_OP, op, type, dst, src1, val
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), #val
||	} else {
|		op Rw(dst), Rw(src1), #val
||	}
|.endmacro

|.macro ASM_REG_IMM_OP, op, type, reg, val
||	if (ir_type_size[type] == 8) {
|		op Rx(reg), #val
||	} else {
|		op Rw(reg), #val
||	}
|.endmacro

|.macro ASM_FP_REG_IMM_OP, op, type, reg, val
||	if (type == IR_DOUBLE) {
|		op Rd(reg-IR_REG_FP_FIRST), #val
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op Rs(reg-IR_REG_FP_FIRST), #val
||	}
|.endmacro

|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2
||	if (type == IR_DOUBLE) {
|		op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST)
||	}
|.endmacro

typedef struct _ir_backend_data {
	ir_reg_alloc_data  ra_data;
	uint32_t           dessa_from_block;
	dasm_State        *dasm_state;
	ir_bitset          emit_constants;
	int                rodata_label, jmp_table_label;
} ir_backend_data;

#define IR_GP_REG_NAME(code, name64, name32) \
	#name64,
#define IR_GP_REG_NAME32(code, name64, name32) \
	#name32,
#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \
	#name64,
#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \
	#name32,

static const char *_ir_reg_name[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME)
	IR_FP_REGS(IR_FP_REG_NAME)
};

static const char *_ir_reg_name32[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME32)
	IR_FP_REGS(IR_FP_REG_NAME32)
};

/* Calling Convention */
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
	IR_REG_INT_ARG5,
	IR_REG_INT_ARG6,
	IR_REG_INT_ARG7,
	IR_REG_INT_ARG8,
};

static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
	IR_REG_FP_ARG5,
	IR_REG_FP_ARG6,
	IR_REG_FP_ARG7,
	IR_REG_FP_ARG8,
};

const char *ir_reg_name(int8_t reg, ir_type type)
{
	if (reg >= IR_REG_NUM) {
		if (reg == IR_REG_SCRATCH) {
			return "SCRATCH";
		} else {
			IR_ASSERT(reg == IR_REG_ALL);
			return "ALL";
		}
	}
	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
	if (type == IR_VOID) {
		type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE;
	}
	if (ir_type_size[type] == 8) {
		return _ir_reg_name[reg];
	} else {
		return _ir_reg_name32[reg];
	}
}

#define IR_RULES(_)        \
	_(CMP_INT)             \
	_(CMP_FP)              \
	_(MUL_PWR2)            \
	_(DIV_PWR2)            \
	_(MOD_PWR2)            \
	_(SDIV_PWR2)           \
	_(SMOD_PWR2)           \
	_(OP_INT)              \
	_(OP_FP)               \
	_(BINOP_INT)           \
	_(BINOP_FP)            \
	_(SHIFT)               \
	_(SHIFT_CONST)         \
	_(COPY_INT)            \
	_(COPY_FP)             \
	_(CMP_AND_BRANCH_INT)  \
	_(CMP_AND_BRANCH_FP)   \
	_(GUARD_CMP_INT)       \
	_(GUARD_CMP_FP)        \
	_(GUARD_OVERFLOW)      \
	_(OVERFLOW_AND_BRANCH) \
	_(MIN_MAX_INT)         \
	_(REG_BINOP_INT)       \
	_(LOAD_INT)            \
	_(LOAD_FP)             \
	_(STORE_INT)           \
	_(STORE_FP)            \
	_(IF_INT)              \
	_(RETURN_VOID)         \
	_(RETURN_INT)          \
	_(RETURN_FP)           \

#define IR_RULE_ENUM(name) IR_ ## name,

enum _ir_rule {
	IR_FIRST_RULE = IR_LAST_OP,
	IR_RULES(IR_RULE_ENUM)
	IR_LAST_RULE
};

#define IR_RULE_NAME(name)  #name,
const char *ir_rule_name[IR_LAST_OP] = {
	NULL,
	IR_RULES(IR_RULE_NAME)
	NULL
};

/* register allocation */
int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
{
	uint32_t rule = ir_rule(ctx, ref);
	const ir_insn *insn;
	int n = 0;
	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;

	constraints->def_reg = IR_REG_NONE;
	constraints->hints_count = 0;
	switch (rule & IR_RULE_MASK) {
		case IR_BINOP_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op2];
				switch (insn->op) {
					case IR_ADD:
					case IR_ADD_OV:
					case IR_SUB:
					case IR_SUB_OV:
						if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_imm12(val_insn->val.u64)) {
							constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
							n++;
						}
						break;
					case IR_MUL_OV:
						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
						n++;
						break;
					case IR_AND:
					case IR_OR:
					case IR_XOR:
						if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) {
							constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
							n++;
						}
						break;
					case IR_MUL:
					case IR_DIV:
					case IR_MOD:
						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
						n++;
						break;
				}
			}
			if (insn->op == IR_MOD) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
				n++;
			}
			break;
		case IR_MUL_PWR2:
		case IR_DIV_PWR2:
		case IR_MOD_PWR2:
		case IR_SHIFT:
		case IR_SHIFT_CONST:
		case IR_OP_INT:
		case IR_OP_FP:
		case IR_INT2FP:
		case IR_FP2INT:
		case IR_FP2FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (rule == IR_SHIFT_CONST
			 && (insn->op == IR_ROL || insn->op == IR_ROR)
			 && ir_type_size[insn->type] < 4) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (rule == IR_SHIFT
			 && (insn->op == IR_ROL || insn->op == IR_ROR)
			 && ir_type_size[insn->type] < 4) {
				if (insn->op == IR_ROL) {
					flags |= IR_DEF_CONFLICTS_WITH_INPUT_REGS;
				}
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
				n++;
			} else if (rule == IR_SHIFT && insn->op == IR_ROL) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SDIV_PWR2:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2)) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!aarch64_may_encode_imm12(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_SMOD_PWR2:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n++;
			break;
		case IR_CTPOP:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			constraints->tmp_regs[0] = IR_TMP_REG(2, IR_DOUBLE, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_BINOP_FP:
		case IR_MIN_MAX_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_CMP_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_VSTORE:
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_LOAD_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_STORE_INT:
		case IR_STORE_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				if (!IR_IS_TYPE_INT(insn->type) || IR_IS_SYM_CONST(insn->op) || insn->val.i64 != 0) {
					constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_SWITCH:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n++;
			break;
		case IR_CALL:
			insn = &ctx->ir_base[ref];
			constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			IR_FALLTHROUGH;
		case IR_TAILCALL:
			insn = &ctx->ir_base[ref];
			if (insn->inputs_count > 2) {
				constraints->hints[2] = IR_REG_NONE;
				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
				if (!IR_IS_CONST_REF(insn->op2)) {
					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
					n++;
				}
			}
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_COND:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2)) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_COPY_INT:
		case IR_COPY_FP:
		case IR_TRUNC:
		case IR_BITCAST:
		case IR_PROTO:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_ZEXT:
		case IR_SEXT:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG;
			break;
		case IR_PARAM:
			constraints->def_reg = ir_get_param_reg(ctx, ref);
			flags = 0;
			break;
		case IR_PI:
		case IR_PHI:
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_RLOAD:
			constraints->def_reg = ctx->ir_base[ref].op2;
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_EXITCALL:
			constraints->def_reg = IR_REG_INT_RET1;
			break;
		case IR_RSTORE:
			flags = IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_RETURN_INT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_INT_RET1;
			constraints->hints_count = 3;
			break;
		case IR_RETURN_FP:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_FP_RET1;
			constraints->hints_count = 3;
			break;
		case IR_SNAPSHOT:
			flags = 0;
			break;
		case IR_VA_START:
			flags = IR_OP1_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_VA_ARG:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
	}
	constraints->tmps_count = n;

	return flags;
}

/* instruction selection */
static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
{
	if (!IR_IS_CONST_REF(addr_ref)) {
		ir_insn *addr_insn = &ctx->ir_base[addr_ref];

		if (addr_insn->op == IR_ADD
		 && !IR_IS_CONST_REF(addr_insn->op1)
		 && IR_IS_CONST_REF(addr_insn->op2)  // TODO: temporary workaround
		 && !IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op)
		 && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) {
			ir_use_list *use_list = &ctx->use_lists[addr_ref];
			ir_ref j = use_list->count;

			if (j > 1) {
				/* check if address is used only in LOAD and STORE */
				ir_ref *p = &ctx->use_edges[use_list->refs];

				do {
					ir_insn *insn = &ctx->ir_base[*p];
					if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
						return;
					}
					p++;
				} while (--j);
			}
			ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op;
		}
	}
}
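
/* e.g. for "x = LOAD(ADD(p, 16))" the ADD is marked as fused, so the load
 * can be emitted as a single "ldr ..., [Rx(p), #16]" without materializing
 * the address in a register first. */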

static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *op2_insn;
	ir_insn *insn = &ctx->ir_base[ref];

	switch (insn->op) {
		case IR_EQ:
		case IR_NE:
		case IR_LT:
		case IR_GE:
		case IR_LE:
		case IR_GT:
		case IR_ULT:
		case IR_UGE:
		case IR_ULE:
		case IR_UGT:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return IR_CMP_INT;
			} else {
				return IR_CMP_FP;
			}
			break;
		case IR_ADD:
		case IR_SUB:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.i64 == 0) {
						return IR_COPY_INT;
					}
				}
binop_int:
				return IR_BINOP_INT;
			} else {
binop_fp:
				return IR_BINOP_FP;
			}
			break;
		case IR_MUL:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// 0
					} else if (op2_insn->val.u64 == 1) {
						return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						return IR_MUL_PWR2;
					}
				}
				return IR_BINOP_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_ADD_OV:
		case IR_SUB_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_MUL_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_DIV:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 1) {
						return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						if (IR_IS_TYPE_UNSIGNED(insn->type)) {
							return IR_DIV_PWR2;
						} else {
							return IR_SDIV_PWR2;
						}
					}
				}
				return IR_BINOP_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_MOD:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
					if (IR_IS_TYPE_UNSIGNED(insn->type)) {
						return IR_MOD_PWR2;
					} else {
						return IR_SMOD_PWR2;
					}
				}
			}
			return IR_BINOP_INT;
		case IR_BSWAP:
		case IR_NOT:
		case IR_CTLZ:
		case IR_CTTZ:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			return IR_OP_INT;
		case IR_NEG:
		case IR_ABS:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_OP_INT;
			} else {
				return IR_OP_FP;
			}
		case IR_OR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					return IR_COPY_INT;
				} else if (op2_insn->val.i64 == -1) {
					// -1
				}
			}
			goto binop_int;
		case IR_AND:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// 0
				} else if (op2_insn->val.i64 == -1) {
					return IR_COPY_INT;
				}
			}
			goto binop_int;
		case IR_XOR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				}
			}
			goto binop_int;
		case IR_SHL:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						return IR_COPY_INT;
					} else if (ir_type_size[insn->type] >= 4) {
						if (op2_insn->val.u64 == 1) {
							// lea [op1*2]
						} else if (op2_insn->val.u64 == 2) {
							// lea [op1*4]
						} else if (op2_insn->val.u64 == 3) {
							// lea [op1*8]
						}
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_SHR:
		case IR_SAR:
		case IR_ROL:
		case IR_ROR:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						return IR_COPY_INT;
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_MIN:
		case IR_MAX:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_MIN_MAX_INT;
			} else {
				goto binop_fp;
			}
			break;
//		case IR_COND:
		case IR_COPY:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_COPY_INT;
			} else {
				return IR_COPY_FP;
			}
			break;
		case IR_CALL:
			ctx->flags2 |= IR_HAS_CALLS;
			return IR_CALL;
		case IR_VAR:
			return IR_SKIPPED | IR_VAR;
		case IR_PARAM:
			return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
		case IR_ALLOCA:
			if (ctx->flags & IR_FUNCTION) {
				ctx->flags |= IR_USE_FRAME_POINTER;
				ctx->flags2 |= IR_HAS_ALLOCA;
			}
			return IR_ALLOCA;
		case IR_LOAD:
			ir_match_fuse_addr(ctx, insn->op2, insn->type);
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_LOAD_INT;
			} else {
				return IR_LOAD_FP;
			}
			break;
		case IR_STORE:
			ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type);
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
				return IR_STORE_INT;
			} else {
				return IR_STORE_FP;
			}
			break;
		case IR_RLOAD:
			if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) {
				return IR_SKIPPED | IR_RLOAD;
			}
			return IR_RLOAD;
		case IR_RSTORE:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
					ir_insn *op_insn = &ctx->ir_base[insn->op2];

					if (!ctx->rules[insn->op2]) {
						ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2);
					}
					if (ctx->rules[insn->op2] == IR_BINOP_INT) {
						if (ctx->ir_base[op_insn->op1].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op1].op2 == insn->op3) {
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
						 && ctx->ir_base[op_insn->op2].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op2].op2 == insn->op3) {
							ir_ref tmp = op_insn->op1;
							op_insn->op1 = op_insn->op2;
							op_insn->op2 = tmp;
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						}
					}
				}
			}
			return IR_RSTORE;
		case IR_START:
		case IR_BEGIN:
		case IR_IF_TRUE:
		case IR_IF_FALSE:
		case IR_CASE_VAL:
		case IR_CASE_DEFAULT:
		case IR_MERGE:
		case IR_LOOP_BEGIN:
		case IR_UNREACHABLE:
			return IR_SKIPPED | insn->op;
		case IR_RETURN:
			if (!insn->op2) {
				return IR_RETURN_VOID;
			} else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_RETURN_INT;
			} else {
				return IR_RETURN_FP;
			}
		case IR_IF:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_CMP_AND_BRANCH_INT;
					} else {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_CMP_AND_BRANCH_FP;
					}
				} else if (op2_insn->op == IR_OVERFLOW) {
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_OVERFLOW_AND_BRANCH;
				}
			}
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_IF_INT;
			} else {
				IR_ASSERT(0 && "NIY IR_IF_FP");
				break;
			}
		case IR_GUARD:
		case IR_GUARD_NOT:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
					// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
				 && (insn->op2 == ref - 1 ||
				     (insn->op2 == ctx->prev_ref[ref] - 1
				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_GUARD_CMP_INT;
					} else {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_GUARD_CMP_FP;
					}
				} else if (op2_insn->op == IR_OVERFLOW) {
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_GUARD_OVERFLOW;
				}
			}
			return insn->op;
		case IR_VA_START:
			ctx->flags2 |= IR_HAS_VA_START;
			if (ctx->ir_base[insn->op2].op == IR_ALLOCA) {
				ir_use_list *use_list = &ctx->use_lists[insn->op2];
				ir_ref *p, n = use_list->count;
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					ir_insn *use_insn = &ctx->ir_base[*p];
					if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) {
					} else if (use_insn->op == IR_VA_COPY) {
						if (use_insn->op3 == insn->op2) {
							ctx->flags2 |= IR_HAS_VA_COPY;
						}
					} else if (use_insn->op == IR_VA_ARG) {
						if (use_insn->op2 == insn->op2) {
							if (IR_IS_TYPE_INT(use_insn->type)) {
								ctx->flags2 |= IR_HAS_VA_ARG_GP;
							} else {
								IR_ASSERT(IR_IS_TYPE_FP(use_insn->type));
								ctx->flags2 |= IR_HAS_VA_ARG_FP;
							}
						}
					} else if (*p > ref) {
						/* direct va_list access */
						ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
					}
				}
			}
			return IR_VA_START;
		case IR_VA_END:
			return IR_SKIPPED | IR_NOP;
		case IR_VADDR:
			if (ctx->use_lists[ref].count > 0) {
				ir_use_list *use_list = &ctx->use_lists[ref];
				ir_ref *p, n = use_list->count;

				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					if (ctx->ir_base[*p].op != IR_VA_END) {
						return IR_VADDR;
					}
				}
			}
			return IR_SKIPPED | IR_NOP;
		default:
			break;
	}

	return insn->op;
}

static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule)
{
}

/* code generation */
static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	int32_t offset;

	IR_ASSERT(ref >= 0);
	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		*reg = ctx->spill_base;
		return offset;
	}
	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(offset);
}

static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v)
{
	int32_t offset;
	ir_reg base;

	IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]);
	offset = ctx->live_intervals[v]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		return IR_MEM_BO(ctx->spill_base, offset);
	}
	base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	offset = IR_SPILL_POS_TO_OFFSET(offset);
	return IR_MEM_BO(base, offset);
}

static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	return ir_vreg_spill_slot(ctx, ctx->vregs[ref]);
}

static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem)
{
	return IR_MEM_VAL(ir_ref_spill_slot(ctx, ref)) == IR_MEM_VAL(mem);
}

static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	ir_insn *var_insn = &ctx->ir_base[ref];

	IR_ASSERT(var_insn->op == IR_VAR);
	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(var_insn->op3);
}

static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_live_interval *ival;

	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	while (ival) {
		ir_use_pos *use_pos = ival->use_pos;
		while (use_pos) {
			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
				return !use_pos->next || use_pos->next->op_num == 0;
			}
			use_pos = use_pos->next;
		}
		ival = ival->next;
	}
	return 0;
}
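
/* i.e. the reload before "use" may be skipped when "use" is the last read of
 * the interval (any following use position is a definition, op_num == 0). */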

static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (ir_type_size[type] == 8) {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rx(reg), xzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rx(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			|	movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 32) & 0xffff) {
			|	movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else {
			|	movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
		}
	} else {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rw(reg), wzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rw(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			|	movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
		}
	}
}
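
/* e.g. loading 0x123456789abc emits
 *   movz reg, #0x9abc
 *   movk reg, #0x5678, lsl #16
 *   movk reg, #0x1234, lsl #32
 * while 0xffffffffffff0000 needs only a single "movn reg, #0xffff". */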

static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			switch (ir_type_size[type]) {
				default:
					IR_ASSERT(0);
				case 8:
					|	ldr Rx(reg), [Rx(base_reg), #offset]
					break;
				case 4:
					|	ldr Rw(reg), [Rx(base_reg), #offset]
					break;
				case 2:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsh Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrh Rw(reg), [Rx(base_reg), #offset]
					}
					break;
				case 1:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsb Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrb Rw(reg), [Rx(base_reg), #offset]
					}
					break;
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	switch (ir_type_size[type]) {
		default:
			IR_ASSERT(0);
		case 8:
			|	ldr Rx(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 4:
			|	ldr Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 2:
			if (IR_IS_TYPE_SIGNED(type)) {
				|	ldrsh Rw(reg), [Rx(base_reg), Rx(index_reg)]
			} else {
				|	ldrh Rw(reg), [Rx(base_reg), Rx(index_reg)]
			}
			break;
		case 1:
			if (IR_IS_TYPE_SIGNED(type)) {
				|	ldrsb Rw(reg), [Rx(base_reg), Rx(index_reg)]
			} else {
				|	ldrb Rw(reg), [Rx(base_reg), Rx(index_reg)]
			}
			break;
	}
}

static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *insn = &ctx->ir_base[src];
	int label;

	if (type == IR_FLOAT && insn->val.u32 == 0) {
		|	fmov Rs(reg-IR_REG_FP_FIRST), wzr
	} else if (type == IR_DOUBLE && insn->val.u64 == 0) {
		|	fmov Rd(reg-IR_REG_FP_FIRST), xzr
	} else {
		label = ir_const_label(ctx, src);
		if (type == IR_DOUBLE) {
			|	ldr Rd(reg-IR_REG_FP_FIRST), =>label
		} else {
			IR_ASSERT(type == IR_FLOAT);
			|	ldr Rs(reg-IR_REG_FP_FIRST), =>label
		}
	}
}

static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			if (type == IR_DOUBLE) {
				|	ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			} else {
				IR_ASSERT(type == IR_FLOAT);
				|	ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	if (type == IR_DOUBLE) {
		|	ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	} else {
		IR_ASSERT(type == IR_FLOAT);
		|	ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	}
}

static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_load_mem_int(ctx, type, reg, mem);
	} else {
		ir_emit_load_mem_fp(ctx, type, reg, mem);
	}
}

static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	if (IR_IS_CONST_REF(src)) {
		if (IR_IS_TYPE_INT(type)) {
			ir_insn *insn = &ctx->ir_base[src];

			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
				void *addr = ir_sym_val(ctx, insn);
				IR_ASSERT(addr);
				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
			} else if (insn->op == IR_STR) {
				ir_backend_data *data = ctx->data;
				dasm_State **Dst = &data->dasm_state;
				int label = ir_const_label(ctx, src);

				|	adr Rx(reg), =>label
			} else {
				ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
			}
		} else {
			ir_emit_load_imm_fp(ctx, type, reg, src);
		}
	} else {
		ir_mem mem = ir_ref_spill_slot(ctx, src);
		ir_emit_load_mem(ctx, type, reg, mem);
	}
}

static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			switch (ir_type_size[type]) {
				default:
					IR_ASSERT(0);
				case 8:
					|	str Rx(reg), [Rx(base_reg), #offset]
					break;
				case 4:
					|	str Rw(reg), [Rx(base_reg), #offset]
					break;
				case 2:
					|	strh Rw(reg), [Rx(base_reg), #offset]
					break;
				case 1:
					|	strb Rw(reg), [Rx(base_reg), #offset]
					break;
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	switch (ir_type_size[type]) {
		default:
			IR_ASSERT(0);
		case 8:
			|	str Rx(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 4:
			|	str Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 2:
			|	strh Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 1:
			|	strb Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
	}
}

static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			if (type == IR_DOUBLE) {
				|	str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			} else {
				IR_ASSERT(type == IR_FLOAT);
				|	str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	if (type == IR_DOUBLE) {
		|	str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	} else {
		IR_ASSERT(type == IR_FLOAT);
		|	str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	}
}

static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_store_mem_int(ctx, type, mem, reg);
	} else {
		ir_emit_store_mem_fp(ctx, type, mem, reg);
	}
}

static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
{
	IR_ASSERT(dst >= 0);
	ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg);
}

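/* The register-to-register "mov" is an alias of "orr" with the zero register
 * and cannot address sp; moves to or from sp use the "add ..., #0" alias
 * instead, hence the special cases below. */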
static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		if (dst == IR_REG_STACK_POINTER) {
			|	mov sp, Rx(src)
		} else if (src == IR_REG_STACK_POINTER) {
			|	mov Rx(dst), sp
		} else {
			|	mov Rx(dst), Rx(src)
		}
	} else {
		|	mov Rw(dst), Rw(src)
	}
}

static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		|	mov Rx(dst), Rx(src)
	} else {
		|	mov Rw(dst), Rw(src)
	}
}

static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		|	fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST)
	} else {
		|	fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST)
	}
}

static void ir_emit_prologue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int offset;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		offset = -(ctx->stack_frame_size+16);
		if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
			|	stp x29, x30, [sp, #offset]!
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size+16)
			|	stp x29, x30, [sp]
		}
		|	mov x29, sp
		if (ctx->call_stack_size) {
			|	sub sp, sp, #(ctx->call_stack_size)
		}
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
	if (ctx->used_preserved_regs) {
		ir_reg fp;
		uint32_t i;
		ir_reg prev = IR_REG_NONE;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = ctx->stack_frame_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (prev == IR_REG_NONE) {
					prev = i;
				} else if (i < IR_REG_FP_FIRST) {
					offset -= sizeof(void*) * 2;
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(i), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(i), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
				} else {
					if (prev < IR_REG_FP_FIRST) {
						offset -= sizeof(void*);
						|	str Rx(prev), [Rx(fp), #offset]
						offset -= sizeof(void*);
						|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
					} else {
						offset -= sizeof(void*) * 2;
						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
							|	stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
						} else {
							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
							|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
							|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
						}
					}
					prev = IR_REG_NONE;
				}
			}
		}
		if (prev != IR_REG_NONE) {
			if (prev < IR_REG_FP_FIRST) {
				offset -= sizeof(void*);
				|	str Rx(prev), [Rx(fp), #offset]
			} else {
				offset -= sizeof(void*);
				|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
			}
		}
	}
	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#ifndef __APPLE__
		const int8_t *int_reg_params = _ir_int_reg_params;
		const int8_t *fp_reg_params = _ir_fp_reg_params;
		ir_reg fp;
		int offset;
		int i;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = ctx->locals_area_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->locals_area_size + ctx->call_stack_size;
		}

		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
			ir_reg prev = IR_REG_NONE;

			/* skip named args */
			offset += sizeof(void*) * ctx->gp_reg_params;
			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
				if (prev != IR_REG_NONE) {
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
					offset += sizeof(void*) * 2;
				} else {
					prev = int_reg_params[i];
				}
			}
			if (prev != IR_REG_NONE) {
				|	str Rx(prev), [Rx(fp), #offset]
				offset += sizeof(void*);
			}
		}
		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
			/* skip named args */
			offset += 16 * ctx->fp_reg_params;
			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
				// TODO: Rd->Rq stur->str ???
				|	str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
				offset += 16;
			}
		}
#endif
	}
}
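
/* e.g. for a frame-pointer function with a 32-byte frame and no outgoing
 * call area, the prologue above emits roughly:
 *   stp x29, x30, [sp, #-48]!
 *   mov x29, sp
 */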
1643
1644static void ir_emit_epilogue(ir_ctx *ctx)
1645{
1646	ir_backend_data *data = ctx->data;
1647	dasm_State **Dst = &data->dasm_state;
1648
1649	if (ctx->used_preserved_regs) {
1650		int offset;
1651		uint32_t i;
1652		ir_reg prev = IR_REG_NONE;
1653		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
1654		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
1655
1656		if (ctx->flags & IR_USE_FRAME_POINTER) {
1657			offset = ctx->stack_frame_size + sizeof(void*) * 2;
1658		} else {
1659			offset = ctx->stack_frame_size + ctx->call_stack_size;
1660		}
1661		for (i = 0; i < IR_REG_NUM; i++) {
1662			if (IR_REGSET_IN(used_preserved_regs, i)) {
1663			    if (prev == IR_REG_NONE) {
1664					prev = i;
1665				} else if (i < IR_REG_FP_FIRST) {
1666					offset -= sizeof(void*) * 2;
1667					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1668						|	ldp Rx(prev), Rx(i), [Rx(fp), #offset]
1669					} else {
1670						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1671						|	ldr Rx(prev), [Rx(fp), #offset]
1672						|	ldr Rx(i), [Rx(fp), #(offset+8)]
1673					}
1674					prev = IR_REG_NONE;
1675				} else {
1676					if (prev < IR_REG_FP_FIRST) {
1677						offset -= sizeof(void*);
1678						|	ldr Rx(prev), [Rx(fp), #offset]
1679						offset -= sizeof(void*);
1680						|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1681					} else {
1682						offset -= sizeof(void*) * 2;
1683						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1684							|	ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1685						} else {
1686							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1687							|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
1688							|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
1689						}
1690					}
1691					prev = IR_REG_NONE;
1692				}
1693			}
1694		}
1695	    if (prev != IR_REG_NONE) {
1696			if (prev < IR_REG_FP_FIRST) {
1697				offset -= sizeof(void*);
1698				|	ldr Rx(prev), [Rx(fp), #offset]
1699			} else {
1700				offset -= sizeof(void*);
1701				|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
1702			}
1703		}
1704	}
1705
1706	if (ctx->flags & IR_USE_FRAME_POINTER) {
1707		if (ctx->call_stack_size || (ctx->flags2 & IR_HAS_ALLOCA)) {
1708			| mov sp, x29
1709		}
1710		if (aarch64_may_encode_imm7_addr_offset(ctx->stack_frame_size+16, 8)) {
1711			|	ldp x29, x30, [sp], #(ctx->stack_frame_size+16)
1712		} else {
1713			|	ldp x29, x30, [sp]
1714			|	add sp, sp, #(ctx->stack_frame_size+16)
1715		}
1716	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
1717		if (ctx->fixed_stack_red_zone) {
1718			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
1719		} else {
1720			| add sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
1721		}
1722	}
1723}
1724
1725static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1726{
1727	ir_backend_data *data = ctx->data;
1728	dasm_State **Dst = &data->dasm_state;
1729	ir_type type = insn->type;
1730	ir_ref op1 = insn->op1;
1731	ir_ref op2 = insn->op2;
1732	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
1733	ir_reg op1_reg = ctx->regs[def][1];
1734	ir_reg op2_reg = ctx->regs[def][2];
1735	ir_reg tmp_reg;
1736
1737	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
1738
1739	if (IR_REG_SPILLED(op1_reg)  || IR_IS_CONST_REF(op1)) {
1740		op1_reg = IR_REG_NUM(op1_reg);
1741		ir_emit_load(ctx, type, op1_reg, op1);
1742	}
1743	if (op2_reg != IR_REG_NONE) {
1744		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
1745			op2_reg = IR_REG_NUM(op2_reg);
1746			if (op1 != op2) {
1747				ir_emit_load(ctx, type, op2_reg, op2);
1748			}
1749		}
1750		switch (insn->op) {
1751			default:
1752				IR_ASSERT(0 && "NIY binary op");
1753			case IR_ADD:
1754				|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg
1755				break;
1756			case IR_ADD_OV:
1757				|	ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg
1758				break;
1759			case IR_SUB:
1760				|	ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg
1761				break;
1762			case IR_SUB_OV:
1763				|	ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg
1764				break;
1765			case IR_MUL:
1766				|	ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg
1767				break;
1768			case IR_MUL_OV:
1769				if (ir_type_size[type] == 8) {
1770					if (IR_IS_TYPE_SIGNED(type)) {
1771						tmp_reg = ctx->regs[def][3];
1772						IR_ASSERT(tmp_reg != IR_REG_NONE);
1773						|	smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
1774						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
1775						|	cmp Rx(tmp_reg), Rx(def_reg), asr #63
1776					} else {
1777						tmp_reg = ctx->regs[def][3];
1778						IR_ASSERT(tmp_reg != IR_REG_NONE);
1779						|	umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
1780						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
1781						|	cmp Rx(tmp_reg), xzr
1782					}
1783				} else {
1784					if (IR_IS_TYPE_SIGNED(type)) {
1785						tmp_reg = ctx->regs[def][3];
1786						IR_ASSERT(tmp_reg != IR_REG_NONE);
1787						|	smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
1788						|	asr Rx(tmp_reg), Rx(def_reg), #32
1789						|	cmp Rx(tmp_reg), Rx(def_reg), asr #31
1790					} else {
1791						|	umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
1792						|	cmp xzr, Rx(def_reg), lsr #32
1793					}
1794				}
1795				break;
1796			case IR_DIV:
1797				if (IR_IS_TYPE_SIGNED(type)) {
1798					|	ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg
1799				} else {
1800					|	ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg
1801				}
1802				break;
1803			case IR_MOD:
1804				tmp_reg = ctx->regs[def][3];
1805				IR_ASSERT(tmp_reg != IR_REG_NONE);
1806				if (IR_IS_TYPE_SIGNED(type)) {
1807					|	ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg
1808					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
1809				} else {
1810					|	ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg
1811					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
1812				}
1813				break;
1814			case IR_OR:
1815				|	ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg
1816				break;
1817			case IR_AND:
1818				|	ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg
1819				break;
1820			case IR_XOR:
1821				|	ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg
1822				break;
1823		}
1824	} else {
1825		IR_ASSERT(IR_IS_CONST_REF(op2));
1826		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
1827		int32_t val = ctx->ir_base[op2].val.i32;
1828		switch (insn->op) {
1829			default:
1830				IR_ASSERT(0 && "NIY binary op");
1831			case IR_ADD:
1832				|	ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val
1833				break;
1834			case IR_ADD_OV:
1835				|	ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val
1836				break;
1837			case IR_SUB:
1838				|	ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val
1839				break;
1840			case IR_SUB_OV:
1841				|	ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val
1842				break;
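			/* For 64-bit or/and/eor the constant is re-read as a full uint64_t:
			 * the sign-extended 32-bit "val" above would mangle logical
			 * immediates that have bits in the upper half. */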
1843			case IR_OR:
1844				if (ir_type_size[type] == 8) {
1845					uint64_t val = ctx->ir_base[op2].val.u64;
1846					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
1847				} else {
1848					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
1849				}
1850				break;
1851			case IR_AND:
1852				if (ir_type_size[type] == 8) {
1853					uint64_t val = ctx->ir_base[op2].val.u64;
1854					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
1855				} else {
1856					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
1857				}
1858				break;
1859			case IR_XOR:
1860				if (ir_type_size[type] == 8) {
1861					uint64_t val = ctx->ir_base[op2].val.u64;
1862					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
1863				} else {
1864					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
1865				}
1866				break;
1867		}
1868	}
1869	if (IR_REG_SPILLED(ctx->regs[def][0])) {
1870		ir_emit_store(ctx, type, def, def_reg);
1871	}
1872}
1873
1874static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1875{
1876	ir_backend_data *data = ctx->data;
1877	dasm_State **Dst = &data->dasm_state;
1878	ir_type type = insn->type;
1879	ir_ref op1 = insn->op1;
1880	ir_ref op2 = insn->op2;
1881	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
1882	ir_reg op1_reg = ctx->regs[def][1];
1883	ir_reg op2_reg = ctx->regs[def][2];
1884
1885	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
1886
1887	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
1888		op1_reg = IR_REG_NUM(op1_reg);
1889		ir_emit_load(ctx, type, op1_reg, op1);
1890	}
1891	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
1892		op2_reg = IR_REG_NUM(op2_reg);
1893		ir_emit_load(ctx, type, op2_reg, op2);
1894	}
1895
1896	if (op1 == op2) {
1897		return;
1898	}
1899
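	/* MIN/MAX: one cmp, then csel with the condition chosen by signedness
	 * (le/ge for signed, ls/hs for unsigned). */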
1900	if (ir_type_size[type] == 8) {
1901		|	cmp Rx(op1_reg), Rx(op2_reg)
1902		if (insn->op == IR_MIN) {
1903			if (IR_IS_TYPE_SIGNED(type)) {
1904				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le
1905			} else {
1906				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls
1907			}
1908		} else {
1909			IR_ASSERT(insn->op == IR_MAX);
1910			if (IR_IS_TYPE_SIGNED(type)) {
1911				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge
1912			} else {
1913				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs
1914			}
1915		}
1916	} else {
1917		|	cmp Rw(op1_reg), Rw(op2_reg)
1918		if (insn->op == IR_MIN) {
1919			if (IR_IS_TYPE_SIGNED(type)) {
1920				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le
1921			} else {
1922				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls
1923			}
1924		} else {
1925			IR_ASSERT(insn->op == IR_MAX);
1926			if (IR_IS_TYPE_SIGNED(type)) {
1927				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge
1928			} else {
1929				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs
1930			}
1931		}
1932	}
1933
1934	if (IR_REG_SPILLED(ctx->regs[def][0])) {
1935		ir_emit_store(ctx, type, def, def_reg);
1936	}
1937}
1938
1939static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1940{
1941	ir_backend_data *data = ctx->data;
1942	dasm_State **Dst = &data->dasm_state;
1943	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
1944	ir_insn *math_insn = &ctx->ir_base[insn->op1];
1945	ir_type type = math_insn->type;
1946
1947	IR_ASSERT(def_reg != IR_REG_NONE);
1948	IR_ASSERT(IR_IS_TYPE_INT(type));
1949	if (math_insn->op == IR_MUL_OV) {
1950		|	cset Rw(def_reg), ne
1951	} else if (IR_IS_TYPE_SIGNED(type)) {
1952		|	cset Rw(def_reg), vs
1953	} else if (math_insn->op == IR_ADD_OV) {
1954		|	cset Rw(def_reg), cs
	} else { /* unsigned SUB_OV: subs leaves C clear on borrow */
		|	cset Rw(def_reg), cc
1955	}
1956	if (IR_REG_SPILLED(ctx->regs[def][0])) {
1957		ir_emit_store(ctx, insn->type, def, def_reg);
1958	}
1959}
1960
1961static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
1962{
1963	ir_backend_data *data = ctx->data;
1964	dasm_State **Dst = &data->dasm_state;
1965	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
1966	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
1967	ir_type type = math_insn->type;
1968	uint32_t true_block, false_block, next_block;
1969	bool reverse = 0;
1970
1971	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
1972	if (true_block == next_block) {
1973		reverse = 1;
1974		true_block = false_block;
1975		false_block = 0;
1976	} else if (false_block == next_block) {
1977		false_block = 0;
1978	}
1979
1980	if (math_insn->op == IR_MUL_OV) {
1981		if (reverse) {
1982			|	beq =>true_block
1983		} else {
1984			|	bne =>true_block
1985		}
1986	} else if (IR_IS_TYPE_SIGNED(type)) {
1987		if (reverse) {
1988			|	bvc =>true_block
1989		} else {
1990			|	bvs =>true_block
1991		}
1992	} else {
		/* Unsigned overflow: adds leaves C=1 on carry, while subs leaves C=0
		 * on borrow (AArch64 inverts the carry for subtraction). */
		bool carry_set_on_ov = (math_insn->op == IR_ADD_OV);
1993		if (reverse == carry_set_on_ov) {
1994			|	bcc =>true_block
1995		} else {
1996			|	bcs =>true_block
1997		}
1998	}
1999	if (false_block) {
2000		|	b =>false_block
2001	}
2002}
2003
2004static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2005{
2006	ir_backend_data *data = ctx->data;
2007	dasm_State **Dst = &data->dasm_state;
2008	ir_insn *op_insn = &ctx->ir_base[insn->op2];
2009	ir_type type = op_insn->type;
2010	ir_ref op2 = op_insn->op2;
2011	ir_reg op2_reg = ctx->regs[insn->op2][2];
2012	ir_reg reg;
2013
2014	IR_ASSERT(insn->op == IR_RSTORE);
2015	reg = insn->op3;
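	/* Binop fused into RSTORE: the operation is applied in place on the
	 * target's fixed register, so no separate result register is needed. */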
2016
2017	if (op2_reg == IR_REG_NONE) {
2018		ir_val *val = &ctx->ir_base[op2].val;
2019
2020		IR_ASSERT(IR_IS_CONST_REF(op2));
2021		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
2022		switch (op_insn->op) {
2023			default:
2024				IR_ASSERT(0 && "NIY binary op");
2025			case IR_ADD:
2026				|	ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32
2027				break;
2028			case IR_SUB:
2029				|	ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32
2030				break;
2031			case IR_OR:
2032				|	ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32
2033				break;
2034			case IR_AND:
2035				|	ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32
2036				break;
2037			case IR_XOR:
2038				|	ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32
2039				break;
2040		}
2041	} else {
2042		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
2043			op2_reg = IR_REG_NUM(op2_reg);
2044			ir_emit_load(ctx, type, op2_reg, op2);
2045		}
2046		switch (op_insn->op) {
2047			default:
2048				IR_ASSERT(0 && "NIY binary op");
2049			case IR_ADD:
2050				|	ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg
2051				break;
2052			case IR_SUB:
2053				|	ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg
2054				break;
2055			case IR_MUL:
2056				|	ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg
2057				break;
2058			case IR_OR:
2059				|	ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg
2060				break;
2061			case IR_AND:
2062				|	ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg
2063				break;
2064			case IR_XOR:
2065				|	ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg
2066				break;
2067		}
2068	}
2069}
2070
2071static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2072{
2073	ir_backend_data *data = ctx->data;
2074	dasm_State **Dst = &data->dasm_state;
2075	ir_type type = insn->type;
2076	ir_ref op1 = insn->op1;
2077	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2078	ir_reg op1_reg = ctx->regs[def][1];
2079
2080	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2081	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2082	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2083
2084	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2085		op1_reg = IR_REG_NUM(op1_reg);
2086		ir_emit_load(ctx, type, op1_reg, op1);
2087	}
2088	if (insn->op == IR_MUL) {
2089		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2090		if (shift == 1) {
2091			|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op1_reg
2092		} else {
2093			|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
2094		}
2095	} else if (insn->op == IR_DIV) {
2096		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2097		|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
2098	} else {
2099		IR_ASSERT(insn->op == IR_MOD);
2100		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
2101		|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, mask
2102	}
2103	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2104		ir_emit_store(ctx, type, def, def_reg);
2105	}
2106}
2107
2108static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2109{
2110	ir_backend_data *data = ctx->data;
2111	dasm_State **Dst = &data->dasm_state;
2112	ir_type type = insn->type;
2113	ir_ref op1 = insn->op1;
2114	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2115	ir_reg op1_reg = ctx->regs[def][1];
2116	ir_reg op2_reg = ctx->regs[def][2];
2117	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2118	int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
2119
2120	IR_ASSERT(shift != 0);
2121	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2122	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2123	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && def_reg != op1_reg);
2124
2125	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2126		op1_reg = IR_REG_NUM(op1_reg);
2127		ir_emit_load(ctx, type, op1_reg, op1);
2128	}
2129
2130	if (op2_reg != IR_REG_NONE) {
2131		ir_emit_load_imm_int(ctx, type, op2_reg, offset);
2132	}
2133
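
	/* Signed division by 2^shift must round toward zero: a negative dividend
	 * is biased by (2^shift - 1) before the arithmetic shift. The cmp/csel
	 * pair below applies the bias only when op1 < 0. */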
2134	if (ir_type_size[type] == 8) {
2135		|	cmp Rx(op1_reg), #0
2136		if (op2_reg != IR_REG_NONE) {
2137			|	add Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
2138		} else {
2139			|	add Rx(def_reg), Rx(op1_reg), #offset
2140		}
2141		|	csel Rx(def_reg), Rx(def_reg), Rx(op1_reg), lt
2142		|	asr Rx(def_reg), Rx(def_reg), #shift
2143	} else {
2144		|	cmp Rw(op1_reg), #0
2145		if (op2_reg != IR_REG_NONE) {
2146			|	add Rw(def_reg), Rw(op1_reg), Rw(op2_reg)
2147		} else {
2148			|	add Rw(def_reg), Rw(op1_reg), #offset
2149		}
2150		|	csel Rw(def_reg), Rw(def_reg), Rw(op1_reg), lt
2151		if (ir_type_size[type] == 4) {
2152			|	asr Rw(def_reg), Rw(def_reg), #shift
2153		} else if (ir_type_size[type] == 2) {
2154			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #16
2155		} else {
2156			IR_ASSERT(ir_type_size[type] == 1);
2157			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #8
2158		}
2159	}
2160	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2161		ir_emit_store(ctx, type, def, def_reg);
2162	}
2163}
2164
2165static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2166{
2167	ir_backend_data *data = ctx->data;
2168	dasm_State **Dst = &data->dasm_state;
2169	ir_type type = insn->type;
2170	ir_ref op1 = insn->op1;
2171	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2172	ir_reg op1_reg = ctx->regs[def][1];
2173	ir_reg tmp_reg = ctx->regs[def][3];
2174//	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2175	uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
2176
2177	IR_ASSERT(mask != 0);
2178	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2179	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2180	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg);
2181
2182	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
2183		op1_reg = IR_REG_NUM(op1_reg);
2184		ir_emit_load(ctx, type, op1_reg, op1);
2185	}
2186	if (def_reg != op1_reg) {
2187		if (op1_reg != IR_REG_NONE) {
2188			ir_emit_mov(ctx, type, def_reg, op1_reg);
2189		} else {
2190			ir_emit_load(ctx, type, def_reg, op1);
2191		}
2192	}
2193
2194//	|	ASM_REG_REG_IMM_OP asr, type, tmp_reg, def_reg, (ir_type_size[type]*8-1)
2195//	|	ASM_REG_REG_IMM_OP lsr, type, tmp_reg, tmp_reg, (ir_type_size[type]*8-shift)
2196//	|	ASM_REG_REG_REG_OP add, type, def_reg, def_reg, tmp_reg
2197//	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
2198//	|	ASM_REG_REG_REG_OP sub, type, def_reg, def_reg, tmp_reg
2199
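	/* Signed remainder keeps the sign of the dividend: negs sets N from -def
	 * (mi <=> def > 0), both def and -def are masked to the low bits, and
	 * csneg picks def&mask for a positive dividend or -((-def)&mask) otherwise.
	 * Illustrative sequence (assuming IR_I64, divisor 8, def in x0, tmp in x1):
	 *   negs  x1, x0
	 *   and   x0, x0, #7
	 *   and   x1, x1, #7
	 *   csneg x0, x0, x1, mi
	 */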
2200	|	ASM_REG_REG_OP negs, type, tmp_reg, def_reg
2201	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
2202	|	ASM_REG_REG_IMM_OP and, type, tmp_reg, tmp_reg, mask
2203	|	ASM_REG_REG_REG_TXT_OP csneg, type, def_reg, def_reg, tmp_reg, mi
2204
2205	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2206		ir_emit_store(ctx, type, def, def_reg);
2207	}
2208}
2209
2210static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2211{
2212	ir_backend_data *data = ctx->data;
2213	dasm_State **Dst = &data->dasm_state;
2214	ir_type type = insn->type;
2215	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2216	ir_reg op1_reg = ctx->regs[def][1];
2217	ir_reg op2_reg = ctx->regs[def][2];
2218	ir_reg tmp_reg;
2219
2220	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
2221	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
2222		op1_reg = IR_REG_NUM(op1_reg);
2223		ir_emit_load(ctx, type, op1_reg, insn->op1);
2224	}
2225	if (IR_REG_SPILLED(op2_reg)) {
2226		op2_reg = IR_REG_NUM(op2_reg);
2227		ir_emit_load(ctx, type, op2_reg, insn->op2);
2228	}
2229	switch (insn->op) {
2230		default:
2231			IR_ASSERT(0);
2232		case IR_SHL:
2233			if (ir_type_size[type] == 1) {
2234				|	and Rw(def_reg), Rw(op1_reg), #0xff
2235				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2236			} else if (ir_type_size[type] == 2) {
2237				|	and Rw(def_reg), Rw(op1_reg), #0xffff
2238				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2239			} else {
2240				|	ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
2241			}
2242			break;
2243		case IR_SHR:
2244			if (ir_type_size[type] == 1) {
2245				|	and Rw(def_reg), Rw(op1_reg), #0xff
2246				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2247			} else if (ir_type_size[type] == 2) {
2248				|	and Rw(def_reg), Rw(op1_reg), #0xffff
2249				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2250			} else {
2251				|	ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg
2252			}
2253			break;
2254		case IR_SAR:
2255			if (ir_type_size[type] == 1) {
2256				|	sxtb Rw(def_reg), Rw(op1_reg)
2257				|	asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2258			} else if (ir_type_size[type] == 2) {
2259				|	sxth Rw(def_reg), Rw(op1_reg)
2260				|	asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2261			} else {
2262				|	ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg
2263			}
2264			break;
2265		case IR_ROL:
2266			tmp_reg = ctx->regs[def][3];
2267			IR_ASSERT(tmp_reg != IR_REG_NONE);
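			/* AArch64 has ror but no rol: rotate left by n is emitted as ror
			 * by the negated (i.e. width - n) amount. For 8/16-bit values the
			 * adds below first replicate the value across the 32-bit register
			 * so the right bits rotate back in, then the result is masked back
			 * to the original width. */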
2268			if (ir_type_size[type] == 1) {
2269				|	and Rw(def_reg), Rw(op1_reg), #0xff
2270				|	add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #8
2271				|	add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16
2272				|	neg Rw(tmp_reg), Rw(op2_reg)
2273				|	ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg)
2274				|	and Rw(def_reg), Rw(def_reg), #0xff
2275			} else if (ir_type_size[type] == 2) {
2276				|	and Rw(def_reg), Rw(op1_reg), #0xffff
2277				|	add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16
2278				|	neg Rw(tmp_reg), Rw(op2_reg)
2279				|	ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg)
2280				|	and Rw(def_reg), Rw(def_reg), #0xffff
2281			} else if (ir_type_size[type] == 8) {
2282				|	neg Rx(tmp_reg), Rx(op2_reg)
2283				|	ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg)
2284			} else {
2285				|	neg Rw(tmp_reg), Rw(op2_reg)
2286				|	ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg)
2287			}
2288			break;
2289		case IR_ROR:
2290			if (ir_type_size[type] == 1) {
2291				tmp_reg = ctx->regs[def][3];
2292				IR_ASSERT(tmp_reg != IR_REG_NONE);
2293				|	and Rw(tmp_reg), Rw(op1_reg), #0xff
2294				|	add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #8
2295				|	add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16
2296				|	ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg)
2297				|	and Rw(def_reg), Rw(def_reg), #0xff
2298			} else if (ir_type_size[type] == 2) {
2299				tmp_reg = ctx->regs[def][3];
2300				IR_ASSERT(tmp_reg != IR_REG_NONE);
2301				|	and Rw(tmp_reg), Rw(op1_reg), #0xffff
2302				|	add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16
2303				|	ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg)
2304				|	and Rw(def_reg), Rw(def_reg), #0xffff
2305			} else {
2306				|	ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg
2307			}
2308			break;
2309	}
2310	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2311		ir_emit_store(ctx, type, def, def_reg);
2312	}
2313}
2314
2315static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2316{
2317	ir_backend_data *data = ctx->data;
2318	dasm_State **Dst = &data->dasm_state;
2319	uint32_t shift = ctx->ir_base[insn->op2].val.u64;
2320	ir_type type = insn->type;
2321	ir_ref op1 = insn->op1;
2322	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2323	ir_reg op1_reg = ctx->regs[def][1];
2324	ir_reg tmp_reg;
2325
2326	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2327	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2328	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2329
2330	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2331		op1_reg = IR_REG_NUM(op1_reg);
2332		ir_emit_load(ctx, type, op1_reg, op1);
2333	}
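	/* For sub-word types a constant shift is fused with the zero/sign
	 * extension using the bitfield ops (ubfiz/ubfx/sbfx) instead of extending
	 * first; a constant ROL becomes ror by (width - shift). */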
2334	switch (insn->op) {
2335		default:
2336			IR_ASSERT(0);
2337		case IR_SHL:
2338			if (ir_type_size[type] == 1) {
2339				|	ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2340			} else if (ir_type_size[type] == 2) {
2341				|	ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2342			} else {
2343				|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
2344			}
2345			break;
2346		case IR_SHR:
2347			if (ir_type_size[type] == 1) {
2348				|	ubfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2349			} else if (ir_type_size[type] == 2) {
2350				|	ubfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2351			} else {
2352				|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
2353			}
2354			break;
2355		case IR_SAR:
2356			if (ir_type_size[type] == 1) {
2357				|	sbfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2358			} else if (ir_type_size[type] == 2) {
2359				|	sbfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2360			} else {
2361				|	ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift
2362			}
2363			break;
2364		case IR_ROL:
2365			if (ir_type_size[type] == 1) {
2366				tmp_reg = ctx->regs[def][3];
2367				|	ubfx Rw(tmp_reg), Rw(op1_reg), #(8-shift), #shift
2368				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift
2369			} else if (ir_type_size[type] == 2) {
2370				tmp_reg = ctx->regs[def][3];
2371				|	ubfx Rw(tmp_reg), Rw(op1_reg), #(16-shift), #shift
2372				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift
2373			} else if (ir_type_size[type] == 8) {
2374				shift = (64 - shift) % 64;
2375				|	ror Rx(def_reg), Rx(op1_reg), #shift
2376			} else {
2377				shift = (32 - shift) % 32;
2378				|	ror Rw(def_reg), Rw(op1_reg), #shift
2379			}
2380			break;
2381		case IR_ROR:
2382			if (ir_type_size[type] == 1) {
2383				tmp_reg = ctx->regs[def][3];
2384				|	ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(8-shift)
2385				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(8-shift)
2386			} else if (ir_type_size[type] == 2) {
2387				tmp_reg = ctx->regs[def][3];
2388				|	ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(16-shift)
2389				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(16-shift)
2390			} else {
2391				|	ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift
2392			}
2393			break;
2394	}
2395	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2396		ir_emit_store(ctx, type, def, def_reg);
2397	}
2398}
2399
2400static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2401{
2402	ir_backend_data *data = ctx->data;
2403	dasm_State **Dst = &data->dasm_state;
2404	ir_type type = insn->type;
2405	ir_ref op1 = insn->op1;
2406	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2407	ir_reg op1_reg = ctx->regs[def][1];
2408
2409	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2410
2411	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2412		op1_reg = IR_REG_NUM(op1_reg);
2413		ir_emit_load(ctx, type, op1_reg, op1);
2414	}
2415	if (insn->op == IR_NOT) {
2416		if (insn->type == IR_BOOL) {
2417			|	ASM_REG_IMM_OP cmp, type, op1_reg, 0
2418			|	cset Rw(def_reg), eq
2419		} else {
2420			|	ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg
2421		}
2422	} else if (insn->op == IR_NEG) {
2423		|	ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg
2424	} else if (insn->op == IR_ABS) {
2425		if (ir_type_size[type] == 8) {
2426			|	cmp Rx(op1_reg), #0
2427			|	cneg Rx(def_reg), Rx(op1_reg), lt
2428		} else {
2429			|	cmp Rw(op1_reg), #0
2430			|	cneg Rw(def_reg), Rw(op1_reg), lt
2431		}
2432	} else if (insn->op == IR_CTLZ) {
2433		if (ir_type_size[type] == 1) {
2434			|	and	Rw(def_reg), Rw(op1_reg), #0xff
2435			|	clz Rw(def_reg), Rw(def_reg)
2436			|	sub Rw(def_reg), Rw(def_reg), #24
2437		} else if (ir_type_size[type] == 2) {
2438			|	and	Rw(def_reg), Rw(op1_reg), #0xffff
2439			|	clz Rw(def_reg), Rw(def_reg)
2440			|	sub Rw(def_reg), Rw(def_reg), #16
2441		} else {
2442			|	ASM_REG_REG_OP clz, type, def_reg, op1_reg
2443		}
2444	} else if (insn->op == IR_CTTZ) {
2445		|	ASM_REG_REG_OP rbit, insn->type, def_reg, op1_reg
2446		|	ASM_REG_REG_OP clz, insn->type, def_reg, def_reg
2447	} else {
2448		IR_ASSERT(insn->op == IR_BSWAP);
2449		|	ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg
2450	}
2451	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2452		ir_emit_store(ctx, type, def, def_reg);
2453	}
2454}
2455
2456static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2457{
2458	ir_backend_data *data = ctx->data;
2459	dasm_State **Dst = &data->dasm_state;
2460	ir_type type = insn->type;
2461	ir_ref op1 = insn->op1;
2462	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2463	ir_reg op1_reg = ctx->regs[def][1];
2464	ir_reg tmp_reg = ctx->regs[def][2];
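	/* CTPOP is computed in a NEON register: fmov the value in, "cnt" counts
	 * the set bits per byte, "addv" sums the eight byte counts, and fmov moves
	 * the result back. Both instructions are emitted as raw .long words with
	 * tmp_reg patched into their Rd/Rn fields (bits 4:0 and 9:5). */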
2465	uint32_t code1 = 0x0e205800 | ((tmp_reg-IR_REG_FP_FIRST) << 5) | (tmp_reg-IR_REG_FP_FIRST); // cnt v0.8b, v0.8b (Rd/Rn := tmp)
2466	uint32_t code2 = 0x0e31b800 | ((tmp_reg-IR_REG_FP_FIRST) << 5) | (tmp_reg-IR_REG_FP_FIRST); // addv b0, v0.8b (Rd/Rn := tmp)
2467
2468	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
2469
2470	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2471		op1_reg = IR_REG_NUM(op1_reg);
2472		ir_emit_load(ctx, type, op1_reg, op1);
2473	}
2474	switch (ir_type_size[insn->type]) {
2475		default:
2476			IR_ASSERT(0);
2477		case 1:
2478			|	and	Rw(def_reg), Rw(op1_reg), #0xff
2479			|	fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg)
2480			|	.long code1 // cnt v0.8b, v0.8b
2481			|	.long code2 // addv b0, v0.8b
2482			|	fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
2483			break;
2484		case 2:
2485			|	and	Rw(def_reg), Rw(op1_reg), #0xffff
2486			|	fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg)
2487			|	.long code1 // cnt v0.8b, v0.8b
2488			|	.long code2 // addv b0, v0.8b
2489			|	fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
2490			break;
2491		case 4:
2492			|	fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(op1_reg)
2493			|	.long code1 // cnt v0.8b, v0.8b
2494			|	.long code2 // addv b0, v0.8b
2495			|	fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
2496			break;
2497		case 8:
2498			|	fmov Rd(tmp_reg-IR_REG_FP_FIRST), Rx(op1_reg)
2499			|	.long code1 // cnt v0.8b, v0.8b
2500			|	.long code2 // addv b0, v0.8b
2501			|	fmov Rx(def_reg), Rd(tmp_reg-IR_REG_FP_FIRST)
2502			break;
2503	}
2504	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2505		ir_emit_store(ctx, type, def, def_reg);
2506	}
2507}
2508
2509static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2510{
2511	ir_backend_data *data = ctx->data;
2512	dasm_State **Dst = &data->dasm_state;
2513	ir_type type = insn->type;
2514	ir_ref op1 = insn->op1;
2515	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2516	ir_reg op1_reg = ctx->regs[def][1];
2517
2518	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2519
2520	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2521		op1_reg = IR_REG_NUM(op1_reg);
2522		ir_emit_load(ctx, type, op1_reg, op1);
2523	}
2524	if (insn->op == IR_NEG) {
2525		if (type == IR_DOUBLE) {
2526			|	fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
2527		} else {
2528			IR_ASSERT(type == IR_FLOAT);
2529			|	fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
2530		}
2531	} else {
2532		IR_ASSERT(insn->op == IR_ABS);
2533		if (type == IR_DOUBLE) {
2534			|	fabs Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
2535		} else {
2536			IR_ASSERT(type == IR_FLOAT);
2537			|	fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
2538		}
2539	}
2540	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2541		ir_emit_store(ctx, insn->type, def, def_reg);
2542	}
2543}
2544
2545static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2546{
2547	ir_backend_data *data = ctx->data;
2548	dasm_State **Dst = &data->dasm_state;
2549	ir_type type = insn->type;
2550	ir_ref op1 = insn->op1;
2551	ir_ref op2 = insn->op2;
2552	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2553	ir_reg op1_reg = ctx->regs[def][1];
2554	ir_reg op2_reg = ctx->regs[def][2];
2555
2556	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
2557	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2558		op1_reg = IR_REG_NUM(op1_reg);
2559		ir_emit_load(ctx, type, op1_reg, op1);
2560	}
2561	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
2562		op2_reg = IR_REG_NUM(op2_reg);
2563		if (op1 != op2) {
2564			ir_emit_load(ctx, type, op2_reg, op2);
2565		}
2566	}
2567	switch (insn->op) {
2568		default:
2569			IR_ASSERT(0 && "NIY binary op");
2570		case IR_ADD:
2571			|	ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg, op2_reg
2572			break;
2573		case IR_SUB:
2574			|	ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg
2575			break;
2576		case IR_MUL:
2577			|	ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg
2578			break;
2579		case IR_DIV:
2580			|	ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg
2581			break;
2582		case IR_MIN:
2583			|	ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg
2584			break;
2585		case IR_MAX:
2586			|	ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg
2587			break;
2588	}
2589	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2590		ir_emit_store(ctx, insn->type, def, def_reg);
2591	}
2592}
2593
2594static void ir_emit_fix_type(ir_ctx *ctx, ir_type type, ir_reg op1_reg)
2595{
2596	ir_backend_data *data = ctx->data;
2597	dasm_State **Dst = &data->dasm_state;
2598
2599	// TODO: prevent repeatable sign/zero extension ???
2600	if (ir_type_size[type] == 2) {
2601		if (IR_IS_TYPE_SIGNED(type)) {
2602			|	sxth Rw(op1_reg), Rw(op1_reg)
2603		} else {
2604			|	uxth Rw(op1_reg), Rw(op1_reg)
2605		}
2606	} else if (ir_type_size[type] == 1) {
2607		if (IR_IS_TYPE_SIGNED(type)) {
2608			|	sxtb Rw(op1_reg), Rw(op1_reg)
2609		} else {
2610			|	uxtb Rw(op1_reg), Rw(op1_reg)
2611		}
2612	}
2613}
2614
2615static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2)
2616{
2617	ir_backend_data *data = ctx->data;
2618	dasm_State **Dst = &data->dasm_state;
2619
2620	IR_ASSERT(op1_reg != IR_REG_NONE);
2621	if (ir_type_size[type] < 4) {
2622		ir_emit_fix_type(ctx, type, op1_reg);
2623	}
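	/* op1 was widened above; a sub-word op2 is widened on the fly using the
	 * extended-register compare forms (sxtb/uxtb/sxth/uxth). */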
2624	if (op2_reg != IR_REG_NONE) {
2625		if (ir_type_size[type] == 8) {
2626			|	cmp Rx(op1_reg), Rx(op2_reg)
2627		} else if (ir_type_size[type] == 4) {
2628			|	cmp Rw(op1_reg), Rw(op2_reg)
2629		} else if (ir_type_size[type] == 2) {
2630			if (IR_IS_TYPE_SIGNED(type)) {
2631				|	cmp Rw(op1_reg), Rw(op2_reg), sxth
2632			} else {
2633				|	cmp Rw(op1_reg), Rw(op2_reg), uxth
2634			}
2635		} else if (ir_type_size[type] == 1) {
2636			if (IR_IS_TYPE_SIGNED(type)) {
2637				|	cmp Rw(op1_reg), Rw(op2_reg), sxtb
2638			} else {
2639				|	cmp Rw(op1_reg), Rw(op2_reg), uxtb
2640			}
2641		} else {
2642			IR_ASSERT(0);
2643		}
2644	} else {
2645		IR_ASSERT(IR_IS_CONST_REF(op2));
2646		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
2647		int32_t val = ctx->ir_base[op2].val.i32;
2648
2649		if (ir_type_size[type] == 8) {
2650			|	cmp Rx(op1_reg), #val
2651		} else {
2652			|	cmp Rw(op1_reg), #val
2653		}
2654	}
2655}
2656
2657static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2658{
2659	ir_backend_data *data = ctx->data;
2660	dasm_State **Dst = &data->dasm_state;
2661	ir_type type = ctx->ir_base[insn->op1].type;
2662	ir_op op = insn->op;
2663	ir_ref op1 = insn->op1;
2664	ir_ref op2 = insn->op2;
2665	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2666	ir_reg op1_reg = ctx->regs[def][1];
2667	ir_reg op2_reg = ctx->regs[def][2];
2668
2669	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2670	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2671		op1_reg = IR_REG_NUM(op1_reg);
2672		ir_emit_load(ctx, type, op1_reg, op1);
2673	}
2674	if (op2_reg != IR_REG_NONE) {
2675		if (IR_REG_SPILLED(op2_reg)) {
2676			op2_reg = IR_REG_NUM(op2_reg);
2677			if (op1 != op2) {
2678				ir_emit_load(ctx, type, op2_reg, op2);
2679			}
2680		}
2681		if (IR_IS_CONST_REF(op2)) {
2682			ir_emit_load(ctx, type, op2_reg, op2);
2683		}
2684	}
2685	if (IR_IS_CONST_REF(insn->op2)
2686	 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)
2687	 && ctx->ir_base[insn->op2].val.u64 == 0) {
2688		if (op == IR_ULT) {
2689			/* always false */
2690			ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0);
2691			if (IR_REG_SPILLED(ctx->regs[def][0])) {
2692				ir_emit_store(ctx, insn->type, def, def_reg);
2693			}
2694			return;
2695		} else if (op == IR_UGE) {
2696			/* always true */
2697			ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1);
2698			if (IR_REG_SPILLED(ctx->regs[def][0])) {
2699				ir_emit_store(ctx, insn->type, def, def_reg);
2700			}
2701			return;
2702		} else if (op == IR_ULE) {
2703			op = IR_EQ;
2704		} else if (op == IR_UGT) {
2705			op = IR_NE;
2706		}
2707	}
2708	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
2709	switch (op) {
2710		default:
2711			IR_ASSERT(0 && "NIY binary op");
2712		case IR_EQ:
2713			|	cset Rw(def_reg), eq
2714			break;
2715		case IR_NE:
2716			|	cset Rw(def_reg), ne
2717			break;
2718		case IR_LT:
2719			|	cset Rw(def_reg), lt
2720			break;
2721		case IR_GE:
2722			|	cset Rw(def_reg), ge
2723			break;
2724		case IR_LE:
2725			|	cset Rw(def_reg), le
2726			break;
2727		case IR_GT:
2728			|	cset Rw(def_reg), gt
2729			break;
2730		case IR_ULT:
2731			|	cset Rw(def_reg), lo
2732			break;
2733		case IR_UGE:
2734			|	cset Rw(def_reg), hs
2735			break;
2736		case IR_ULE:
2737			|	cset Rw(def_reg), ls
2738			break;
2739		case IR_UGT:
2740			|	cset Rw(def_reg), hi
2741			break;
2742	}
2743	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2744		ir_emit_store(ctx, insn->type, def, def_reg);
2745	}
2746}
2747
2748static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
2749{
2750	ir_backend_data *data = ctx->data;
2751	dasm_State **Dst = &data->dasm_state;
2752	ir_type type = ctx->ir_base[cmp_insn->op1].type;
2753	ir_op op = cmp_insn->op;
2754	ir_ref op1, op2;
2755	ir_reg op1_reg, op2_reg;
2756
2757	if (op == IR_LT || op == IR_LE) {
2758		/* swap operands (op ^= 3 turns LT/LE into GT/GE) so an unordered result needs no extra flag check (x86's P flag; V on AArch64) */
2759		op ^= 3;
2760		op1 = cmp_insn->op2;
2761		op2 = cmp_insn->op1;
2762		op1_reg = ctx->regs[cmp_ref][2];
2763		op2_reg = ctx->regs[cmp_ref][1];
2764	} else {
2765		op1 = cmp_insn->op1;
2766		op2 = cmp_insn->op2;
2767		op1_reg = ctx->regs[cmp_ref][1];
2768		op2_reg = ctx->regs[cmp_ref][2];
2769	}
2770
2771	IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
2772	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
2773		op1_reg = IR_REG_NUM(op1_reg);
2774		ir_emit_load(ctx, type, op1_reg, op1);
2775	}
2776	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
2777		op2_reg = IR_REG_NUM(op2_reg);
2778		if (op1 != op2) {
2779			ir_emit_load(ctx, type, op2_reg, op2);
2780		}
2781	}
2782	if (type == IR_DOUBLE) {
2783		|	fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST)
2784	} else {
2785		IR_ASSERT(type == IR_FLOAT);
2786		|	fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST)
2787	}
2788	return op;
2789}
2790
2791static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2792{
2793	ir_backend_data *data = ctx->data;
2794	dasm_State **Dst = &data->dasm_state;
2795	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn);
2796	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2797//???	ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag
2798
2799	IR_ASSERT(def_reg != IR_REG_NONE);
2800	switch (op) {
2801		default:
2802			IR_ASSERT(0 && "NIY binary op");
2803		case IR_EQ:
2804			|	cset Rw(def_reg), eq
2805			break;
2806		case IR_NE:
2807			|	cset Rw(def_reg), ne
2808			break;
2809		case IR_LT:
2810			|	cset Rw(def_reg), mi
2811			break;
2812		case IR_GE:
2813			|	cset Rw(def_reg), ge
2814			break;
2815		case IR_LE:
2816			|	cset Rw(def_reg), ls
2817			break;
2818		case IR_GT:
2819			|	cset Rw(def_reg), gt
2820			break;
2821		case IR_ULT:
2822			|	cset Rw(def_reg), lt
2823			break;
2824		case IR_UGE:
2825			|	cset Rw(def_reg), hs
2826			break;
2827		case IR_ULE:
2828			|	cset Rw(def_reg), le
2829			break;
2830		case IR_UGT:
2831			|	cset Rw(def_reg), hi
2832			break;
2833	}
2834	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2835		ir_emit_store(ctx, insn->type, def, def_reg);
2836	}
2837}
2838
2839static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def)
2840{
2841	uint32_t true_block, false_block, next_block;
2842	ir_backend_data *data = ctx->data;
2843	dasm_State **Dst = &data->dasm_state;
2844
2845	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
2846	if (true_block != next_block) {
2847		|	b =>true_block
2848	}
2849}
2850
2851static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def)
2852{
2853	uint32_t true_block, false_block, next_block;
2854	ir_backend_data *data = ctx->data;
2855	dasm_State **Dst = &data->dasm_state;
2856
2857	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
2858	if (false_block != next_block) {
2859		|	b =>false_block
2860	}
2861}
2862
2863static void ir_emit_jz(ir_ctx *ctx, uint8_t op, uint32_t b, ir_type type, ir_reg reg)
2864{
2865	uint32_t true_block, false_block, next_block;
2866	ir_backend_data *data = ctx->data;
2867	dasm_State **Dst = &data->dasm_state;
2868
2869	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
2870	if (true_block == next_block) {
2871		IR_ASSERT(op < IR_LT);
2872		op ^= 1; // reverse
2873		true_block = false_block;
2874		false_block = 0;
2875	} else if (false_block == next_block) {
2876		false_block = 0;
2877	}
2878
2879	if (op == IR_EQ) {
2880		if (ir_type_size[type] == 8) {
2881			|	cbz Rx(reg), =>true_block
2882		} else {
2883			|	cbz Rw(reg), =>true_block
2884		}
2885	} else {
2886		IR_ASSERT(op == IR_NE);
2887		if (ir_type_size[type] == 8) {
2888			|	cbnz Rx(reg), =>true_block
2889		} else {
2890			|	cbnz Rw(reg), =>true_block
2891		}
2892	}
2893	if (false_block) {
2894		|	b =>false_block
2895	}
2896}
2897
2898static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp)
2899{
2900	uint32_t true_block, false_block, next_block;
2901	ir_backend_data *data = ctx->data;
2902	dasm_State **Dst = &data->dasm_state;
2903
2904	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
2905	if (true_block == next_block) {
2906		/* swap to avoid unconditional JMP */
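		/* Condition reversal relies on the operator numbering in ir.h:
		 * op ^= 1 negates a condition (EQ<->NE, LT<->GE, LE<->GT, ULT<->UGE),
		 * while op ^= 5 maps an ordered FP condition to its unordered
		 * complement (LT<->UGE, LE<->UGT), which stays correct for NaNs. */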
2907		if (int_cmp || op == IR_EQ || op == IR_NE) {
2908			op ^= 1; // reverse
2909		} else {
2910			op ^= 5; // reverse
2911		}
2912		true_block = false_block;
2913		false_block = 0;
2914	} else if (false_block == next_block) {
2915		false_block = 0;
2916	}
2917
2918	if (int_cmp) {
2919		switch (op) {
2920			default:
2921				IR_ASSERT(0 && "NIY binary op");
2922			case IR_EQ:
2923				|	beq =>true_block
2924				break;
2925			case IR_NE:
2926				|	bne =>true_block
2927				break;
2928			case IR_LT:
2929				|	blt =>true_block
2930				break;
2931			case IR_GE:
2932				|	bge =>true_block
2933				break;
2934			case IR_LE:
2935				|	ble =>true_block
2936				break;
2937			case IR_GT:
2938				|	bgt =>true_block
2939				break;
2940			case IR_ULT:
2941				|	blo =>true_block
2942				break;
2943			case IR_UGE:
2944				|	bhs =>true_block
2945				break;
2946			case IR_ULE:
2947				|	bls =>true_block
2948				break;
2949			case IR_UGT:
2950				|	bhi =>true_block
2951				break;
2952		}
2953	} else {
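		/* After fcmp a NaN leaves NZCV = 0011: mi/ls keep the ordered LT/LE
		 * false on unordered operands, while lt/le/hs/hi make the unordered
		 * ULT/ULE/UGE/UGT variants true. */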
2954		switch (op) {
2955			default:
2956				IR_ASSERT(0 && "NIY binary op");
2957			case IR_EQ:
2958				|	beq =>true_block
2959				break;
2960			case IR_NE:
2961				|	bne =>true_block
2962				break;
2963			case IR_LT:
2964				|	bmi =>true_block
2965				break;
2966			case IR_GE:
2967				|	bge =>true_block
2968				break;
2969			case IR_LE:
2970				|	bls =>true_block
2971				break;
2972			case IR_GT:
2973				|	bgt =>true_block
2974				break;
2975			case IR_ULT:
2976				|	blt =>true_block
2977				break;
2978			case IR_UGE:
2979				|	bhs =>true_block
2980				break;
2981			case IR_ULE:
2982				|	ble =>true_block
2983				break;
2984			case IR_UGT:
2985				|	bhi =>true_block
2986				break;
2987//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
2988//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
2989//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
2990//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
2991		}
2992	}
2993	if (false_block) {
2994		|	b =>false_block
2995	}
2996}
2997
2998static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
2999{
3000	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
3001	ir_op op = cmp_insn->op;
3002	ir_type type = ctx->ir_base[cmp_insn->op1].type;
3003	ir_ref op1 = cmp_insn->op1;
3004	ir_ref op2 = cmp_insn->op2;
3005	ir_reg op1_reg = ctx->regs[insn->op2][1];
3006	ir_reg op2_reg = ctx->regs[insn->op2][2];
3007
3008	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3009		op1_reg = IR_REG_NUM(op1_reg);
3010		ir_emit_load(ctx, type, op1_reg, op1);
3011	}
3012	if (op1_reg != IR_REG_NONE && IR_IS_CONST_REF(op1)) {
3013		ir_emit_load(ctx, type, op1_reg, op1);
3014	}
3015	if (op2_reg != IR_REG_NONE) {
3016		if (IR_REG_SPILLED(op2_reg)) {
3017			op2_reg = IR_REG_NUM(op2_reg);
3018			if (op1 != op2) {
3019				ir_emit_load(ctx, type, op2_reg, op2);
3020			}
3021		}
3022		if (IR_IS_CONST_REF(op2)) {
3023			ir_emit_load(ctx, type, op2_reg, op2);
3024		}
3025	}
3026	if (IR_IS_CONST_REF(op2)
3027	 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)
3028	 && ctx->ir_base[op2].val.u64 == 0) {
3029		if (op == IR_ULT) {
3030			/* always false */
3031			ir_emit_jmp_false(ctx, b, def);
3032			return;
3033		} else if (op == IR_UGE) {
3034			/* always true */
3035			ir_emit_jmp_true(ctx, b, def);
3036			return;
3037		} else if (op == IR_ULE) {
3038			op = IR_EQ;
3039		} else if (op == IR_UGT) {
3040			op = IR_NE;
3041		}
3042		if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
3043			ir_emit_jz(ctx, op, b, type, op1_reg);
3044			return;
3045		}
3046	}
3047	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
3048	ir_emit_jcc(ctx, op, b, def, insn, 1);
3049}
3050
3051static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
3052{
3053	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
3054	ir_emit_jcc(ctx, op, b, def, insn, 0);
3055}
3056
3057static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
3058{
3059	ir_type type = ctx->ir_base[insn->op2].type;
3060	ir_reg op2_reg = ctx->regs[def][2];
3061	ir_backend_data *data = ctx->data;
3062	dasm_State **Dst = &data->dasm_state;
3063
3064	if (IR_IS_CONST_REF(insn->op2)) {
3065		uint32_t true_block, false_block, next_block;
3066
3067		ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
3068		if (ir_const_is_true(&ctx->ir_base[insn->op2])) {
3069			if (true_block != next_block) {
3070				|	b =>true_block
3071			}
3072		} else {
3073			if (false_block != next_block) {
3074				|	b =>false_block
3075			}
3076		}
3077		return;
3078	}
3079	IR_ASSERT(op2_reg != IR_REG_NONE);
3080	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) {
3081		op2_reg = IR_REG_NUM(op2_reg);
3082		ir_emit_load(ctx, type, op2_reg, insn->op2);
3083	}
3084	|	ASM_REG_IMM_OP cmp, type, op2_reg, 0
3085	ir_emit_jcc(ctx, IR_NE, b, def, insn, 1);
3086}
3087
3088static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3089{
3090	ir_backend_data *data = ctx->data;
3091	dasm_State **Dst = &data->dasm_state;
3092	ir_type type = insn->type;
3093	ir_ref op1 = insn->op1;
3094	ir_ref op2 = insn->op2;
3095	ir_ref op3 = insn->op3;
3096	ir_type op1_type = ctx->ir_base[op1].type;
3097	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3098	ir_reg op1_reg = ctx->regs[def][1];
3099	ir_reg op2_reg = ctx->regs[def][2];
3100	ir_reg op3_reg = ctx->regs[def][3];
3101
3102	IR_ASSERT(def_reg != IR_REG_NONE);
3103
3104	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
3105		op2_reg = IR_REG_NUM(op2_reg);
3106		ir_emit_load(ctx, type, op2_reg, op2);
3107		if (op1 == op2) {
3108			op1_reg = op2_reg;
3109		}
3110		if (op3 == op2) {
3111			op3_reg = op2_reg;
3112		}
3113	}
3114	if (op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) {
3115		op3_reg = IR_REG_NUM(op3_reg);
3116		ir_emit_load(ctx, type, op3_reg, op3);
3117		if (op1 == op3) {
3118			op1_reg = op3_reg;
3119		}
3120	}
3121	if (op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) {
3122		op1_reg = IR_REG_NUM(op1_reg);
3123		ir_emit_load(ctx, op1_type, op1_reg, op1);
3124	}
3125
3126	if (IR_IS_TYPE_INT(op1_type)) {
3127		|	ASM_REG_IMM_OP cmp, op1_type, op1_reg, 0
3128	} else {
3129		|	ASM_FP_REG_IMM_OP fcmp, op1_type, op1_reg, 0.0
3130	}
3131
3132	if (IR_IS_TYPE_INT(type)) {
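	/* COND: compare op1 against zero, then select op2 (the "true" value) or
	 * op3 with a single branchless csel/fcsel on ne. */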
3133		if (ir_type_size[type] == 8) {
3134			|	csel Rx(def_reg), Rx(op2_reg), Rx(op3_reg), ne
3135		} else {
3136			|	csel Rw(def_reg), Rw(op2_reg), Rw(op3_reg), ne
3137		}
3138	} else {
3139		if (type == IR_DOUBLE) {
3140			|	fcsel Rd(def_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST), Rd(op3_reg-IR_REG_FP_FIRST), ne
3141		} else {
3142			|	fcsel Rs(def_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST), Rs(op3_reg-IR_REG_FP_FIRST), ne
3143		}
3144	}
3145
3146	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3147		ir_emit_store(ctx, type, def, def_reg);
3148	}
3149}
3150
3151static void ir_emit_return_void(ir_ctx *ctx)
3152{
3153	ir_backend_data *data = ctx->data;
3154	dasm_State **Dst = &data->dasm_state;
3155
3156	ir_emit_epilogue(ctx);
3157	|	ret
3158}
3159
3160static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3161{
3162	ir_reg op2_reg = ctx->regs[ref][2];
3163
3164	if (op2_reg != IR_REG_INT_RET1) {
3165		ir_type type = ctx->ir_base[insn->op2].type;
3166
3167		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
3168			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
3169		} else {
3170			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
3171		}
3172	}
3173	ir_emit_return_void(ctx);
3174}
3175
3176static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3177{
3178	ir_reg op2_reg = ctx->regs[ref][2];
3179	ir_type type = ctx->ir_base[insn->op2].type;
3180
3181	if (op2_reg != IR_REG_FP_RET1) {
3182		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
3183			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
3184		} else {
3185			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
3186		}
3187	}
3188	ir_emit_return_void(ctx);
3189}
3190
3191static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3192{
3193	ir_type dst_type = insn->type;
3194	ir_type src_type = ctx->ir_base[insn->op1].type;
3195	ir_backend_data *data = ctx->data;
3196	dasm_State **Dst = &data->dasm_state;
3197	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3198	ir_reg op1_reg = ctx->regs[def][1];
3199
3200	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3201	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3202	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
3203	IR_ASSERT(def_reg != IR_REG_NONE);
3204	if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) {
3205		op1_reg = IR_REG_NUM(op1_reg);
3206		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3207	}
3208
3209	if (op1_reg != IR_REG_NONE) {
3210		if (ir_type_size[src_type] == 1) {
3211			if (ir_type_size[dst_type] == 2) {
3212				|	sxtb Rw(def_reg), Rw(op1_reg)
3213			} else if (ir_type_size[dst_type] == 4) {
3214				|	sxtb Rw(def_reg), Rw(op1_reg)
3215			} else {
3216				IR_ASSERT(ir_type_size[dst_type] == 8);
3217				|	sxtb Rx(def_reg), Rx(op1_reg)
3218			}
3219		} else if (ir_type_size[src_type] == 2) {
3220			if (ir_type_size[dst_type] == 4) {
3221				|	sxth Rw(def_reg), Rw(op1_reg)
3222			} else {
3223				IR_ASSERT(ir_type_size[dst_type] == 8);
3224				|	sxth Rx(def_reg), Rx(op1_reg)
3225			}
3226		} else {
3227			IR_ASSERT(ir_type_size[src_type] == 4);
3228			IR_ASSERT(ir_type_size[dst_type] == 8);
3229			|	sxtw Rx(def_reg), Rw(op1_reg)
3230		}
3231	} else if (IR_IS_CONST_REF(insn->op1)) {
3232		IR_ASSERT(0);
3233	} else {
3234		ir_reg fp;
3235		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3236
3237		if (ir_type_size[src_type] == 1) {
3238			if (ir_type_size[dst_type] == 2) {
3239				|	ldrsb Rw(def_reg), [Rx(fp), #offset]
3240			} else if (ir_type_size[dst_type] == 4) {
3241				|	ldrsb Rw(def_reg), [Rx(fp), #offset]
3242			} else {
3243				IR_ASSERT(ir_type_size[dst_type] == 8);
3244				|	ldrsb Rx(def_reg), [Rx(fp), #offset]
3245			}
3246		} else if (ir_type_size[src_type] == 2) {
3247			if (ir_type_size[dst_type] == 4) {
3248				|	ldrsh Rw(def_reg), [Rx(fp), #offset]
3249			} else {
3250				IR_ASSERT(ir_type_size[dst_type] == 8);
3251				|	ldrsh Rx(def_reg), [Rx(fp), #offset]
3252			}
3253		} else {
3254			IR_ASSERT(ir_type_size[src_type] == 4);
3255			IR_ASSERT(ir_type_size[dst_type] == 8);
3256			|	ldrsw Rx(def_reg), [Rx(fp), #offset]
3257		}
3258	}
3259	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3260		ir_emit_store(ctx, dst_type, def, def_reg);
3261	}
3262}
3263
3264static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3265{
3266	ir_type dst_type = insn->type;
3267	ir_type src_type = ctx->ir_base[insn->op1].type;
3268	ir_backend_data *data = ctx->data;
3269	dasm_State **Dst = &data->dasm_state;
3270	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3271	ir_reg op1_reg = ctx->regs[def][1];
3272
3273	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3274	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3275	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
3276	IR_ASSERT(def_reg != IR_REG_NONE);
3277	if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) {
3278		op1_reg = IR_REG_NUM(op1_reg);
3279		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3280	}
3281
3282	if (op1_reg != IR_REG_NONE) {
3283		if (ir_type_size[src_type] == 1) {
3284			|	uxtb Rw(def_reg), Rw(op1_reg)
3285		} else if (ir_type_size[src_type] == 2) {
3286			|	uxth Rw(def_reg), Rw(op1_reg)
3287		} else {
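			/* a 32-bit mov is enough: writing a W register zeroes the upper
			 * 32 bits of the X register */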
3288			|	mov Rw(def_reg), Rw(op1_reg)
3289		}
3290	} else if (IR_IS_CONST_REF(insn->op1)) {
3291		IR_ASSERT(0);
3292	} else {
3293		ir_reg fp;
3294		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3295
3296		if (ir_type_size[src_type] == 1) {
3297			|	ldrb Rw(def_reg), [Rx(fp), #offset]
3298		} else if (ir_type_size[src_type] == 2) {
3299			|	ldrh Rw(def_reg), [Rx(fp), #offset]
3300		} else {
3301			IR_ASSERT(ir_type_size[src_type] == 4);
3302			IR_ASSERT(ir_type_size[dst_type] == 8);
3303			|	ldr Rw(def_reg), [Rx(fp), #offset]
3304		}
3305	}
3306	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3307		ir_emit_store(ctx, dst_type, def, def_reg);
3308	}
3309}
3310
3311static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3312{
3313	ir_backend_data *data = ctx->data;
3314	dasm_State **Dst = &data->dasm_state;
3315	ir_type dst_type = insn->type;
3316	ir_type src_type = ctx->ir_base[insn->op1].type;
3317	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3318	ir_reg op1_reg = ctx->regs[def][1];
3319
3320	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3321	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3322	IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]);
3323	IR_ASSERT(def_reg != IR_REG_NONE);
3324	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3325		op1_reg = IR_REG_NUM(op1_reg);
3326		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3327	}
3328	if (op1_reg != IR_REG_NONE) {
3329		if (ir_type_size[dst_type] == 1) {
3330			|	and Rw(def_reg), Rw(op1_reg), #0xff
3331		} else if (ir_type_size[dst_type] == 2) {
3332			|	and Rw(def_reg), Rw(op1_reg), #0xffff
3333		} else if (op1_reg != def_reg) {
3334			ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
3335		}
3336	} else {
3337		ir_emit_load(ctx, dst_type, def_reg, insn->op1);
3338	}
3339	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3340		ir_emit_store(ctx, dst_type, def, def_reg);
3341	}
3342}
3343
3344static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3345{
3346	ir_type dst_type = insn->type;
3347	ir_type src_type = ctx->ir_base[insn->op1].type;
3348	ir_backend_data *data = ctx->data;
3349	dasm_State **Dst = &data->dasm_state;
3350	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3351	ir_reg op1_reg = ctx->regs[def][1];
3352
3353	IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]);
3354	IR_ASSERT(def_reg != IR_REG_NONE);
3355	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3356		op1_reg = IR_REG_NUM(op1_reg);
3357		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3358	}
3359	if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) {
3360		if (op1_reg != IR_REG_NONE) {
3361			if (IR_REG_SPILLED(op1_reg)) {
3362				op1_reg = IR_REG_NUM(op1_reg);
3363				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3364			}
3365			if (op1_reg != def_reg) {
3366				ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
3367			}
3368		} else {
3369			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
3370		}
3371	} else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) {
3372		if (op1_reg != IR_REG_NONE) {
3373			if (IR_REG_SPILLED(op1_reg)) {
3374				op1_reg = IR_REG_NUM(op1_reg);
3375				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3376			}
3377			if (op1_reg != def_reg) {
3378				ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
3379			}
3380		} else {
3381			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
3382		}
3383	} else if (IR_IS_TYPE_FP(src_type)) {
3384		IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3385		if (op1_reg != IR_REG_NONE) {
3386			if (IR_REG_SPILLED(op1_reg)) {
3387				op1_reg = IR_REG_NUM(op1_reg);
3388				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3389			}
3390			if (src_type == IR_DOUBLE) {
3391				|	fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3392			} else {
3393				IR_ASSERT(src_type == IR_FLOAT);
3394				|	fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3395			}
3396		} else if (IR_IS_CONST_REF(insn->op1)) {
3397			IR_ASSERT(0); //???
3398		} else {
3399			ir_reg fp;
3400			int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3401
3402			if (src_type == IR_DOUBLE) {
3403				|	ldr Rx(def_reg), [Rx(fp), #offset]
3404			} else {
3405				IR_ASSERT(src_type == IR_FLOAT);
3406				|	ldr Rw(def_reg), [Rx(fp), #offset]
3407			}
3408		}
3409	} else if (IR_IS_TYPE_FP(dst_type)) {
3410		IR_ASSERT(IR_IS_TYPE_INT(src_type));
3411		if (op1_reg != IR_REG_NONE) {
3412			if (IR_REG_SPILLED(op1_reg)) {
3413				op1_reg = IR_REG_NUM(op1_reg);
3414				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3415			}
3416			if (dst_type == IR_DOUBLE) {
3417				|	fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3418			} else {
3419				IR_ASSERT(dst_type == IR_FLOAT);
3420				|	fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3421			}
3422		} else if (IR_IS_CONST_REF(insn->op1)) {
3423			IR_ASSERT(0); //???
3424		} else {
3425			ir_reg fp;
3426			int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3427
3428			if (dst_type == IR_DOUBLE) {
3429				|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset]
3430			} else {
3431				IR_ASSERT(dst_type == IR_FLOAT);
3432				|	ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset]
3433			}
3434		}
3435	}
3436	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3437		ir_emit_store(ctx, dst_type, def, def_reg);
3438	}
3439}
3440
3441static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3442{
3443	ir_type dst_type = insn->type;
3444	ir_type src_type = ctx->ir_base[insn->op1].type;
3445	ir_backend_data *data = ctx->data;
3446	dasm_State **Dst = &data->dasm_state;
3447	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3448	ir_reg op1_reg = ctx->regs[def][1];
3449
3450	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3451	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
3452	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
3453	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
3454		op1_reg = IR_REG_NUM(op1_reg);
3455		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3456	}
3457
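	/* scvtf/ucvtf read the full W/X register, so sub-32-bit sources are first
	 * sign/zero-extended in place via ir_emit_fix_type(). */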
3458	if (ir_type_size[src_type] == 8) {
3459		if (IR_IS_TYPE_SIGNED(src_type)) {
3460			if (dst_type == IR_DOUBLE) {
3461				|	scvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3462			} else {
3463				IR_ASSERT(dst_type == IR_FLOAT);
3464				|	scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3465			}
3466		} else {
3467			if (dst_type == IR_DOUBLE) {
3468				|	ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3469			} else {
3470				IR_ASSERT(dst_type == IR_FLOAT);
3471				|	ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3472			}
3473		}
3474	} else {
3475		if (IR_IS_TYPE_SIGNED(src_type)) {
3476			if (ir_type_size[src_type] == 2) {
3477				ir_emit_fix_type(ctx, IR_I16, op1_reg);
3478			} else if (ir_type_size[src_type] == 1) {
3479				ir_emit_fix_type(ctx, IR_I8, op1_reg);
3480			}
3481			if (dst_type == IR_DOUBLE) {
3482				|	scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3483			} else {
3484				IR_ASSERT(dst_type == IR_FLOAT);
3485				|	scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3486			}
3487		} else {
3488			if (ir_type_size[src_type] == 2) {
3489				ir_emit_fix_type(ctx, IR_U16, op1_reg);
3490			} else if (ir_type_size[src_type] == 1) {
3491				ir_emit_fix_type(ctx, IR_U8, op1_reg);
3492			}
3493			if (dst_type == IR_DOUBLE) {
3494				|	ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3495			} else {
3496				IR_ASSERT(dst_type == IR_FLOAT);
3497				|	ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3498			}
3499		}
3500	}
3501	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3502		ir_emit_store(ctx, dst_type, def, def_reg);
3503	}
3504}
3505
3506static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3507{
3508	ir_type dst_type = insn->type;
3509	ir_type src_type = ctx->ir_base[insn->op1].type;
3510	ir_backend_data *data = ctx->data;
3511	dasm_State **Dst = &data->dasm_state;
3512	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3513	ir_reg op1_reg = ctx->regs[def][1];
3514
3515	IR_ASSERT(IR_IS_TYPE_FP(src_type));
3516	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3517	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
3518	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
3519		op1_reg = IR_REG_NUM(op1_reg);
3520		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3521	}
3522	if (ir_type_size[dst_type] == 8) {
3523		if (IR_IS_TYPE_SIGNED(dst_type)) {
3524			if (src_type == IR_DOUBLE) {
3525				|	fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3526			} else {
3527				IR_ASSERT(src_type == IR_FLOAT);
3528				|	fcvtzs Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3529			}
3530		} else {
3531			if (src_type == IR_DOUBLE) {
3532				|	fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3533			} else {
3534				IR_ASSERT(src_type == IR_FLOAT);
3535				|	fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3536			}
3537		}
3538	} else {
3539		if (IR_IS_TYPE_SIGNED(dst_type)) {
3540			if (src_type == IR_DOUBLE) {
3541				|	fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3542			} else {
3543				IR_ASSERT(src_type == IR_FLOAT);
3544				|	fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3545			}
3546		} else {
3547			if (src_type == IR_DOUBLE) {
3548				|	fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3549			} else {
3550				IR_ASSERT(src_type == IR_FLOAT);
3551				|	fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3552			}
3553		}
3554	}
3555	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3556		ir_emit_store(ctx, dst_type, def, def_reg);
3557	}
3558}
3559
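/*
 * FP2FP: a single fcvt narrows double->float or widens float->double;
 * the same-type case degenerates to a plain register move.
 */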
3560static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3561{
3562	ir_type dst_type = insn->type;
3563	ir_type src_type = ctx->ir_base[insn->op1].type;
3564	ir_backend_data *data = ctx->data;
3565	dasm_State **Dst = &data->dasm_state;
3566	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3567	ir_reg op1_reg = ctx->regs[def][1];
3568
3569	IR_ASSERT(IR_IS_TYPE_FP(src_type));
3570	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
3571	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
3572	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
3573		op1_reg = IR_REG_NUM(op1_reg);
3574		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3575	}
3576	if (src_type == dst_type) {
3577		if (op1_reg != def_reg) {
3578			ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
3579		}
3580	} else if (src_type == IR_DOUBLE) {
3581		|	fcvt Rs(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
3582	} else {
3583		IR_ASSERT(src_type == IR_FLOAT);
3584		|	fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
3585	}
3586	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3587		ir_emit_store(ctx, dst_type, def, def_reg);
3588	}
3589}
3590
3591static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3592{
3593	ir_type type = insn->type;
3594	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3595	ir_reg op1_reg = ctx->regs[def][1];
3596
3597	IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE);
3598	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3599		op1_reg = IR_REG_NUM(op1_reg);
3600		ir_emit_load(ctx, type, op1_reg, insn->op1);
3601	}
3602	if (def_reg == op1_reg) {
3603		/* same reg */
3604	} else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) {
3605		ir_emit_mov(ctx, type, def_reg, op1_reg);
3606	} else if (def_reg != IR_REG_NONE) {
3607		ir_emit_load(ctx, type, def_reg, insn->op1);
3608	} else if (op1_reg != IR_REG_NONE) {
3609		ir_emit_store(ctx, type, def, op1_reg);
3610	} else {
3611		IR_ASSERT(0);
3612	}
3613	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
3614		ir_emit_store(ctx, type, def, def_reg);
3615	}
3616}
3617
3618static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3619{
3620	ir_type type = insn->type;
3621	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3622	ir_reg op1_reg = ctx->regs[def][1];
3623
3624	IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE);
3625	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3626		op1_reg = IR_REG_NUM(op1_reg);
3627		ir_emit_load(ctx, type, op1_reg, insn->op1);
3628	}
3629	if (def_reg == op1_reg) {
3630		/* same reg */
3631	} else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) {
3632		ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
3633	} else if (def_reg != IR_REG_NONE) {
3634		ir_emit_load(ctx, type, def_reg, insn->op1);
3635	} else if (op1_reg != IR_REG_NONE) {
3636		ir_emit_store(ctx, type, def, op1_reg);
3637	} else {
3638		IR_ASSERT(0);
3639	}
3640	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
3641		ir_emit_store(ctx, type, def, def_reg);
3642	}
3643}
3644
3645static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3646{
3647	ir_backend_data *data = ctx->data;
3648	dasm_State **Dst = &data->dasm_state;
3649	ir_type type = insn->type;
3650	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3651	int32_t offset;
3652	ir_reg fp;
3653
3654	IR_ASSERT(def_reg != IR_REG_NONE);
3655	offset = ir_var_spill_slot(ctx, insn->op1, &fp);
3656	|	add Rx(def_reg), Rx(fp), #offset
3657	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3658		ir_emit_store(ctx, type, def, def_reg);
3659	}
3660}
3661
3662static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3663{
3664	ir_insn *var_insn = &ctx->ir_base[insn->op2];
3665	ir_type type = insn->type;
3666	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3667	ir_reg fp;
3668	int32_t offset;
3669	ir_mem mem;
3670
3671	IR_ASSERT(var_insn->op == IR_VAR);
3672	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3673	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
3674	mem = IR_MEM_BO(fp, offset);
3675	if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) {
3676		return; // fake load
3677	}
3678	IR_ASSERT(def_reg != IR_REG_NONE);
3679	ir_emit_load_mem(ctx, type, def_reg, mem);
3680	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3681		ir_emit_store(ctx, type, def, def_reg);
3682	}
3683}
3684
3685static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3686{
3687	ir_insn *var_insn = &ctx->ir_base[insn->op2];
3688	ir_insn *val_insn = &ctx->ir_base[insn->op3];
3689	ir_type type = val_insn->type;
3690	ir_reg op3_reg = ctx->regs[ref][3];
3691	ir_reg fp;
3692	int32_t offset;
3693	ir_mem mem;
3694
3695	IR_ASSERT(var_insn->op == IR_VAR);
3696	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3697	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
3698	IR_ASSERT(op3_reg != IR_REG_NONE);
3699	if (IR_REG_SPILLED(op3_reg) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
3700		return; // fake store
3701	}
3702	if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) {
3703		op3_reg = IR_REG_NUM(op3_reg);
3704		ir_emit_load(ctx, type, op3_reg, insn->op3);
3705	}
3706	mem = IR_MEM_BO(fp, offset);
3707	ir_emit_store_mem(ctx, type, mem, op3_reg);
3708}
3709
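/*
 * Folds a matched ADD(reg, const) address computation into a base+offset
 * memory operand, so the addition becomes part of the load/store itself
 * instead of a separate instruction. Only plain integer displacements are
 * fused here (no symbolic constants and no index register).
 */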
3710static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
3711{
3712	ir_insn *addr_insn = &ctx->ir_base[ref];
3713	ir_reg reg;
3714
3715	IR_ASSERT(addr_insn->op == IR_ADD);
3716	IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2));
3717	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op));
3718	if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
3719		reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
3720	} else {
3721		reg = ctx->regs[ref][1];
3722	}
3723	if (IR_REG_SPILLED(reg)) {
3724		reg = IR_REG_NUM(reg);
3725		ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1);
3726	}
3727	return IR_MEM_BO(reg, ctx->ir_base[addr_insn->op2].val.i32);
3728}
3729
3730static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3731{
3732	ir_type type = insn->type;
3733	ir_reg op2_reg = ctx->regs[def][2];
3734	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3735
3736	if (ctx->use_lists[def].count == 1) {
3737		/* dead load */
3738		return;
3739	}
3740	IR_ASSERT(def_reg != IR_REG_NONE);
3741	if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) {
3742		ir_mem mem = ir_fuse_addr(ctx, def, insn->op2);
3743
3744		if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) {
3745			if (!ir_may_avoid_spill_load(ctx, def, def)) {
3746				ir_emit_load_mem_int(ctx, type, def_reg, mem);
3747			}
3748			/* avoid load to the same location (valid only when register is not reused) */
3749			return;
3750		}
3751		ir_emit_load_mem_int(ctx, type, def_reg, mem);
3752	} else {
3753		if (op2_reg == IR_REG_NONE) {
3754			op2_reg = def_reg;
3755		}
3756		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) {
3757			op2_reg = IR_REG_NUM(op2_reg);
3758			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3759			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3760		}
3761		ir_emit_load_mem_int(ctx, type, def_reg, IR_MEM_B(op2_reg));
3762	}
3763	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3764		ir_emit_store(ctx, type, def, def_reg);
3765	}
3766}
3767
3768static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3769{
3770	ir_type type = insn->type;
3771	ir_reg op2_reg = ctx->regs[def][2];
3772	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3773
3774	if (ctx->use_lists[def].count == 1) {
3775		/* dead load */
3776		return;
3777	}
3778	IR_ASSERT(def_reg != IR_REG_NONE);
3779	if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) {
3780		ir_mem mem = ir_fuse_addr(ctx, def, insn->op2);
3781
3782		if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) {
3783			if (!ir_may_avoid_spill_load(ctx, def, def)) {
3784				ir_emit_load_mem_fp(ctx, type, def_reg, mem);
3785			}
3786			/* avoid load to the same location (valid only when register is not reused) */
3787			return;
3788		}
3789		ir_emit_load_mem_fp(ctx, type, def_reg, mem);
3790	} else {
3791		if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2))) {
3792			op2_reg = IR_REG_NUM(op2_reg);
3793			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3794		}
3795		if (op2_reg == IR_REG_NONE) {
3796			op2_reg = def_reg;
3797			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3798			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3799		}
3800		ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_B(op2_reg));
3801	}
3802	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3803		ir_emit_store(ctx, type, def, def_reg);
3804	}
3805}
3806
3807static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3808{
3809	ir_insn *val_insn = &ctx->ir_base[insn->op3];
3810	ir_type type = val_insn->type;
3811	ir_reg op2_reg = ctx->regs[ref][2];
3812	ir_reg op3_reg = ctx->regs[ref][3];
3813
3814	if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) {
3815		ir_mem mem = ir_fuse_addr(ctx, ref, insn->op2);
3816
3817		if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, mem)) {
3818			if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) {
3819				op3_reg = IR_REG_NUM(op3_reg);
3820				ir_emit_load(ctx, type, op3_reg, insn->op3);
3821			}
3822			/* avoid store to the same location */
3823			return;
3824		}
3825		if (op3_reg == IR_REG_NONE) {
3826			IR_ASSERT(IR_IS_CONST_REF(insn->op3) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op3].op) && ctx->ir_base[insn->op3].val.i64 == 0);
3827			op3_reg = IR_REG_ZR;
3828		} else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) {
3829			op3_reg = IR_REG_NUM(op3_reg);
3830			ir_emit_load(ctx, type, op3_reg, insn->op3);
3831		}
3832		ir_emit_store_mem_int(ctx, type, mem, op3_reg);
3833	} else {
3834		IR_ASSERT(op2_reg != IR_REG_NONE);
3835		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) {
3836			op2_reg = IR_REG_NUM(op2_reg);
3837			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3838			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3839		}
3840		if (op3_reg == IR_REG_NONE) {
3841			IR_ASSERT(IR_IS_CONST_REF(insn->op3) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op3].op) && ctx->ir_base[insn->op3].val.i64 == 0);
3842			op3_reg = IR_REG_ZR;
3843		} else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) {
3844			op3_reg = IR_REG_NUM(op3_reg);
3845			ir_emit_load(ctx, type, op3_reg, insn->op3);
3846		}
3847		ir_emit_store_mem_int(ctx, type, IR_MEM_B(op2_reg), op3_reg);
3848	}
3849}
3850
3851static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3852{
3853	ir_type type = ctx->ir_base[insn->op3].type;
3854	ir_reg op2_reg = ctx->regs[ref][2];
3855	ir_reg op3_reg = ctx->regs[ref][3];
3856
3857	IR_ASSERT(op3_reg != IR_REG_NONE);
3858	if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) {
3859		ir_mem mem = ir_fuse_addr(ctx, ref, insn->op2);
3860
3861		if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, mem)) {
3862			if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) {
3863				op3_reg = IR_REG_NUM(op3_reg);
3864				ir_emit_load(ctx, type, op3_reg, insn->op3);
3865			}
3866			/* avoid store to the same location */
3867			return;
3868		}
3869		if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) {
3870			op3_reg = IR_REG_NUM(op3_reg);
3871			ir_emit_load(ctx, type, op3_reg, insn->op3);
3872		}
3873		ir_emit_store_mem_fp(ctx, type, mem, op3_reg);
3874	} else {
3875		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) {
3876			op2_reg = IR_REG_NUM(op2_reg);
3877			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3878			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3879		}
3880		if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) {
3881			op3_reg = IR_REG_NUM(op3_reg);
3882			ir_emit_load(ctx, type, op3_reg, insn->op3);
3883		}
3884		ir_emit_store_mem_fp(ctx, type, IR_MEM_B(op2_reg), op3_reg);
3885	}
3886}
3887
3888static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3889{
3890	ir_reg src_reg = insn->op2;
3891	ir_type type = insn->type;
3892
3893	if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) {
3894		if (ctx->vregs[def]
3895		 && ctx->live_intervals[ctx->vregs[def]]
3896		 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) {
3897			ir_emit_store(ctx, type, def, src_reg);
3898		}
3899	} else {
3900		ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3901
3902		if (def_reg == IR_REG_NONE) {
3903			/* op3 is used as a flag that the value is already stored in memory.
3904			 * If op3 is set, we don't have to store the value again when spilling.
3905			 */
3906			if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) {
3907				ir_emit_store(ctx, type, def, src_reg);
3908			}
3909		} else {
3910			if (src_reg != def_reg) {
3911				if (IR_IS_TYPE_INT(type)) {
3912					ir_emit_mov(ctx, type, def_reg, src_reg);
3913				} else {
3914					IR_ASSERT(IR_IS_TYPE_FP(type));
3915					ir_emit_fp_mov(ctx, type, def_reg, src_reg);
3916				}
3917			}
3918			if (IR_REG_SPILLED(ctx->regs[def][0])
3919			 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) {
3920				ir_emit_store(ctx, type, def, def_reg);
3921			}
3922		}
3923	}
3924}
3925
3926static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3927{
3928	ir_type type = ctx->ir_base[insn->op2].type;
3929	ir_reg op2_reg = ctx->regs[ref][2];
3930	ir_reg dst_reg = insn->op3;
3931
3932	if (op2_reg != IR_REG_NONE) {
3933		if (IR_REG_SPILLED(op2_reg)) {
3934			op2_reg = IR_REG_NUM(op2_reg);
3935			ir_emit_load(ctx, type, op2_reg, insn->op2);
3936		}
3937		if (op2_reg != dst_reg) {
3938			if (IR_IS_TYPE_INT(type)) {
3939				ir_emit_mov(ctx, type, dst_reg, op2_reg);
3940			} else {
3941				IR_ASSERT(IR_IS_TYPE_FP(type));
3942				ir_emit_fp_mov(ctx, type, dst_reg, op2_reg);
3943			}
3944		}
3945	} else {
3946		ir_emit_load(ctx, type, dst_reg, insn->op2);
3947	}
3948}
3949
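/*
 * ALLOCA: the requested size is rounded up to 16 bytes when the function
 * makes calls (AAPCS64 requires a 16-byte aligned sp at call sites) and to
 * 8 bytes otherwise. Dynamically sized allocations require the frame
 * pointer, because offsets from sp are no longer known at compile time.
 */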
3950static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3951{
3952	ir_backend_data *data = ctx->data;
3953	dasm_State **Dst = &data->dasm_state;
3954	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3955
3956	if (IR_IS_CONST_REF(insn->op2)) {
3957		ir_insn *val = &ctx->ir_base[insn->op2];
3958		int32_t size = val->val.i32;
3959
3960		IR_ASSERT(IR_IS_TYPE_INT(val->type));
3961		IR_ASSERT(!IR_IS_SYM_CONST(val->op));
3962		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
3963
3964		if (ctx->flags2 & IR_HAS_CALLS) {
3965			/* Stack must be 16 byte aligned */
3966			size = IR_ALIGNED_SIZE(size, 16);
3967		} else {
3968			size = IR_ALIGNED_SIZE(size, 8);
3969		}
3970		if (aarch64_may_encode_imm12(size)) {
3971			|	sub sp, sp, #size
3972		} else {
3973			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, size);
3974			|	sub sp, sp, Rx(IR_REG_INT_TMP)
3975		}
3976		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
3977			ctx->call_stack_size += size;
3978		}
3979	} else {
3980		int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 16 : 8;
3981		ir_reg op2_reg = ctx->regs[def][2];
3982		ir_type type = ctx->ir_base[insn->op2].type;
3983
3984		IR_ASSERT(ctx->flags & IR_FUNCTION);
3985		IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER);
3986		IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
3987		if (IR_REG_SPILLED(op2_reg)) {
3988			op2_reg = IR_REG_NUM(op2_reg);
3989			ir_emit_load(ctx, type, op2_reg, insn->op2);
3990		}
3991		|	add Rx(def_reg), Rx(op2_reg), #(alignment-1)
3992		|	and Rx(def_reg), Rx(def_reg), #(~(alignment-1))
3993		|	sub sp, sp, Rx(def_reg);
3994	}
3995	if (def_reg != IR_REG_NONE) {
3996		|	mov Rx(def_reg), sp
3997		if (IR_REG_SPILLED(ctx->regs[def][0])) {
3998			ir_emit_store(ctx, insn->type, def, def_reg);
3999		}
4000	} else {
4001		ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER);
4002	}
4003}
4004
4005static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4006{
4007	ir_backend_data *data = ctx->data;
4008	dasm_State **Dst = &data->dasm_state;
4009
4010	if (IR_IS_CONST_REF(insn->op2)) {
4011		ir_insn *val = &ctx->ir_base[insn->op2];
4012		int32_t size = val->val.i32;
4013
4014		IR_ASSERT(IR_IS_TYPE_INT(val->type));
4015		IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4016		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
4017
4018		if (ctx->flags2 & IR_HAS_CALLS) {
4019			/* Stack must be 16 byte aligned */
4020			size = IR_ALIGNED_SIZE(size, 16);
4021		} else {
4022			size = IR_ALIGNED_SIZE(size, 8);
4023		}
4024		|	add sp, sp, #size
4025		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
4026			ctx->call_stack_size -= size;
4027		}
4028	} else {
4029//		int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 16 : 8;
4030		ir_reg op2_reg = ctx->regs[def][2];
4031		ir_type type = ctx->ir_base[insn->op2].type;
4032
4033		IR_ASSERT(ctx->flags & IR_FUNCTION);
4034		IR_ASSERT(op2_reg != IR_REG_NONE);
4035		if (IR_REG_SPILLED(op2_reg)) {
4036			op2_reg = IR_REG_NUM(op2_reg);
4037			ir_emit_load(ctx, type, op2_reg, insn->op2);
4038		}
4039
4040		// TODO: alignment
4041
4042		|	add sp, sp, Rx(op2_reg);
4043	}
4044}
4045
4046static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
4047{
4048	ir_backend_data *data = ctx->data;
4049	dasm_State **Dst = &data->dasm_state;
4050	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4051
4052	if (ctx->flags & IR_USE_FRAME_POINTER) {
4053		|	mov Rx(def_reg), Rx(IR_REG_X29)
4054	} else {
4055		|	add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size)
4056	}
4057	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4058		ir_emit_store(ctx, IR_ADDR, def, def_reg);
4059	}
4060}
4061
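/*
 * VA_START: on Apple arm64 a va_list is a single pointer that walks the
 * stacked (anonymous) arguments; under the standard AAPCS64 ABI it is a
 * structure referencing both the register save area and the stack. A
 * sketch of the layout the ir_va_list accessors below presumably mirror
 * (the real definition lives elsewhere in the IR sources):
 *
 *     typedef struct ir_va_list {
 *         void    *stack;      // next stacked argument
 *         void    *gr_top;     // end of the saved GP argument registers
 *         void    *vr_top;     // end of the saved FP/SIMD argument registers
 *         int32_t  gr_offset;  // negative offset from gr_top; >= 0 once exhausted
 *         int32_t  vr_offset;  // negative offset from vr_top; >= 0 once exhausted
 *     } ir_va_list;
 */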
4062static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4063{
4064#ifdef __APPLE__
4065	ir_backend_data *data = ctx->data;
4066	dasm_State **Dst = &data->dasm_state;
4067	ir_reg fp;
4068	int arg_area_offset;
4069	ir_reg op2_reg = ctx->regs[def][2];
4070	ir_reg tmp_reg = ctx->regs[def][3];
4071
4072	IR_ASSERT(tmp_reg != IR_REG_NONE);
4073	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
4074		op2_reg = IR_REG_NUM(op2_reg);
4075		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4076	}
4077
4078	if (ctx->flags & IR_USE_FRAME_POINTER) {
4079		fp = IR_REG_FRAME_POINTER;
4080		arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
4081	} else {
4082		fp = IR_REG_STACK_POINTER;
4083		arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
4084	}
4085	|	add Rx(tmp_reg), Rx(fp), #arg_area_offset
4086	if (op2_reg != IR_REG_NONE) {
4087		|	str Rx(tmp_reg), [Rx(op2_reg)]
4088	} else {
4089		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg);
4090
4091		|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
4092	}
4093#else
4094	ir_backend_data *data = ctx->data;
4095	dasm_State **Dst = &data->dasm_state;
4096	ir_reg fp;
4097	int reg_save_area_offset;
4098	int overflow_arg_area_offset;
4099	ir_reg op2_reg = ctx->regs[def][2];
4100	ir_reg tmp_reg = ctx->regs[def][3];
4101
4102	IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4103	if (IR_REG_SPILLED(op2_reg)) {
4104		op2_reg = IR_REG_NUM(op2_reg);
4105		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4106	}
4107
4108	if (ctx->flags & IR_USE_FRAME_POINTER) {
4109		fp = IR_REG_FRAME_POINTER;
4110		reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2;
4111		overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
4112	} else {
4113		fp = IR_REG_STACK_POINTER;
4114		reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size;
4115		overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
4116	}
4117
4118	/* Set va_list.stack */
4119	|	add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset
4120	|	str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
4121	if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
4122		reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS;
4123		/* Set va_list.gr_top */
4124		if (overflow_arg_area_offset != reg_save_area_offset) {
4125			|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
4126		}
4127		|	str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)]
4128		/* Set va_list.gr_offset */
4129		|	movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params)))
4130		|	str Rw(tmp_reg),  [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
4131	} else {
4132		/* Set va_list.gr_offset */
4133		|	str wzr,  [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
4134	}
4135	if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
4136		reg_save_area_offset += 16 * IR_REG_FP_ARGS;
4137		/* Set va_list.vr_top */
4138		if (overflow_arg_area_offset != reg_save_area_offset) {
4139			|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
4140		}
4141		|	str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)]
4142		/* Set va_list.vr_offset */
4143		|	movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params)))
4144		|	str Rw(tmp_reg),  [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
4145	} else {
4146		/* Set va_list.vr_offset */
4147		|	str wzr,  [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
4148	}
4149#endif
4150}
4151
4152static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4153{
4154	IR_ASSERT(0 && "NIY va_copy");
4155}
4156
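/*
 * VA_ARG: a negative gr_offset/vr_offset means the next argument still
 * lives in the register save area, so it is loaded from gr_top/vr_top plus
 * that offset, which is then advanced by 8 (GP) or 16 (FP/SIMD) bytes.
 * Once the offset becomes non-negative the registers are exhausted and
 * arguments are taken from (and advance) the va_list stack pointer.
 */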
4157static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4158{
4159#ifdef __APPLE__
4160	ir_backend_data *data = ctx->data;
4161	dasm_State **Dst = &data->dasm_state;
4162	ir_type type = insn->type;
4163	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4164	ir_reg op2_reg = ctx->regs[def][2];
4165	ir_reg tmp_reg = ctx->regs[def][3];
4166
4167	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4168	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
4169		op2_reg = IR_REG_NUM(op2_reg);
4170		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4171	}
4172	|	ldr Rx(tmp_reg), [Rx(op2_reg)]
4173	ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
4174	|	add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
4175	if (op2_reg != IR_REG_NONE) {
4176		|	str Rx(tmp_reg), [Rx(op2_reg)]
4177	} else {
4178		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg);
4179
4180		|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
4181	}
4182	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4183		ir_emit_store(ctx, type, def, def_reg);
4184	}
4185#else
4186	ir_backend_data *data = ctx->data;
4187	dasm_State **Dst = &data->dasm_state;
4188	ir_type type = insn->type;
4189	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4190	ir_reg op2_reg = ctx->regs[def][2];
4191	ir_reg tmp_reg = ctx->regs[def][3];
4192
4193	IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4194	if (IR_REG_SPILLED(op2_reg)) {
4195		op2_reg = IR_REG_NUM(op2_reg);
4196		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4197	}
4198	if (IR_IS_TYPE_INT(type)) {
4199		|	ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
4200		|	cmp Rw(tmp_reg), wzr
4201		|	bge >1
4202		|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)]
4203		|	sxtw Rx(tmp_reg), Rw(tmp_reg)
4204		|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
4205		|	ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
4206		|	add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*)
4207		|	str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)]
4208		|	b >2
4209		|1:
4210		|	ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
4211		|	ldr Rx(def_reg), [Rx(tmp_reg)]
4212		|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
4213		|	str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
4214		|2:
4215	} else {
4216		|	ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
4217		|	cmp Rw(tmp_reg), wzr
4218		|	bge >1
4219		|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)]
4220		|	sxtw Rx(tmp_reg), Rw(tmp_reg)
4221		|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
4222		|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
4223		|	add Rw(tmp_reg), Rw(tmp_reg), #16
4224		|	str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)]
4225		|	b >2
4226		|1:
4227		|	ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
4228		|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
4229		|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
4230		|	str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)]
4231		|2:
4232	}
4233	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4234		ir_emit_store(ctx, type, def, def_reg);
4235	}
4236#endif
4237}
4238
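/*
 * SWITCH: a jump table is used when the case values are dense enough that
 * (max - min) < count * 8, i.e. the table averages fewer than eight slots
 * per case (e.g. cases {0, 3, 9} give 9 < 24, so a 10-entry table wins);
 * the value is range-checked against the default label and dispatched via
 * adr/ldr/br through the .jmp_table section. Sparse cases fall back to a
 * linear chain of cmp/beq tests.
 */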
4239static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
4240{
4241	ir_backend_data *data = ctx->data;
4242	dasm_State **Dst = &data->dasm_state;
4243	ir_type type;
4244	ir_block *bb;
4245	ir_insn *use_insn, *val;
4246	uint32_t n, *p, use_block;
4247	int i;
4248	int label, default_label = 0;
4249	int count = 0;
4250	ir_val min, max;
4251	ir_reg op1_reg, op2_reg, tmp_reg;
4252
4253	type = ctx->ir_base[insn->op2].type;
4254	if (IR_IS_TYPE_SIGNED(type)) {
4255		min.u64 = 0x7fffffffffffffff;
4256		max.u64 = 0x8000000000000000;
4257	} else {
4258		min.u64 = 0xffffffffffffffff;
4259		max.u64 = 0x0;
4260	}
4261
4262	bb = &ctx->cfg_blocks[b];
4263	p = &ctx->cfg_edges[bb->successors];
4264	for (n = bb->successors_count; n != 0; p++, n--) {
4265		use_block = *p;
4266		use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
4267		if (use_insn->op == IR_CASE_VAL) {
4268			val = &ctx->ir_base[use_insn->op2];
4269			IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4270			if (IR_IS_TYPE_SIGNED(type)) {
4271				IR_ASSERT(IR_IS_TYPE_SIGNED(val->type));
4272				min.i64 = IR_MIN(min.i64, val->val.i64);
4273				max.i64 = IR_MAX(max.i64, val->val.i64);
4274			} else {
4275				IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type));
4276				min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64);
4277				max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64);
4278			}
4279			count++;
4280		} else {
4281			IR_ASSERT(use_insn->op == IR_CASE_DEFAULT);
4282			default_label = ir_skip_empty_target_blocks(ctx, use_block);
4283		}
4284	}
4285
4286	op1_reg = ctx->regs[def][1];
4287	op2_reg = ctx->regs[def][2];
4288	tmp_reg = ctx->regs[def][3];
4289
4290	IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4291	if (IR_REG_SPILLED(op2_reg)) {
4292		op2_reg = IR_REG_NUM(op2_reg);
4293		ir_emit_load(ctx, type, op2_reg, insn->op2);
4294	} else if (IR_IS_CONST_REF(insn->op2)) {
4295		ir_emit_load(ctx, type, op2_reg, insn->op2);
4296	}
4297
4298	/* Generate a table jmp or a sequence of compare-and-branch tests */
4299	if ((max.i64-min.i64) < count * 8) {
4300		int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1));
4301
4302		for (i = 0; i <= (max.i64 - min.i64); i++) {
4303			labels[i] = default_label;
4304		}
4305		p = &ctx->cfg_edges[bb->successors];
4306		for (n = bb->successors_count; n != 0; p++, n--) {
4307			use_block = *p;
4308			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
4309			if (use_insn->op == IR_CASE_VAL) {
4310				val = &ctx->ir_base[use_insn->op2];
4311				IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4312				label = ir_skip_empty_target_blocks(ctx, use_block);
4313				labels[val->val.i64 - min.i64] = label;
4314			}
4315		}
4316
4317		if (aarch64_may_encode_imm12(max.i64)) {
4318			|	ASM_REG_IMM_OP cmp, type, op2_reg, max.i64
4319		} else {
4320			ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64);
4321			|	ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
4322		}
4323		if (IR_IS_TYPE_SIGNED(type)) {
4324			|	bgt =>default_label
4325		} else {
4326			|	bhi =>default_label
4327		}
4328
4329		if (op1_reg == IR_REG_NONE) {
4330			op1_reg = op2_reg;
4331		}
4332		if (aarch64_may_encode_imm12(min.i64)) {
4333			|	ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64
4334		} else {
4335			ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64);
4336			|	ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg
4337		}
4338		if (IR_IS_TYPE_SIGNED(type)) {
4339			|	blt =>default_label
4340		} else {
4341			|	blo =>default_label
4342		}
4343		|	adr Rx(tmp_reg), >1
4344		|	ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3]
4345		|	br Rx(tmp_reg)
4346		|.jmp_table
4347		if (!data->jmp_table_label) {
4348			data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3;
4349			|=>data->jmp_table_label:
4350		}
4351		|.align 8
4352		|1:
4353		for (i = 0; i <= (max.i64 - min.i64); i++) {
4354			int b = labels[i];
4355			ir_block *bb = &ctx->cfg_blocks[b];
4356			ir_insn *insn = &ctx->ir_base[bb->end];
4357
4358			if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) {
4359				ir_ref prev = ctx->prev_ref[bb->end];
4360				if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) {
4361					prev = ctx->prev_ref[prev];
4362				}
4363				if (prev == bb->start) {
4364					void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4365
4366					|	.addr &addr
4367					if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
4368						bb->flags |= IR_BB_EMPTY;
4369					}
4370					continue;
4371				}
4372			}
4373			|	.addr =>b
4374		}
4375		|.code
4376		ir_mem_free(labels);
4377	} else {
4378		p = &ctx->cfg_edges[bb->successors];
4379		for (n = bb->successors_count; n != 0; p++, n--) {
4380			use_block = *p;
4381			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
4382			if (use_insn->op == IR_CASE_VAL) {
4383				val = &ctx->ir_base[use_insn->op2];
4384				IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4385				label = ir_skip_empty_target_blocks(ctx, use_block);
4386				if (aarch64_may_encode_imm12(val->val.i64)) {
4387					|	ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64
4388				} else {
4389					ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64);
4390					|	ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
4392				}
4393				|	beq =>label
4394			}
4395		}
4396		if (default_label) {
4397			|	b =>default_label
4398		}
4399	}
4400}
4401
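/*
 * Computes the outgoing stack space a call needs: the first IR_REG_INT_ARGS
 * integer and IR_REG_FP_ARGS floating-point arguments (eight of each under
 * AAPCS64) travel in registers, and every further argument takes at least a
 * pointer-sized stack slot. On Apple arm64, anonymous (variadic) arguments
 * always go on the stack, even when argument registers remain free.
 */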
4402static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
4403{
4404	int j, n;
4405	ir_type type;
4406	int int_param = 0;
4407	int fp_param = 0;
4408	int int_reg_params_count = IR_REG_INT_ARGS;
4409	int fp_reg_params_count = IR_REG_FP_ARGS;
4410	int32_t used_stack = 0;
4411#ifdef __APPLE__
4412	const ir_proto_t *proto = ir_call_proto(ctx, insn);
4413	int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
4414#endif
4415
4416	n = insn->inputs_count;
4417	for (j = 3; j <= n; j++) {
4418		type = ctx->ir_base[ir_insn_op(insn, j)].type;
4419#ifdef __APPLE__
4420		if (j > last_named_input) {
4421			used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
4422		} else
4423#endif
4424		if (IR_IS_TYPE_INT(type)) {
4425			if (int_param >= int_reg_params_count) {
4426				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
4427			}
4428			int_param++;
4429		} else {
4430			IR_ASSERT(IR_IS_TYPE_FP(type));
4431			if (fp_param >= fp_reg_params_count) {
4432				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
4433			}
4434			fp_param++;
4435		}
4436	}
4437
4438	return used_stack;
4439}
4440
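/*
 * Arguments are shuffled in three passes: (1) register sources bound for
 * stack slots are stored and the required REG->REG moves are collected,
 * (2) ir_parallel_copy() resolves the collected moves, including cycles,
 * using one GP and one FP temporary, and (3) constants and spilled values
 * are materialized last, once their destination registers are free.
 */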
4441static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
4442{
4443	ir_backend_data *data = ctx->data;
4444	dasm_State **Dst = &data->dasm_state;
4445	int j, n;
4446	ir_ref arg;
4447	ir_insn *arg_insn;
4448	uint8_t type;
4449	ir_reg src_reg, dst_reg;
4450	int int_param = 0;
4451	int fp_param = 0;
4452	int count = 0;
4453	int int_reg_params_count = IR_REG_INT_ARGS;
4454	int fp_reg_params_count = IR_REG_FP_ARGS;
4455	const int8_t *int_reg_params = _ir_int_reg_params;
4456	const int8_t *fp_reg_params = _ir_fp_reg_params;
4457	int32_t used_stack, stack_offset = 0;
4458	ir_copy *copies;
4459	bool do_pass3 = 0;
4460	/* For temporaries we may use any scratch registers except for registers used for parameters */
4461	ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */
4462
4463	n = insn->inputs_count;
4464	if (n < 3) {
4465		return 0;
4466	}
4467
4468	if (tmp_reg == IR_REG_NONE) {
4469		tmp_reg = IR_REG_IP0;
4470	}
4471
4472	if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
4473		// TODO: support for preallocated stack
4474		used_stack = 0;
4475	} else {
4476		used_stack = ir_call_used_stack(ctx, insn);
4477		/* Stack must be 16 byte aligned */
4478		used_stack = IR_ALIGNED_SIZE(used_stack, 16);
4479		if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) {
4480			used_stack = 0;
4481		} else {
4482			ctx->call_stack_size += used_stack;
4483			if (used_stack) {
4484				if (insn->op == IR_TAILCALL && !(ctx->flags & IR_USE_FRAME_POINTER)) {
4485					ctx->flags |= IR_USE_FRAME_POINTER;
4486					|	stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]!
4487					|	mov x29, sp
4488				}
4489				|	sub sp, sp, #used_stack
4490			}
4491		}
4492	}
4493
4494#ifdef __APPLE__
4495	const ir_proto_t *proto = ir_call_proto(ctx, insn);
4496	int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
4497#endif
4498
4499	/* 1. move all register arguments that should be passed through stack
4500	 *    and collect arguments that should be passed through registers */
4501	copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
4502	for (j = 3; j <= n; j++) {
4503		arg = ir_insn_op(insn, j);
4504		src_reg = ir_get_alocated_reg(ctx, def, j);
4505		arg_insn = &ctx->ir_base[arg];
4506		type = arg_insn->type;
4507#ifdef __APPLE__
4508		if (j > last_named_input) {
4509			dst_reg = IR_REG_NONE; /* pass argument through stack */
4510		} else
4511#endif
4512		if (IR_IS_TYPE_INT(type)) {
4513			if (int_param < int_reg_params_count) {
4514				dst_reg = int_reg_params[int_param];
4515			} else {
4516				dst_reg = IR_REG_NONE; /* pass argument through stack */
4517			}
4518			int_param++;
4519		} else {
4520			IR_ASSERT(IR_IS_TYPE_FP(type));
4521			if (fp_param < fp_reg_params_count) {
4522				dst_reg = fp_reg_params[fp_param];
4523			} else {
4524				dst_reg = IR_REG_NONE; /* pass argument through stack */
4525			}
4526			fp_param++;
4527		}
4528		if (dst_reg != IR_REG_NONE) {
4529			if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) {
4530				/* delay CONST->REG and MEM->REG moves to third pass */
4531				do_pass3 = 1;
4532			} else {
4533				IR_ASSERT(src_reg != IR_REG_NONE);
4534				if (IR_REG_SPILLED(src_reg)) {
4535					src_reg = IR_REG_NUM(src_reg);
4536					ir_emit_load(ctx, type, src_reg, arg);
4537				}
4538				if (src_reg != dst_reg) {
4539					/* delay REG->REG moves to second pass */
4540					copies[count].type = type;
4541					copies[count].from = src_reg;
4542					copies[count].to = dst_reg;
4543					count++;
4544				}
4545			}
4546		} else {
4547			/* Pass register arguments to stack (REG->MEM moves) */
4548			if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) {
4549				ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
4550			} else {
4551				do_pass3 = 1;
4552			}
4553			stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
4554		}
4555	}
4556
4557	/* 2. move all arguments that should be passed from one register to another (REG->REG movs) */
4558	if (count) {
4559		ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg);
4560	}
4561	ir_mem_free(copies);
4562
4563	/* 3. move the remaining memory and immediate values */
4564	if (do_pass3) {
4565		stack_offset = 0;
4566		int_param = 0;
4567		fp_param = 0;
4568		for (j = 3; j <= n; j++) {
4569			arg = ir_insn_op(insn, j);
4570			src_reg = ir_get_alocated_reg(ctx, def, j);
4571			arg_insn = &ctx->ir_base[arg];
4572			type = arg_insn->type;
4573#ifdef __APPLE__
4574			if (j > last_named_input) {
4575				dst_reg = IR_REG_NONE; /* pass argument through stack */
4576			} else
4577#endif
4578			if (IR_IS_TYPE_INT(type)) {
4579				if (int_param < int_reg_params_count) {
4580					dst_reg = int_reg_params[int_param];
4581				} else {
4582					dst_reg = IR_REG_NONE; /* argument already passed through stack */
4583				}
4584				int_param++;
4585			} else {
4586				IR_ASSERT(IR_IS_TYPE_FP(type));
4587				if (fp_param < fp_reg_params_count) {
4588					dst_reg = fp_reg_params[fp_param];
4589				} else {
4590					dst_reg = IR_REG_NONE; /* argument already passed through stack */
4591				}
4592				fp_param++;
4593			}
4594			if (dst_reg != IR_REG_NONE) {
4595				if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) {
4596					if (IR_IS_CONST_REF(arg) && IR_IS_TYPE_INT(type)) {
4597						if (ir_type_size[type] == 1) {
4598							type = IR_ADDR;
4599						}
4600					}
4601					ir_emit_load(ctx, type, dst_reg, arg);
4602				}
4603			} else {
4604				if (IR_IS_TYPE_INT(type)) {
4605					if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) {
4606						IR_ASSERT(tmp_reg != IR_REG_NONE);
4607						ir_emit_load(ctx, type, tmp_reg, arg);
4608						if (IR_IS_CONST_REF(arg)) {
4609							type = IR_ADDR; //TODO: ???
4610						}
4611						ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
4612					} else if (IR_REG_SPILLED(src_reg)) {
4613						src_reg = IR_REG_NUM(src_reg);
4614						ir_emit_load(ctx, type, src_reg, arg);
4615						ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
4616					}
4617				} else {
4618					if (IR_IS_CONST_REF(arg)) {
4619						ir_emit_load(ctx, type, tmp_fp_reg, arg);
4620						ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_fp_reg);
4621					} else if (src_reg == IR_REG_NONE) {
4622						IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
4623						ir_emit_load(ctx, type, tmp_fp_reg, arg);
4624						ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_fp_reg);
4625					} else if (IR_REG_SPILLED(src_reg)) {
4626						src_reg = IR_REG_NUM(src_reg);
4627						ir_emit_load(ctx, type, src_reg, arg);
4628						ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
4629					}
4630				}
4631				stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
4632			}
4633		}
4634	}
4635	return used_stack;
4636}
4637
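/*
 * Emits the call itself: a direct bl is used when aarch64_may_use_b()
 * proves the target lies within the +/-128MB b/bl displacement of the code
 * buffer; otherwise the address is materialized into the temporary register
 * and called through blr. Afterwards the outgoing stack area is released
 * and the return value is moved from x0/v0 into its allocated location.
 */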
4638static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack)
4639{
4640	ir_backend_data *data = ctx->data;
4641	dasm_State **Dst = &data->dasm_state;
4642	ir_reg def_reg;
4643
4644	if (IR_IS_CONST_REF(insn->op2)) {
4645		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4646
4647		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4648			|	bl &addr
4649		} else {
4650			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4651			|	blr Rx(IR_REG_INT_TMP)
4652		}
4653	} else {
4654		ir_reg op2_reg = ctx->regs[def][2];
4655
4656		IR_ASSERT(op2_reg != IR_REG_NONE);
4657		if (IR_REG_SPILLED(op2_reg)) {
4658			op2_reg = IR_REG_NUM(op2_reg);
4659			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4660		}
4661		|	blr Rx(op2_reg)
4662	}
4663
4664	if (used_stack) {
4665		|	add sp, sp, #used_stack
4666		ctx->call_stack_size -= used_stack;
4667	}
4668
4669	if (insn->type != IR_VOID) {
4670		if (IR_IS_TYPE_INT(insn->type)) {
4671			def_reg = IR_REG_NUM(ctx->regs[def][0]);
4672			if (def_reg != IR_REG_NONE) {
4673				if (def_reg != IR_REG_INT_RET1) {
4674					ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
4675				}
4676				if (IR_REG_SPILLED(ctx->regs[def][0])) {
4677					ir_emit_store(ctx, insn->type, def, def_reg);
4678				}
4679			} else if (ctx->use_lists[def].count > 1) {
4680				ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
4681			}
4682		} else {
4683			IR_ASSERT(IR_IS_TYPE_FP(insn->type));
4684			def_reg = IR_REG_NUM(ctx->regs[def][0]);
4685			if (def_reg != IR_REG_NONE) {
4686				if (def_reg != IR_REG_FP_RET1) {
4687					ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
4688				}
4689				if (IR_REG_SPILLED(ctx->regs[def][0])) {
4690					ir_emit_store(ctx, insn->type, def, def_reg);
4691				}
4692			} else if (ctx->use_lists[def].count > 1) {
4693				ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
4694			}
4695		}
4696	}
4697}
4698
4699static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4700{
4701	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
4702	ir_emit_call_ex(ctx, def, insn, used_stack);
4703}
4704
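/*
 * TAILCALL: if the call still needs outgoing stack arguments it cannot
 * reuse the caller's frame, so it degrades to a regular call followed by a
 * return; otherwise the epilogue is emitted and control transfers with a
 * plain b/br, leaving the callee to return to our caller.
 */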
4705static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4706{
4707	ir_backend_data *data = ctx->data;
4708	dasm_State **Dst = &data->dasm_state;
4709	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
4710
4711	if (used_stack != 0) {
4712		ir_emit_call_ex(ctx, def, insn, used_stack);
4713		ir_emit_return_void(ctx);
4714		return;
4715	}
4716
4717	ir_emit_epilogue(ctx);
4718
4719	if (IR_IS_CONST_REF(insn->op2)) {
4720		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4721
4722		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4723			|	b &addr
4724		} else {
4725			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4726			|	br Rx(IR_REG_INT_TMP)
4727		}
4728	} else {
4729		ir_reg op2_reg = ctx->regs[def][2];
4730
4731		IR_ASSERT(op2_reg != IR_REG_NONE);
4732		if (IR_REG_SPILLED(op2_reg)) {
4733			op2_reg = IR_REG_NUM(op2_reg);
4734			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4735		}
4736		|	br Rx(op2_reg)
4737	}
4738}
4739
4740static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4741{
4742	ir_backend_data *data = ctx->data;
4743	dasm_State **Dst = &data->dasm_state;
4744	ir_reg op2_reg = ctx->regs[def][2];
4745
4746	if (op2_reg != IR_REG_NONE) {
4747		if (IR_REG_SPILLED(op2_reg)) {
4748			op2_reg = IR_REG_NUM(op2_reg);
4749			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4750		}
4751		|	br Rx(op2_reg)
4752	} else if (IR_IS_CONST_REF(insn->op2)) {
4753		void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4754
4755		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4756			|	b &addr
4757		} else {
4758			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4759			|	br Rx(IR_REG_INT_TMP)
4760		}
4761	} else {
4762		IR_ASSERT(0);
4763	}
4764}
4765
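/*
 * GUARDs branch to a side exit: IR_GUARD deoptimizes when its condition is
 * false, IR_GUARD_NOT when it is true, which is why the condition code is
 * inverted before the branch is emitted. Plain truthiness tests use
 * cbz/cbnz to fuse the zero compare and the branch into one instruction.
 */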
4766static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4767{
4768	ir_backend_data *data = ctx->data;
4769	dasm_State **Dst = &data->dasm_state;
4770	ir_reg op2_reg = ctx->regs[def][2];
4771	ir_type type = ctx->ir_base[insn->op2].type;
4772
4773	IR_ASSERT(IR_IS_TYPE_INT(type));
4774	if (IR_IS_CONST_REF(insn->op2)) {
4775		bool is_true = ir_ref_is_true(ctx, insn->op2);
4776
4777		if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) {
4778			if (IR_IS_CONST_REF(insn->op3)) {
4779				void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
4780
4781				if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4782					|	b &addr
4783				} else {
4784					ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4785					|	br Rx(IR_REG_INT_TMP)
4786				}
4787			} else {
4788				IR_ASSERT(0);
4789			}
4790		}
4791		return;
4792	}
4793
4794	IR_ASSERT(op2_reg != IR_REG_NONE);
4795	if (IR_REG_SPILLED(op2_reg)) {
4796		op2_reg = IR_REG_NUM(op2_reg);
4797		ir_emit_load(ctx, type, op2_reg, insn->op2);
4798	}
4799
4800	if (IR_IS_CONST_REF(insn->op3)) {
4801		void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
4802
4803		if (insn->op == IR_GUARD) {
4804			if (ir_type_size[type] == 8) {
4805				|	cbz Rx(op2_reg), &addr
4806			} else {
4807				|	cbz Rw(op2_reg), &addr
4808			}
4809		} else {
4810			if (ir_type_size[type] == 8) {
4811				|	cbnz Rx(op2_reg), &addr
4812			} else {
4813				|	cbnz Rw(op2_reg), &addr
4814			}
4815		}
4816	} else {
4817		IR_ASSERT(0);
4818	}
4819}
4820
4821static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg)
4822{
4823	ir_backend_data *data = ctx->data;
4824	dasm_State **Dst = &data->dasm_state;
4825
4826	if (op == IR_EQ) {
4827		if (ir_type_size[type] == 8) {
4828			|	cbnz Rx(reg), &addr
4829		} else {
4830			|	cbnz Rw(reg), &addr
4831		}
4832	} else {
4833		IR_ASSERT(op == IR_NE);
4834		if (ir_type_size[type] == 8) {
4835			|	cbz Rx(reg), &addr
4836		} else {
4837			|	cbz Rw(reg), &addr
4838		}
4839	}
4840}
4841
4842static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
4843{
4844	ir_backend_data *data = ctx->data;
4845	dasm_State **Dst = &data->dasm_state;
4846
4847	if (int_cmp) {
4848		switch (op) {
4849			default:
4850				IR_ASSERT(0 && "NIY binary op");
4851			case IR_EQ:
4852				|	beq &addr
4853				break;
4854			case IR_NE:
4855				|	bne &addr
4856				break;
4857			case IR_LT:
4858				|	blt &addr
4859				break;
4860			case IR_GE:
4861				|	bge &addr
4862				break;
4863			case IR_LE:
4864				|	ble &addr
4865				break;
4866			case IR_GT:
4867				|	bgt &addr
4868				break;
4869			case IR_ULT:
4870				|	blo &addr
4871				break;
4872			case IR_UGE:
4873				|	bhs &addr
4874				break;
4875			case IR_ULE:
4876				|	bls &addr
4877				break;
4878			case IR_UGT:
4879				|	bhi &addr
4880				break;
4881		}
4882	} else {
4883		switch (op) {
4884			default:
4885				IR_ASSERT(0 && "NIY binary op");
4886			case IR_EQ:
4887				|	beq &addr
4888				break;
4889			case IR_NE:
4890				|	bne &addr
4891				break;
4892			case IR_LT:
4893				|	bmi &addr
4894				break;
4895			case IR_GE:
4896				|	bge &addr
4897				break;
4898			case IR_LE:
4899				|	bls &addr
4900				break;
4901			case IR_GT:
4902				|	bgt &addr
4903				break;
4904//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
4905//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
4906//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
4907//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
4908		}
4909	}
4910}
4911
4912static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
4913{
4914	ir_backend_data *data = ctx->data;
4915	dasm_State **Dst = &data->dasm_state;
4916	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
4917	ir_op op = cmp_insn->op;
4918	ir_type type = ctx->ir_base[cmp_insn->op1].type;
4919	ir_ref op1 = cmp_insn->op1;
4920	ir_ref op2 = cmp_insn->op2;
4921	ir_reg op1_reg = ctx->regs[insn->op2][1];
4922	ir_reg op2_reg = ctx->regs[insn->op2][2];
4923	void *addr;
4924
4925	if (op1_reg != IR_REG_NONE && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) {
4926		op1_reg = IR_REG_NUM(op1_reg);
4927		ir_emit_load(ctx, type, op1_reg, op1);
4928	}
4929	if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) {
4930		op2_reg = IR_REG_NUM(op2_reg);
4931		if (op1 != op2) {
4932			ir_emit_load(ctx, type, op2_reg, op2);
4933		}
4934	}
4935
4936	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
4937
4938	if (IR_IS_CONST_REF(op2)
4939	 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)
4940	 && ctx->ir_base[op2].val.u64 == 0) {
4941		if (op == IR_ULT) {
4942			/* always false */
4943			if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4944				|	b &addr
4945			} else {
4946				ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4947				|	br Rx(IR_REG_INT_TMP)
4948			}
4949			return;
4950		} else if (op == IR_UGE) {
4951			/* always true */
4952			return;
4953		} else if (op == IR_ULE) {
4954			op = IR_EQ;
4955		} else if (op == IR_UGT) {
4956			op = IR_NE;
4957		}
4958		if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
4959			if (insn->op == IR_GUARD_NOT) {
4960				op ^= 1; // reverse
4961			}
4962			ir_emit_guard_jz(ctx, op, addr, type, op1_reg);
4963			return;
4964		}
4965	}
4966	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
4967
4968	if (insn->op == IR_GUARD) {
4969		op ^= 1; // reverse
4970	}
4971
4972	ir_emit_guard_jcc(ctx, op, addr, 1);
4973}
4974
4975static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
4976{
4977	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
4978	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
4979
4980	if (insn->op == IR_GUARD) {
4981		op ^= 1; // reverse
4982	}
4983	ir_emit_guard_jcc(ctx, op, addr, 0);
4984}
4985
4986static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4987{
4988	ir_backend_data *data = ctx->data;
4989	dasm_State **Dst = &data->dasm_state;
4990	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
4991	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
4992	ir_type type = math_insn->type;
4993	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
4994
4995	IR_ASSERT(IR_IS_TYPE_INT(type));
4996	if (math_insn->op == IR_MUL_OV) {
4997		if (insn->op == IR_GUARD) {
4998			|	beq &addr
4999		} else {
5000			|	bne &addr
5001		}
5002	} else if (IR_IS_TYPE_SIGNED(type)) {
5003		if (insn->op == IR_GUARD) {
5004			|	bvc &addr
5005		} else {
5006			|	bvs &addr
5007		}
5008	} else {
5009		if (insn->op == IR_GUARD) {
5010			|	bcc &addr
5011		} else {
5012			|	bcs &addr
5013		}
5014	}
5015}
5016
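/*
 * TLS: DynAsm has no mrs mnemonic here, so the instruction is emitted as a
 * raw word: 0xd53bd040 encodes "mrs xN, tpidr_el0" (0xd53bd060 is the Apple
 * tpidrro_el0 variant) with the destination register in the low five bits.
 */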
5017static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn)
5018{
5019	ir_backend_data *data = ctx->data;
5020	dasm_State **Dst = &data->dasm_state;
5021	uint32_t code;
5022	ir_reg reg = IR_REG_NUM(ctx->regs[def][0]);
5023
5024	if (ctx->use_lists[def].count == 1) {
5025		/* dead load */
5026		return;
5027	}
5028
5029||#ifdef __APPLE__
5030||	code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0
5031|	.long code
5032|	and Rx(reg), Rx(reg), #0xfffffffffffffff8
5033|//???	MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1
5034|//???	MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1
5035||#else
5036||	code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0
5037|	.long code
5038||//???	IR_ASSERT(insn->op2 <= LDR_STR_PIMM64);
5039|	ldr Rx(reg), [Rx(reg), #insn->op2]
5040||#endif
5041	if (IR_REG_SPILLED(ctx->regs[def][0])) {
5042		ir_emit_store(ctx, IR_ADDR, def, reg);
5043	}
5044}
5045
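/*
 * EXITCALL: spills the complete register file (32 FP + 32 GP slots, 512
 * bytes) to the stack in a fixed layout, rewrites the saved slot 31 (the
 * x31/sp position) with the pre-spill stack pointer, and calls the exit
 * handler with the value the exit thunk left in IR_REG_INT_TMP (presumably
 * the exit identifier) as the first argument and the save area as the
 * second.
 */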
5046static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
5047{
5048	ir_backend_data *data = ctx->data;
5049	dasm_State **Dst = &data->dasm_state;
5050	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
5051
5052	IR_ASSERT(def_reg != IR_REG_NONE);
5053
5054	|	stp d30, d31, [sp, #-16]!
5055	|	stp d28, d29, [sp, #-16]!
5056	|	stp d26, d27, [sp, #-16]!
5057	|	stp d24, d25, [sp, #-16]!
5058	|	stp d22, d23, [sp, #-16]!
5059	|	stp d20, d21, [sp, #-16]!
5060	|	stp d18, d19, [sp, #-16]!
5061	|	stp d16, d17, [sp, #-16]!
5062	|	stp d14, d15, [sp, #-16]!
5063	|	stp d12, d13, [sp, #-16]!
5064	|	stp d10, d11, [sp, #-16]!
5065	|	stp d8, d9, [sp, #-16]!
5066	|	stp d6, d7, [sp, #-16]!
5067	|	stp d4, d5, [sp, #-16]!
5068	|	stp d2, d3, [sp, #-16]!
5069	|	stp d0, d1, [sp, #-16]!
5070
5071	|	str x30, [sp, #-16]!
5072	|	stp x28, x29, [sp, #-16]!
5073	|	stp x26, x27, [sp, #-16]!
5074	|	stp x24, x25, [sp, #-16]!
5075	|	stp x22, x23, [sp, #-16]!
5076	|	stp x20, x21, [sp, #-16]!
5077	|	stp x18, x19, [sp, #-16]!
5078	|	stp x16, x17, [sp, #-16]!
5079	|	stp x14, x15, [sp, #-16]!
5080	|	stp x12, x13, [sp, #-16]!
5081	|	stp x10, x11, [sp, #-16]!
5082	|	stp x8, x9, [sp, #-16]!
5083	|	stp x6, x7, [sp, #-16]!
5084	|	stp x4, x5, [sp, #-16]!
5085	|	stp x2, x3, [sp, #-16]!
5086	|	stp x0, x1, [sp, #-16]!
5087
5088	|	mov Rx(IR_REG_INT_ARG2), sp
5089	|	add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8)
5090	|	str Rx(IR_REG_INT_ARG1), [sp, #(31*8)]
5091	|	mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP)
5092
5093	if (IR_IS_CONST_REF(insn->op2)) {
5094		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
5095
5096		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
5097			|	bl &addr
5098		} else {
5099			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
5100			|	blr Rx(IR_REG_INT_TMP)
5101		}
5102	} else {
5103		IR_ASSERT(0);
5104	}
5105
5106	|	add sp, sp, #(32*8+32*8)
5107
5108	if (def_reg != IR_REG_INT_RET1) {
5109		ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
5110	}
5111	if (IR_REG_SPILLED(ctx->regs[def][0])) {
5112		ir_emit_store(ctx, insn->type, def, def_reg);
5113	}
5114}
5115
5116static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset)
5117{
5118	ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
5119
5120	IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);
5121
5122	if (IR_IS_TYPE_INT(type)) {
5123		if (from_reg != IR_REG_NONE) {
5124			if (to_reg != IR_REG_NONE) {
5125				ir_emit_mov(ctx, type, to_reg, from_reg);
5126			} else {
5127				ir_emit_store(ctx, type, to, from_reg);
5128			}
5129		} else {
5130			ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset));
5131		}
5132	} else {
5133		if (from_reg != IR_REG_NONE) {
5134			if (to_reg != IR_REG_NONE) {
5135				ir_emit_fp_mov(ctx, type, to_reg, from_reg);
5136			} else {
5137				ir_emit_store(ctx, type, to, from_reg);
5138			}
5139		} else {
5140			ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset));
5141		}
5142	}
5143}
5144
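/*
 * Moves incoming parameters from their ABI argument registers (or caller
 * stack slots) into the locations chosen by the register allocator. A
 * stack-passed parameter whose spill slot coincides with its incoming slot
 * needs no move at all (see ir_fix_param_spills() below).
 */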
5145static void ir_emit_load_params(ir_ctx *ctx)
5146{
5147	ir_use_list *use_list = &ctx->use_lists[1];
5148	ir_insn *insn;
5149	ir_ref i, n, *p, use;
5150	int int_param_num = 0;
5151	int fp_param_num = 0;
5152	ir_reg src_reg;
5153	ir_reg dst_reg;
5154	// TODO: Calling convention specific
5155	int int_reg_params_count = IR_REG_INT_ARGS;
5156	int fp_reg_params_count = IR_REG_FP_ARGS;
5157	const int8_t *int_reg_params = _ir_int_reg_params;
5158	const int8_t *fp_reg_params = _ir_fp_reg_params;
5159	int32_t stack_offset = 0;
5160
5161	if (ctx->flags & IR_USE_FRAME_POINTER) {
5162		stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
5163	} else {
5164		stack_offset = ctx->stack_frame_size + ctx->call_stack_size;
5165	}
5166	n = use_list->count;
5167	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
5168		use = *p;
5169		insn = &ctx->ir_base[use];
5170		if (insn->op == IR_PARAM) {
5171			if (IR_IS_TYPE_INT(insn->type)) {
5172				if (int_param_num < int_reg_params_count) {
5173					src_reg = int_reg_params[int_param_num];
5174				} else {
5175					src_reg = IR_REG_NONE;
5176				}
5177				int_param_num++;
5178			} else {
5179				if (fp_param_num < fp_reg_params_count) {
5180					src_reg = fp_reg_params[fp_param_num];
5181				} else {
5182					src_reg = IR_REG_NONE;
5183				}
5184				fp_param_num++;
5185			}
5186			if (ctx->vregs[use]) {
5187				dst_reg = IR_REG_NUM(ctx->regs[use][0]);
5188				IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
5189					stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
5190						((ctx->flags & IR_USE_FRAME_POINTER) ? -ctx->stack_frame_size : ctx->call_stack_size));
5191				if (src_reg != dst_reg) {
5192					ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
5193				}
5194				if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
5195					ir_emit_store(ctx, insn->type, use, dst_reg);
5196				}
5197			}
5198			if (src_reg == IR_REG_NONE) {
5199				if (sizeof(void*) == 8) {
5200					stack_offset += sizeof(void*);
5201				} else {
5202					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
5203				}
5204			}
5205		}
5206	}
5207}

static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
{
	if (IR_IS_TYPE_INT(type)) {
		available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP);
	} else {
		IR_ASSERT(IR_IS_TYPE_FP(type));
		available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP);
	}
	IR_ASSERT(!IR_REGSET_IS_EMPTY(available));
	return IR_REGSET_FIRST(available);
}
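
/*
 * A minimal model of the regset operations used above, assuming one bit per
 * register in a 64-bit mask (this mirrors, but is not, the real IR_REGSET_*
 * implementation): intersection is bitwise AND and "first" is the lowest set
 * bit.  Note that the ctz builtin is undefined for 0, which is why the real
 * code asserts the set is non-empty first.
 */
#if 0
typedef uint64_t toy_regset;
#define TOY_REGSET_INTERSECTION(a, b) ((a) & (b))
#define TOY_REGSET_IS_EMPTY(a)        ((a) == 0)
#define TOY_REGSET_FIRST(a)           __builtin_ctzll(a) /* lowest set bit */
#endif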

static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
	ir_backend_data *data = ctx->data;
	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;

	if (to == 0) {
		if (IR_IS_TYPE_INT(type)) {
			if (ctx->regs[ref][0] == IR_REG_NONE) {
				ctx->regs[ref][0] = IR_REG_X0;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (ctx->regs[ref][1] == IR_REG_NONE) {
				ctx->regs[ref][1] = IR_REG_V0;
			}
		}
	} else if (from != 0) {
		if (IR_IS_TYPE_INT(type)) {
			if (ctx->regs[ref][0] == IR_REG_NONE) {
				ctx->regs[ref][0] = IR_REG_X0;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (ctx->regs[ref][1] == IR_REG_NONE) {
				ctx->regs[ref][1] = IR_REG_V0;
			}
		}
	}
	return 1;
}
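
/*
 * Why a temporary register is reserved here: de-SSA parallel copies may form
 * cycles, e.g. (a, b) <- (b, a), which cannot be serialized with plain
 * moves.  With one spare register t the cycle is broken as:
 *
 *     mov t, a
 *     mov a, b
 *     mov b, t
 *
 * ir_fix_dessa_tmps() guarantees such a spare (x0 for integer values, v0 for
 * floating-point values by default) at the end of every block that has
 * de-SSA moves.
 */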

static void ir_fix_param_spills(ir_ctx *ctx)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_insn *insn;
	ir_ref i, n, *p, use;
	int int_param_num = 0;
	int fp_param_num = 0;
	ir_reg src_reg;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_offset = 0;
	int32_t param_stack_size = 0;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		/* skip old frame pointer and return address */
		stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
	} else {
		stack_offset = ctx->stack_frame_size;
	}
	n = use_list->count;
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (int_param_num < int_reg_params_count) {
					src_reg = int_reg_params[int_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				int_param_num++;
			} else {
				if (fp_param_num < fp_reg_params_count) {
					src_reg = fp_reg_params[fp_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				fp_param_num++;
			}
			if (src_reg == IR_REG_NONE) {
				if (ctx->vregs[use]) {
					ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];
					if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
					 && ival->stack_spill_pos == -1
					 && (ival->next || ival->reg == IR_REG_NONE)) {
						ival->stack_spill_pos = stack_offset;
					}
				}
				if (sizeof(void*) == 8) {
					stack_offset += sizeof(void*);
					param_stack_size += sizeof(void*);
				} else {
					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
					param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
				}
			}
		}
	}

	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
	ctx->param_stack_size = param_stack_size;
}
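
/*
 * Worked example with assumed numbers: stack_frame_size = 40,
 * stack_frame_alignment = 8 and a frame pointer in use.  The first
 * memory-passed parameter then gets
 *
 *     stack_spill_pos = 2 * sizeof(void*) + (40 - 8) = 48
 *
 * i.e. it is "spilled" in place, to the slot where the caller already stored
 * it, so ir_emit_load_params() never has to copy it on entry.
 */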

static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_insn *insn;
	ir_ref i, n, j, *p;
	uint32_t *rule, insn_flags;
	ir_backend_data *data = ctx->data;
	ir_regset available = 0;
	ir_target_constraints constraints;
	uint32_t def_flags;
	ir_reg reg;

	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);

	/* vregs + tmp + fixed + SCRATCH + ALL */
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

	if (!ctx->arena) {
		ctx->arena = ir_arena_create(16 * 1024);
	}
	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
			switch (ctx->rules ? *rule : insn->op) {
				case IR_START:
				case IR_BEGIN:
				case IR_END:
				case IR_IF_TRUE:
				case IR_IF_FALSE:
				case IR_CASE_VAL:
				case IR_CASE_DEFAULT:
				case IR_MERGE:
				case IR_LOOP_BEGIN:
				case IR_LOOP_END:
					break;
				default:
					def_flags = ir_get_target_constraints(ctx, i, &constraints);
					if (ctx->rules
					 && *rule != IR_CMP_AND_BRANCH_INT
					 && *rule != IR_CMP_AND_BRANCH_FP
					 && *rule != IR_GUARD_CMP_INT
					 && *rule != IR_GUARD_CMP_FP) {
						available = IR_REGSET_SCRATCH;
					}
					if (ctx->vregs[i]) {
						reg = constraints.def_reg;
						if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
							IR_REGSET_EXCL(available, reg);
							ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
						} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
							if (insn->op == IR_VLOAD
							 && ctx->live_intervals[ctx->vregs[i]]
							 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
								/* pass */
							} else if (insn->op != IR_PARAM) {
								reg = ir_get_free_reg(insn->type, available);
								IR_REGSET_EXCL(available, reg);
								ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
							}
						}
						if (!ctx->live_intervals[ctx->vregs[i]]) {
							ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
							memset(ival, 0, sizeof(ir_live_interval));
							ctx->live_intervals[ctx->vregs[i]] = ival;
							ival->type = insn->type;
							ival->reg = IR_REG_NONE;
							ival->vreg = ctx->vregs[i];
							ival->stack_spill_pos = -1;
							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
							} else {
								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
							}
						} else if (insn->op == IR_PARAM) {
							IR_ASSERT(0 && "unexpected PARAM");
							return;
						}
					} else if (insn->op == IR_VAR) {
						ir_use_list *use_list = &ctx->use_lists[i];
						ir_ref n = use_list->count;

						if (n > 0) {
							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
							ir_ref i, *p, use;
							ir_insn *use_insn;

							for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
								use = *p;
								use_insn = &ctx->ir_base[use];
								if (use_insn->op == IR_VLOAD) {
									if (ctx->vregs[use]
									 && !ctx->live_intervals[ctx->vregs[use]]) {
										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
										memset(ival, 0, sizeof(ir_live_interval));
										ctx->live_intervals[ctx->vregs[use]] = ival;
										ival->type = insn->type;
										ival->reg = IR_REG_NONE;
										ival->vreg = ctx->vregs[use];
										ival->stack_spill_pos = stack_spill_pos;
									}
								} else if (use_insn->op == IR_VSTORE) {
									if (!IR_IS_CONST_REF(use_insn->op3)
									 && ctx->vregs[use_insn->op3]
									 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
										memset(ival, 0, sizeof(ir_live_interval));
										ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival;
										ival->type = insn->type;
										ival->reg = IR_REG_NONE;
										ival->vreg = ctx->vregs[use_insn->op3];
										ival->stack_spill_pos = stack_spill_pos;
									}
								}
							}
						}
					}

					insn_flags = ir_op_flags[insn->op];
					n = constraints.tmps_count;
					if (n) {
						do {
							n--;
							if (constraints.tmp_regs[n].type) {
								ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
								IR_REGSET_EXCL(available, reg);
								ctx->regs[i][constraints.tmp_regs[n].num] = reg;
							} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
								available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
							} else {
								IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
							}
						} while (n);
					}
					n = insn->inputs_count;
					for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
						ir_ref input = *p;
						if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) {
							if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) {
								ir_reg reg = IR_REG_NUM(ctx->regs[i][0]);
								ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD;
							} else {
								uint8_t use_flags = IR_USE_FLAGS(def_flags, j);
								ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;

								if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
									IR_REGSET_EXCL(available, reg);
									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
								} else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
									ctx->regs[i][j] = ctx->regs[i][1];
								} else if (use_flags & IR_USE_MUST_BE_IN_REG) {
									reg = ir_get_free_reg(ctx->ir_base[input].type, available);
									IR_REGSET_EXCL(available, reg);
									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
								}
							}
						}
					}
					break;
			}
			n = ir_insn_len(insn);
			i += n;
			insn += n;
			rule += n;
		}
		if (bb->flags & IR_BB_DESSA_MOVES) {
			data->dessa_from_block = b;
			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
		}
	}

	ctx->used_preserved_regs = ctx->fixed_save_regset;
	ctx->flags |= IR_NO_STACK_COMBINE;
	ir_fix_stack_frame(ctx);
}
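
/*
 * The allocation strategy above is deliberately simple: with no liveness
 * information, every virtual register gets its own slot.  A toy model of
 * the slot assignment (illustrative only; the real ir_allocate_spill_slot()
 * additionally recycles slots via the unused_slot_* free lists seen in
 * ir_backend_data):
 */
#if 0
static int32_t toy_allocate_slot(int32_t *frame_size, uint8_t size)
{
	int32_t pos = IR_ALIGNED_SIZE(*frame_size, size); /* align to the value size */

	*frame_size = pos + size;
	return pos;
}
#endif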

static void ir_preallocate_call_stack(ir_ctx *ctx)
{
	int call_stack_size, peak_call_stack_size = 0;
	ir_ref i, n;
	ir_insn *insn;

	for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
		if (insn->op == IR_CALL) {
			call_stack_size = ir_call_used_stack(ctx, insn);
			if (call_stack_size > peak_call_stack_size) {
				peak_call_stack_size = call_stack_size;
			}
		}
		n = ir_insn_len(insn);
		i += n;
		insn += n;
	}
	if (peak_call_stack_size) {
		ctx->call_stack_size = peak_call_stack_size;
		ctx->flags |= IR_PREALLOCATED_STACK;
	}
}
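
/*
 * Effect of preallocation (illustrative): with two calls that need 16 and 48
 * bytes of outgoing stack arguments, call_stack_size becomes 48 and the
 * prologue reserves it once, so no call site has to adjust sp around the
 * call.
 */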
5519
5520void ir_fix_stack_frame(ir_ctx *ctx)
5521{
5522	uint32_t additional_size = 0;
5523
5524	ctx->locals_area_size = ctx->stack_frame_size;
5525
5526	if (ctx->used_preserved_regs) {
5527		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
5528		ir_reg reg;
5529		(void) reg;
5530
5531		IR_REGSET_FOREACH(used_preserved_regs, reg) {
5532			additional_size += sizeof(void*);
5533		} IR_REGSET_FOREACH_END();
5534	}
5535
5536	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
5537		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
5538			additional_size += sizeof(void*) * IR_REG_INT_ARGS;
5539		}
5540		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
5541			additional_size += 16 * IR_REG_FP_ARGS;
5542		}
5543	}
5544
5545	ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
5546	ctx->stack_frame_size += additional_size;
5547	ctx->stack_frame_alignment = 0;
5548	ctx->call_stack_size = 0;
5549
5550	if ((ctx->flags2 & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) {
5551		while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
5552			ctx->stack_frame_size += sizeof(void*);
5553			ctx->stack_frame_alignment += sizeof(void*);
5554		}
5555	} else if (ctx->flags2 & IR_HAS_CALLS) {
5556		ctx->flags |= IR_USE_FRAME_POINTER;
5557		/* Stack must be 16 byte aligned */
5558		if (!(ctx->flags & IR_FUNCTION)) {
5559			while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
5560				ctx->stack_frame_size += sizeof(void*);
5561				ctx->stack_frame_alignment += sizeof(void*);
5562			}
5563		} else if (ctx->flags & IR_USE_FRAME_POINTER) {
5564			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) {
5565				ctx->stack_frame_size += sizeof(void*);
5566				ctx->stack_frame_alignment += sizeof(void*);
5567			}
5568		} else {
5569			if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
5570				ir_preallocate_call_stack(ctx);
5571			}
5572			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) !=
5573					ctx->stack_frame_size + ctx->call_stack_size) {
5574				ctx->stack_frame_size += sizeof(void*);
5575				ctx->stack_frame_alignment += sizeof(void*);
5576			}
5577		}
5578	}
5579
5580	ir_fix_param_spills(ctx);
5581}
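
/*
 * The alignment loops above are just an incremental way of rounding the
 * frame up to 16 bytes while recording the padding, e.g. for
 * stack_frame_size = 40:
 *
 *     40 -> 48 == (40 + 15) & ~15
 *
 * with stack_frame_alignment = 8 accounting for the added padding bytes.
 */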

static void* dasm_labels[ir_lb_MAX];

/* Veneers support (TODO: avoid using a global variable) */
static ir_ctx *ir_current_ctx;

void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
{
	uint32_t b, n, target;
	ir_block *bb;
	ir_ref i;
	ir_insn *insn;
	uint32_t *rule;
	ir_backend_data data;
	dasm_State **Dst;
	int ret;
	void *entry;
	size_t size;

	data.ra_data.unused_slot_4 = 0;
	data.ra_data.unused_slot_2 = 0;
	data.ra_data.unused_slot_1 = 0;
	data.ra_data.handled = NULL;
	data.rodata_label = 0;
	data.jmp_table_label = 0;
	ctx->data = &data;

	if (!ctx->live_intervals) {
		ctx->stack_frame_size = 0;
		ctx->stack_frame_alignment = 0;
		ctx->call_stack_size = 0;
		ctx->used_preserved_regs = 0;
		ir_allocate_unique_spill_slots(ctx);
	}

	if (ctx->fixed_stack_frame_size != -1) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size);
		}
		if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) {
			// TODO: report error to caller
#ifdef IR_DEBUG_MESSAGES
			fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n",
				__FILE__, __LINE__);
#endif
			ctx->data = NULL;
			ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW;
			return NULL;
		}
		ctx->stack_frame_size = ctx->fixed_stack_frame_size;
		ctx->call_stack_size = ctx->fixed_call_stack_size;
		ctx->stack_frame_alignment = 0;
	}

	Dst = &data.dasm_state;
	data.dasm_state = NULL;
	dasm_init(&data.dasm_state, DASM_MAXSECTION);
	dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX);
	dasm_setup(&data.dasm_state, dasm_actions);
	/* labels for each block + for each constant + rodata label + jmp_table label + for each entry + exit_table label */
	dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count + 1);
	data.emit_constants = ir_bitset_malloc(ctx->consts_count);

	if (!(ctx->flags & IR_SKIP_PROLOGUE)) {
		ir_emit_prologue(ctx);
	}
	if (ctx->flags & IR_FUNCTION) {
		ir_emit_load_params(ctx);
	}

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
			continue;
		}
		|=>b:

		i = bb->start;
		insn = ctx->ir_base + i;
		if (bb->flags & IR_BB_ENTRY) {
			uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3;

			|=>label:
			ir_emit_prologue(ctx);
			ctx->entries[insn->op3] = i;
		}

		/* skip first instruction */
		n = ir_insn_len(insn);
		i += n;
		insn += n;
		rule = ctx->rules + i;

		while (i <= bb->end) {
			if (!((*rule) & (IR_FUSED|IR_SKIPPED)))
			switch (*rule) {
				case IR_VAR:
				case IR_PARAM:
				case IR_PI:
				case IR_PHI:
				case IR_SNAPSHOT:
				case IR_VA_END:
					break;
				case IR_MUL_PWR2:
				case IR_DIV_PWR2:
				case IR_MOD_PWR2:
					ir_emit_mul_div_mod_pwr2(ctx, i, insn);
					break;
				case IR_SDIV_PWR2:
					ir_emit_sdiv_pwr2(ctx, i, insn);
					break;
				case IR_SMOD_PWR2:
					ir_emit_smod_pwr2(ctx, i, insn);
					break;
				case IR_SHIFT:
					ir_emit_shift(ctx, i, insn);
					break;
				case IR_SHIFT_CONST:
					ir_emit_shift_const(ctx, i, insn);
					break;
				case IR_CTPOP:
					ir_emit_ctpop(ctx, i, insn);
					break;
				case IR_OP_INT:
					ir_emit_op_int(ctx, i, insn);
					break;
				case IR_OP_FP:
					ir_emit_op_fp(ctx, i, insn);
					break;
				case IR_BINOP_INT:
					ir_emit_binop_int(ctx, i, insn);
					break;
				case IR_BINOP_FP:
					ir_emit_binop_fp(ctx, i, insn);
					break;
				case IR_CMP_INT:
					ir_emit_cmp_int(ctx, i, insn);
					break;
				case IR_CMP_FP:
					ir_emit_cmp_fp(ctx, i, insn);
					break;
				case IR_SEXT:
					ir_emit_sext(ctx, i, insn);
					break;
				case IR_ZEXT:
					ir_emit_zext(ctx, i, insn);
					break;
				case IR_TRUNC:
					ir_emit_trunc(ctx, i, insn);
					break;
				case IR_BITCAST:
				case IR_PROTO:
					ir_emit_bitcast(ctx, i, insn);
					break;
				case IR_INT2FP:
					ir_emit_int2fp(ctx, i, insn);
					break;
				case IR_FP2INT:
					ir_emit_fp2int(ctx, i, insn);
					break;
				case IR_FP2FP:
					ir_emit_fp2fp(ctx, i, insn);
					break;
				case IR_COPY_INT:
					ir_emit_copy_int(ctx, i, insn);
					break;
				case IR_COPY_FP:
					ir_emit_copy_fp(ctx, i, insn);
					break;
				case IR_CMP_AND_BRANCH_INT:
					ir_emit_cmp_and_branch_int(ctx, b, i, insn);
					break;
				case IR_CMP_AND_BRANCH_FP:
					ir_emit_cmp_and_branch_fp(ctx, b, i, insn);
					break;
				case IR_GUARD_CMP_INT:
					ir_emit_guard_cmp_int(ctx, b, i, insn);
					break;
				case IR_GUARD_CMP_FP:
					ir_emit_guard_cmp_fp(ctx, b, i, insn);
					break;
				case IR_IF_INT:
					ir_emit_if_int(ctx, b, i, insn);
					break;
				case IR_COND:
					ir_emit_cond(ctx, i, insn);
					break;
				case IR_SWITCH:
					ir_emit_switch(ctx, b, i, insn);
					break;
				case IR_MIN_MAX_INT:
					ir_emit_min_max_int(ctx, i, insn);
					break;
				case IR_OVERFLOW:
					ir_emit_overflow(ctx, i, insn);
					break;
				case IR_OVERFLOW_AND_BRANCH:
					ir_emit_overflow_and_branch(ctx, b, i, insn);
					break;
				case IR_END:
				case IR_LOOP_END:
					if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
						ir_emit_osr_entry_loads(ctx, b, bb);
					}
					if (bb->flags & IR_BB_DESSA_MOVES) {
						ir_emit_dessa_moves(ctx, b, bb);
					}
					do {
						ir_ref succ = ctx->cfg_edges[bb->successors];

						if (UNEXPECTED(bb->successors_count == 2)) {
							if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) {
								succ = ctx->cfg_edges[bb->successors + 1];
							} else {
								IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
							}
						} else {
							IR_ASSERT(bb->successors_count == 1);
						}
						target = ir_skip_empty_target_blocks(ctx, succ);
						if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
							|	b =>target
						}
					} while (0);
					break;
				case IR_RETURN_VOID:
					ir_emit_return_void(ctx);
					break;
				case IR_RETURN_INT:
					ir_emit_return_int(ctx, i, insn);
					break;
				case IR_RETURN_FP:
					ir_emit_return_fp(ctx, i, insn);
					break;
				case IR_CALL:
					ir_emit_call(ctx, i, insn);
					break;
				case IR_TAILCALL:
					ir_emit_tailcall(ctx, i, insn);
					break;
				case IR_IJMP:
					ir_emit_ijmp(ctx, i, insn);
					break;
				case IR_REG_BINOP_INT:
					ir_emit_reg_binop_int(ctx, i, insn);
					break;
				case IR_VADDR:
					ir_emit_vaddr(ctx, i, insn);
					break;
				case IR_VLOAD:
					ir_emit_vload(ctx, i, insn);
					break;
				case IR_VSTORE:
					ir_emit_vstore(ctx, i, insn);
					break;
				case IR_RLOAD:
					ir_emit_rload(ctx, i, insn);
					break;
				case IR_RSTORE:
					ir_emit_rstore(ctx, i, insn);
					break;
				case IR_LOAD_INT:
					ir_emit_load_int(ctx, i, insn);
					break;
				case IR_LOAD_FP:
					ir_emit_load_fp(ctx, i, insn);
					break;
				case IR_STORE_INT:
					ir_emit_store_int(ctx, i, insn);
					break;
				case IR_STORE_FP:
					ir_emit_store_fp(ctx, i, insn);
					break;
				case IR_ALLOCA:
					ir_emit_alloca(ctx, i, insn);
					break;
				case IR_VA_START:
					ir_emit_va_start(ctx, i, insn);
					break;
				case IR_VA_COPY:
					ir_emit_va_copy(ctx, i, insn);
					break;
				case IR_VA_ARG:
					ir_emit_va_arg(ctx, i, insn);
					break;
				case IR_AFREE:
					ir_emit_afree(ctx, i, insn);
					break;
				case IR_FRAME_ADDR:
					ir_emit_frame_addr(ctx, i);
					break;
				case IR_EXITCALL:
					ir_emit_exitcall(ctx, i, insn);
					break;
				case IR_GUARD:
				case IR_GUARD_NOT:
					ir_emit_guard(ctx, i, insn);
					break;
				case IR_GUARD_OVERFLOW:
					ir_emit_guard_overflow(ctx, i, insn);
					break;
				case IR_TLS:
					ir_emit_tls(ctx, i, insn);
					break;
				case IR_TRAP:
					|	brk
					break;
				default:
					IR_ASSERT(0 && "NIY rule/instruction");
					ir_mem_free(data.emit_constants);
					dasm_free(&data.dasm_state);
					ctx->data = NULL;
					ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE;
					return NULL;
			}
			n = ir_insn_len(insn);
			i += n;
			insn += n;
			rule += n;
		}
	}

	if (ctx->deoptimization_exits) {
		uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count;

		|=>exit_table_label:
		for (i = 0; i < ctx->deoptimization_exits; i++) {
			const void *exit_addr = ctx->get_exit_addr(i);

			if (!exit_addr) {
				ctx->data = NULL;
				return NULL;
			}
			|	b &exit_addr
		}
	}

	if (data.rodata_label) {
		|.rodata
	}
	IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) {
		insn = &ctx->ir_base[-i];
		if (IR_IS_TYPE_FP(insn->type)) {
			int label = ctx->cfg_blocks_count + i;

			if (!data.rodata_label) {
				data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;

				|.rodata
				|=>data.rodata_label:
			}
			if (insn->type == IR_DOUBLE) {
				|.align 8
				|=>label:
				|.long insn->val.u32, insn->val.u32_hi
			} else {
				IR_ASSERT(insn->type == IR_FLOAT);
				|.align 4
				|=>label:
				|.long insn->val.u32
			}
		} else if (insn->op == IR_STR) {
			int label = ctx->cfg_blocks_count + i;
			const char *str = ir_get_str(ctx, insn->val.str);
			int i = 0;

			if (!data.rodata_label) {
				data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;

				|.rodata
				|=>data.rodata_label:
			}
			|.align 8
			|=>label:
			while (1) {
				char c;
				uint32_t w = 0;
				int j;

				for (j = 0; j < 4; j++) {
					c = str[i];
					if (!c) {
						break;
					}
					w |= c << (8 * j);
					i++;
				}
				|	.long w
				if (!c) {
					break;
				}
			}

		} else {
			IR_ASSERT(0);
		}
	} IR_BITSET_FOREACH_END();
	if (data.rodata_label) {
		|.code
	}
	ir_mem_free(data.emit_constants);

	if (ctx->status) {
		dasm_free(&data.dasm_state);
		ctx->data = NULL;
		return NULL;
	}

	ret = dasm_link(&data.dasm_state, size_ptr);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&data.dasm_state);
		ctx->data = NULL;
		ctx->status = IR_ERROR_LINK;
		return NULL;
	}
	size = *size_ptr;

	if (ctx->code_buffer) {
		entry = ctx->code_buffer->pos;
		entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16);
		if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) {
			ctx->data = NULL;
			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
			return NULL;
		}
		ctx->code_buffer->pos = (char*)entry + size;
	} else {
		entry = ir_mem_mmap(size);
		if (!entry) {
			dasm_free(&data.dasm_state);
			ctx->data = NULL;
			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
			return NULL;
		}
		ir_mem_unprotect(entry, size);
	}

	if (ctx->deoptimization_exits) {
		uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count;

		ctx->deoptimization_exits_base = (const void*)((char*)entry + dasm_getpclabel(&data.dasm_state, exit_table_label));
	}

	ir_current_ctx = ctx;
	ret = dasm_encode(&data.dasm_state, entry);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&data.dasm_state);
		if (ctx->code_buffer) {
			if (ctx->code_buffer->pos == (char*)entry + size) {
				/* rollback to the start of this allocation */
				ctx->code_buffer->pos = (char*)entry;
			}
		} else {
			ir_mem_unmap(entry, size);
		}
		ctx->data = NULL;
		ctx->status = IR_ERROR_ENCODE;
		return NULL;
	}

	if (data.jmp_table_label) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label);
		ctx->jmp_table_offset = offset;
	} else {
		ctx->jmp_table_offset = 0;
	}
	if (data.rodata_label) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label);
		ctx->rodata_offset = offset;
	} else {
		ctx->rodata_offset = 0;
	}

	if (ctx->entries_count) {
		/* For all entries */
		i = ctx->entries_count;
		do {
			ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
			uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3);
			insn->op3 = offset;
		} while (i != 0);
	}

	dasm_free(&data.dasm_state);

	if (ctx->code_buffer) {
		size = (char*)ctx->code_buffer->pos - (char*)entry;
	}

	ir_mem_flush(entry, size);

	if (!ctx->code_buffer) {
		ir_mem_protect(entry, size);
	}

	ctx->data = NULL;
	return entry;
}
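
/*
 * Typical use of ir_emit_code() (sketch; assumes the earlier passes already
 * built and register-allocated the context, and that the generated function
 * takes no arguments and returns an int):
 */
#if 0
	size_t size;
	void *entry = ir_emit_code(ctx, &size);

	if (entry) {
		int (*fn)(void) = (int (*)(void))entry;
		int res = fn();
	} else {
		/* ctx->status holds one of the IR_ERROR_* codes set above */
	}
#endif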

const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr)
{
	void *entry;
	size_t size;
	uint32_t i;
	dasm_State **Dst, *dasm_state;
	int ret;

	IR_ASSERT(code_buffer);
	IR_ASSERT(aarch64_may_use_b(code_buffer, exit_addr));

	Dst = &dasm_state;
	dasm_state = NULL;
	dasm_init(&dasm_state, DASM_MAXSECTION);
	dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX);
	dasm_setup(&dasm_state, dasm_actions);

	|	bl >2
	|1:
	for (i = 1; i < exit_points_per_group; i++) {
		|	bl >2
	}
	|2:
	|	adr Rx(IR_REG_INT_TMP), <1
	|	sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP)
	|	lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2
	if (first_exit_point) {
		|	add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point
	}
	|	b &exit_addr

	ret = dasm_link(&dasm_state, &size);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&dasm_state);
		return NULL;
	}

	entry = code_buffer->pos;
	entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16);
	if (size > (size_t)((char*)code_buffer->end - (char*)entry)) {
		dasm_free(&dasm_state);
		return NULL;
	}
	code_buffer->pos = (char*)entry + size;

	ir_current_ctx = NULL;
	ret = dasm_encode(&dasm_state, entry);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&dasm_state);
		if (code_buffer->pos == (char*)entry + size) {
			/* rollback to the start of this allocation */
			code_buffer->pos = (char*)entry;
		}
		return NULL;
	}

	dasm_free(&dasm_state);

	ir_mem_flush(entry, size);

	*size_ptr = size;
	return entry;
}
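
/*
 * How an exit number is recovered at run time: each `bl >2` stub is one
 * 4-byte instruction and BL leaves the address of the *next* instruction in
 * lr, so for the i-th stub (0-based) lr - <1 == i * 4.  The adr/sub/lsr/add
 * sequence above therefore computes
 *
 *     exit = ((lr - label_1) >> 2) + first_exit_point
 */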

static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset)
{
	ir_ctx *ctx = ir_current_ctx;
	const void *addr, *veneer = NULL;
	ptrdiff_t na;
	int n, m;

	IR_ASSERT(ctx && ctx->code_buffer);

	if ((ins >> 16) == DASM_REL_A) {
		addr = (void*)((((ptrdiff_t)(*(b-1))) << 32) | (unsigned int)(*(b-2)));
		if (ctx->get_veneer) {
			veneer = ctx->get_veneer(ctx, addr);
		}
	} else {
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	}

	if (veneer) {
		na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4;
		n = (int)na;

		/* check if we can jump to veneer */
		if ((ptrdiff_t)n != na) {
			/* pass */
		} else if (!(ins & 0xf800)) {  /* B, BL */
			if ((n & 3) == 0 && ((n+0x08000000) >> 28) == 0) {
				return n;
			}
		} else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
			if ((n & 3) == 0 && ((n+0x00100000) >> 21) == 0) {
				return n;
			}
		} else if ((ins & 0x3000) == 0x2000) {  /* ADR */
			/* pass */
		} else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
			/* pass */
		} else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
			if ((n & 3) == 0 && ((n+0x00008000) >> 16) == 0) {
				return n;
			}
		}
	}

	veneer = ctx->code_buffer->pos;
	if ((char*)ctx->code_buffer->end - (char*)veneer < 4) {
		IR_ASSERT(0 && "too long jmp distance" && "jit buffer overflow");
		return 0; /* jit_buffer_size overflow */
	}

	na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4;
	n = (int)na;

	/* check if we can jump to veneer */
	if ((ptrdiff_t)n != na) {
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	} else if (!(ins & 0xf800)) {  /* B, BL */
		if ((n & 3) != 0 || ((n+0x08000000) >> 28) != 0) {
			IR_ASSERT(0 && "too long jmp distance");
			return 0;
		}
	} else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
		if ((n & 3) != 0 || ((n+0x00100000) >> 21) != 0) {
			IR_ASSERT(0 && "too long jmp distance");
			return 0;
		}
	} else if ((ins & 0x3000) == 0x2000) {  /* ADR */
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	} else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	} else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
		if ((n & 3) != 0 || ((n+0x00008000) >> 16) != 0) {
			IR_ASSERT(0 && "too long jmp distance");
			return 0;
		}
	} else if ((ins & 0x8000)) {  /* absolute */
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	} else {
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	}

	/* check if we can use B to jump from veneer */
	na = (ptrdiff_t)cp + offset - (ptrdiff_t)veneer - 4;
	m = (int)na;
	if ((ptrdiff_t)m != na) {
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	} else if ((m & 3) != 0 || ((m+0x08000000) >> 28) != 0) {
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	}

	if (!ctx->set_veneer || !ctx->set_veneer(ctx, addr, veneer)) {
		IR_ASSERT(0 && "too long jmp distance");
		return 0;
	}

	/* generate B instruction */
	*(uint32_t*)veneer = 0x14000000 | ((m >> 2) & 0x03ffffff);
	ctx->code_buffer->pos = (char*)ctx->code_buffer->pos + 4;

	return n;
}
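
/*
 * Encoding used for the veneer above: B is opcode 0x14000000 with a signed
 * 26-bit *word* offset, giving a +/-128MB range.  A self-contained sketch of
 * the encoding step:
 */
#if 0
static uint32_t toy_encode_b(ptrdiff_t byte_offset)
{
	/* the caller must guarantee a 4-byte aligned, in-range offset */
	return 0x14000000 | (((uint32_t)(byte_offset >> 2)) & 0x03ffffff);
}
#endif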

bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
{
	return !aarch64_may_use_b(code_buffer, addr);
}

void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr)
{
	void *entry;
	size_t size;
	dasm_State **Dst, *dasm_state;
	int ret;

	Dst = &dasm_state;
	dasm_state = NULL;
	dasm_init(&dasm_state, DASM_MAXSECTION);
	dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX);
	dasm_setup(&dasm_state, dasm_actions);

	|.code
	|	movz Rx(IR_REG_INT_TMP), #((uint64_t)(addr) & 0xffff)
	|	movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 16) & 0xffff), lsl #16
	|	movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 32) & 0xffff), lsl #32
	|	movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 48) & 0xffff), lsl #48
	|	br Rx(IR_REG_INT_TMP)

	ret = dasm_link(&dasm_state, &size);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&dasm_state);
		return NULL;
	}

	entry = code_buffer->pos;
	entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 4);
	if (size > (size_t)((char*)code_buffer->end - (char*)entry)) {
		dasm_free(&dasm_state);
		return NULL;
	}

	ret = dasm_encode(&dasm_state, entry);
	if (ret != DASM_S_OK) {
		dasm_free(&dasm_state);
		return NULL;
	}

	*size_ptr = size;
	code_buffer->pos = (char*)code_buffer->pos + size;

	dasm_free(&dasm_state);
	ir_mem_flush(entry, size);

	return entry;
}
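
/*
 * Typical call pattern (sketch): emit a thunk only when a direct B cannot
 * reach the destination from the code buffer; `target` is a hypothetical
 * callee address used only in this demo.
 */
#if 0
	size_t thunk_size;
	void *callee = target;

	if (ir_needs_thunk(code_buffer, callee)) {
		callee = ir_emit_thunk(code_buffer, callee, &thunk_size);
	}
	/* branches may now reach `callee` with a plain B/BL */
#endif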

void ir_fix_thunk(void *thunk_entry, void *addr)
{
	uint32_t *code = thunk_entry;
	IR_ASSERT((code[0] & 0xffe00000) == 0xd2800000
		&& (code[1] & 0xffe00000) == 0xf2a00000
		&& (code[2] & 0xffe00000) == 0xf2c00000
		&& (code[3] & 0xffe00000) == 0xf2e00000
		&& (code[4] & 0xfffffc1f) == 0xd61f0000);

	code[0] = (code[0] & 0xffe0001f) | (uint32_t)((uint64_t)(addr) & 0xffff) << 5;
	code[1] = (code[1] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 16) & 0xffff) << 5;
	code[2] = (code[2] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 32) & 0xffff) << 5;
	code[3] = (code[3] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 48) & 0xffff) << 5;

	ir_mem_flush(code, sizeof(uint32_t) * 4);
}
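
/*
 * The patch above rewrites only the imm16 field (bits 20..5) of the four
 * MOVZ/MOVK instructions; opcode, shift and destination register are
 * preserved, and the final BR is untouched.  Building one such field from
 * scratch (illustrative):
 */
#if 0
static uint32_t toy_imm16_field(uint64_t addr, int chunk /* 0..3 */)
{
	return (uint32_t)((addr >> (16 * chunk)) & 0xffff) << 5;
}
#endif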