xref: /php-src/ext/opcache/jit/ir/ir_aarch64.dasc (revision bad5d2c7)
/*
 * IR - Lightweight JIT Compilation Framework
 * (Aarch64 native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

|.arch arm64

|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 8

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

# define IR_MEM_VAL(loc)            ((loc).v)
#else
typedef uint64_t ir_mem;

# define IR_MEM_VAL(loc)            (loc)
#endif

#define IR_MEM_OFFSET(loc)          ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff))
#define IR_MEM_BASE(loc)            ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff))
#define IR_MEM_INDEX(loc)           ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff))
#define IR_MEM_SHIFT(loc)           ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff))

#define IR_MEM_O(addr)            IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 0)
#define IR_MEM_B(base)            IR_MEM(base, 0, IR_REG_NONE, 0)
#define IR_MEM_BO(base, offset)   IR_MEM(base, offset, IR_REG_NONE, 0)

IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t shift)
{
	ir_mem mem;
	IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || offset == 0);
	IR_ASSERT(shift == 0); // TODO: ???
#ifdef IR_DEBUG
	mem.v =
#else
	mem =
#endif
		((uint64_t)(uint32_t)offset |
		((uint64_t)(uint8_t)base << 32) |
		((uint64_t)(uint8_t)index << 40) |
		((uint64_t)(uint8_t)shift << 48));
	return mem;
}
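
/* Illustration (editorial note, not part of the upstream source): IR_MEM packs
 * an addressing mode into 64 bits: bits 0..31 hold the signed offset, bits
 * 32..39 the base register, bits 40..47 the index register, and bits 48..55
 * the shift (currently always 0). E.g. IR_MEM_BO(base, 16) stores offset 16
 * with no index register. */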

#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ? \
		((offset) + (int32_t)sizeof(void*) * 2) : \
		((offset) + ctx->call_stack_size))
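
/* A sketch of the assumed frame layout: with a frame pointer, spill positions
 * are biased by the saved x29/x30 pair (the 2 * sizeof(void*) term); without
 * one, they sit above the outgoing-call area addressed from sp. */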

#define B_IMM           (1<<27)        // signed imm26 * 4
#define ADR_IMM         (1<<20)        // signed imm21
#define ADRP_IMM        (1LL<<32)      // signed imm21 * 4096

static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < B_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < B_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < B_IMM);
		}
	}
	return 0;
}
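
/* Note: B/BL encode a signed 26-bit word offset, giving a reach of +/-128MB
 * (B_IMM). The check above is conservative: it bounds the worst-case distance
 * between "addr" and the far end of the code buffer. */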

#if 0
static bool aarch64_may_use_adr(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < ADR_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < ADR_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < ADR_IMM);
		}
	}
	return 0;
}

static bool aarch64_may_use_adrp(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < ADRP_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < ADRP_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < ADRP_IMM);
		}
	}
	return 0;
}
#endif

/* Determine whether "val" falls into one of the two encodable ranges:
 *   Range 1: [0, 0xfff]
 *   Range 2: Range 1 shifted left by 12 bits (LSL #12)
 * Used to guard the immediate encoding for add/adds/sub/subs/cmp/cmn instructions. */
static bool aarch64_may_encode_imm12(const int64_t val)
{
	return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff)));
}
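
/* Illustrative examples: 0xfff and 0xfff000 are encodable (the latter as
 * #0xfff, LSL #12), while 0x1001 is not, since it needs bits from both
 * 12-bit halves. */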

/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions. */
static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size)
{
	/* fast path: power of two */
	if (value > 0 && !(value & (value - 1))) {
		return 1;
	}

	if (type_size == 8) {
		if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) {
			return 1;
		}
	} else {
		if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) {
			return 1;
		}
	}

	return 0;
}
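
/* Illustrative examples: 0xff (a contiguous run of ones) and
 * 0x0f0f0f0f0f0f0f0f (a repeating pattern) are valid bitmask immediates,
 * while 0 and all-ones are not; dasm_imm13() performs the exact encoding
 * test. */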

static bool aarch64_may_encode_imm7_addr_offset(const int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0
		&& offset < 63 * (int32_t)type_size
		&& offset >= -64 * (int32_t)type_size;
}
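
/* LDP/STP take a signed 7-bit offset scaled by the access size, so the byte
 * offset must be a multiple of type_size within roughly [-64, +63] slots
 * (the upper bound used here is slightly conservative). */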

static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size;
}
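
/* LDR/STR (unsigned offset form) scale a 12-bit immediate by the access
 * size: the byte offset must be a non-negative multiple of type_size below
 * 0xfff * type_size. */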

|.macro ASM_REG_REG_OP, op, type, dst, src
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src)
||	} else {
|		op Rw(dst), Rw(src)
||	}
|.endmacro

|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2)
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2)
||	}
|.endmacro

|.macro ASM_REG_REG_REG_TXT_OP, op, type, dst, src1, src2, txt
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2), txt
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2), txt
||	}
|.endmacro

|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2), Rx(src3)
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2), Rw(src3)
||	}
|.endmacro

|.macro ASM_REG_REG_IMM_OP, op, type, dst, src1, val
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), #val
||	} else {
|		op Rw(dst), Rw(src1), #val
||	}
|.endmacro

|.macro ASM_REG_IMM_OP, op, type, reg, val
||	if (ir_type_size[type] == 8) {
|		op Rx(reg), #val
||	} else {
|		op Rw(reg), #val
||	}
|.endmacro

|.macro ASM_FP_REG_IMM_OP, op, type, reg, val
||	if (type == IR_DOUBLE) {
|		op Rd(reg-IR_REG_FP_FIRST), #val
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op Rs(reg-IR_REG_FP_FIRST), #val
||	}
|.endmacro

|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2
||	if (type == IR_DOUBLE) {
|		op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST)
||	}
|.endmacro

typedef struct _ir_backend_data {
	ir_reg_alloc_data  ra_data;
	uint32_t           dessa_from_block;
	dasm_State        *dasm_state;
	ir_bitset          emit_constants;
	int                rodata_label, jmp_table_label;
} ir_backend_data;

#define IR_GP_REG_NAME(code, name64, name32) \
	#name64,
#define IR_GP_REG_NAME32(code, name64, name32) \
	#name32,
#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \
	#name64,
#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \
	#name32,

static const char *_ir_reg_name[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME)
	IR_FP_REGS(IR_FP_REG_NAME)
};

static const char *_ir_reg_name32[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME32)
	IR_FP_REGS(IR_FP_REG_NAME32)
};

/* Calling Convention */
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
	IR_REG_INT_ARG5,
	IR_REG_INT_ARG6,
	IR_REG_INT_ARG7,
	IR_REG_INT_ARG8,
};

static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
	IR_REG_FP_ARG5,
	IR_REG_FP_ARG6,
	IR_REG_FP_ARG7,
	IR_REG_FP_ARG8,
};

const char *ir_reg_name(int8_t reg, ir_type type)
{
	if (reg >= IR_REG_NUM) {
		if (reg == IR_REG_SCRATCH) {
			return "SCRATCH";
		} else {
			IR_ASSERT(reg == IR_REG_ALL);
			return "ALL";
		}
	}
	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
	if (type == IR_VOID) {
		type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE;
	}
	if (ir_type_size[type] == 8) {
		return _ir_reg_name[reg];
	} else {
		return _ir_reg_name32[reg];
	}
}

#define IR_RULES(_)        \
	_(CMP_INT)             \
	_(CMP_FP)              \
	_(MUL_PWR2)            \
	_(DIV_PWR2)            \
	_(MOD_PWR2)            \
	_(SDIV_PWR2)           \
	_(SMOD_PWR2)           \
	_(OP_INT)              \
	_(OP_FP)               \
	_(BINOP_INT)           \
	_(BINOP_FP)            \
	_(SHIFT)               \
	_(SHIFT_CONST)         \
	_(COPY_INT)            \
	_(COPY_FP)             \
	_(CMP_AND_BRANCH_INT)  \
	_(CMP_AND_BRANCH_FP)   \
	_(GUARD_CMP_INT)       \
	_(GUARD_CMP_FP)        \
	_(GUARD_OVERFLOW)      \
	_(OVERFLOW_AND_BRANCH) \
	_(MIN_MAX_INT)         \
	_(REG_BINOP_INT)       \
	_(LOAD_INT)            \
	_(LOAD_FP)             \
	_(STORE_INT)           \
	_(STORE_FP)            \
	_(IF_INT)              \
	_(RETURN_VOID)         \
	_(RETURN_INT)          \
	_(RETURN_FP)           \

#define IR_RULE_ENUM(name) IR_ ## name,

#define IR_STATIC_ALLOCA   (IR_SKIPPED | IR_FUSED | IR_SIMPLE | IR_ALLOCA)

enum _ir_rule {
	IR_FIRST_RULE = IR_LAST_OP,
	IR_RULES(IR_RULE_ENUM)
	IR_LAST_RULE
};

#define IR_RULE_NAME(name)  #name,
const char *ir_rule_name[IR_LAST_OP] = {
	NULL,
	IR_RULES(IR_RULE_NAME)
	NULL
};

/* register allocation */
int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
{
	uint32_t rule = ir_rule(ctx, ref);
	const ir_insn *insn;
	int n = 0;
	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;

	constraints->def_reg = IR_REG_NONE;
	constraints->hints_count = 0;
	switch (rule & IR_RULE_MASK) {
		case IR_BINOP_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op2];
				switch (insn->op) {
					case IR_ADD:
					case IR_ADD_OV:
					case IR_SUB:
					case IR_SUB_OV:
						if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_imm12(val_insn->val.u64)) {
							constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
							n++;
						}
						break;
					case IR_MUL_OV:
						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
						n++;
						break;
					case IR_AND:
					case IR_OR:
					case IR_XOR:
						if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) {
							constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
							n++;
						}
						break;
					case IR_MUL:
					case IR_DIV:
					case IR_MOD:
						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
						n++;
						break;
				}
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (insn->op == IR_MOD) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
				n++;
			}
			break;
		case IR_MUL_PWR2:
		case IR_DIV_PWR2:
		case IR_MOD_PWR2:
		case IR_SHIFT:
		case IR_SHIFT_CONST:
		case IR_OP_INT:
		case IR_OP_FP:
		case IR_INT2FP:
		case IR_FP2INT:
		case IR_FP2FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (rule == IR_SHIFT_CONST
			 && (insn->op == IR_ROL || insn->op == IR_ROR)
			 && ir_type_size[insn->type] < 4) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (rule == IR_SHIFT
			 && (insn->op == IR_ROL || insn->op == IR_ROR)
			 && ir_type_size[insn->type] < 4) {
				if (insn->op == IR_ROL) {
					flags |= IR_DEF_CONFLICTS_WITH_INPUT_REGS;
				}
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
				n++;
			} else if (rule == IR_SHIFT && insn->op == IR_ROL) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SDIV_PWR2:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2)) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!aarch64_may_encode_imm12(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_SMOD_PWR2:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n++;
			break;
		case IR_CTPOP:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			constraints->tmp_regs[0] = IR_TMP_REG(2, IR_DOUBLE, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_BINOP_FP:
		case IR_MIN_MAX_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_VSTORE:
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_LOAD_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_STORE_INT:
		case IR_STORE_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				if (!IR_IS_TYPE_INT(insn->type) || IR_IS_SYM_CONST(insn->op) || insn->val.i64 != 0) {
					constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SWITCH:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n++;
			break;
		case IR_CALL:
			insn = &ctx->ir_base[ref];
			constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			IR_FALLTHROUGH;
		case IR_TAILCALL:
			insn = &ctx->ir_base[ref];
			if (insn->inputs_count > 2) {
				constraints->hints[2] = IR_REG_NONE;
				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
				if (!IR_IS_CONST_REF(insn->op2)) {
					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
					n++;
				}
			}
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_COND:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2)) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_COPY_INT:
		case IR_COPY_FP:
		case IR_TRUNC:
		case IR_BITCAST:
		case IR_PROTO:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_ZEXT:
		case IR_SEXT:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG;
			break;
		case IR_PARAM:
			constraints->def_reg = ir_get_param_reg(ctx, ref);
			flags = 0;
			break;
		case IR_PI:
		case IR_PHI:
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_RLOAD:
			constraints->def_reg = ctx->ir_base[ref].op2;
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_EXITCALL:
			constraints->def_reg = IR_REG_INT_RET1;
			break;
		case IR_RSTORE:
			flags = IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_RETURN_INT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_INT_RET1;
			constraints->hints_count = 3;
			break;
		case IR_RETURN_FP:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_FP_RET1;
			constraints->hints_count = 3;
			break;
		case IR_SNAPSHOT:
			flags = 0;
			break;
		case IR_VA_START:
			flags = IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_VA_ARG:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_VA_COPY:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
	}
	constraints->tmps_count = n;

	return flags;
}

/* instruction selection */
static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
{
	if (!IR_IS_CONST_REF(addr_ref)) {
		ir_insn *addr_insn = &ctx->ir_base[addr_ref];

		if (addr_insn->op == IR_ADD
		 && !IR_IS_CONST_REF(addr_insn->op1)
		 && IR_IS_CONST_REF(addr_insn->op2)  // TODO: temporary workaround
		 && !IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op)
		 && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) {
			ir_use_list *use_list = &ctx->use_lists[addr_ref];
			ir_ref j = use_list->count;

			if (j > 1) {
				/* check if address is used only in LOAD and STORE */
				ir_ref *p = &ctx->use_edges[use_list->refs];

				do {
					ir_insn *insn = &ctx->ir_base[*p];
					if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
						return;
					}
					p++;
				} while (--j);
			}
			ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op;
		}
	}
}

static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *op2_insn;
	ir_insn *insn = &ctx->ir_base[ref];

	switch (insn->op) {
		case IR_EQ:
		case IR_NE:
		case IR_LT:
		case IR_GE:
		case IR_LE:
		case IR_GT:
		case IR_ULT:
		case IR_UGE:
		case IR_ULE:
		case IR_UGT:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return IR_CMP_INT;
			} else {
				return IR_CMP_FP;
			}
			break;
		case IR_ADD:
		case IR_SUB:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.i64 == 0) {
						// return IR_COPY_INT;
					}
				}
binop_int:
				return IR_BINOP_INT;
			} else {
binop_fp:
				return IR_BINOP_FP;
			}
			break;
		case IR_MUL:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// 0
					} else if (op2_insn->val.u64 == 1) {
						// return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						return IR_MUL_PWR2;
					}
				}
				return IR_BINOP_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_ADD_OV:
		case IR_SUB_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_MUL_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_DIV:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 1) {
						// return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						if (IR_IS_TYPE_UNSIGNED(insn->type)) {
							return IR_DIV_PWR2;
						} else {
							return IR_SDIV_PWR2;
						}
					}
				}
				return IR_BINOP_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_MOD:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
					if (IR_IS_TYPE_UNSIGNED(insn->type)) {
						return IR_MOD_PWR2;
					} else {
						return IR_SMOD_PWR2;
					}
				}
			}
			return IR_BINOP_INT;
		case IR_BSWAP:
		case IR_NOT:
		case IR_CTLZ:
		case IR_CTTZ:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			return IR_OP_INT;
		case IR_NEG:
		case IR_ABS:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_OP_INT;
			} else {
				return IR_OP_FP;
			}
		case IR_OR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// return IR_COPY_INT;
				} else if (op2_insn->val.i64 == -1) {
					// -1
				}
			}
			goto binop_int;
		case IR_AND:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// 0
				} else if (op2_insn->val.i64 == -1) {
					// return IR_COPY_INT;
				}
			}
			goto binop_int;
		case IR_XOR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				}
			}
			goto binop_int;
		case IR_SHL:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// return IR_COPY_INT;
					} else if (ir_type_size[insn->type] >= 4) {
						if (op2_insn->val.u64 == 1) {
							// lea [op1*2]
						} else if (op2_insn->val.u64 == 2) {
							// lea [op1*4]
						} else if (op2_insn->val.u64 == 3) {
							// lea [op1*8]
						}
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_SHR:
		case IR_SAR:
		case IR_ROL:
		case IR_ROR:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// return IR_COPY_INT;
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_MIN:
		case IR_MAX:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_MIN_MAX_INT;
			} else {
				goto binop_fp;
			}
			break;
//		case IR_COND:
		case IR_COPY:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_COPY_INT | IR_MAY_REUSE;
			} else {
				return IR_COPY_FP | IR_MAY_REUSE;
			}
			break;
//		case IR_TRUNC:
		case IR_PROTO:
			return insn->op | IR_MAY_REUSE;
		case IR_BITCAST:
			if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return insn->op | IR_MAY_REUSE;
			} else {
				return insn->op;
			}
		case IR_CALL:
			if (ctx->flags & IR_FUNCTION) {
				ctx->flags |= IR_USE_FRAME_POINTER;
			}
			ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT;
			return IR_CALL;
		case IR_VAR:
			return IR_SKIPPED | IR_VAR;
		case IR_PARAM:
			return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
		case IR_ALLOCA:
			if (ctx->flags & IR_FUNCTION) {
				if (IR_IS_CONST_REF(insn->op2) && ctx->cfg_map[ref] == 1) {
					ir_insn *val = &ctx->ir_base[insn->op2];

					if (!IR_IS_SYM_CONST(val->op)) {
						return IR_STATIC_ALLOCA;
					}
				}
				ctx->flags |= IR_USE_FRAME_POINTER;
				ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT;
			}
			return IR_ALLOCA;
		case IR_LOAD:
			ir_match_fuse_addr(ctx, insn->op2, insn->type);
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_LOAD_INT;
			} else {
				return IR_LOAD_FP;
			}
			break;
		case IR_STORE:
			ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type);
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
				return IR_STORE_INT;
			} else {
				return IR_STORE_FP;
			}
			break;
		case IR_RLOAD:
			if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) {
				return IR_SKIPPED | IR_RLOAD;
			}
			return IR_RLOAD;
		case IR_RSTORE:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
					ir_insn *op_insn = &ctx->ir_base[insn->op2];

					if (!ctx->rules[insn->op2]) {
						ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2);
					}
					if (ctx->rules[insn->op2] == IR_BINOP_INT) {
						if (ctx->ir_base[op_insn->op1].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op1].op2 == insn->op3) {
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
						 && ctx->ir_base[op_insn->op2].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op2].op2 == insn->op3) {
							SWAP_REFS(op_insn->op1, op_insn->op2);
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						}
					}
				}
			}
			return IR_RSTORE;
		case IR_START:
		case IR_BEGIN:
		case IR_IF_TRUE:
		case IR_IF_FALSE:
		case IR_CASE_VAL:
		case IR_CASE_DEFAULT:
		case IR_MERGE:
		case IR_LOOP_BEGIN:
		case IR_UNREACHABLE:
			return IR_SKIPPED | insn->op;
		case IR_RETURN:
			if (!insn->op2) {
				return IR_RETURN_VOID;
			} else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_RETURN_INT;
			} else {
				return IR_RETURN_FP;
			}
		case IR_IF:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_CMP_AND_BRANCH_INT;
					} else {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_CMP_AND_BRANCH_FP;
					}
				} else if (op2_insn->op == IR_OVERFLOW) {
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_OVERFLOW_AND_BRANCH;
				}
			}
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_IF_INT;
			} else {
				IR_ASSERT(0 && "NIY IR_IF_FP");
				break;
			}
		case IR_GUARD:
		case IR_GUARD_NOT:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
					// TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
				 && (insn->op2 == ref - 1 ||
				     (insn->op2 == ctx->prev_ref[ref] - 1
				   && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_GUARD_CMP_INT;
					} else {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_GUARD_CMP_FP;
					}
				} else if (op2_insn->op == IR_OVERFLOW) {
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_GUARD_OVERFLOW;
				}
			}
			return insn->op;
		case IR_VA_START:
			ctx->flags2 |= IR_HAS_VA_START;
			if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) {
				ir_use_list *use_list = &ctx->use_lists[insn->op2];
				ir_ref *p, n = use_list->count;
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					ir_insn *use_insn = &ctx->ir_base[*p];
					if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) {
					} else if (use_insn->op == IR_VA_COPY) {
						if (use_insn->op3 == insn->op2) {
							ctx->flags2 |= IR_HAS_VA_COPY;
						}
					} else if (use_insn->op == IR_VA_ARG) {
						if (use_insn->op2 == insn->op2) {
							if (IR_IS_TYPE_INT(use_insn->type)) {
								ctx->flags2 |= IR_HAS_VA_ARG_GP;
							} else {
								IR_ASSERT(IR_IS_TYPE_FP(use_insn->type));
								ctx->flags2 |= IR_HAS_VA_ARG_FP;
							}
						}
					} else if (*p > ref) {
						/* direct va_list access */
						ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
					}
				}
			}
			return IR_VA_START;
		case IR_VA_END:
			return IR_SKIPPED | IR_NOP;
		case IR_VADDR:
			if (ctx->use_lists[ref].count > 0) {
				ir_use_list *use_list = &ctx->use_lists[ref];
				ir_ref *p, n = use_list->count;

				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					if (ctx->ir_base[*p].op != IR_VA_END) {
						return IR_STATIC_ALLOCA;
					}
				}
			}
			return IR_SKIPPED | IR_NOP;
		default:
			break;
	}

	return insn->op;
}

static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule)
{
}

/* code generation */
static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	int32_t offset;

	IR_ASSERT(ref >= 0);
	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		*reg = ctx->spill_base;
		return offset;
	}
	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(offset);
}

static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v)
{
	int32_t offset;
	ir_reg base;

	IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]);
	offset = ctx->live_intervals[v]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		return IR_MEM_BO(ctx->spill_base, offset);
	}
	base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	offset = IR_SPILL_POS_TO_OFFSET(offset);
	return IR_MEM_BO(base, offset);
}

static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	return ir_vreg_spill_slot(ctx, ctx->vregs[ref]);
}

static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem)
{
	return IR_MEM_VAL(ir_ref_spill_slot(ctx, ref)) == IR_MEM_VAL(mem);
}

static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	ir_insn *var_insn = &ctx->ir_base[ref];

	IR_ASSERT(var_insn->op == IR_VAR);
	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(var_insn->op3);
}

static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_live_interval *ival;

	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	while (ival) {
		ir_use_pos *use_pos = ival->use_pos;
		while (use_pos) {
			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
				return !use_pos->next || use_pos->next->op_num == 0;
			}
			use_pos = use_pos->next;
		}
		ival = ival->next;
	}
	return 0;
}

static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (ir_type_size[type] == 8) {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rx(reg), xzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rx(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			|	movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 32) & 0xffff) {
			|	movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else {
			|	movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
		}
	} else {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rw(reg), wzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rw(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			|	movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
		}
	}
}
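
/* Worked example: loading 0x0000001200340056 emits
 *   movz xN, #0x0056
 *   movk xN, #0x0034, lsl #16
 *   movk xN, #0x0012, lsl #32
 * and skips the all-zero 16-bit group at bits 48..63 entirely. */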

static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			switch (ir_type_size[type]) {
				default:
					IR_ASSERT(0);
				case 8:
					|	ldr Rx(reg), [Rx(base_reg), #offset]
					break;
				case 4:
					|	ldr Rw(reg), [Rx(base_reg), #offset]
					break;
				case 2:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsh Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrh Rw(reg), [Rx(base_reg), #offset]
					}
					break;
				case 1:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsb Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrb Rw(reg), [Rx(base_reg), #offset]
					}
					break;
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	switch (ir_type_size[type]) {
		default:
			IR_ASSERT(0);
		case 8:
			|	ldr Rx(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 4:
			|	ldr Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 2:
			if (IR_IS_TYPE_SIGNED(type)) {
				|	ldrsh Rw(reg), [Rx(base_reg), Rx(index_reg)]
			} else {
				|	ldrh Rw(reg), [Rx(base_reg), Rx(index_reg)]
			}
			break;
		case 1:
			if (IR_IS_TYPE_SIGNED(type)) {
				|	ldrsb Rw(reg), [Rx(base_reg), Rx(index_reg)]
			} else {
				|	ldrb Rw(reg), [Rx(base_reg), Rx(index_reg)]
			}
			break;
	}
}

static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *insn = &ctx->ir_base[src];
	int label;

	if (type == IR_FLOAT && insn->val.u32 == 0) {
		|	fmov Rs(reg-IR_REG_FP_FIRST), wzr
	} else if (type == IR_DOUBLE && insn->val.u64 == 0) {
		|	fmov Rd(reg-IR_REG_FP_FIRST), xzr
	} else {
		label = ir_const_label(ctx, src);
		if (type == IR_DOUBLE) {
			|	ldr Rd(reg-IR_REG_FP_FIRST), =>label
		} else {
			IR_ASSERT(type == IR_FLOAT);
			|	ldr Rs(reg-IR_REG_FP_FIRST), =>label
		}
	}
}

static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			if (type == IR_DOUBLE) {
				|	ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			} else {
				IR_ASSERT(type == IR_FLOAT);
				|	ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	if (type == IR_DOUBLE) {
		|	ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	} else {
		IR_ASSERT(type == IR_FLOAT);
		|	ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	}
}

static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_load_mem_int(ctx, type, reg, mem);
	} else {
		ir_emit_load_mem_fp(ctx, type, reg, mem);
	}
}

static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3);

	IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA);
	if (aarch64_may_encode_imm12(offset)) {
		|	add Rx(reg), Rx(base), #offset
	} else {
		ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset);
		|	add Rx(reg), Rx(base), Rx(IR_REG_INT_TMP)
	}
}

static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	if (IR_IS_CONST_REF(src)) {
		if (IR_IS_TYPE_INT(type)) {
			ir_insn *insn = &ctx->ir_base[src];

			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
				void *addr = ir_sym_val(ctx, insn);
				IR_ASSERT(addr);
				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
			} else if (insn->op == IR_STR) {
				ir_backend_data *data = ctx->data;
				dasm_State **Dst = &data->dasm_state;
				int label = ir_const_label(ctx, src);

				|	adr Rx(reg), =>label
			} else {
				ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
			}
		} else {
			ir_emit_load_imm_fp(ctx, type, reg, src);
		}
	} else if (ctx->vregs[src]) {
		ir_mem mem = ir_ref_spill_slot(ctx, src);
		ir_emit_load_mem(ctx, type, reg, mem);
	} else {
		ir_load_local_addr(ctx, reg, src);
	}
}

static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			switch (ir_type_size[type]) {
				default:
					IR_ASSERT(0);
				case 8:
					|	str Rx(reg), [Rx(base_reg), #offset]
					break;
				case 4:
					|	str Rw(reg), [Rx(base_reg), #offset]
					break;
				case 2:
					|	strh Rw(reg), [Rx(base_reg), #offset]
					break;
				case 1:
					|	strb Rw(reg), [Rx(base_reg), #offset]
					break;
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	switch (ir_type_size[type]) {
		default:
			IR_ASSERT(0);
		case 8:
			|	str Rx(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 4:
			|	str Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 2:
			|	strh Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
		case 1:
			|	strb Rw(reg), [Rx(base_reg), Rx(index_reg)]
			break;
	}
}

static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			if (type == IR_DOUBLE) {
				|	str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			} else {
				IR_ASSERT(type == IR_FLOAT);
				|	str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset]
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

			ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset);
		}
	} else {
		IR_ASSERT(offset == 0);
	}

	if (type == IR_DOUBLE) {
		|	str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	} else {
		IR_ASSERT(type == IR_FLOAT);
		|	str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)]
	}
}

static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_store_mem_int(ctx, type, mem, reg);
	} else {
		ir_emit_store_mem_fp(ctx, type, mem, reg);
	}
}

static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
{
	IR_ASSERT(dst >= 0);
	ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg);
}

static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		if (dst == IR_REG_STACK_POINTER) {
			|	mov sp, Rx(src)
		} else if (src == IR_REG_STACK_POINTER) {
			|	mov Rx(dst), sp
		} else {
			|	mov Rx(dst), Rx(src)
		}
	} else {
		|	mov Rw(dst), Rw(src)
	}
}

static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		|	mov Rx(dst), Rx(src)
	} else {
		|	mov Rw(dst), Rw(src)
	}
}

static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		|	fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST)
	} else {
		|	fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST)
	}
}

static void ir_emit_prologue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int offset;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		offset = -(ctx->stack_frame_size+16);
		if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
			|	stp x29, x30, [sp, #offset]!
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size+16)
			|	stp x29, x30, [sp]
		}
		|	mov x29, sp
		if (ctx->call_stack_size) {
			|	sub sp, sp, #(ctx->call_stack_size)
		}
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
	if (ctx->used_preserved_regs) {
		ir_reg fp;
		uint32_t i;
		ir_reg prev = IR_REG_NONE;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = ctx->stack_frame_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (prev == IR_REG_NONE) {
					prev = i;
				} else if (i < IR_REG_FP_FIRST) {
					offset -= sizeof(void*) * 2;
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(i), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(i), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
				} else {
					if (prev < IR_REG_FP_FIRST) {
						offset -= sizeof(void*);
						|	str Rx(prev), [Rx(fp), #offset]
						offset -= sizeof(void*);
						|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
					} else {
						offset -= sizeof(void*) * 2;
						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
							|	stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
						} else {
							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
							|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
							|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
						}
					}
					prev = IR_REG_NONE;
				}
			}
		}
		if (prev != IR_REG_NONE) {
			if (prev < IR_REG_FP_FIRST) {
				offset -= sizeof(void*);
				|	str Rx(prev), [Rx(fp), #offset]
			} else {
				offset -= sizeof(void*);
				|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
			}
		}
	}
	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#ifndef __APPLE__
		const int8_t *int_reg_params = _ir_int_reg_params;
		const int8_t *fp_reg_params = _ir_fp_reg_params;
		ir_reg fp;
		int offset;
		int i;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;

			offset = ctx->locals_area_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->locals_area_size + ctx->call_stack_size;
		}

		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
			ir_reg prev = IR_REG_NONE;

			/* skip named args */
			offset += sizeof(void*) * ctx->gp_reg_params;
			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
				if (prev != IR_REG_NONE) {
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
					offset += sizeof(void*) * 2;
				} else {
					prev = int_reg_params[i];
				}
			}
			if (prev != IR_REG_NONE) {
				|	str Rx(prev), [Rx(fp), #offset]
				offset += sizeof(void*);
			}
		}
		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
			/* skip named args */
			offset += 16 * ctx->fp_reg_params;
			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
				// TODO: Rd->Rq stur->str ???
				|	str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
				offset += 16;
			}
		}
#endif
	}
}
1706
1707static void ir_emit_epilogue(ir_ctx *ctx)
1708{
1709	ir_backend_data *data = ctx->data;
1710	dasm_State **Dst = &data->dasm_state;
1711
1712	if (ctx->used_preserved_regs) {
1713		int offset;
1714		uint32_t i;
1715		ir_reg prev = IR_REG_NONE;
1716		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
1717		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
1718
1719		if (ctx->flags & IR_USE_FRAME_POINTER) {
1720			offset = ctx->stack_frame_size + sizeof(void*) * 2;
1721		} else {
1722			offset = ctx->stack_frame_size + ctx->call_stack_size;
1723		}
1724		for (i = 0; i < IR_REG_NUM; i++) {
1725			if (IR_REGSET_IN(used_preserved_regs, i)) {
1726			    if (prev == IR_REG_NONE) {
1727					prev = i;
1728				} else if (i < IR_REG_FP_FIRST) {
1729					offset -= sizeof(void*) * 2;
1730					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1731						|	ldp Rx(prev), Rx(i), [Rx(fp), #offset]
1732					} else {
1733						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1734						|	ldr Rx(prev), [Rx(fp), #offset]
1735						|	ldr Rx(i), [Rx(fp), #(offset+8)]
1736					}
1737					prev = IR_REG_NONE;
1738				} else {
1739					if (prev < IR_REG_FP_FIRST) {
1740						offset -= sizeof(void*);
1741						|	ldr Rx(prev), [Rx(fp), #offset]
1742						offset -= sizeof(void*);
1743						|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1744					} else {
1745						offset -= sizeof(void*) * 2;
1746						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
1747							|	ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
1748						} else {
1749							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
1750							|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
1751							|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
1752						}
1753					}
1754					prev = IR_REG_NONE;
1755				}
1756			}
1757		}
1758		if (prev != IR_REG_NONE) {
1759			if (prev < IR_REG_FP_FIRST) {
1760				offset -= sizeof(void*);
1761				|	ldr Rx(prev), [Rx(fp), #offset]
1762			} else {
1763				offset -= sizeof(void*);
1764				|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
1765			}
1766		}
1767	}
1768
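	/* Tear down the frame: with a frame pointer, sp may have been moved by the outgoing
	 * call area or alloca, so it is reset from x29 first; the post-indexed ldp then
	 * restores x29/x30 and releases the whole frame in one instruction. */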
1769	if (ctx->flags & IR_USE_FRAME_POINTER) {
1770		if (ctx->call_stack_size || (ctx->flags2 & IR_HAS_ALLOCA)) {
1771			| mov sp, x29
1772		}
1773		if (aarch64_may_encode_imm7_addr_offset(ctx->stack_frame_size+16, 8)) {
1774			|	ldp x29, x30, [sp], #(ctx->stack_frame_size+16)
1775		} else {
1776			|	ldp x29, x30, [sp]
1777			|	add sp, sp, #(ctx->stack_frame_size+16)
1778		}
1779	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
1780		if (ctx->fixed_stack_red_zone) {
1781			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
1782		} else {
1783			| add sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
1784		}
1785	}
1786}
1787
1788static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1789{
1790	ir_backend_data *data = ctx->data;
1791	dasm_State **Dst = &data->dasm_state;
1792	ir_type type = insn->type;
1793	ir_ref op1 = insn->op1;
1794	ir_ref op2 = insn->op2;
1795	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
1796	ir_reg op1_reg = ctx->regs[def][1];
1797	ir_reg op2_reg = ctx->regs[def][2];
1798	ir_reg tmp_reg;
1799
1800	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
1801
1802	if (IR_REG_SPILLED(op1_reg)) {
1803		op1_reg = IR_REG_NUM(op1_reg);
1804		ir_emit_load(ctx, type, op1_reg, op1);
1805	}
1806	if (op2_reg != IR_REG_NONE) {
1807		if (IR_REG_SPILLED(op2_reg)) {
1808			op2_reg = IR_REG_NUM(op2_reg);
1809			if (op1 != op2) {
1810				ir_emit_load(ctx, type, op2_reg, op2);
1811			}
1812		}
1813		switch (insn->op) {
1814			default:
1815				IR_ASSERT(0 && "NIY binary op");
1816			case IR_ADD:
1817				|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg
1818				break;
1819			case IR_ADD_OV:
1820				|	ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg
1821				break;
1822			case IR_SUB:
1823				|	ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg
1824				break;
1825			case IR_SUB_OV:
1826				|	ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg
1827				break;
1828			case IR_MUL:
1829				|	ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg
1830				break;
1831			case IR_MUL_OV:
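				/* Overflow check: the product overflows iff its high half (from
				 * smulh/umulh, or bits 63..32 of the widening smull/umull result)
				 * differs from the sign- or zero-extension of the low half; the
				 * trailing cmp leaves "ne" set on overflow for ir_emit_overflow()
				 * / ir_emit_overflow_and_branch() to consume. */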
1832				if (ir_type_size[type] == 8) {
1833					if (IR_IS_TYPE_SIGNED(type)) {
1834						tmp_reg = ctx->regs[def][3];
1835						IR_ASSERT(tmp_reg != IR_REG_NONE);
1836						|	smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
1837						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
1838						|	cmp Rx(tmp_reg), Rx(def_reg), asr #63
1839					} else {
1840						tmp_reg = ctx->regs[def][3];
1841						IR_ASSERT(tmp_reg != IR_REG_NONE);
1842						|	umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
1843						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
1844						|	cmp Rx(tmp_reg), xzr
1845					}
1846				} else {
1847					if (IR_IS_TYPE_SIGNED(type)) {
1848						tmp_reg = ctx->regs[def][3];
1849						IR_ASSERT(tmp_reg != IR_REG_NONE);
1850						|	smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
1851						|	asr Rx(tmp_reg), Rx(def_reg), #32
1852						|	cmp Rx(tmp_reg), Rx(def_reg), asr #31
1853					} else {
1854						|	umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
1855						|	cmp xzr, Rx(def_reg), lsr #32
1856					}
1857				}
1858				break;
1859			case IR_DIV:
1860				if (IR_IS_TYPE_SIGNED(type)) {
1861					|	ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg
1862				} else {
1863					|	ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg
1864				}
1865				break;
1866			case IR_MOD:
1867				tmp_reg = ctx->regs[def][3];
1868				IR_ASSERT(tmp_reg != IR_REG_NONE);
1869				if (IR_IS_TYPE_SIGNED(type)) {
1870					|	ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg
1871					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
1872				} else {
1873					|	ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg
1874					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
1875				}
1876				break;
1877			case IR_OR:
1878				|	ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg
1879				break;
1880			case IR_AND:
1881				|	ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg
1882				break;
1883			case IR_XOR:
1884				|	ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg
1885				break;
1886		}
1887	} else {
1888		IR_ASSERT(IR_IS_CONST_REF(op2));
1889		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
1890		int32_t val = ctx->ir_base[op2].val.i32;
1891		switch (insn->op) {
1892			default:
1893				IR_ASSERT(0 && "NIY binary op");
1894			case IR_ADD:
1895				|	ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val
1896				break;
1897			case IR_ADD_OV:
1898				|	ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val
1899				break;
1900			case IR_SUB:
1901				|	ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val
1902				break;
1903			case IR_SUB_OV:
1904				|	ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val
1905				break;
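			/* For 8-byte AND/OR/EOR the constant is re-read as u64: the i32 read above
			 * would truncate wide bitmask immediates. */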
1906			case IR_OR:
1907				if (ir_type_size[type] == 8) {
1908					uint64_t val = ctx->ir_base[op2].val.u64;
1909					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
1910				} else {
1911					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
1912				}
1913				break;
1914			case IR_AND:
1915				if (ir_type_size[type] == 8) {
1916					uint64_t val = ctx->ir_base[op2].val.u64;
1917					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
1918				} else {
1919					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
1920				}
1921				break;
1922			case IR_XOR:
1923				if (ir_type_size[type] == 8) {
1924					uint64_t val = ctx->ir_base[op2].val.u64;
1925					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
1926				} else {
1927					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
1928				}
1929				break;
1930		}
1931	}
1932	if (IR_REG_SPILLED(ctx->regs[def][0])) {
1933		ir_emit_store(ctx, type, def, def_reg);
1934	}
1935}
1936
1937static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
1938{
1939	ir_backend_data *data = ctx->data;
1940	dasm_State **Dst = &data->dasm_state;
1941	ir_type type = insn->type;
1942	ir_ref op1 = insn->op1;
1943	ir_ref op2 = insn->op2;
1944	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
1945	ir_reg op1_reg = ctx->regs[def][1];
1946	ir_reg op2_reg = ctx->regs[def][2];
1947
1948	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
1949
1950	if (IR_REG_SPILLED(op1_reg)) {
1951		op1_reg = IR_REG_NUM(op1_reg);
1952		ir_emit_load(ctx, type, op1_reg, op1);
1953	}
1954	if (IR_REG_SPILLED(op2_reg)) {
1955		op2_reg = IR_REG_NUM(op2_reg);
1956		ir_emit_load(ctx, type, op2_reg, op2);
1957	}
1958
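	/* With identical operands the result is op1 itself; presumably the register
	 * allocator assigned def_reg == op1_reg in this case, so nothing is emitted. */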
1959	if (op1 == op2) {
1960		return;
1961	}
1962
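	/* csel keeps op1 when the condition holds: le/ls pick the smaller value for MIN,
	 * ge/hs the larger for MAX (signed vs. unsigned condition forms). */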
1963	if (ir_type_size[type] == 8) {
1964		|	cmp Rx(op1_reg), Rx(op2_reg)
1965		if (insn->op == IR_MIN) {
1966			if (IR_IS_TYPE_SIGNED(type)) {
1967				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le
1968			} else {
1969				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls
1970			}
1971		} else {
1972			IR_ASSERT(insn->op == IR_MAX);
1973			if (IR_IS_TYPE_SIGNED(type)) {
1974				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge
1975			} else {
1976				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs
1977			}
1978		}
1979	} else {
1980		|	cmp Rw(op1_reg), Rw(op2_reg)
1981		if (insn->op == IR_MIN) {
1982			if (IR_IS_TYPE_SIGNED(type)) {
1983				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le
1984			} else {
1985				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls
1986			}
1987		} else {
1988			IR_ASSERT(insn->op == IR_MAX);
1989			if (IR_IS_TYPE_SIGNED(type)) {
1990				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge
1991			} else {
1992				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs
1993			}
1994		}
1995	}
1996
1997	if (IR_REG_SPILLED(ctx->regs[def][0])) {
1998		ir_emit_store(ctx, type, def, def_reg);
1999	}
2000}
2001
2002static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2003{
2004	ir_backend_data *data = ctx->data;
2005	dasm_State **Dst = &data->dasm_state;
2006	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2007	ir_insn *math_insn = &ctx->ir_base[insn->op1];
2008	ir_type type = math_insn->type;
2009
2010	IR_ASSERT(def_reg != IR_REG_NONE);
2011	IR_ASSERT(IR_IS_TYPE_INT(type));
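	/* The arithmetic instruction emitted for op1 left its overflow status in the flags:
	 * MUL_OV signals overflow via "ne" (see the cmp in ir_emit_binop_int), signed
	 * ADD_OV/SUB_OV via the V flag, and the unsigned forms are read from the carry flag. */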
2012	if (math_insn->op == IR_MUL_OV) {
2013		|	cset Rw(def_reg), ne
2014	} else if (IR_IS_TYPE_SIGNED(type)) {
2015		|	cset Rw(def_reg), vs
2016	} else {
2017		|	cset Rw(def_reg), cs
2018	}
2019	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2020		ir_emit_store(ctx, insn->type, def, def_reg);
2021	}
2022}
2023
2024static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
2025{
2026	ir_backend_data *data = ctx->data;
2027	dasm_State **Dst = &data->dasm_state;
2028	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
2029	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
2030	ir_type type = math_insn->type;
2031	uint32_t true_block, false_block;
2032	bool reverse = 0;
2033
2034	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
2035	if (true_block == next_block) {
2036		reverse = 1;
2037		true_block = false_block;
2038		false_block = 0;
2039	} else if (false_block == next_block) {
2040		false_block = 0;
2041	}
2042
2043	if (math_insn->op == IR_MUL_OV) {
2044		if (reverse) {
2045			|	beq =>true_block
2046		} else {
2047			|	bne =>true_block
2048		}
2049	} else if (IR_IS_TYPE_SIGNED(type)) {
2050		if (reverse) {
2051			|	bvc =>true_block
2052		} else {
2053			|	bvs =>true_block
2054		}
2055	} else {
2056		if (reverse) {
2057			|	bcc =>true_block
2058		} else {
2059			|	bcs =>true_block
2060		}
2061	}
2062	if (false_block) {
2063		|	b =>false_block
2064	}
2065}
2066
2067static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2068{
2069	ir_backend_data *data = ctx->data;
2070	dasm_State **Dst = &data->dasm_state;
2071	ir_insn *op_insn = &ctx->ir_base[insn->op2];
2072	ir_type type = op_insn->type;
2073	ir_ref op2 = op_insn->op2;
2074	ir_reg op2_reg = ctx->regs[insn->op2][2];
2075	ir_reg reg;
2076
2077	IR_ASSERT(insn->op == IR_RSTORE);
2078	reg = insn->op3;
2079
2080	if (op2_reg == IR_REG_NONE) {
2081		ir_val *val = &ctx->ir_base[op2].val;
2082
2083		IR_ASSERT(IR_IS_CONST_REF(op2));
2084		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
2085		switch (op_insn->op) {
2086			default:
2087				IR_ASSERT(0 && "NIY binary op");
2088			case IR_ADD:
2089				|	ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32
2090				break;
2091			case IR_SUB:
2092				|	ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32
2093				break;
2094			case IR_OR:
2095				|	ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32
2096				break;
2097			case IR_AND:
2098				|	ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32
2099				break;
2100			case IR_XOR:
2101				|	ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32
2102				break;
2103		}
2104	} else {
2105		if (IR_REG_SPILLED(op2_reg)) {
2106			op2_reg = IR_REG_NUM(op2_reg);
2107			ir_emit_load(ctx, type, op2_reg, op2);
2108		}
2109		switch (op_insn->op) {
2110			default:
2111				IR_ASSERT(0 && "NIY binary op");
2112			case IR_ADD:
2113				|	ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg
2114				break;
2115			case IR_SUB:
2116				|	ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg
2117				break;
2118			case IR_MUL:
2119				|	ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg
2120				break;
2121			case IR_OR:
2122				|	ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg
2123				break;
2124			case IR_AND:
2125				|	ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg
2126				break;
2127			case IR_XOR:
2128				|	ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg
2129				break;
2130		}
2131	}
2132}
2133
2134static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2135{
2136	ir_backend_data *data = ctx->data;
2137	dasm_State **Dst = &data->dasm_state;
2138	ir_type type = insn->type;
2139	ir_ref op1 = insn->op1;
2140	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2141	ir_reg op1_reg = ctx->regs[def][1];
2142
2143	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2144	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2145	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2146
2147	if (IR_REG_SPILLED(op1_reg)) {
2148		op1_reg = IR_REG_NUM(op1_reg);
2149		ir_emit_load(ctx, type, op1_reg, op1);
2150	}
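	/* Power-of-two constant strength reduction: MUL becomes lsl (add for *2), unsigned
	 * DIV becomes lsr, and MOD becomes an "and" with (2^shift - 1); this is presumably
	 * reached only for unsigned DIV/MOD, since ir_emit_sdiv_pwr2()/ir_emit_smod_pwr2()
	 * below handle the signed cases. */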
2151	if (insn->op == IR_MUL) {
2152		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2153		if (shift == 1) {
2154			|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op1_reg
2155		} else {
2156			|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
2157		}
2158	} else if (insn->op == IR_DIV) {
2159		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2160		|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
2161	} else {
2162		IR_ASSERT(insn->op == IR_MOD);
2163		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
2164		|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, mask
2165	}
2166	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2167		ir_emit_store(ctx, type, def, def_reg);
2168	}
2169}
2170
2171static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2172{
2173	ir_backend_data *data = ctx->data;
2174	dasm_State **Dst = &data->dasm_state;
2175	ir_type type = insn->type;
2176	ir_ref op1 = insn->op1;
2177	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2178	ir_reg op1_reg = ctx->regs[def][1];
2179	ir_reg op2_reg = ctx->regs[def][2];
2180	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2181	int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
2182
2183	IR_ASSERT(shift != 0);
2184	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2185	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2186	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && def_reg != op1_reg);
2187
2188	if (IR_REG_SPILLED(op1_reg)) {
2189		op1_reg = IR_REG_NUM(op1_reg);
2190		ir_emit_load(ctx, type, op1_reg, op1);
2191	}
2192
2193	if (op2_reg != IR_REG_NONE) {
2194		op2_reg = IR_REG_NUM(op2_reg);
2195		ir_emit_load_imm_int(ctx, type, op2_reg, offset);
2196	}
2197
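	/* Signed division by 2^shift must truncate toward zero, so negative dividends are
	 * biased by (2^shift - 1) before the arithmetic shift; csel applies the bias only
	 * when op1 is negative ("lt" after the cmp with zero). */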
2198	if (ir_type_size[type] == 8) {
2199		|	cmp Rx(op1_reg), #0
2200		if (op2_reg != IR_REG_NONE) {
2201			|	add Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
2202		} else {
2203			|	add Rx(def_reg), Rx(op1_reg), #offset
2204		}
2205		|	csel Rx(def_reg), Rx(def_reg), Rx(op1_reg), lt
2206		|	asr Rx(def_reg), Rx(def_reg), #shift
2207	} else {
2208		|	cmp Rw(op1_reg), #0
2209		if (op2_reg != IR_REG_NONE) {
2210			|	add Rw(def_reg), Rw(op1_reg), Rw(op2_reg)
2211		} else {
2212			|	add Rw(def_reg), Rw(op1_reg), #offset
2213		}
2214		|	csel Rw(def_reg), Rw(def_reg), Rw(op1_reg), lt
2215		if (ir_type_size[type] == 4) {
2216			|	asr Rw(def_reg), Rw(def_reg), #shift
2217		} else if (ir_type_size[type] == 2) {
2218			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #16
2219		} else {
2220			IR_ASSERT(ir_type_size[type] == 1);
2221			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #8
2222		}
2223	}
2224	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2225		ir_emit_store(ctx, type, def, def_reg);
2226	}
2227}
2228
2229static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2230{
2231	ir_backend_data *data = ctx->data;
2232	dasm_State **Dst = &data->dasm_state;
2233	ir_type type = insn->type;
2234	ir_ref op1 = insn->op1;
2235	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2236	ir_reg op1_reg = ctx->regs[def][1];
2237	ir_reg tmp_reg = ctx->regs[def][3];
2238//	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
2239	uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
2240
2241	IR_ASSERT(mask != 0);
2242	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2243	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2244	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg);
2245
2246	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
2247		op1_reg = IR_REG_NUM(op1_reg);
2248		ir_emit_load(ctx, type, op1_reg, op1);
2249	}
2250	if (def_reg != op1_reg) {
2251		if (op1_reg != IR_REG_NONE) {
2252			ir_emit_mov(ctx, type, def_reg, op1_reg);
2253		} else {
2254			ir_emit_load(ctx, type, def_reg, op1);
2255		}
2256	}
2257
2258//	|	ASM_REG_REG_IMM_OP asr, type, tmp_reg, def_reg, (ir_type_size[type]*8-1)
2259//	|	ASM_REG_REG_IMM_OP lsr, type, tmp_reg, tmp_reg, (ir_type_size[type]*8-shift)
2260//	|	ASM_REG_REG_REG_OP add, type, def_reg, def_reg, tmp_reg
2261//	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
2262//	|	ASM_REG_REG_REG_OP sub, type, def_reg, def_reg, tmp_reg
2263
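	/* "negs" computes -x and sets the flags; both x and -x are reduced modulo 2^k by
	 * the mask, and "csneg ... mi" then yields (x & mask) when x was positive or
	 * -((-x) & mask) otherwise, i.e. a truncated remainder with the dividend's sign. */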
2264	|	ASM_REG_REG_OP negs, type, tmp_reg, def_reg
2265	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
2266	|	ASM_REG_REG_IMM_OP and, type, tmp_reg, tmp_reg, mask
2267	|	ASM_REG_REG_REG_TXT_OP csneg, type, def_reg, def_reg, tmp_reg, mi
2268
2269	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2270		ir_emit_store(ctx, type, def, def_reg);
2271	}
2272}
2273
2274static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2275{
2276	ir_backend_data *data = ctx->data;
2277	dasm_State **Dst = &data->dasm_state;
2278	ir_type type = insn->type;
2279	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2280	ir_reg op1_reg = ctx->regs[def][1];
2281	ir_reg op2_reg = ctx->regs[def][2];
2282	ir_reg tmp_reg;
2283
2284	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
2285	if (IR_REG_SPILLED(op1_reg)) {
2286		op1_reg = IR_REG_NUM(op1_reg);
2287		ir_emit_load(ctx, type, op1_reg, insn->op1);
2288	}
2289	if (IR_REG_SPILLED(op2_reg)) {
2290		op2_reg = IR_REG_NUM(op2_reg);
2291		ir_emit_load(ctx, type, op2_reg, insn->op2);
2292	}
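	/* Sub-word values may carry garbage in the upper register bits, so 8/16-bit
	 * operands are masked (or sign-extended for SAR) before the shift. */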
2293	switch (insn->op) {
2294		default:
2295			IR_ASSERT(0);
2296		case IR_SHL:
2297			if (ir_type_size[type] == 1) {
2298				|	and Rw(def_reg), Rw(op1_reg), #0xff
2299				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2300			} else if (ir_type_size[type] == 2) {
2301				|	and Rw(def_reg), Rw(op1_reg), #0xffff
2302				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2303			} else {
2304				|	ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
2305			}
2306			break;
2307		case IR_SHR:
2308			if (ir_type_size[type] == 1) {
2309				|	and Rw(def_reg), Rw(op1_reg), #0xff
2310				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2311			} else if (ir_type_size[type] == 2) {
2312				|	and Rw(def_reg), Rw(op1_reg), #0xffff
2313				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2314			} else {
2315				|	ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg
2316			}
2317			break;
2318		case IR_SAR:
2319			if (ir_type_size[type] == 1) {
2320				|	sxtb Rw(def_reg), Rw(op1_reg)
2321				|	asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2322			} else if (ir_type_size[type] == 2) {
2323				|	sxth Rw(def_reg), Rw(op1_reg)
2324				|	asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
2325			} else {
2326				|	ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg
2327			}
2328			break;
2329		case IR_ROL:
2330			tmp_reg = ctx->regs[def][3];
2331			IR_ASSERT(tmp_reg != IR_REG_NONE);
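			/* No sub-word rotate exists: the byte/halfword is replicated across the
			 * 32-bit register so that a full-width ror produces the rotate; rotate-left
			 * by n is a rotate-right by (width - n), hence the neg of the count. */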
2332			if (ir_type_size[type] == 1) {
2333				|	and Rw(def_reg), Rw(op1_reg), #0xff
2334				|	add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #8
2335				|	add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16
2336				|	neg Rw(tmp_reg), Rw(op2_reg)
2337				|	ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg)
2338				|	and Rw(def_reg), Rw(def_reg), #0xff
2339			} else if (ir_type_size[type] == 2) {
2340				|	and Rw(def_reg), Rw(op1_reg), #0xffff
2341				|	add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16
2342				|	neg Rw(tmp_reg), Rw(op2_reg)
2343				|	ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg)
2344				|	and Rw(def_reg), Rw(def_reg), #0xffff
2345			} else if (ir_type_size[type] == 8) {
2346				|	neg Rx(tmp_reg), Rx(op2_reg)
2347				|	ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg)
2348			} else {
2349				|	neg Rw(tmp_reg), Rw(op2_reg)
2350				|	ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg)
2351			}
2352			break;
2353		case IR_ROR:
2354			if (ir_type_size[type] == 1) {
2355				tmp_reg = ctx->regs[def][3];
2356				IR_ASSERT(tmp_reg != IR_REG_NONE);
2357				|	and Rw(tmp_reg), Rw(op1_reg), #0xff
2358				|	add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #8
2359				|	add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16
2360				|	ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg)
2361				|	and Rw(def_reg), Rw(def_reg), #0xff
2362			} else if (ir_type_size[type] == 2) {
2363				tmp_reg = ctx->regs[def][3];
2364				IR_ASSERT(tmp_reg != IR_REG_NONE);
2365				|	and Rw(tmp_reg), Rw(op1_reg), #0xffff
2366				|	add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16
2367				|	ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg)
2368				|	and Rw(def_reg), Rw(def_reg), #0xffff
2369			} else {
2370				|	ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg
2371			}
2372			break;
2373	}
2374	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2375		ir_emit_store(ctx, type, def, def_reg);
2376	}
2377}
2378
2379static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2380{
2381	ir_backend_data *data = ctx->data;
2382	dasm_State **Dst = &data->dasm_state;
2383	uint32_t shift = ctx->ir_base[insn->op2].val.u64;
2384	ir_type type = insn->type;
2385	ir_ref op1 = insn->op1;
2386	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2387	ir_reg op1_reg = ctx->regs[def][1];
2388	ir_reg tmp_reg;
2389
2390	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
2391	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
2392	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2393
2394	if (IR_REG_SPILLED(op1_reg)) {
2395		op1_reg = IR_REG_NUM(op1_reg);
2396		ir_emit_load(ctx, type, op1_reg, op1);
2397	}
2398	switch (insn->op) {
2399		default:
2400			IR_ASSERT(0);
2401		case IR_SHL:
2402			if (ir_type_size[type] == 1) {
2403				|	ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2404			} else if (ir_type_size[type] == 2) {
2405				|	ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2406			} else {
2407				|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
2408			}
2409			break;
2410		case IR_SHR:
2411			if (ir_type_size[type] == 1) {
2412				|	ubfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2413			} else if (ir_type_size[type] == 2) {
2414				|	ubfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2415			} else {
2416				|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
2417			}
2418			break;
2419		case IR_SAR:
2420			if (ir_type_size[type] == 1) {
2421				|	sbfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift)
2422			} else if (ir_type_size[type] == 2) {
2423				|	sbfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift)
2424			} else {
2425				|	ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift
2426			}
2427			break;
2428		case IR_ROL:
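			/* Sub-word rotates by a constant are synthesized from ubfx (the bits that
			 * wrap around) plus an orr with the shifted value; full-width ROL becomes
			 * ror by (width - shift). */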
2429			if (ir_type_size[type] == 1) {
2430				tmp_reg = ctx->regs[def][3];
2431				|	ubfx Rw(tmp_reg), Rw(op1_reg), #(8-shift), #shift
2432				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift
2433			} else if (ir_type_size[type] == 2) {
2434				tmp_reg = ctx->regs[def][3];
2435				|	ubfx Rw(tmp_reg), Rw(op1_reg), #(16-shift), #shift
2436				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift
2437			} else if (ir_type_size[type] == 8) {
2438				shift = (64 - shift) % 64;
2439				|	ror Rx(def_reg), Rx(op1_reg), #shift
2440			} else {
2441				shift = (32 - shift) % 32;
2442				|	ror Rw(def_reg), Rw(op1_reg), #shift
2443			}
2444			break;
2445		case IR_ROR:
2446			if (ir_type_size[type] == 1) {
2447				tmp_reg = ctx->regs[def][3];
2448				|	ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(8-shift)
2449				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(8-shift)
2450			} else if (ir_type_size[type] == 2) {
2451				tmp_reg = ctx->regs[def][3];
2452				|	ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(16-shift)
2453				|	orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(16-shift)
2454			} else {
2455				|	ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift
2456			}
2457			break;
2458	}
2459	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2460		ir_emit_store(ctx, type, def, def_reg);
2461	}
2462}
2463
2464static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2465{
2466	ir_backend_data *data = ctx->data;
2467	dasm_State **Dst = &data->dasm_state;
2468	ir_type type = insn->type;
2469	ir_ref op1 = insn->op1;
2470	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2471	ir_reg op1_reg = ctx->regs[def][1];
2472
2473	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2474
2475	if (IR_REG_SPILLED(op1_reg)) {
2476		op1_reg = IR_REG_NUM(op1_reg);
2477		ir_emit_load(ctx, type, op1_reg, op1);
2478	}
2479	if (insn->op == IR_NOT) {
2480		if (insn->type == IR_BOOL) {
2481			|	ASM_REG_IMM_OP cmp, type, op1_reg, 0
2482			|	cset Rw(def_reg), eq
2483		} else {
2484			|	ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg
2485		}
2486	} else if (insn->op == IR_NEG) {
2487		|	ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg
2488	} else if (insn->op == IR_ABS) {
2489		if (ir_type_size[type] == 8) {
2490			|	cmp Rx(op1_reg), #0
2491			|	cneg Rx(def_reg), Rx(op1_reg), lt
2492		} else {
2493			|	cmp Rw(op1_reg), #0
2494			|	cneg Rw(def_reg), Rw(op1_reg), lt
2495		}
2496	} else if (insn->op == IR_CTLZ) {
2497		if (ir_type_size[type] == 1) {
2498			|	and	Rw(def_reg), Rw(op1_reg), #0xff
2499			|	clz Rw(def_reg), Rw(def_reg)
2500			|	sub Rw(def_reg), Rw(def_reg), #24
2501		} else if (ir_type_size[type] == 2) {
2502			|	and	Rw(def_reg), Rw(op1_reg), #0xffff
2503			|	clz Rw(def_reg), Rw(def_reg)
2504			|	sub Rw(def_reg), Rw(def_reg), #16
2505		} else {
2506			|	ASM_REG_REG_OP clz, type, def_reg, op1_reg
2507		}
2508	} else if (insn->op == IR_CTTZ) {
2509		|	ASM_REG_REG_OP rbit, insn->type, def_reg, op1_reg
2510		|	ASM_REG_REG_OP clz, insn->type, def_reg, def_reg
2511	} else {
2512		IR_ASSERT(insn->op == IR_BSWAP);
2513		|	ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg
2514	}
2515	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2516		ir_emit_store(ctx, type, def, def_reg);
2517	}
2518}
2519
2520static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2521{
2522	ir_backend_data *data = ctx->data;
2523	dasm_State **Dst = &data->dasm_state;
2524	ir_type type = insn->type;
2525	ir_ref op1 = insn->op1;
2526	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2527	ir_reg op1_reg = ctx->regs[def][1];
2528	ir_reg tmp_reg = ctx->regs[def][2];
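	/* AArch64 has no scalar popcount: the value is moved to a SIMD register, "cnt"
	 * counts bits per byte and "addv" sums the byte lanes.  The two instructions are
	 * emitted as raw opcodes below, presumably because DynAsm's arm64 support lacks
	 * these SIMD mnemonics; both the Rd and Rn fields must point at tmp_reg. */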
2529	uint32_t code1 = 0x0e205800 | ((tmp_reg-IR_REG_FP_FIRST) << 5) | (tmp_reg-IR_REG_FP_FIRST); // cnt vT.8b, vT.8b
2530	uint32_t code2 = 0x0e31b800 | ((tmp_reg-IR_REG_FP_FIRST) << 5) | (tmp_reg-IR_REG_FP_FIRST); // addv bT, vT.8b
2531
2532	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
2533
2534	if (IR_REG_SPILLED(op1_reg)) {
2535		op1_reg = IR_REG_NUM(op1_reg);
2536		ir_emit_load(ctx, type, op1_reg, op1);
2537	}
2538	switch (ir_type_size[insn->type]) {
2539		default:
2540			IR_ASSERT(0);
2541		case 1:
2542			|	and	Rw(def_reg), Rw(op1_reg), #0xff
2543			|	fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg)
2544			|	.long code1 // cnt vT.8b, vT.8b
2545			|	.long code2 // addv bT, vT.8b
2546			|	fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
2547			break;
2548		case 2:
2549			|	and	Rw(def_reg), Rw(op1_reg), #0xffff
2550			|	fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg)
2551			|	.long code1 // cnt vT.8b, vT.8b
2552			|	.long code2 // addv bT, vT.8b
2553			|	fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
2554			break;
2555		case 4:
2556			|	fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(op1_reg)
2557			|	.long code1 // cnt vT.8b, vT.8b
2558			|	.long code2 // addv bT, vT.8b
2559			|	fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST)
2560			break;
2561		case 8:
2562			|	fmov Rd(tmp_reg-IR_REG_FP_FIRST), Rx(op1_reg)
2563			|	.long code1 // cnt vT.8b, vT.8b
2564			|	.long code2 // addv bT, vT.8b
2565			|	fmov Rx(def_reg), Rd(tmp_reg-IR_REG_FP_FIRST)
2566			break;
2567	}
2568	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2569		ir_emit_store(ctx, type, def, def_reg);
2570	}
2571}
2572
2573static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2574{
2575	ir_backend_data *data = ctx->data;
2576	dasm_State **Dst = &data->dasm_state;
2577	ir_type type = insn->type;
2578	ir_ref op1 = insn->op1;
2579	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2580	ir_reg op1_reg = ctx->regs[def][1];
2581
2582	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2583
2584	if (IR_REG_SPILLED(op1_reg)) {
2585		op1_reg = IR_REG_NUM(op1_reg);
2586		ir_emit_load(ctx, type, op1_reg, op1);
2587	}
2588	if (insn->op == IR_NEG) {
2589		if (type == IR_DOUBLE) {
2590			|	fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
2591		} else {
2592			IR_ASSERT(type == IR_FLOAT);
2593			|	fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
2594		}
2595	} else {
2596		IR_ASSERT(insn->op == IR_ABS);
2597		if (type == IR_DOUBLE) {
2598			|	fabs Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
2599		} else {
2600			IR_ASSERT(type == IR_FLOAT);
2601			|	fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
2602		}
2603	}
2604	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2605		ir_emit_store(ctx, insn->type, def, def_reg);
2606	}
2607}
2608
2609static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2610{
2611	ir_backend_data *data = ctx->data;
2612	dasm_State **Dst = &data->dasm_state;
2613	ir_type type = insn->type;
2614	ir_ref op1 = insn->op1;
2615	ir_ref op2 = insn->op2;
2616	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2617	ir_reg op1_reg = ctx->regs[def][1];
2618	ir_reg op2_reg = ctx->regs[def][2];
2619
2620	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
2621	if (IR_REG_SPILLED(op1_reg)) {
2622		op1_reg = IR_REG_NUM(op1_reg);
2623		ir_emit_load(ctx, type, op1_reg, op1);
2624	}
2625	if (IR_REG_SPILLED(op2_reg)) {
2626		op2_reg = IR_REG_NUM(op2_reg);
2627		if (op1 != op2) {
2628			ir_emit_load(ctx, type, op2_reg, op2);
2629		}
2630	}
2631	switch (insn->op) {
2632		default:
2633			IR_ASSERT(0 && "NIY binary op");
2634		case IR_ADD:
2635			|	ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg, op2_reg
2636			break;
2637		case IR_SUB:
2638			|	ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg
2639			break;
2640		case IR_MUL:
2641			|	ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg
2642			break;
2643		case IR_DIV:
2644			|	ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg
2645			break;
2646		case IR_MIN:
2647			|	ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg
2648			break;
2649		case IR_MAX:
2650			|	ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg
2651			break;
2652	}
2653	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2654		ir_emit_store(ctx, insn->type, def, def_reg);
2655	}
2656}
2657
2658static void ir_emit_fix_type(ir_ctx *ctx, ir_type type, ir_reg op1_reg)
2659{
2660	ir_backend_data *data = ctx->data;
2661	dasm_State **Dst = &data->dasm_state;
2662
2663	// TODO: prevent repeatable sign/zero extension ???
2664	if (ir_type_size[type] == 2) {
2665		if (IR_IS_TYPE_SIGNED(type)) {
2666			|	sxth Rw(op1_reg), Rw(op1_reg)
2667		} else {
2668			|	uxth Rw(op1_reg), Rw(op1_reg)
2669		}
2670	} else if (ir_type_size[type] == 1) {
2671		if (IR_IS_TYPE_SIGNED(type)) {
2672			|	sxtb Rw(op1_reg), Rw(op1_reg)
2673		} else {
2674			|	uxtb Rw(op1_reg), Rw(op1_reg)
2675		}
2676	}
2677}
2678
2679static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2)
2680{
2681	ir_backend_data *data = ctx->data;
2682	dasm_State **Dst = &data->dasm_state;
2683
2684	IR_ASSERT(op1_reg != IR_REG_NONE);
2685	if (ir_type_size[type] < 4) {
2686		ir_emit_fix_type(ctx, type, op1_reg);
2687	}
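	/* op1 is sign/zero-extended in place once (above); a register op2 is widened on
	 * the fly by the extended-register compare forms, and a constant op2 is compared
	 * directly. */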
2688	if (op2_reg != IR_REG_NONE) {
2689		if (ir_type_size[type] == 8) {
2690			|	cmp Rx(op1_reg), Rx(op2_reg)
2691		} else if (ir_type_size[type] == 4) {
2692			|	cmp Rw(op1_reg), Rw(op2_reg)
2693		} else if (ir_type_size[type] == 2) {
2694			if (IR_IS_TYPE_SIGNED(type)) {
2695				|	cmp Rw(op1_reg), Rw(op2_reg), sxth
2696			} else {
2697				|	cmp Rw(op1_reg), Rw(op2_reg), uxth
2698			}
2699		} else if (ir_type_size[type] == 1) {
2700			if (IR_IS_TYPE_SIGNED(type)) {
2701				|	cmp Rw(op1_reg), Rw(op2_reg), sxtb
2702			} else {
2703				|	cmp Rw(op1_reg), Rw(op2_reg), uxtb
2704			}
2705		} else {
2706			IR_ASSERT(0);
2707		}
2708	} else {
2709		IR_ASSERT(IR_IS_CONST_REF(op2));
2710		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
2711		int32_t val = ctx->ir_base[op2].val.i32;
2712
2713		if (ir_type_size[type] == 8) {
2714			|	cmp Rx(op1_reg), #val
2715		} else {
2716			|	cmp Rw(op1_reg), #val
2717		}
2718	}
2719}
2720
2721static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2722{
2723	ir_backend_data *data = ctx->data;
2724	dasm_State **Dst = &data->dasm_state;
2725	ir_type type = ctx->ir_base[insn->op1].type;
2726	ir_op op = insn->op;
2727	ir_ref op1 = insn->op1;
2728	ir_ref op2 = insn->op2;
2729	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2730	ir_reg op1_reg = ctx->regs[def][1];
2731	ir_reg op2_reg = ctx->regs[def][2];
2732
2733	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
2734	if (IR_REG_SPILLED(op1_reg)) {
2735		op1_reg = IR_REG_NUM(op1_reg);
2736		ir_emit_load(ctx, type, op1_reg, op1);
2737	}
2738	if (op2_reg != IR_REG_NONE) {
2739		if (IR_REG_SPILLED(op2_reg)) {
2740			op2_reg = IR_REG_NUM(op2_reg);
2741			if (op1 != op2) {
2742				ir_emit_load(ctx, type, op2_reg, op2);
2743			}
2744		}
2745	}
2746	if (IR_IS_CONST_REF(insn->op2)
2747	 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)
2748	 && ctx->ir_base[insn->op2].val.u64 == 0) {
2749		if (op == IR_ULT) {
2750			/* always false */
2751			ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0);
2752			if (IR_REG_SPILLED(ctx->regs[def][0])) {
2753				ir_emit_store(ctx, insn->type, def, def_reg);
2754			}
2755			return;
2756		} else if (op == IR_UGE) {
2757			/* always true */
2758			ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1);
2759			if (IR_REG_SPILLED(ctx->regs[def][0])) {
2760				ir_emit_store(ctx, insn->type, def, def_reg);
2761			}
2762			return;
2763		} else if (op == IR_ULE) {
2764			op = IR_EQ;
2765		} else if (op == IR_UGT) {
2766			op = IR_NE;
2767		}
2768	}
2769	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
2770	switch (op) {
2771		default:
2772			IR_ASSERT(0 && "NIY binary op");
2773		case IR_EQ:
2774			|	cset Rw(def_reg), eq
2775			break;
2776		case IR_NE:
2777			|	cset Rw(def_reg), ne
2778			break;
2779		case IR_LT:
2780			|	cset Rw(def_reg), lt
2781			break;
2782		case IR_GE:
2783			|	cset Rw(def_reg), ge
2784			break;
2785		case IR_LE:
2786			|	cset Rw(def_reg), le
2787			break;
2788		case IR_GT:
2789			|	cset Rw(def_reg), gt
2790			break;
2791		case IR_ULT:
2792			|	cset Rw(def_reg), lo
2793			break;
2794		case IR_UGE:
2795			|	cset Rw(def_reg), hs
2796			break;
2797		case IR_ULE:
2798			|	cset Rw(def_reg), ls
2799			break;
2800		case IR_UGT:
2801			|	cset Rw(def_reg), hi
2802			break;
2803	}
2804	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2805		ir_emit_store(ctx, insn->type, def, def_reg);
2806	}
2807}
2808
2809static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn)
2810{
2811	ir_backend_data *data = ctx->data;
2812	dasm_State **Dst = &data->dasm_state;
2813	ir_type type = ctx->ir_base[cmp_insn->op1].type;
2814	ir_op op = cmp_insn->op;
2815	ir_ref op1, op2;
2816	ir_reg op1_reg, op2_reg;
2817
2818	if (op == IR_LT || op == IR_LE) {
2819		/* swap operands: after fcmp, GE/GT-style conditions are false on an unordered (NaN) result, so no separate parity-style check is needed */
2820		op ^= 3;
2821		op1 = cmp_insn->op2;
2822		op2 = cmp_insn->op1;
2823		op1_reg = ctx->regs[cmp_ref][2];
2824		op2_reg = ctx->regs[cmp_ref][1];
2825	} else {
2826		op1 = cmp_insn->op1;
2827		op2 = cmp_insn->op2;
2828		op1_reg = ctx->regs[cmp_ref][1];
2829		op2_reg = ctx->regs[cmp_ref][2];
2830	}
2831
2832	IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
2833	if (IR_REG_SPILLED(op1_reg)) {
2834		op1_reg = IR_REG_NUM(op1_reg);
2835		ir_emit_load(ctx, type, op1_reg, op1);
2836	}
2837	if (IR_REG_SPILLED(op2_reg)) {
2838		op2_reg = IR_REG_NUM(op2_reg);
2839		if (op1 != op2) {
2840			ir_emit_load(ctx, type, op2_reg, op2);
2841		}
2842	}
2843	if (type == IR_DOUBLE) {
2844		|	fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST)
2845	} else {
2846		IR_ASSERT(type == IR_FLOAT);
2847		|	fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST)
2848	}
2849	return op;
2850}
2851
2852static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
2853{
2854	ir_backend_data *data = ctx->data;
2855	dasm_State **Dst = &data->dasm_state;
2856	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn);
2857	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
2858//???	ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag
2859
2860	IR_ASSERT(def_reg != IR_REG_NONE);
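	/* After fcmp a NaN operand sets C and V, so the condition codes below make the
	 * ordered predicates (EQ, LT->mi, GE, LE->ls, GT) false on unordered operands and
	 * the unordered forms (NE, ULT->lt, UGE->hs, ULE->le, UGT->hi) true. */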
2861	switch (op) {
2862		default:
2863			IR_ASSERT(0 && "NIY binary op");
2864		case IR_EQ:
2865			|	cset Rw(def_reg), eq
2866			break;
2867		case IR_NE:
2868			|	cset Rw(def_reg), ne
2869			break;
2870		case IR_LT:
2871			|	cset Rw(def_reg), mi
2872			break;
2873		case IR_GE:
2874			|	cset Rw(def_reg), ge
2875			break;
2876		case IR_LE:
2877			|	cset Rw(def_reg), ls
2878			break;
2879		case IR_GT:
2880			|	cset Rw(def_reg), gt
2881			break;
2882		case IR_ULT:
2883			|	cset Rw(def_reg), lt
2884			break;
2885		case IR_UGE:
2886			|	cset Rw(def_reg), hs
2887			break;
2888		case IR_ULE:
2889			|	cset Rw(def_reg), le
2890			break;
2891		case IR_UGT:
2892			|	cset Rw(def_reg), hi
2893			break;
2894	}
2895	if (IR_REG_SPILLED(ctx->regs[def][0])) {
2896		ir_emit_store(ctx, insn->type, def, def_reg);
2897	}
2898}
2899
2900static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block)
2901{
2902	uint32_t true_block, false_block;
2903	ir_backend_data *data = ctx->data;
2904	dasm_State **Dst = &data->dasm_state;
2905
2906	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
2907	if (true_block != next_block) {
2908		|	b =>true_block
2909	}
2910}
2911
2912static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block)
2913{
2914	uint32_t true_block, false_block;
2915	ir_backend_data *data = ctx->data;
2916	dasm_State **Dst = &data->dasm_state;
2917
2918	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
2919	if (false_block != next_block) {
2920		|	b =>false_block
2921	}
2922}
2923
2924static void ir_emit_jz(ir_ctx *ctx, uint32_t b, uint32_t next_block, uint8_t op, ir_type type, ir_reg reg)
2925{
2926	uint32_t true_block, false_block;
2927	ir_backend_data *data = ctx->data;
2928	dasm_State **Dst = &data->dasm_state;
2929
2930	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
2931	if (true_block == next_block) {
2932		IR_ASSERT(op < IR_LT);
2933		op ^= 1; // reverse
2934		true_block = false_block;
2935		false_block = 0;
2936	} else if (false_block == next_block) {
2937		false_block = 0;
2938	}
2939
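	/* A boolean test against zero folds into cbz/cbnz, fusing compare and branch. */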
2940	if (op == IR_EQ) {
2941		if (ir_type_size[type] == 8) {
2942			|	cbz Rx(reg), =>true_block
2943		} else {
2944			|	cbz Rw(reg), =>true_block
2945		}
2946	} else {
2947		IR_ASSERT(op == IR_NE);
2948		if (ir_type_size[type] == 8) {
2949			|	cbnz Rx(reg), =>true_block
2950		} else {
2951			|	cbnz Rw(reg), =>true_block
2952		}
2953	}
2954	if (false_block) {
2955		|	b =>false_block
2956	}
2957}
2958
2959static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp)
2960{
2961	uint32_t true_block, false_block;
2962	ir_backend_data *data = ctx->data;
2963	dasm_State **Dst = &data->dasm_state;
2964
2965	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
2966	if (true_block == next_block) {
2967		/* swap to avoid unconditional JMP */
2968		if (int_cmp || op == IR_EQ || op == IR_NE) {
2969			op ^= 1; // reverse: flip to the negated predicate (EQ<->NE, LT<->GE, ...)
2970		} else {
2971			op ^= 5; // reverse an FP predicate: ordered <-> unordered (e.g. LT<->UGE)
2972		}
2973		true_block = false_block;
2974		false_block = 0;
2975	} else if (false_block == next_block) {
2976		false_block = 0;
2977	}
2978
2979	if (int_cmp) {
2980		switch (op) {
2981			default:
2982				IR_ASSERT(0 && "NIY binary op");
2983			case IR_EQ:
2984				|	beq =>true_block
2985				break;
2986			case IR_NE:
2987				|	bne =>true_block
2988				break;
2989			case IR_LT:
2990				|	blt =>true_block
2991				break;
2992			case IR_GE:
2993				|	bge =>true_block
2994				break;
2995			case IR_LE:
2996				|	ble =>true_block
2997				break;
2998			case IR_GT:
2999				|	bgt =>true_block
3000				break;
3001			case IR_ULT:
3002				|	blo =>true_block
3003				break;
3004			case IR_UGE:
3005				|	bhs =>true_block
3006				break;
3007			case IR_ULE:
3008				|	bls =>true_block
3009				break;
3010			case IR_UGT:
3011				|	bhi =>true_block
3012				break;
3013		}
3014	} else {
3015		switch (op) {
3016			default:
3017				IR_ASSERT(0 && "NIY binary op");
3018			case IR_EQ:
3019				|	beq =>true_block
3020				break;
3021			case IR_NE:
3022				|	bne =>true_block
3023				break;
3024			case IR_LT:
3025				|	bmi =>true_block
3026				break;
3027			case IR_GE:
3028				|	bge =>true_block
3029				break;
3030			case IR_LE:
3031				|	bls =>true_block
3032				break;
3033			case IR_GT:
3034				|	bgt =>true_block
3035				break;
3036			case IR_ULT:
3037				|	blt =>true_block
3038				break;
3039			case IR_UGE:
3040				|	bhs =>true_block
3041				break;
3042			case IR_ULE:
3043				|	ble =>true_block
3044				break;
3045			case IR_UGT:
3046				|	bhi =>true_block
3047				break;
3048//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
3049//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
3050//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
3051//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
3052		}
3053	}
3054	if (false_block) {
3055		|	b =>false_block
3056	}
3057}
3058
3059static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
3060{
3061	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
3062	ir_op op = cmp_insn->op;
3063	ir_type type = ctx->ir_base[cmp_insn->op1].type;
3064	ir_ref op1 = cmp_insn->op1;
3065	ir_ref op2 = cmp_insn->op2;
3066	ir_reg op1_reg = ctx->regs[insn->op2][1];
3067	ir_reg op2_reg = ctx->regs[insn->op2][2];
3068
3069	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3070		op1_reg = IR_REG_NUM(op1_reg);
3071		ir_emit_load(ctx, type, op1_reg, op1);
3072	}
3073	if (op2_reg != IR_REG_NONE) {
3074		if (IR_REG_SPILLED(op2_reg)) {
3075			op2_reg = IR_REG_NUM(op2_reg);
3076			if (op1 != op2) {
3077				ir_emit_load(ctx, type, op2_reg, op2);
3078			}
3079		}
3080	}
3081	if (IR_IS_CONST_REF(op2)
3082	 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)
3083	 && ctx->ir_base[op2].val.u64 == 0) {
3084		if (op == IR_ULT) {
3085			/* always false */
3086			ir_emit_jmp_false(ctx, b, def, next_block);
3087			return;
3088		} else if (op == IR_UGE) {
3089			/* always true */
3090			ir_emit_jmp_true(ctx, b, def, next_block);
3091			return;
3092		} else if (op == IR_ULE) {
3093			op = IR_EQ;
3094		} else if (op == IR_UGT) {
3095			op = IR_NE;
3096		}
3097		if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
3098			ir_emit_jz(ctx, b, next_block, op, type, op1_reg);
3099			return;
3100		}
3101	}
3102	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
3103	ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
3104}
3105
3106static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
3107{
3108	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
3109	ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
3110}
3111
3112static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
3113{
3114	ir_type type = ctx->ir_base[insn->op2].type;
3115	ir_reg op2_reg = ctx->regs[def][2];
3116	ir_backend_data *data = ctx->data;
3117	dasm_State **Dst = &data->dasm_state;
3118
3119	if (IR_IS_CONST_REF(insn->op2)) {
3120		uint32_t true_block, false_block;
3121
3122		ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
3123		if (ir_const_is_true(&ctx->ir_base[insn->op2])) {
3124			if (true_block != next_block) {
3125				|	b =>true_block
3126			}
3127		} else {
3128			if (false_block != next_block) {
3129				|	b =>false_block
3130			}
3131		}
3132		return;
3133	} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
3134		uint32_t true_block, false_block;
3135
3136		ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
3137		if (true_block != next_block) {
3138			|	b =>true_block
3139		}
3140		return;
3141	}
3142	IR_ASSERT(op2_reg != IR_REG_NONE);
3143	if (IR_REG_SPILLED(op2_reg)) {
3144		op2_reg = IR_REG_NUM(op2_reg);
3145		ir_emit_load(ctx, type, op2_reg, insn->op2);
3146	}
3147	|	ASM_REG_IMM_OP cmp, type, op2_reg, 0
3148	ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1);
3149}
3150
3151static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3152{
3153	ir_backend_data *data = ctx->data;
3154	dasm_State **Dst = &data->dasm_state;
3155	ir_type type = insn->type;
3156	ir_ref op1 = insn->op1;
3157	ir_ref op2 = insn->op2;
3158	ir_ref op3 = insn->op3;
3159	ir_type op1_type = ctx->ir_base[op1].type;
3160	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3161	ir_reg op1_reg = ctx->regs[def][1];
3162	ir_reg op2_reg = ctx->regs[def][2];
3163	ir_reg op3_reg = ctx->regs[def][3];
3164
3165	IR_ASSERT(def_reg != IR_REG_NONE);
3166
3167	if (IR_REG_SPILLED(op2_reg)) {
3168		op2_reg = IR_REG_NUM(op2_reg);
3169		ir_emit_load(ctx, type, op2_reg, op2);
3170		if (op1 == op2) {
3171			op1_reg = op2_reg;
3172		}
3173		if (op3 == op2) {
3174			op3_reg = op2_reg;
3175		}
3176	}
3177	if (op3 != op2 && IR_REG_SPILLED(op3_reg)) {
3178		op3_reg = IR_REG_NUM(op3_reg);
3179		ir_emit_load(ctx, type, op3_reg, op3);
3180			if (op1 == op3) {
3181			op1_reg = op3_reg;
3182		}
3183	}
3184	if (op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
3185		op1_reg = IR_REG_NUM(op1_reg);
3186		ir_emit_load(ctx, op1_type, op1_reg, op1);
3187	}
3188
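	/* COND: test op1 against zero, then select op2 ("ne", i.e. op1 is true) or op3
	 * with a single csel/fcsel. */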
3189	if (IR_IS_TYPE_INT(op1_type)) {
3190		|	ASM_REG_IMM_OP cmp, op1_type, op1_reg, 0
3191	} else {
3192		|	ASM_FP_REG_IMM_OP fcmp, op1_type, op1_reg, 0.0
3193	}
3194
3195	if (IR_IS_TYPE_INT(type)) {
3196		if (ir_type_size[type] == 8) {
3197			|	csel Rx(def_reg), Rx(op2_reg), Rx(op3_reg), ne
3198		} else {
3199			|	csel Rw(def_reg), Rw(op2_reg), Rw(op3_reg), ne
3200		}
3201	} else {
3202		if (type == IR_DOUBLE) {
3203			|	fcsel Rd(def_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST), Rd(op3_reg-IR_REG_FP_FIRST), ne
3204		} else {
3205			|	fcsel Rs(def_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST), Rs(op3_reg-IR_REG_FP_FIRST), ne
3206		}
3207	}
3208
3209	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3210		ir_emit_store(ctx, type, def, def_reg);
3211	}
3212}
3213
3214static void ir_emit_return_void(ir_ctx *ctx)
3215{
3216	ir_backend_data *data = ctx->data;
3217	dasm_State **Dst = &data->dasm_state;
3218
3219	ir_emit_epilogue(ctx);
3220	|	ret
3221}
3222
3223static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3224{
3225	ir_reg op2_reg = ctx->regs[ref][2];
3226
3227	if (op2_reg != IR_REG_INT_RET1) {
3228		ir_type type = ctx->ir_base[insn->op2].type;
3229
3230		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
3231			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
3232		} else {
3233			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
3234		}
3235	}
3236	ir_emit_return_void(ctx);
3237}
3238
3239static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3240{
3241	ir_reg op2_reg = ctx->regs[ref][2];
3242	ir_type type = ctx->ir_base[insn->op2].type;
3243
3244	if (op2_reg != IR_REG_FP_RET1) {
3245		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
3246			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
3247		} else {
3248			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
3249		}
3250	}
3251	ir_emit_return_void(ctx);
3252}
3253
3254static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3255{
3256	ir_type dst_type = insn->type;
3257	ir_type src_type = ctx->ir_base[insn->op1].type;
3258	ir_backend_data *data = ctx->data;
3259	dasm_State **Dst = &data->dasm_state;
3260	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3261	ir_reg op1_reg = ctx->regs[def][1];
3262
3263	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3264	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3265	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
3266	IR_ASSERT(def_reg != IR_REG_NONE);
3267	if ((op1_reg != IR_REG_NONE) && IR_REG_SPILLED(op1_reg)) {
3268		op1_reg = IR_REG_NUM(op1_reg);
3269		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3270	}
3271
3272	if (op1_reg != IR_REG_NONE) {
3273		if (ir_type_size[src_type] == 1) {
3274			if (ir_type_size[dst_type] == 2) {
3275				|	sxtb Rw(def_reg), Rw(op1_reg)
3276			} else if (ir_type_size[dst_type] == 4) {
3277				|	sxtb Rw(def_reg), Rw(op1_reg)
3278			} else {
3279				IR_ASSERT(ir_type_size[dst_type] == 8);
3280				|	sxtb Rx(def_reg), Rx(op1_reg)
3281			}
3282		} else if (ir_type_size[src_type] == 2) {
3283			if (ir_type_size[dst_type] == 4) {
3284				|	sxth Rw(def_reg), Rw(op1_reg)
3285			} else {
3286				IR_ASSERT(ir_type_size[dst_type] == 8);
3287				|	sxth Rx(def_reg), Rx(op1_reg)
3288			}
3289		} else {
3290			IR_ASSERT(ir_type_size[src_type] == 4);
3291			IR_ASSERT(ir_type_size[dst_type] == 8);
3292			|	sxtw Rx(def_reg), Rw(op1_reg)
3293		}
3294	} else if (IR_IS_CONST_REF(insn->op1)) {
3295		IR_ASSERT(0);
3296	} else {
3297		ir_reg fp;
3298		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3299
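		/* The operand lives in a spill slot: use a sign-extending load
		 * (ldrsb/ldrsh/ldrsw) instead of a plain load followed by an extend. */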
3300		if (ir_type_size[src_type] == 1) {
3301			if (ir_type_size[dst_type] == 2) {
3302				|	ldrsb Rw(def_reg), [Rx(fp), #offset]
3303			} else if (ir_type_size[dst_type] == 4) {
3304				|	ldrsb Rw(def_reg), [Rx(fp), #offset]
3305			} else {
3306				IR_ASSERT(ir_type_size[dst_type] == 8);
3307				|	ldrsb Rx(def_reg), [Rx(fp), #offset]
3308			}
3309		} else if (ir_type_size[src_type] == 2) {
3310			if (ir_type_size[dst_type] == 4) {
3311				|	ldrsh Rw(def_reg), [Rx(fp), #offset]
3312			} else {
3313				IR_ASSERT(ir_type_size[dst_type] == 8);
3314				|	ldrsh Rx(def_reg), [Rx(fp), #offset]
3315			}
3316		} else {
3317			IR_ASSERT(ir_type_size[src_type] == 4);
3318			IR_ASSERT(ir_type_size[dst_type] == 8);
3319			|	ldrsw Rx(def_reg), [Rx(fp), #offset]
3320		}
3321	}
3322	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3323		ir_emit_store(ctx, dst_type, def, def_reg);
3324	}
3325}
3326
3327static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3328{
3329	ir_type dst_type = insn->type;
3330	ir_type src_type = ctx->ir_base[insn->op1].type;
3331	ir_backend_data *data = ctx->data;
3332	dasm_State **Dst = &data->dasm_state;
3333	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3334	ir_reg op1_reg = ctx->regs[def][1];
3335
3336	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3337	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3338	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
3339	IR_ASSERT(def_reg != IR_REG_NONE);
3340	if ((op1_reg != IR_REG_NONE) && IR_REG_SPILLED(op1_reg)) {
3341		op1_reg = IR_REG_NUM(op1_reg);
3342		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3343	}
3344
3345	if (op1_reg != IR_REG_NONE) {
3346		if (ir_type_size[src_type] == 1) {
3347			|	uxtb Rw(def_reg), Rw(op1_reg)
3348		} else if (ir_type_size[src_type] == 2) {
3349			|	uxth Rw(def_reg), Rw(op1_reg)
3350		} else {
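			/* Writes to a W register implicitly zero bits 63..32, so a plain mov
			 * zero-extends a 32-bit source. */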
3351			|	mov Rw(def_reg), Rw(op1_reg)
3352		}
3353	} else if (IR_IS_CONST_REF(insn->op1)) {
3354		IR_ASSERT(0);
3355	} else {
3356		ir_reg fp;
3357		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3358
3359		if (ir_type_size[src_type] == 1) {
3360			|	ldrb Rw(def_reg), [Rx(fp), #offset]
3361		} else if (ir_type_size[src_type] == 2) {
3362			|	ldrh Rw(def_reg), [Rx(fp), #offset]
3363		} else {
3364			IR_ASSERT(ir_type_size[src_type] == 4);
3365			IR_ASSERT(ir_type_size[dst_type] == 8);
3366			|	ldr Rw(def_reg), [Rx(fp), #offset]
3367		}
3368	}
3369	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3370		ir_emit_store(ctx, dst_type, def, def_reg);
3371	}
3372}
3373
3374static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3375{
3376	ir_backend_data *data = ctx->data;
3377	dasm_State **Dst = &data->dasm_state;
3378	ir_type dst_type = insn->type;
3379	ir_type src_type = ctx->ir_base[insn->op1].type;
3380	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3381	ir_reg op1_reg = ctx->regs[def][1];
3382
3383	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3384	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3385	IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]);
3386	IR_ASSERT(def_reg != IR_REG_NONE);
3387	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3388		op1_reg = IR_REG_NUM(op1_reg);
3389		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3390	}
3391	if (op1_reg != IR_REG_NONE) {
3392		if (ir_type_size[dst_type] == 1) {
3393			|	and Rw(def_reg), Rw(op1_reg), #0xff
3394		} else if (ir_type_size[dst_type] == 2) {
3395			|	and Rw(def_reg), Rw(op1_reg), #0xffff
3396		} else if (op1_reg != def_reg) {
3397			ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
3398		}
3399	} else {
3400		ir_emit_load(ctx, dst_type, def_reg, insn->op1);
3401	}
3402	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3403		ir_emit_store(ctx, dst_type, def, def_reg);
3404	}
3405}
3406
3407static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3408{
3409	ir_type dst_type = insn->type;
3410	ir_type src_type = ctx->ir_base[insn->op1].type;
3411	ir_backend_data *data = ctx->data;
3412	dasm_State **Dst = &data->dasm_state;
3413	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3414	ir_reg op1_reg = ctx->regs[def][1];
3415
3416	IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]);
3417	IR_ASSERT(def_reg != IR_REG_NONE);
3418	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3419		op1_reg = IR_REG_NUM(op1_reg);
3420		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3421	}
3422	if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) {
3423		if (op1_reg != IR_REG_NONE) {
3424			if (IR_REG_SPILLED(op1_reg)) {
3425				op1_reg = IR_REG_NUM(op1_reg);
3426				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3427			}
3428			if (op1_reg != def_reg) {
3429				ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
3430			}
3431		} else {
3432			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
3433		}
3434	} else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) {
3435		if (op1_reg != IR_REG_NONE) {
3436			if (IR_REG_SPILLED(op1_reg)) {
3437				op1_reg = IR_REG_NUM(op1_reg);
3438				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3439			}
3440			if (op1_reg != def_reg) {
3441				ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
3442			}
3443		} else {
3444			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
3445		}
3446	} else if (IR_IS_TYPE_FP(src_type)) {
3447		IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3448		if (op1_reg != IR_REG_NONE) {
3449			if (IR_REG_SPILLED(op1_reg)) {
3450				op1_reg = IR_REG_NUM(op1_reg);
3451				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3452			}
3453			if (src_type == IR_DOUBLE) {
3454				|	fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3455			} else {
3456				IR_ASSERT(src_type == IR_FLOAT);
3457				|	fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3458			}
3459		} else if (IR_IS_CONST_REF(insn->op1)) {
3460			IR_ASSERT(0); //???
3461		} else {
3462			ir_reg fp;
3463			int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3464
3465			if (src_type == IR_DOUBLE) {
3466				|	ldr Rx(def_reg), [Rx(fp), #offset]
3467			} else {
3468				IR_ASSERT(src_type == IR_FLOAT);
3469				|	ldr Rw(def_reg), [Rx(fp), #offset]
3470			}
3471		}
3472	} else if (IR_IS_TYPE_FP(dst_type)) {
3473		IR_ASSERT(IR_IS_TYPE_INT(src_type));
3474		if (op1_reg != IR_REG_NONE) {
3475			if (IR_REG_SPILLED(op1_reg)) {
3476				op1_reg = IR_REG_NUM(op1_reg);
3477				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3478			}
3479			if (dst_type == IR_DOUBLE) {
3480				|	fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3481			} else {
3482				IR_ASSERT(dst_type == IR_FLOAT);
3483				|	fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3484			}
3485		} else if (IR_IS_CONST_REF(insn->op1)) {
3486			IR_ASSERT(0); //???
3487		} else {
3488			ir_reg fp;
3489			int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp);
3490
3491			if (dst_type == IR_DOUBLE) {
3492				|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset]
3493			} else {
3494				IR_ASSERT(dst_type == IR_FLOAT);
3495				|	ldr Rs(def_reg-IR_REG_FP_FIRST), [Rx(fp), #offset]
3496			}
3496			}
3497		}
3498	}
3499	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3500		ir_emit_store(ctx, dst_type, def, def_reg);
3501	}
3502}
3503
3504static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3505{
3506	ir_type dst_type = insn->type;
3507	ir_type src_type = ctx->ir_base[insn->op1].type;
3508	ir_backend_data *data = ctx->data;
3509	dasm_State **Dst = &data->dasm_state;
3510	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3511	ir_reg op1_reg = ctx->regs[def][1];
3512
3513	IR_ASSERT(IR_IS_TYPE_INT(src_type));
3514	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
3515	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
3516	if (IR_REG_SPILLED(op1_reg)) {
3517		op1_reg = IR_REG_NUM(op1_reg);
3518		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3519	}
3520
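	/* scvtf/ucvtf convert straight from a GP to an FP register. The W form consumes
	 * all 32 source bits, so 8/16-bit values are widened first via ir_emit_fix_type;
	 * e.g. IR_I16 -> IR_DOUBLE is roughly "sxth w0, w0; scvtf d0, w0" (assuming the
	 * fix-up emits a sign extension). */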
3521	if (ir_type_size[src_type] == 8) {
3522		if (IR_IS_TYPE_SIGNED(src_type)) {
3523			if (dst_type == IR_DOUBLE) {
3524				|	scvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3525			} else {
3526				IR_ASSERT(dst_type == IR_FLOAT);
3527				|	scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3528			}
3529		} else {
3530			if (dst_type == IR_DOUBLE) {
3531				|	ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3532			} else {
3533				IR_ASSERT(dst_type == IR_FLOAT);
3534				|	ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg)
3535			}
3536		}
3537	} else {
3538		if (IR_IS_TYPE_SIGNED(src_type)) {
3539			if (ir_type_size[src_type] == 2) {
3540				ir_emit_fix_type(ctx, IR_I16, op1_reg);
3541			} else if (ir_type_size[src_type] == 1) {
3542				ir_emit_fix_type(ctx, IR_I8, op1_reg);
3543			}
3544			if (dst_type == IR_DOUBLE) {
3545				|	scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3546			} else {
3547				IR_ASSERT(dst_type == IR_FLOAT);
3548				|	scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3549			}
3550		} else {
3551			if (ir_type_size[src_type] == 2) {
3552				ir_emit_fix_type(ctx, IR_U16, op1_reg);
3553			} else if (ir_type_size[src_type] == 1) {
3554				ir_emit_fix_type(ctx, IR_U8, op1_reg);
3555			}
3556			if (dst_type == IR_DOUBLE) {
3557				|	ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3558			} else {
3559				IR_ASSERT(dst_type == IR_FLOAT);
3560				|	ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg)
3561			}
3562		}
3563	}
3564	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3565		ir_emit_store(ctx, dst_type, def, def_reg);
3566	}
3567}
3568
3569static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3570{
3571	ir_type dst_type = insn->type;
3572	ir_type src_type = ctx->ir_base[insn->op1].type;
3573	ir_backend_data *data = ctx->data;
3574	dasm_State **Dst = &data->dasm_state;
3575	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3576	ir_reg op1_reg = ctx->regs[def][1];
3577
3578	IR_ASSERT(IR_IS_TYPE_FP(src_type));
3579	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
3580	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
3581	if (IR_REG_SPILLED(op1_reg)) {
3582		op1_reg = IR_REG_NUM(op1_reg);
3583		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3584	}
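	/* fcvtzs/fcvtzu round toward zero, matching C's truncating float-to-int casts;
	 * the X or W destination form below is picked by the integer size. */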
3585	if (ir_type_size[dst_type] == 8) {
3586		if (IR_IS_TYPE_SIGNED(dst_type)) {
3587			if (src_type == IR_DOUBLE) {
3588				|	fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3589			} else {
3590				IR_ASSERT(src_type == IR_FLOAT);
3591				|	fcvtzs Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3592			}
3593		} else {
3594			if (src_type == IR_DOUBLE) {
3595				|	fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3596			} else {
3597				IR_ASSERT(src_type == IR_FLOAT);
3598				|	fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3599			}
3600		}
3601	} else {
3602		if (IR_IS_TYPE_SIGNED(dst_type)) {
3603			if (src_type == IR_DOUBLE) {
3604				|	fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3605			} else {
3606				IR_ASSERT(src_type == IR_FLOAT);
3607				|	fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3608			}
3609		} else {
3610			if (src_type == IR_DOUBLE) {
3611				|	fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST)
3612			} else {
3613				IR_ASSERT(src_type == IR_FLOAT);
3614				|	fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST)
3615			}
3616		}
3617	}
3618	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3619		ir_emit_store(ctx, dst_type, def, def_reg);
3620	}
3621}
3622
3623static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3624{
3625	ir_type dst_type = insn->type;
3626	ir_type src_type = ctx->ir_base[insn->op1].type;
3627	ir_backend_data *data = ctx->data;
3628	dasm_State **Dst = &data->dasm_state;
3629	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3630	ir_reg op1_reg = ctx->regs[def][1];
3631
3632	IR_ASSERT(IR_IS_TYPE_FP(src_type));
3633	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
3634	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);
3635	if (IR_REG_SPILLED(op1_reg)) {
3636		op1_reg = IR_REG_NUM(op1_reg);
3637		ir_emit_load(ctx, src_type, op1_reg, insn->op1);
3638	}
3639	if (src_type == dst_type) {
3640		if (op1_reg != def_reg) {
3641			ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
3642		}
3643	} else if (src_type == IR_DOUBLE) {
3644		|	fcvt Rs(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST)
3645	} else {
3646		IR_ASSERT(src_type == IR_FLOAT);
3647		|	fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST)
3648	}
3649	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3650		ir_emit_store(ctx, dst_type, def, def_reg);
3651	}
3652}
3653
3654static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3655{
3656	ir_type type = insn->type;
3657	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3658	ir_reg op1_reg = ctx->regs[def][1];
3659
3660	IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE);
3661	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3662		op1_reg = IR_REG_NUM(op1_reg);
3663		ir_emit_load(ctx, type, op1_reg, insn->op1);
3664	}
3665	if (def_reg == op1_reg) {
3666		/* same reg */
3667	} else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) {
3668		ir_emit_mov(ctx, type, def_reg, op1_reg);
3669	} else if (def_reg != IR_REG_NONE) {
3670		ir_emit_load(ctx, type, def_reg, insn->op1);
3671	} else if (op1_reg != IR_REG_NONE) {
3672		ir_emit_store(ctx, type, def, op1_reg);
3673	} else {
3674		IR_ASSERT(0);
3675	}
3676	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
3677		ir_emit_store(ctx, type, def, def_reg);
3678	}
3679}
3680
3681static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3682{
3683	ir_type type = insn->type;
3684	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3685	ir_reg op1_reg = ctx->regs[def][1];
3686
3687	IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE);
3688	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
3689		op1_reg = IR_REG_NUM(op1_reg);
3690		ir_emit_load(ctx, type, op1_reg, insn->op1);
3691	}
3692	if (def_reg == op1_reg) {
3693		/* same reg */
3694	} else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) {
3695		ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
3696	} else if (def_reg != IR_REG_NONE) {
3697		ir_emit_load(ctx, type, def_reg, insn->op1);
3698	} else if (op1_reg != IR_REG_NONE) {
3699		ir_emit_store(ctx, type, def, op1_reg);
3700	} else {
3701		IR_ASSERT(0);
3702	}
3703	if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) {
3704		ir_emit_store(ctx, type, def, def_reg);
3705	}
3706}
3707
3708static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3709{
3710	ir_backend_data *data = ctx->data;
3711	dasm_State **Dst = &data->dasm_state;
3712	ir_type type = insn->type;
3713	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3714	int32_t offset;
3715	ir_reg fp;
3716
3717	IR_ASSERT(def_reg != IR_REG_NONE);
3718	offset = ir_var_spill_slot(ctx, insn->op1, &fp);
3719	|	add Rx(def_reg), Rx(fp), #offset
3720	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3721		ir_emit_store(ctx, type, def, def_reg);
3722	}
3723}
3724
3725static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3726{
3727	ir_insn *var_insn = &ctx->ir_base[insn->op2];
3728	ir_type type = insn->type;
3729	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3730	ir_reg fp;
3731	int32_t offset;
3732	ir_mem mem;
3733
3734	IR_ASSERT(var_insn->op == IR_VAR);
3735	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3736	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
3737	mem = IR_MEM_BO(fp, offset);
3738	if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) {
3739		return; // fake load
3740	}
3741	IR_ASSERT(def_reg != IR_REG_NONE);
3742	ir_emit_load_mem(ctx, type, def_reg, mem);
3743	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3744		ir_emit_store(ctx, type, def, def_reg);
3745	}
3746}
3747
3748static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3749{
3750	ir_insn *var_insn = &ctx->ir_base[insn->op2];
3751	ir_insn *val_insn = &ctx->ir_base[insn->op3];
3752	ir_type type = val_insn->type;
3753	ir_reg op3_reg = ctx->regs[ref][3];
3754	ir_reg fp;
3755	int32_t offset;
3756	ir_mem mem;
3757
3758	IR_ASSERT(var_insn->op == IR_VAR);
3759	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3760	offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3);
3761	IR_ASSERT(op3_reg != IR_REG_NONE);
3762	if (IR_REG_SPILLED(op3_reg)
3763	 && !IR_IS_CONST_REF(insn->op3)
3764	 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA
3765	 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
3766		return; // fake store
3767	}
3768	if (IR_REG_SPILLED(op3_reg)) {
3769		op3_reg = IR_REG_NUM(op3_reg);
3770		ir_emit_load(ctx, type, op3_reg, insn->op3);
3771	}
3772	mem = IR_MEM_BO(fp, offset);
3773	ir_emit_store_mem(ctx, type, mem, op3_reg);
3774}
3775
3776static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
3777{
3778	ir_insn *addr_insn = &ctx->ir_base[ref];
3779	ir_reg reg;
3780	int32_t offset;
3781
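	/* Address fusion folds ADD(base, const) and frame slots (ALLOCA/VADDR) into a
	 * base+offset ir_mem operand, so e.g. LOAD(ADD(p, 16)) addresses [Rp, #16]
	 * directly instead of emitting a separate add. */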
3782	if (addr_insn->op == IR_ADD) {
3783		IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2));
3784		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op));
3785		if (ir_rule(ctx, addr_insn->op1) == IR_STATIC_ALLOCA) {
3786			reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3787			offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[addr_insn->op1].op3);
3788			offset += ctx->ir_base[addr_insn->op2].val.i32;
3789			return IR_MEM_BO(reg, offset);
3790		} else {
3791			if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
3792				reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1);
3793			} else {
3794				reg = ctx->regs[ref][1];
3795			}
3796			if (IR_REG_SPILLED(reg)) {
3797				reg = IR_REG_NUM(reg);
3798				ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1);
3799			}
3800			return IR_MEM_BO(reg, ctx->ir_base[addr_insn->op2].val.i32);
3801		}
3802	} else {
3803		IR_ASSERT(addr_insn->op == IR_ALLOCA || addr_insn->op == IR_VADDR);
3804		reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
3805		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[ref].op3);
3806		return IR_MEM_BO(reg, offset);
3807	}
3808}
3809
3810static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3811{
3812	ir_type type = insn->type;
3813	ir_reg op2_reg = ctx->regs[def][2];
3814	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3815	ir_mem mem;
3816
3817	if (ctx->use_lists[def].count == 1) {
3818		/* dead load */
3819		return;
3820	}
3821	IR_ASSERT(def_reg != IR_REG_NONE);
3822	if (op2_reg != IR_REG_NONE) {
3823		if (IR_REG_SPILLED(op2_reg)) {
3824			op2_reg = IR_REG_NUM(op2_reg);
3825			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3826			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3827		}
3828		mem = IR_MEM_B(op2_reg);
3829	} else if (IR_IS_CONST_REF(insn->op2)) {
3830		op2_reg = def_reg;
3831		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3832		mem = IR_MEM_B(op2_reg);
3833	} else {
3834		IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED);
3835		mem = ir_fuse_addr(ctx, def, insn->op2);
3836		if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) {
3837			if (!ir_may_avoid_spill_load(ctx, def, def)) {
3838				ir_emit_load_mem_int(ctx, type, def_reg, mem);
3839			}
3840			/* avoid load to the same location (valid only when register is not reused) */
3841			return;
3842		}
3843	}
3844	ir_emit_load_mem_int(ctx, type, def_reg, mem);
3845	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3846		ir_emit_store(ctx, type, def, def_reg);
3847	}
3848}
3849
3850static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3851{
3852	ir_type type = insn->type;
3853	ir_reg op2_reg = ctx->regs[def][2];
3854	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3855	ir_mem mem;
3856
3857	if (ctx->use_lists[def].count == 1) {
3858		/* dead load */
3859		return;
3860	}
3861	IR_ASSERT(def_reg != IR_REG_NONE);
3862	if (op2_reg != IR_REG_NONE) {
3863		if (IR_REG_SPILLED(op2_reg)) {
3864			op2_reg = IR_REG_NUM(op2_reg);
3865			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3866			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3867		}
3868		mem = IR_MEM_B(op2_reg);
3869	} else if (IR_IS_CONST_REF(insn->op2)) {
3870		op2_reg = def_reg;
3871		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3872		mem = IR_MEM_B(op2_reg);
3873	} else {
3874		IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED);
3875		mem = ir_fuse_addr(ctx, def, insn->op2);
3876		if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) {
3877			if (!ir_may_avoid_spill_load(ctx, def, def)) {
3878				ir_emit_load_mem_fp(ctx, type, def_reg, mem);
3879			}
3880			/* avoid load to the same location (valid only when register is not reused) */
3881			return;
3882		}
3883	}
3884	ir_emit_load_mem_fp(ctx, type, def_reg, mem);
3885	if (IR_REG_SPILLED(ctx->regs[def][0])) {
3886		ir_emit_store(ctx, type, def, def_reg);
3887	}
3888}
3889
3890static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3891{
3892	ir_insn *val_insn = &ctx->ir_base[insn->op3];
3893	ir_type type = val_insn->type;
3894	ir_reg op2_reg = ctx->regs[ref][2];
3895	ir_reg op3_reg = ctx->regs[ref][3];
3896	ir_mem mem;
3897
3898	if (op2_reg != IR_REG_NONE) {
3899		if (IR_REG_SPILLED(op2_reg)) {
3900			op2_reg = IR_REG_NUM(op2_reg);
3901			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3902			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3903		}
3904		mem = IR_MEM_B(op2_reg);
3905	} else {
3906		IR_ASSERT(!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED));
3907		mem = ir_fuse_addr(ctx, ref, insn->op2);
3908		if (!IR_IS_CONST_REF(insn->op3)
3909		 && IR_REG_SPILLED(op3_reg)
3910		 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA
3911		 && ir_is_same_spill_slot(ctx, insn->op3, mem)) {
3912			if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) {
3913				op3_reg = IR_REG_NUM(op3_reg);
3914				ir_emit_load(ctx, type, op3_reg, insn->op3);
3915			}
3916			/* avoid store to the same location */
3917			return;
3918		}
3919	}
3920
3921	if (op3_reg != IR_REG_NONE) {
3922		if (IR_REG_SPILLED(op3_reg)) {
3923			op3_reg = IR_REG_NUM(op3_reg);
3924			ir_emit_load(ctx, type, op3_reg, insn->op3);
3925		}
3926	} else {
3927		IR_ASSERT(IR_IS_CONST_REF(insn->op3) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op3].op) && ctx->ir_base[insn->op3].val.i64 == 0);
3928		op3_reg = IR_REG_ZR;
3929	}
3930	ir_emit_store_mem_int(ctx, type, mem, op3_reg);
3931}
3932
3933static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
3934{
3935	ir_type type = ctx->ir_base[insn->op3].type;
3936	ir_reg op2_reg = ctx->regs[ref][2];
3937	ir_reg op3_reg = ctx->regs[ref][3];
3938	ir_mem mem;
3939
3940	IR_ASSERT(op3_reg != IR_REG_NONE);
3941	if (op2_reg != IR_REG_NONE) {
3942		if (IR_REG_SPILLED(op2_reg)) {
3943			op2_reg = IR_REG_NUM(op2_reg);
3944			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
3945			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
3946		}
3947		mem = IR_MEM_B(op2_reg);
3948	} else {
3949		IR_ASSERT(!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED));
3950		mem = ir_fuse_addr(ctx, ref, insn->op2);
3951		if (!IR_IS_CONST_REF(insn->op3)
3952		 && IR_REG_SPILLED(op3_reg)
3953		 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA
3954		 && ir_is_same_spill_slot(ctx, insn->op3, mem)) {
3955			if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) {
3956				op3_reg = IR_REG_NUM(op3_reg);
3957				ir_emit_load(ctx, type, op3_reg, insn->op3);
3958			}
3959			/* avoid store to the same location */
3960			return;
3961		}
3962	}
3963
3964	if (IR_REG_SPILLED(op3_reg)) {
3965		op3_reg = IR_REG_NUM(op3_reg);
3966		ir_emit_load(ctx, type, op3_reg, insn->op3);
3967	}
3968	ir_emit_store_mem_fp(ctx, type, mem, op3_reg);
3969}
3970
3971static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn)
3972{
3973	ir_reg src_reg = insn->op2;
3974	ir_type type = insn->type;
3975
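	/* RLOAD reads a value pinned to a specific machine register. For fixed/reserved
	 * registers only a spill store may be needed; otherwise the value is copied into
	 * the allocated destination register. */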
3976	if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) {
3977		if (ctx->vregs[def]
3978		 && ctx->live_intervals[ctx->vregs[def]]
3979		 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) {
3980			ir_emit_store(ctx, type, def, src_reg);
3981		}
3982	} else {
3983		ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
3984
3985		if (def_reg == IR_REG_NONE) {
3986			/* op3 is used as a flag that the value is already stored in memory.
3987			 * If op3 is set, we don't have to store the value once again (in case of spilling).
3988			 */
3989			if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) {
3990				ir_emit_store(ctx, type, def, src_reg);
3991			}
3992		} else {
3993			if (src_reg != def_reg) {
3994				if (IR_IS_TYPE_INT(type)) {
3995					ir_emit_mov(ctx, type, def_reg, src_reg);
3996				} else {
3997					IR_ASSERT(IR_IS_TYPE_FP(type));
3998					ir_emit_fp_mov(ctx, type, def_reg, src_reg);
3999				}
4000			}
4001			if (IR_REG_SPILLED(ctx->regs[def][0])
4002			 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) {
4003				ir_emit_store(ctx, type, def, def_reg);
4004			}
4005		}
4006	}
4007}
4008
4009static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
4010{
4011	ir_type type = ctx->ir_base[insn->op2].type;
4012	ir_reg op2_reg = ctx->regs[ref][2];
4013	ir_reg dst_reg = insn->op3;
4014
4015	if (op2_reg != IR_REG_NONE) {
4016		if (IR_REG_SPILLED(op2_reg)) {
4017			op2_reg = IR_REG_NUM(op2_reg);
4018			ir_emit_load(ctx, type, op2_reg, insn->op2);
4019		}
4020		if (op2_reg != dst_reg) {
4021			if (IR_IS_TYPE_INT(type)) {
4022				ir_emit_mov(ctx, type, dst_reg, op2_reg);
4023			} else {
4024				IR_ASSERT(IR_IS_TYPE_FP(type));
4025				ir_emit_fp_mov(ctx, type, dst_reg, op2_reg);
4026			}
4027		}
4028	} else {
4029		ir_emit_load(ctx, type, dst_reg, insn->op2);
4030	}
4031}
4032
4033static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4034{
4035	ir_backend_data *data = ctx->data;
4036	dasm_State **Dst = &data->dasm_state;
4037	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4038
4039	if (ctx->use_lists[def].count == 1) {
4040		/* dead alloca */
4041		return;
4042	}
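	/* A constant-size alloca just drops SP by a 16-byte-aligned immediate; a dynamic
	 * size is aligned at run time and requires a frame pointer, so that spill-slot
	 * offsets remain valid while SP moves. */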
4043	if (IR_IS_CONST_REF(insn->op2)) {
4044		ir_insn *val = &ctx->ir_base[insn->op2];
4045		int32_t size = val->val.i32;
4046
4047		IR_ASSERT(IR_IS_TYPE_INT(val->type));
4048		IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4049		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0);
4050
4051		/* Stack must be 16 byte aligned */
4052		size = IR_ALIGNED_SIZE(size, 16);
4053		if (aarch64_may_encode_imm12(size)) {
4054			|	sub sp, sp, #size
4055		} else {
4056			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, size);
4057			|	sub sp, sp, Rx(IR_REG_INT_TMP)
4058		}
4059		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
4060			ctx->call_stack_size += size;
4061		}
4062	} else {
4063		int32_t alignment = 16;
4064		ir_reg op2_reg = ctx->regs[def][2];
4065		ir_type type = ctx->ir_base[insn->op2].type;
4066
4067		IR_ASSERT(ctx->flags & IR_FUNCTION);
4068		IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER);
4069		IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
4070		if (IR_REG_SPILLED(op2_reg)) {
4071			op2_reg = IR_REG_NUM(op2_reg);
4072			ir_emit_load(ctx, type, op2_reg, insn->op2);
4073		}
4074		|	add Rx(def_reg), Rx(op2_reg), #(alignment-1)
4075		|	and Rx(def_reg), Rx(def_reg), #(~(alignment-1))
4076		|	sub sp, sp, Rx(def_reg);
4077	}
4078	if (def_reg != IR_REG_NONE) {
4079		|	mov Rx(def_reg), sp
4080		if (IR_REG_SPILLED(ctx->regs[def][0])) {
4081			ir_emit_store(ctx, insn->type, def, def_reg);
4082		}
4083	} else {
4084		ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER);
4085	}
4086}
4087
4088static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4089{
4090	ir_backend_data *data = ctx->data;
4091	dasm_State **Dst = &data->dasm_state;
4092
4093	if (IR_IS_CONST_REF(insn->op2)) {
4094		ir_insn *val = &ctx->ir_base[insn->op2];
4095		int32_t size = val->val.i32;
4096
4097		IR_ASSERT(IR_IS_TYPE_INT(val->type));
4098		IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4099		IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0);
4100
4101		/* Stack must be 16 byte aligned */
4102		size = IR_ALIGNED_SIZE(size, 16);
4103		|	add sp, sp, #size
4104		if (!(ctx->flags & IR_USE_FRAME_POINTER)) {
4105			ctx->call_stack_size -= size;
4106		}
4107	} else {
4108//		int32_t alignment = 16;
4109		ir_reg op2_reg = ctx->regs[def][2];
4110		ir_type type = ctx->ir_base[insn->op2].type;
4111
4112		IR_ASSERT(ctx->flags & IR_FUNCTION);
4113		IR_ASSERT(op2_reg != IR_REG_NONE);
4114		if (IR_REG_SPILLED(op2_reg)) {
4115			op2_reg = IR_REG_NUM(op2_reg);
4116			ir_emit_load(ctx, type, op2_reg, insn->op2);
4117		}
4118
4119		// TODO: alignment
4120
4121		|	add sp, sp, Rx(op2_reg);
4122	}
4123}
4124
4125static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4126{
4127	ir_backend_data *data = ctx->data;
4128	dasm_State **Dst = &data->dasm_state;
4129	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4130
4131	|	mov Rx(def_reg), sp
4132
4133	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4134		ir_emit_store(ctx, IR_ADDR, def, def_reg);
4135	}
4136}
4137
4138static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4139{
4140	ir_backend_data *data = ctx->data;
4141	dasm_State **Dst = &data->dasm_state;
4142	ir_reg op2_reg = ctx->regs[def][2];
4143
4144	IR_ASSERT(op2_reg != IR_REG_NONE);
4145	if (IR_REG_SPILLED(op2_reg)) {
4146		op2_reg = IR_REG_NUM(op2_reg);
4147		ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4148	}
4149
4150	|	mov sp, Rx(op2_reg)
4151}
4152
4153static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def)
4154{
4155	ir_backend_data *data = ctx->data;
4156	dasm_State **Dst = &data->dasm_state;
4157	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4158
4159	if (ctx->flags & IR_USE_FRAME_POINTER) {
4160		|	mov Rx(def_reg), Rx(IR_REG_X29)
4161	} else {
4162		|	add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size)
4163	}
4164	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4165		ir_emit_store(ctx, IR_ADDR, def, def_reg);
4166	}
4167}
4168
4169static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4170{
4171#ifdef __APPLE__
4172	ir_backend_data *data = ctx->data;
4173	dasm_State **Dst = &data->dasm_state;
4174	ir_reg fp;
4175	int arg_area_offset;
4176	ir_reg op2_reg = ctx->regs[def][2];
4177	ir_reg tmp_reg = ctx->regs[def][3];
4178	int32_t offset;
4179
4180	IR_ASSERT(tmp_reg != IR_REG_NONE);
4181	if (op2_reg != IR_REG_NONE) {
4182		if (IR_REG_SPILLED(op2_reg)) {
4183			op2_reg = IR_REG_NUM(op2_reg);
4184			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4185		}
4186		offset = 0;
4187	} else {
4188		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
4189		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4190		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
4191	}
4192
4193	if (ctx->flags & IR_USE_FRAME_POINTER) {
4194		fp = IR_REG_FRAME_POINTER;
4195		arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
4196	} else {
4197		fp = IR_REG_STACK_POINTER;
4198		arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
4199	}
4200	|	add Rx(tmp_reg), Rx(fp), #arg_area_offset
4201	|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
4202#else
4203	ir_backend_data *data = ctx->data;
4204	dasm_State **Dst = &data->dasm_state;
4205	ir_reg fp;
4206	int reg_save_area_offset;
4207	int overflow_arg_area_offset;
4208	ir_reg op2_reg = ctx->regs[def][2];
4209	ir_reg tmp_reg = ctx->regs[def][3];
4210	int32_t offset;
4211
4212	IR_ASSERT(tmp_reg != IR_REG_NONE);
4213	if (op2_reg != IR_REG_NONE) {
4214		if (IR_REG_SPILLED(op2_reg)) {
4215			op2_reg = IR_REG_NUM(op2_reg);
4216			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4217		}
4218		offset = 0;
4219	} else {
4220		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
4221		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4222		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
4223	}
4224
4225	if (ctx->flags & IR_USE_FRAME_POINTER) {
4226		fp = IR_REG_FRAME_POINTER;
4227		reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2;
4228		overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size;
4229	} else {
4230		fp = IR_REG_STACK_POINTER;
4231		reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size;
4232		overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size;
4233	}
4234
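	/* AAPCS64 va_list layout: { void *stack; void *gr_top; void *vr_top;
	 * int gr_offset; int vr_offset; }. The offsets are negative byte counts of the
	 * remaining GP (8-byte) and SIMD (16-byte) save-area slots and reach zero once
	 * all register arguments have been consumed. */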
4235	/* Set va_list.stack */
4236	|	add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset
4237	|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4238	if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
4239		reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS;
4240		/* Set va_list.gr_top */
4241		if (overflow_arg_area_offset != reg_save_area_offset) {
4242			|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
4243		}
4244		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))]
4245		/* Set va_list.gr_offset */
4246		|	movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params)))
4247		|	str Rw(tmp_reg),  [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
4248	} else {
4249		/* Set va_list.gr_offset */
4250		|	str wzr,  [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
4251	}
4252	if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
4253		reg_save_area_offset += 16 * IR_REG_FP_ARGS;
4254		/* Set va_list.vr_top */
4255		if (overflow_arg_area_offset != reg_save_area_offset) {
4256			|	add Rx(tmp_reg), Rx(fp), #reg_save_area_offset
4257		}
4258		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))]
4259		/* Set va_list.vr_offset */
4260		|	movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params)))
4261		|	str Rw(tmp_reg),  [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
4262	} else {
4263		/* Set va_list.vr_offset */
4264		|	str wzr,  [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
4265	}
4266#endif
4267}
4268
4269static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4270{
4271#ifdef __APPLE__
4272	ir_backend_data *data = ctx->data;
4273	dasm_State **Dst = &data->dasm_state;
4274	ir_reg tmp_reg = ctx->regs[def][1];
4275	ir_reg op2_reg = ctx->regs[def][2];
4276	ir_reg op3_reg = ctx->regs[def][3];
4277	int32_t op2_offset, op3_offset;
4278
4279	IR_ASSERT(tmp_reg != IR_REG_NONE);
4280	if (op2_reg != IR_REG_NONE) {
4281		if (IR_REG_SPILLED(op2_reg)) {
4282			op2_reg = IR_REG_NUM(op2_reg);
4283			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4284		}
4285		op2_offset = 0;
4286	} else {
4287		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
4288		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4289		op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
4290	}
4291	if (op3_reg != IR_REG_NONE) {
4292		if (IR_REG_SPILLED(op3_reg)) {
4293			op3_reg = IR_REG_NUM(op3_reg);
4294			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
4295		}
4296		op3_offset = 0;
4297	} else {
4298		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
4299		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4300		op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
4301	}
4302	|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
4303	|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
4304#else
4305	ir_backend_data *data = ctx->data;
4306	dasm_State **Dst = &data->dasm_state;
4307	ir_reg tmp_reg = ctx->regs[def][1];
4308	ir_reg op2_reg = ctx->regs[def][2];
4309	ir_reg op3_reg = ctx->regs[def][3];
4310	int32_t op2_offset, op3_offset;
4311
4312	IR_ASSERT(tmp_reg != IR_REG_NONE);
4313	if (op2_reg != IR_REG_NONE) {
4314		if (IR_REG_SPILLED(op2_reg)) {
4315			op2_reg = IR_REG_NUM(op2_reg);
4316			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4317		}
4318		op2_offset = 0;
4319	} else {
4320		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
4321		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4322		op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
4323	}
4324	if (op3_reg != IR_REG_NONE) {
4325		if (IR_REG_SPILLED(op3_reg)) {
4326			op3_reg = IR_REG_NUM(op3_reg);
4327			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
4328		}
4329		op3_offset = 0;
4330	} else {
4331		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
4332		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4333		op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
4334	}
4335	|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
4336	|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
4337	|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+8)]
4338	|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+8)]
4339	|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+16)]
4340	|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+16)]
4341	|	ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+24)]
4342	|	str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+24)]
4343#endif
4344}
4345
4346static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4347{
4348#ifdef __APPLE__
4349	ir_backend_data *data = ctx->data;
4350	dasm_State **Dst = &data->dasm_state;
4351	ir_type type = insn->type;
4352	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4353	ir_reg op2_reg = ctx->regs[def][2];
4354	ir_reg tmp_reg = ctx->regs[def][3];
4355	int32_t offset;
4356
4357	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4358	if (op2_reg != IR_REG_NONE) {
4359		if (IR_REG_SPILLED(op2_reg)) {
4360			op2_reg = IR_REG_NUM(op2_reg);
4361			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4362		}
4363		offset = 0;
4364	} else {
4365		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
4366		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4367		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
4368	}
4369	|	ldr Rx(tmp_reg), [Rx(op2_reg), #offset]
4370	ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
4371	|	add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
4372	|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
4373	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4374		ir_emit_store(ctx, type, def, def_reg);
4375	}
4376#else
4377	ir_backend_data *data = ctx->data;
4378	dasm_State **Dst = &data->dasm_state;
4379	ir_type type = insn->type;
4380	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
4381	ir_reg op2_reg = ctx->regs[def][2];
4382	ir_reg tmp_reg = ctx->regs[def][3];
4383	int32_t offset;
4384
4385	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4386	if (op2_reg != IR_REG_NONE) {
4387		if (IR_REG_SPILLED(op2_reg)) {
4388			op2_reg = IR_REG_NUM(op2_reg);
4389			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4390		}
4391		offset = 0;
4392	} else {
4393		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
4394		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
4395		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
4396	}
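	/* While gr_offset/vr_offset are still negative, the argument lives in the
	 * register save area: load from gr_top/vr_top plus the offset and advance it by
	 * 8 (GP) or 16 (SIMD). Once the offset is non-negative (bge >1), fall back to
	 * the overflow area pointed to by va_list.stack; label 2 is the join point. */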
4397	if (IR_IS_TYPE_INT(type)) {
4398		|	ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
4399		|	cmp Rw(tmp_reg), wzr
4400		|	bge >1
4401		|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))]
4402		|	sxtw Rx(tmp_reg), Rw(tmp_reg)
4403		|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
4404		|	ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)]
4405		|	add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*)
4406		|	str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))]
4407		|	b >2
4408		|1:
4409		|	ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4410		|	ldr Rx(def_reg), [Rx(tmp_reg)]
4411		|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
4412		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4413		|2:
4414	} else {
4415		|	ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
4416		|	cmp Rw(tmp_reg), wzr
4417		|	bge >1
4418		|	ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))]
4419		|	sxtw Rx(tmp_reg), Rw(tmp_reg)
4420		|	add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP)
4421		|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)]
4422		|	add Rw(tmp_reg), Rw(tmp_reg), #16
4423		|	str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))]
4424		|	b >2
4425		|1:
4426		|	ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4427		|	ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)]
4428		|	add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*)
4429		|	str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))]
4430		|2:
4431	}
4432	if (IR_REG_SPILLED(ctx->regs[def][0])) {
4433		ir_emit_store(ctx, type, def, def_reg);
4434	}
4435#endif
4436}
4437
4438static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
4439{
4440	ir_backend_data *data = ctx->data;
4441	dasm_State **Dst = &data->dasm_state;
4442	ir_type type;
4443	ir_block *bb;
4444	ir_insn *use_insn, *val;
4445	uint32_t n, *p, use_block;
4446	int i;
4447	int label, default_label = 0;
4448	int count = 0;
4449	ir_val min, max;
4450	ir_reg op1_reg, op2_reg, tmp_reg;
4451
4452	type = ctx->ir_base[insn->op2].type;
4453	if (IR_IS_TYPE_SIGNED(type)) {
4454		min.u64 = 0x7fffffffffffffff;
4455		max.u64 = 0x8000000000000000;
4456	} else {
4457		min.u64 = 0xffffffffffffffff;
4458		max.u64 = 0x0;
4459	}
4460
4461	bb = &ctx->cfg_blocks[b];
4462	p = &ctx->cfg_edges[bb->successors];
4463	for (n = bb->successors_count; n != 0; p++, n--) {
4464		use_block = *p;
4465		use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
4466		if (use_insn->op == IR_CASE_VAL) {
4467			val = &ctx->ir_base[use_insn->op2];
4468			IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4469			if (IR_IS_TYPE_SIGNED(type)) {
4470				IR_ASSERT(IR_IS_TYPE_SIGNED(val->type));
4471				min.i64 = IR_MIN(min.i64, val->val.i64);
4472				max.i64 = IR_MAX(max.i64, val->val.i64);
4473			} else {
4474				IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type));
4475				min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64);
4476				max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64);
4477			}
4478			count++;
4479		} else {
4480			IR_ASSERT(use_insn->op == IR_CASE_DEFAULT);
4481			default_label = ir_skip_empty_target_blocks(ctx, use_block);
4482		}
4483	}
4484
4485	op1_reg = ctx->regs[def][1];
4486	op2_reg = ctx->regs[def][2];
4487	tmp_reg = ctx->regs[def][3];
4488
4489	IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
4490	if (IR_REG_SPILLED(op2_reg)) {
4491		op2_reg = IR_REG_NUM(op2_reg);
4492		ir_emit_load(ctx, type, op2_reg, insn->op2);
4493	}
4494
4495	/* Generate a jump table or a sequence of compare-and-branch tests */
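	/* A jump table costs 8 bytes per slot for max-min+1 slots, so it is used only
	 * when there are more than two cases and the value range is below count * 8;
	 * sparse or tiny switches fall back to the cmp/beq chain. */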
4496	if (count > 2 && (max.i64-min.i64) < count * 8) {
4497		int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1));
4498
4499		for (i = 0; i <= (max.i64 - min.i64); i++) {
4500			labels[i] = default_label;
4501		}
4502		p = &ctx->cfg_edges[bb->successors];
4503		for (n = bb->successors_count; n != 0; p++, n--) {
4504			use_block = *p;
4505			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
4506			if (use_insn->op == IR_CASE_VAL) {
4507				val = &ctx->ir_base[use_insn->op2];
4508				IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4509				label = ir_skip_empty_target_blocks(ctx, use_block);
4510				labels[val->val.i64 - min.i64] = label;
4511			}
4512		}
4513
4514		if (default_label) {
4515			if (aarch64_may_encode_imm12(max.i64)) {
4516				|	ASM_REG_IMM_OP cmp, type, op2_reg, max.i64
4517			} else {
4518				ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64);
4519				|	ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
4520			}
4521			if (IR_IS_TYPE_SIGNED(type)) {
4522				|	bgt =>default_label
4523			} else {
4524				|	bhi =>default_label
4525			}
4526		}
4527
4528		if (op1_reg == IR_REG_NONE) {
4529			op1_reg = op2_reg;
4530		}
4531		if (aarch64_may_encode_imm12(min.i64)) {
4532			|	ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64
4533		} else {
4534			ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64);
4535			|	ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg
4536		}
4537
4538		if (default_label) {
4539			if (IR_IS_TYPE_SIGNED(type)) {
4540				|	blt =>default_label
4541			} else {
4542				|	blo =>default_label
4543			}
4544		}
4545
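		/* Table dispatch: the subs above rebased the value to [0 .. max-min], so
		 * index the 8-byte .jmp_table entries with (value << 3) and branch through
		 * the loaded address. */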
4546		|	adr Rx(tmp_reg), >1
4547		|	ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3]
4548		|	br Rx(tmp_reg)
4549		|.jmp_table
4550		if (!data->jmp_table_label) {
4551			data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3;
4552			|=>data->jmp_table_label:
4553		}
4554		|.align 8
4555		|1:
4556		for (i = 0; i <= (max.i64 - min.i64); i++) {
4557			int b = labels[i];
4558			if (b) {
4559				ir_block *bb = &ctx->cfg_blocks[b];
4560				ir_insn *insn = &ctx->ir_base[bb->end];
4561
4562				if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) {
4563					ir_ref prev = ctx->prev_ref[bb->end];
4564					if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) {
4565						prev = ctx->prev_ref[prev];
4566					}
4567					if (prev == bb->start) {
4568						void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4569
4570						|	.addr &addr
4571						if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) {
4572							bb->flags |= IR_BB_EMPTY;
4573						}
4574						continue;
4575					}
4576				}
4577				|	.addr =>b
4578			} else {
4579				|	.addr 0
4580			}
4581		}
4582		|.code
4583		ir_mem_free(labels);
4584	} else {
4585		p = &ctx->cfg_edges[bb->successors];
4586		for (n = bb->successors_count; n != 0; p++, n--) {
4587			use_block = *p;
4588			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
4589			if (use_insn->op == IR_CASE_VAL) {
4590				val = &ctx->ir_base[use_insn->op2];
4591				IR_ASSERT(!IR_IS_SYM_CONST(val->op));
4592				label = ir_skip_empty_target_blocks(ctx, use_block);
4593				if (aarch64_may_encode_imm12(val->val.i64)) {
4594					|	ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64
4595				} else {
4596					ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64);
4597					|	ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
4599				}
4600				|	beq =>label
4601			}
4602		}
4603		if (default_label) {
4604			|	b =>default_label
4605		}
4606	}
4607}
4608
4609static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
4610{
4611	int j, n;
4612	ir_type type;
4613	int int_param = 0;
4614	int fp_param = 0;
4615	int int_reg_params_count = IR_REG_INT_ARGS;
4616	int fp_reg_params_count = IR_REG_FP_ARGS;
4617	int32_t used_stack = 0;
4618#ifdef __APPLE__
4619	const ir_proto_t *proto = ir_call_proto(ctx, insn);
4620	int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
4621#endif
4622
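	/* Under AAPCS64 the first IR_REG_INT_ARGS integer and IR_REG_FP_ARGS FP
	 * arguments travel in registers; the rest occupy 8-byte (or larger) stack
	 * slots. On Apple platforms unnamed variadic arguments always go onto the
	 * stack, which is why they are counted separately here. */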
4623	n = insn->inputs_count;
4624	for (j = 3; j <= n; j++) {
4625		type = ctx->ir_base[ir_insn_op(insn, j)].type;
4626#ifdef __APPLE__
4627		if (j > last_named_input) {
4628			used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
4629		} else
4630#endif
4631		if (IR_IS_TYPE_INT(type)) {
4632			if (int_param >= int_reg_params_count) {
4633				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
4634			}
4635			int_param++;
4636		} else {
4637			IR_ASSERT(IR_IS_TYPE_FP(type));
4638			if (fp_param >= fp_reg_params_count) {
4639				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
4640			}
4641			fp_param++;
4642		}
4643	}
4644
4645	return used_stack;
4646}
4647
4648static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
4649{
4650	ir_backend_data *data = ctx->data;
4651	dasm_State **Dst = &data->dasm_state;
4652	int j, n;
4653	ir_ref arg;
4654	ir_insn *arg_insn;
4655	uint8_t type;
4656	ir_reg src_reg, dst_reg;
4657	int int_param = 0;
4658	int fp_param = 0;
4659	int count = 0;
4660	int int_reg_params_count = IR_REG_INT_ARGS;
4661	int fp_reg_params_count = IR_REG_FP_ARGS;
4662	const int8_t *int_reg_params = _ir_int_reg_params;
4663	const int8_t *fp_reg_params = _ir_fp_reg_params;
4664	int32_t used_stack, stack_offset = 0;
4665	ir_copy *copies;
4666	bool do_pass3 = 0;
4667	/* For temporaries we may use any scratch registers except for registers used for parameters */
4668	ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */
4669
4670	n = insn->inputs_count;
4671	if (n < 3) {
4672		return 0;
4673	}
4674
4675	if (tmp_reg == IR_REG_NONE) {
4676		tmp_reg = IR_REG_IP0;
4677	}
4678
4679	if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
4680		// TODO: support for preallocated stack
4681		used_stack = 0;
4682	} else {
4683		used_stack = ir_call_used_stack(ctx, insn);
4684		/* Stack must be 16 byte aligned */
4685		used_stack = IR_ALIGNED_SIZE(used_stack, 16);
4686		if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) {
4687			used_stack = 0;
4688		} else {
4689			ctx->call_stack_size += used_stack;
4690			if (used_stack) {
4691				if (insn->op == IR_TAILCALL && !(ctx->flags & IR_USE_FRAME_POINTER)) {
4692					ctx->flags |= IR_USE_FRAME_POINTER;
4693					|	stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]!
4694					|	mov x29, sp
4695				}
4696				|	sub sp, sp, #used_stack
4697			}
4698		}
4699	}
4700
4701#ifdef __APPLE__
4702	const ir_proto_t *proto = ir_call_proto(ctx, insn);
4703	int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count;
4704#endif
4705
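	/* The argument shuffle runs in three passes: store register values that must go
	 * to the stack while recording pending REG->REG moves, resolve those moves as a
	 * parallel copy, and only then materialize constants and spilled values, once
	 * their target registers can no longer be clobbered. */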
4706	/* 1. move all register arguments that should be passed through stack
4707	 *    and collect arguments that should be passed through registers */
4708	copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
4709	for (j = 3; j <= n; j++) {
4710		arg = ir_insn_op(insn, j);
4711		src_reg = ir_get_alocated_reg(ctx, def, j);
4712		arg_insn = &ctx->ir_base[arg];
4713		type = arg_insn->type;
4714#ifdef __APPLE__
4715		if (j > last_named_input) {
4716			dst_reg = IR_REG_NONE; /* pass argument through stack */
4717		} else
4718#endif
4719		if (IR_IS_TYPE_INT(type)) {
4720			if (int_param < int_reg_params_count) {
4721				dst_reg = int_reg_params[int_param];
4722			} else {
4723				dst_reg = IR_REG_NONE; /* pass argument through stack */
4724			}
4725			int_param++;
4726		} else {
4727			IR_ASSERT(IR_IS_TYPE_FP(type));
4728			if (fp_param < fp_reg_params_count) {
4729				dst_reg = fp_reg_params[fp_param];
4730			} else {
4731				dst_reg = IR_REG_NONE; /* pass argument through stack */
4732			}
4733			fp_param++;
4734		}
4735		if (dst_reg != IR_REG_NONE) {
4736			if (src_reg == IR_REG_NONE) {
4737				/* delay CONST->REG and MEM->REG moves to third pass */
4738				do_pass3 = 1;
4739			} else {
4740				IR_ASSERT(src_reg != IR_REG_NONE);
4741				if (IR_REG_SPILLED(src_reg)) {
4742					src_reg = IR_REG_NUM(src_reg);
4743					ir_emit_load(ctx, type, src_reg, arg);
4744				}
4745				if (src_reg != dst_reg) {
4746					/* delay REG->REG moves to second pass */
4747					copies[count].type = type;
4748					copies[count].from = src_reg;
4749					copies[count].to = dst_reg;
4750					count++;
4751				}
4752			}
4753		} else {
4754			/* Pass register arguments to stack (REG->MEM moves) */
4755			if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) {
4756				ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
4757			} else {
4758				do_pass3 = 1;
4759			}
4760			stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
4761		}
4762	}
4763
4764	/* 2. move all arguments that should be passed from one register to another (REG->REG movs) */
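	/* The recorded moves may form cycles (e.g. x0 and x1 swapped), so they cannot
	 * simply be emitted in order; ir_parallel_copy orders them and breaks cycles
	 * through the tmp_reg/tmp_fp_reg scratch registers. */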
4765	if (count) {
4766		ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg);
4767	}
4768	ir_mem_free(copies);
4769
4770	/* 3. move the remaining memory and immediate values */
4771	if (do_pass3) {
4772		stack_offset = 0;
4773		int_param = 0;
4774		fp_param = 0;
4775		for (j = 3; j <= n; j++) {
4776			arg = ir_insn_op(insn, j);
4777			src_reg = ir_get_alocated_reg(ctx, def, j);
4778			arg_insn = &ctx->ir_base[arg];
4779			type = arg_insn->type;
4780#ifdef __APPLE__
4781			if (j > last_named_input) {
4782				dst_reg = IR_REG_NONE; /* pass argument through stack */
4783			} else
4784#endif
4785			if (IR_IS_TYPE_INT(type)) {
4786				if (int_param < int_reg_params_count) {
4787					dst_reg = int_reg_params[int_param];
4788				} else {
4789					dst_reg = IR_REG_NONE; /* argument already passed through stack */
4790				}
4791				int_param++;
4792			} else {
4793				IR_ASSERT(IR_IS_TYPE_FP(type));
4794				if (fp_param < fp_reg_params_count) {
4795					dst_reg = fp_reg_params[fp_param];
4796				} else {
4797					dst_reg = IR_REG_NONE; /* argument already passed through stack */
4798				}
4799				fp_param++;
4800			}
4801			if (dst_reg != IR_REG_NONE) {
4802				if (src_reg == IR_REG_NONE) {
4803					if (IR_IS_CONST_REF(arg) && IR_IS_TYPE_INT(type)) {
4804						if (ir_type_size[type] == 1) {
4805							type = IR_ADDR;
4806						}
4807					}
4808					ir_emit_load(ctx, type, dst_reg, arg);
4809				}
4810			} else {
4811				if (IR_IS_TYPE_INT(type)) {
4812					if (src_reg == IR_REG_NONE) {
4813						IR_ASSERT(tmp_reg != IR_REG_NONE);
4814						ir_emit_load(ctx, type, tmp_reg, arg);
4815						if (IR_IS_CONST_REF(arg)) {
4816							type = IR_ADDR; //TODO: ???
4817						}
4818						ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg);
4819					} else if (IR_REG_SPILLED(src_reg)) {
4820						src_reg = IR_REG_NUM(src_reg);
4821						ir_emit_load(ctx, type, src_reg, arg);
4822						ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
4823					}
4824				} else {
4825					if (src_reg == IR_REG_NONE) {
4826						IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
4827						ir_emit_load(ctx, type, tmp_fp_reg, arg);
4828						ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_fp_reg);
4829					} else if (IR_REG_SPILLED(src_reg)) {
4830						src_reg = IR_REG_NUM(src_reg);
4831						ir_emit_load(ctx, type, src_reg, arg);
4832						ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
4833					}
4834				}
4835				stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
4836			}
4837		}
4838	}
4839	return used_stack;
4840}
4841
4842static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack)
4843{
4844	ir_backend_data *data = ctx->data;
4845	dasm_State **Dst = &data->dasm_state;
4846	ir_reg def_reg;
4847
4848	if (IR_IS_CONST_REF(insn->op2)) {
4849		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4850
4851		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4852			|	bl &addr
4853		} else {
4854			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4855			|	blr Rx(IR_REG_INT_TMP)
4856		}
4857	} else {
4858		ir_reg op2_reg = ctx->regs[def][2];
4859
4860		IR_ASSERT(op2_reg != IR_REG_NONE);
4861		if (IR_REG_SPILLED(op2_reg)) {
4862			op2_reg = IR_REG_NUM(op2_reg);
4863			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4864		}
4865		|	blr Rx(op2_reg)
4866	}
4867
4868	if (used_stack) {
4869		|	add sp, sp, #used_stack
4870		ctx->call_stack_size -= used_stack;
4871	}
4872
4873	if (insn->type != IR_VOID) {
4874		if (IR_IS_TYPE_INT(insn->type)) {
4875			def_reg = IR_REG_NUM(ctx->regs[def][0]);
4876			if (def_reg != IR_REG_NONE) {
4877				if (def_reg != IR_REG_INT_RET1) {
4878					ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
4879				}
4880				if (IR_REG_SPILLED(ctx->regs[def][0])) {
4881					ir_emit_store(ctx, insn->type, def, def_reg);
4882				}
4883			} else if (ctx->use_lists[def].count > 1) {
4884				ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
4885			}
4886		} else {
4887			IR_ASSERT(IR_IS_TYPE_FP(insn->type));
4888			def_reg = IR_REG_NUM(ctx->regs[def][0]);
4889			if (def_reg != IR_REG_NONE) {
4890				if (def_reg != IR_REG_FP_RET1) {
4891					ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
4892				}
4893				if (IR_REG_SPILLED(ctx->regs[def][0])) {
4894					ir_emit_store(ctx, insn->type, def, def_reg);
4895				}
4896			} else if (ctx->use_lists[def].count > 1) {
4897				ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
4898			}
4899		}
4900	}
4901}
4902
4903static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4904{
4905	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
4906	ir_emit_call_ex(ctx, def, insn, used_stack);
4907}
4908
4909static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4910{
4911	ir_backend_data *data = ctx->data;
4912	dasm_State **Dst = &data->dasm_state;
4913	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
4914
4915	if (used_stack != 0) {
4916		ir_emit_call_ex(ctx, def, insn, used_stack);
4917		ir_emit_return_void(ctx);
4918		return;
4919	}
4920
4921	ir_emit_epilogue(ctx);
4922
4923	if (IR_IS_CONST_REF(insn->op2)) {
4924		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4925
4926		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4927			|	b &addr
4928		} else {
4929			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4930			|	br Rx(IR_REG_INT_TMP)
4931		}
4932	} else {
4933		ir_reg op2_reg = ctx->regs[def][2];
4934
4935		IR_ASSERT(op2_reg != IR_REG_NONE);
4936		if (IR_REG_SPILLED(op2_reg)) {
4937			op2_reg = IR_REG_NUM(op2_reg);
4938			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4939		}
4940		|	br Rx(op2_reg)
4941	}
4942}
4943
4944static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4945{
4946	ir_backend_data *data = ctx->data;
4947	dasm_State **Dst = &data->dasm_state;
4948	ir_reg op2_reg = ctx->regs[def][2];
4949
4950	if (op2_reg != IR_REG_NONE) {
4951		if (IR_REG_SPILLED(op2_reg)) {
4952			op2_reg = IR_REG_NUM(op2_reg);
4953			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
4954		}
4955		|	br Rx(op2_reg)
4956	} else if (IR_IS_CONST_REF(insn->op2)) {
4957		void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);
4958
4959		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4960			|	b &addr
4961		} else {
4962			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4963			|	br Rx(IR_REG_INT_TMP)
4964		}
4965	} else {
4966		IR_ASSERT(0);
4967	}
4968}
4969
4970static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn)
4971{
4972	ir_backend_data *data = ctx->data;
4973	dasm_State **Dst = &data->dasm_state;
4974	ir_reg op2_reg = ctx->regs[def][2];
4975	ir_type type = ctx->ir_base[insn->op2].type;
4976
4977	IR_ASSERT(IR_IS_TYPE_INT(type));
4978	if (IR_IS_CONST_REF(insn->op2)) {
4979		bool is_true = ir_ref_is_true(ctx, insn->op2);
4980
4981		if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) {
4982			if (IR_IS_CONST_REF(insn->op3)) {
4983				void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
4984
4985				if (aarch64_may_use_b(ctx->code_buffer, addr)) {
4986					|	b &addr
4987				} else {
4988					ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
4989					|	br Rx(IR_REG_INT_TMP)
4990				}
4991			} else {
4992				IR_ASSERT(0);
4993			}
4994		}
4995		return;
4996	}
4997
4998	IR_ASSERT(op2_reg != IR_REG_NONE);
4999	if (IR_REG_SPILLED(op2_reg)) {
5000		op2_reg = IR_REG_NUM(op2_reg);
5001		ir_emit_load(ctx, type, op2_reg, insn->op2);
5002	}
5003
5004	if (IR_IS_CONST_REF(insn->op3)) {
5005		void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
5006
5007		if (insn->op == IR_GUARD) {
5008			if (ir_type_size[type] == 8) {
5009				|	cbz Rx(op2_reg), &addr
5010			} else {
5011				|	cbz Rw(op2_reg), &addr
5012			}
5013		} else {
5014			if (ir_type_size[type] == 8) {
5015				|	cbnz Rx(op2_reg), &addr
5016			} else {
5017				|	cbnz Rw(op2_reg), &addr
5018			}
5019		}
5020	} else {
5021		IR_ASSERT(0);
5022	}
5023}
5024
5025static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg)
5026{
5027	ir_backend_data *data = ctx->data;
5028	dasm_State **Dst = &data->dasm_state;
5029
5030	if (op == IR_EQ) {
5031		if (ir_type_size[type] == 8) {
5032			|	cbnz Rx(reg), &addr
5033		} else {
5034			|	cbnz Rw(reg), &addr
5035		}
5036	} else {
5037		IR_ASSERT(op == IR_NE);
5038		if (ir_type_size[type] == 8) {
5039			|	cbz Rx(reg), &addr
5040		} else {
5041			|	cbz Rw(reg), &addr
5042		}
5043	}
5044}
5045
5046static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
5047{
5048	ir_backend_data *data = ctx->data;
5049	dasm_State **Dst = &data->dasm_state;
5050
5051	if (int_cmp) {
5052		switch (op) {
5053			default:
5054				IR_ASSERT(0 && "NIY binary op");
5055			case IR_EQ:
5056				|	beq &addr
5057				break;
5058			case IR_NE:
5059				|	bne &addr
5060				break;
5061			case IR_LT:
5062				|	blt &addr
5063				break;
5064			case IR_GE:
5065				|	bge &addr
5066				break;
5067			case IR_LE:
5068				|	ble &addr
5069				break;
5070			case IR_GT:
5071				|	bgt &addr
5072				break;
5073			case IR_ULT:
5074				|	blo &addr
5075				break;
5076			case IR_UGE:
5077				|	bhs &addr
5078				break;
5079			case IR_ULE:
5080				|	bls &addr
5081				break;
5082			case IR_UGT:
5083				|	bhi &addr
5084				break;
5085		}
5086	} else {
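		/* After fcmp an unordered result (a NaN operand) sets C and V, so this
		 * mapping uses bmi/bls instead of blt/ble: LT/LE/GT/GE all fall through on
		 * NaN, while bne is still taken (NaN != x is true). */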
5087		switch (op) {
5088			default:
5089				IR_ASSERT(0 && "NIY binary op");
5090			case IR_EQ:
5091				|	beq &addr
5092				break;
5093			case IR_NE:
5094				|	bne &addr
5095				break;
5096			case IR_LT:
5097				|	bmi &addr
5098				break;
5099			case IR_GE:
5100				|	bge &addr
5101				break;
5102			case IR_LE:
5103				|	bls &addr
5104				break;
5105			case IR_GT:
5106				|	bgt &addr
5107				break;
5108//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
5109//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
5110//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
5111//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
5112		}
5113	}
5114}
5115
5116static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
5117{
5118	ir_backend_data *data = ctx->data;
5119	dasm_State **Dst = &data->dasm_state;
5120	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
5121	ir_op op = cmp_insn->op;
5122	ir_type type = ctx->ir_base[cmp_insn->op1].type;
5123	ir_ref op1 = cmp_insn->op1;
5124	ir_ref op2 = cmp_insn->op2;
5125	ir_reg op1_reg = ctx->regs[insn->op2][1];
5126	ir_reg op2_reg = ctx->regs[insn->op2][2];
5127	void *addr;
5128
5129	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
5130		op1_reg = IR_REG_NUM(op1_reg);
5131		ir_emit_load(ctx, type, op1_reg, op1);
5132	}
5133	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
5134		op2_reg = IR_REG_NUM(op2_reg);
5135		if (op1 != op2) {
5136			ir_emit_load(ctx, type, op2_reg, op2);
5137		}
5138	}
5139
5140	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
5141
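	/* Unsigned comparisons against zero fold away: x <u 0 is always false, x >=u 0
	 * is always true, x <=u 0 is x == 0 and x >u 0 is x != 0, which allows a single
	 * cbz/cbnz via ir_emit_guard_jz instead of a cmp/branch pair. */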
5142	if (IR_IS_CONST_REF(op2)
5143	 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)
5144	 && ctx->ir_base[op2].val.u64 == 0) {
5145		if (op == IR_ULT) {
5146			/* always false */
5147			if (aarch64_may_use_b(ctx->code_buffer, addr)) {
5148				|	b &addr
5149			} else {
5150				ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
5151				|	br Rx(IR_REG_INT_TMP)
5152			}
5153			return;
5154		} else if (op == IR_UGE) {
5155			/* always true */
5156			return;
5157		} else if (op == IR_ULE) {
5158			op = IR_EQ;
5159		} else if (op == IR_UGT) {
5160			op = IR_NE;
5161		}
5162		if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
5163			if (insn->op == IR_GUARD_NOT) {
5164				op ^= 1; // reverse
5165			}
5166			ir_emit_guard_jz(ctx, op, addr, type, op1_reg);
5167			return;
5168		}
5169	}
5170	ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2);
5171
5172	if (insn->op == IR_GUARD) {
5173		op ^= 1; // reverse
5174	}
5175
5176	ir_emit_guard_jcc(ctx, op, addr, 1);
5177}
5178
5179static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
5180{
5181	ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]);
5182	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
5183
5184	if (insn->op == IR_GUARD) {
5185		op ^= 1; // reverse
5186	}
5187	ir_emit_guard_jcc(ctx, op, addr, 0);
5188}
5189
5190static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
5191{
5192	ir_backend_data *data = ctx->data;
5193	dasm_State **Dst = &data->dasm_state;
5194	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
5195	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
5196	ir_type type = math_insn->type;
5197	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
5198
5199	IR_ASSERT(IR_IS_TYPE_INT(type));
5200	if (math_insn->op == IR_MUL_OV) {
5201		if (insn->op == IR_GUARD) {
5202			|	beq &addr
5203		} else {
5204			|	bne &addr
5205		}
5206	} else if (IR_IS_TYPE_SIGNED(type)) {
5207		if (insn->op == IR_GUARD) {
5208			|	bvc &addr
5209		} else {
5210			|	bvs &addr
5211		}
5212	} else {
5213		if (insn->op == IR_GUARD) {
5214			|	bcc &addr
5215		} else {
5216			|	bcs &addr
5217		}
5218	}
5219}
5220
5221static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn)
5222{
5223	ir_backend_data *data = ctx->data;
5224	dasm_State **Dst = &data->dasm_state;
5225	uint32_t code;
5226	ir_reg reg = IR_REG_NUM(ctx->regs[def][0]);
5227
5228	if (ctx->use_lists[def].count == 1) {
5229		/* dead load */
5230		return;
5231	}
5232
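	/* Read the thread pointer: tpidrro_el0 on Apple platforms (with the low
	 * three bits masked off), tpidr_el0 elsewhere. The mrs encodings are
	 * hard-coded and emitted via `.long` (see the TODOs below).
	 */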
5233||#ifdef __APPLE__
5234||	code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0
5235|	.long code
5236|	and Rx(reg), Rx(reg), #0xfffffffffffffff8
5237|//???	MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1
5238|//???	MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1
5239||#else
5240||	code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0
5241|	.long code
5242||//???	IR_ASSERT(insn->op2 <= LDR_STR_PIMM64);
5243|	ldr Rx(reg), [Rx(reg), #insn->op2]
5244||#endif
5245	if (IR_REG_SPILLED(ctx->regs[def][0])) {
5246		ir_emit_store(ctx, IR_ADDR, def, reg);
5247	}
5248}
5249
5250static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
5251{
5252	ir_backend_data *data = ctx->data;
5253	dasm_State **Dst = &data->dasm_state;
5254	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
5255
5256	IR_ASSERT(def_reg != IR_REG_NONE);
5257
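	/* Build a full register snapshot on the stack: 32 GP slots (x0..x30
	 * plus a slot for SP at offset 31*8, fixed up below) followed by
	 * 32 FP slots (d0..d31), 512 bytes in total.
	 */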
5258	|	stp d30, d31, [sp, #-16]!
5259	|	stp d28, d29, [sp, #-16]!
5260	|	stp d26, d27, [sp, #-16]!
5261	|	stp d24, d25, [sp, #-16]!
5262	|	stp d22, d23, [sp, #-16]!
5263	|	stp d20, d21, [sp, #-16]!
5264	|	stp d18, d19, [sp, #-16]!
5265	|	stp d16, d17, [sp, #-16]!
5266	|	stp d14, d15, [sp, #-16]!
5267	|	stp d12, d13, [sp, #-16]!
5268	|	stp d10, d11, [sp, #-16]!
5269	|	stp d8, d9, [sp, #-16]!
5270	|	stp d6, d7, [sp, #-16]!
5271	|	stp d4, d5, [sp, #-16]!
5272	|	stp d2, d3, [sp, #-16]!
5273	|	stp d0, d1, [sp, #-16]!
5274
5275	|	str x30, [sp, #-16]!
5276	|	stp x28, x29, [sp, #-16]!
5277	|	stp x26, x27, [sp, #-16]!
5278	|	stp x24, x25, [sp, #-16]!
5279	|	stp x22, x23, [sp, #-16]!
5280	|	stp x20, x21, [sp, #-16]!
5281	|	stp x18, x19, [sp, #-16]!
5282	|	stp x16, x17, [sp, #-16]!
5283	|	stp x14, x15, [sp, #-16]!
5284	|	stp x12, x13, [sp, #-16]!
5285	|	stp x10, x11, [sp, #-16]!
5286	|	stp x8, x9, [sp, #-16]!
5287	|	stp x6, x7, [sp, #-16]!
5288	|	stp x4, x5, [sp, #-16]!
5289	|	stp x2, x3, [sp, #-16]!
5290	|	stp x0, x1, [sp, #-16]!
5291
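	/* Pass the snapshot to the handler: arg2 = snapshot base, the SP slot
	 * receives the pre-snapshot stack pointer (sp + 512), and arg1 = the
	 * exit number that ir_emit_exitgroup() left in IR_REG_INT_TMP.
	 */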
5292	|	mov Rx(IR_REG_INT_ARG2), sp
5293	|	add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8)
5294	|	str Rx(IR_REG_INT_ARG1), [sp, #(31*8)]
5295	|	mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP)
5296
5297	if (IR_IS_CONST_REF(insn->op2)) {
5298		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);
5299
5300		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
5301			|	bl &addr
5302		} else {
5303			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
5304			|	blr Rx(IR_REG_INT_TMP)
5305		}
5306	} else {
5307		IR_ASSERT(0);
5308	}
5309
5310	|	add sp, sp, #(32*8+32*8)
5311
5312	if (def_reg != IR_REG_INT_RET1) {
5313		ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
5314	}
5315	if (IR_REG_SPILLED(ctx->regs[def][0])) {
5316		ir_emit_store(ctx, insn->type, def, def_reg);
5317	}
5318}
5319
5320static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset)
5321{
5322	ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
5323
5324	IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);
5325
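	/* Either side may be IR_REG_NONE: a missing source means the argument
	 * was passed on the caller's stack at [fp/sp + offset]; a missing
	 * destination means the value goes straight to its spill slot.
	 */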
5326	if (IR_IS_TYPE_INT(type)) {
5327		if (from_reg != IR_REG_NONE) {
5328			if (to_reg != IR_REG_NONE) {
5329				ir_emit_mov(ctx, type, to_reg, from_reg);
5330			} else {
5331				ir_emit_store(ctx, type, to, from_reg);
5332			}
5333		} else {
5334			ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset));
5335		}
5336	} else {
5337		if (from_reg != IR_REG_NONE) {
5338			if (to_reg != IR_REG_NONE) {
5339				ir_emit_fp_mov(ctx, type, to_reg, from_reg);
5340			} else {
5341				ir_emit_store(ctx, type, to, from_reg);
5342			}
5343		} else {
5344			ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset));
5345		}
5346	}
5347}
5348
5349static void ir_emit_load_params(ir_ctx *ctx)
5350{
5351	ir_use_list *use_list = &ctx->use_lists[1];
5352	ir_insn *insn;
5353	ir_ref i, n, *p, use;
5354	int int_param_num = 0;
5355	int fp_param_num = 0;
5356	ir_reg src_reg;
5357	ir_reg dst_reg;
5358	// TODO: Calling convention specific
5359	int int_reg_params_count = IR_REG_INT_ARGS;
5360	int fp_reg_params_count = IR_REG_FP_ARGS;
5361	const int8_t *int_reg_params = _ir_int_reg_params;
5362	const int8_t *fp_reg_params = _ir_fp_reg_params;
5363	int32_t stack_offset = 0;
5364
5365	if (ctx->flags & IR_USE_FRAME_POINTER) {
5366		stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
5367	} else {
5368		stack_offset = ctx->stack_frame_size + ctx->call_stack_size;
5369	}
5370	n = use_list->count;
5371	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
5372		use = *p;
5373		insn = &ctx->ir_base[use];
5374		if (insn->op == IR_PARAM) {
5375			if (IR_IS_TYPE_INT(insn->type)) {
5376				if (int_param_num < int_reg_params_count) {
5377					src_reg = int_reg_params[int_param_num];
5378				} else {
5379					src_reg = IR_REG_NONE;
5380				}
5381				int_param_num++;
5382			} else {
5383				if (fp_param_num < fp_reg_params_count) {
5384					src_reg = fp_reg_params[fp_param_num];
5385				} else {
5386					src_reg = IR_REG_NONE;
5387				}
5388				fp_param_num++;
5389			}
5390			if (ctx->vregs[use]) {
5391				dst_reg = IR_REG_NUM(ctx->regs[use][0]);
5392				IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
5393					stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
5394						((ctx->flags & IR_USE_FRAME_POINTER) ?
5395							-(ctx->stack_frame_size - ctx->stack_frame_alignment) :
5396							ctx->call_stack_size));
5397				if (src_reg != dst_reg) {
5398					ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
5399				}
5400				if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
5401					ir_emit_store(ctx, insn->type, use, dst_reg);
5402				}
5403			}
5404			if (src_reg == IR_REG_NONE) {
5405				if (sizeof(void*) == 8) {
5406					stack_offset += sizeof(void*);
5407				} else {
5408					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
5409				}
5410			}
5411		}
5412	}
5413}
5414
5415static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
5416{
5417	if (IR_IS_TYPE_INT(type)) {
5418		available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP);
5419	} else {
5420		IR_ASSERT(IR_IS_TYPE_FP(type));
5421		available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP);
5422	}
5423	IR_ASSERT(!IR_REGSET_IS_EMPTY(available));
5424	return IR_REGSET_FIRST(available);
5425}
5426
5427static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
5428{
5429	ir_backend_data *data = ctx->data;
5430	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;
5431
	/* Both cases reserve the same temporary (X0 or V0) for the DeSSA move,
	 * so they can be handled together.
	 */
5432	if (to == 0 || from != 0) {
5433		if (IR_IS_TYPE_INT(type)) {
5434			if (ctx->regs[ref][0] == IR_REG_NONE) {
5435				ctx->regs[ref][0] = IR_REG_X0;
5436			}
5437		} else {
5438			IR_ASSERT(IR_IS_TYPE_FP(type));
5439			if (ctx->regs[ref][1] == IR_REG_NONE) {
5440				ctx->regs[ref][1] = IR_REG_V0;
5441			}
5442		}
5443	}
5455	return 1;
5456}
5457
5458static void ir_fix_param_spills(ir_ctx *ctx)
5459{
5460	ir_use_list *use_list = &ctx->use_lists[1];
5461	ir_insn *insn;
5462	ir_ref i, n, *p, use;
5463	int int_param_num = 0;
5464	int fp_param_num = 0;
5465	ir_reg src_reg;
5466	// TODO: Calling convention specific
5467	int int_reg_params_count = IR_REG_INT_ARGS;
5468	int fp_reg_params_count = IR_REG_FP_ARGS;
5469	const int8_t *int_reg_params = _ir_int_reg_params;
5470	const int8_t *fp_reg_params = _ir_fp_reg_params;
5471	int32_t stack_offset = 0;
5472	int32_t param_stack_size = 0;
5473
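	/* Parameters passed on the caller's stack can reuse their argument slots
	 * as spill positions; also record how many parameters arrived in
	 * registers and the size of the incoming stack argument area.
	 */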
5474	if (ctx->flags & IR_USE_FRAME_POINTER) {
5475		/* skip old frame pointer and return address */
5476		stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
5477	} else {
5478		stack_offset = ctx->stack_frame_size;
5479	}
5480	n = use_list->count;
5481	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
5482		use = *p;
5483		insn = &ctx->ir_base[use];
5484		if (insn->op == IR_PARAM) {
5485			if (IR_IS_TYPE_INT(insn->type)) {
5486				if (int_param_num < int_reg_params_count) {
5487					src_reg = int_reg_params[int_param_num];
5488				} else {
5489					src_reg = IR_REG_NONE;
5490				}
5491				int_param_num++;
5492			} else {
5493				if (fp_param_num < fp_reg_params_count) {
5494					src_reg = fp_reg_params[fp_param_num];
5495				} else {
5496					src_reg = IR_REG_NONE;
5497				}
5498				fp_param_num++;
5499			}
5500			if (src_reg == IR_REG_NONE) {
5501				if (ctx->vregs[use]) {
5502					ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];
5503					if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
5504					 && ival->stack_spill_pos == -1
5505					 && (ival->next || ival->reg == IR_REG_NONE)) {
5506						ival->stack_spill_pos = stack_offset;
5507					}
5508				}
5509				if (sizeof(void*) == 8) {
5510					stack_offset += sizeof(void*);
5511					param_stack_size += sizeof(void*);
5512				} else {
5513					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
5514					param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
5515				}
5516			}
5517		}
5518	}
5519
5520	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
5521	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
5522	ctx->param_stack_size = param_stack_size;
5523}
5524
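/* Fallback used when ir_emit_code() runs without a prior register-allocation
 * pass: every virtual register gets its own spill slot, and temporaries are
 * taken from the scratch register set instruction by instruction.
 */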
5525static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
5526{
5527	uint32_t b;
5528	ir_block *bb;
5529	ir_insn *insn;
5530	ir_ref i, n, j, *p;
5531	uint32_t *rule, insn_flags;
5532	ir_backend_data *data = ctx->data;
5533	ir_regset available = 0;
5534	ir_target_constraints constraints;
5535	uint32_t def_flags;
5536	ir_reg reg;
5537
5538	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
5539	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);
5540
5541	/* vregs + tmp + fixed + SCRATCH + ALL */
5542	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));
5543
5544	if (!ctx->arena) {
5545		ctx->arena = ir_arena_create(16 * 1024);
5546	}
5547
5548	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
5549		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
5550		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
5551			switch (ctx->rules ? *rule : insn->op) {
5552				case IR_START:
5553				case IR_BEGIN:
5554				case IR_END:
5555				case IR_IF_TRUE:
5556				case IR_IF_FALSE:
5557				case IR_CASE_VAL:
5558				case IR_CASE_DEFAULT:
5559				case IR_MERGE:
5560				case IR_LOOP_BEGIN:
5561				case IR_LOOP_END:
5562					break;
5563				default:
5564					def_flags = ir_get_target_constraints(ctx, i, &constraints);
5565					if (ctx->rules
5566					 && *rule != IR_CMP_AND_BRANCH_INT
5567					 && *rule != IR_CMP_AND_BRANCH_FP
5568					 && *rule != IR_GUARD_CMP_INT
5569					 && *rule != IR_GUARD_CMP_FP) {
5570						available = IR_REGSET_SCRATCH;
5571					}
5572					if (ctx->vregs[i]) {
5573						reg = constraints.def_reg;
5574						if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
5575							IR_REGSET_EXCL(available, reg);
5576							ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
5577						} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
5578							if (insn->op == IR_VLOAD
5579							 && ctx->live_intervals[ctx->vregs[i]]
5580							 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
5581								/* pass */
5582							} else if (insn->op != IR_PARAM) {
5583								reg = ir_get_free_reg(insn->type, available);
5584								IR_REGSET_EXCL(available, reg);
5585								ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
5586							}
5587						}
5588						if (!ctx->live_intervals[ctx->vregs[i]]) {
5589							ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
5590							memset(ival, 0, sizeof(ir_live_interval));
5591							ctx->live_intervals[ctx->vregs[i]] = ival;
5592							ival->type = insn->type;
5593							ival->reg = IR_REG_NONE;
5594							ival->vreg = ctx->vregs[i];
5595							ival->stack_spill_pos = -1;
5596							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
5597								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
5598							} else {
5599								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
5600							}
5601						} else if (insn->op == IR_PARAM) {
5602							IR_ASSERT(0 && "unexpected PARAM");
5603							return;
5604						}
5605					} else if (insn->op == IR_VAR) {
5606						ir_use_list *use_list = &ctx->use_lists[i];
5607						ir_ref n = use_list->count;
5608
5609						if (n > 0) {
5610							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
5611							ir_ref i, *p, use;
5612							ir_insn *use_insn;
5613
5614							for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
5615								use = *p;
5616								use_insn = &ctx->ir_base[use];
5617								if (use_insn->op == IR_VLOAD) {
5618									if (ctx->vregs[use]
5619									 && !ctx->live_intervals[ctx->vregs[use]]) {
5620										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
5621										memset(ival, 0, sizeof(ir_live_interval));
5622										ctx->live_intervals[ctx->vregs[use]] = ival;
5623										ival->type = insn->type;
5624										ival->reg = IR_REG_NONE;
5625										ival->vreg = ctx->vregs[use];
5626										ival->stack_spill_pos = stack_spill_pos;
5627									}
5628								} else if (use_insn->op == IR_VSTORE) {
5629									if (!IR_IS_CONST_REF(use_insn->op3)
5630									 && ctx->vregs[use_insn->op3]
5631									 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
5632										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
5633										memset(ival, 0, sizeof(ir_live_interval));
5634										ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival;
5635										ival->type = insn->type;
5636										ival->reg = IR_REG_NONE;
5637										ival->vreg = ctx->vregs[use_insn->op3];
5638										ival->stack_spill_pos = stack_spill_pos;
5639									}
5640								}
5641							}
5642						}
5643					}
5644
5645					insn_flags = ir_op_flags[insn->op];
5646					n = constraints.tmps_count;
5647					if (n) {
5648						do {
5649							n--;
5650							if (constraints.tmp_regs[n].type) {
5651								ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
5652								ir_ref *ops = insn->ops;
5653								IR_REGSET_EXCL(available, reg);
5654								if (constraints.tmp_regs[n].num > 0
5655								 && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) {
5656									/* rematerialization */
5657									reg |= IR_REG_SPILL_LOAD;
5658								}
5659								ctx->regs[i][constraints.tmp_regs[n].num] = reg;
5660							} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
5661								available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
5662							} else {
5663								IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
5664							}
5665						} while (n);
5666					}
5667					n = insn->inputs_count;
5668					for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
5669						ir_ref input = *p;
5670						if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) {
5671							if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) {
5672								ir_reg reg = IR_REG_NUM(ctx->regs[i][0]);
5673								ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD;
5674							} else {
5675								uint8_t use_flags = IR_USE_FLAGS(def_flags, j);
5676								ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE;
5677
5678								if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
5679									IR_REGSET_EXCL(available, reg);
5680									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
5681								} else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
5682									ctx->regs[i][j] = ctx->regs[i][1];
5683								} else if (use_flags & IR_USE_MUST_BE_IN_REG) {
5684									reg = ir_get_free_reg(ctx->ir_base[input].type, available);
5685									IR_REGSET_EXCL(available, reg);
5686									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
5687								}
5688							}
5689						}
5690					}
5691					break;
5692			}
5693			n = ir_insn_len(insn);
5694			i += n;
5695			insn += n;
5696			rule += n;
5697		}
5698		if (bb->flags & IR_BB_DESSA_MOVES) {
5699			data->dessa_from_block = b;
5700			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
5701		}
5702	}
5703
5704	ctx->used_preserved_regs = ctx->fixed_save_regset;
5705	ctx->flags |= IR_NO_STACK_COMBINE;
5706	ir_fix_stack_frame(ctx);
5707}
5708
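/* Reserve the largest outgoing-argument area used by any CALL once, in the
 * prologue, so that SP need not be adjusted around individual calls.
 */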
5709static void ir_preallocate_call_stack(ir_ctx *ctx)
5710{
5711	int call_stack_size, peak_call_stack_size = 0;
5712	ir_ref i, n;
5713	ir_insn *insn;
5714
5715	for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
5716		if (insn->op == IR_CALL) {
5717			call_stack_size = ir_call_used_stack(ctx, insn);
5718			if (call_stack_size > peak_call_stack_size) {
5719				peak_call_stack_size = call_stack_size;
5720			}
5721		}
5722		n = ir_insn_len(insn);
5723		i += n;
5724		insn += n;
5725	}
5726	if (peak_call_stack_size) {
5727		ctx->call_stack_size = peak_call_stack_size;
5728		ctx->flags |= IR_PREALLOCATED_STACK;
5729	}
5730}
5731
5732void ir_fix_stack_frame(ir_ctx *ctx)
5733{
5734	uint32_t additional_size = 0;
5735
5736	ctx->locals_area_size = ctx->stack_frame_size;
5737
5738	if (ctx->used_preserved_regs) {
5739		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
5740		ir_reg reg;
5741		(void) reg;
5742
5743		IR_REGSET_FOREACH(used_preserved_regs, reg) {
5744			additional_size += sizeof(void*);
5745		} IR_REGSET_FOREACH_END();
5746	}
5747
5748	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
5749		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
5750			additional_size += sizeof(void*) * IR_REG_INT_ARGS;
5751		}
5752		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
5753			additional_size += 16 * IR_REG_FP_ARGS;
5754		}
5755	}
5756
5757	ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
5758	ctx->stack_frame_size += additional_size;
5759	ctx->stack_frame_alignment = 0;
5760	ctx->call_stack_size = 0;
5761
5762	if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
5768		/* Stack must be 16 byte aligned */
5769		if (!(ctx->flags & IR_FUNCTION)) {
5770			while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
5771				ctx->stack_frame_size += sizeof(void*);
5772				ctx->stack_frame_alignment += sizeof(void*);
5773			}
5774		} else if (ctx->flags & IR_USE_FRAME_POINTER) {
5775			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) {
5776				ctx->stack_frame_size += sizeof(void*);
5777				ctx->stack_frame_alignment += sizeof(void*);
5778			}
5779		} else {
5780			if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
5781				ir_preallocate_call_stack(ctx);
5782			}
5783			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) !=
5784					ctx->stack_frame_size + ctx->call_stack_size) {
5785				ctx->stack_frame_size += sizeof(void*);
5786				ctx->stack_frame_alignment += sizeof(void*);
5787			}
5788		}
5789	}
5790
5791	ir_fix_param_spills(ctx);
5792}
5793
5794static void* dasm_labels[ir_lb_MAX];
5795
5796/* Veneers support (TODO: avoid global variable usage) */
5797static ir_ctx *ir_current_ctx;
5798
5799static uint32_t _ir_next_block(ir_ctx *ctx, uint32_t _b)
5800{
5801	uint32_t b = ctx->cfg_schedule[++_b];
5802
5803	/* Check for empty ENTRY block */
5804	while (b && ((ctx->cfg_blocks[b].flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY)) {
5805		b = ctx->cfg_schedule[++_b];
5806	}
5807	return b;
5808}
5809
5810void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr)
5811{
5812	uint32_t _b, b, n, target;
5813	ir_block *bb;
5814	ir_ref i;
5815	ir_insn *insn;
5816	uint32_t *rule;
5817	ir_backend_data data;
5818	dasm_State **Dst;
5819	int ret;
5820	void *entry;
5821	size_t size;
5822
5823	data.ra_data.unused_slot_4 = 0;
5824	data.ra_data.unused_slot_2 = 0;
5825	data.ra_data.unused_slot_1 = 0;
5826	data.ra_data.handled = NULL;
5827	data.rodata_label = 0;
5828	data.jmp_table_label = 0;
5829	ctx->data = &data;
5830
5831	if (!ctx->live_intervals) {
5832		ctx->stack_frame_size = 0;
5833		ctx->stack_frame_alignment = 0;
5834		ctx->call_stack_size = 0;
5835		ctx->used_preserved_regs = 0;
5836		ir_allocate_unique_spill_slots(ctx);
5837	}
5838
5839	if (ctx->fixed_stack_frame_size != -1) {
5840		if (ctx->fixed_stack_red_zone) {
5841			IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size);
5842		}
5843		if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) {
5844			// TODO: report error to caller
5845#ifdef IR_DEBUG_MESSAGES
5846			fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n",
5847				__FILE__, __LINE__);
5848#endif
5849			ctx->data = NULL;
5850			ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW;
5851			return NULL;
5852		}
5853		ctx->stack_frame_size = ctx->fixed_stack_frame_size;
5854		ctx->call_stack_size = ctx->fixed_call_stack_size;
5855		ctx->stack_frame_alignment = 0;
5856	}
5857
5858	Dst = &data.dasm_state;
5859	data.dasm_state = NULL;
5860	dasm_init(&data.dasm_state, DASM_MAXSECTION);
5861	dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX);
5862	dasm_setup(&data.dasm_state, dasm_actions);
5863	/* labels for each block + for each constant + rodata label + jmp_table label + for each entry + exit_table label */
5864	dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count + 1);
5865	data.emit_constants = ir_bitset_malloc(ctx->consts_count);
5866
5867	if (!(ctx->flags & IR_SKIP_PROLOGUE)) {
5868		ir_emit_prologue(ctx);
5869	}
5870	if (ctx->flags & IR_FUNCTION) {
5871		ir_emit_load_params(ctx);
5872	}
5873
5874	if (UNEXPECTED(!ctx->cfg_schedule)) {
5875		uint32_t *list = ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2));
5876		for (b = 0; b <= ctx->cfg_blocks_count; b++) {
5877			list[b] = b;
5878		}
5879		list[ctx->cfg_blocks_count + 1] = 0;
5880	}
5881
5882	for (_b = 1; _b <= ctx->cfg_blocks_count; _b++) {
5883		b = ctx->cfg_schedule[_b];
5884		bb = &ctx->cfg_blocks[b];
5885		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
5886		if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) {
5887			continue;
5888		}
5889		if (bb->flags & IR_BB_ALIGN_LOOP) {
5890			|	.align IR_LOOP_ALIGNMENT
5891		}
5892		|=>b:
5893
5894		i = bb->start;
5895		insn = ctx->ir_base + i;
5896		if (bb->flags & IR_BB_ENTRY) {
5897			uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3;
5898
5899			|=>label:
5900			ir_emit_prologue(ctx);
5901			ctx->entries[insn->op3] = i;
5902		}
5903
5904		/* skip first instruction */
5905		n = ir_insn_len(insn);
5906		i += n;
5907		insn += n;
5908		rule = ctx->rules + i;
5909
5910		while (i <= bb->end) {
5911			if (!((*rule) & (IR_FUSED|IR_SKIPPED)))
5912			switch ((*rule) & IR_RULE_MASK) {
5913				case IR_VAR:
5914				case IR_PARAM:
5915				case IR_PI:
5916				case IR_PHI:
5917				case IR_SNAPSHOT:
5918				case IR_VA_END:
5919					break;
5920				case IR_MUL_PWR2:
5921				case IR_DIV_PWR2:
5922				case IR_MOD_PWR2:
5923					ir_emit_mul_div_mod_pwr2(ctx, i, insn);
5924					break;
5925				case IR_SDIV_PWR2:
5926					ir_emit_sdiv_pwr2(ctx, i, insn);
5927					break;
5928				case IR_SMOD_PWR2:
5929					ir_emit_smod_pwr2(ctx, i, insn);
5930					break;
5931				case IR_SHIFT:
5932					ir_emit_shift(ctx, i, insn);
5933					break;
5934				case IR_SHIFT_CONST:
5935					ir_emit_shift_const(ctx, i, insn);
5936					break;
5937				case IR_CTPOP:
5938					ir_emit_ctpop(ctx, i, insn);
5939					break;
5940				case IR_OP_INT:
5941					ir_emit_op_int(ctx, i, insn);
5942					break;
5943				case IR_OP_FP:
5944					ir_emit_op_fp(ctx, i, insn);
5945					break;
5946				case IR_BINOP_INT:
5947					ir_emit_binop_int(ctx, i, insn);
5948					break;
5949				case IR_BINOP_FP:
5950					ir_emit_binop_fp(ctx, i, insn);
5951					break;
5952				case IR_CMP_INT:
5953					ir_emit_cmp_int(ctx, i, insn);
5954					break;
5955				case IR_CMP_FP:
5956					ir_emit_cmp_fp(ctx, i, insn);
5957					break;
5958				case IR_SEXT:
5959					ir_emit_sext(ctx, i, insn);
5960					break;
5961				case IR_ZEXT:
5962					ir_emit_zext(ctx, i, insn);
5963					break;
5964				case IR_TRUNC:
5965					ir_emit_trunc(ctx, i, insn);
5966					break;
5967				case IR_BITCAST:
5968				case IR_PROTO:
5969					ir_emit_bitcast(ctx, i, insn);
5970					break;
5971				case IR_INT2FP:
5972					ir_emit_int2fp(ctx, i, insn);
5973					break;
5974				case IR_FP2INT:
5975					ir_emit_fp2int(ctx, i, insn);
5976					break;
5977				case IR_FP2FP:
5978					ir_emit_fp2fp(ctx, i, insn);
5979					break;
5980				case IR_COPY_INT:
5981					ir_emit_copy_int(ctx, i, insn);
5982					break;
5983				case IR_COPY_FP:
5984					ir_emit_copy_fp(ctx, i, insn);
5985					break;
5986				case IR_CMP_AND_BRANCH_INT:
5987					ir_emit_cmp_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b));
5988					break;
5989				case IR_CMP_AND_BRANCH_FP:
5990					ir_emit_cmp_and_branch_fp(ctx, b, i, insn, _ir_next_block(ctx, _b));
5991					break;
5992				case IR_GUARD_CMP_INT:
5993					ir_emit_guard_cmp_int(ctx, b, i, insn);
5994					break;
5995				case IR_GUARD_CMP_FP:
5996					ir_emit_guard_cmp_fp(ctx, b, i, insn);
5997					break;
5998				case IR_IF_INT:
5999					ir_emit_if_int(ctx, b, i, insn, _ir_next_block(ctx, _b));
6000					break;
6001				case IR_COND:
6002					ir_emit_cond(ctx, i, insn);
6003					break;
6004				case IR_SWITCH:
6005					ir_emit_switch(ctx, b, i, insn);
6006					break;
6007				case IR_MIN_MAX_INT:
6008					ir_emit_min_max_int(ctx, i, insn);
6009					break;
6010				case IR_OVERFLOW:
6011					ir_emit_overflow(ctx, i, insn);
6012					break;
6013				case IR_OVERFLOW_AND_BRANCH:
6014					ir_emit_overflow_and_branch(ctx, b, i, insn, _ir_next_block(ctx, _b));
6015					break;
6016				case IR_END:
6017				case IR_LOOP_END:
6018					if (bb->flags & IR_BB_OSR_ENTRY_LOADS) {
6019						ir_emit_osr_entry_loads(ctx, b, bb);
6020					}
6021					if (bb->flags & IR_BB_DESSA_MOVES) {
6022						ir_emit_dessa_moves(ctx, b, bb);
6023					}
6024					do {
6025						ir_ref succ = ctx->cfg_edges[bb->successors];
6026
6027						if (UNEXPECTED(bb->successors_count == 2)) {
6028							if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) {
6029								succ = ctx->cfg_edges[bb->successors + 1];
6030							} else {
6031								IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
6032							}
6033						} else {
6034							IR_ASSERT(bb->successors_count == 1);
6035						}
6036						target = ir_skip_empty_target_blocks(ctx, succ);
6037						if (target != _ir_next_block(ctx, _b)) {
6038							|	b =>target
6039						}
6040					} while (0);
6041					break;
6042				case IR_RETURN_VOID:
6043					ir_emit_return_void(ctx);
6044					break;
6045				case IR_RETURN_INT:
6046					ir_emit_return_int(ctx, i, insn);
6047					break;
6048				case IR_RETURN_FP:
6049					ir_emit_return_fp(ctx, i, insn);
6050					break;
6051				case IR_CALL:
6052					ir_emit_call(ctx, i, insn);
6053					break;
6054				case IR_TAILCALL:
6055					ir_emit_tailcall(ctx, i, insn);
6056					break;
6057				case IR_IJMP:
6058					ir_emit_ijmp(ctx, i, insn);
6059					break;
6060				case IR_REG_BINOP_INT:
6061					ir_emit_reg_binop_int(ctx, i, insn);
6062					break;
6063				case IR_VADDR:
6064					ir_emit_vaddr(ctx, i, insn);
6065					break;
6066				case IR_VLOAD:
6067					ir_emit_vload(ctx, i, insn);
6068					break;
6069				case IR_VSTORE:
6070					ir_emit_vstore(ctx, i, insn);
6071					break;
6072				case IR_RLOAD:
6073					ir_emit_rload(ctx, i, insn);
6074					break;
6075				case IR_RSTORE:
6076					ir_emit_rstore(ctx, i, insn);
6077					break;
6078				case IR_LOAD_INT:
6079					ir_emit_load_int(ctx, i, insn);
6080					break;
6081				case IR_LOAD_FP:
6082					ir_emit_load_fp(ctx, i, insn);
6083					break;
6084				case IR_STORE_INT:
6085					ir_emit_store_int(ctx, i, insn);
6086					break;
6087				case IR_STORE_FP:
6088					ir_emit_store_fp(ctx, i, insn);
6089					break;
6090				case IR_ALLOCA:
6091					ir_emit_alloca(ctx, i, insn);
6092					break;
6093				case IR_VA_START:
6094					ir_emit_va_start(ctx, i, insn);
6095					break;
6096				case IR_VA_COPY:
6097					ir_emit_va_copy(ctx, i, insn);
6098					break;
6099				case IR_VA_ARG:
6100					ir_emit_va_arg(ctx, i, insn);
6101					break;
6102				case IR_AFREE:
6103					ir_emit_afree(ctx, i, insn);
6104					break;
6105				case IR_BLOCK_BEGIN:
6106					ir_emit_block_begin(ctx, i, insn);
6107					break;
6108				case IR_BLOCK_END:
6109					ir_emit_block_end(ctx, i, insn);
6110					break;
6111				case IR_FRAME_ADDR:
6112					ir_emit_frame_addr(ctx, i);
6113					break;
6114				case IR_EXITCALL:
6115					ir_emit_exitcall(ctx, i, insn);
6116					break;
6117				case IR_GUARD:
6118				case IR_GUARD_NOT:
6119					ir_emit_guard(ctx, i, insn);
6120					break;
6121				case IR_GUARD_OVERFLOW:
6122					ir_emit_guard_overflow(ctx, i, insn);
6123					break;
6124				case IR_TLS:
6125					ir_emit_tls(ctx, i, insn);
6126					break;
6127				case IR_TRAP:
6128					|	brk
6129					break;
6130				default:
6131					IR_ASSERT(0 && "NIY rule/instruction");
6132					ir_mem_free(data.emit_constants);
6133					dasm_free(&data.dasm_state);
6134					ctx->data = NULL;
6135					ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE;
6136					return NULL;
6137			}
6138			n = ir_insn_len(insn);
6139			i += n;
6140			insn += n;
6141			rule += n;
6142		}
6143	}
6144
6145	if (ctx->deoptimization_exits) {
6146		uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count;
6147
6148		|=>exit_table_label:
6149		for (i = 0; i < ctx->deoptimization_exits; i++) {
6150			const void *exit_addr = ctx->get_exit_addr(i);
6151
6152			if (!exit_addr) {
				ir_mem_free(data.emit_constants);
				dasm_free(&data.dasm_state);
6153				ctx->data = NULL;
6154				return NULL;
6155			}
6156			|	b &exit_addr
6157		}
6158	}
6159
6160	if (data.rodata_label) {
6161		|.rodata
6162	}
6163	IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) {
6164		insn = &ctx->ir_base[-i];
6165		if (IR_IS_TYPE_FP(insn->type)) {
6166			int label = ctx->cfg_blocks_count + i;
6167
6168			if (!data.rodata_label) {
6169				data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
6170
6171				|.rodata
6172				|=>data.rodata_label:
6173			}
6174			if (insn->type == IR_DOUBLE) {
6175				|.align 8
6176				|=>label:
6177				|.long insn->val.u32, insn->val.u32_hi
6178			} else {
6179				IR_ASSERT(insn->type == IR_FLOAT);
6180				|.align 4
6181				|=>label:
6182				|.long insn->val.u32
6183			}
6184		} else if (insn->op == IR_STR) {
6185			int label = ctx->cfg_blocks_count + i;
6186			const char *str = ir_get_str(ctx, insn->val.str);
6187			int i = 0;
6188
6189			if (!data.rodata_label) {
6190				data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
6191
6192				|.rodata
6193				|=>data.rodata_label:
6194			}
6195			|.align 8
6196			|=>label:
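			/* Pack the NUL-terminated string into little-endian 32-bit
			 * words; the word containing the terminator is zero-padded.
			 */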
6197			while (1) {
6198				char c;
6199				uint32_t w = 0;
6200				int j;
6201
6202				for (j = 0; j < 4; j++) {
6203					c = str[i];
6204					if (!c) {
6205						break;
6206					}
6207					w |= (uint32_t)(unsigned char)c << (8 * j);
6208					i++;
6209				}
6210				|	.long w
6211				if (!c) {
6212					break;
6213				}
6214			}
6215
6216		} else {
6217			IR_ASSERT(0);
6218		}
6219	} IR_BITSET_FOREACH_END();
6220	if (data.rodata_label) {
6221		|.code
6222	}
6223	ir_mem_free(data.emit_constants);
6224
6225	if (ctx->status) {
6226		dasm_free(&data.dasm_state);
6227		ctx->data = NULL;
6228		return NULL;
6229	}
6230
6231	ret = dasm_link(&data.dasm_state, size_ptr);
6232	if (ret != DASM_S_OK) {
6233		IR_ASSERT(0);
6234		dasm_free(&data.dasm_state);
6235		ctx->data = NULL;
6236		ctx->status = IR_ERROR_LINK;
6237		return NULL;
6238	}
6239	size = *size_ptr;
6240
6241	if (ctx->code_buffer) {
6242		entry = ctx->code_buffer->pos;
6243		entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16);
6244		if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) {
			dasm_free(&data.dasm_state);
6245			ctx->data = NULL;
6246			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
6247			return NULL;
6248		}
6249		ctx->code_buffer->pos = (char*)entry + size;
6250	} else {
6251		entry = ir_mem_mmap(size);
6252		if (!entry) {
6253			dasm_free(&data.dasm_state);
6254			ctx->data = NULL;
6255			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
6256			return NULL;
6257		}
6258		ir_mem_unprotect(entry, size);
6259	}
6260
6261	if (ctx->deoptimization_exits) {
6262		uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count;
6263
6264		ctx->deoptimization_exits_base = (const void*)((char*)entry + dasm_getpclabel(&data.dasm_state, exit_table_label));
6265	}
6266
6267	ir_current_ctx = ctx;
6268	ret = dasm_encode(&data.dasm_state, entry);
6269	if (ret != DASM_S_OK) {
6270		IR_ASSERT(0);
6271		dasm_free(&data.dasm_state);
6272		if (ctx->code_buffer) {
6273			if (ctx->code_buffer->pos == (char*)entry + size) {
6274				/* rollback */
6275				ctx->code_buffer->pos = (char*)entry - size;
6276			}
6277		} else {
6278			ir_mem_unmap(entry, size);
6279		}
6280		ctx->data = NULL;
6281		ctx->status = IR_ERROR_ENCODE;
6282		return NULL;
6283	}
6284
6285	if (data.jmp_table_label) {
6286		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label);
6287		ctx->jmp_table_offset = offset;
6288	} else {
6289		ctx->jmp_table_offset = 0;
6290	}
6291	if (data.rodata_label) {
6292		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label);
6293		ctx->rodata_offset = offset;
6294	} else {
6295		ctx->rodata_offset = 0;
6296	}
6297
6298	if (ctx->entries_count) {
6299		/* For all entries */
6300		i = ctx->entries_count;
6301		do {
6302			ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
6303			uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3);
6304			insn->op3 = offset;
6305		} while (i != 0);
6306	}
6307
6308	dasm_free(&data.dasm_state);
6309
6310	if (ctx->code_buffer) {
6311		size = (char*)ctx->code_buffer->pos - (char*)entry;
6312	}
6313
6314	ir_mem_flush(entry, size);
6315
6316	if (!ctx->code_buffer) {
6317		ir_mem_protect(entry, size);
6318	}
6319
6320	ctx->data = NULL;
6321	return entry;
6322}
6323
6324const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr)
6325{
6326	void *entry;
6327	size_t size;
6328	uint32_t i;
6329	dasm_State **Dst, *dasm_state;
6330	int ret;
6331
6332	IR_ASSERT(code_buffer);
6333	IR_ASSERT(aarch64_may_use_b(code_buffer, exit_addr));
6334
6335	Dst = &dasm_state;
6336	dasm_state = NULL;
6337	dasm_init(&dasm_state, DASM_MAXSECTION);
6338	dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX);
6339	dasm_setup(&dasm_state, dasm_actions);
6340
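	/* Emit exit_points_per_group consecutive `bl >2` stubs. The landing pad
	 * recovers the exit number from the return address: each stub is 4 bytes
	 * and lr points just past the taken stub, so (lr - <1) >> 2 is the
	 * zero-based index (the first stub sits immediately before label 1).
	 */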
6341	|	bl >2
6342	|1:
6343	for (i = 1; i < exit_points_per_group; i++) {
6344		|	bl >2
6345	}
6346	|2:
6347	|	adr Rx(IR_REG_INT_TMP), <1
6348	|	sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP)
6349	|	lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2
6350	if (first_exit_point) {
6351		|	add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point
6352	}
6353	|	b &exit_addr
6354
6355	ret = dasm_link(&dasm_state, &size);
6356	if (ret != DASM_S_OK) {
6357		IR_ASSERT(0);
6358		dasm_free(&dasm_state);
6359		return NULL;
6360	}
6361
6362	entry = code_buffer->pos;
6363	entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16);
6364	if (size > (size_t)((char*)code_buffer->end - (char*)entry)) {
		dasm_free(&dasm_state);
6365		return NULL;
6366	}
6367	code_buffer->pos = (char*)entry + size;
6368
6369	ir_current_ctx = NULL;
6370	ret = dasm_encode(&dasm_state, entry);
6371	if (ret != DASM_S_OK) {
6372		IR_ASSERT(0);
6373		dasm_free(&dasm_state);
6374		if (code_buffer->pos == (char*)entry + size) {
6375			/* rollback */
6376			code_buffer->pos = (char*)entry - size;
6377		}
6378		return NULL;
6379	}
6380
6381	dasm_free(&dasm_state);
6382
6383	ir_mem_flush(entry, size);
6384
6385	*size_ptr = size;
6386	return entry;
6387}
6388
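/* DynAsm callout for out-of-range branches: reuse or place a veneer (a single
 * B trampoline at code_buffer->pos) near the failing instruction and return
 * the displacement to encode, or 0 if no reachable veneer can be provided.
 */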
6389static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset)
6390{
6391	ir_ctx *ctx = ir_current_ctx;
6392	const void *addr, *veneer = NULL;
6393	ptrdiff_t na;
6394	int n, m;
6395
6396	IR_ASSERT(ctx && ctx->code_buffer);
6397
6398	if ((ins >> 16) == DASM_REL_A) {
6399		addr = (void*)((((ptrdiff_t)(*(b-1))) << 32) | (unsigned int)(*(b-2)));
6400		if (ctx->get_veneer) {
6401			veneer = ctx->get_veneer(ctx, addr);
6402		}
6403	} else {
6404		IR_ASSERT(0 && "too long jmp distance");
6405		return 0;
6406	}
6407
6408	if (veneer) {
6409		na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4;
6410		n = (int)na;
6411
6412		/* check if we can jump to veneer */
6413		if ((ptrdiff_t)n != na) {
6414			/* pass */
6415		} else if (!(ins & 0xf800)) {  /* B, BL */
6416			if ((n & 3) == 0 && ((n+0x08000000) >> 28) == 0) {
6417				return n;
6418			}
6419		} else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
6420			if ((n & 3) == 0 && ((n+0x00100000) >> 21) == 0) {
6421				return n;
6422			}
6423		} else if ((ins & 0x3000) == 0x2000) {  /* ADR */
6424			/* pass */
6425		} else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
6426			/* pass */
6427		} else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
6428			if ((n & 3) == 0 && ((n+0x00008000) >> 16) == 0) {
6429				return n;
6430			}
6431		}
6432	}
6433
6434	veneer = ctx->code_buffer->pos;
6435	if ((char*)ctx->code_buffer->end - (char*)veneer < 4) {
6436		IR_ASSERT(0 && "jit buffer overflow");
6437		return 0; /* jit_buffer_size overflow */
6438	}
6439
6440	na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4;
6441	n = (int)na;
6442
6443	/* check if we can jump to veneer */
6444	if ((ptrdiff_t)n != na) {
6445		IR_ASSERT(0 && "too long jmp distance");
6446		return 0;
6447	} else if (!(ins & 0xf800)) {  /* B, BL */
6448		if ((n & 3) != 0 || ((n+0x08000000) >> 28) != 0) {
6449			IR_ASSERT(0 && "too long jmp distance");
6450			return 0;
6451		}
6452	} else if ((ins & 0x800)) {  /* B.cond, CBZ, CBNZ, LDR* literal */
6453		if ((n & 3) != 0 || ((n+0x00100000) >> 21) != 0) {
6454			IR_ASSERT(0 && "too long jmp distance");
6455			return 0;
6456		}
6457	} else if ((ins & 0x3000) == 0x2000) {  /* ADR */
6458		IR_ASSERT(0 && "too long jmp distance");
6459		return 0;
6460	} else if ((ins & 0x3000) == 0x3000) {  /* ADRP */
6461		IR_ASSERT(0 && "too long jmp distance");
6462		return 0;
6463	} else if ((ins & 0x1000)) {  /* TBZ, TBNZ */
6464		if ((n & 3) != 0 || ((n+0x00008000) >> 16) != 0) {
6465			IR_ASSERT(0 && "too long jmp distance");
6466			return 0;
6467		}
6468	} else if ((ins & 0x8000)) {  /* absolute */
6469		IR_ASSERT(0 && "too long jmp distance");
6470		return 0;
6471	} else {
6472		IR_ASSERT(0 && "too long jmp distance");
6473		return 0;
6474	}
6475
6476	/* check if we can use B to jump from veneer */
6477	na = (ptrdiff_t)cp + offset - (ptrdiff_t)veneer - 4;
6478	m = (int)na;
6479	if ((ptrdiff_t)m != na) {
6480		IR_ASSERT(0 && "too long jmp distance");
6481		return 0;
6482	} else if ((m & 3) != 0 || ((m+0x08000000) >> 28) != 0) {
6483		IR_ASSERT(0 && "too long jmp distance");
6484		return 0;
6485	}
6486
6487	if (!ctx->set_veneer || !ctx->set_veneer(ctx, addr, veneer)) {
6488		IR_ASSERT(0 && "too long jmp distance");
6489		return 0;
6490	}
6491
6492	/* generate B instruction */
6493	*(uint32_t*)veneer = 0x14000000 | ((m >> 2) & 0x03ffffff);
6494	ctx->code_buffer->pos = (char*)ctx->code_buffer->pos + 4;
6495
6496	return n;
6497}
6498
6499bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr)
6500{
6501	return !aarch64_may_use_b(code_buffer, addr);
6502}
6503
6504void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr)
6505{
6506	void *entry;
6507	size_t size;
6508	dasm_State **Dst, *dasm_state;
6509	int ret;
6510
6511	Dst = &dasm_state;
6512	dasm_state = NULL;
6513	dasm_init(&dasm_state, DASM_MAXSECTION);
6514	dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX);
6515	dasm_setup(&dasm_state, dasm_actions);
6516
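	/* Thunk layout (patched in place later by ir_fix_thunk()):
	 *   movz tmp, #addr[15:0]
	 *   movk tmp, #addr[31:16], lsl #16
	 *   movk tmp, #addr[47:32], lsl #32
	 *   movk tmp, #addr[63:48], lsl #48
	 *   br   tmp
	 */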
6517	|.code
6518	|	movz Rx(IR_REG_INT_TMP), #((uint64_t)(addr) & 0xffff)
6519	|	movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 16) & 0xffff), lsl #16
6520	|	movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 32) & 0xffff), lsl #32
6521	|	movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 48) & 0xffff), lsl #48
6522	|	br Rx(IR_REG_INT_TMP)
6523
6524	ret = dasm_link(&dasm_state, &size);
6525	if (ret != DASM_S_OK) {
6526		IR_ASSERT(0);
6527		dasm_free(&dasm_state);
6528		return NULL;
6529	}
6530
6531	entry = code_buffer->pos;
6532	entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 4);
6533	if (size > (size_t)((char*)code_buffer->end - (char*)entry)) {
6534		dasm_free(&dasm_state);
6535		return NULL;
6536	}
6537
6538	ret = dasm_encode(&dasm_state, entry);
6539	if (ret != DASM_S_OK) {
6540		dasm_free(&dasm_state);
6541		return NULL;
6542	}
6543
6544	*size_ptr = size;
6545	code_buffer->pos = (char*)entry + size;
6546
6547	dasm_free(&dasm_state);
6548	ir_mem_flush(entry, size);
6549
6550	return entry;
6551}
6552
6553void ir_fix_thunk(void *thunk_entry, void *addr)
6554{
6555	uint32_t *code = thunk_entry;
6556	IR_ASSERT((code[0] & 0xffe00000) == 0xd2800000
6557		&& (code[1] & 0xffe00000) == 0xf2a00000
6558		&& (code[2] & 0xffe00000) == 0xf2c00000
6559		&& (code[3] & 0xffe00000) == 0xf2e00000
6560		&& (code[4] & 0xfffffc1f) == 0xd61f0000);
6561
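	/* imm16 occupies bits 5..20 of MOVZ/MOVK; keep everything outside that
	 * field (mask 0xffe0001f) and shift the new half-word into place.
	 */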
6562	code[0] = (code[0] & 0xffe0001f) | (uint32_t)((uint64_t)(addr) & 0xffff) << 5;
6563	code[1] = (code[1] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 16) & 0xffff) << 5;
6564	code[2] = (code[2] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 32) & 0xffff) << 5;
6565	code[3] = (code[3] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 48) & 0xffff) << 5;
6566
6567	ir_mem_flush(code, sizeof(uint32_t) * 4);
6568}
6569