/*
 * IR - Lightweight JIT Compilation Framework
 * (Aarch64 native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

|.arch arm64

|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 8

/* ir_mem packs a memory operand (offset, base reg, index reg, shift) into a
 * single 64-bit value.  In debug builds it is wrapped in a struct so that an
 * ir_mem cannot be accidentally mixed up with a plain integer. */
#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

# define IR_MEM_VAL(loc) ((loc).v)
#else
typedef uint64_t ir_mem;

# define IR_MEM_VAL(loc) (loc)
#endif

/* Field extractors for the packed layout produced by IR_MEM() below:
 * bits 0..31 - signed 32-bit offset, bits 32..39 - base reg,
 * bits 40..47 - index reg, bits 48..55 - shift count. */
#define IR_MEM_OFFSET(loc) ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff))
#define IR_MEM_BASE(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff))
#define IR_MEM_INDEX(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff))
#define IR_MEM_SHIFT(loc) ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff))

/* Convenience constructors: absolute offset, base only, base+offset. */
#define IR_MEM_O(addr) IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 0)
#define IR_MEM_B(base) IR_MEM(base, 0, IR_REG_NONE, 0)
#define IR_MEM_BO(base, offset) IR_MEM(base, offset, IR_REG_NONE, 0)

/* Build a packed ir_mem operand.  base/index must be GP registers (or
 * IR_REG_NONE); an index register and a non-zero offset are mutually
 * exclusive on this backend. */
IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t shift)
{
	ir_mem mem;
	IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || offset == 0);
	IR_ASSERT(shift == 0); // TODO: ??? scaled-index addressing is not generated yet
#ifdef IR_DEBUG
	mem.v =
#else
	mem =
#endif
		((uint64_t)(uint32_t)offset |
		((uint64_t)(uint8_t)base << 32) |
		((uint64_t)(uint8_t)index << 40) |
		((uint64_t)(uint8_t)shift << 48));
	return mem;
}

/* Translate a spill position into a frame offset: relative to FP it skips the
 * saved FP/LR pair, relative to SP it skips the outgoing call-argument area. */
#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ? \
		((offset) + (int32_t)sizeof(void*) * 2) : \
		((offset) + ctx->call_stack_size))

/* Reach of AArch64 PC-relative instructions. */
#define B_IMM (1<<27) // signed imm26 * 4
#define ADR_IMM (1<<20) // signed imm21
#define ADRP_IMM (1LL<<32) // signed imm21 * 4096

/* Check whether "addr" is guaranteed to be reachable by a direct B/BL from
 * anywhere inside the code buffer (conservative: uses the buffer bounds,
 * not the exact emission point). */
static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < B_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < B_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < B_IMM);
		}
	}
	return 0;
}

/* ADR/ADRP reachability twins of aarch64_may_use_b(); currently unused. */
#if 0
static bool aarch64_may_use_adr(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < ADR_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < ADR_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < ADR_IMM);
		}
	}
	return 0;
}

static bool aarch64_may_use_adrp(ir_code_buffer *code_buffer, const void *addr)
{
	if (code_buffer) {
		if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) {
			return (((char*)code_buffer->end - (char*)code_buffer->start) < ADRP_IMM);
		} else if ((char*)addr >= (char*)code_buffer->end) {
			return (((char*)addr - (char*)code_buffer->start) < ADRP_IMM);
		} else if (addr < code_buffer->start) {
			return (((char*)code_buffer->end - (char*)addr) < ADRP_IMM);
		}
	}
	return 0;
}
#endif

/* Determine whether "val" falls into two allowed ranges:
 * Range 1: [0, 0xfff]
 * Range 2: LSL #12 to Range 1
 * Used to guard the immediate encoding for
add/adds/sub/subs/cmp/cmn instructions. */
static bool aarch64_may_encode_imm12(const int64_t val)
{
	/* Mask clears bits 0..11 and bits 12..23; anything left means the value
	 * fits neither the plain imm12 nor the "imm12, LSL #12" form. */
	return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff)));
}

/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions. */
static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size)
{
	/* fast path: power of two */
	if (value > 0 && !(value & (value - 1))) {
		return 1;
	}

	/* For sub-64-bit types the value is replicated into both halves so that
	 * dasm_imm13() checks the 32-bit bitmask-immediate pattern. */
	if (type_size == 8) {
		if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) {
			return 1;
		}
	} else {
		if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) {
			return 1;
		}
	}

	return 0;
}

/* Check the scaled signed 7-bit offset range used by LDP/STP-style
 * addressing: offset must be type-size aligned and within [-64, 63] units. */
static bool aarch64_may_encode_imm7_addr_offset(const int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0
		&& offset < 63 * (int32_t)type_size
		&& offset >= -64 * (int32_t)type_size;
}

/* Check the scaled unsigned 12-bit offset range used by LDR/STR immediate
 * addressing: offset must be aligned and below 0xfff * type_size. */
static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size)
{
	return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size;
}

/* The ASM_* macros below pick the 64-bit (Rx/Rd) or 32-bit (Rw/Rs) register
 * form of an instruction at emit time, based on the IR operand type. */
|.macro ASM_REG_REG_OP, op, type, dst, src
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src)
||	} else {
|		op Rw(dst), Rw(src)
||	}
|.endmacro

|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2)
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2)
||	}
|.endmacro

|.macro ASM_REG_REG_REG_TXT_OP, op, type, dst, src1, src2, txt
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2), txt
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2), txt
||	}
|.endmacro

|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), Rx(src2), Rx(src3)
||	} else {
|		op Rw(dst), Rw(src1), Rw(src2), Rw(src3);
||	}
|.endmacro

|.macro ASM_REG_REG_IMM_OP, op, type, dst, src1, val
||	if (ir_type_size[type] == 8) {
|		op Rx(dst), Rx(src1), #val
||	} else {
|		op Rw(dst), Rw(src1), #val
||	}
|.endmacro

|.macro ASM_REG_IMM_OP, op, type, reg, val
||	if (ir_type_size[type] == 8) {
|		op Rx(reg), #val
||	} else {
|		op Rw(reg), #val
||	}
|.endmacro

/* FP variants: Rd = double precision, Rs = single precision.  FP register
 * codes are biased by IR_REG_FP_FIRST to get the hardware register number. */
|.macro ASM_FP_REG_IMM_OP, op, type, reg, val
||	if (type == IR_DOUBLE) {
|		op Rd(reg-IR_REG_FP_FIRST), #val
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op Rs(reg-IR_REG_FP_FIRST), #val
||	}
|.endmacro

|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2
||	if (type == IR_DOUBLE) {
|		op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST)
||	}
|.endmacro

/* Per-compilation state of this backend, reachable through ctx->data. */
typedef struct _ir_backend_data {
	ir_reg_alloc_data ra_data;
	uint32_t dessa_from_block;      /* current block during SSA deconstruction */
	dasm_State *dasm_state;
	ir_bitset emit_constants;       /* constants that must be materialized in rodata */
	int rodata_label, jmp_table_label;
} ir_backend_data;

/* Stringification helpers used to build the register-name tables below. */
#define IR_GP_REG_NAME(code, name64, name32) \
	#name64,
#define IR_GP_REG_NAME32(code, name64, name32) \
	#name32,
#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \
	#name64,
#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \
	#name32,

/* 64-bit and 32-bit printable register names, indexed by ir_reg. */
static const char *_ir_reg_name[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME)
	IR_FP_REGS(IR_FP_REG_NAME)
};

static const char *_ir_reg_name32[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME32)
	IR_FP_REGS(IR_FP_REG_NAME32)
};

/* Calling Convention */
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
	IR_REG_INT_ARG5,
	IR_REG_INT_ARG6,
	IR_REG_INT_ARG7,
	IR_REG_INT_ARG8,
};

static const
int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
	IR_REG_FP_ARG5,
	IR_REG_FP_ARG6,
	IR_REG_FP_ARG7,
	IR_REG_FP_ARG8,
};

/* Return a printable name for "reg".  For IR_VOID the width defaults to the
 * natural register width (64-bit GP / double FP); otherwise the 32-bit name
 * is returned for types narrower than 8 bytes. */
const char *ir_reg_name(int8_t reg, ir_type type)
{
	if (reg >= IR_REG_NUM) {
		/* pseudo-registers used by the register allocator */
		if (reg == IR_REG_SCRATCH) {
			return "SCRATCH";
		} else {
			IR_ASSERT(reg == IR_REG_ALL);
			return "ALL";
		}
	}
	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
	if (type == IR_VOID) {
		type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE;
	}
	if (ir_type_size[type] == 8) {
		return _ir_reg_name[reg];
	} else {
		return _ir_reg_name32[reg];
	}
}

/* Backend-specific instruction-selection rules (appended after the generic
 * IR opcodes in the rule space). */
#define IR_RULES(_) \
	_(CMP_INT) \
	_(CMP_FP) \
	_(MUL_PWR2) \
	_(DIV_PWR2) \
	_(MOD_PWR2) \
	_(SDIV_PWR2) \
	_(SMOD_PWR2) \
	_(OP_INT) \
	_(OP_FP) \
	_(BINOP_INT) \
	_(BINOP_FP) \
	_(SHIFT) \
	_(SHIFT_CONST) \
	_(COPY_INT) \
	_(COPY_FP) \
	_(CMP_AND_BRANCH_INT) \
	_(CMP_AND_BRANCH_FP) \
	_(GUARD_CMP_INT) \
	_(GUARD_CMP_FP) \
	_(GUARD_OVERFLOW) \
	_(OVERFLOW_AND_BRANCH) \
	_(MIN_MAX_INT) \
	_(REG_BINOP_INT) \
	_(LOAD_INT) \
	_(LOAD_FP) \
	_(STORE_INT) \
	_(STORE_FP) \
	_(IF_INT) \
	_(RETURN_VOID) \
	_(RETURN_INT) \
	_(RETURN_FP) \

#define IR_RULE_ENUM(name) IR_ ## name,

/* ALLOCA with a constant size in the entry block: folded into the frame. */
#define IR_STATIC_ALLOCA (IR_SKIPPED | IR_FUSED | IR_SIMPLE | IR_ALLOCA)

enum _ir_rule {
	IR_FIRST_RULE = IR_LAST_OP,
	IR_RULES(IR_RULE_ENUM)
	IR_LAST_RULE
};

#define IR_RULE_NAME(name) #name,
const char *ir_rule_name[IR_LAST_OP] = {
	NULL,
	IR_RULES(IR_RULE_NAME)
	NULL
};

/* register allocation */
/* Report, for the instruction "ref", which operands must/should live in
 * registers, which temporary registers are needed (with their live sub-ranges),
 * any fixed result register, and register hints for arguments.
 * Returns the IR_OP*_..._IN_REG flag set; fills "constraints". */
int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
{
	uint32_t rule = ir_rule(ctx, ref);
	const ir_insn *insn;
	int n = 0;
	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;

	constraints->def_reg = IR_REG_NONE;
	constraints->hints_count = 0;
	switch (rule & IR_RULE_MASK) {
		case IR_BINOP_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				/* constant op1 always needs a temporary to be materialized */
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op2];
				/* op2 only needs a temporary when it cannot be encoded as an
				 * immediate of the corresponding AArch64 instruction */
				switch (insn->op) {
					case IR_ADD:
					case IR_ADD_OV:
					case IR_SUB:
					case IR_SUB_OV:
						if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_imm12(val_insn->val.u64)) {
							constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
							n++;
						}
						break;
					case IR_MUL_OV:
						/* temporary must survive past the def (overflow check) */
						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
						n++;
						break;
					case IR_AND:
					case IR_OR:
					case IR_XOR:
						if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) {
							constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
							n++;
						}
						break;
					case IR_MUL:
					case IR_DIV:
					case IR_MOD:
						/* MUL/DIV/MOD have no immediate forms */
						constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
						n++;
						break;
				}
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (insn->op == IR_MOD) {
				/* MOD is lowered to div + msub and needs an extra temporary */
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
				n++;
			}
			break;
		case IR_MUL_PWR2:
		case IR_DIV_PWR2:
		case IR_MOD_PWR2:
		case IR_SHIFT:
		case IR_SHIFT_CONST:
		case IR_OP_INT:
		case IR_OP_FP:
		case IR_INT2FP:
		case IR_FP2INT:
		case IR_FP2FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			/* 8/16-bit rotates have no native instruction and are emulated,
			 * which requires an extra temporary */
			if (rule == IR_SHIFT_CONST
			 && (insn->op == IR_ROL || insn->op == IR_ROR)
			 && ir_type_size[insn->type] < 4) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (rule == IR_SHIFT
			 && (insn->op == IR_ROL || insn->op == IR_ROR)
			 && ir_type_size[insn->type] < 4) {
				if (insn->op == IR_ROL) {
					flags |= IR_DEF_CONFLICTS_WITH_INPUT_REGS;
				}
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
				n++;
			} else if (rule == IR_SHIFT && insn->op == IR_ROL) {
				/* ROL is emulated via ROR with a negated count */
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SDIV_PWR2:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2)) {
				/* lowering adds (divisor - 1) as a bias before the shift */
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!aarch64_may_encode_imm12(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_SMOD_PWR2:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n++;
			break;
		case IR_CTPOP:
			/* population count goes through a NEON register */
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			constraints->tmp_regs[0] = IR_TMP_REG(2, IR_DOUBLE, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_BINOP_FP:
		case IR_MIN_MAX_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_INT:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				insn = &ctx->ir_base[insn->op2];
				/* CMP with an encodable imm12 needs no temporary */
				if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_VSTORE:
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_LOAD_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_STORE_INT:
		case IR_STORE_FP:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				/* storing integer zero can use xzr/wzr; no temporary needed */
				if (!IR_IS_TYPE_INT(insn->type) || IR_IS_SYM_CONST(insn->op) || insn->val.i64 != 0) {
					constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SWITCH:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op2)) {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			} else {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			/* temporary for the jump-table address */
			constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n++;
			break;
		case IR_CALL:
			insn = &ctx->ir_base[ref];
			constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? IR_REG_INT_RET1 : IR_REG_FP_RET1;
			/* a call clobbers all caller-saved registers */
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			IR_FALLTHROUGH;
		case IR_TAILCALL:
			insn = &ctx->ir_base[ref];
			if (insn->inputs_count > 2) {
				constraints->hints[2] = IR_REG_NONE;
				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
				if (!IR_IS_CONST_REF(insn->op2)) {
					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
					n++;
				}
			}
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_COND:
			insn = &ctx->ir_base[ref];
			n = 0;
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[n] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op2)) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_COPY_INT:
		case IR_COPY_FP:
		case IR_TRUNC:
		case IR_BITCAST:
		case IR_PROTO:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_ZEXT:
		case IR_SEXT:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG;
			break;
		case IR_PARAM:
			constraints->def_reg = ir_get_param_reg(ctx, ref);
			flags = 0;
			break;
		case IR_PI:
		case IR_PHI:
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_RLOAD:
			/* result is pinned to the register named by op2 */
			constraints->def_reg = ctx->ir_base[ref].op2;
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_EXITCALL:
			constraints->def_reg = IR_REG_INT_RET1;
			break;
		case IR_RSTORE:
			flags = IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_RETURN_INT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_INT_RET1;
			constraints->hints_count = 3;
			break;
		case IR_RETURN_FP:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_FP_RET1;
			constraints->hints_count = 3;
			break;
		case IR_SNAPSHOT:
			flags = 0;
			break;
		case IR_VA_START:
			flags = IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_VA_ARG:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_VA_COPY:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
	}
	constraints->tmps_count = n;

	return flags;
}

/* instruction selection */
/* Try to fuse "base + const" address computation into the LOAD/STORE that
 * uses it, when the constant fits the LDR/STR immediate offset range.
 * Marks the ADD as fused by setting its rule; no code changes here. */
static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type)
{
	if (!IR_IS_CONST_REF(addr_ref)) {
		ir_insn *addr_insn = &ctx->ir_base[addr_ref];

		if (addr_insn->op == IR_ADD
		 && !IR_IS_CONST_REF(addr_insn->op1)
		 && IR_IS_CONST_REF(addr_insn->op2) // TODO: temporary workaround
		 && !IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op)
		 && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) {
			ir_use_list *use_list = &ctx->use_lists[addr_ref];
			ir_ref j = use_list->count;

			if (j > 1) {
				/* check if address is used only in LOAD and STORE */
				ir_ref *p = &ctx->use_edges[use_list->refs];

				do {
					ir_insn *insn = &ctx->ir_base[*p];
					/* a STORE that uses the address as the stored VALUE (op3)
					 * still needs it in a register, so fusing is not possible */
					if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
						return;
					}
					p++;
				} while (--j);
			}
			ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op;
		}
	}
}

/* Map a generic IR instruction to a backend rule (enum _ir_rule), possibly
 * fusing or skipping neighbouring instructions as a side effect. */
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
	ir_insn
*op2_insn;
	ir_insn *insn = &ctx->ir_base[ref];

	switch (insn->op) {
		case IR_EQ:
		case IR_NE:
		case IR_LT:
		case IR_GE:
		case IR_LE:
		case IR_GT:
		case IR_ULT:
		case IR_UGE:
		case IR_ULE:
		case IR_UGT:
			/* comparison kind is determined by the OPERAND type, not the
			 * (boolean) result type */
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return IR_CMP_INT;
			} else {
				return IR_CMP_FP;
			}
			break;
		case IR_ADD:
		case IR_SUB:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					/* placeholders for strength reductions (kept disabled) */
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.i64 == 0) {
						// return IR_COPY_INT;
					}
				}
binop_int:
				return IR_BINOP_INT;
			} else {
binop_fp:
				return IR_BINOP_FP;
			}
			break;
		case IR_MUL:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// 0
					} else if (op2_insn->val.u64 == 1) {
						// return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						/* multiply by power of two becomes a shift */
						return IR_MUL_PWR2;
					}
				}
				return IR_BINOP_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_ADD_OV:
		case IR_SUB_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_MUL_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_DIV:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 1) {
						// return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						/* signed power-of-two division needs a bias, so it has
						 * its own rule */
						if (IR_IS_TYPE_UNSIGNED(insn->type)) {
							return IR_DIV_PWR2;
						} else {
							return IR_SDIV_PWR2;
						}
					}
				}
				return IR_BINOP_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_MOD:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
					if (IR_IS_TYPE_UNSIGNED(insn->type)) {
						return IR_MOD_PWR2;
					} else {
						return IR_SMOD_PWR2;
					}
				}
			}
			return IR_BINOP_INT;
		case IR_BSWAP:
		case IR_NOT:
		case IR_CTLZ:
		case IR_CTTZ:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			return IR_OP_INT;
		case IR_NEG:
		case IR_ABS:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_OP_INT;
			} else {
				return IR_OP_FP;
			}
		case IR_OR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// return IR_COPY_INT;
				} else if (op2_insn->val.i64 == -1) {
					// -1
				}
			}
			goto binop_int;
		case IR_AND:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// 0
				} else if (op2_insn->val.i64 == -1) {
					// return IR_COPY_INT;
				}
			}
			goto binop_int;
		case IR_XOR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				}
			}
			goto binop_int;
		case IR_SHL:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// return IR_COPY_INT;
					} else if (ir_type_size[insn->type] >= 4) {
						if (op2_insn->val.u64 == 1) {
							// lea [op1*2]
						} else if (op2_insn->val.u64 == 2) {
							// lea [op1*4]
						} else if (op2_insn->val.u64 == 3) {
							// lea [op1*8]
						}
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_SHR:
		case IR_SAR:
		case IR_ROL:
		case IR_ROR:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// return IR_COPY_INT;
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_MIN:
		case IR_MAX:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_MIN_MAX_INT;
			} else {
				goto binop_fp;
			}
			break;
//		case IR_COND:
		case IR_COPY:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_COPY_INT | IR_MAY_REUSE;
			} else {
				return IR_COPY_FP | IR_MAY_REUSE;
			}
			break;
//		case IR_TRUNC:
		case IR_PROTO:
			return insn->op | IR_MAY_REUSE;
		case IR_BITCAST:
			/* int<->int bitcast may reuse the source register; int<->fp
			 * requires a real move between register files */
			if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return insn->op | IR_MAY_REUSE;
			} else {
				return insn->op;
			}
		case IR_CALL:
			if (ctx->flags & IR_FUNCTION) {
				ctx->flags |= IR_USE_FRAME_POINTER;
			}
			ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT;
			return IR_CALL;
		case IR_VAR:
			return IR_SKIPPED | IR_VAR;
		case IR_PARAM:
			/* unused parameters generate no code */
			return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
		case IR_ALLOCA:
			if (ctx->flags & IR_FUNCTION) {
				/* constant-size alloca in the entry block becomes part of the
				 * fixed stack frame */
				if (IR_IS_CONST_REF(insn->op2) && ctx->cfg_map[ref] == 1) {
					ir_insn *val = &ctx->ir_base[insn->op2];

					if (!IR_IS_SYM_CONST(val->op)) {
						return IR_STATIC_ALLOCA;
					}
				}
				ctx->flags |= IR_USE_FRAME_POINTER;
				ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT;
			}
			return IR_ALLOCA;
		case IR_LOAD:
			ir_match_fuse_addr(ctx, insn->op2, insn->type);
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_LOAD_INT;
			} else {
				return IR_LOAD_FP;
			}
			break;
		case IR_STORE:
			ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type);
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
				return IR_STORE_INT;
			} else {
				return IR_STORE_FP;
			}
			break;
		case IR_RLOAD:
			/* reads of fixed/reserved registers need no code */
			if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) {
				return IR_SKIPPED | IR_RLOAD;
			}
			return IR_RLOAD;
		case IR_RSTORE:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				/* try to fuse "reg = reg <binop> x" into a single in-register
				 * update when the stored value is a single-use binop whose
				 * input is an RLOAD of the same register */
				if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
					ir_insn *op_insn = &ctx->ir_base[insn->op2];

					if (!ctx->rules[insn->op2]) {
						ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2);
					}
					if (ctx->rules[insn->op2] == IR_BINOP_INT) {
						if (ctx->ir_base[op_insn->op1].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op1].op2 == insn->op3) {
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
						 && ctx->ir_base[op_insn->op2].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op2].op2 == insn->op3) {
							/* commutative op: swap operands to put the RLOAD first */
							SWAP_REFS(op_insn->op1, op_insn->op2);
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						}
					}
				}
			}
			return IR_RSTORE;
		case IR_START:
		case IR_BEGIN:
		case IR_IF_TRUE:
		case IR_IF_FALSE:
		case IR_CASE_VAL:
		case IR_CASE_DEFAULT:
		case IR_MERGE:
		case IR_LOOP_BEGIN:
		case IR_UNREACHABLE:
			/* pure control-flow markers generate no code */
			return IR_SKIPPED | insn->op;
		case IR_RETURN:
			if (!insn->op2) {
				return IR_RETURN_VOID;
			} else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_RETURN_INT;
			} else {
				return IR_RETURN_FP;
			}
		case IR_IF:
			/* fuse a single-use comparison (or OVERFLOW) in the same block
			 * directly into the branch */
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_CMP_AND_BRANCH_INT;
					} else {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_CMP_AND_BRANCH_FP;
					}
				} else if (op2_insn->op == IR_OVERFLOW) {
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_OVERFLOW_AND_BRANCH;
				}
			}
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_IF_INT;
			} else {
				IR_ASSERT(0 && "NIY IR_IF_FP");
				break;
			}
		case IR_GUARD:
		case IR_GUARD_NOT:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
				 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
				 && (insn->op2 == ref - 1 ||
				     (insn->op2 == ctx->prev_ref[ref] - 1
				      && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_GUARD_CMP_INT;
					} else {
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_GUARD_CMP_FP;
					}
				} else if (op2_insn->op == IR_OVERFLOW) {
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_GUARD_OVERFLOW;
				}
			}
			return insn->op;
		case IR_VA_START:
			ctx->flags2 |= IR_HAS_VA_START;
			/* scan all uses of the va_list to learn which argument-save
			 * areas (GP/FP) the prologue must spill */
			if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) {
				ir_use_list *use_list = &ctx->use_lists[insn->op2];
				ir_ref *p, n = use_list->count;
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					ir_insn *use_insn = &ctx->ir_base[*p];
					if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) {
					} else if (use_insn->op == IR_VA_COPY) {
						if (use_insn->op3 == insn->op2) {
							ctx->flags2 |= IR_HAS_VA_COPY;
						}
					} else if (use_insn->op == IR_VA_ARG) {
						if (use_insn->op2 == insn->op2) {
							if (IR_IS_TYPE_INT(use_insn->type)) {
								ctx->flags2 |= IR_HAS_VA_ARG_GP;
							} else {
								IR_ASSERT(IR_IS_TYPE_FP(use_insn->type));
								ctx->flags2 |= IR_HAS_VA_ARG_FP;
							}
						}
					} else if (*p > ref) {
						/* direct va_list access */
						ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
					}
				}
			}
			return IR_VA_START;
		case IR_VA_END:
			return IR_SKIPPED | IR_NOP;
		case IR_VADDR:
			if (ctx->use_lists[ref].count > 0) {
				ir_use_list *use_list = &ctx->use_lists[ref];
				ir_ref *p, n = use_list->count;

				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					if (ctx->ir_base[*p].op != IR_VA_END) {
						return IR_STATIC_ALLOCA;
					}
				}
			}
			return IR_SKIPPED | IR_NOP;
		default:
			break;
	}

	/* by default the generic opcode itself is the rule */
	return insn->op;
}

/* Second matching pass hook; this backend has nothing to do here. */
static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule)
{
}

/* code generation */
/* Return the frame offset of ref's spill slot and set *reg to the register
 * the offset is relative to (spill base, FP, or SP). */
static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	int32_t offset;

	IR_ASSERT(ref >= 0);
	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if
(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { 1118 IR_ASSERT(ctx->spill_base != IR_REG_NONE); 1119 *reg = ctx->spill_base; 1120 return offset; 1121 } 1122 *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 1123 return IR_SPILL_POS_TO_OFFSET(offset); 1124} 1125 1126static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v) 1127{ 1128 int32_t offset; 1129 ir_reg base; 1130 1131 IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]); 1132 offset = ctx->live_intervals[v]->stack_spill_pos; 1133 IR_ASSERT(offset != -1); 1134 if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { 1135 IR_ASSERT(ctx->spill_base != IR_REG_NONE); 1136 return IR_MEM_BO(ctx->spill_base, offset); 1137 } 1138 base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 1139 offset = IR_SPILL_POS_TO_OFFSET(offset); 1140 return IR_MEM_BO(base, offset); 1141} 1142 1143static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref) 1144{ 1145 IR_ASSERT(!IR_IS_CONST_REF(ref)); 1146 return ir_vreg_spill_slot(ctx, ctx->vregs[ref]); 1147} 1148 1149static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem) 1150{ 1151 return IR_MEM_VAL(ir_ref_spill_slot(ctx, ref)) == IR_MEM_VAL(mem); 1152} 1153 1154static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) 1155{ 1156 ir_insn *var_insn = &ctx->ir_base[ref]; 1157 1158 IR_ASSERT(var_insn->op == IR_VAR); 1159 *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
		IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(var_insn->op3);
}

/* Check whether the spilled value "ref" doesn't have to be re-loaded for
 * "use": true when this is the last use position of the interval, or the
 * following use is not an input operand (op_num == 0). */
static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_live_interval *ival;

	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	while (ival) {
		ir_use_pos *use_pos = ival->use_pos;
		while (use_pos) {
			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
				return !use_pos->next || use_pos->next->op_num == 0;
			}
			use_pos = use_pos->next;
		}
		ival = ival->next;
	}
	return 0;
}

/* Load the integer constant "val" into "reg" using a minimal
 * MOVZ/MOVN/MOVK sequence: only the non-zero 16-bit chunks are emitted,
 * and MOVN is used when the complement fits into 16 bits. */
static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (ir_type_size[type] == 8) {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rx(reg), xzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rx(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			/* low 16 bits are zero - start the sequence at chunk 1 */
			|	movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 32) & 0xffff) {
			|	movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else {
			|	movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
		}
	} else {
		/* 32-bit (and narrower) values: at most two 16-bit chunks */
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rw(reg), wzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rw(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			|	movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
		}
	}
}

/* Load an integer of the given type from memory operand "mem" into "reg",
 * using base+immediate addressing when the offset is encodable and
 * falling back to base+index addressing otherwise. */
static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			switch (ir_type_size[type]) {
				default:
					IR_ASSERT(0);
				case 8:
					|	ldr Rx(reg), [Rx(base_reg), #offset]
					break;
				case 4:
					|	ldr Rw(reg), [Rx(base_reg), #offset]
					break;
				case 2:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsh Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrh Rw(reg), [Rx(base_reg), #offset]
					}
					break;
				case 1:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsb Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrb Rw(reg), [Rx(base_reg), #offset]
					}
					break;
			}
			return;
		} else {
			/* offset doesn't fit into the instruction - materialize it in
			 * the reserved temporary and use register-offset addressing */
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1283 } 1284 } else { 1285 IR_ASSERT(offset == 0); 1286 } 1287 1288 switch (ir_type_size[type]) { 1289 default: 1290 IR_ASSERT(0); 1291 case 8: 1292 | ldr Rx(reg), [Rx(base_reg), Rx(index_reg)] 1293 break; 1294 case 4: 1295 | ldr Rw(reg), [Rx(base_reg), Rx(index_reg)] 1296 break; 1297 case 2: 1298 if (IR_IS_TYPE_SIGNED(type)) { 1299 | ldrsh Rw(reg), [Rx(base_reg), Rx(index_reg)] 1300 } else { 1301 | ldrh Rw(reg), [Rx(base_reg), Rx(index_reg)] 1302 } 1303 break; 1304 case 1: 1305 if (IR_IS_TYPE_SIGNED(type)) { 1306 | ldrsb Rw(reg), [Rx(base_reg), Rx(index_reg)] 1307 } else { 1308 | ldrb Rw(reg), [Rx(base_reg), Rx(index_reg)] 1309 } 1310 break; 1311 } 1312} 1313 1314static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) 1315{ 1316 ir_backend_data *data = ctx->data; 1317 dasm_State **Dst = &data->dasm_state; 1318 ir_insn *insn = &ctx->ir_base[src]; 1319 int label; 1320 1321 if (type == IR_FLOAT && insn->val.u32 == 0) { 1322 | fmov Rs(reg-IR_REG_FP_FIRST), wzr 1323 } else if (type == IR_DOUBLE && insn->val.u64 == 0) { 1324 | fmov Rd(reg-IR_REG_FP_FIRST), xzr 1325 } else { 1326 label = ir_const_label(ctx, src); 1327 if (type == IR_DOUBLE) { 1328 | ldr Rd(reg-IR_REG_FP_FIRST), =>label 1329 } else { 1330 IR_ASSERT(type == IR_FLOAT); 1331 | ldr Rs(reg-IR_REG_FP_FIRST), =>label 1332 } 1333 } 1334} 1335 1336static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 1337{ 1338 ir_backend_data *data = ctx->data; 1339 dasm_State **Dst = &data->dasm_state; 1340 ir_reg base_reg = IR_MEM_BASE(mem); 1341 ir_ref index_reg = IR_MEM_INDEX(mem); 1342 int32_t offset = IR_MEM_OFFSET(mem); 1343 1344 if (index_reg == IR_REG_NONE) { 1345 if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { 1346 if (type == IR_DOUBLE) { 1347 | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] 1348 } else { 1349 IR_ASSERT(type == IR_FLOAT); 1350 | ldr Rs(reg-IR_REG_FP_FIRST), 
[Rx(base_reg), #offset] 1351 } 1352 } else { 1353 index_reg = IR_REG_INT_TMP; /* reserved temporary register */ 1354 1355 ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1356 } 1357 return; 1358 } else { 1359 IR_ASSERT(offset == 0); 1360 } 1361 1362 if (type == IR_DOUBLE) { 1363 | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1364 } else { 1365 IR_ASSERT(type == IR_FLOAT); 1366 | ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1367 } 1368} 1369 1370static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 1371{ 1372 if (IR_IS_TYPE_INT(type)) { 1373 ir_emit_load_mem_int(ctx, type, reg, mem); 1374 } else { 1375 ir_emit_load_mem_fp(ctx, type, reg, mem); 1376 } 1377} 1378 1379static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) 1380{ 1381 ir_backend_data *data = ctx->data; 1382 dasm_State **Dst = &data->dasm_state; 1383 ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 1384 int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); 1385 1386 IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); 1387 if (aarch64_may_encode_imm12(offset)) { 1388 | add Rx(reg), Rx(base), #offset 1389 } else { 1390 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, offset); 1391 | add sp, sp, Rx(IR_REG_INT_TMP) 1392 } 1393} 1394 1395 1396static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) 1397{ 1398 if (IR_IS_CONST_REF(src)) { 1399 if (IR_IS_TYPE_INT(type)) { 1400 ir_insn *insn = &ctx->ir_base[src]; 1401 1402 if (insn->op == IR_SYM || insn->op == IR_FUNC) { 1403 void *addr = ir_sym_val(ctx, insn); 1404 IR_ASSERT(addr); 1405 ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr); 1406 } else if (insn->op == IR_STR) { 1407 ir_backend_data *data = ctx->data; 1408 dasm_State **Dst = &data->dasm_state; 1409 int label = ir_const_label(ctx, src); 1410 1411 | adr Rx(reg), =>label 1412 } else { 1413 ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); 1414 } 1415 
} else { 1416 ir_emit_load_imm_fp(ctx, type, reg, src); 1417 } 1418 } else if (ctx->vregs[src]) { 1419 ir_mem mem = ir_ref_spill_slot(ctx, src); 1420 ir_emit_load_mem(ctx, type, reg, mem); 1421 } else { 1422 ir_load_local_addr(ctx, reg, src); 1423 } 1424} 1425 1426static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 1427{ 1428 ir_backend_data *data = ctx->data; 1429 dasm_State **Dst = &data->dasm_state; 1430 ir_reg base_reg = IR_MEM_BASE(mem); 1431 ir_reg index_reg = IR_MEM_INDEX(mem); 1432 int32_t offset = IR_MEM_OFFSET(mem); 1433 1434 if (index_reg == IR_REG_NONE) { 1435 if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { 1436 switch (ir_type_size[type]) { 1437 default: 1438 IR_ASSERT(0); 1439 case 8: 1440 | str Rx(reg), [Rx(base_reg), #offset] 1441 break; 1442 case 4: 1443 | str Rw(reg), [Rx(base_reg), #offset] 1444 break; 1445 case 2: 1446 | strh Rw(reg), [Rx(base_reg), #offset] 1447 break; 1448 case 1: 1449 | strb Rw(reg), [Rx(base_reg), #offset] 1450 break; 1451 } 1452 return; 1453 } else { 1454 index_reg = IR_REG_INT_TMP; /* reserved temporary register */ 1455 1456 ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1457 } 1458 } else { 1459 IR_ASSERT(offset == 0); 1460 } 1461 1462 switch (ir_type_size[type]) { 1463 default: 1464 IR_ASSERT(0); 1465 case 8: 1466 | str Rx(reg), [Rx(base_reg), Rx(index_reg)] 1467 break; 1468 case 4: 1469 | str Rw(reg), [Rx(base_reg), Rx(index_reg)] 1470 break; 1471 case 2: 1472 | strh Rw(reg), [Rx(base_reg), Rx(index_reg)] 1473 break; 1474 case 1: 1475 | strb Rw(reg), [Rx(base_reg), Rx(index_reg)] 1476 break; 1477 } 1478} 1479 1480static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 1481{ 1482 ir_backend_data *data = ctx->data; 1483 dasm_State **Dst = &data->dasm_state; 1484 ir_reg base_reg = IR_MEM_BASE(mem); 1485 ir_reg index_reg = IR_MEM_INDEX(mem); 1486 int32_t offset = IR_MEM_OFFSET(mem); 1487 1488 if (index_reg == IR_REG_NONE) { 1489 if 
(aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { 1490 if (type == IR_DOUBLE) { 1491 | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] 1492 } else { 1493 IR_ASSERT(type == IR_FLOAT); 1494 | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] 1495 } 1496 } else { 1497 index_reg = IR_REG_INT_TMP; /* reserved temporary register */ 1498 1499 ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1500 } 1501 return; 1502 } else { 1503 IR_ASSERT(offset == 0); 1504 } 1505 1506 if (type == IR_DOUBLE) { 1507 | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1508 } else { 1509 IR_ASSERT(type == IR_FLOAT); 1510 | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1511 } 1512} 1513 1514static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 1515{ 1516 if (IR_IS_TYPE_INT(type)) { 1517 ir_emit_store_mem_int(ctx, type, mem, reg); 1518 } else { 1519 ir_emit_store_mem_fp(ctx, type, mem, reg); 1520 } 1521} 1522 1523static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) 1524{ 1525 IR_ASSERT(dst >= 0); 1526 ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg); 1527} 1528 1529static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 1530{ 1531 ir_backend_data *data = ctx->data; 1532 dasm_State **Dst = &data->dasm_state; 1533 1534 if (ir_type_size[type] == 8) { 1535 if (dst == IR_REG_STACK_POINTER) { 1536 | mov sp, Rx(src) 1537 } else if (src == IR_REG_STACK_POINTER) { 1538 | mov Rx(dst), sp 1539 } else { 1540 | mov Rx(dst), Rx(src) 1541 } 1542 } else { 1543 | mov Rw(dst), Rw(src) 1544 } 1545} 1546 1547static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 1548{ 1549 ir_backend_data *data = ctx->data; 1550 dasm_State **Dst = &data->dasm_state; 1551 1552 if (ir_type_size[type] == 8) { 1553 | mov Rx(dst), Rx(src) 1554 } else { 1555 | mov Rw(dst), Rw(src) 1556 } 1557} 1558static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 1559{ 
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		|	fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST)
	} else {
		|	fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST)
	}
}

/* Emit the function prologue: allocate the stack frame, save x29/x30 and
 * the used callee-saved registers, and spill incoming varargs registers
 * when the function uses va_start. Must stay in sync with
 * ir_emit_epilogue(). */
static void ir_emit_prologue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int offset;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		offset = -(ctx->stack_frame_size+16);
		if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
			/* allocate the frame and save FP/LR in one pre-indexed STP */
			|	stp x29, x30, [sp, #offset]!
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size+16)
			|	stp x29, x30, [sp]
		}
		|	mov x29, sp
		if (ctx->call_stack_size) {
			|	sub sp, sp, #(ctx->call_stack_size)
		}
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			/* frame fits into the guaranteed red zone - no SP adjustment */
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
	if (ctx->used_preserved_regs) {
		ir_reg fp;
		uint32_t i;
		ir_reg prev = IR_REG_NONE;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = ctx->stack_frame_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		/* save callee-saved registers pairwise (STP) where possible;
		 * "prev" holds a pending register waiting for its pair */
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (prev == IR_REG_NONE) {
					prev = i;
				} else if (i < IR_REG_FP_FIRST) {
					offset -= sizeof(void*) * 2;
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(i), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(i), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
				} else {
					if (prev < IR_REG_FP_FIRST) {
						/* mixed GP/FP pair - must use two single stores */
						offset -= sizeof(void*);
						|	str Rx(prev), [Rx(fp), #offset]
						offset -= sizeof(void*);
						|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
					} else {
						offset -= sizeof(void*) * 2;
						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
							|	stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
						} else {
							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
							|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
							|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
						}
					}
					prev = IR_REG_NONE;
				}
			}
		}
		if (prev != IR_REG_NONE) {
			/* odd register left over - store it alone */
			if (prev < IR_REG_FP_FIRST) {
				offset -= sizeof(void*);
				|	str Rx(prev), [Rx(fp), #offset]
			} else {
				offset -= sizeof(void*);
				|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
			}
		}
	}
	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#ifndef __APPLE__
		/* spill unnamed argument registers into the varargs save areas
		 * (AAPCS64 va_list; Apple uses a different, stack-only ABI) */
		const int8_t *int_reg_params = _ir_int_reg_params;
		const int8_t *fp_reg_params = _ir_fp_reg_params;
		ir_reg fp;
		int offset;
		int i;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;

			offset = ctx->locals_area_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->locals_area_size + ctx->call_stack_size;
		}

		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
			ir_reg prev = IR_REG_NONE;

			/* skip named args */
			offset += sizeof(void*) * ctx->gp_reg_params;
			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
				if (prev != IR_REG_NONE) {
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
					offset += sizeof(void*) * 2;
				} else {
					prev = int_reg_params[i];
				}
			}
			if (prev != IR_REG_NONE) {
				|	str Rx(prev), [Rx(fp), #offset]
				offset += sizeof(void*);
			}
		}
		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
			/* skip named args */
			offset += 16 * ctx->fp_reg_params;
			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
				// TODO: Rd->Rq stur->str ???
				|	str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
				offset += 16;
			}
		}
#endif
	}
}

/* Emit the function epilogue: restore callee-saved registers, x29/x30 and
 * deallocate the stack frame. Mirrors ir_emit_prologue(). */
static void ir_emit_epilogue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ctx->used_preserved_regs) {
		int offset;
		uint32_t i;
		ir_reg prev = IR_REG_NONE;
		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			offset = ctx->stack_frame_size + sizeof(void*) * 2;
		} else {
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		/* restore pairwise (LDP) in the same order the prologue saved */
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (prev == IR_REG_NONE) {
					prev = i;
				} else if (i < IR_REG_FP_FIRST) {
					offset -= sizeof(void*) * 2;
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	ldp Rx(prev), Rx(i), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	ldr Rx(prev), [Rx(fp), #offset]
						|	ldr Rx(i), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
				} else {
					if (prev < IR_REG_FP_FIRST) {
						offset -= sizeof(void*);
						|	ldr Rx(prev), [Rx(fp), #offset]
						offset -= sizeof(void*);
						|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
					} else {
						offset -= sizeof(void*) * 2;
						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
							|	ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
						} else {
							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
							|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
							|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
						}
					}
					prev = IR_REG_NONE;
				}
			}
		}
		if (prev != IR_REG_NONE) {
			/* odd register left over - restore it alone */
			if (prev < IR_REG_FP_FIRST) {
				offset -= sizeof(void*);
				|	ldr Rx(prev), [Rx(fp), #offset]
			} else {
				offset -= sizeof(void*);
				|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
			}
		}
	}

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		if (ctx->call_stack_size || (ctx->flags2 & IR_HAS_ALLOCA)) {
			/* SP may have moved (outgoing args / alloca) - restore from FP */
			|	mov sp, x29
		}
		if (aarch64_may_encode_imm7_addr_offset(ctx->stack_frame_size+16, 8)) {
			/* restore FP/LR and free the frame in one post-indexed LDP */
			|	ldp x29, x30, [sp], #(ctx->stack_frame_size+16)
		} else {
			|	ldp x29, x30, [sp]
			|	add sp, sp, #(ctx->stack_frame_size+16)
		}
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			|	add sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
}

/* Emit code for an integer binary operation (ADD/SUB/MUL/DIV/MOD and the
 * bitwise ops, including the overflow-checking *_OV variants). Operands
 * are either registers (reloaded from their spill slots when needed) or a
 * constant second operand encoded as an immediate. */
static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg tmp_reg;

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				/* when op1 == op2 the value was already loaded above */
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg
				break;
			case IR_ADD_OV:
				|	ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg
				break;
			case IR_SUB_OV:
				|	ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MUL_OV:
				/* MUL doesn't set flags on aarch64 - detect overflow by
				 * comparing the high half of the product against the sign
				 * extension (signed) or zero (unsigned) of the low half */
				if (ir_type_size[type] == 8) {
					if (IR_IS_TYPE_SIGNED(type)) {
						tmp_reg = ctx->regs[def][3];
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						|	smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
						|	cmp Rx(tmp_reg), Rx(def_reg), asr #63
					} else {
						tmp_reg = ctx->regs[def][3];
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						|	umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
						|	cmp Rx(tmp_reg), xzr
					}
				} else {
					if (IR_IS_TYPE_SIGNED(type)) {
						tmp_reg = ctx->regs[def][3];
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						|	smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
						|	asr Rx(tmp_reg), Rx(def_reg), #32
						|	cmp Rx(tmp_reg), Rx(def_reg), asr #31
					} else {
						|	umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
						|	cmp xzr, Rx(def_reg), lsr #32
					}
				}
				break;
			case IR_DIV:
				if (IR_IS_TYPE_SIGNED(type)) {
					|	ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg
				} else {
					|	ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg
				}
				break;
			case IR_MOD:
				/* no hardware remainder: rem = op1 - (op1 / op2) * op2 */
				tmp_reg = ctx->regs[def][3];
				IR_ASSERT(tmp_reg != IR_REG_NONE);
				if (IR_IS_TYPE_SIGNED(type)) {
					|	ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg
					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
				} else {
					|	ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg
					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
				}
				break;
			case IR_OR:
				|	ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg
				break;
		}
	} else {
		IR_ASSERT(IR_IS_CONST_REF(op2));
		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
		int32_t val = ctx->ir_base[op2].val.i32;
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val
				break;
			case IR_ADD_OV:
				|	ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val
				break;
			case IR_SUB:
				|	ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val
				break;
			case IR_SUB_OV:
				|	ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val
				break;
			case IR_OR:
				/* logical immediates: use the full 64-bit constant for
				 * 8-byte types (32-bit truncation would be wrong) */
				if (ir_type_size[type] == 8) {
					uint64_t val = ctx->ir_base[op2].val.u64;
					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
				} else {
					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
				}
				break;
			case IR_AND:
				if (ir_type_size[type] == 8) {
					uint64_t val = ctx->ir_base[op2].val.u64;
					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
				} else {
					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
				}
				break;
			case IR_XOR:
				if (ir_type_size[type] == 8) {
					uint64_t val = ctx->ir_base[op2].val.u64;
					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
				} else {
					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
				}
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit IR_MIN/IR_MAX for integers via CMP + CSEL with the condition chosen
 * by signedness. */
static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, op2);
	}

	if (op1 == op2) {
		/* min(x, x) == max(x, x) == x - nothing to emit */
		return;
	}

	if (ir_type_size[type] == 8) {
		|	cmp Rx(op1_reg), Rx(op2_reg)
		if (insn->op == IR_MIN) {
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le
			} else {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls
			}
		} else {
			IR_ASSERT(insn->op == IR_MAX);
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge
			} else {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs
			}
		}
	} else {
		|	cmp Rw(op1_reg), Rw(op2_reg)
		if (insn->op == IR_MIN) {
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le
			} else {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls
			}
		} else {
			IR_ASSERT(insn->op == IR_MAX);
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge
			} else {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs
			}
		}
	}

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Materialize the overflow flag of the preceding *_OV instruction as a
 * 0/1 value (CSET on the condition left behind by that instruction). */
static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_insn *math_insn = &ctx->ir_base[insn->op1];
	ir_type type = math_insn->type;

	IR_ASSERT(def_reg != IR_REG_NONE);
	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (math_insn->op == IR_MUL_OV) {
		/* MUL_OV left the comparison result in the flags (see binop) */
		|	cset Rw(def_reg), ne
	} else if (IR_IS_TYPE_SIGNED(type)) {
		|	cset Rw(def_reg), vs
	} else {
		|	cset Rw(def_reg), cs
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Emit a fused OVERFLOW + IF: branch directly on the overflow condition
 * of the preceding *_OV instruction. The condition is inverted when the
 * "true" successor is the fall-through block. */
static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
	ir_type type = math_insn->type;
	uint32_t true_block, false_block;
	bool reverse = 0;

	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
	if (true_block == next_block) {
		/* fall through to the true block - branch on the inverted
		 * condition to the false block instead */
		reverse = 1;
		true_block = false_block;
		false_block = 0;
	} else if (false_block == next_block) {
		false_block = 0;
	}

	if (math_insn->op == IR_MUL_OV) {
		if (reverse) {
			|	beq =>true_block
		} else {
			|	bne =>true_block
		}
	} else if (IR_IS_TYPE_SIGNED(type)) {
		if (reverse) {
			|	bvc =>true_block
		} else {
			|	bvs =>true_block
		}
	} else {
		if (reverse) {
			|	bcc =>true_block
		} else {
			|	bcs =>true_block
		}
	}
	if (false_block) {
		|	b =>false_block
	}
}

/* Emit an IR_RSTORE fused with a binary operation: apply the operation
 * in-place on the fixed register named by insn->op3. */
static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op2];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	ir_reg reg;

	IR_ASSERT(insn->op == IR_RSTORE);
	reg = insn->op3;

	if (op2_reg == IR_REG_NONE) {
		/* constant second operand - encode as immediate */
		ir_val *val = &ctx->ir_base[op2].val;

		IR_ASSERT(IR_IS_CONST_REF(op2));
		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32
				break;
			case IR_SUB:
				|	ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32
				break;
			case IR_OR:
				|	ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32
				break;
			case IR_AND:
				|	ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32
				break;
			case IR_XOR:
				|	ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32
				break;
		}
	} else {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg
				break;
			case IR_OR:
				|	ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg
				break;
		}
	}
}

/* Emit MUL/unsigned DIV/unsigned MOD by a power-of-two constant using
 * shift/mask instructions instead of the expensive multiply/divide. */
static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (insn->op == IR_MUL) {
		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
		if (shift == 1) {
			/* x * 2 == x + x */
			|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op1_reg
		} else {
			|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
		}
	} else if (insn->op == IR_DIV) {
		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
		|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
	} else {
		IR_ASSERT(insn->op == IR_MOD);
		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
		|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, mask
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit signed division by a power-of-two constant: bias negative
 * dividends by (divisor - 1) before the arithmetic shift so the result
 * rounds toward zero (as C signed division requires). */
static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
	int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;

	IR_ASSERT(shift != 0);
	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && def_reg != op1_reg);

	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	if (op2_reg != IR_REG_NONE) {
		/* the bias doesn't fit an immediate - materialize it in a register */
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load_imm_int(ctx, type, op2_reg, offset);
	}

	if (ir_type_size[type] == 8) {
		|	cmp Rx(op1_reg), #0
		if (op2_reg != IR_REG_NONE) {
			|	add Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
		} else {
			|	add Rx(def_reg), Rx(op1_reg), #offset
		}
		|	csel Rx(def_reg), Rx(def_reg), Rx(op1_reg), lt
		|	asr Rx(def_reg), Rx(def_reg), #shift
	} else {
		|	cmp Rw(op1_reg), #0
		if (op2_reg != IR_REG_NONE) {
			|	add Rw(def_reg), Rw(op1_reg), Rw(op2_reg)
		} else {
			|	add Rw(def_reg), Rw(op1_reg), #offset
		}
		|	csel Rw(def_reg), Rw(def_reg), Rw(op1_reg), lt
		if (ir_type_size[type] == 4) {
			|	asr Rw(def_reg), Rw(def_reg), #shift
		} else if (ir_type_size[type] == 2) {
			/* narrow types: extract exactly the significant bits */
			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #16
		} else {
			IR_ASSERT(ir_type_size[type] == 1);
			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #8
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit signed modulo by a power-of-two constant. Uses NEGS + masked
 * CSNEG so the result keeps the sign of the dividend (C semantics). */
static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg tmp_reg = ctx->regs[def][3];
//	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
	uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;

	IR_ASSERT(mask != 0);
	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}

	/* alternative bias-based sequence (kept for reference):
//	|	ASM_REG_REG_IMM_OP asr, type, tmp_reg, def_reg, (ir_type_size[type]*8-1)
//	|	ASM_REG_REG_IMM_OP lsr, type, tmp_reg, tmp_reg, (ir_type_size[type]*8-shift)
//	|	ASM_REG_REG_REG_OP add, type, def_reg, def_reg, tmp_reg
//	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
//	|	ASM_REG_REG_REG_OP sub, type, def_reg, def_reg, tmp_reg
	*/

	|	ASM_REG_REG_OP negs, type, tmp_reg, def_reg
	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
	|	ASM_REG_REG_IMM_OP and, type, tmp_reg, tmp_reg, mask
	|	ASM_REG_REG_REG_TXT_OP csneg, type, def_reg, def_reg, tmp_reg, mi

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit a variable-amount shift. Sub-word operands are masked first
 * because AArch64 shifts operate on full 32/64-bit registers. */
static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg tmp_reg;

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, insn->op1);
	}
	if (IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	}
	switch (insn->op) {
		default:
			IR_ASSERT(0);
		case IR_SHL:
			if (ir_type_size[type] == 1) {
				|	and Rw(def_reg), Rw(op1_reg), #0xff
				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else if (ir_type_size[type] == 2) {
				|	and Rw(def_reg), Rw(op1_reg), #0xffff
				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else {
				|	ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
			}
			break;
		case IR_SHR:
			if (ir_type_size[type] == 1) {
				|	and Rw(def_reg), Rw(op1_reg), #0xff
				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else if
(ir_type_size[type] == 2) { 2312 | and Rw(def_reg), Rw(op1_reg), #0xffff 2313 | lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg) 2314 } else { 2315 | ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg 2316 } 2317 break; 2318 case IR_SAR: 2319 if (ir_type_size[type] == 1) { 2320 | sxtb Rw(def_reg), Rw(op1_reg) 2321 | asr Rw(def_reg), Rw(def_reg), Rw(op2_reg) 2322 } else if (ir_type_size[type] == 2) { 2323 | sxth Rw(def_reg), Rw(op1_reg) 2324 | asr Rw(def_reg), Rw(def_reg), Rw(op2_reg) 2325 } else { 2326 | ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg 2327 } 2328 break; 2329 case IR_ROL: 2330 tmp_reg = ctx->regs[def][3]; 2331 IR_ASSERT(tmp_reg != IR_REG_NONE); 2332 if (ir_type_size[type] == 1) { 2333 | and Rw(def_reg), Rw(op1_reg), #0xff 2334 | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #8 2335 | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16 2336 | neg Rw(tmp_reg), Rw(op2_reg) 2337 | ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg) 2338 | and Rw(def_reg), Rw(def_reg), #0xff 2339 } else if (ir_type_size[type] == 2) { 2340 | and Rw(def_reg), Rw(op1_reg), #0xffff 2341 | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16 2342 | neg Rw(tmp_reg), Rw(op2_reg) 2343 | ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg) 2344 | and Rw(def_reg), Rw(def_reg), #0xffff 2345 } else if (ir_type_size[type] == 8) { 2346 | neg Rx(tmp_reg), Rx(op2_reg) 2347 | ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg) 2348 } else { 2349 | neg Rw(tmp_reg), Rw(op2_reg) 2350 | ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg) 2351 } 2352 break; 2353 case IR_ROR: 2354 if (ir_type_size[type] == 1) { 2355 tmp_reg = ctx->regs[def][3]; 2356 IR_ASSERT(tmp_reg != IR_REG_NONE); 2357 | and Rw(tmp_reg), Rw(op1_reg), #0xff 2358 | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #8 2359 | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16 2360 | ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg) 2361 | and Rw(def_reg), Rw(def_reg), #0xff 2362 } else if (ir_type_size[type] == 2) { 2363 tmp_reg = ctx->regs[def][3]; 2364 IR_ASSERT(tmp_reg != 
IR_REG_NONE); 2365 | and Rw(tmp_reg), Rw(op1_reg), #0xffff 2366 | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16 2367 | ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg) 2368 | and Rw(def_reg), Rw(def_reg), #0xffff 2369 } else { 2370 | ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg 2371 } 2372 break; 2373 } 2374 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2375 ir_emit_store(ctx, type, def, def_reg); 2376 } 2377} 2378 2379static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2380{ 2381 ir_backend_data *data = ctx->data; 2382 dasm_State **Dst = &data->dasm_state; 2383 uint32_t shift = ctx->ir_base[insn->op2].val.u64; 2384 ir_type type = insn->type; 2385 ir_ref op1 = insn->op1; 2386 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2387 ir_reg op1_reg = ctx->regs[def][1]; 2388 ir_reg tmp_reg; 2389 2390 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 2391 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 2392 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2393 2394 if (IR_REG_SPILLED(op1_reg)) { 2395 op1_reg = IR_REG_NUM(op1_reg); 2396 ir_emit_load(ctx, type, op1_reg, op1); 2397 } 2398 switch (insn->op) { 2399 default: 2400 IR_ASSERT(0); 2401 case IR_SHL: 2402 if (ir_type_size[type] == 1) { 2403 | ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(8-shift) 2404 } else if (ir_type_size[type] == 2) { 2405 | ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(16-shift) 2406 } else { 2407 | ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift 2408 } 2409 break; 2410 case IR_SHR: 2411 if (ir_type_size[type] == 1) { 2412 | ubfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift) 2413 } else if (ir_type_size[type] == 2) { 2414 | ubfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift) 2415 } else { 2416 | ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift 2417 } 2418 break; 2419 case IR_SAR: 2420 if (ir_type_size[type] == 1) { 2421 | sbfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift) 2422 } else if (ir_type_size[type] == 2) { 2423 | sbfx Rw(def_reg), Rw(op1_reg), #shift, 
#(16-shift) 2424 } else { 2425 | ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift 2426 } 2427 break; 2428 case IR_ROL: 2429 if (ir_type_size[type] == 1) { 2430 tmp_reg = ctx->regs[def][3]; 2431 | ubfx Rw(tmp_reg), Rw(op1_reg), #(8-shift), #shift 2432 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift 2433 } else if (ir_type_size[type] == 2) { 2434 tmp_reg = ctx->regs[def][3]; 2435 | ubfx Rw(tmp_reg), Rw(op1_reg), #(16-shift), #shift 2436 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift 2437 } else if (ir_type_size[type] == 8) { 2438 shift = (64 - shift) % 64; 2439 | ror Rx(def_reg), Rx(op1_reg), #shift 2440 } else { 2441 shift = (32 - shift) % 32; 2442 | ror Rw(def_reg), Rw(op1_reg), #shift 2443 } 2444 break; 2445 case IR_ROR: 2446 if (ir_type_size[type] == 1) { 2447 tmp_reg = ctx->regs[def][3]; 2448 | ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(8-shift) 2449 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(8-shift) 2450 } else if (ir_type_size[type] == 2) { 2451 tmp_reg = ctx->regs[def][3]; 2452 | ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(16-shift) 2453 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(16-shift) 2454 } else { 2455 | ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift 2456 } 2457 break; 2458 } 2459 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2460 ir_emit_store(ctx, type, def, def_reg); 2461 } 2462} 2463 2464static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2465{ 2466 ir_backend_data *data = ctx->data; 2467 dasm_State **Dst = &data->dasm_state; 2468 ir_type type = insn->type; 2469 ir_ref op1 = insn->op1; 2470 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2471 ir_reg op1_reg = ctx->regs[def][1]; 2472 2473 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2474 2475 if (IR_REG_SPILLED(op1_reg)) { 2476 op1_reg = IR_REG_NUM(op1_reg); 2477 ir_emit_load(ctx, type, op1_reg, op1); 2478 } 2479 if (insn->op == IR_NOT) { 2480 if (insn->type == IR_BOOL) { 2481 | ASM_REG_IMM_OP cmp, type, op1_reg, 0 2482 | cset 
Rw(def_reg), eq 2483 } else { 2484 | ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg 2485 } 2486 } else if (insn->op == IR_NEG) { 2487 | ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg 2488 } else if (insn->op == IR_ABS) { 2489 if (ir_type_size[type] == 8) { 2490 | cmp Rx(op1_reg), #0 2491 | cneg Rx(def_reg), Rx(op1_reg), lt 2492 } else { 2493 | cmp Rw(op1_reg), #0 2494 | cneg Rw(def_reg), Rw(op1_reg), lt 2495 } 2496 } else if (insn->op == IR_CTLZ) { 2497 if (ir_type_size[type] == 1) { 2498 | and Rw(def_reg), Rw(op1_reg), #0xff 2499 | clz Rw(def_reg), Rw(def_reg) 2500 | sub Rw(def_reg), Rw(def_reg), #24 2501 } else if (ir_type_size[type] == 2) { 2502 | and Rw(def_reg), Rw(op1_reg), #0xffff 2503 | clz Rw(def_reg), Rw(def_reg) 2504 | sub Rw(def_reg), Rw(def_reg), #16 2505 } else { 2506 | ASM_REG_REG_OP clz, type, def_reg, op1_reg 2507 } 2508 } else if (insn->op == IR_CTTZ) { 2509 | ASM_REG_REG_OP rbit, insn->type, def_reg, op1_reg 2510 | ASM_REG_REG_OP clz, insn->type, def_reg, def_reg 2511 } else { 2512 IR_ASSERT(insn->op == IR_BSWAP); 2513 | ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg 2514 } 2515 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2516 ir_emit_store(ctx, type, def, def_reg); 2517 } 2518} 2519 2520static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2521{ 2522 ir_backend_data *data = ctx->data; 2523 dasm_State **Dst = &data->dasm_state; 2524 ir_type type = insn->type; 2525 ir_ref op1 = insn->op1; 2526 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2527 ir_reg op1_reg = ctx->regs[def][1]; 2528 ir_reg tmp_reg = ctx->regs[def][2]; 2529 uint32_t code1 = 0x0e205800 | (tmp_reg-IR_REG_FP_FIRST); // cnt v0.8b, v0.8b 2530 uint32_t code2 = 0x0e31b800 | (tmp_reg-IR_REG_FP_FIRST); // addv b0, v0.8b 2531 2532 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 2533 2534 if (IR_REG_SPILLED(op1_reg)) { 2535 op1_reg = IR_REG_NUM(op1_reg); 2536 ir_emit_load(ctx, type, op1_reg, op1); 2537 } 2538 switch 
(ir_type_size[insn->type]) { 2539 default: 2540 IR_ASSERT(0); 2541 case 1: 2542 | and Rw(def_reg), Rw(op1_reg), #0xff 2543 | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg) 2544 | .long code1 // cnt v0.8b, v0.8b 2545 | .long code2 // addv b0, v0.8b 2546 | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST) 2547 break; 2548 case 2: 2549 | and Rw(def_reg), Rw(op1_reg), #0xffff 2550 | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg) 2551 | .long code1 // cnt v0.8b, v0.8b 2552 | .long code2 // addv b0, v0.8b 2553 | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST) 2554 break; 2555 case 4: 2556 | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(op1_reg) 2557 | .long code1 // cnt v0.8b, v0.8b 2558 | .long code2 // addv b0, v0.8b 2559 | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST) 2560 break; 2561 case 8: 2562 | fmov Rd(tmp_reg-IR_REG_FP_FIRST), Rx(op1_reg) 2563 | .long code1 // cnt v0.8b, v0.8b 2564 | .long code2 // addv b0, v0.8b 2565 | fmov Rx(def_reg), Rd(tmp_reg-IR_REG_FP_FIRST) 2566 break; 2567 } 2568 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2569 ir_emit_store(ctx, type, def, def_reg); 2570 } 2571} 2572 2573static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2574{ 2575 ir_backend_data *data = ctx->data; 2576 dasm_State **Dst = &data->dasm_state; 2577 ir_type type = insn->type; 2578 ir_ref op1 = insn->op1; 2579 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2580 ir_reg op1_reg = ctx->regs[def][1]; 2581 2582 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2583 2584 if (IR_REG_SPILLED(op1_reg)) { 2585 op1_reg = IR_REG_NUM(op1_reg); 2586 ir_emit_load(ctx, type, op1_reg, op1); 2587 } 2588 if (insn->op == IR_NEG) { 2589 if (type == IR_DOUBLE) { 2590 | fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) 2591 } else { 2592 IR_ASSERT(type == IR_FLOAT); 2593 | fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) 2594 } 2595 } else { 2596 IR_ASSERT(insn->op == IR_ABS); 2597 if (type == IR_DOUBLE) { 2598 | fabs Rd(def_reg-IR_REG_FP_FIRST), 
Rd(op1_reg-IR_REG_FP_FIRST) 2599 } else { 2600 IR_ASSERT(type == IR_FLOAT); 2601 | fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) 2602 } 2603 } 2604 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2605 ir_emit_store(ctx, insn->type, def, def_reg); 2606 } 2607} 2608 2609static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2610{ 2611 ir_backend_data *data = ctx->data; 2612 dasm_State **Dst = &data->dasm_state; 2613 ir_type type = insn->type; 2614 ir_ref op1 = insn->op1; 2615 ir_ref op2 = insn->op2; 2616 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2617 ir_reg op1_reg = ctx->regs[def][1]; 2618 ir_reg op2_reg = ctx->regs[def][2]; 2619 2620 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 2621 if (IR_REG_SPILLED(op1_reg)) { 2622 op1_reg = IR_REG_NUM(op1_reg); 2623 ir_emit_load(ctx, type, op1_reg, op1); 2624 } 2625 if (IR_REG_SPILLED(op2_reg)) { 2626 op2_reg = IR_REG_NUM(op2_reg); 2627 if (op1 != op2) { 2628 ir_emit_load(ctx, type, op2_reg, op2); 2629 } 2630 } 2631 switch (insn->op) { 2632 default: 2633 IR_ASSERT(0 && "NIY binary op"); 2634 case IR_ADD: 2635 | ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg, op2_reg 2636 break; 2637 case IR_SUB: 2638 | ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg 2639 break; 2640 case IR_MUL: 2641 | ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg 2642 break; 2643 case IR_DIV: 2644 | ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg 2645 break; 2646 case IR_MIN: 2647 | ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg 2648 break; 2649 case IR_MAX: 2650 | ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg 2651 break; 2652 } 2653 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2654 ir_emit_store(ctx, insn->type, def, def_reg); 2655 } 2656} 2657 2658static void ir_emit_fix_type(ir_ctx *ctx, ir_type type, ir_reg op1_reg) 2659{ 2660 ir_backend_data *data = ctx->data; 2661 dasm_State **Dst = &data->dasm_state; 2662 2663 // TODO: 
prevent repeatable sign/zero extension ??? 2664 if (ir_type_size[type] == 2) { 2665 if (IR_IS_TYPE_SIGNED(type)) { 2666 | sxth Rw(op1_reg), Rw(op1_reg) 2667 } else { 2668 | uxth Rw(op1_reg), Rw(op1_reg) 2669 } 2670 } else if (ir_type_size[type] == 1) { 2671 if (IR_IS_TYPE_SIGNED(type)) { 2672 | sxtb Rw(op1_reg), Rw(op1_reg) 2673 } else { 2674 | uxtb Rw(op1_reg), Rw(op1_reg) 2675 } 2676 } 2677} 2678 2679static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) 2680{ 2681 ir_backend_data *data = ctx->data; 2682 dasm_State **Dst = &data->dasm_state; 2683 2684 IR_ASSERT(op1_reg != IR_REG_NONE); 2685 if (ir_type_size[type] < 4) { 2686 ir_emit_fix_type(ctx, type, op1_reg); 2687 } 2688 if (op2_reg != IR_REG_NONE) { 2689 if (ir_type_size[type] == 8) { 2690 | cmp Rx(op1_reg), Rx(op2_reg) 2691 } else if (ir_type_size[type] == 4) { 2692 | cmp Rw(op1_reg), Rw(op2_reg) 2693 } else if (ir_type_size[type] == 2) { 2694 if (IR_IS_TYPE_SIGNED(type)) { 2695 | cmp Rw(op1_reg), Rw(op2_reg), sxth 2696 } else { 2697 | cmp Rw(op1_reg), Rw(op2_reg), uxth 2698 } 2699 } else if (ir_type_size[type] == 1) { 2700 if (IR_IS_TYPE_SIGNED(type)) { 2701 | cmp Rw(op1_reg), Rw(op2_reg), sxtb 2702 } else { 2703 | cmp Rw(op1_reg), Rw(op2_reg), uxtb 2704 } 2705 } else { 2706 IR_ASSERT(0); 2707 } 2708 } else { 2709 IR_ASSERT(IR_IS_CONST_REF(op2)); 2710 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op)); 2711 int32_t val = ctx->ir_base[op2].val.i32; 2712 2713 if (ir_type_size[type] == 8) { 2714 | cmp Rx(op1_reg), #val 2715 } else { 2716 | cmp Rw(op1_reg), #val 2717 } 2718 } 2719} 2720 2721static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2722{ 2723 ir_backend_data *data = ctx->data; 2724 dasm_State **Dst = &data->dasm_state; 2725 ir_type type = ctx->ir_base[insn->op1].type; 2726 ir_op op = insn->op; 2727 ir_ref op1 = insn->op1; 2728 ir_ref op2 = insn->op2; 2729 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2730 ir_reg op1_reg = 
ctx->regs[def][1]; 2731 ir_reg op2_reg = ctx->regs[def][2]; 2732 2733 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2734 if (IR_REG_SPILLED(op1_reg)) { 2735 op1_reg = IR_REG_NUM(op1_reg); 2736 ir_emit_load(ctx, type, op1_reg, op1); 2737 } 2738 if (op2_reg != IR_REG_NONE) { 2739 if (IR_REG_SPILLED(op2_reg)) { 2740 op2_reg = IR_REG_NUM(op2_reg); 2741 if (op1 != op2) { 2742 ir_emit_load(ctx, type, op2_reg, op2); 2743 } 2744 } 2745 } 2746 if (IR_IS_CONST_REF(insn->op2) 2747 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op) 2748 && ctx->ir_base[insn->op2].val.u64 == 0) { 2749 if (op == IR_ULT) { 2750 /* always false */ 2751 ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0); 2752 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2753 ir_emit_store(ctx, insn->type, def, def_reg); 2754 } 2755 return; 2756 } else if (op == IR_UGE) { 2757 /* always true */ 2758 ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1); 2759 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2760 ir_emit_store(ctx, insn->type, def, def_reg); 2761 } 2762 return; 2763 } else if (op == IR_ULE) { 2764 op = IR_EQ; 2765 } else if (op == IR_UGT) { 2766 op = IR_NE; 2767 } 2768 } 2769 ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); 2770 switch (op) { 2771 default: 2772 IR_ASSERT(0 && "NIY binary op"); 2773 case IR_EQ: 2774 | cset Rw(def_reg), eq 2775 break; 2776 case IR_NE: 2777 | cset Rw(def_reg), ne 2778 break; 2779 case IR_LT: 2780 | cset Rw(def_reg), lt 2781 break; 2782 case IR_GE: 2783 | cset Rw(def_reg), ge 2784 break; 2785 case IR_LE: 2786 | cset Rw(def_reg), le 2787 break; 2788 case IR_GT: 2789 | cset Rw(def_reg), gt 2790 break; 2791 case IR_ULT: 2792 | cset Rw(def_reg), lo 2793 break; 2794 case IR_UGE: 2795 | cset Rw(def_reg), hs 2796 break; 2797 case IR_ULE: 2798 | cset Rw(def_reg), ls 2799 break; 2800 case IR_UGT: 2801 | cset Rw(def_reg), hi 2802 break; 2803 } 2804 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2805 ir_emit_store(ctx, insn->type, def, def_reg); 2806 } 2807} 2808 2809static ir_op 
ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn) 2810{ 2811 ir_backend_data *data = ctx->data; 2812 dasm_State **Dst = &data->dasm_state; 2813 ir_type type = ctx->ir_base[cmp_insn->op1].type; 2814 ir_op op = cmp_insn->op; 2815 ir_ref op1, op2; 2816 ir_reg op1_reg, op2_reg; 2817 2818 if (op == IR_LT || op == IR_LE) { 2819 /* swap operands to avoid P flag check */ 2820 op ^= 3; 2821 op1 = cmp_insn->op2; 2822 op2 = cmp_insn->op1; 2823 op1_reg = ctx->regs[cmp_ref][2]; 2824 op2_reg = ctx->regs[cmp_ref][1]; 2825 } else { 2826 op1 = cmp_insn->op1; 2827 op2 = cmp_insn->op2; 2828 op1_reg = ctx->regs[cmp_ref][1]; 2829 op2_reg = ctx->regs[cmp_ref][2]; 2830 } 2831 2832 IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 2833 if (IR_REG_SPILLED(op1_reg)) { 2834 op1_reg = IR_REG_NUM(op1_reg); 2835 ir_emit_load(ctx, type, op1_reg, op1); 2836 } 2837 if (IR_REG_SPILLED(op2_reg)) { 2838 op2_reg = IR_REG_NUM(op2_reg); 2839 if (op1 != op2) { 2840 ir_emit_load(ctx, type, op2_reg, op2); 2841 } 2842 } 2843 if (type == IR_DOUBLE) { 2844 | fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST) 2845 } else { 2846 IR_ASSERT(type == IR_FLOAT); 2847 | fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST) 2848 } 2849 return op; 2850} 2851 2852static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2853{ 2854 ir_backend_data *data = ctx->data; 2855 dasm_State **Dst = &data->dasm_state; 2856 ir_op op = ir_emit_cmp_fp_common(ctx, def, insn); 2857 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2858//??? 
ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag 2859 2860 IR_ASSERT(def_reg != IR_REG_NONE); 2861 switch (op) { 2862 default: 2863 IR_ASSERT(0 && "NIY binary op"); 2864 case IR_EQ: 2865 | cset Rw(def_reg), eq 2866 break; 2867 case IR_NE: 2868 | cset Rw(def_reg), ne 2869 break; 2870 case IR_LT: 2871 | cset Rw(def_reg), mi 2872 break; 2873 case IR_GE: 2874 | cset Rw(def_reg), ge 2875 break; 2876 case IR_LE: 2877 | cset Rw(def_reg), ls 2878 break; 2879 case IR_GT: 2880 | cset Rw(def_reg), gt 2881 break; 2882 case IR_ULT: 2883 | cset Rw(def_reg), lt 2884 break; 2885 case IR_UGE: 2886 | cset Rw(def_reg), hs 2887 break; 2888 case IR_ULE: 2889 | cset Rw(def_reg), le 2890 break; 2891 case IR_UGT: 2892 | cset Rw(def_reg), hi 2893 break; 2894 } 2895 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2896 ir_emit_store(ctx, insn->type, def, def_reg); 2897 } 2898} 2899 2900static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) 2901{ 2902 uint32_t true_block, false_block; 2903 ir_backend_data *data = ctx->data; 2904 dasm_State **Dst = &data->dasm_state; 2905 2906 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 2907 if (true_block != next_block) { 2908 | b =>true_block 2909 } 2910} 2911 2912static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) 2913{ 2914 uint32_t true_block, false_block; 2915 ir_backend_data *data = ctx->data; 2916 dasm_State **Dst = &data->dasm_state; 2917 2918 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 2919 if (false_block != next_block) { 2920 | b =>false_block 2921 } 2922} 2923 2924static void ir_emit_jz(ir_ctx *ctx, uint32_t b, uint32_t next_block, uint8_t op, ir_type type, ir_reg reg) 2925{ 2926 uint32_t true_block, false_block; 2927 ir_backend_data *data = ctx->data; 2928 dasm_State **Dst = &data->dasm_state; 2929 2930 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 2931 if (true_block == next_block) { 2932 IR_ASSERT(op < IR_LT); 2933 
op ^= 1; // reverse 2934 true_block = false_block; 2935 false_block = 0; 2936 } else if (false_block == next_block) { 2937 false_block = 0; 2938 } 2939 2940 if (op == IR_EQ) { 2941 if (ir_type_size[type] == 8) { 2942 | cbz Rx(reg), =>true_block 2943 } else { 2944 | cbz Rw(reg), =>true_block 2945 } 2946 } else { 2947 IR_ASSERT(op == IR_NE); 2948 if (ir_type_size[type] == 8) { 2949 | cbnz Rx(reg), =>true_block 2950 } else { 2951 | cbnz Rw(reg), =>true_block 2952 } 2953 } 2954 if (false_block) { 2955 | b =>false_block 2956 } 2957} 2958 2959static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp) 2960{ 2961 uint32_t true_block, false_block; 2962 ir_backend_data *data = ctx->data; 2963 dasm_State **Dst = &data->dasm_state; 2964 2965 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 2966 if (true_block == next_block) { 2967 /* swap to avoid unconditional JMP */ 2968 if (int_cmp || op == IR_EQ || op == IR_NE) { 2969 op ^= 1; // reverse 2970 } else { 2971 op ^= 5; // reverse 2972 } 2973 true_block = false_block; 2974 false_block = 0; 2975 } else if (false_block == next_block) { 2976 false_block = 0; 2977 } 2978 2979 if (int_cmp) { 2980 switch (op) { 2981 default: 2982 IR_ASSERT(0 && "NIY binary op"); 2983 case IR_EQ: 2984 | beq =>true_block 2985 break; 2986 case IR_NE: 2987 | bne =>true_block 2988 break; 2989 case IR_LT: 2990 | blt =>true_block 2991 break; 2992 case IR_GE: 2993 | bge =>true_block 2994 break; 2995 case IR_LE: 2996 | ble =>true_block 2997 break; 2998 case IR_GT: 2999 | bgt =>true_block 3000 break; 3001 case IR_ULT: 3002 | blo =>true_block 3003 break; 3004 case IR_UGE: 3005 | bhs =>true_block 3006 break; 3007 case IR_ULE: 3008 | bls =>true_block 3009 break; 3010 case IR_UGT: 3011 | bhi =>true_block 3012 break; 3013 } 3014 } else { 3015 switch (op) { 3016 default: 3017 IR_ASSERT(0 && "NIY binary op"); 3018 case IR_EQ: 3019 | beq =>true_block 3020 break; 3021 case IR_NE: 3022 | bne 
=>true_block 3023 break; 3024 case IR_LT: 3025 | bmi =>true_block 3026 break; 3027 case IR_GE: 3028 | bge =>true_block 3029 break; 3030 case IR_LE: 3031 | bls =>true_block 3032 break; 3033 case IR_GT: 3034 | bgt =>true_block 3035 break; 3036 case IR_ULT: 3037 | blt =>true_block 3038 break; 3039 case IR_UGE: 3040 | bhs =>true_block 3041 break; 3042 case IR_ULE: 3043 | ble =>true_block 3044 break; 3045 case IR_UGT: 3046 | bhi =>true_block 3047 break; 3048// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; 3049// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; 3050// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; 3051// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; 3052 } 3053 } 3054 if (false_block) { 3055 | b =>false_block 3056 } 3057} 3058 3059static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 3060{ 3061 ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; 3062 ir_op op = cmp_insn->op; 3063 ir_type type = ctx->ir_base[cmp_insn->op1].type; 3064 ir_ref op1 = cmp_insn->op1; 3065 ir_ref op2 = cmp_insn->op2; 3066 ir_reg op1_reg = ctx->regs[insn->op2][1]; 3067 ir_reg op2_reg = ctx->regs[insn->op2][2]; 3068 3069 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3070 op1_reg = IR_REG_NUM(op1_reg); 3071 ir_emit_load(ctx, type, op1_reg, op1); 3072 } 3073 if (op2_reg != IR_REG_NONE) { 3074 if (IR_REG_SPILLED(op2_reg)) { 3075 op2_reg = IR_REG_NUM(op2_reg); 3076 if (op1 != op2) { 3077 ir_emit_load(ctx, type, op2_reg, op2); 3078 } 3079 } 3080 } 3081 if (IR_IS_CONST_REF(op2) 3082 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) 3083 && ctx->ir_base[op2].val.u64 == 0) { 3084 if (op == IR_ULT) { 3085 /* always false */ 3086 ir_emit_jmp_false(ctx, b, def, next_block); 3087 return; 3088 } else if (op == IR_UGE) { 3089 /* always true */ 3090 ir_emit_jmp_true(ctx, b, def, next_block); 3091 return; 3092 } else if (op == IR_ULE) { 3093 op = IR_EQ; 3094 } else if 
(op == IR_UGT) { 3095 op = IR_NE; 3096 } 3097 if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { 3098 ir_emit_jz(ctx, b, next_block, op, type, op1_reg); 3099 return; 3100 } 3101 } 3102 ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); 3103 ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); 3104} 3105 3106static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 3107{ 3108 ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); 3109 ir_emit_jcc(ctx, b, def, insn, next_block, op, 0); 3110} 3111 3112static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 3113{ 3114 ir_type type = ctx->ir_base[insn->op2].type; 3115 ir_reg op2_reg = ctx->regs[def][2]; 3116 ir_backend_data *data = ctx->data; 3117 dasm_State **Dst = &data->dasm_state; 3118 3119 if (IR_IS_CONST_REF(insn->op2)) { 3120 uint32_t true_block, false_block; 3121 3122 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 3123 if (ir_const_is_true(&ctx->ir_base[insn->op2])) { 3124 if (true_block != next_block) { 3125 | b =>true_block 3126 } 3127 } else { 3128 if (false_block != next_block) { 3129 | b =>false_block 3130 } 3131 } 3132 return; 3133 } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { 3134 uint32_t true_block, false_block; 3135 3136 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 3137 if (true_block != next_block) { 3138 | b =>true_block 3139 } 3140 return; 3141 } 3142 IR_ASSERT(op2_reg != IR_REG_NONE); 3143 if (IR_REG_SPILLED(op2_reg)) { 3144 op2_reg = IR_REG_NUM(op2_reg); 3145 ir_emit_load(ctx, type, op2_reg, insn->op2); 3146 } 3147 | ASM_REG_IMM_OP cmp, type, op2_reg, 0 3148 ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1); 3149} 3150 3151static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3152{ 3153 ir_backend_data *data = ctx->data; 3154 dasm_State **Dst = &data->dasm_state; 3155 ir_type type = insn->type; 3156 ir_ref 
op1 = insn->op1; 3157 ir_ref op2 = insn->op2; 3158 ir_ref op3 = insn->op3; 3159 ir_type op1_type = ctx->ir_base[op1].type; 3160 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3161 ir_reg op1_reg = ctx->regs[def][1]; 3162 ir_reg op2_reg = ctx->regs[def][2]; 3163 ir_reg op3_reg = ctx->regs[def][3]; 3164 3165 IR_ASSERT(def_reg != IR_REG_NONE); 3166 3167 if (IR_REG_SPILLED(op2_reg)) { 3168 op2_reg = IR_REG_NUM(op2_reg); 3169 ir_emit_load(ctx, type, op2_reg, op2); 3170 if (op1 == op2) { 3171 op1_reg = op2_reg; 3172 } 3173 if (op3 == op2) { 3174 op3_reg = op2_reg; 3175 } 3176 } 3177 if (op3 != op2 && IR_REG_SPILLED(op3_reg)) { 3178 op3_reg = IR_REG_NUM(op3_reg); 3179 ir_emit_load(ctx, type, op3_reg, op3); 3180 if (op1 == op2) { 3181 op1_reg = op3_reg; 3182 } 3183 } 3184 if (op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) { 3185 op1_reg = IR_REG_NUM(op1_reg); 3186 ir_emit_load(ctx, op1_type, op1_reg, op1); 3187 } 3188 3189 if (IR_IS_TYPE_INT(op1_type)) { 3190 | ASM_REG_IMM_OP cmp, op1_type, op1_reg, 0 3191 } else{ 3192 | ASM_FP_REG_IMM_OP fcmp, op1_type, op1_reg, 0.0 3193 } 3194 3195 if (IR_IS_TYPE_INT(type)) { 3196 if (ir_type_size[type] == 8) { 3197 | csel Rx(def_reg), Rx(op2_reg), Rx(op3_reg), ne 3198 } else { 3199 | csel Rw(def_reg), Rw(op2_reg), Rw(op3_reg), ne 3200 } 3201 } else{ 3202 if (type == IR_DOUBLE) { 3203 | fcsel Rd(def_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST), Rd(op3_reg-IR_REG_FP_FIRST), ne 3204 } else { 3205 | fcsel Rs(def_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST), Rs(op3_reg-IR_REG_FP_FIRST), ne 3206 } 3207 } 3208 3209 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3210 ir_emit_store(ctx, type, def, def_reg); 3211 } 3212} 3213 3214static void ir_emit_return_void(ir_ctx *ctx) 3215{ 3216 ir_backend_data *data = ctx->data; 3217 dasm_State **Dst = &data->dasm_state; 3218 3219 ir_emit_epilogue(ctx); 3220 | ret 3221} 3222 3223static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3224{ 3225 ir_reg op2_reg = ctx->regs[ref][2]; 
3226 3227 if (op2_reg != IR_REG_INT_RET1) { 3228 ir_type type = ctx->ir_base[insn->op2].type; 3229 3230 if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { 3231 ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); 3232 } else { 3233 ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); 3234 } 3235 } 3236 ir_emit_return_void(ctx); 3237} 3238 3239static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3240{ 3241 ir_reg op2_reg = ctx->regs[ref][2]; 3242 ir_type type = ctx->ir_base[insn->op2].type; 3243 3244 if (op2_reg != IR_REG_FP_RET1) { 3245 if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { 3246 ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); 3247 } else { 3248 ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); 3249 } 3250 } 3251 ir_emit_return_void(ctx); 3252} 3253 3254static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3255{ 3256 ir_type dst_type = insn->type; 3257 ir_type src_type = ctx->ir_base[insn->op1].type; 3258 ir_backend_data *data = ctx->data; 3259 dasm_State **Dst = &data->dasm_state; 3260 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3261 ir_reg op1_reg = ctx->regs[def][1]; 3262 3263 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3264 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3265 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 3266 IR_ASSERT(def_reg != IR_REG_NONE); 3267 if ((op1_reg != IR_REG_NONE) && IR_REG_SPILLED(op1_reg)) { 3268 op1_reg = IR_REG_NUM(op1_reg); 3269 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3270 } 3271 3272 if (op1_reg != IR_REG_NONE) { 3273 if (ir_type_size[src_type] == 1) { 3274 if (ir_type_size[dst_type] == 2) { 3275 | sxtb Rw(def_reg), Rw(op1_reg) 3276 } else if (ir_type_size[dst_type] == 4) { 3277 | sxtb Rw(def_reg), Rw(op1_reg) 3278 } else { 3279 IR_ASSERT(ir_type_size[dst_type] == 8); 3280 | sxtb Rx(def_reg), Rx(op1_reg) 3281 } 3282 } else if (ir_type_size[src_type] == 2) { 3283 if (ir_type_size[dst_type] == 4) { 3284 | sxth Rw(def_reg), Rw(op1_reg) 3285 } else { 3286 
IR_ASSERT(ir_type_size[dst_type] == 8); 3287 | sxth Rx(def_reg), Rx(op1_reg) 3288 } 3289 } else { 3290 IR_ASSERT(ir_type_size[src_type] == 4); 3291 IR_ASSERT(ir_type_size[dst_type] == 8); 3292 | sxtw Rx(def_reg), Rw(op1_reg) 3293 } 3294 } else if (IR_IS_CONST_REF(insn->op1)) { 3295 IR_ASSERT(0); 3296 } else { 3297 ir_reg fp; 3298 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3299 3300 if (ir_type_size[src_type] == 1) { 3301 if (ir_type_size[dst_type] == 2) { 3302 | ldrsb Rw(def_reg), [Rx(fp), #offset] 3303 } else if (ir_type_size[dst_type] == 4) { 3304 | ldrsb Rw(def_reg), [Rx(fp), #offset] 3305 } else { 3306 IR_ASSERT(ir_type_size[dst_type] == 8); 3307 | ldrsb Rx(def_reg), [Rx(fp), #offset] 3308 } 3309 } else if (ir_type_size[src_type] == 2) { 3310 if (ir_type_size[dst_type] == 4) { 3311 | ldrsh Rw(def_reg), [Rx(fp), #offset] 3312 } else { 3313 IR_ASSERT(ir_type_size[dst_type] == 8); 3314 | ldrsh Rx(def_reg), [Rx(fp), #offset] 3315 } 3316 } else { 3317 IR_ASSERT(ir_type_size[src_type] == 4); 3318 IR_ASSERT(ir_type_size[dst_type] == 8); 3319 | ldrsw Rx(def_reg), [Rx(fp), #offset] 3320 } 3321 } 3322 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3323 ir_emit_store(ctx, dst_type, def, def_reg); 3324 } 3325} 3326 3327static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3328{ 3329 ir_type dst_type = insn->type; 3330 ir_type src_type = ctx->ir_base[insn->op1].type; 3331 ir_backend_data *data = ctx->data; 3332 dasm_State **Dst = &data->dasm_state; 3333 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3334 ir_reg op1_reg = ctx->regs[def][1]; 3335 3336 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3337 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3338 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 3339 IR_ASSERT(def_reg != IR_REG_NONE); 3340 if ((op1_reg != IR_REG_NONE) && IR_REG_SPILLED(op1_reg)) { 3341 op1_reg = IR_REG_NUM(op1_reg); 3342 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3343 } 3344 3345 if (op1_reg != IR_REG_NONE) { 3346 if 
(ir_type_size[src_type] == 1) { 3347 | uxtb Rw(def_reg), Rw(op1_reg) 3348 } else if (ir_type_size[src_type] == 2) { 3349 | uxth Rw(def_reg), Rw(op1_reg) 3350 } else { 3351 | mov Rw(def_reg), Rw(op1_reg) 3352 } 3353 } else if (IR_IS_CONST_REF(insn->op1)) { 3354 IR_ASSERT(0); 3355 } else { 3356 ir_reg fp; 3357 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3358 3359 if (ir_type_size[src_type] == 1) { 3360 | ldrb Rw(def_reg), [Rx(fp), #offset] 3361 } else if (ir_type_size[src_type] == 2) { 3362 | ldrh Rw(def_reg), [Rx(fp), #offset] 3363 } else { 3364 IR_ASSERT(ir_type_size[src_type] == 4); 3365 IR_ASSERT(ir_type_size[dst_type] == 8); 3366 | ldr Rw(def_reg), [Rx(fp), #offset] 3367 } 3368 } 3369 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3370 ir_emit_store(ctx, dst_type, def, def_reg); 3371 } 3372} 3373 3374static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3375{ 3376 ir_backend_data *data = ctx->data; 3377 dasm_State **Dst = &data->dasm_state; 3378 ir_type dst_type = insn->type; 3379 ir_type src_type = ctx->ir_base[insn->op1].type; 3380 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3381 ir_reg op1_reg = ctx->regs[def][1]; 3382 3383 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3384 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3385 IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); 3386 IR_ASSERT(def_reg != IR_REG_NONE); 3387 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3388 op1_reg = IR_REG_NUM(op1_reg); 3389 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3390 } 3391 if (op1_reg != IR_REG_NONE) { 3392 if (ir_type_size[dst_type] == 1) { 3393 | and Rw(def_reg), Rw(op1_reg), #0xff 3394 } else if (ir_type_size[dst_type] == 2) { 3395 | and Rw(def_reg), Rw(op1_reg), #0xffff 3396 } else if (op1_reg != def_reg) { 3397 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 3398 } 3399 } else { 3400 ir_emit_load(ctx, dst_type, def_reg, insn->op1); 3401 } 3402 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3403 ir_emit_store(ctx, dst_type, def, def_reg); 
3404 } 3405} 3406 3407static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3408{ 3409 ir_type dst_type = insn->type; 3410 ir_type src_type = ctx->ir_base[insn->op1].type; 3411 ir_backend_data *data = ctx->data; 3412 dasm_State **Dst = &data->dasm_state; 3413 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3414 ir_reg op1_reg = ctx->regs[def][1]; 3415 3416 IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); 3417 IR_ASSERT(def_reg != IR_REG_NONE); 3418 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3419 op1_reg = IR_REG_NUM(op1_reg); 3420 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3421 } 3422 if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { 3423 if (op1_reg != IR_REG_NONE) { 3424 if (IR_REG_SPILLED(op1_reg)) { 3425 op1_reg = IR_REG_NUM(op1_reg); 3426 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3427 } 3428 if (op1_reg != def_reg) { 3429 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 3430 } 3431 } else { 3432 ir_emit_load(ctx, dst_type, def_reg, insn->op1); 3433 } 3434 } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { 3435 if (op1_reg != IR_REG_NONE) { 3436 if (IR_REG_SPILLED(op1_reg)) { 3437 op1_reg = IR_REG_NUM(op1_reg); 3438 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3439 } 3440 if (op1_reg != def_reg) { 3441 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 3442 } 3443 } else { 3444 ir_emit_load(ctx, dst_type, def_reg, insn->op1); 3445 } 3446 } else if (IR_IS_TYPE_FP(src_type)) { 3447 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3448 if (op1_reg != IR_REG_NONE) { 3449 if (IR_REG_SPILLED(op1_reg)) { 3450 op1_reg = IR_REG_NUM(op1_reg); 3451 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3452 } 3453 if (src_type == IR_DOUBLE) { 3454 | fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3455 } else { 3456 IR_ASSERT(src_type == IR_FLOAT); 3457 | fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3458 } 3459 } else if (IR_IS_CONST_REF(insn->op1)) { 3460 IR_ASSERT(0); //??? 
3461 } else { 3462 ir_reg fp; 3463 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3464 3465 if (src_type == IR_DOUBLE) { 3466 | ldr Rx(def_reg), [Rx(fp), #offset] 3467 } else { 3468 IR_ASSERT(src_type == IR_FLOAT); 3469 | ldr Rw(def_reg), [Rx(fp), #offset] 3470 } 3471 } 3472 } else if (IR_IS_TYPE_FP(dst_type)) { 3473 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3474 if (op1_reg != IR_REG_NONE) { 3475 if (IR_REG_SPILLED(op1_reg)) { 3476 op1_reg = IR_REG_NUM(op1_reg); 3477 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3478 } 3479 if (dst_type == IR_DOUBLE) { 3480 | fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3481 } else { 3482 IR_ASSERT(dst_type == IR_FLOAT); 3483 | fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3484 } 3485 } else if (IR_IS_CONST_REF(insn->op1)) { 3486 IR_ASSERT(0); //??? 3487 } else { 3488 ir_reg fp; 3489 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3490 3491 if (dst_type == IR_DOUBLE) { 3492 | ldr Rd(def_reg), [Rx(fp), #offset] 3493 } else { 3494 IR_ASSERT(src_type == IR_FLOAT); 3495 | ldr Rs(def_reg), [Rx(fp), #offset] 3496 } 3497 } 3498 } 3499 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3500 ir_emit_store(ctx, dst_type, def, def_reg); 3501 } 3502} 3503 3504static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3505{ 3506 ir_type dst_type = insn->type; 3507 ir_type src_type = ctx->ir_base[insn->op1].type; 3508 ir_backend_data *data = ctx->data; 3509 dasm_State **Dst = &data->dasm_state; 3510 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3511 ir_reg op1_reg = ctx->regs[def][1]; 3512 3513 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3514 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 3515 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 3516 if (IR_REG_SPILLED(op1_reg)) { 3517 op1_reg = IR_REG_NUM(op1_reg); 3518 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3519 } 3520 3521 if (ir_type_size[src_type] == 8) { 3522 if (IR_IS_TYPE_SIGNED(src_type)) { 3523 if (dst_type == IR_DOUBLE) { 3524 | scvtf 
Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3525 } else { 3526 IR_ASSERT(dst_type == IR_FLOAT); 3527 | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3528 } 3529 } else { 3530 if (dst_type == IR_DOUBLE) { 3531 | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3532 } else { 3533 IR_ASSERT(dst_type == IR_FLOAT); 3534 | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3535 } 3536 } 3537 } else { 3538 if (IR_IS_TYPE_SIGNED(src_type)) { 3539 if (ir_type_size[src_type] == 2) { 3540 ir_emit_fix_type(ctx, IR_I16, op1_reg); 3541 } else if (ir_type_size[src_type] == 1) { 3542 ir_emit_fix_type(ctx, IR_I8, op1_reg); 3543 } 3544 if (dst_type == IR_DOUBLE) { 3545 | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3546 } else { 3547 IR_ASSERT(dst_type == IR_FLOAT); 3548 | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3549 } 3550 } else { 3551 if (ir_type_size[src_type] == 2) { 3552 ir_emit_fix_type(ctx, IR_U16, op1_reg); 3553 } else if (ir_type_size[src_type] == 1) { 3554 ir_emit_fix_type(ctx, IR_U8, op1_reg); 3555 } 3556 if (dst_type == IR_DOUBLE) { 3557 | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3558 } else { 3559 IR_ASSERT(dst_type == IR_FLOAT); 3560 | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3561 } 3562 } 3563 } 3564 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3565 ir_emit_store(ctx, dst_type, def, def_reg); 3566 } 3567} 3568 3569static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3570{ 3571 ir_type dst_type = insn->type; 3572 ir_type src_type = ctx->ir_base[insn->op1].type; 3573 ir_backend_data *data = ctx->data; 3574 dasm_State **Dst = &data->dasm_state; 3575 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3576 ir_reg op1_reg = ctx->regs[def][1]; 3577 3578 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 3579 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3580 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 3581 if (IR_REG_SPILLED(op1_reg)) { 3582 op1_reg = IR_REG_NUM(op1_reg); 3583 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3584 } 3585 if 
(ir_type_size[dst_type] == 8) { 3586 if (IR_IS_TYPE_SIGNED(dst_type)) { 3587 if (src_type == IR_DOUBLE) { 3588 | fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3589 } else { 3590 IR_ASSERT(src_type == IR_FLOAT); 3591 | fcvtzs Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3592 } 3593 } else { 3594 if (src_type == IR_DOUBLE) { 3595 | fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3596 } else { 3597 IR_ASSERT(src_type == IR_FLOAT); 3598 | fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3599 } 3600 } 3601 } else { 3602 if (IR_IS_TYPE_SIGNED(dst_type)) { 3603 if (src_type == IR_DOUBLE) { 3604 | fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3605 } else { 3606 IR_ASSERT(src_type == IR_FLOAT); 3607 | fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3608 } 3609 } else { 3610 if (src_type == IR_DOUBLE) { 3611 | fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3612 } else { 3613 IR_ASSERT(src_type == IR_FLOAT); 3614 | fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3615 } 3616 } 3617 } 3618 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3619 ir_emit_store(ctx, dst_type, def, def_reg); 3620 } 3621} 3622 3623static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3624{ 3625 ir_type dst_type = insn->type; 3626 ir_type src_type = ctx->ir_base[insn->op1].type; 3627 ir_backend_data *data = ctx->data; 3628 dasm_State **Dst = &data->dasm_state; 3629 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3630 ir_reg op1_reg = ctx->regs[def][1]; 3631 3632 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 3633 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 3634 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 3635 if (IR_REG_SPILLED(op1_reg)) { 3636 op1_reg = IR_REG_NUM(op1_reg); 3637 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3638 } 3639 if (src_type == dst_type) { 3640 if (op1_reg != def_reg) { 3641 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 3642 } 3643 } else if (src_type == IR_DOUBLE) { 3644 | fcvt Rs(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) 3645 } else { 3646 
IR_ASSERT(src_type == IR_FLOAT); 3647 | fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) 3648 } 3649 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3650 ir_emit_store(ctx, dst_type, def, def_reg); 3651 } 3652} 3653 3654static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3655{ 3656 ir_ref type = insn->type; 3657 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3658 ir_reg op1_reg = ctx->regs[def][1]; 3659 3660 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 3661 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3662 op1_reg = IR_REG_NUM(op1_reg); 3663 ir_emit_load(ctx, type, op1_reg, insn->op1); 3664 } 3665 if (def_reg == op1_reg) { 3666 /* same reg */ 3667 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 3668 ir_emit_mov(ctx, type, def_reg, op1_reg); 3669 } else if (def_reg != IR_REG_NONE) { 3670 ir_emit_load(ctx, type, def_reg, insn->op1); 3671 } else if (op1_reg != IR_REG_NONE) { 3672 ir_emit_store(ctx, type, def, op1_reg); 3673 } else { 3674 IR_ASSERT(0); 3675 } 3676 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 3677 ir_emit_store(ctx, type, def, def_reg); 3678 } 3679} 3680 3681static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3682{ 3683 ir_type type = insn->type; 3684 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3685 ir_reg op1_reg = ctx->regs[def][1]; 3686 3687 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 3688 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3689 op1_reg = IR_REG_NUM(op1_reg); 3690 ir_emit_load(ctx, type, op1_reg, insn->op1); 3691 } 3692 if (def_reg == op1_reg) { 3693 /* same reg */ 3694 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 3695 ir_emit_fp_mov(ctx, type, def_reg, op1_reg); 3696 } else if (def_reg != IR_REG_NONE) { 3697 ir_emit_load(ctx, type, def_reg, insn->op1); 3698 } else if (op1_reg != IR_REG_NONE) { 3699 ir_emit_store(ctx, type, def, op1_reg); 3700 } else { 3701 IR_ASSERT(0); 3702 } 3703 if 
(def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 3704 ir_emit_store(ctx, type, def, def_reg); 3705 } 3706} 3707 3708static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3709{ 3710 ir_backend_data *data = ctx->data; 3711 dasm_State **Dst = &data->dasm_state; 3712 ir_ref type = insn->type; 3713 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3714 int32_t offset; 3715 ir_reg fp; 3716 3717 IR_ASSERT(def_reg != IR_REG_NONE); 3718 offset = ir_var_spill_slot(ctx, insn->op1, &fp); 3719 | add Rx(def_reg), Rx(fp), #offset 3720 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3721 ir_emit_store(ctx, type, def, def_reg); 3722 } 3723} 3724 3725static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3726{ 3727 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 3728 ir_ref type = insn->type; 3729 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3730 ir_reg fp; 3731 int32_t offset; 3732 ir_mem mem; 3733 3734 IR_ASSERT(var_insn->op == IR_VAR); 3735 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3736 offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); 3737 mem = IR_MEM_BO(fp, offset); 3738 if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { 3739 return; // fake load 3740 } 3741 IR_ASSERT(def_reg != IR_REG_NONE); 3742 ir_emit_load_mem(ctx, type, def_reg, mem); 3743 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3744 ir_emit_store(ctx, type, def, def_reg); 3745 } 3746} 3747 3748static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3749{ 3750 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 3751 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 3752 ir_ref type = val_insn->type; 3753 ir_reg op3_reg = ctx->regs[ref][3]; 3754 ir_reg fp; 3755 int32_t offset; 3756 ir_mem mem; 3757 3758 IR_ASSERT(var_insn->op == IR_VAR); 3759 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3760 offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); 3761 IR_ASSERT(op3_reg != IR_REG_NONE); 3762 if (IR_REG_SPILLED(op3_reg) 3763 && !IR_IS_CONST_REF(insn->op3) 3764 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 3765 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { 3766 return; // fake store 3767 } 3768 if (IR_REG_SPILLED(op3_reg)) { 3769 op3_reg = IR_REG_NUM(op3_reg); 3770 ir_emit_load(ctx, type, op3_reg, insn->op3); 3771 } 3772 mem = IR_MEM_BO(fp, offset); 3773 ir_emit_store_mem(ctx, type, mem, op3_reg); 3774} 3775 3776static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) 3777{ 3778 ir_insn *addr_insn = &ctx->ir_base[ref]; 3779 ir_reg reg; 3780 int32_t offset; 3781 3782 if (addr_insn->op == IR_ADD) { 3783 IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); 3784 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op)); 3785 if (ir_rule(ctx, addr_insn->op1) == IR_STATIC_ALLOCA) { 3786 reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3787 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[addr_insn->op1].op3); 3788 offset += ctx->ir_base[addr_insn->op2].val.i32; 3789 return IR_MEM_BO(reg, offset); 3790 } else { 3791 if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) { 3792 reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1); 3793 } else { 3794 reg = ctx->regs[ref][1]; 3795 } 3796 if (IR_REG_SPILLED(reg)) { 3797 reg = IR_REG_NUM(reg); 3798 ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); 3799 } 3800 return IR_MEM_BO(reg, ctx->ir_base[addr_insn->op2].val.i32); 3801 } 3802 } else { 3803 IR_ASSERT(addr_insn->op == IR_ALLOCA || addr_insn->op == IR_VADDR); 3804 reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3805 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[ref].op3); 3806 return IR_MEM_BO(reg, offset); 3807 } 3808} 3809 3810static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3811{ 3812 ir_ref type = insn->type; 3813 ir_reg op2_reg = ctx->regs[def][2]; 3814 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3815 ir_mem mem; 3816 3817 if (ctx->use_lists[def].count == 1) { 3818 /* dead load */ 3819 return; 3820 } 3821 IR_ASSERT(def_reg != IR_REG_NONE); 3822 if (op2_reg != IR_REG_NONE) { 3823 if (IR_REG_SPILLED(op2_reg)) { 3824 op2_reg = IR_REG_NUM(op2_reg); 3825 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3826 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3827 } 3828 mem = IR_MEM_B(op2_reg); 3829 } else if (IR_IS_CONST_REF(insn->op2)) { 3830 op2_reg = def_reg; 3831 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3832 mem = IR_MEM_B(op2_reg); 3833 } else { 3834 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 3835 mem = ir_fuse_addr(ctx, def, insn->op2); 3836 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 3837 if (!ir_may_avoid_spill_load(ctx, def, def)) { 3838 ir_emit_load_mem_int(ctx, type, def_reg, mem); 3839 } 3840 /* avoid load to the same location (valid only when register is not reused) */ 3841 return; 3842 } 3843 } 3844 ir_emit_load_mem_int(ctx, type, def_reg, mem); 3845 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3846 ir_emit_store(ctx, type, def, def_reg); 3847 } 3848} 3849 3850static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3851{ 3852 ir_ref type = insn->type; 3853 ir_reg op2_reg = ctx->regs[def][2]; 3854 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3855 ir_mem mem; 3856 3857 if (ctx->use_lists[def].count == 1) { 3858 /* dead load */ 3859 return; 3860 } 3861 IR_ASSERT(def_reg != IR_REG_NONE); 3862 if (op2_reg != IR_REG_NONE) { 3863 if (IR_REG_SPILLED(op2_reg)) { 3864 op2_reg = IR_REG_NUM(op2_reg); 3865 IR_ASSERT(ctx->ir_base[insn->op2].type 
== IR_ADDR); 3866 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3867 } 3868 mem = IR_MEM_B(op2_reg); 3869 } else if (IR_IS_CONST_REF(insn->op2)) { 3870 op2_reg = def_reg; 3871 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3872 mem = IR_MEM_B(op2_reg); 3873 } else { 3874 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 3875 mem = ir_fuse_addr(ctx, def, insn->op2); 3876 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 3877 if (!ir_may_avoid_spill_load(ctx, def, def)) { 3878 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 3879 } 3880 /* avoid load to the same location (valid only when register is not reused) */ 3881 return; 3882 } 3883 } 3884 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 3885 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3886 ir_emit_store(ctx, type, def, def_reg); 3887 } 3888} 3889 3890static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3891{ 3892 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 3893 ir_ref type = val_insn->type; 3894 ir_reg op2_reg = ctx->regs[ref][2]; 3895 ir_reg op3_reg = ctx->regs[ref][3]; 3896 ir_mem mem; 3897 3898 if (op2_reg != IR_REG_NONE) { 3899 if (IR_REG_SPILLED(op2_reg)) { 3900 op2_reg = IR_REG_NUM(op2_reg); 3901 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3902 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3903 } 3904 mem = IR_MEM_B(op2_reg); 3905 } else { 3906 IR_ASSERT(!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)); 3907 mem = ir_fuse_addr(ctx, ref, insn->op2); 3908 if (!IR_IS_CONST_REF(insn->op3) 3909 && IR_REG_SPILLED(op3_reg) 3910 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 3911 && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 3912 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 3913 op3_reg = IR_REG_NUM(op3_reg); 3914 ir_emit_load(ctx, type, op3_reg, insn->op3); 3915 } 3916 /* avoid store to the same location */ 3917 return; 3918 } 3919 } 3920 3921 if (op3_reg != IR_REG_NONE) { 3922 if (IR_REG_SPILLED(op3_reg)) { 3923 op3_reg = 
IR_REG_NUM(op3_reg); 3924 ir_emit_load(ctx, type, op3_reg, insn->op3); 3925 } 3926 } else { 3927 IR_ASSERT(IR_IS_CONST_REF(insn->op3) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op3].op) && ctx->ir_base[insn->op3].val.i64 == 0); 3928 op3_reg = IR_REG_ZR; 3929 } 3930 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 3931} 3932 3933static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3934{ 3935 ir_ref type = ctx->ir_base[insn->op3].type; 3936 ir_reg op2_reg = ctx->regs[ref][2]; 3937 ir_reg op3_reg = ctx->regs[ref][3]; 3938 ir_mem mem; 3939 3940 IR_ASSERT(op3_reg != IR_REG_NONE); 3941 if (op2_reg != IR_REG_NONE) { 3942 if (IR_REG_SPILLED(op2_reg)) { 3943 op2_reg = IR_REG_NUM(op2_reg); 3944 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3945 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3946 } 3947 mem = IR_MEM_B(op2_reg); 3948 } else { 3949 IR_ASSERT(!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)); 3950 mem = ir_fuse_addr(ctx, ref, insn->op2); 3951 if (!IR_IS_CONST_REF(insn->op3) 3952 && IR_REG_SPILLED(op3_reg) 3953 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 3954 && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 3955 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 3956 op3_reg = IR_REG_NUM(op3_reg); 3957 ir_emit_load(ctx, type, op3_reg, insn->op3); 3958 } 3959 /* avoid store to the same location */ 3960 return; 3961 } 3962 } 3963 3964 if (IR_REG_SPILLED(op3_reg)) { 3965 op3_reg = IR_REG_NUM(op3_reg); 3966 ir_emit_load(ctx, type, op3_reg, insn->op3); 3967 } 3968 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 3969} 3970 3971static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3972{ 3973 ir_reg src_reg = insn->op2; 3974 ir_type type = insn->type; 3975 3976 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { 3977 if (ctx->vregs[def] 3978 && ctx->live_intervals[ctx->vregs[def]] 3979 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { 3980 ir_emit_store(ctx, 
type, def, src_reg); 3981 } 3982 } else { 3983 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3984 3985 if (def_reg == IR_REG_NONE) { 3986 /* op3 is used as a flag that the value is already stored in memory. 3987 * If op3 is set we don't have to store the value once again (in case of spilling) 3988 */ 3989 if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) { 3990 ir_emit_store(ctx, type, def, src_reg); 3991 } 3992 } else { 3993 if (src_reg != def_reg) { 3994 if (IR_IS_TYPE_INT(type)) { 3995 ir_emit_mov(ctx, type, def_reg, src_reg); 3996 } else { 3997 IR_ASSERT(IR_IS_TYPE_FP(type)); 3998 ir_emit_fp_mov(ctx, type, def_reg, src_reg); 3999 } 4000 } 4001 if (IR_REG_SPILLED(ctx->regs[def][0]) 4002 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) { 4003 ir_emit_store(ctx, type, def, def_reg); 4004 } 4005 } 4006 } 4007} 4008 4009static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 4010{ 4011 ir_ref type = ctx->ir_base[insn->op2].type; 4012 ir_reg op2_reg = ctx->regs[ref][2]; 4013 ir_reg dst_reg = insn->op3; 4014 4015 if (op2_reg != IR_REG_NONE) { 4016 if (IR_REG_SPILLED(op2_reg)) { 4017 op2_reg = IR_REG_NUM(op2_reg); 4018 ir_emit_load(ctx, type, op2_reg, insn->op2); 4019 } 4020 if (op2_reg != dst_reg) { 4021 if (IR_IS_TYPE_INT(type)) { 4022 ir_emit_mov(ctx, type, dst_reg, op2_reg); 4023 } else { 4024 IR_ASSERT(IR_IS_TYPE_FP(type)); 4025 ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); 4026 } 4027 } 4028 } else { 4029 ir_emit_load(ctx, type, dst_reg, insn->op2); 4030 } 4031} 4032 4033static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4034{ 4035 ir_backend_data *data = ctx->data; 4036 dasm_State **Dst = &data->dasm_state; 4037 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4038 4039 if (ctx->use_lists[def].count == 1) { 4040 /* dead alloca */ 4041 return; 4042 } 4043 if (IR_IS_CONST_REF(insn->op2)) { 4044 ir_insn *val = &ctx->ir_base[insn->op2]; 4045 int32_t size = 
val->val.i32; 4046 4047 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 4048 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4049 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); 4050 4051 /* Stack must be 16 byte aligned */ 4052 size = IR_ALIGNED_SIZE(size, 16); 4053 if (aarch64_may_encode_imm12(size)) { 4054 | sub sp, sp, #size 4055 } else { 4056 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, size); 4057 | sub sp, sp, Rx(IR_REG_INT_TMP) 4058 } 4059 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 4060 ctx->call_stack_size += size; 4061 } 4062 } else { 4063 int32_t alignment = 16; 4064 ir_reg op2_reg = ctx->regs[def][2]; 4065 ir_type type = ctx->ir_base[insn->op2].type; 4066 4067 IR_ASSERT(ctx->flags & IR_FUNCTION); 4068 IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); 4069 IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 4070 if (IR_REG_SPILLED(op2_reg)) { 4071 op2_reg = IR_REG_NUM(op2_reg); 4072 ir_emit_load(ctx, type, op2_reg, insn->op2); 4073 } 4074 | add Rx(def_reg), Rx(op2_reg), #(alignment-1) 4075 | and Rx(def_reg), Rx(def_reg), #(~(alignment-1)) 4076 | sub sp, sp, Rx(def_reg); 4077 } 4078 if (def_reg != IR_REG_NONE) { 4079 | mov Rx(def_reg), sp 4080 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4081 ir_emit_store(ctx, insn->type, def, def_reg); 4082 } 4083 } else { 4084 ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); 4085 } 4086} 4087 4088static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4089{ 4090 ir_backend_data *data = ctx->data; 4091 dasm_State **Dst = &data->dasm_state; 4092 4093 if (IR_IS_CONST_REF(insn->op2)) { 4094 ir_insn *val = &ctx->ir_base[insn->op2]; 4095 int32_t size = val->val.i32; 4096 4097 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 4098 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4099 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); 4100 4101 /* Stack must be 16 byte aligned */ 4102 size = IR_ALIGNED_SIZE(size, 16); 4103 | add sp, sp, #size 4104 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 4105 ctx->call_stack_size 
-= size; 4106 } 4107 } else { 4108// int32_t alignment = 16; 4109 ir_reg op2_reg = ctx->regs[def][2]; 4110 ir_type type = ctx->ir_base[insn->op2].type; 4111 4112 IR_ASSERT(ctx->flags & IR_FUNCTION); 4113 IR_ASSERT(op2_reg != IR_REG_NONE); 4114 if (IR_REG_SPILLED(op2_reg)) { 4115 op2_reg = IR_REG_NUM(op2_reg); 4116 ir_emit_load(ctx, type, op2_reg, insn->op2); 4117 } 4118 4119 // TODO: alignment 4120 4121 | add sp, sp, Rx(op2_reg); 4122 } 4123} 4124 4125static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4126{ 4127 ir_backend_data *data = ctx->data; 4128 dasm_State **Dst = &data->dasm_state; 4129 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4130 4131 | mov Rx(def_reg), sp 4132 4133 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4134 ir_emit_store(ctx, IR_ADDR, def, def_reg); 4135 } 4136} 4137 4138static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4139{ 4140 ir_backend_data *data = ctx->data; 4141 dasm_State **Dst = &data->dasm_state; 4142 ir_reg op2_reg = ctx->regs[def][2]; 4143 4144 IR_ASSERT(op2_reg != IR_REG_NONE); 4145 if (IR_REG_SPILLED(op2_reg)) { 4146 op2_reg = IR_REG_NUM(op2_reg); 4147 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4148 } 4149 4150 | mov sp, Rx(op2_reg) 4151} 4152 4153static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) 4154{ 4155 ir_backend_data *data = ctx->data; 4156 dasm_State **Dst = &data->dasm_state; 4157 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4158 4159 if (ctx->flags & IR_USE_FRAME_POINTER) { 4160 | mov Rx(def_reg), Rx(IR_REG_X29) 4161 } else { 4162 | add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size) 4163 } 4164 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4165 ir_emit_store(ctx, IR_ADDR, def, def_reg); 4166 } 4167} 4168 4169static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4170{ 4171#ifdef __APPLE__ 4172 ir_backend_data *data = ctx->data; 4173 dasm_State **Dst = &data->dasm_state; 4174 ir_reg fp; 4175 int arg_area_offset; 4176 ir_reg 
op2_reg = ctx->regs[def][2]; 4177 ir_reg tmp_reg = ctx->regs[def][3]; 4178 int32_t offset; 4179 4180 IR_ASSERT(tmp_reg != IR_REG_NONE); 4181 if (op2_reg != IR_REG_NONE) { 4182 if (IR_REG_SPILLED(op2_reg)) { 4183 op2_reg = IR_REG_NUM(op2_reg); 4184 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4185 } 4186 offset = 0; 4187 } else { 4188 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 4189 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 4190 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 4191 } 4192 4193 if (ctx->flags & IR_USE_FRAME_POINTER) { 4194 fp = IR_REG_FRAME_POINTER; 4195 arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; 4196 } else { 4197 fp = IR_REG_STACK_POINTER; 4198 arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; 4199 } 4200 | add Rx(tmp_reg), Rx(fp), #arg_area_offset 4201 | str Rx(tmp_reg), [Rx(op2_reg), #offset] 4202#else 4203 ir_backend_data *data = ctx->data; 4204 dasm_State **Dst = &data->dasm_state; 4205 ir_reg fp; 4206 int reg_save_area_offset; 4207 int overflow_arg_area_offset; 4208 ir_reg op2_reg = ctx->regs[def][2]; 4209 ir_reg tmp_reg = ctx->regs[def][3]; 4210 int32_t offset; 4211 4212 IR_ASSERT(tmp_reg != IR_REG_NONE); 4213 if (op2_reg != IR_REG_NONE) { 4214 if (IR_REG_SPILLED(op2_reg)) { 4215 op2_reg = IR_REG_NUM(op2_reg); 4216 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4217 } 4218 offset = 0; 4219 } else { 4220 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 4221 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 4222 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 4223 } 4224 4225 if (ctx->flags & IR_USE_FRAME_POINTER) { 4226 fp = IR_REG_FRAME_POINTER; 4227 reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2; 4228 overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; 4229 } else { 4230 fp = IR_REG_STACK_POINTER; 4231 reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; 4232 overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; 4233 } 4234 4235 /* Set va_list.stack */ 4236 | add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset 4237 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] 4238 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 4239 reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS; 4240 /* Set va_list.gr_top */ 4241 if (overflow_arg_area_offset != reg_save_area_offset) { 4242 | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset 4243 } 4244 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))] 4245 /* Set va_list.gr_offset */ 4246 | movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params))) 4247 | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] 4248 } else { 4249 /* Set va_list.gr_offset */ 4250 | str wzr, [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] 4251 } 4252 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 4253 reg_save_area_offset += 16 * IR_REG_FP_ARGS; 4254 /* Set va_list.vr_top */ 4255 if (overflow_arg_area_offset != reg_save_area_offset) { 4256 | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset 4257 } 4258 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))] 4259 /* Set va_list.vr_offset */ 4260 | movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params))) 4261 | str 
Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] 4262 } else { 4263 /* Set va_list.vr_offset */ 4264 | str wzr, [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] 4265 } 4266#endif 4267} 4268 4269static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4270{ 4271#ifdef __APPLE__ 4272 ir_backend_data *data = ctx->data; 4273 dasm_State **Dst = &data->dasm_state; 4274 ir_reg tmp_reg = ctx->regs[def][1]; 4275 ir_reg op2_reg = ctx->regs[def][2]; 4276 ir_reg op3_reg = ctx->regs[def][3]; 4277 int32_t op2_offset, op3_offset; 4278 4279 IR_ASSERT(tmp_reg != IR_REG_NONE); 4280 if (op2_reg != IR_REG_NONE) { 4281 if (IR_REG_SPILLED(op2_reg)) { 4282 op2_reg = IR_REG_NUM(op2_reg); 4283 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4284 } 4285 op2_offset = 0; 4286 } else { 4287 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 4288 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 4289 op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 4290 } 4291 if (op3_reg != IR_REG_NONE) { 4292 if (IR_REG_SPILLED(op3_reg)) { 4293 op3_reg = IR_REG_NUM(op3_reg); 4294 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 4295 } 4296 op3_offset = 0; 4297 } else { 4298 IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); 4299 op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
		                                                   IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
		op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3);
	}
	/* Apple/AArch64 branch: the whole va_list state fits in a single
	 * 8-byte slot here, so one load/store pair copies it. */
	|	ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset]
	|	str Rx(tmp_reg), [Rx(op2_reg), #op2_offset]
#else
	/* Generic AArch64 branch of ir_emit_va_copy: copy the source va_list
	 * (insn->op3) into the destination va_list (insn->op2).
	 * op2/op3 are either already in registers (loading from spill slots
	 * when necessary) or are statically allocated stack slots addressed
	 * relative to the frame/stack pointer. */
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg tmp_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg op3_reg = ctx->regs[def][3];
	int32_t op2_offset, op3_offset;

	IR_ASSERT(tmp_reg != IR_REG_NONE);
	if (op2_reg != IR_REG_NONE) {
		/* destination address lives in a register */
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
		}
		op2_offset = 0;
	} else {
		/* destination is a fixed stack slot: address it via FP/SP */
		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
		op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
	}
	if (op3_reg != IR_REG_NONE) {
		/* source address lives in a register */
		if (IR_REG_SPILLED(op3_reg)) {
			op3_reg = IR_REG_NUM(op3_reg);
			ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3);
		}
		op3_offset = 0;
	} else {
		/* source is a fixed stack slot: address it via FP/SP */
		IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA);
		op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ?
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 4333 op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); 4334 } 4335 | ldr Rx(tmp_reg), [Rx(op3_reg), #op3_offset] 4336 | str Rx(tmp_reg), [Rx(op2_reg), #op2_offset] 4337 | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+8)] 4338 | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+8)] 4339 | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+16)] 4340 | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+16)] 4341 | ldr Rx(tmp_reg), [Rx(op3_reg), #(op3_offset+24)] 4342 | str Rx(tmp_reg), [Rx(op2_reg), #(op2_offset+24)] 4343#endif 4344} 4345 4346static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4347{ 4348#ifdef __APPLE__ 4349 ir_backend_data *data = ctx->data; 4350 dasm_State **Dst = &data->dasm_state; 4351 ir_type type = insn->type; 4352 ir_reg def_reg = ctx->regs[def][0]; 4353 ir_reg op2_reg = ctx->regs[def][2]; 4354 ir_reg tmp_reg = ctx->regs[def][3]; 4355 int32_t offset; 4356 4357 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4358 if (op2_reg != IR_REG_NONE) { 4359 if (IR_REG_SPILLED(op2_reg)) { 4360 op2_reg = IR_REG_NUM(op2_reg); 4361 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4362 } 4363 offset = 0; 4364 } else { 4365 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 4366 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
		                                                   IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
		offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
	}
	/* Apple/AArch64 va_arg: the va_list slot holds the current argument
	 * pointer.  Load it, fetch the value, then advance the pointer by the
	 * (at least pointer-sized) argument width and store it back. */
	|	ldr Rx(tmp_reg), [Rx(op2_reg), #offset]
	ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0));
	|	add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*))
	|	str Rx(tmp_reg), [Rx(op2_reg), #offset]
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		/* result register was marked for spilling -> store it as well */
		ir_emit_store(ctx, type, def, def_reg);
	}
#else
	/* Generic AArch64 branch of ir_emit_va_arg: read the next variadic
	 * argument of type insn->type from the va_list referenced by insn->op2
	 * into def_reg.  The va_list is addressed either through a register or
	 * as a statically allocated stack slot via FP/SP. */
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_reg def_reg = ctx->regs[def][0];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg tmp_reg = ctx->regs[def][3];
	int32_t offset;

	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
	if (op2_reg != IR_REG_NONE) {
		/* va_list address is in a register (reload from spill if needed) */
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
		}
		offset = 0;
	} else {
		/* va_list is a fixed stack slot: address it via FP/SP */
		IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
		op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ?
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 4395 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 4396 } 4397 if (IR_IS_TYPE_INT(type)) { 4398 | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] 4399 | cmp Rw(tmp_reg), wzr 4400 | bge >1 4401 | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_top))] 4402 | sxtw Rx(tmp_reg), Rw(tmp_reg) 4403 | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) 4404 | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] 4405 | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*) 4406 | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, gr_offset))] 4407 | b >2 4408 |1: 4409 | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] 4410 | ldr Rx(def_reg), [Rx(tmp_reg)] 4411 | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) 4412 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] 4413 |2: 4414 } else { 4415 | ldr Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] 4416 | cmp Rw(tmp_reg), wzr 4417 | bge >1 4418 | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_top))] 4419 | sxtw Rx(tmp_reg), Rw(tmp_reg) 4420 | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) 4421 | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] 4422 | add Rw(tmp_reg), Rw(tmp_reg), #16 4423 | str Rw(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, vr_offset))] 4424 | b >2 4425 |1: 4426 | ldr Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] 4427 | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] 4428 | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) 4429 | str Rx(tmp_reg), [Rx(op2_reg), #(offset+offsetof(ir_va_list, stack))] 4430 |2: 4431 } 4432 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4433 ir_emit_store(ctx, type, def, def_reg); 4434 } 4435#endif 4436} 4437 4438static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 4439{ 4440 ir_backend_data *data = ctx->data; 4441 dasm_State **Dst = 
&data->dasm_state; 4442 ir_type type; 4443 ir_block *bb; 4444 ir_insn *use_insn, *val; 4445 uint32_t n, *p, use_block; 4446 int i; 4447 int label, default_label = 0; 4448 int count = 0; 4449 ir_val min, max; 4450 ir_reg op1_reg, op2_reg, tmp_reg; 4451 4452 type = ctx->ir_base[insn->op2].type; 4453 if (IR_IS_TYPE_SIGNED(type)) { 4454 min.u64 = 0x7fffffffffffffff; 4455 max.u64 = 0x8000000000000000; 4456 } else { 4457 min.u64 = 0xffffffffffffffff; 4458 max.u64 = 0x0; 4459 } 4460 4461 bb = &ctx->cfg_blocks[b]; 4462 p = &ctx->cfg_edges[bb->successors]; 4463 for (n = bb->successors_count; n != 0; p++, n--) { 4464 use_block = *p; 4465 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 4466 if (use_insn->op == IR_CASE_VAL) { 4467 val = &ctx->ir_base[use_insn->op2]; 4468 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4469 if (IR_IS_TYPE_SIGNED(type)) { 4470 IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); 4471 min.i64 = IR_MIN(min.i64, val->val.i64); 4472 max.i64 = IR_MAX(max.i64, val->val.i64); 4473 } else { 4474 IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); 4475 min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); 4476 max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); 4477 } 4478 count++; 4479 } else { 4480 IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); 4481 default_label = ir_skip_empty_target_blocks(ctx, use_block); 4482 } 4483 } 4484 4485 op1_reg = ctx->regs[def][1]; 4486 op2_reg = ctx->regs[def][2]; 4487 tmp_reg = ctx->regs[def][3]; 4488 4489 IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4490 if (IR_REG_SPILLED(op2_reg)) { 4491 op2_reg = IR_REG_NUM(op2_reg); 4492 ir_emit_load(ctx, type, op2_reg, insn->op2); 4493 } 4494 4495 /* Generate a table jmp or a sequence of calls */ 4496 if (count > 2 && (max.i64-min.i64) < count * 8) { 4497 int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1)); 4498 4499 for (i = 0; i <= (max.i64 - min.i64); i++) { 4500 labels[i] = default_label; 4501 } 4502 p = &ctx->cfg_edges[bb->successors]; 4503 for (n = bb->successors_count; n 
!= 0; p++, n--) { 4504 use_block = *p; 4505 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 4506 if (use_insn->op == IR_CASE_VAL) { 4507 val = &ctx->ir_base[use_insn->op2]; 4508 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4509 label = ir_skip_empty_target_blocks(ctx, use_block); 4510 labels[val->val.i64 - min.i64] = label; 4511 } 4512 } 4513 4514 if (default_label) { 4515 if (aarch64_may_encode_imm12(max.i64)) { 4516 | ASM_REG_IMM_OP cmp, type, op2_reg, max.i64 4517 } else { 4518 ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64); 4519 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg 4520 } 4521 if (IR_IS_TYPE_SIGNED(type)) { 4522 | bgt =>default_label 4523 } else { 4524 | bhi =>default_label 4525 } 4526 } 4527 4528 if (op1_reg == IR_REG_NONE) { 4529 op1_reg = op2_reg; 4530 } 4531 if (aarch64_may_encode_imm12(min.i64)) { 4532 | ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64 4533 } else { 4534 ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64); 4535 | ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg 4536 } 4537 4538 if (default_label) { 4539 if (IR_IS_TYPE_SIGNED(type)) { 4540 | blt =>default_label 4541 } else { 4542 | blo =>default_label 4543 } 4544 } 4545 4546 | adr Rx(tmp_reg), >1 4547 | ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3] 4548 | br Rx(tmp_reg) 4549 |.jmp_table 4550 if (!data->jmp_table_label) { 4551 data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; 4552 |=>data->jmp_table_label: 4553 } 4554 |.align 8 4555 |1: 4556 for (i = 0; i <= (max.i64 - min.i64); i++) { 4557 int b = labels[i]; 4558 if (b) { 4559 ir_block *bb = &ctx->cfg_blocks[b]; 4560 ir_insn *insn = &ctx->ir_base[bb->end]; 4561 4562 if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { 4563 ir_ref prev = ctx->prev_ref[bb->end]; 4564 if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { 4565 prev = ctx->prev_ref[prev]; 4566 } 4567 if (prev == bb->start) { 4568 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 4569 4570 | 
.addr &addr 4571 if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { 4572 bb->flags |= IR_BB_EMPTY; 4573 } 4574 continue; 4575 } 4576 } 4577 | .addr =>b 4578 } else { 4579 | .addr 0 4580 } 4581 } 4582 |.code 4583 ir_mem_free(labels); 4584 } else { 4585 p = &ctx->cfg_edges[bb->successors]; 4586 for (n = bb->successors_count; n != 0; p++, n--) { 4587 use_block = *p; 4588 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 4589 if (use_insn->op == IR_CASE_VAL) { 4590 val = &ctx->ir_base[use_insn->op2]; 4591 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4592 label = ir_skip_empty_target_blocks(ctx, use_block); 4593 if (aarch64_may_encode_imm12(val->val.i64)) { 4594 | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 4595 } else { 4596 ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); 4597 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg 4598 4599 } 4600 | beq =>label 4601 } 4602 } 4603 if (default_label) { 4604 | b =>default_label 4605 } 4606 } 4607} 4608 4609static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) 4610{ 4611 int j, n; 4612 ir_type type; 4613 int int_param = 0; 4614 int fp_param = 0; 4615 int int_reg_params_count = IR_REG_INT_ARGS; 4616 int fp_reg_params_count = IR_REG_FP_ARGS; 4617 int32_t used_stack = 0; 4618#ifdef __APPLE__ 4619 const ir_proto_t *proto = ir_call_proto(ctx, insn); 4620 int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? 
	                                                                       proto->params_count + 2 : insn->inputs_count;
#endif

	/* Walk the call arguments (inputs 3..n) and simulate the register
	 * assignment to find out how many bytes must be passed on the stack.
	 * Each stack-passed argument occupies at least a pointer-sized slot. */
	n = insn->inputs_count;
	for (j = 3; j <= n; j++) {
		type = ctx->ir_base[ir_insn_op(insn, j)].type;
#ifdef __APPLE__
		/* On Apple platforms, arguments past the named parameters of a
		 * variadic callee always go on the stack. */
		if (j > last_named_input) {
			used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
		} else
#endif
		if (IR_IS_TYPE_INT(type)) {
			if (int_param >= int_reg_params_count) {
				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
			}
			int_param++;
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (fp_param >= fp_reg_params_count) {
				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
			}
			fp_param++;
		}
	}

	return used_stack;
}

/* Marshal the arguments of a CALL/TAILCALL instruction into argument
 * registers and stack slots.  Works in three passes (see comments below):
 * register->stack moves first, then parallel register->register copies,
 * finally constant/memory operands.  Returns the number of stack bytes
 * reserved for the call (0 when the frame already provides them). */
static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int j, n;
	ir_ref arg;
	ir_insn *arg_insn;
	uint8_t type;
	ir_reg src_reg, dst_reg;
	int int_param = 0;
	int fp_param = 0;
	int count = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t used_stack, stack_offset = 0;
	ir_copy *copies;
	bool do_pass3 = 0;
	/* For temporaries we may use any scratch registers except for registers used for parameters */
	ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */

	n = insn->inputs_count;
	if (n < 3) {
		/* no arguments at all */
		return 0;
	}

	if (tmp_reg == IR_REG_NONE) {
		tmp_reg = IR_REG_IP0;
	}

	if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) {
		// TODO: support for preallocated stack
		used_stack = 0;
	} else {
		used_stack = ir_call_used_stack(ctx, insn);
		/* Stack must be 16 byte aligned */
		used_stack = IR_ALIGNED_SIZE(used_stack, 16);
4686 if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { 4687 used_stack = 0; 4688 } else { 4689 ctx->call_stack_size += used_stack; 4690 if (used_stack) { 4691 if (insn->op == IR_TAILCALL && !(ctx->flags & IR_USE_FRAME_POINTER)) { 4692 ctx->flags |= IR_USE_FRAME_POINTER; 4693 | stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]! 4694 | mov x29, sp 4695 } 4696 | sub sp, sp, #used_stack 4697 } 4698 } 4699 } 4700 4701#ifdef __APPLE__ 4702 const ir_proto_t *proto = ir_call_proto(ctx, insn); 4703 int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count; 4704#endif 4705 4706 /* 1. move all register arguments that should be passed through stack 4707 * and collect arguments that should be passed through registers */ 4708 copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); 4709 for (j = 3; j <= n; j++) { 4710 arg = ir_insn_op(insn, j); 4711 src_reg = ir_get_alocated_reg(ctx, def, j); 4712 arg_insn = &ctx->ir_base[arg]; 4713 type = arg_insn->type; 4714#ifdef __APPLE__ 4715 if (j > last_named_input) { 4716 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4717 } else 4718#endif 4719 if (IR_IS_TYPE_INT(type)) { 4720 if (int_param < int_reg_params_count) { 4721 dst_reg = int_reg_params[int_param]; 4722 } else { 4723 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4724 } 4725 int_param++; 4726 } else { 4727 IR_ASSERT(IR_IS_TYPE_FP(type)); 4728 if (fp_param < fp_reg_params_count) { 4729 dst_reg = fp_reg_params[fp_param]; 4730 } else { 4731 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4732 } 4733 fp_param++; 4734 } 4735 if (dst_reg != IR_REG_NONE) { 4736 if (src_reg == IR_REG_NONE) { 4737 /* delay CONST->REG and MEM->REG moves to third pass */ 4738 do_pass3 = 1; 4739 } else { 4740 IR_ASSERT(src_reg != IR_REG_NONE); 4741 if (IR_REG_SPILLED(src_reg)) { 4742 src_reg = IR_REG_NUM(src_reg); 4743 ir_emit_load(ctx, type, src_reg, arg); 4744 } 4745 if (src_reg != dst_reg) { 4746 
/* delay REG->REG moves to second pass */ 4747 copies[count].type = type; 4748 copies[count].from = src_reg; 4749 copies[count].to = dst_reg; 4750 count++; 4751 } 4752 } 4753 } else { 4754 /* Pass register arguments to stack (REG->MEM moves) */ 4755 if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { 4756 ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 4757 } else { 4758 do_pass3 = 1; 4759 } 4760 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 4761 } 4762 } 4763 4764 /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ 4765 if (count) { 4766 ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); 4767 } 4768 ir_mem_free(copies); 4769 4770 /* 3. move the remaining memory and immediate values */ 4771 if (do_pass3) { 4772 stack_offset = 0; 4773 int_param = 0; 4774 fp_param = 0; 4775 for (j = 3; j <= n; j++) { 4776 arg = ir_insn_op(insn, j); 4777 src_reg = ir_get_alocated_reg(ctx, def, j); 4778 arg_insn = &ctx->ir_base[arg]; 4779 type = arg_insn->type; 4780#ifdef __APPLE__ 4781 if (j > last_named_input) { 4782 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4783 } else 4784#endif 4785 if (IR_IS_TYPE_INT(type)) { 4786 if (int_param < int_reg_params_count) { 4787 dst_reg = int_reg_params[int_param]; 4788 } else { 4789 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 4790 } 4791 int_param++; 4792 } else { 4793 IR_ASSERT(IR_IS_TYPE_FP(type)); 4794 if (fp_param < fp_reg_params_count) { 4795 dst_reg = fp_reg_params[fp_param]; 4796 } else { 4797 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 4798 } 4799 fp_param++; 4800 } 4801 if (dst_reg != IR_REG_NONE) { 4802 if (src_reg == IR_REG_NONE) { 4803 if (IR_IS_CONST_REF(arg) && IR_IS_TYPE_INT(type)) { 4804 if (ir_type_size[type] == 1) { 4805 type = IR_ADDR; 4806 } 4807 } 4808 ir_emit_load(ctx, type, dst_reg, arg); 4809 } 4810 } else { 4811 if 
(IR_IS_TYPE_INT(type)) { 4812 if (src_reg == IR_REG_NONE) { 4813 IR_ASSERT(tmp_reg != IR_REG_NONE); 4814 ir_emit_load(ctx, type, tmp_reg, arg); 4815 if (IR_IS_CONST_REF(arg)) { 4816 type = IR_ADDR; //TODO: ??? 4817 } 4818 ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); 4819 } else if (IR_REG_SPILLED(src_reg)) { 4820 src_reg = IR_REG_NUM(src_reg); 4821 ir_emit_load(ctx, type, src_reg, arg); 4822 ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 4823 } 4824 } else { 4825 if (src_reg == IR_REG_NONE) { 4826 IR_ASSERT(tmp_fp_reg != IR_REG_NONE); 4827 ir_emit_load(ctx, type, tmp_fp_reg, arg); 4828 ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_fp_reg); 4829 } else if (IR_REG_SPILLED(src_reg)) { 4830 src_reg = IR_REG_NUM(src_reg); 4831 ir_emit_load(ctx, type, src_reg, arg); 4832 ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 4833 } 4834 } 4835 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 4836 } 4837 } 4838 } 4839 return used_stack; 4840} 4841 4842static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) 4843{ 4844 ir_backend_data *data = ctx->data; 4845 dasm_State **Dst = &data->dasm_state; 4846 ir_reg def_reg; 4847 4848 if (IR_IS_CONST_REF(insn->op2)) { 4849 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 4850 4851 if (aarch64_may_use_b(ctx->code_buffer, addr)) { 4852 | bl &addr 4853 } else { 4854 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 4855 | blr Rx(IR_REG_INT_TMP) 4856 } 4857 } else { 4858 ir_reg op2_reg = ctx->regs[def][2]; 4859 4860 IR_ASSERT(op2_reg != IR_REG_NONE); 4861 if (IR_REG_SPILLED(op2_reg)) { 4862 op2_reg = IR_REG_NUM(op2_reg); 4863 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4864 } 4865 | blr Rx(op2_reg) 4866 } 4867 4868 if (used_stack) { 4869 | add sp, sp, #used_stack 4870 ctx->call_stack_size -= 
	                       used_stack;
	}

	/* Fetch the call's return value (if any) from the ABI return register
	 * into the register allocated for this instruction, spilling to the
	 * stack slot when the allocator requested it or when no register was
	 * assigned but the value has uses. */
	if (insn->type != IR_VOID) {
		if (IR_IS_TYPE_INT(insn->type)) {
			def_reg = IR_REG_NUM(ctx->regs[def][0]);
			if (def_reg != IR_REG_NONE) {
				if (def_reg != IR_REG_INT_RET1) {
					ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, insn->type, def, def_reg);
				}
			} else if (ctx->use_lists[def].count > 1) {
				/* no register assigned, but the result is used -> spill it */
				ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(insn->type));
			def_reg = IR_REG_NUM(ctx->regs[def][0]);
			if (def_reg != IR_REG_NONE) {
				if (def_reg != IR_REG_FP_RET1) {
					ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, insn->type, def, def_reg);
				}
			} else if (ctx->use_lists[def].count > 1) {
				ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
			}
		}
	}
}

/* Emit a CALL: marshal the arguments, then emit the call itself and the
 * return-value handling. */
static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
	ir_emit_call_ex(ctx, def, insn, used_stack);
}

/* Emit a TAILCALL.  When the arguments need extra stack space, fall back
 * to an ordinary call followed by a void return; otherwise tear down the
 * frame and jump straight to the target. */
static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);

	if (used_stack != 0) {
		/* cannot tail-jump with stack-passed arguments -> real call + ret */
		ir_emit_call_ex(ctx, def, insn, used_stack);
		ir_emit_return_void(ctx);
		return;
	}

	ir_emit_epilogue(ctx);

	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);

		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
			/* target reachable by a direct 26-bit branch */
			|	b &addr
		} else {
			/* load the absolute address and jump through a register */
			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
			|	br Rx(IR_REG_INT_TMP)
		}
	} else {
		ir_reg op2_reg = ctx->regs[def][2];

		IR_ASSERT(op2_reg != IR_REG_NONE);
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
		}
		/* indirect tail jump through the register holding the target */
		|	br Rx(op2_reg)
	}
}

/* Emit an indirect jump (IJMP): branch through a register when the target
 * is dynamic, or to a constant address (direct `b` when within range,
 * otherwise through IR_REG_INT_TMP). */
static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg op2_reg = ctx->regs[def][2];

	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
		}
		|	br Rx(op2_reg)
	} else if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]);

		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
			|	b &addr
		} else {
			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
			|	br Rx(IR_REG_INT_TMP)
		}
	} else {
		/* a non-constant target must have been given a register */
		IR_ASSERT(0);
	}
}

/* Emit a GUARD/GUARD_NOT over an integer value: when the guard fails,
 * jump to the side-exit address taken from insn->op3. */
static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_type type = ctx->ir_base[insn->op2].type;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (IR_IS_CONST_REF(insn->op2)) {
		/* constant condition: the guard either always passes (emit nothing)
		 * or always fails (emit an unconditional jump to the exit) */
		bool is_true = ir_ref_is_true(ctx, insn->op2);

		if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) {
			if (IR_IS_CONST_REF(insn->op3)) {
				void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

				if (aarch64_may_use_b(ctx->code_buffer, addr)) {
					|	b &addr
				} else {
					ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
					|	br Rx(IR_REG_INT_TMP)
				}
			} else {
				IR_ASSERT(0);
			}
		}
		return;
	}

	IR_ASSERT(op2_reg != IR_REG_NONE);
	if (IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	}

	/* dynamic condition: test the register and branch to the exit address */
	if
	   (IR_IS_CONST_REF(insn->op3)) {
		void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

		/* GUARD exits when the value is zero (cbz), GUARD_NOT when it is
		 * non-zero (cbnz); pick the 64- or 32-bit register view by size. */
		if (insn->op == IR_GUARD) {
			if (ir_type_size[type] == 8) {
				|	cbz Rx(op2_reg), &addr
			} else {
				|	cbz Rw(op2_reg), &addr
			}
		} else {
			if (ir_type_size[type] == 8) {
				|	cbnz Rx(op2_reg), &addr
			} else {
				|	cbnz Rw(op2_reg), &addr
			}
		}
	} else {
		IR_ASSERT(0);
	}
}

/* Emit a zero/non-zero side exit for a guard that was reduced to a
 * comparison with zero: op == IR_EQ exits when reg != 0 (cbnz),
 * op == IR_NE exits when reg == 0 (cbz). */
static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (op == IR_EQ) {
		if (ir_type_size[type] == 8) {
			|	cbnz Rx(reg), &addr
		} else {
			|	cbnz Rw(reg), &addr
		}
	} else {
		IR_ASSERT(op == IR_NE);
		if (ir_type_size[type] == 8) {
			|	cbz Rx(reg), &addr
		} else {
			|	cbz Rw(reg), &addr
		}
	}
}

/* Emit a conditional branch to `addr` for comparison result `op`.
 * `int_cmp` selects the condition-code mapping: the integer mapping uses
 * the full signed (lt/ge/le/gt) and unsigned (lo/hs/ls/hi) sets, the FP
 * mapping (after an fcmp) uses a reduced set. */
static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (int_cmp) {
		switch (op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_EQ:
				|	beq &addr
				break;
			case IR_NE:
				|	bne &addr
				break;
			case IR_LT:
				|	blt &addr
				break;
			case IR_GE:
				|	bge &addr
				break;
			case IR_LE:
				|	ble &addr
				break;
			case IR_GT:
				|	bgt &addr
				break;
			case IR_ULT:
				|	blo &addr
				break;
			case IR_UGE:
				|	bhs &addr
				break;
			case IR_ULE:
				|	bls &addr
				break;
			case IR_UGT:
				|	bhi &addr
				break;
		}
	} else {
		/* NOTE(review): the FP mapping uses bmi/bls for LT/LE — presumably
		 * chosen for the flag pattern fcmp produces on unordered (NaN)
		 * operands; confirm against the Arm condition-code tables. */
		switch (op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_EQ:
				|	beq &addr
				break;
			case IR_NE:
				|	bne &addr
				break;
			case IR_LT:
				|	bmi &addr
				break;
			case IR_GE:
				|	bge &addr
				break;
			case IR_LE:
				|	bls &addr
				break;
			case IR_GT:
				|	bgt &addr
				break;
//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
		}
	}
}

/* Emit a guard fused with an integer comparison (insn->op2 is the CMP):
 * perform the compare and branch to the side-exit address (insn->op3)
 * when the guarded condition fails.  Comparisons against constant zero
 * are special-cased below. */
static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_op op = cmp_insn->op;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_ref op1 = cmp_insn->op1;
	ir_ref op2 = cmp_insn->op2;
	ir_reg op1_reg = ctx->regs[insn->op2][1];
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	void *addr;

	/* reload spilled comparison operands */
	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}

	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

	if (IR_IS_CONST_REF(op2)
	 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)
	 && ctx->ir_base[op2].val.u64 == 0) {
		/* comparisons with the constant 0 simplify:
		 * x <u 0 is always false, x >=u 0 always true,
		 * x <=u 0 is x == 0, x >u 0 is x != 0 */
		if (op == IR_ULT) {
			/* always false */
			if (aarch64_may_use_b(ctx->code_buffer, addr)) {
				|	b &addr
			} else {
				ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
				|	br Rx(IR_REG_INT_TMP)
			}
			return;
		} else if (op == IR_UGE) {
			/* always true */
			return;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
		if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
			/* EQ/NE against zero can use cbz/cbnz without a cmp */
			if (insn->op == IR_GUARD_NOT) {
				op ^= 1; // reverse
			}
			ir_emit_guard_jz(ctx, op, addr, type, op1_reg);
return; 5168 } 5169 } 5170 ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); 5171 5172 if (insn->op == IR_GUARD) { 5173 op ^= 1; // reverse 5174 } 5175 5176 ir_emit_guard_jcc(ctx, op, addr, 1); 5177} 5178 5179static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 5180{ 5181 ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); 5182 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 5183 5184 if (insn->op == IR_GUARD) { 5185 op ^= 1; // reverse 5186 } 5187 ir_emit_guard_jcc(ctx, op, addr, 0); 5188} 5189 5190static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5191{ 5192 ir_backend_data *data = ctx->data; 5193 dasm_State **Dst = &data->dasm_state; 5194 ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; 5195 ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; 5196 ir_type type = math_insn->type; 5197 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 5198 5199 IR_ASSERT(IR_IS_TYPE_INT(type)); 5200 if (math_insn->op == IR_MUL_OV) { 5201 if (insn->op == IR_GUARD) { 5202 | beq &addr 5203 } else { 5204 | bne &addr 5205 } 5206 } else if (IR_IS_TYPE_SIGNED(type)) { 5207 if (insn->op == IR_GUARD) { 5208 | bvc &addr 5209 } else { 5210 | bvs &addr 5211 } 5212 } else { 5213 if (insn->op == IR_GUARD) { 5214 | bcc &addr 5215 } else { 5216 | bcs &addr 5217 } 5218 } 5219} 5220 5221static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5222{ 5223 ir_backend_data *data = ctx->data; 5224 dasm_State **Dst = &data->dasm_state; 5225 uint32_t code; 5226 ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); 5227 5228 if (ctx->use_lists[def].count == 1) { 5229 /* dead load */ 5230 return; 5231 } 5232 5233||#ifdef __APPLE__ 5234|| code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0 5235| .long code 5236| and Rx(reg), Rx(reg), #0xfffffffffffffff8 5237|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1 5238|//??? 
MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1 5239||#else 5240|| code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0 5241| .long code 5242||//??? IR_ASSERT(insn->op2 <= LDR_STR_PIMM64); 5243| ldr Rx(reg), [Rx(reg), #insn->op2] 5244||#endif 5245 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5246 ir_emit_store(ctx, IR_ADDR, def, reg); 5247 } 5248} 5249 5250static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5251{ 5252 ir_backend_data *data = ctx->data; 5253 dasm_State **Dst = &data->dasm_state; 5254 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5255 5256 IR_ASSERT(def_reg != IR_REG_NONE); 5257 5258 | stp d30, d31, [sp, #-16]! 5259 | stp d28, d29, [sp, #-16]! 5260 | stp d26, d27, [sp, #-16]! 5261 | stp d24, d25, [sp, #-16]! 5262 | stp d22, d23, [sp, #-16]! 5263 | stp d20, d21, [sp, #-16]! 5264 | stp d18, d19, [sp, #-16]! 5265 | stp d16, d17, [sp, #-16]! 5266 | stp d14, d15, [sp, #-16]! 5267 | stp d12, d13, [sp, #-16]! 5268 | stp d10, d11, [sp, #-16]! 5269 | stp d8, d9, [sp, #-16]! 5270 | stp d6, d7, [sp, #-16]! 5271 | stp d4, d5, [sp, #-16]! 5272 | stp d2, d3, [sp, #-16]! 5273 | stp d0, d1, [sp, #-16]! 5274 5275 | str x30, [sp, #-16]! 5276 | stp x28, x29, [sp, #-16]! 5277 | stp x26, x27, [sp, #-16]! 5278 | stp x24, x25, [sp, #-16]! 5279 | stp x22, x23, [sp, #-16]! 5280 | stp x20, x21, [sp, #-16]! 5281 | stp x18, x19, [sp, #-16]! 5282 | stp x16, x17, [sp, #-16]! 5283 | stp x14, x15, [sp, #-16]! 5284 | stp x12, x13, [sp, #-16]! 5285 | stp x10, x11, [sp, #-16]! 5286 | stp x8, x9, [sp, #-16]! 5287 | stp x6, x7, [sp, #-16]! 5288 | stp x4, x5, [sp, #-16]! 5289 | stp x2, x3, [sp, #-16]! 5290 | stp x0, x1, [sp, #-16]! 

	/* ARG2 = pointer to the register save area (current SP);
	 * the pre-save SP (save area + 2*256 bytes) is stored into the padding
	 * slot next to the saved x30 (offset 31*8) — presumably so the handler
	 * sees the original SP in the snapshot; ARG1 = exit-point number that the
	 * exit thunk left in the temporary register (see ir_emit_exitgroup). */
	|	mov Rx(IR_REG_INT_ARG2), sp
	|	add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8)
	|	str Rx(IR_REG_INT_ARG1), [sp, #(31*8)]
	|	mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP)

	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);

		if (aarch64_may_use_b(ctx->code_buffer, addr)) {
			|	bl &addr
		} else {
			/* target out of B range: materialize the address and call through a register */
			ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr);
			|	blr Rx(IR_REG_INT_TMP)
		}
	} else {
		IR_ASSERT(0);
	}

	/* drop the 512-byte register save area */
	|	add sp, sp, #(32*8+32*8)

	if (def_reg != IR_REG_INT_RET1) {
		ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Move one incoming parameter from its ABI location to the location chosen
 * by the register allocator.  "from_reg" is the argument register
 * (IR_REG_NONE if the parameter arrived on the stack at "offset" from the
 * frame base); "to_reg" is the destination register (IR_REG_NONE to spill
 * to the slot of ref "to"). */
static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset)
{
	ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;

	IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);

	if (IR_IS_TYPE_INT(type)) {
		if (from_reg != IR_REG_NONE) {
			if (to_reg != IR_REG_NONE) {
				ir_emit_mov(ctx, type, to_reg, from_reg);
			} else {
				ir_emit_store(ctx, type, to, from_reg);
			}
		} else {
			ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset));
		}
	} else {
		if (from_reg != IR_REG_NONE) {
			if (to_reg != IR_REG_NONE) {
				ir_emit_fp_mov(ctx, type, to_reg, from_reg);
			} else {
				ir_emit_store(ctx, type, to, from_reg);
			}
		} else {
			ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset));
		}
	}
}

/* Walk all IR_PARAM uses of the START node (ref 1) and move each incoming
 * parameter from its calling-convention location (argument register or
 * caller stack slot) into the register/spill slot assigned by the allocator. */
static void ir_emit_load_params(ir_ctx *ctx)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_insn *insn;
	ir_ref i, n, *p, use;
	int int_param_num = 0;
	int fp_param_num = 0;
	ir_reg src_reg;
	ir_reg dst_reg;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_offset = 0;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
	} else {
		stack_offset = ctx->stack_frame_size + ctx->call_stack_size;
	}
	n = use_list->count;
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			/* pick the next argument register of the matching class, if any */
			if (IR_IS_TYPE_INT(insn->type)) {
				if (int_param_num < int_reg_params_count) {
					src_reg = int_reg_params[int_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				int_param_num++;
			} else {
				if (fp_param_num < fp_reg_params_count) {
					src_reg = fp_reg_params[fp_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				fp_param_num++;
			}
			if (ctx->vregs[use]) {
				dst_reg = IR_REG_NUM(ctx->regs[use][0]);
				/* a stack parameter with no destination register must already
				 * live in its incoming stack slot (no move needed) */
				IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
					stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
						((ctx->flags & IR_USE_FRAME_POINTER) ?
5395 -(ctx->stack_frame_size - ctx->stack_frame_alignment) : 5396 ctx->call_stack_size)); 5397 if (src_reg != dst_reg) { 5398 ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); 5399 } 5400 if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { 5401 ir_emit_store(ctx, insn->type, use, dst_reg); 5402 } 5403 } 5404 if (src_reg == IR_REG_NONE) { 5405 if (sizeof(void*) == 8) { 5406 stack_offset += sizeof(void*); 5407 } else { 5408 stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 5409 } 5410 } 5411 } 5412 } 5413} 5414 5415static ir_reg ir_get_free_reg(ir_type type, ir_regset available) 5416{ 5417 if (IR_IS_TYPE_INT(type)) { 5418 available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); 5419 } else { 5420 IR_ASSERT(IR_IS_TYPE_FP(type)); 5421 available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); 5422 } 5423 IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); 5424 return IR_REGSET_FIRST(available); 5425} 5426 5427static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) 5428{ 5429 ir_backend_data *data = ctx->data; 5430 ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; 5431 5432 if (to == 0) { 5433 if (IR_IS_TYPE_INT(type)) { 5434 if (ctx->regs[ref][0] == IR_REG_NONE) { 5435 ctx->regs[ref][0] = IR_REG_X0; 5436 } 5437 } else { 5438 IR_ASSERT(IR_IS_TYPE_FP(type)); 5439 if (ctx->regs[ref][1] == IR_REG_NONE) { 5440 ctx->regs[ref][1] = IR_REG_V0; 5441 } 5442 } 5443 } else if (from != 0) { 5444 if (IR_IS_TYPE_INT(type)) { 5445 if (ctx->regs[ref][0] == IR_REG_NONE) { 5446 ctx->regs[ref][0] = IR_REG_X0; 5447 } 5448 } else { 5449 IR_ASSERT(IR_IS_TYPE_FP(type)); 5450 if (ctx->regs[ref][1] == IR_REG_NONE) { 5451 ctx->regs[ref][1] = IR_REG_V0; 5452 } 5453 } 5454 } 5455 return 1; 5456} 5457 5458static void ir_fix_param_spills(ir_ctx *ctx) 5459{ 5460 ir_use_list *use_list = &ctx->use_lists[1]; 5461 ir_insn *insn; 5462 ir_ref i, n, *p, use; 5463 int int_param_num = 0; 5464 int fp_param_num = 0; 5465 ir_reg 
src_reg; 5466 // TODO: Calling convention specific 5467 int int_reg_params_count = IR_REG_INT_ARGS; 5468 int fp_reg_params_count = IR_REG_FP_ARGS; 5469 const int8_t *int_reg_params = _ir_int_reg_params; 5470 const int8_t *fp_reg_params = _ir_fp_reg_params; 5471 int32_t stack_offset = 0; 5472 int32_t param_stack_size = 0; 5473 5474 if (ctx->flags & IR_USE_FRAME_POINTER) { 5475 /* skip old frame pointer and return address */ 5476 stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); 5477 } else { 5478 stack_offset = ctx->stack_frame_size; 5479 } 5480 n = use_list->count; 5481 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 5482 use = *p; 5483 insn = &ctx->ir_base[use]; 5484 if (insn->op == IR_PARAM) { 5485 if (IR_IS_TYPE_INT(insn->type)) { 5486 if (int_param_num < int_reg_params_count) { 5487 src_reg = int_reg_params[int_param_num]; 5488 } else { 5489 src_reg = IR_REG_NONE; 5490 } 5491 int_param_num++; 5492 } else { 5493 if (fp_param_num < fp_reg_params_count) { 5494 src_reg = fp_reg_params[fp_param_num]; 5495 } else { 5496 src_reg = IR_REG_NONE; 5497 } 5498 fp_param_num++; 5499 } 5500 if (src_reg == IR_REG_NONE) { 5501 if (ctx->vregs[use]) { 5502 ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; 5503 if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) 5504 && ival->stack_spill_pos == -1 5505 && (ival->next || ival->reg == IR_REG_NONE)) { 5506 ival->stack_spill_pos = stack_offset; 5507 } 5508 } 5509 if (sizeof(void*) == 8) { 5510 stack_offset += sizeof(void*); 5511 param_stack_size += sizeof(void*); 5512 } else { 5513 stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 5514 param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 5515 } 5516 } 5517 } 5518 } 5519 5520 ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); 5521 ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); 5522 ctx->param_stack_size = param_stack_size; 5523} 5524 5525static void 
ir_allocate_unique_spill_slots(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_insn *insn;
	ir_ref i, n, j, *p;
	uint32_t *rule, insn_flags;
	ir_backend_data *data = ctx->data;
	ir_regset available = 0;
	ir_target_constraints constraints;
	uint32_t def_flags;
	ir_reg reg;

	/* Simplified (non-linear-scan) allocation: every value gets its own
	 * spill slot, and scratch registers are assigned per instruction. */
	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);

	/* vregs + tmp + fixed + SCRATCH + ALL */
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

	if (!ctx->arena) {
		ctx->arena = ir_arena_create(16 * 1024);
	}

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
			switch (ctx->rules ? *rule : insn->op) {
				case IR_START:
				case IR_BEGIN:
				case IR_END:
				case IR_IF_TRUE:
				case IR_IF_FALSE:
				case IR_CASE_VAL:
				case IR_CASE_DEFAULT:
				case IR_MERGE:
				case IR_LOOP_BEGIN:
				case IR_LOOP_END:
					/* control-flow markers emit no code and need no registers */
					break;
				default:
					def_flags = ir_get_target_constraints(ctx, i, &constraints);
					/* fused compare+branch rules keep the register set of the
					 * preceding compare; all others start from a fresh scratch set */
					if (ctx->rules
					 && *rule != IR_CMP_AND_BRANCH_INT
					 && *rule != IR_CMP_AND_BRANCH_FP
					 && *rule != IR_GUARD_CMP_INT
					 && *rule != IR_GUARD_CMP_FP) {
						available = IR_REGSET_SCRATCH;
					}
					if (ctx->vregs[i]) {
						/* assign a definition register (marked for spill-store) */
						reg = constraints.def_reg;
						if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
							IR_REGSET_EXCL(available, reg);
							ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
						} else if (def_flags & IR_USE_MUST_BE_IN_REG) {
							if (insn->op == IR_VLOAD
							 && ctx->live_intervals[ctx->vregs[i]]
							 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) {
								/* pass */
							} else if (insn->op != IR_PARAM) {
								reg = ir_get_free_reg(insn->type, available);
								IR_REGSET_EXCL(available, reg);
								ctx->regs[i][0] = reg | IR_REG_SPILL_STORE;
							}
						}
						/* create the interval and its unique spill slot on first sight */
						if (!ctx->live_intervals[ctx->vregs[i]]) {
							ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
							memset(ival, 0, sizeof(ir_live_interval));
							ctx->live_intervals[ctx->vregs[i]] = ival;
							ival->type = insn->type;
							ival->reg = IR_REG_NONE;
							ival->vreg = ctx->vregs[i];
							ival->stack_spill_pos = -1;
							if (insn->op == IR_PARAM && reg == IR_REG_NONE) {
								/* memory parameter: slot assigned later by ir_fix_param_spills() */
								ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM;
							} else {
								ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data);
							}
						} else if (insn->op == IR_PARAM) {
							IR_ASSERT(0 && "unexpected PARAM");
							return;
						}
					} else if (insn->op == IR_VAR) {
						/* a VAR with uses gets one spill slot shared by its VLOAD/VSTORE users */
						ir_use_list *use_list = &ctx->use_lists[i];
						ir_ref n = use_list->count;

						if (n > 0) {
							int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data);
							ir_ref i, *p, use;
							ir_insn *use_insn;

							for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
								use = *p;
								use_insn = &ctx->ir_base[use];
								if (use_insn->op == IR_VLOAD) {
									if (ctx->vregs[use]
									 && !ctx->live_intervals[ctx->vregs[use]]) {
										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
										memset(ival, 0, sizeof(ir_live_interval));
										ctx->live_intervals[ctx->vregs[use]] = ival;
										ival->type = insn->type;
										ival->reg = IR_REG_NONE;
										ival->vreg = ctx->vregs[use];
										ival->stack_spill_pos = stack_spill_pos;
									}
								} else if (use_insn->op == IR_VSTORE) {
									if (!IR_IS_CONST_REF(use_insn->op3)
									 && ctx->vregs[use_insn->op3]
									 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) {
										ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval));
										memset(ival, 0, sizeof(ir_live_interval));
										ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival;
										ival->type = insn->type;
										ival->reg = IR_REG_NONE;
										ival->vreg = ctx->vregs[use_insn->op3];
										ival->stack_spill_pos = stack_spill_pos;
									}
								}
							}
						}
					}

					/* reserve the temporary registers requested by the rule */
					insn_flags = ir_op_flags[insn->op];
					n = constraints.tmps_count;
					if (n) {
						do {
							n--;
							if (constraints.tmp_regs[n].type) {
								ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available);
								ir_ref *ops = insn->ops;
								IR_REGSET_EXCL(available, reg);
								if (constraints.tmp_regs[n].num > 0
								 && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) {
									/* rematerialization */
									reg |= IR_REG_SPILL_LOAD;
								}
								ctx->regs[i][constraints.tmp_regs[n].num] = reg;
							} else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) {
								available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH);
							} else {
								IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg);
							}
						} while (n);
					}
					/* assign load registers for the data operands */
					n = insn->inputs_count;
					for (j = 1, p = insn->ops + 1; j <= n; j++, p++) {
						ir_ref input = *p;
						if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) {
							if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) {
								ir_reg reg = IR_REG_NUM(ctx->regs[i][0]);
								ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD;
							} else {
								uint8_t use_flags = IR_USE_FLAGS(def_flags, j);
								ir_reg reg = (j < constraints.hints_count) ?
									constraints.hints[j] : IR_REG_NONE;

								if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) {
									IR_REGSET_EXCL(available, reg);
									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
								} else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) {
									/* same value as op1: reuse op1's register */
									ctx->regs[i][j] = ctx->regs[i][1];
								} else if (use_flags & IR_USE_MUST_BE_IN_REG) {
									reg = ir_get_free_reg(ctx->ir_base[input].type, available);
									IR_REGSET_EXCL(available, reg);
									ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD;
								}
							}
						}
					}
					break;
			}
			n = ir_insn_len(insn);
			i += n;
			insn += n;
			rule += n;
		}
		if (bb->flags & IR_BB_DESSA_MOVES) {
			data->dessa_from_block = b;
			ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps);
		}
	}

	ctx->used_preserved_regs = ctx->fixed_save_regset;
	ctx->flags |= IR_NO_STACK_COMBINE;
	ir_fix_stack_frame(ctx);
}

/* Scan all CALL instructions and record the largest outgoing-argument stack
 * area, so the prologue can reserve it once instead of adjusting SP around
 * every call. */
static void ir_preallocate_call_stack(ir_ctx *ctx)
{
	int call_stack_size, peak_call_stack_size = 0;
	ir_ref i, n;
	ir_insn *insn;

	for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) {
		if (insn->op == IR_CALL) {
			call_stack_size = ir_call_used_stack(ctx, insn);
			if (call_stack_size > peak_call_stack_size) {
				peak_call_stack_size = call_stack_size;
			}
		}
		n = ir_insn_len(insn);
		i += n;
		insn += n;
	}
	if (peak_call_stack_size) {
		ctx->call_stack_size = peak_call_stack_size;
		ctx->flags |= IR_PREALLOCATED_STACK;
	}
}

/* Finalize the stack frame layout: add space for callee-saved registers and
 * the varargs register save area, enforce 16-byte alignment, and assign the
 * incoming parameter spill positions. */
void ir_fix_stack_frame(ir_ctx *ctx)
{
	uint32_t additional_size = 0;

	ctx->locals_area_size = ctx->stack_frame_size;

	if (ctx->used_preserved_regs) {
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;
		ir_reg reg;
		(void) reg;

		IR_REGSET_FOREACH(used_preserved_regs, reg) {
			additional_size += sizeof(void*);
		} IR_REGSET_FOREACH_END();
	}

	/* varargs functions that use va_start need a register save area for the
	 * unnamed GP (8 bytes each) and FP (16 bytes each) argument registers */
	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
			additional_size += sizeof(void*) * IR_REG_INT_ARGS;
		}
		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
			additional_size += 16 * IR_REG_FP_ARGS;
		}
	}

	ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*));
	ctx->stack_frame_size += additional_size;
	ctx->stack_frame_alignment = 0;
	ctx->call_stack_size = 0;

	if ((ctx->flags2 & IR_16B_FRAME_ALIGNMENT) && !(ctx->flags & IR_FUNCTION)) {
		while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
			ctx->stack_frame_size += sizeof(void*);
			ctx->stack_frame_alignment += sizeof(void*);
		}
	} else if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) {
		/* Stack must be 16 byte aligned */
		/* NOTE(review): this branch is only reachable when IR_FUNCTION is set
		 * (the first condition handled the !IR_FUNCTION case), so the inner
		 * !IR_FUNCTION test below appears to be dead code — confirm. */
		if (!(ctx->flags & IR_FUNCTION)) {
			while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) {
				ctx->stack_frame_size += sizeof(void*);
				ctx->stack_frame_alignment += sizeof(void*);
			}
		} else if (ctx->flags & IR_USE_FRAME_POINTER) {
			/* account for the saved FP/LR pair when aligning */
			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) {
				ctx->stack_frame_size += sizeof(void*);
				ctx->stack_frame_alignment += sizeof(void*);
			}
		} else {
			if (!(ctx->flags & IR_NO_STACK_COMBINE)) {
				ir_preallocate_call_stack(ctx);
			}
			while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) !=
					ctx->stack_frame_size + ctx->call_stack_size) {
				ctx->stack_frame_size += sizeof(void*);
				ctx->stack_frame_alignment += sizeof(void*);
			}
		}
	}

	ir_fix_param_spills(ctx);
}

static void* dasm_labels[ir_lb_MAX];

/* Veneers support (TODO: avoid global variable usage) */
static ir_ctx
*ir_current_ctx; 5798 5799static uint32_t _ir_next_block(ir_ctx *ctx, uint32_t _b) 5800{ 5801 uint32_t b = ctx->cfg_schedule[++_b]; 5802 5803 /* Check for empty ENTRY block */ 5804 while (b && ((ctx->cfg_blocks[b].flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY)) { 5805 b = ctx->cfg_schedule[++_b]; 5806 } 5807 return b; 5808} 5809 5810void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) 5811{ 5812 uint32_t _b, b, n, target; 5813 ir_block *bb; 5814 ir_ref i; 5815 ir_insn *insn; 5816 uint32_t *rule; 5817 ir_backend_data data; 5818 dasm_State **Dst; 5819 int ret; 5820 void *entry; 5821 size_t size; 5822 5823 data.ra_data.unused_slot_4 = 0; 5824 data.ra_data.unused_slot_2 = 0; 5825 data.ra_data.unused_slot_1 = 0; 5826 data.ra_data.handled = NULL; 5827 data.rodata_label = 0; 5828 data.jmp_table_label = 0; 5829 ctx->data = &data; 5830 5831 if (!ctx->live_intervals) { 5832 ctx->stack_frame_size = 0; 5833 ctx->stack_frame_alignment = 0; 5834 ctx->call_stack_size = 0; 5835 ctx->used_preserved_regs = 0; 5836 ir_allocate_unique_spill_slots(ctx); 5837 } 5838 5839 if (ctx->fixed_stack_frame_size != -1) { 5840 if (ctx->fixed_stack_red_zone) { 5841 IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); 5842 } 5843 if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { 5844 // TODO: report error to caller 5845#ifdef IR_DEBUG_MESSAGES 5846 fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", 5847 __FILE__, __LINE__); 5848#endif 5849 ctx->data = NULL; 5850 ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; 5851 return NULL; 5852 } 5853 ctx->stack_frame_size = ctx->fixed_stack_frame_size; 5854 ctx->call_stack_size = ctx->fixed_call_stack_size; 5855 ctx->stack_frame_alignment = 0; 5856 } 5857 5858 Dst = &data.dasm_state; 5859 data.dasm_state = NULL; 5860 dasm_init(&data.dasm_state, DASM_MAXSECTION); 5861 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); 5862 
dasm_setup(&data.dasm_state, dasm_actions); 5863 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry + exit_table label */ 5864 dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count + 1); 5865 data.emit_constants = ir_bitset_malloc(ctx->consts_count); 5866 5867 if (!(ctx->flags & IR_SKIP_PROLOGUE)) { 5868 ir_emit_prologue(ctx); 5869 } 5870 if (ctx->flags & IR_FUNCTION) { 5871 ir_emit_load_params(ctx); 5872 } 5873 5874 if (UNEXPECTED(!ctx->cfg_schedule)) { 5875 uint32_t *list = ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2)); 5876 for (b = 0; b <= ctx->cfg_blocks_count; b++) { 5877 list[b] = b; 5878 } 5879 list[ctx->cfg_blocks_count + 1] = 0; 5880 } 5881 5882 for (_b = 1; _b <= ctx->cfg_blocks_count; _b++) { 5883 b = ctx->cfg_schedule[_b]; 5884 bb = &ctx->cfg_blocks[b]; 5885 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 5886 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { 5887 continue; 5888 } 5889 if (bb->flags & IR_BB_ALIGN_LOOP) { 5890 | .align IR_LOOP_ALIGNMENT 5891 } 5892 |=>b: 5893 5894 i = bb->start; 5895 insn = ctx->ir_base + i; 5896 if (bb->flags & IR_BB_ENTRY) { 5897 uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; 5898 5899 |=>label: 5900 ir_emit_prologue(ctx); 5901 ctx->entries[insn->op3] = i; 5902 } 5903 5904 /* skip first instruction */ 5905 n = ir_insn_len(insn); 5906 i += n; 5907 insn += n; 5908 rule = ctx->rules + i; 5909 5910 while (i <= bb->end) { 5911 if (!((*rule) & (IR_FUSED|IR_SKIPPED))) 5912 switch ((*rule) & IR_RULE_MASK) { 5913 case IR_VAR: 5914 case IR_PARAM: 5915 case IR_PI: 5916 case IR_PHI: 5917 case IR_SNAPSHOT: 5918 case IR_VA_END: 5919 break; 5920 case IR_MUL_PWR2: 5921 case IR_DIV_PWR2: 5922 case IR_MOD_PWR2: 5923 ir_emit_mul_div_mod_pwr2(ctx, i, insn); 5924 break; 5925 case IR_SDIV_PWR2: 5926 ir_emit_sdiv_pwr2(ctx, i, insn); 5927 break; 5928 case 
IR_SMOD_PWR2: 5929 ir_emit_smod_pwr2(ctx, i, insn); 5930 break; 5931 case IR_SHIFT: 5932 ir_emit_shift(ctx, i, insn); 5933 break; 5934 case IR_SHIFT_CONST: 5935 ir_emit_shift_const(ctx, i, insn); 5936 break; 5937 case IR_CTPOP: 5938 ir_emit_ctpop(ctx, i, insn); 5939 break; 5940 case IR_OP_INT: 5941 ir_emit_op_int(ctx, i, insn); 5942 break; 5943 case IR_OP_FP: 5944 ir_emit_op_fp(ctx, i, insn); 5945 break; 5946 case IR_BINOP_INT: 5947 ir_emit_binop_int(ctx, i, insn); 5948 break; 5949 case IR_BINOP_FP: 5950 ir_emit_binop_fp(ctx, i, insn); 5951 break; 5952 case IR_CMP_INT: 5953 ir_emit_cmp_int(ctx, i, insn); 5954 break; 5955 case IR_CMP_FP: 5956 ir_emit_cmp_fp(ctx, i, insn); 5957 break; 5958 case IR_SEXT: 5959 ir_emit_sext(ctx, i, insn); 5960 break; 5961 case IR_ZEXT: 5962 ir_emit_zext(ctx, i, insn); 5963 break; 5964 case IR_TRUNC: 5965 ir_emit_trunc(ctx, i, insn); 5966 break; 5967 case IR_BITCAST: 5968 case IR_PROTO: 5969 ir_emit_bitcast(ctx, i, insn); 5970 break; 5971 case IR_INT2FP: 5972 ir_emit_int2fp(ctx, i, insn); 5973 break; 5974 case IR_FP2INT: 5975 ir_emit_fp2int(ctx, i, insn); 5976 break; 5977 case IR_FP2FP: 5978 ir_emit_fp2fp(ctx, i, insn); 5979 break; 5980 case IR_COPY_INT: 5981 ir_emit_copy_int(ctx, i, insn); 5982 break; 5983 case IR_COPY_FP: 5984 ir_emit_copy_fp(ctx, i, insn); 5985 break; 5986 case IR_CMP_AND_BRANCH_INT: 5987 ir_emit_cmp_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 5988 break; 5989 case IR_CMP_AND_BRANCH_FP: 5990 ir_emit_cmp_and_branch_fp(ctx, b, i, insn, _ir_next_block(ctx, _b)); 5991 break; 5992 case IR_GUARD_CMP_INT: 5993 ir_emit_guard_cmp_int(ctx, b, i, insn); 5994 break; 5995 case IR_GUARD_CMP_FP: 5996 ir_emit_guard_cmp_fp(ctx, b, i, insn); 5997 break; 5998 case IR_IF_INT: 5999 ir_emit_if_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 6000 break; 6001 case IR_COND: 6002 ir_emit_cond(ctx, i, insn); 6003 break; 6004 case IR_SWITCH: 6005 ir_emit_switch(ctx, b, i, insn); 6006 break; 6007 case IR_MIN_MAX_INT: 6008 
ir_emit_min_max_int(ctx, i, insn); 6009 break; 6010 case IR_OVERFLOW: 6011 ir_emit_overflow(ctx, i, insn); 6012 break; 6013 case IR_OVERFLOW_AND_BRANCH: 6014 ir_emit_overflow_and_branch(ctx, b, i, insn, _ir_next_block(ctx, _b)); 6015 break; 6016 case IR_END: 6017 case IR_LOOP_END: 6018 if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { 6019 ir_emit_osr_entry_loads(ctx, b, bb); 6020 } 6021 if (bb->flags & IR_BB_DESSA_MOVES) { 6022 ir_emit_dessa_moves(ctx, b, bb); 6023 } 6024 do { 6025 ir_ref succ = ctx->cfg_edges[bb->successors]; 6026 6027 if (UNEXPECTED(bb->successors_count == 2)) { 6028 if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { 6029 succ = ctx->cfg_edges[bb->successors + 1]; 6030 } else { 6031 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 6032 } 6033 } else { 6034 IR_ASSERT(bb->successors_count == 1); 6035 } 6036 target = ir_skip_empty_target_blocks(ctx, succ); 6037 if (target != _ir_next_block(ctx, _b)) { 6038 | b =>target 6039 } 6040 } while (0); 6041 break; 6042 case IR_RETURN_VOID: 6043 ir_emit_return_void(ctx); 6044 break; 6045 case IR_RETURN_INT: 6046 ir_emit_return_int(ctx, i, insn); 6047 break; 6048 case IR_RETURN_FP: 6049 ir_emit_return_fp(ctx, i, insn); 6050 break; 6051 case IR_CALL: 6052 ir_emit_call(ctx, i, insn); 6053 break; 6054 case IR_TAILCALL: 6055 ir_emit_tailcall(ctx, i, insn); 6056 break; 6057 case IR_IJMP: 6058 ir_emit_ijmp(ctx, i, insn); 6059 break; 6060 case IR_REG_BINOP_INT: 6061 ir_emit_reg_binop_int(ctx, i, insn); 6062 break; 6063 case IR_VADDR: 6064 ir_emit_vaddr(ctx, i, insn); 6065 break; 6066 case IR_VLOAD: 6067 ir_emit_vload(ctx, i, insn); 6068 break; 6069 case IR_VSTORE: 6070 ir_emit_vstore(ctx, i, insn); 6071 break; 6072 case IR_RLOAD: 6073 ir_emit_rload(ctx, i, insn); 6074 break; 6075 case IR_RSTORE: 6076 ir_emit_rstore(ctx, i, insn); 6077 break; 6078 case IR_LOAD_INT: 6079 ir_emit_load_int(ctx, i, insn); 6080 break; 6081 case IR_LOAD_FP: 6082 ir_emit_load_fp(ctx, i, insn); 6083 break; 6084 case 
IR_STORE_INT: 6085 ir_emit_store_int(ctx, i, insn); 6086 break; 6087 case IR_STORE_FP: 6088 ir_emit_store_fp(ctx, i, insn); 6089 break; 6090 case IR_ALLOCA: 6091 ir_emit_alloca(ctx, i, insn); 6092 break; 6093 case IR_VA_START: 6094 ir_emit_va_start(ctx, i, insn); 6095 break; 6096 case IR_VA_COPY: 6097 ir_emit_va_copy(ctx, i, insn); 6098 break; 6099 case IR_VA_ARG: 6100 ir_emit_va_arg(ctx, i, insn); 6101 break; 6102 case IR_AFREE: 6103 ir_emit_afree(ctx, i, insn); 6104 break; 6105 case IR_BLOCK_BEGIN: 6106 ir_emit_block_begin(ctx, i, insn); 6107 break; 6108 case IR_BLOCK_END: 6109 ir_emit_block_end(ctx, i, insn); 6110 break; 6111 case IR_FRAME_ADDR: 6112 ir_emit_frame_addr(ctx, i); 6113 break; 6114 case IR_EXITCALL: 6115 ir_emit_exitcall(ctx, i, insn); 6116 break; 6117 case IR_GUARD: 6118 case IR_GUARD_NOT: 6119 ir_emit_guard(ctx, i, insn); 6120 break; 6121 case IR_GUARD_OVERFLOW: 6122 ir_emit_guard_overflow(ctx, i, insn); 6123 break; 6124 case IR_TLS: 6125 ir_emit_tls(ctx, i, insn); 6126 break; 6127 case IR_TRAP: 6128 | brk 6129 break; 6130 default: 6131 IR_ASSERT(0 && "NIY rule/instruction"); 6132 ir_mem_free(data.emit_constants); 6133 dasm_free(&data.dasm_state); 6134 ctx->data = NULL; 6135 ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; 6136 return NULL; 6137 } 6138 n = ir_insn_len(insn); 6139 i += n; 6140 insn += n; 6141 rule += n; 6142 } 6143 } 6144 6145 if (ctx->deoptimization_exits) { 6146 uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count; 6147 6148 |=>exit_table_label: 6149 for (i = 0; i < ctx->deoptimization_exits; i++) { 6150 const void *exit_addr = ctx->get_exit_addr(i); 6151 6152 if (!exit_addr) { 6153 ctx->data = NULL; 6154 return 0; 6155 } 6156 | b &exit_addr 6157 } 6158 } 6159 6160 if (data.rodata_label) { 6161 |.rodata 6162 } 6163 IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) { 6164 insn = &ctx->ir_base[-i]; 6165 if (IR_IS_TYPE_FP(insn->type)) { 6166 int label = 
ctx->cfg_blocks_count + i; 6167 6168 if (!data.rodata_label) { 6169 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 6170 6171 |.rodata 6172 |=>data.rodata_label: 6173 } 6174 if (insn->type == IR_DOUBLE) { 6175 |.align 8 6176 |=>label: 6177 |.long insn->val.u32, insn->val.u32_hi 6178 } else { 6179 IR_ASSERT(insn->type == IR_FLOAT); 6180 |.align 4 6181 |=>label: 6182 |.long insn->val.u32 6183 } 6184 } else if (insn->op == IR_STR) { 6185 int label = ctx->cfg_blocks_count + i; 6186 const char *str = ir_get_str(ctx, insn->val.str); 6187 int i = 0; 6188 6189 if (!data.rodata_label) { 6190 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 6191 6192 |.rodata 6193 |=>data.rodata_label: 6194 } 6195 |.align 8 6196 |=>label: 6197 while (1) { 6198 char c; 6199 uint32_t w = 0; 6200 int j; 6201 6202 for (j = 0; j < 4; j++) { 6203 c = str[i]; 6204 if (!c) { 6205 break; 6206 } 6207 w |= c << (8 * j); 6208 i++; 6209 } 6210 | .long w 6211 if (!c) { 6212 break; 6213 } 6214 } 6215 6216 } else { 6217 IR_ASSERT(0); 6218 } 6219 } IR_BITSET_FOREACH_END(); 6220 if (data.rodata_label) { 6221 |.code 6222 } 6223 ir_mem_free(data.emit_constants); 6224 6225 if (ctx->status) { 6226 dasm_free(&data.dasm_state); 6227 ctx->data = NULL; 6228 return NULL; 6229 } 6230 6231 ret = dasm_link(&data.dasm_state, size_ptr); 6232 if (ret != DASM_S_OK) { 6233 IR_ASSERT(0); 6234 dasm_free(&data.dasm_state); 6235 ctx->data = NULL; 6236 ctx->status = IR_ERROR_LINK; 6237 return NULL; 6238 } 6239 size = *size_ptr; 6240 6241 if (ctx->code_buffer) { 6242 entry = ctx->code_buffer->pos; 6243 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 6244 if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) { 6245 ctx->data = NULL; 6246 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 6247 return NULL; 6248 } 6249 ctx->code_buffer->pos = (char*)entry + size; 6250 } else { 6251 entry = ir_mem_mmap(size); 6252 if (!entry) { 6253 dasm_free(&data.dasm_state); 6254 ctx->data = NULL; 6255 
			ctx->status = IR_ERROR_CODE_MEM_OVERFLOW;
			return NULL;
		}
		ir_mem_unprotect(entry, size);
	}

	if (ctx->deoptimization_exits) {
		/* resolve the runtime address of the exit table emitted above */
		uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count;

		ctx->deoptimization_exits_base = (const void*)((char*)entry + dasm_getpclabel(&data.dasm_state, exit_table_label));
	}

	/* ir_add_veneer() (called back from dasm_encode) needs the context */
	ir_current_ctx = ctx;
	ret = dasm_encode(&data.dasm_state, entry);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&data.dasm_state);
		if (ctx->code_buffer) {
			if (ctx->code_buffer->pos == (char*)entry + size) {
				/* rollback */
				/* NOTE(review): restores pos to entry - size, not to the
				 * pre-reservation position (entry may have been aligned up) —
				 * looks off by the alignment padding; confirm intent. */
				ctx->code_buffer->pos = (char*)entry - size;
			}
		} else {
			ir_mem_unmap(entry, size);
		}
		ctx->data = NULL;
		ctx->status = IR_ERROR_ENCODE;
		return NULL;
	}

	if (data.jmp_table_label) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label);
		ctx->jmp_table_offset = offset;
	} else {
		ctx->jmp_table_offset = 0;
	}
	if (data.rodata_label) {
		uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label);
		ctx->rodata_offset = offset;
	} else {
		ctx->rodata_offset = 0;
	}

	if (ctx->entries_count) {
		/* For all entries */
		i = ctx->entries_count;
		do {
			/* replace each entry's IR ref by its code offset */
			ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
			uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3);
			insn->op3 = offset;
		} while (i != 0);
	}

	dasm_free(&data.dasm_state);

	if (ctx->code_buffer) {
		/* include veneers appended after the encoded code */
		size = (char*)ctx->code_buffer->pos - (char*)entry;
	}

	ir_mem_flush(entry, size);

	if (!ctx->code_buffer) {
		ir_mem_protect(entry, size);
	}

	ctx->data = NULL;
	return entry;
}

/* Emit a group of deoptimization exit thunks.  Each exit point is a single
 * "bl >2" to a common tail that recovers the exit-point number from the
 * return address ((lr - label1) / 4, biased by first_exit_point) into the
 * temporary register, then branches to the shared exit handler. */
const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr)
{
	void *entry;
	size_t size;
	uint32_t i;
	dasm_State **Dst, *dasm_state;
	int ret;

	IR_ASSERT(code_buffer);
	IR_ASSERT(aarch64_may_use_b(code_buffer, exit_addr));

	Dst = &dasm_state;
	dasm_state = NULL;
	dasm_init(&dasm_state, DASM_MAXSECTION);
	dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX);
	dasm_setup(&dasm_state, dasm_actions);

	|	bl >2
	|1:
	for (i = 1; i < exit_points_per_group; i++) {
		|	bl >2
	}
	|2:
	|	adr Rx(IR_REG_INT_TMP), <1
	|	sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP)
	|	lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2
	if (first_exit_point) {
		|	add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point
	}
	|	b &exit_addr

	ret = dasm_link(&dasm_state, &size);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&dasm_state);
		return NULL;
	}

	entry = code_buffer->pos;
	entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16);
	if (size > (size_t)((char*)code_buffer->end - (char*)entry)) {
		return NULL;
	}
	code_buffer->pos = (char*)entry + size;

	ir_current_ctx = NULL;
	ret = dasm_encode(&dasm_state, entry);
	if (ret != DASM_S_OK) {
		IR_ASSERT(0);
		dasm_free(&dasm_state);
		if (code_buffer->pos == (char*)entry + size) {
			/* rollback */
			/* NOTE(review): same suspicious "entry - size" rollback as in
			 * ir_emit_code() — confirm against the pre-reservation position. */
			code_buffer->pos = (char*)entry - size;
		}
		return NULL;
	}

	dasm_free(&dasm_state);

	ir_mem_flush(entry, size);

	*size_ptr = size;
	return entry;
}

/* DynAsm callback: resolve an out-of-range PC-relative reference by reusing
 * or emitting a veneer in the code buffer.  (Continues past this view.) */
static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset)
{
	ir_ctx *ctx = ir_current_ctx;
	const void *addr, *veneer = NULL;
	ptrdiff_t na;
	int n, m;

	IR_ASSERT(ctx && ctx->code_buffer);

	if ((ins >> 16) == DASM_REL_A) {
		addr =
(void*)((((ptrdiff_t)(*(b-1))) << 32) | (unsigned int)(*(b-2))); 6400 if (ctx->get_veneer) { 6401 veneer = ctx->get_veneer(ctx, addr); 6402 } 6403 } else { 6404 IR_ASSERT(0 && "too long jmp distance"); 6405 return 0; 6406 } 6407 6408 if (veneer) { 6409 na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; 6410 n = (int)na; 6411 6412 /* check if we can jump to veneer */ 6413 if ((ptrdiff_t)n != na) { 6414 /* pass */ 6415 } else if (!(ins & 0xf800)) { /* B, BL */ 6416 if ((n & 3) == 0 && ((n+0x08000000) >> 28) == 0) { 6417 return n; 6418 } 6419 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ 6420 if ((n & 3) == 0 && ((n+0x00100000) >> 21) == 0) { 6421 return n; 6422 } 6423 } else if ((ins & 0x3000) == 0x2000) { /* ADR */ 6424 /* pass */ 6425 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ 6426 /* pass */ 6427 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ 6428 if ((n & 3) == 0 && ((n+0x00008000) >> 16) == 0) { 6429 return n; 6430 } 6431 } 6432 } 6433 6434 veneer = ctx->code_buffer->pos; 6435 if ((char*)ctx->code_buffer->end - (char*)veneer < 4 ) { 6436 IR_ASSERT(0 && "too long jmp distance" && "jit buffer overflow"); 6437 return 0; /* jit_buffer_size overflow */ 6438 } 6439 6440 na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; 6441 n = (int)na; 6442 6443 /* check if we can jump to veneer */ 6444 if ((ptrdiff_t)n != na) { 6445 IR_ASSERT(0 && "too long jmp distance"); 6446 return 0; 6447 } else if (!(ins & 0xf800)) { /* B, BL */ 6448 if ((n & 3) != 0 || ((n+0x08000000) >> 28) != 0) { 6449 IR_ASSERT(0 && "too long jmp distance"); 6450 return 0; 6451 } 6452 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ 6453 if ((n & 3) != 0 || ((n+0x00100000) >> 21) != 0) { 6454 IR_ASSERT(0 && "too long jmp distance"); 6455 return 0; 6456 } 6457 } else if ((ins & 0x3000) == 0x2000) { /* ADR */ 6458 IR_ASSERT(0 && "too long jmp distance"); 6459 return 0; 6460 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ 6461 IR_ASSERT(0 && "too long jmp distance"); 6462 return 
0; 6463 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ 6464 if ((n & 3) != 0 || ((n+0x00008000) >> 16) != 0) { 6465 IR_ASSERT(0 && "too long jmp distance"); 6466 return 0; 6467 } 6468 } else if ((ins & 0x8000)) { /* absolute */ 6469 IR_ASSERT(0 && "too long jmp distance"); 6470 return 0; 6471 } else { 6472 IR_ASSERT(0 && "too long jmp distance"); 6473 return 0; 6474 } 6475 6476 /* check if we can use B to jump from veneer */ 6477 na = (ptrdiff_t)cp + offset - (ptrdiff_t)veneer - 4; 6478 m = (int)na; 6479 if ((ptrdiff_t)m != na) { 6480 IR_ASSERT(0 && "too long jmp distance"); 6481 return 0; 6482 } else if ((m & 3) != 0 || ((m+0x08000000) >> 28) != 0) { 6483 IR_ASSERT(0 && "too long jmp distance"); 6484 return 0; 6485 } 6486 6487 if (!ctx->set_veneer || !ctx->set_veneer(ctx, addr, veneer)) { 6488 IR_ASSERT(0 && "too long jmp distance"); 6489 return 0; 6490 } 6491 6492 /* generate B instruction */ 6493 *(uint32_t*)veneer = 0x14000000 | ((m >> 2) & 0x03ffffff); 6494 ctx->code_buffer->pos = (char*)ctx->code_buffer->pos + 4; 6495 6496 return n; 6497} 6498 6499bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr) 6500{ 6501 return !aarch64_may_use_b(code_buffer, addr); 6502} 6503 6504void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr) 6505{ 6506 void *entry; 6507 size_t size; 6508 dasm_State **Dst, *dasm_state; 6509 int ret; 6510 6511 Dst = &dasm_state; 6512 dasm_state = NULL; 6513 dasm_init(&dasm_state, DASM_MAXSECTION); 6514 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 6515 dasm_setup(&dasm_state, dasm_actions); 6516 6517 |.code 6518 | movz Rx(IR_REG_INT_TMP), #((uint64_t)(addr) & 0xffff) 6519 | movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 16) & 0xffff), lsl #16 6520 | movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 32) & 0xffff), lsl #32 6521 | movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 48) & 0xffff), lsl #48 6522 | br Rx(IR_REG_INT_TMP) 6523 6524 ret = dasm_link(&dasm_state, &size); 6525 if (ret != DASM_S_OK) { 6526 
IR_ASSERT(0); 6527 dasm_free(&dasm_state); 6528 return NULL; 6529 } 6530 6531 entry = code_buffer->pos; 6532 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 4); 6533 if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { 6534 dasm_free(&dasm_state); 6535 return NULL; 6536 } 6537 6538 ret = dasm_encode(&dasm_state, entry); 6539 if (ret != DASM_S_OK) { 6540 dasm_free(&dasm_state); 6541 return NULL; 6542 } 6543 6544 *size_ptr = size; 6545 code_buffer->pos = (char*)code_buffer->pos + size; 6546 6547 dasm_free(&dasm_state); 6548 ir_mem_flush(entry, size); 6549 6550 return entry; 6551} 6552 6553void ir_fix_thunk(void *thunk_entry, void *addr) 6554{ 6555 uint32_t *code = thunk_entry; 6556 IR_ASSERT((code[0] & 0xffe00000) == 0xd2800000 6557 && (code[1] & 0xffe00000) == 0xf2a00000 6558 && (code[2] & 0xffe00000) == 0xf2c00000 6559 && (code[3] & 0xffe00000) == 0xf2e00000 6560 && (code[4] & 0xfffffc1f) == 0xd61f0000); 6561 6562 code[0] = (code[0] & 0xffe0001f) | (uint32_t)((uint64_t)(addr) & 0xffff) << 5; 6563 code[1] = (code[1] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 16) & 0xffff) << 5; 6564 code[2] = (code[2] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 32) & 0xffff) << 5; 6565 code[3] = (code[3] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 48) & 0xffff) << 5; 6566 6567 ir_mem_flush(code, sizeof(uint32_t) * 4); 6568} 6569