1/* 2 * IR - Lightweight JIT Compilation Framework 3 * (Aarch64 native code generator based on DynAsm) 4 * Copyright (C) 2022 Zend by Perforce. 5 * Authors: Dmitry Stogov <dmitry@php.net> 6 */ 7 8|.arch arm64 9 10|.actionlist dasm_actions 11|.globals ir_lb 12|.section code, cold_code, rodata, jmp_table 13 14#ifdef IR_DEBUG 15typedef struct _ir_mem {uint64_t v;} ir_mem; 16 17# define IR_MEM_VAL(loc) ((loc).v) 18#else 19typedef uint64_t ir_mem; 20 21# define IR_MEM_VAL(loc) (loc) 22#endif 23 24#define IR_MEM_OFFSET(loc) ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff)) 25#define IR_MEM_BASE(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff)) 26#define IR_MEM_INDEX(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff)) 27#define IR_MEM_SHIFT(loc) ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff)) 28 29#define IR_MEM_O(addr) IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 0) 30#define IR_MEM_B(base) IR_MEM(base, 0, IR_REG_NONE, 0) 31#define IR_MEM_BO(base, offset) IR_MEM(base, offset, IR_REG_NONE, 0) 32 33IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t shift) 34{ 35 ir_mem mem; 36 IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST)); 37 IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST)); 38 IR_ASSERT(index == IR_REG_NONE || offset == 0); 39 IR_ASSERT(shift == 0); // TODO: ??? 40#ifdef IR_DEBUG 41 mem.v = 42#else 43 mem = 44#endif 45 ((uint64_t)(uint32_t)offset | 46 ((uint64_t)(uint8_t)base << 32) | 47 ((uint64_t)(uint8_t)index << 40) | 48 ((uint64_t)(uint8_t)shift << 48)); 49 return mem; 50} 51 52#define IR_SPILL_POS_TO_OFFSET(offset) \ 53 ((ctx->flags & IR_USE_FRAME_POINTER) ? 
\ 54 ((offset) + (int32_t)sizeof(void*) * 2) : \ 55 ((offset) + ctx->call_stack_size)) 56 57#define B_IMM (1<<27) // signed imm26 * 4 58#define ADR_IMM (1<<20) // signed imm21 59#define ADRP_IMM (1LL<<32) // signed imm21 * 4096 60 61static bool aarch64_may_use_b(ir_code_buffer *code_buffer, const void *addr) 62{ 63 if (code_buffer) { 64 if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) { 65 return (((char*)code_buffer->end - (char*)code_buffer->start) < B_IMM); 66 } else if ((char*)addr >= (char*)code_buffer->end) { 67 return (((char*)addr - (char*)code_buffer->start) < B_IMM); 68 } else if (addr < code_buffer->start) { 69 return (((char*)code_buffer->end - (char*)addr) < B_IMM); 70 } 71 } 72 return 0; 73} 74 75#if 0 76static bool aarch64_may_use_adr(ir_code_buffer *code_buffer, const void *addr) 77{ 78 if (code_buffer) { 79 if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) { 80 return (((char*)code_buffer->end - (char*)code_buffer->start) < ADR_IMM); 81 } else if ((char*)addr >= (char*)code_buffer->end) { 82 return (((char*)addr - (char*)code_buffer->start) < ADR_IMM); 83 } else if (addr < code_buffer->start) { 84 return (((char*)code_buffer->end - (char*)addr) < ADR_IMM); 85 } 86 } 87 return 0; 88} 89 90static bool aarch64_may_use_adrp(ir_code_buffer *code_buffer, const void *addr) 91{ 92 if (code_buffer) { 93 if (addr >= code_buffer->start && (char*)addr < (char*)code_buffer->end) { 94 return (((char*)code_buffer->end - (char*)code_buffer->start) < ADRP_IMM); 95 } else if ((char*)addr >= (char*)code_buffer->end) { 96 return (((char*)addr - (char*)code_buffer->start) < ADRP_IMM); 97 } else if (addr < code_buffer->start) { 98 return (((char*)code_buffer->end - (char*)addr) < ADRP_IMM); 99 } 100 } 101 return 0; 102} 103#endif 104 105/* Determine whether "val" falls into two allowed ranges: 106 * Range 1: [0, 0xfff] 107 * Range 2: LSL #12 to Range 1 108 * Used to guard the immediate encoding for add/adds/sub/subs/cmp/cmn 
instructions. */ 109static bool aarch64_may_encode_imm12(const int64_t val) 110{ 111 return (val >= 0 && (val <= 0xfff || !(val & 0xffffffffff000fff))); 112} 113 114/* Determine whether an immediate value can be encoded as the immediate operand of logical instructions. */ 115static bool aarch64_may_encode_logical_imm(uint64_t value, uint32_t type_size) 116{ 117 /* fast path: power of two */ 118 if (value > 0 && !(value & (value - 1))) { 119 return 1; 120 } 121 122 if (type_size == 8) { 123 if (dasm_imm13((uint32_t)value, (uint32_t)(value >> 32)) != -1) { 124 return 1; 125 } 126 } else { 127 if (dasm_imm13((uint32_t)value, (uint32_t)value) != -1) { 128 return 1; 129 } 130 } 131 132 return 0; 133} 134 135static bool aarch64_may_encode_imm7_addr_offset(const int64_t offset, uint32_t type_size) 136{ 137 return (uintptr_t)(offset) % type_size == 0 138 && offset < 63 * (int32_t)type_size 139 && offset >= -64 * (int32_t)type_size; 140} 141 142static bool aarch64_may_encode_addr_offset(int64_t offset, uint32_t type_size) 143{ 144 return (uintptr_t)(offset) % type_size == 0 && (uintptr_t)(offset) < 0xfff * type_size; 145} 146 147|.macro ASM_REG_REG_OP, op, type, dst, src 148|| if (ir_type_size[type] == 8) { 149| op Rx(dst), Rx(src) 150|| } else { 151| op Rw(dst), Rw(src) 152|| } 153|.endmacro 154 155|.macro ASM_REG_REG_REG_OP, op, type, dst, src1, src2 156|| if (ir_type_size[type] == 8) { 157| op Rx(dst), Rx(src1), Rx(src2) 158|| } else { 159| op Rw(dst), Rw(src1), Rw(src2) 160|| } 161|.endmacro 162 163|.macro ASM_REG_REG_REG_TXT_OP, op, type, dst, src1, src2, txt 164|| if (ir_type_size[type] == 8) { 165| op Rx(dst), Rx(src1), Rx(src2), txt 166|| } else { 167| op Rw(dst), Rw(src1), Rw(src2), txt 168|| } 169|.endmacro 170 171|.macro ASM_REG_REG_REG_REG_OP, op, type, dst, src1, src2, src3 172|| if (ir_type_size[type] == 8) { 173| op Rx(dst), Rx(src1), Rx(src2), Rx(src3) 174|| } else { 175| op Rw(dst), Rw(src1), Rw(src2), Rw(src3); 176|| } 177|.endmacro 178 179|.macro 
ASM_REG_REG_IMM_OP, op, type, dst, src1, val 180|| if (ir_type_size[type] == 8) { 181| op Rx(dst), Rx(src1), #val 182|| } else { 183| op Rw(dst), Rw(src1), #val 184|| } 185|.endmacro 186 187|.macro ASM_REG_IMM_OP, op, type, reg, val 188|| if (ir_type_size[type] == 8) { 189| op Rx(reg), #val 190|| } else { 191| op Rw(reg), #val 192|| } 193|.endmacro 194 195|.macro ASM_FP_REG_IMM_OP, op, type, reg, val 196|| if (type == IR_DOUBLE) { 197| op Rd(reg-IR_REG_FP_FIRST), #val 198|| } else { 199|| IR_ASSERT(type == IR_FLOAT); 200| op Rs(reg-IR_REG_FP_FIRST), #val 201|| } 202|.endmacro 203 204|.macro ASM_FP_REG_REG_REG_OP, op, type, dst, src1, src2 205|| if (type == IR_DOUBLE) { 206| op Rd(dst-IR_REG_FP_FIRST), Rd(src1-IR_REG_FP_FIRST), Rd(src2-IR_REG_FP_FIRST) 207|| } else { 208|| IR_ASSERT(type == IR_FLOAT); 209| op Rs(dst-IR_REG_FP_FIRST), Rs(src1-IR_REG_FP_FIRST), Rs(src2-IR_REG_FP_FIRST) 210|| } 211|.endmacro 212 213typedef struct _ir_backend_data { 214 ir_reg_alloc_data ra_data; 215 uint32_t dessa_from_block; 216 dasm_State *dasm_state; 217 ir_bitset emit_constants; 218 int rodata_label, jmp_table_label; 219} ir_backend_data; 220 221#define IR_GP_REG_NAME(code, name64, name32) \ 222 #name64, 223#define IR_GP_REG_NAME32(code, name64, name32) \ 224 #name32, 225#define IR_FP_REG_NAME(code, name64, name32, name16, name8) \ 226 #name64, 227#define IR_FP_REG_NAME32(code, name64, name32, name16, name8) \ 228 #name32, 229 230static const char *_ir_reg_name[IR_REG_NUM] = { 231 IR_GP_REGS(IR_GP_REG_NAME) 232 IR_FP_REGS(IR_FP_REG_NAME) 233}; 234 235static const char *_ir_reg_name32[IR_REG_NUM] = { 236 IR_GP_REGS(IR_GP_REG_NAME32) 237 IR_FP_REGS(IR_FP_REG_NAME32) 238}; 239 240/* Calling Convention */ 241static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { 242 IR_REG_INT_ARG1, 243 IR_REG_INT_ARG2, 244 IR_REG_INT_ARG3, 245 IR_REG_INT_ARG4, 246 IR_REG_INT_ARG5, 247 IR_REG_INT_ARG6, 248 IR_REG_INT_ARG7, 249 IR_REG_INT_ARG8, 250}; 251 252static const int8_t 
_ir_fp_reg_params[IR_REG_FP_ARGS] = { 253 IR_REG_FP_ARG1, 254 IR_REG_FP_ARG2, 255 IR_REG_FP_ARG3, 256 IR_REG_FP_ARG4, 257 IR_REG_FP_ARG5, 258 IR_REG_FP_ARG6, 259 IR_REG_FP_ARG7, 260 IR_REG_FP_ARG8, 261}; 262 263const char *ir_reg_name(int8_t reg, ir_type type) 264{ 265 if (reg >= IR_REG_NUM) { 266 if (reg == IR_REG_SCRATCH) { 267 return "SCRATCH"; 268 } else { 269 IR_ASSERT(reg == IR_REG_ALL); 270 return "ALL"; 271 } 272 } 273 IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); 274 if (type == IR_VOID) { 275 type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE; 276 } 277 if (ir_type_size[type] == 8) { 278 return _ir_reg_name[reg]; 279 } else { 280 return _ir_reg_name32[reg]; 281 } 282} 283 284#define IR_RULES(_) \ 285 _(CMP_INT) \ 286 _(CMP_FP) \ 287 _(MUL_PWR2) \ 288 _(DIV_PWR2) \ 289 _(MOD_PWR2) \ 290 _(SDIV_PWR2) \ 291 _(SMOD_PWR2) \ 292 _(OP_INT) \ 293 _(OP_FP) \ 294 _(BINOP_INT) \ 295 _(BINOP_FP) \ 296 _(SHIFT) \ 297 _(SHIFT_CONST) \ 298 _(COPY_INT) \ 299 _(COPY_FP) \ 300 _(CMP_AND_BRANCH_INT) \ 301 _(CMP_AND_BRANCH_FP) \ 302 _(GUARD_CMP_INT) \ 303 _(GUARD_CMP_FP) \ 304 _(GUARD_OVERFLOW) \ 305 _(OVERFLOW_AND_BRANCH) \ 306 _(MIN_MAX_INT) \ 307 _(REG_BINOP_INT) \ 308 _(LOAD_INT) \ 309 _(LOAD_FP) \ 310 _(STORE_INT) \ 311 _(STORE_FP) \ 312 _(IF_INT) \ 313 _(RETURN_VOID) \ 314 _(RETURN_INT) \ 315 _(RETURN_FP) \ 316 317#define IR_RULE_ENUM(name) IR_ ## name, 318 319enum _ir_rule { 320 IR_FIRST_RULE = IR_LAST_OP, 321 IR_RULES(IR_RULE_ENUM) 322 IR_LAST_RULE 323}; 324 325#define IR_RULE_NAME(name) #name, 326const char *ir_rule_name[IR_LAST_OP] = { 327 NULL, 328 IR_RULES(IR_RULE_NAME) 329 NULL 330}; 331 332/* register allocation */ 333int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) 334{ 335 uint32_t rule = ir_rule(ctx, ref); 336 const ir_insn *insn; 337 int n = 0; 338 int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; 339 340 constraints->def_reg = IR_REG_NONE; 341 
constraints->hints_count = 0; 342 switch (rule & IR_RULE_MASK) { 343 case IR_BINOP_INT: 344 insn = &ctx->ir_base[ref]; 345 n = 0; 346 if (IR_IS_CONST_REF(insn->op1)) { 347 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 348 n++; 349 } 350 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 351 const ir_insn *val_insn = &ctx->ir_base[insn->op2]; 352 switch (insn->op) { 353 case IR_ADD: 354 case IR_ADD_OV: 355 case IR_SUB: 356 case IR_SUB_OV: 357 if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_imm12(val_insn->val.u64)) { 358 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 359 n++; 360 } 361 break; 362 case IR_MUL_OV: 363 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 364 n++; 365 break; 366 case IR_AND: 367 case IR_OR: 368 case IR_XOR: 369 if (IR_IS_SYM_CONST(val_insn->op) || !aarch64_may_encode_logical_imm(val_insn->val.u64, ir_type_size[insn->type])) { 370 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 371 n++; 372 } 373 break; 374 case IR_MUL: 375 case IR_DIV: 376 case IR_MOD: 377 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 378 n++; 379 break; 380 } 381 } 382 if (insn->op == IR_MOD) { 383 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 384 n++; 385 } else if (insn->op == IR_MUL_OV && (ir_type_size[insn->type] == 8 || IR_IS_TYPE_SIGNED(insn->type))) { 386 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 387 n++; 388 } 389 break; 390 case IR_MUL_PWR2: 391 case IR_DIV_PWR2: 392 case IR_MOD_PWR2: 393 case IR_SHIFT: 394 case IR_SHIFT_CONST: 395 case IR_OP_INT: 396 case IR_OP_FP: 397 case IR_INT2FP: 398 case IR_FP2INT: 399 case IR_FP2FP: 400 insn = &ctx->ir_base[ref]; 401 n = 0; 402 if (IR_IS_CONST_REF(insn->op1)) { 403 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, 
IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 404 n++; 405 } 406 if (rule == IR_SHIFT_CONST 407 && (insn->op == IR_ROL || insn->op == IR_ROR) 408 && ir_type_size[insn->type] < 4) { 409 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 410 n++; 411 } else if (rule == IR_SHIFT 412 && (insn->op == IR_ROL || insn->op == IR_ROR) 413 && ir_type_size[insn->type] < 4) { 414 if (insn->op == IR_ROL) { 415 flags |= IR_DEF_CONFLICTS_WITH_INPUT_REGS; 416 } 417 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 418 n++; 419 } else if (rule == IR_SHIFT && insn->op == IR_ROL) { 420 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 421 n++; 422 } 423 break; 424 case IR_SDIV_PWR2: 425 flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; 426 insn = &ctx->ir_base[ref]; 427 n = 0; 428 if (IR_IS_CONST_REF(insn->op1)) { 429 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 430 n++; 431 } 432 if (IR_IS_CONST_REF(insn->op2)) { 433 int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; 434 if (!aarch64_may_encode_imm12(offset)) { 435 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 436 n++; 437 } 438 } 439 break; 440 case IR_SMOD_PWR2: 441 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 442 insn = &ctx->ir_base[ref]; 443 n = 0; 444 if (IR_IS_CONST_REF(insn->op1)) { 445 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 446 n++; 447 } 448 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); 449 n++; 450 break; 451 case IR_CTPOP: 452 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; 453 insn = &ctx->ir_base[ref]; 454 constraints->tmp_regs[0] = IR_TMP_REG(2, IR_DOUBLE, IR_USE_SUB_REF, IR_SAVE_SUB_REF); 455 n = 1; 456 break; 457 case IR_BINOP_FP: 458 case IR_MIN_MAX_INT: 459 insn = &ctx->ir_base[ref]; 460 
n = 0; 461 if (IR_IS_CONST_REF(insn->op1)) { 462 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 463 n++; 464 } 465 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 466 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 467 n++; 468 } 469 break; 470 case IR_CMP_INT: 471 insn = &ctx->ir_base[ref]; 472 n = 0; 473 if (IR_IS_CONST_REF(insn->op1)) { 474 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 475 n++; 476 } 477 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 478 insn = &ctx->ir_base[insn->op2]; 479 if (IR_IS_SYM_CONST(insn->op) || !aarch64_may_encode_imm12(insn->val.u64)) { 480 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 481 n++; 482 } 483 } 484 break; 485 case IR_CMP_FP: 486 insn = &ctx->ir_base[ref]; 487 n = 0; 488 if (IR_IS_CONST_REF(insn->op1)) { 489 const ir_insn *val_insn = &ctx->ir_base[insn->op1]; 490 constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 491 n++; 492 } 493 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 494 const ir_insn *val_insn = &ctx->ir_base[insn->op2]; 495 constraints->tmp_regs[n] = IR_TMP_REG(2, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 496 n++; 497 } 498 break; 499 case IR_VSTORE: 500 insn = &ctx->ir_base[ref]; 501 if (IR_IS_CONST_REF(insn->op3)) { 502 insn = &ctx->ir_base[insn->op3]; 503 constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 504 n = 1; 505 } 506 break; 507 case IR_LOAD_FP: 508 insn = &ctx->ir_base[ref]; 509 n = 0; 510 if (IR_IS_CONST_REF(insn->op2)) { 511 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 512 constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 513 n++; 514 } 515 break; 516 case IR_STORE_INT: 517 case IR_STORE_FP: 518 insn = &ctx->ir_base[ref]; 519 n = 0; 520 if (IR_IS_CONST_REF(insn->op2)) { 521 
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 522 constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 523 n++; 524 } 525 if (IR_IS_CONST_REF(insn->op3)) { 526 insn = &ctx->ir_base[insn->op3]; 527 if (!IR_IS_TYPE_INT(insn->type) || IR_IS_SYM_CONST(insn->op) || insn->val.i64 != 0) { 528 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 529 n++; 530 } 531 } 532 break; 533 case IR_SWITCH: 534 insn = &ctx->ir_base[ref]; 535 n = 0; 536 if (IR_IS_CONST_REF(insn->op2)) { 537 insn = &ctx->ir_base[insn->op2]; 538 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 539 n++; 540 } else { 541 insn = &ctx->ir_base[insn->op2]; 542 constraints->tmp_regs[n] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 543 n++; 544 } 545 constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 546 n++; 547 break; 548 case IR_CALL: 549 insn = &ctx->ir_base[ref]; 550 constraints->def_reg = (IR_IS_TYPE_INT(insn->type)) ? 
IR_REG_INT_RET1 : IR_REG_FP_RET1; 551 constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); 552 n = 1; 553 IR_FALLTHROUGH; 554 case IR_TAILCALL: 555 insn = &ctx->ir_base[ref]; 556 if (insn->inputs_count > 2) { 557 constraints->hints[2] = IR_REG_NONE; 558 constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); 559 if (!IR_IS_CONST_REF(insn->op2)) { 560 constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); 561 n++; 562 } 563 } 564 flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; 565 break; 566 case IR_COND: 567 insn = &ctx->ir_base[ref]; 568 n = 0; 569 if (IR_IS_CONST_REF(insn->op1)) { 570 constraints->tmp_regs[n] = IR_TMP_REG(1, ctx->ir_base[insn->op1].type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 571 n++; 572 } 573 if (IR_IS_CONST_REF(insn->op2)) { 574 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 575 n++; 576 } 577 if (IR_IS_CONST_REF(insn->op3)) { 578 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 579 n++; 580 } 581 break; 582 case IR_COPY_INT: 583 case IR_COPY_FP: 584 case IR_TRUNC: 585 case IR_BITCAST: 586 case IR_PROTO: 587 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 588 break; 589 case IR_ZEXT: 590 case IR_SEXT: 591 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG; 592 break; 593 case IR_PARAM: 594 constraints->def_reg = ir_get_param_reg(ctx, ref); 595 flags = 0; 596 break; 597 case IR_PI: 598 case IR_PHI: 599 flags = IR_USE_SHOULD_BE_IN_REG; 600 break; 601 case IR_RLOAD: 602 constraints->def_reg = ctx->ir_base[ref].op2; 603 flags = IR_USE_SHOULD_BE_IN_REG; 604 break; 605 case IR_EXITCALL: 606 constraints->def_reg = IR_REG_INT_RET1; 607 break; 608 case IR_RSTORE: 609 flags = IR_OP3_SHOULD_BE_IN_REG; 610 break; 611 case IR_RETURN_INT: 612 flags = IR_OP2_SHOULD_BE_IN_REG; 613 constraints->hints[2] = IR_REG_INT_RET1; 614 constraints->hints_count = 3; 
615 break; 616 case IR_RETURN_FP: 617 flags = IR_OP2_SHOULD_BE_IN_REG; 618 constraints->hints[2] = IR_REG_FP_RET1; 619 constraints->hints_count = 3; 620 break; 621 case IR_SNAPSHOT: 622 flags = 0; 623 break; 624 case IR_VA_START: 625 flags = IR_OP1_MUST_BE_IN_REG; 626 constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 627 n = 1; 628 break; 629 case IR_VA_ARG: 630 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; 631 constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 632 n = 1; 633 break; 634 } 635 constraints->tmps_count = n; 636 637 return flags; 638} 639 640/* instruction selection */ 641static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref, ir_type type) 642{ 643 if (!IR_IS_CONST_REF(addr_ref)) { 644 ir_insn *addr_insn = &ctx->ir_base[addr_ref]; 645 646 if (addr_insn->op == IR_ADD 647 && !IR_IS_CONST_REF(addr_insn->op1) 648 && IR_IS_CONST_REF(addr_insn->op2) // TODO: temporary workaround 649 && !IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op) 650 && aarch64_may_encode_addr_offset(ctx->ir_base[addr_insn->op2].val.i64, ir_type_size[type])) { 651 ir_use_list *use_list = &ctx->use_lists[addr_ref]; 652 ir_ref j = use_list->count; 653 654 if (j > 1) { 655 /* check if address is used only in LOAD and STORE */ 656 ir_ref *p = &ctx->use_edges[use_list->refs]; 657 658 do { 659 ir_insn *insn = &ctx->ir_base[*p]; 660 if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { 661 return; 662 } 663 p++; 664 } while (--j); 665 } 666 ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | addr_insn->op; 667 } 668 } 669} 670 671static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) 672{ 673 ir_insn *op2_insn; 674 ir_insn *insn = &ctx->ir_base[ref]; 675 676 switch (insn->op) { 677 case IR_EQ: 678 case IR_NE: 679 case IR_LT: 680 case IR_GE: 681 case IR_LE: 682 case IR_GT: 683 case IR_ULT: 684 case IR_UGE: 685 case IR_ULE: 686 case IR_UGT: 687 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { 
688 return IR_CMP_INT; 689 } else { 690 return IR_CMP_FP; 691 } 692 break; 693 case IR_ADD: 694 case IR_SUB: 695 if (IR_IS_TYPE_INT(insn->type)) { 696 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 697 op2_insn = &ctx->ir_base[insn->op2]; 698 if (IR_IS_SYM_CONST(op2_insn->op)) { 699 /* pass */ 700 } else if (IR_IS_CONST_REF(insn->op1)) { 701 // const 702 } else if (op2_insn->val.i64 == 0) { 703 return IR_COPY_INT; 704 } 705 } 706binop_int: 707 return IR_BINOP_INT; 708 } else { 709binop_fp: 710 return IR_BINOP_FP; 711 } 712 break; 713 case IR_MUL: 714 if (IR_IS_TYPE_INT(insn->type)) { 715 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 716 op2_insn = &ctx->ir_base[insn->op2]; 717 if (IR_IS_SYM_CONST(op2_insn->op)) { 718 /* pass */ 719 } else if (IR_IS_CONST_REF(insn->op1)) { 720 // const 721 } else if (op2_insn->val.u64 == 0) { 722 // 0 723 } else if (op2_insn->val.u64 == 1) { 724 return IR_COPY_INT; 725 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 726 return IR_MUL_PWR2; 727 } 728 } 729 return IR_BINOP_INT; 730 } else { 731 goto binop_fp; 732 } 733 break; 734 case IR_ADD_OV: 735 case IR_SUB_OV: 736 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 737 goto binop_int; 738 case IR_MUL_OV: 739 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 740 goto binop_int; 741 case IR_DIV: 742 if (IR_IS_TYPE_INT(insn->type)) { 743 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 744 op2_insn = &ctx->ir_base[insn->op2]; 745 if (IR_IS_SYM_CONST(op2_insn->op)) { 746 /* pass */ 747 } else if (IR_IS_CONST_REF(insn->op1)) { 748 // const 749 } else if (op2_insn->val.u64 == 1) { 750 return IR_COPY_INT; 751 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 752 if (IR_IS_TYPE_UNSIGNED(insn->type)) { 753 return IR_DIV_PWR2; 754 } else { 755 return IR_SDIV_PWR2; 756 } 757 } 758 } 759 return IR_BINOP_INT; 760 } else { 761 goto binop_fp; 762 } 763 break; 764 case IR_MOD: 765 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 766 
op2_insn = &ctx->ir_base[insn->op2]; 767 if (IR_IS_SYM_CONST(op2_insn->op)) { 768 /* pass */ 769 } else if (IR_IS_CONST_REF(insn->op1)) { 770 // const 771 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 772 if (IR_IS_TYPE_UNSIGNED(insn->type)) { 773 return IR_MOD_PWR2; 774 } else { 775 return IR_SMOD_PWR2; 776 } 777 } 778 } 779 return IR_BINOP_INT; 780 case IR_BSWAP: 781 case IR_NOT: 782 case IR_CTLZ: 783 case IR_CTTZ: 784 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 785 return IR_OP_INT; 786 case IR_NEG: 787 case IR_ABS: 788 if (IR_IS_TYPE_INT(insn->type)) { 789 return IR_OP_INT; 790 } else { 791 return IR_OP_FP; 792 } 793 case IR_OR: 794 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 795 op2_insn = &ctx->ir_base[insn->op2]; 796 if (IR_IS_SYM_CONST(op2_insn->op)) { 797 /* pass */ 798 } else if (IR_IS_CONST_REF(insn->op1)) { 799 // const 800 } else if (op2_insn->val.i64 == 0) { 801 return IR_COPY_INT; 802 } else if (op2_insn->val.i64 == -1) { 803 // -1 804 } 805 } 806 goto binop_int; 807 case IR_AND: 808 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 809 op2_insn = &ctx->ir_base[insn->op2]; 810 if (IR_IS_SYM_CONST(op2_insn->op)) { 811 /* pass */ 812 } else if (IR_IS_CONST_REF(insn->op1)) { 813 // const 814 } else if (op2_insn->val.i64 == 0) { 815 // 0 816 } else if (op2_insn->val.i64 == -1) { 817 return IR_COPY_INT; 818 } 819 } 820 goto binop_int; 821 case IR_XOR: 822 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 823 op2_insn = &ctx->ir_base[insn->op2]; 824 if (IR_IS_SYM_CONST(op2_insn->op)) { 825 /* pass */ 826 } else if (IR_IS_CONST_REF(insn->op1)) { 827 // const 828 } 829 } 830 goto binop_int; 831 case IR_SHL: 832 if (IR_IS_CONST_REF(insn->op2)) { 833 if (ctx->flags & IR_OPT_CODEGEN) { 834 op2_insn = &ctx->ir_base[insn->op2]; 835 if (IR_IS_SYM_CONST(op2_insn->op)) { 836 /* pass */ 837 } else if (IR_IS_CONST_REF(insn->op1)) { 838 // const 839 } else if (op2_insn->val.u64 == 0) { 840 return IR_COPY_INT; 
841 } else if (ir_type_size[insn->type] >= 4) { 842 if (op2_insn->val.u64 == 1) { 843 // lea [op1*2] 844 } else if (op2_insn->val.u64 == 2) { 845 // lea [op1*4] 846 } else if (op2_insn->val.u64 == 3) { 847 // lea [op1*8] 848 } 849 } 850 } 851 return IR_SHIFT_CONST; 852 } 853 return IR_SHIFT; 854 case IR_SHR: 855 case IR_SAR: 856 case IR_ROL: 857 case IR_ROR: 858 if (IR_IS_CONST_REF(insn->op2)) { 859 if (ctx->flags & IR_OPT_CODEGEN) { 860 op2_insn = &ctx->ir_base[insn->op2]; 861 if (IR_IS_SYM_CONST(op2_insn->op)) { 862 /* pass */ 863 } else if (IR_IS_CONST_REF(insn->op1)) { 864 // const 865 } else if (op2_insn->val.u64 == 0) { 866 return IR_COPY_INT; 867 } 868 } 869 return IR_SHIFT_CONST; 870 } 871 return IR_SHIFT; 872 case IR_MIN: 873 case IR_MAX: 874 if (IR_IS_TYPE_INT(insn->type)) { 875 return IR_MIN_MAX_INT; 876 } else { 877 goto binop_fp; 878 } 879 break; 880// case IR_COND: 881 case IR_COPY: 882 if (IR_IS_TYPE_INT(insn->type)) { 883 return IR_COPY_INT; 884 } else { 885 return IR_COPY_FP; 886 } 887 break; 888 case IR_CALL: 889 ctx->flags2 |= IR_HAS_CALLS; 890 return IR_CALL; 891 case IR_VAR: 892 return IR_SKIPPED | IR_VAR; 893 case IR_PARAM: 894 return ctx->use_lists[ref].count > 0 ? 
IR_PARAM : IR_SKIPPED | IR_PARAM; 895 case IR_ALLOCA: 896 if (ctx->flags & IR_FUNCTION) { 897 ctx->flags |= IR_USE_FRAME_POINTER; 898 ctx->flags2 |= IR_HAS_ALLOCA; 899 } 900 return IR_ALLOCA; 901 case IR_LOAD: 902 ir_match_fuse_addr(ctx, insn->op2, insn->type); 903 if (IR_IS_TYPE_INT(insn->type)) { 904 return IR_LOAD_INT; 905 } else { 906 return IR_LOAD_FP; 907 } 908 break; 909 case IR_STORE: 910 ir_match_fuse_addr(ctx, insn->op2, ctx->ir_base[insn->op3].type); 911 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { 912 return IR_STORE_INT; 913 } else { 914 return IR_STORE_FP; 915 } 916 break; 917 case IR_RLOAD: 918 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { 919 return IR_SKIPPED | IR_RLOAD; 920 } 921 return IR_RLOAD; 922 case IR_RSTORE: 923 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 924 if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 925 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 926 927 if (!ctx->rules[insn->op2]) { 928 ctx->rules[insn->op2] = ir_match_insn(ctx, insn->op2); 929 } 930 if (ctx->rules[insn->op2] == IR_BINOP_INT) { 931 if (ctx->ir_base[op_insn->op1].op == IR_RLOAD 932 && ctx->ir_base[op_insn->op1].op2 == insn->op3) { 933 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 934 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; 935 return IR_REG_BINOP_INT; 936 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 937 && ctx->ir_base[op_insn->op2].op == IR_RLOAD 938 && ctx->ir_base[op_insn->op2].op2 == insn->op3) { 939 ir_ref tmp = op_insn->op1; 940 op_insn->op1 = op_insn->op2; 941 op_insn->op2 = tmp; 942 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 943 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; 944 return IR_REG_BINOP_INT; 945 } 946 } 947 } 948 } 949 return IR_RSTORE; 950 case IR_START: 951 case IR_BEGIN: 952 case IR_IF_TRUE: 953 case IR_IF_FALSE: 954 case IR_CASE_VAL: 955 case IR_CASE_DEFAULT: 956 case 
IR_MERGE: 957 case IR_LOOP_BEGIN: 958 case IR_UNREACHABLE: 959 return IR_SKIPPED | insn->op; 960 case IR_RETURN: 961 if (!insn->op2) { 962 return IR_RETURN_VOID; 963 } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 964 return IR_RETURN_INT; 965 } else { 966 return IR_RETURN_FP; 967 } 968 case IR_IF: 969 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 970 op2_insn = &ctx->ir_base[insn->op2]; 971 if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { 972 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { 973 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 974 return IR_CMP_AND_BRANCH_INT; 975 } else { 976 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; 977 return IR_CMP_AND_BRANCH_FP; 978 } 979 } else if (op2_insn->op == IR_OVERFLOW) { 980 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; 981 return IR_OVERFLOW_AND_BRANCH; 982 } 983 } 984 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 985 return IR_IF_INT; 986 } else { 987 IR_ASSERT(0 && "NIY IR_IF_FP"); 988 break; 989 } 990 case IR_GUARD: 991 case IR_GUARD_NOT: 992 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 993 op2_insn = &ctx->ir_base[insn->op2]; 994 if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT 995 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP 996 && (insn->op2 == ref - 1 || 997 (insn->op2 == ctx->prev_ref[ref] - 1 998 && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { 999 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { 1000 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 1001 return IR_GUARD_CMP_INT; 1002 } else { 1003 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; 1004 return IR_GUARD_CMP_FP; 1005 } 1006 } else if (op2_insn->op == IR_OVERFLOW) { 1007 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; 1008 return IR_GUARD_OVERFLOW; 1009 } 1010 } 1011 return insn->op; 1012 case IR_VA_START: 1013 ctx->flags2 |= IR_HAS_VA_START; 1014 if 
(ctx->ir_base[insn->op2].op == IR_ALLOCA) { 1015 ir_use_list *use_list = &ctx->use_lists[insn->op2]; 1016 ir_ref *p, n = use_list->count; 1017 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { 1018 ir_insn *use_insn = &ctx->ir_base[*p]; 1019 if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) { 1020 } else if (use_insn->op == IR_VA_COPY) { 1021 if (use_insn->op3 == insn->op2) { 1022 ctx->flags2 |= IR_HAS_VA_COPY; 1023 } 1024 } else if (use_insn->op == IR_VA_ARG) { 1025 if (use_insn->op2 == insn->op2) { 1026 if (IR_IS_TYPE_INT(use_insn->type)) { 1027 ctx->flags2 |= IR_HAS_VA_ARG_GP; 1028 } else { 1029 IR_ASSERT(IR_IS_TYPE_FP(use_insn->type)); 1030 ctx->flags2 |= IR_HAS_VA_ARG_FP; 1031 } 1032 } 1033 } else if (*p > ref) { 1034 /* diriect va_list access */ 1035 ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; 1036 } 1037 } 1038 } 1039 return IR_VA_START; 1040 case IR_VA_END: 1041 return IR_SKIPPED | IR_NOP; 1042 case IR_VADDR: 1043 if (ctx->use_lists[ref].count > 0) { 1044 ir_use_list *use_list = &ctx->use_lists[ref]; 1045 ir_ref *p, n = use_list->count; 1046 1047 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { 1048 if (ctx->ir_base[*p].op != IR_VA_END) { 1049 return IR_VADDR; 1050 } 1051 } 1052 } 1053 return IR_SKIPPED | IR_NOP; 1054 default: 1055 break; 1056 } 1057 1058 return insn->op; 1059} 1060 1061static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) 1062{ 1063} 1064 1065/* code generation */ 1066static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg) 1067{ 1068 int32_t offset; 1069 1070 IR_ASSERT(ref >= 0); 1071 offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; 1072 IR_ASSERT(offset != -1); 1073 if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { 1074 IR_ASSERT(ctx->spill_base != IR_REG_NONE); 1075 *reg = ctx->spill_base; 1076 return offset; 1077 } 1078 *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 1079 return IR_SPILL_POS_TO_OFFSET(offset); 1080} 1081 1082static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v) 1083{ 1084 int32_t offset; 1085 ir_reg base; 1086 1087 IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]); 1088 offset = ctx->live_intervals[v]->stack_spill_pos; 1089 IR_ASSERT(offset != -1); 1090 if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { 1091 IR_ASSERT(ctx->spill_base != IR_REG_NONE); 1092 return IR_MEM_BO(ctx->spill_base, offset); 1093 } 1094 base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 1095 offset = IR_SPILL_POS_TO_OFFSET(offset); 1096 return IR_MEM_BO(base, offset); 1097} 1098 1099static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref) 1100{ 1101 IR_ASSERT(!IR_IS_CONST_REF(ref)); 1102 return ir_vreg_spill_slot(ctx, ctx->vregs[ref]); 1103} 1104 1105static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem) 1106{ 1107 return IR_MEM_VAL(ir_ref_spill_slot(ctx, ref)) == IR_MEM_VAL(mem); 1108} 1109 1110static int32_t ir_var_spill_slot(ir_ctx *ctx, ir_ref ref, ir_reg *reg) 1111{ 1112 ir_insn *var_insn = &ctx->ir_base[ref]; 1113 1114 IR_ASSERT(var_insn->op == IR_VAR); 1115 *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(var_insn->op3);
}

/*
 * Inspect the use positions recorded for "ref" at instruction "use".
 *
 * Walks every live interval chained from the virtual register of "ref" and
 * looks for the first use position that belongs to "use". Returns true when
 * that position is the last one in its interval or when the following
 * position has op_num == 0; returns false when no matching position exists.
 */
static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_live_interval *ival;

	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	while (ival) {
		ir_use_pos *use_pos = ival->use_pos;
		while (use_pos) {
			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
				return !use_pos->next || use_pos->next->op_num == 0;
			}
			use_pos = use_pos->next;
		}
		ival = ival->next;
	}
	return 0;
}

/*
 * Materialize the integer constant "val" in general purpose register "reg".
 *
 * 8-byte types use the X view of the register, all other sizes the W view.
 * The shortest sequence is chosen in this order:
 *   - zero:                   mov from the zero register (nothing is emitted
 *                             when "reg" is IR_REG_ZR itself);
 *   - value fits in 16 bits:  single movz;
 *   - ~value fits in 16 bits: single movn;
 *   - otherwise:              movz for the lowest non-zero 16-bit chunk,
 *                             followed by one movk per remaining non-zero
 *                             chunk (up to 4 chunks for X, 2 for W).
 */
static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (ir_type_size[type] == 8) {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rx(reg), xzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rx(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rx(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			/* bits 0..15 are set: start there, then patch higher chunks */
			|	movz Rx(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			/* bits 0..15 are zero: start with bits 16..31 */
			|	movz Rx(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			if (((uint64_t)(val) >> 32) & 0xffff) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			}
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else if (((uint64_t)(val) >> 32) & 0xffff) {
			/* bits 0..31 are zero: start with bits 32..47 */
			|	movz Rx(reg), #(((uint64_t)(val) >> 32) & 0xffff), lsl #32
			if ((((uint64_t)(val) >> 48) & 0xffff)) {
				|	movk Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
			}
		} else {
			/* only bits 48..63 are set */
			|	movz Rx(reg), #(((uint64_t)(val) >> 48) & 0xffff), lsl #48
		}
	} else {
		if (val == 0) {
			if (reg != IR_REG_ZR) {
				|	mov Rw(reg), wzr
			}
		} else if (((uint64_t)(val)) <= 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val))
		} else if (~((uint64_t)(val)) <= 0xffff) {
			|	movn Rw(reg), #(~((uint64_t)(val)))
		} else if ((uint64_t)(val) & 0xffff) {
			|	movz Rw(reg), #((uint64_t)(val) & 0xffff)
			if (((uint64_t)(val) >> 16) & 0xffff) {
				|	movk Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
			}
		} else if (((uint64_t)(val) >> 16) & 0xffff) {
			|	movz Rw(reg), #(((uint64_t)(val) >> 16) & 0xffff), lsl #16
		}
	}
}

/*
 * Load an integer of "type" from memory operand "mem" into general purpose
 * register "reg".
 *
 * Base+offset operands use the immediate addressing form when
 * aarch64_may_encode_addr_offset() accepts the offset. Otherwise the offset
 * is first materialized in IR_REG_INT_TMP and the register-offset form is
 * used. Operands that already carry an index register must have a zero
 * offset. 1- and 2-byte loads use the sign-extending instruction for signed
 * types and the plain (ldrb/ldrh) instruction otherwise.
 */
static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base_reg = IR_MEM_BASE(mem);
	ir_reg index_reg = IR_MEM_INDEX(mem);
	int32_t offset = IR_MEM_OFFSET(mem);

	if (index_reg == IR_REG_NONE) {
		if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) {
			switch (ir_type_size[type]) {
				default:
					IR_ASSERT(0);
				case 8:
					|	ldr Rx(reg), [Rx(base_reg), #offset]
					break;
				case 4:
					|	ldr Rw(reg), [Rx(base_reg), #offset]
					break;
				case 2:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsh Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrh Rw(reg), [Rx(base_reg), #offset]
					}
					break;
				case 1:
					if (IR_IS_TYPE_SIGNED(type)) {
						|	ldrsb Rw(reg), [Rx(base_reg), #offset]
					} else {
						|	ldrb Rw(reg), [Rx(base_reg), #offset]
					}
					break;
			}
			return;
		} else {
			index_reg = IR_REG_INT_TMP; /* reserved temporary register */

ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1239 } 1240 } else { 1241 IR_ASSERT(offset == 0); 1242 } 1243 1244 switch (ir_type_size[type]) { 1245 default: 1246 IR_ASSERT(0); 1247 case 8: 1248 | ldr Rx(reg), [Rx(base_reg), Rx(index_reg)] 1249 break; 1250 case 4: 1251 | ldr Rw(reg), [Rx(base_reg), Rx(index_reg)] 1252 break; 1253 case 2: 1254 if (IR_IS_TYPE_SIGNED(type)) { 1255 | ldrsh Rw(reg), [Rx(base_reg), Rx(index_reg)] 1256 } else { 1257 | ldrh Rw(reg), [Rx(base_reg), Rx(index_reg)] 1258 } 1259 break; 1260 case 1: 1261 if (IR_IS_TYPE_SIGNED(type)) { 1262 | ldrsb Rw(reg), [Rx(base_reg), Rx(index_reg)] 1263 } else { 1264 | ldrb Rw(reg), [Rx(base_reg), Rx(index_reg)] 1265 } 1266 break; 1267 } 1268} 1269 1270static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) 1271{ 1272 ir_backend_data *data = ctx->data; 1273 dasm_State **Dst = &data->dasm_state; 1274 ir_insn *insn = &ctx->ir_base[src]; 1275 int label; 1276 1277 if (type == IR_FLOAT && insn->val.u32 == 0) { 1278 | fmov Rs(reg-IR_REG_FP_FIRST), wzr 1279 } else if (type == IR_DOUBLE && insn->val.u64 == 0) { 1280 | fmov Rd(reg-IR_REG_FP_FIRST), xzr 1281 } else { 1282 label = ir_const_label(ctx, src); 1283 if (type == IR_DOUBLE) { 1284 | ldr Rd(reg-IR_REG_FP_FIRST), =>label 1285 } else { 1286 IR_ASSERT(type == IR_FLOAT); 1287 | ldr Rs(reg-IR_REG_FP_FIRST), =>label 1288 } 1289 } 1290} 1291 1292static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 1293{ 1294 ir_backend_data *data = ctx->data; 1295 dasm_State **Dst = &data->dasm_state; 1296 ir_reg base_reg = IR_MEM_BASE(mem); 1297 ir_ref index_reg = IR_MEM_INDEX(mem); 1298 int32_t offset = IR_MEM_OFFSET(mem); 1299 1300 if (index_reg == IR_REG_NONE) { 1301 if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { 1302 if (type == IR_DOUBLE) { 1303 | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] 1304 } else { 1305 IR_ASSERT(type == IR_FLOAT); 1306 | ldr Rs(reg-IR_REG_FP_FIRST), 
[Rx(base_reg), #offset] 1307 } 1308 } else { 1309 index_reg = IR_REG_INT_TMP; /* reserved temporary register */ 1310 1311 ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1312 } 1313 return; 1314 } else { 1315 IR_ASSERT(offset == 0); 1316 } 1317 1318 if (type == IR_DOUBLE) { 1319 | ldr Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1320 } else { 1321 IR_ASSERT(type == IR_FLOAT); 1322 | ldr Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1323 } 1324} 1325 1326static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 1327{ 1328 if (IR_IS_TYPE_INT(type)) { 1329 ir_emit_load_mem_int(ctx, type, reg, mem); 1330 } else { 1331 ir_emit_load_mem_fp(ctx, type, reg, mem); 1332 } 1333} 1334 1335static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) 1336{ 1337 if (IR_IS_CONST_REF(src)) { 1338 if (IR_IS_TYPE_INT(type)) { 1339 ir_insn *insn = &ctx->ir_base[src]; 1340 1341 if (insn->op == IR_SYM || insn->op == IR_FUNC) { 1342 void *addr = ir_sym_val(ctx, insn); 1343 IR_ASSERT(addr); 1344 ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr); 1345 } else if (insn->op == IR_STR) { 1346 ir_backend_data *data = ctx->data; 1347 dasm_State **Dst = &data->dasm_state; 1348 int label = ir_const_label(ctx, src); 1349 1350 | adr Rx(reg), =>label 1351 } else { 1352 ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); 1353 } 1354 } else { 1355 ir_emit_load_imm_fp(ctx, type, reg, src); 1356 } 1357 } else { 1358 ir_mem mem = ir_ref_spill_slot(ctx, src); 1359 ir_emit_load_mem(ctx, type, reg, mem); 1360 } 1361} 1362 1363static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 1364{ 1365 ir_backend_data *data = ctx->data; 1366 dasm_State **Dst = &data->dasm_state; 1367 ir_reg base_reg = IR_MEM_BASE(mem); 1368 ir_reg index_reg = IR_MEM_INDEX(mem); 1369 int32_t offset = IR_MEM_OFFSET(mem); 1370 1371 if (index_reg == IR_REG_NONE) { 1372 if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { 1373 switch 
(ir_type_size[type]) { 1374 default: 1375 IR_ASSERT(0); 1376 case 8: 1377 | str Rx(reg), [Rx(base_reg), #offset] 1378 break; 1379 case 4: 1380 | str Rw(reg), [Rx(base_reg), #offset] 1381 break; 1382 case 2: 1383 | strh Rw(reg), [Rx(base_reg), #offset] 1384 break; 1385 case 1: 1386 | strb Rw(reg), [Rx(base_reg), #offset] 1387 break; 1388 } 1389 return; 1390 } else { 1391 index_reg = IR_REG_INT_TMP; /* reserved temporary register */ 1392 1393 ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1394 } 1395 } else { 1396 IR_ASSERT(offset == 0); 1397 } 1398 1399 switch (ir_type_size[type]) { 1400 default: 1401 IR_ASSERT(0); 1402 case 8: 1403 | str Rx(reg), [Rx(base_reg), Rx(index_reg)] 1404 break; 1405 case 4: 1406 | str Rw(reg), [Rx(base_reg), Rx(index_reg)] 1407 break; 1408 case 2: 1409 | strh Rw(reg), [Rx(base_reg), Rx(index_reg)] 1410 break; 1411 case 1: 1412 | strb Rw(reg), [Rx(base_reg), Rx(index_reg)] 1413 break; 1414 } 1415} 1416 1417static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 1418{ 1419 ir_backend_data *data = ctx->data; 1420 dasm_State **Dst = &data->dasm_state; 1421 ir_reg base_reg = IR_MEM_BASE(mem); 1422 ir_reg index_reg = IR_MEM_INDEX(mem); 1423 int32_t offset = IR_MEM_OFFSET(mem); 1424 1425 if (index_reg == IR_REG_NONE) { 1426 if (aarch64_may_encode_addr_offset(offset, ir_type_size[type])) { 1427 if (type == IR_DOUBLE) { 1428 | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] 1429 } else { 1430 IR_ASSERT(type == IR_FLOAT); 1431 | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), #offset] 1432 } 1433 } else { 1434 index_reg = IR_REG_INT_TMP; /* reserved temporary register */ 1435 1436 ir_emit_load_imm_int(ctx, IR_ADDR, index_reg, offset); 1437 } 1438 return; 1439 } else { 1440 IR_ASSERT(offset == 0); 1441 } 1442 1443 if (type == IR_DOUBLE) { 1444 | str Rd(reg-IR_REG_FP_FIRST), [Rx(base_reg), Rx(index_reg)] 1445 } else { 1446 IR_ASSERT(type == IR_FLOAT); 1447 | str Rs(reg-IR_REG_FP_FIRST), [Rx(base_reg), 
Rx(index_reg)] 1448 } 1449} 1450 1451static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 1452{ 1453 if (IR_IS_TYPE_INT(type)) { 1454 ir_emit_store_mem_int(ctx, type, mem, reg); 1455 } else { 1456 ir_emit_store_mem_fp(ctx, type, mem, reg); 1457 } 1458} 1459 1460static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) 1461{ 1462 IR_ASSERT(dst >= 0); 1463 ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg); 1464} 1465 1466static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 1467{ 1468 ir_backend_data *data = ctx->data; 1469 dasm_State **Dst = &data->dasm_state; 1470 1471 if (ir_type_size[type] == 8) { 1472 if (dst == IR_REG_STACK_POINTER) { 1473 | mov sp, Rx(src) 1474 } else if (src == IR_REG_STACK_POINTER) { 1475 | mov Rx(dst), sp 1476 } else { 1477 | mov Rx(dst), Rx(src) 1478 } 1479 } else { 1480 | mov Rw(dst), Rw(src) 1481 } 1482} 1483 1484static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 1485{ 1486 ir_backend_data *data = ctx->data; 1487 dasm_State **Dst = &data->dasm_state; 1488 1489 if (ir_type_size[type] == 8) { 1490 | mov Rx(dst), Rx(src) 1491 } else { 1492 | mov Rw(dst), Rw(src) 1493 } 1494} 1495static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 1496{ 1497 ir_backend_data *data = ctx->data; 1498 dasm_State **Dst = &data->dasm_state; 1499 1500 if (ir_type_size[type] == 8) { 1501 | fmov Rd(dst-IR_REG_FP_FIRST), Rd(src-IR_REG_FP_FIRST) 1502 } else { 1503 | fmov Rs(dst-IR_REG_FP_FIRST), Rs(src-IR_REG_FP_FIRST) 1504 } 1505} 1506 1507static void ir_emit_prologue(ir_ctx *ctx) 1508{ 1509 ir_backend_data *data = ctx->data; 1510 dasm_State **Dst = &data->dasm_state; 1511 int offset; 1512 1513 if (ctx->flags & IR_USE_FRAME_POINTER) { 1514 offset = -(ctx->stack_frame_size+16); 1515 if (aarch64_may_encode_imm7_addr_offset(offset, 8)) { 1516 | stp x29, x30, [sp, #offset]! 
} else {
			/* frame too large for the pre-indexed stp form */
			|	sub sp, sp, #(ctx->stack_frame_size+16)
			|	stp x29, x30, [sp]
		}
		|	mov x29, sp
		if (ctx->call_stack_size) {
			|	sub sp, sp, #(ctx->call_stack_size)
		}
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			|	sub sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
	if (ctx->used_preserved_regs) {
		ir_reg fp;
		uint32_t i;
		/* first register of a not yet stored pair, or IR_REG_NONE */
		ir_reg prev = IR_REG_NONE;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		/* "offset" starts at the top of the frame and is decremented
		 * before every store, so registers are saved downwards */
		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = ctx->stack_frame_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (prev == IR_REG_NONE) {
					prev = i;
				} else if (i < IR_REG_FP_FIRST) {
					/* two general purpose registers */
					offset -= sizeof(void*) * 2;
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(i), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(i), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
				} else {
					if (prev < IR_REG_FP_FIRST) {
						/* mixed pair: one general purpose and one FP
						 * register, stored separately */
						offset -= sizeof(void*);
						|	str Rx(prev), [Rx(fp), #offset]
						offset -= sizeof(void*);
						|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
					} else {
						/* two FP registers */
						offset -= sizeof(void*) * 2;
						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
							|	stp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
						} else {
							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
							|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
							|	str Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
						}
					}
					prev = IR_REG_NONE;
				}
			}
		}
		/* odd number of registers: store the leftover one on its own */
		if (prev != IR_REG_NONE) {
			if (prev < IR_REG_FP_FIRST) {
				offset -= sizeof(void*);
				|	str Rx(prev), [Rx(fp), #offset]
			} else {
				offset -= sizeof(void*);
				|	str Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
			}
		}
	}
	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#ifndef __APPLE__
		const int8_t *int_reg_params = _ir_int_reg_params;
		const int8_t *fp_reg_params = _ir_fp_reg_params;
		ir_reg fp;
		/* NOTE: shadows the function-level "offset" */
		int offset;
		int i;

		/* the save area starts right after the locals area */
		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;

			offset = ctx->locals_area_size + sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->locals_area_size + ctx->call_stack_size;
		}

		/* general purpose argument registers not taken by named args */
		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
			ir_reg prev = IR_REG_NONE;

			/* skip named args */
			offset += sizeof(void*) * ctx->gp_reg_params;
			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
				if (prev != IR_REG_NONE) {
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	stp Rx(prev), Rx(int_reg_params[i]), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	str Rx(prev), [Rx(fp), #offset]
						|	str Rx(int_reg_params[i]), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
					offset += sizeof(void*) * 2;
				} else {
					prev = int_reg_params[i];
				}
			}
			if (prev != IR_REG_NONE) {
				|	str Rx(prev), [Rx(fp), #offset]
				offset += sizeof(void*);
			}
		}
		/* FP argument registers not taken by named args; each one gets a
		 * 16-byte slot although only the D view (8 bytes) is stored */
		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
			/* skip named args */
			offset += 16 * ctx->fp_reg_params;
			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
				// TODO: Rd->Rq stur->str ???
				|	str Rd(fp_reg_params[i]-IR_REG_FP_FIRST), [Rx(fp), #offset]
				offset += 16;
			}
		}
#endif
	}
}

/*
 * Emit the function epilogue.
 *
 * Reloads the used preserved registers, walking the register set in the
 * same order and with the same offsets as ir_emit_prologue() used to save
 * them, and then releases the stack frame (restoring x29/x30 when a frame
 * pointer is used).
 */
static void ir_emit_epilogue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ctx->used_preserved_regs) {
		int offset;
		uint32_t i;
		/* first register of a not yet loaded pair, or IR_REG_NONE */
		ir_reg prev = IR_REG_NONE;
		ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			offset = ctx->stack_frame_size + sizeof(void*) * 2;
		} else {
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (prev == IR_REG_NONE) {
					prev = i;
				} else if (i < IR_REG_FP_FIRST) {
					/* two general purpose registers */
					offset -= sizeof(void*) * 2;
					if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
						|	ldp Rx(prev), Rx(i), [Rx(fp), #offset]
					} else {
						IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
						|	ldr Rx(prev), [Rx(fp), #offset]
						|	ldr Rx(i), [Rx(fp), #(offset+8)]
					}
					prev = IR_REG_NONE;
				} else {
					if (prev < IR_REG_FP_FIRST) {
						/* mixed pair: one general purpose and one FP
						 * register, loaded separately */
						offset -= sizeof(void*);
						|	ldr Rx(prev), [Rx(fp), #offset]
						offset -= sizeof(void*);
						|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
					} else {
						/* two FP registers */
						offset -= sizeof(void*) * 2;
						if (aarch64_may_encode_imm7_addr_offset(offset, 8)) {
							|	ldp Rd(prev-IR_REG_FP_FIRST), Rd(i-IR_REG_FP_FIRST), [Rx(fp), #offset]
						} else {
							IR_ASSERT(aarch64_may_encode_addr_offset(offset, 8));
							|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
							|	ldr Rd(i-IR_REG_FP_FIRST), [Rx(fp), #(offset+8)]
						}
					}
					prev = IR_REG_NONE;
				}
			}
		}
		/* odd number of registers: reload the leftover one on its own */
		if (prev != IR_REG_NONE) {
			if (prev < IR_REG_FP_FIRST) {
				offset -= sizeof(void*);
				|	ldr Rx(prev), [Rx(fp), #offset]
} else {
				offset -= sizeof(void*);
				|	ldr Rd(prev-IR_REG_FP_FIRST), [Rx(fp), #offset]
			}
		}
	}

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		/* sp may have moved below x29 (outgoing call area or alloca) */
		if (ctx->call_stack_size || (ctx->flags2 & IR_HAS_ALLOCA)) {
			|	mov sp, x29
		}
		if (aarch64_may_encode_imm7_addr_offset(ctx->stack_frame_size+16, 8)) {
			|	ldp x29, x30, [sp], #(ctx->stack_frame_size+16)
		} else {
			/* frame too large for the post-indexed ldp form */
			|	ldp x29, x30, [sp]
			|	add sp, sp, #(ctx->stack_frame_size+16)
		}
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			|	add sp, sp, #(ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
}

/*
 * Emit an integer binary operation "def = op1 <insn->op> op2".
 *
 * Operand registers come from ctx->regs[def][]: [0] result, [1] op1,
 * [2] op2, [3] scratch (used by IR_MUL_OV and IR_MOD). Spilled or constant
 * operands are loaded into their assigned registers first. When no register
 * was assigned to op2 it must be a non-symbolic constant and is encoded as
 * an immediate. The result is written back to the spill slot when the
 * result register is marked as spilled.
 *
 * The *_OV variants leave the overflow condition in the flags for a
 * following ir_emit_overflow() / ir_emit_overflow_and_branch(): ADD_OV and
 * SUB_OV use the flag-setting adds/subs, MUL_OV ends with a cmp so that
 * "ne" means overflow.
 */
static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg tmp_reg;

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
			op2_reg = IR_REG_NUM(op2_reg);
			/* same operand twice: the second load is skipped */
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op2_reg
				break;
			case IR_ADD_OV:
				|	ASM_REG_REG_REG_OP adds, type, def_reg, op1_reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_REG_REG_REG_OP sub, type, def_reg, op1_reg, op2_reg
				break;
			case IR_SUB_OV:
				|	ASM_REG_REG_REG_OP subs, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_REG_REG_REG_OP mul, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MUL_OV:
				if (ir_type_size[type] == 8) {
					if (IR_IS_TYPE_SIGNED(type)) {
						/* no overflow iff the high 64 bits of the product
						 * equal the sign extension of the low 64 bits */
						tmp_reg = ctx->regs[def][3];
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						|	smulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
						|	cmp Rx(tmp_reg), Rx(def_reg), asr #63
					} else {
						/* no overflow iff the high 64 bits are zero */
						tmp_reg = ctx->regs[def][3];
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						|	umulh Rx(tmp_reg), Rx(op1_reg), Rx(op2_reg)
						|	mul Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
						|	cmp Rx(tmp_reg), xzr
					}
				} else {
					if (IR_IS_TYPE_SIGNED(type)) {
						/* widening multiply; compare product>>32 against
						 * product>>31, which are equal only when the
						 * product fits in 32 signed bits */
						tmp_reg = ctx->regs[def][3];
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						|	smull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
						|	asr Rx(tmp_reg), Rx(def_reg), #32
						|	cmp Rx(tmp_reg), Rx(def_reg), asr #31
					} else {
						/* widening multiply; overflow iff bits 32..63 of
						 * the product are non-zero */
						|	umull Rx(def_reg), Rw(op1_reg), Rw(op2_reg)
						|	cmp xzr, Rx(def_reg), lsr #32
					}
				}
				break;
			case IR_DIV:
				if (IR_IS_TYPE_SIGNED(type)) {
					|	ASM_REG_REG_REG_OP sdiv, type, def_reg, op1_reg, op2_reg
				} else {
					|	ASM_REG_REG_REG_OP udiv, type, def_reg, op1_reg, op2_reg
				}
				break;
			case IR_MOD:
				/* remainder = op1 - (op1 / op2) * op2 */
				tmp_reg = ctx->regs[def][3];
				IR_ASSERT(tmp_reg != IR_REG_NONE);
				if (IR_IS_TYPE_SIGNED(type)) {
					|	ASM_REG_REG_REG_OP sdiv, type, tmp_reg, op1_reg, op2_reg
					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
				} else {
					|	ASM_REG_REG_REG_OP udiv, type, tmp_reg, op1_reg, op2_reg
					|	ASM_REG_REG_REG_REG_OP msub, type, def_reg, tmp_reg, op2_reg, op1_reg
				}
				break;
			case IR_OR:
				|	ASM_REG_REG_REG_OP orr, type, def_reg, op1_reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_REG_OP and, type, def_reg, op1_reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_REG_OP eor, type, def_reg, op1_reg, op2_reg
				break;
		}
	} else {
		/* op2 has no register: encode it as an immediate operand */
		IR_ASSERT(IR_IS_CONST_REF(op2));
		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
		int32_t val = ctx->ir_base[op2].val.i32;
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_IMM_OP add, type, def_reg, op1_reg, val
				break;
			case IR_ADD_OV:
				|	ASM_REG_REG_IMM_OP adds, type, def_reg, op1_reg, val
				break;
			case IR_SUB:
				|	ASM_REG_REG_IMM_OP sub, type, def_reg, op1_reg, val
				break;
			case IR_SUB_OV:
				|	ASM_REG_REG_IMM_OP subs, type, def_reg, op1_reg, val
				break;
			case IR_OR:
				if (ir_type_size[type] == 8) {
					/* shadows the 32-bit "val": logical immediates of
					 * 8-byte types use the full 64-bit constant */
					uint64_t val = ctx->ir_base[op2].val.u64;
					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
				} else {
					|	ASM_REG_REG_IMM_OP orr, type, def_reg, op1_reg, val
				}
				break;
			case IR_AND:
				if (ir_type_size[type] == 8) {
					uint64_t val = ctx->ir_base[op2].val.u64;
					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
				} else {
					|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, val
				}
				break;
			case IR_XOR:
				if (ir_type_size[type] == 8) {
					uint64_t val = ctx->ir_base[op2].val.u64;
					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
				} else {
					|	ASM_REG_REG_IMM_OP eor, type, def_reg, op1_reg, val
				}
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/*
 * Emit integer IR_MIN / IR_MAX as a compare followed by a conditional
 * select. Signed types use the le/ge conditions, unsigned types ls/hs.
 * Both operands must have registers assigned.
 */
static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg
!= IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, op2);
	}

	/* NOTE(review): this returns without writing def_reg or storing a
	 * spilled result; presumably MIN/MAX of identical operands never
	 * reaches code generation with def_reg != op1_reg -- confirm. */
	if (op1 == op2) {
		return;
	}

	if (ir_type_size[type] == 8) {
		|	cmp Rx(op1_reg), Rx(op2_reg)
		if (insn->op == IR_MIN) {
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), le
			} else {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ls
			}
		} else {
			IR_ASSERT(insn->op == IR_MAX);
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), ge
			} else {
				|	csel Rx(def_reg), Rx(op1_reg), Rx(op2_reg), hs
			}
		}
	} else {
		|	cmp Rw(op1_reg), Rw(op2_reg)
		if (insn->op == IR_MIN) {
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), le
			} else {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ls
			}
		} else {
			IR_ASSERT(insn->op == IR_MAX);
			if (IR_IS_TYPE_SIGNED(type)) {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), ge
			} else {
				|	csel Rw(def_reg), Rw(op1_reg), Rw(op2_reg), hs
			}
		}
	}

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/*
 * Materialize the overflow state of the arithmetic instruction referenced by
 * insn->op1 as 0/1 in the result register.
 *
 * The condition depends on how that instruction left the flags:
 * "ne" for IR_MUL_OV, "vs" for other signed operations and "cs" for other
 * unsigned operations.
 */
static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_insn *math_insn = &ctx->ir_base[insn->op1];
	ir_type type = math_insn->type;

	IR_ASSERT(def_reg != IR_REG_NONE);
	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (math_insn->op == IR_MUL_OV) {
		|	cset Rw(def_reg), ne
	} else if (IR_IS_TYPE_SIGNED(type)) {
		|	cset Rw(def_reg), vs
	} else {
		|	cset Rw(def_reg), cs
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/*
 * Emit a conditional branch on the overflow state of the arithmetic
 * instruction reached through insn->op2 (the overflow instruction) and its
 * op1.
 *
 * The targets of block "b" are obtained from ir_get_true_false_blocks().
 * When the "true" target is the next block the condition is inverted and
 * the branch goes to the "false" target instead, so the next block is
 * always reached by falling through. An unconditional branch is added only
 * when neither target is the next block.
 */
static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
	ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1];
	ir_type type = math_insn->type;
	uint32_t true_block, false_block, next_block;
	bool reverse = 0;

	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
	if (true_block == next_block) {
		/* branch on the inverted condition to the former false target */
		reverse = 1;
		true_block = false_block;
		false_block = 0;
	} else if (false_block == next_block) {
		false_block = 0;
	}

	if (math_insn->op == IR_MUL_OV) {
		if (reverse) {
			|	beq =>true_block
		} else {
			|	bne =>true_block
		}
	} else if (IR_IS_TYPE_SIGNED(type)) {
		if (reverse) {
			|	bvc =>true_block
		} else {
			|	bvs =>true_block
		}
	} else {
		if (reverse) {
			|	bcc =>true_block
		} else {
			|	bcs =>true_block
		}
	}
	if (false_block) {
		|	b =>false_block
	}
}

/*
 * Emit an IR_RSTORE whose stored value is a binary operation, applying the
 * operation in place: "reg = reg <op> op2", where "reg" is the fixed
 * register given by insn->op3 and the operation is the instruction
 * referenced by insn->op2.
 *
 * When the operation's second operand has no register it must be a
 * non-symbolic constant and is used as a 32-bit immediate (ADD, SUB, OR,
 * AND, XOR). Otherwise the operand is loaded if needed and the
 * register-register form is used (the same operations plus MUL).
 */
static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op2];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	ir_reg reg;

	IR_ASSERT(insn->op == IR_RSTORE);
	reg = insn->op3;

	if (op2_reg == IR_REG_NONE) {
		ir_val *val = &ctx->ir_base[op2].val;

		IR_ASSERT(IR_IS_CONST_REF(op2));
		IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op));
		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_IMM_OP add, type, reg, reg, val->i32
				break;
			case IR_SUB:
				|	ASM_REG_REG_IMM_OP sub, type, reg, reg, val->i32
				break;
			case IR_OR:
				|	ASM_REG_REG_IMM_OP orr, type, reg, reg, val->i32
				break;
			case IR_AND:
				|	ASM_REG_REG_IMM_OP and, type, reg, reg, val->i32
				break;
			case IR_XOR:
				|	ASM_REG_REG_IMM_OP eor, type, reg, reg, val->i32
				break;
		}
	} else {
		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_REG_OP add, type, reg, reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_REG_REG_REG_OP sub, type, reg, reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_REG_REG_REG_OP mul, type, reg, reg, op2_reg
				break;
			case IR_OR:
				|	ASM_REG_REG_REG_OP orr, type, reg, reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_REG_OP and, type, reg, reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_REG_OP eor, type, reg, reg, op2_reg
				break;
		}
	}
}

/*
 * Emit IR_MUL, IR_DIV or IR_MOD by a constant taken from insn->op2:
 *   MUL -> left shift by IR_LOG2(constant) (an add of op1 to itself when
 *          the shift is 1);
 *   DIV -> logical right shift by IR_LOG2(constant);
 *   MOD -> and with (constant - 1).
 * NOTE(review): judging by the function name and the shift/mask forms, the
 * constant is expected to be a power of two, and DIV/MOD look intended for
 * unsigned operands (signed ones have ir_emit_sdiv_pwr2 / ir_emit_smod_pwr2)
 * -- confirm against the matcher.
 */
static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (insn->op == IR_MUL) {
		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
		if (shift == 1) {
			|	ASM_REG_REG_REG_OP add, type, def_reg, op1_reg, op1_reg
		} else {
|	ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift
		}
	} else if (insn->op == IR_DIV) {
		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
		|	ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift
	} else {
		IR_ASSERT(insn->op == IR_MOD);
		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;
		|	ASM_REG_REG_IMM_OP and, type, def_reg, op1_reg, mask
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/*
 * Emit a signed division by the constant in insn->op2 as a shift.
 *
 * An arithmetic right shift alone rounds towards negative infinity, so
 * (constant - 1) is added to negative dividends before shifting:
 *   def = (op1 < 0 ? op1 + (constant - 1) : op1) >> IR_LOG2(constant)
 * The bias is added from op2_reg when a register was assigned for it
 * (it is loaded here) and as an immediate otherwise. The result register
 * must differ from op1's register because op1 is still read by the csel
 * after def has been written.
 */
static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
	/* bias added to negative dividends (despite the name, not an address
	 * offset) */
	int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;

	IR_ASSERT(shift != 0);
	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && def_reg != op1_reg);

	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	if (op2_reg != IR_REG_NONE) {
		ir_emit_load_imm_int(ctx, type, op2_reg, offset);
	}

	if (ir_type_size[type] == 8) {
		|	cmp Rx(op1_reg), #0
		if (op2_reg != IR_REG_NONE) {
			|	add Rx(def_reg), Rx(op1_reg), Rx(op2_reg)
		} else {
			|	add Rx(def_reg), Rx(op1_reg), #offset
		}
		/* keep the biased value only for negative dividends */
		|	csel Rx(def_reg), Rx(def_reg), Rx(op1_reg), lt
		|	asr Rx(def_reg), Rx(def_reg), #shift
	} else {
		|	cmp Rw(op1_reg), #0
		if (op2_reg != IR_REG_NONE) {
			|	add Rw(def_reg), Rw(op1_reg), Rw(op2_reg)
		} else {
			|	add Rw(def_reg), Rw(op1_reg), #offset
		}
		|	csel Rw(def_reg), Rw(def_reg), Rw(op1_reg), lt
		if (ir_type_size[type] == 4) {
			|	asr Rw(def_reg), Rw(def_reg), #shift
		} else if (ir_type_size[type] == 2) {
			/* 16-bit field starting at bit "shift" */
			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #16
		} else {
			IR_ASSERT(ir_type_size[type] == 1);
			/* 8-bit field starting at bit "shift" */
			|	ubfx Rw(def_reg), Rw(def_reg), #shift, #8
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/*
 * Emit a signed modulo by the constant in insn->op2 using
 * mask = constant - 1.
 *
 * op1 is first brought into def_reg. Then:
 *   tmp = -def           (negs; sets the flags)
 *   def = def & mask
 *   tmp = tmp & mask
 *   def = "mi" ? def : -tmp   (csneg)
 * so a positive dividend yields -((-x) & mask) ... x & mask, and the result
 * takes the sign of the dividend. Uses the scratch register
 * ctx->regs[def][3], which must differ from the result register.
 */
static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg tmp_reg = ctx->regs[def][3];
//	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
	uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;

	IR_ASSERT(mask != 0);
	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	/* the sequence below works in place on def_reg */
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}

	/* disabled alternative sequence (needs the "shift" local above): */
//	|	ASM_REG_REG_IMM_OP asr, type, tmp_reg, def_reg, (ir_type_size[type]*8-1)
//	|	ASM_REG_REG_IMM_OP lsr, type, tmp_reg, tmp_reg, (ir_type_size[type]*8-shift)
//	|	ASM_REG_REG_REG_OP add, type, def_reg, def_reg, tmp_reg
//	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
//	|	ASM_REG_REG_REG_OP sub, type, def_reg, def_reg, tmp_reg

	|	ASM_REG_REG_OP negs, type, tmp_reg, def_reg
	|	ASM_REG_REG_IMM_OP and, type, def_reg, def_reg, mask
	|	ASM_REG_REG_IMM_OP and, type, tmp_reg, tmp_reg, mask
	|	ASM_REG_REG_REG_TXT_OP csneg, type, def_reg, def_reg, tmp_reg, mi

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg tmp_reg;

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);
	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, insn->op1);
	}
	if (IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	}
	switch (insn->op) {
		default:
			IR_ASSERT(0);
		case IR_SHL:
			if (ir_type_size[type] == 1) {
				|	and Rw(def_reg), Rw(op1_reg), #0xff
				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else if (ir_type_size[type] == 2) {
				|	and Rw(def_reg), Rw(op1_reg), #0xffff
				|	lsl Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else {
				|	ASM_REG_REG_REG_OP lsl, type, def_reg, op1_reg, op2_reg
			}
			break;
		case IR_SHR:
			if (ir_type_size[type] == 1) {
				|	and Rw(def_reg), Rw(op1_reg), #0xff
				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else if (ir_type_size[type] == 2) {
				|	and Rw(def_reg), Rw(op1_reg), #0xffff
				|	lsr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else {
				|	ASM_REG_REG_REG_OP lsr, type, def_reg, op1_reg, op2_reg
			}
			break;
		case IR_SAR:
			if (ir_type_size[type] == 1) {
				|	sxtb Rw(def_reg), Rw(op1_reg)
				|	asr Rw(def_reg), Rw(def_reg), Rw(op2_reg)
			} else if (ir_type_size[type] == 2) {
				|	sxth Rw(def_reg), Rw(op1_reg)
				|	asr
Rw(def_reg), Rw(def_reg), Rw(op2_reg) 2261 } else { 2262 | ASM_REG_REG_REG_OP asr, type, def_reg, op1_reg, op2_reg 2263 } 2264 break; 2265 case IR_ROL: 2266 tmp_reg = ctx->regs[def][3]; 2267 IR_ASSERT(tmp_reg != IR_REG_NONE); 2268 if (ir_type_size[type] == 1) { 2269 | and Rw(def_reg), Rw(op1_reg), #0xff 2270 | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #8 2271 | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16 2272 | neg Rw(tmp_reg), Rw(op2_reg) 2273 | ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg) 2274 | and Rw(def_reg), Rw(def_reg), #0xff 2275 } else if (ir_type_size[type] == 2) { 2276 | and Rw(def_reg), Rw(op1_reg), #0xffff 2277 | add Rw(def_reg), Rw(def_reg), Rw(def_reg), lsl #16 2278 | neg Rw(tmp_reg), Rw(op2_reg) 2279 | ror Rw(def_reg), Rw(def_reg), Rw(tmp_reg) 2280 | and Rw(def_reg), Rw(def_reg), #0xffff 2281 } else if (ir_type_size[type] == 8) { 2282 | neg Rx(tmp_reg), Rx(op2_reg) 2283 | ror Rx(def_reg), Rx(op1_reg), Rx(tmp_reg) 2284 } else { 2285 | neg Rw(tmp_reg), Rw(op2_reg) 2286 | ror Rw(def_reg), Rw(op1_reg), Rw(tmp_reg) 2287 } 2288 break; 2289 case IR_ROR: 2290 if (ir_type_size[type] == 1) { 2291 tmp_reg = ctx->regs[def][3]; 2292 IR_ASSERT(tmp_reg != IR_REG_NONE); 2293 | and Rw(tmp_reg), Rw(op1_reg), #0xff 2294 | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #8 2295 | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16 2296 | ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg) 2297 | and Rw(def_reg), Rw(def_reg), #0xff 2298 } else if (ir_type_size[type] == 2) { 2299 tmp_reg = ctx->regs[def][3]; 2300 IR_ASSERT(tmp_reg != IR_REG_NONE); 2301 | and Rw(tmp_reg), Rw(op1_reg), #0xffff 2302 | add Rw(tmp_reg), Rw(tmp_reg), Rw(tmp_reg), lsl #16 2303 | ror Rw(def_reg), Rw(tmp_reg), Rw(op2_reg) 2304 | and Rw(def_reg), Rw(def_reg), #0xffff 2305 } else { 2306 | ASM_REG_REG_REG_OP ror, type, def_reg, op1_reg, op2_reg 2307 } 2308 break; 2309 } 2310 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2311 ir_emit_store(ctx, type, def, def_reg); 2312 } 2313} 2314 2315static void 
ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2316{ 2317 ir_backend_data *data = ctx->data; 2318 dasm_State **Dst = &data->dasm_state; 2319 uint32_t shift = ctx->ir_base[insn->op2].val.u64; 2320 ir_type type = insn->type; 2321 ir_ref op1 = insn->op1; 2322 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2323 ir_reg op1_reg = ctx->regs[def][1]; 2324 ir_reg tmp_reg; 2325 2326 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 2327 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 2328 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2329 2330 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2331 op1_reg = IR_REG_NUM(op1_reg); 2332 ir_emit_load(ctx, type, op1_reg, op1); 2333 } 2334 switch (insn->op) { 2335 default: 2336 IR_ASSERT(0); 2337 case IR_SHL: 2338 if (ir_type_size[type] == 1) { 2339 | ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(8-shift) 2340 } else if (ir_type_size[type] == 2) { 2341 | ubfiz Rw(def_reg), Rw(op1_reg), #shift, #(16-shift) 2342 } else { 2343 | ASM_REG_REG_IMM_OP lsl, type, def_reg, op1_reg, shift 2344 } 2345 break; 2346 case IR_SHR: 2347 if (ir_type_size[type] == 1) { 2348 | ubfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift) 2349 } else if (ir_type_size[type] == 2) { 2350 | ubfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift) 2351 } else { 2352 | ASM_REG_REG_IMM_OP lsr, type, def_reg, op1_reg, shift 2353 } 2354 break; 2355 case IR_SAR: 2356 if (ir_type_size[type] == 1) { 2357 | sbfx Rw(def_reg), Rw(op1_reg), #shift, #(8-shift) 2358 } else if (ir_type_size[type] == 2) { 2359 | sbfx Rw(def_reg), Rw(op1_reg), #shift, #(16-shift) 2360 } else { 2361 | ASM_REG_REG_IMM_OP asr, type, def_reg, op1_reg, shift 2362 } 2363 break; 2364 case IR_ROL: 2365 if (ir_type_size[type] == 1) { 2366 tmp_reg = ctx->regs[def][3]; 2367 | ubfx Rw(tmp_reg), Rw(op1_reg), #(8-shift), #shift 2368 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift 2369 } else if (ir_type_size[type] == 2) { 2370 tmp_reg = ctx->regs[def][3]; 2371 | ubfx Rw(tmp_reg), 
Rw(op1_reg), #(16-shift), #shift 2372 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #shift 2373 } else if (ir_type_size[type] == 8) { 2374 shift = (64 - shift) % 64; 2375 | ror Rx(def_reg), Rx(op1_reg), #shift 2376 } else { 2377 shift = (32 - shift) % 32; 2378 | ror Rw(def_reg), Rw(op1_reg), #shift 2379 } 2380 break; 2381 case IR_ROR: 2382 if (ir_type_size[type] == 1) { 2383 tmp_reg = ctx->regs[def][3]; 2384 | ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(8-shift) 2385 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(8-shift) 2386 } else if (ir_type_size[type] == 2) { 2387 tmp_reg = ctx->regs[def][3]; 2388 | ubfx Rw(tmp_reg), Rw(op1_reg), #shift, #(16-shift) 2389 | orr Rw(def_reg), Rw(tmp_reg), Rw(op1_reg), lsl #(16-shift) 2390 } else { 2391 | ASM_REG_REG_IMM_OP ror, type, def_reg, op1_reg, shift 2392 } 2393 break; 2394 } 2395 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2396 ir_emit_store(ctx, type, def, def_reg); 2397 } 2398} 2399 2400static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2401{ 2402 ir_backend_data *data = ctx->data; 2403 dasm_State **Dst = &data->dasm_state; 2404 ir_type type = insn->type; 2405 ir_ref op1 = insn->op1; 2406 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2407 ir_reg op1_reg = ctx->regs[def][1]; 2408 2409 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2410 2411 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2412 op1_reg = IR_REG_NUM(op1_reg); 2413 ir_emit_load(ctx, type, op1_reg, op1); 2414 } 2415 if (insn->op == IR_NOT) { 2416 if (insn->type == IR_BOOL) { 2417 | ASM_REG_IMM_OP cmp, type, op1_reg, 0 2418 | cset Rw(def_reg), eq 2419 } else { 2420 | ASM_REG_REG_OP mvn, insn->type, def_reg, op1_reg 2421 } 2422 } else if (insn->op == IR_NEG) { 2423 | ASM_REG_REG_OP neg, insn->type, def_reg, op1_reg 2424 } else if (insn->op == IR_ABS) { 2425 if (ir_type_size[type] == 8) { 2426 | cmp Rx(op1_reg), #0 2427 | cneg Rx(def_reg), Rx(op1_reg), lt 2428 } else { 2429 | cmp Rw(op1_reg), #0 2430 | cneg Rw(def_reg), 
Rw(op1_reg), lt 2431 } 2432 } else if (insn->op == IR_CTLZ) { 2433 if (ir_type_size[type] == 1) { 2434 | and Rw(def_reg), Rw(op1_reg), #0xff 2435 | clz Rw(def_reg), Rw(def_reg) 2436 | sub Rw(def_reg), Rw(def_reg), #24 2437 } else if (ir_type_size[type] == 2) { 2438 | and Rw(def_reg), Rw(op1_reg), #0xffff 2439 | clz Rw(def_reg), Rw(def_reg) 2440 | sub Rw(def_reg), Rw(def_reg), #16 2441 } else { 2442 | ASM_REG_REG_OP clz, type, def_reg, op1_reg 2443 } 2444 } else if (insn->op == IR_CTTZ) { 2445 | ASM_REG_REG_OP rbit, insn->type, def_reg, op1_reg 2446 | ASM_REG_REG_OP clz, insn->type, def_reg, def_reg 2447 } else { 2448 IR_ASSERT(insn->op == IR_BSWAP); 2449 | ASM_REG_REG_OP rev, insn->type, def_reg, op1_reg 2450 } 2451 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2452 ir_emit_store(ctx, type, def, def_reg); 2453 } 2454} 2455 2456static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2457{ 2458 ir_backend_data *data = ctx->data; 2459 dasm_State **Dst = &data->dasm_state; 2460 ir_type type = insn->type; 2461 ir_ref op1 = insn->op1; 2462 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2463 ir_reg op1_reg = ctx->regs[def][1]; 2464 ir_reg tmp_reg = ctx->regs[def][2]; 2465 uint32_t code1 = 0x0e205800 | (tmp_reg-IR_REG_FP_FIRST); // cnt v0.8b, v0.8b 2466 uint32_t code2 = 0x0e31b800 | (tmp_reg-IR_REG_FP_FIRST); // addv b0, v0.8b 2467 2468 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 2469 2470 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2471 op1_reg = IR_REG_NUM(op1_reg); 2472 ir_emit_load(ctx, type, op1_reg, op1); 2473 } 2474 switch (ir_type_size[insn->type]) { 2475 default: 2476 IR_ASSERT(0); 2477 case 1: 2478 | and Rw(def_reg), Rw(op1_reg), #0xff 2479 | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg) 2480 | .long code1 // cnt v0.8b, v0.8b 2481 | .long code2 // addv b0, v0.8b 2482 | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST) 2483 break; 2484 case 2: 2485 | and Rw(def_reg), Rw(op1_reg), #0xffff 2486 | fmov 
Rs(tmp_reg-IR_REG_FP_FIRST), Rw(def_reg) 2487 | .long code1 // cnt v0.8b, v0.8b 2488 | .long code2 // addv b0, v0.8b 2489 | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST) 2490 break; 2491 case 4: 2492 | fmov Rs(tmp_reg-IR_REG_FP_FIRST), Rw(op1_reg) 2493 | .long code1 // cnt v0.8b, v0.8b 2494 | .long code2 // addv b0, v0.8b 2495 | fmov Rw(def_reg), Rs(tmp_reg-IR_REG_FP_FIRST) 2496 break; 2497 case 8: 2498 | fmov Rd(tmp_reg-IR_REG_FP_FIRST), Rx(op1_reg) 2499 | .long code1 // cnt v0.8b, v0.8b 2500 | .long code2 // addv b0, v0.8b 2501 | fmov Rx(def_reg), Rd(tmp_reg-IR_REG_FP_FIRST) 2502 break; 2503 } 2504 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2505 ir_emit_store(ctx, type, def, def_reg); 2506 } 2507} 2508 2509static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2510{ 2511 ir_backend_data *data = ctx->data; 2512 dasm_State **Dst = &data->dasm_state; 2513 ir_type type = insn->type; 2514 ir_ref op1 = insn->op1; 2515 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2516 ir_reg op1_reg = ctx->regs[def][1]; 2517 2518 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2519 2520 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2521 op1_reg = IR_REG_NUM(op1_reg); 2522 ir_emit_load(ctx, type, op1_reg, op1); 2523 } 2524 if (insn->op == IR_NEG) { 2525 if (type == IR_DOUBLE) { 2526 | fneg Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) 2527 } else { 2528 IR_ASSERT(type == IR_FLOAT); 2529 | fneg Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) 2530 } 2531 } else { 2532 IR_ASSERT(insn->op == IR_ABS); 2533 if (type == IR_DOUBLE) { 2534 | fabs Rd(def_reg-IR_REG_FP_FIRST), Rd(op1_reg-IR_REG_FP_FIRST) 2535 } else { 2536 IR_ASSERT(type == IR_FLOAT); 2537 | fabs Rs(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) 2538 } 2539 } 2540 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2541 ir_emit_store(ctx, insn->type, def, def_reg); 2542 } 2543} 2544 2545static void ir_emit_binop_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2546{ 2547 ir_backend_data 
*data = ctx->data; 2548 dasm_State **Dst = &data->dasm_state; 2549 ir_type type = insn->type; 2550 ir_ref op1 = insn->op1; 2551 ir_ref op2 = insn->op2; 2552 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2553 ir_reg op1_reg = ctx->regs[def][1]; 2554 ir_reg op2_reg = ctx->regs[def][2]; 2555 2556 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 2557 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2558 op1_reg = IR_REG_NUM(op1_reg); 2559 ir_emit_load(ctx, type, op1_reg, op1); 2560 } 2561 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 2562 op2_reg = IR_REG_NUM(op2_reg); 2563 if (op1 != op2) { 2564 ir_emit_load(ctx, type, op2_reg, op2); 2565 } 2566 } 2567 switch (insn->op) { 2568 default: 2569 IR_ASSERT(0 && "NIY binary op"); 2570 case IR_ADD: 2571 | ASM_FP_REG_REG_REG_OP fadd, type, def_reg, op1_reg, op2_reg 2572 break; 2573 case IR_SUB: 2574 | ASM_FP_REG_REG_REG_OP fsub, type, def_reg, op1_reg, op2_reg 2575 break; 2576 case IR_MUL: 2577 | ASM_FP_REG_REG_REG_OP fmul, type, def_reg, op1_reg, op2_reg 2578 break; 2579 case IR_DIV: 2580 | ASM_FP_REG_REG_REG_OP fdiv, type, def_reg, op1_reg, op2_reg 2581 break; 2582 case IR_MIN: 2583 | ASM_FP_REG_REG_REG_OP fmin, type, def_reg, op1_reg, op2_reg 2584 break; 2585 case IR_MAX: 2586 | ASM_FP_REG_REG_REG_OP fmax, type, def_reg, op1_reg, op2_reg 2587 break; 2588 } 2589 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2590 ir_emit_store(ctx, insn->type, def, def_reg); 2591 } 2592} 2593 2594static void ir_emit_fix_type(ir_ctx *ctx, ir_type type, ir_reg op1_reg) 2595{ 2596 ir_backend_data *data = ctx->data; 2597 dasm_State **Dst = &data->dasm_state; 2598 2599 // TODO: prevent repeatable sign/zero extension ??? 
2600 if (ir_type_size[type] == 2) { 2601 if (IR_IS_TYPE_SIGNED(type)) { 2602 | sxth Rw(op1_reg), Rw(op1_reg) 2603 } else { 2604 | uxth Rw(op1_reg), Rw(op1_reg) 2605 } 2606 } else if (ir_type_size[type] == 1) { 2607 if (IR_IS_TYPE_SIGNED(type)) { 2608 | sxtb Rw(op1_reg), Rw(op1_reg) 2609 } else { 2610 | uxtb Rw(op1_reg), Rw(op1_reg) 2611 } 2612 } 2613} 2614 2615static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) 2616{ 2617 ir_backend_data *data = ctx->data; 2618 dasm_State **Dst = &data->dasm_state; 2619 2620 IR_ASSERT(op1_reg != IR_REG_NONE); 2621 if (ir_type_size[type] < 4) { 2622 ir_emit_fix_type(ctx, type, op1_reg); 2623 } 2624 if (op2_reg != IR_REG_NONE) { 2625 if (ir_type_size[type] == 8) { 2626 | cmp Rx(op1_reg), Rx(op2_reg) 2627 } else if (ir_type_size[type] == 4) { 2628 | cmp Rw(op1_reg), Rw(op2_reg) 2629 } else if (ir_type_size[type] == 2) { 2630 if (IR_IS_TYPE_SIGNED(type)) { 2631 | cmp Rw(op1_reg), Rw(op2_reg), sxth 2632 } else { 2633 | cmp Rw(op1_reg), Rw(op2_reg), uxth 2634 } 2635 } else if (ir_type_size[type] == 1) { 2636 if (IR_IS_TYPE_SIGNED(type)) { 2637 | cmp Rw(op1_reg), Rw(op2_reg), sxtb 2638 } else { 2639 | cmp Rw(op1_reg), Rw(op2_reg), uxtb 2640 } 2641 } else { 2642 IR_ASSERT(0); 2643 } 2644 } else { 2645 IR_ASSERT(IR_IS_CONST_REF(op2)); 2646 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op2].op)); 2647 int32_t val = ctx->ir_base[op2].val.i32; 2648 2649 if (ir_type_size[type] == 8) { 2650 | cmp Rx(op1_reg), #val 2651 } else { 2652 | cmp Rw(op1_reg), #val 2653 } 2654 } 2655} 2656 2657static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2658{ 2659 ir_backend_data *data = ctx->data; 2660 dasm_State **Dst = &data->dasm_state; 2661 ir_type type = ctx->ir_base[insn->op1].type; 2662 ir_op op = insn->op; 2663 ir_ref op1 = insn->op1; 2664 ir_ref op2 = insn->op2; 2665 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2666 ir_reg op1_reg = ctx->regs[def][1]; 2667 ir_reg op2_reg = 
ctx->regs[def][2]; 2668 2669 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 2670 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2671 op1_reg = IR_REG_NUM(op1_reg); 2672 ir_emit_load(ctx, type, op1_reg, op1); 2673 } 2674 if (op2_reg != IR_REG_NONE) { 2675 if (IR_REG_SPILLED(op2_reg)) { 2676 op2_reg = IR_REG_NUM(op2_reg); 2677 if (op1 != op2) { 2678 ir_emit_load(ctx, type, op2_reg, op2); 2679 } 2680 } 2681 if (IR_IS_CONST_REF(op2)) { 2682 ir_emit_load(ctx, type, op2_reg, op2); 2683 } 2684 } 2685 if (IR_IS_CONST_REF(insn->op2) 2686 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op) 2687 && ctx->ir_base[insn->op2].val.u64 == 0) { 2688 if (op == IR_ULT) { 2689 /* always false */ 2690 ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 0); 2691 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2692 ir_emit_store(ctx, insn->type, def, def_reg); 2693 } 2694 return; 2695 } else if (op == IR_UGE) { 2696 /* always true */ 2697 ir_emit_load_imm_int(ctx, IR_BOOL, def_reg, 1); 2698 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2699 ir_emit_store(ctx, insn->type, def, def_reg); 2700 } 2701 return; 2702 } else if (op == IR_ULE) { 2703 op = IR_EQ; 2704 } else if (op == IR_UGT) { 2705 op = IR_NE; 2706 } 2707 } 2708 ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); 2709 switch (op) { 2710 default: 2711 IR_ASSERT(0 && "NIY binary op"); 2712 case IR_EQ: 2713 | cset Rw(def_reg), eq 2714 break; 2715 case IR_NE: 2716 | cset Rw(def_reg), ne 2717 break; 2718 case IR_LT: 2719 | cset Rw(def_reg), lt 2720 break; 2721 case IR_GE: 2722 | cset Rw(def_reg), ge 2723 break; 2724 case IR_LE: 2725 | cset Rw(def_reg), le 2726 break; 2727 case IR_GT: 2728 | cset Rw(def_reg), gt 2729 break; 2730 case IR_ULT: 2731 | cset Rw(def_reg), lo 2732 break; 2733 case IR_UGE: 2734 | cset Rw(def_reg), hs 2735 break; 2736 case IR_ULE: 2737 | cset Rw(def_reg), ls 2738 break; 2739 case IR_UGT: 2740 | cset Rw(def_reg), hi 2741 break; 2742 } 2743 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2744 ir_emit_store(ctx, 
insn->type, def, def_reg); 2745 } 2746} 2747 2748static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref cmp_ref, ir_insn *cmp_insn) 2749{ 2750 ir_backend_data *data = ctx->data; 2751 dasm_State **Dst = &data->dasm_state; 2752 ir_type type = ctx->ir_base[cmp_insn->op1].type; 2753 ir_op op = cmp_insn->op; 2754 ir_ref op1, op2; 2755 ir_reg op1_reg, op2_reg; 2756 2757 if (op == IR_LT || op == IR_LE) { 2758 /* swap operands to avoid P flag check */ 2759 op ^= 3; 2760 op1 = cmp_insn->op2; 2761 op2 = cmp_insn->op1; 2762 op1_reg = ctx->regs[cmp_ref][2]; 2763 op2_reg = ctx->regs[cmp_ref][1]; 2764 } else { 2765 op1 = cmp_insn->op1; 2766 op2 = cmp_insn->op2; 2767 op1_reg = ctx->regs[cmp_ref][1]; 2768 op2_reg = ctx->regs[cmp_ref][2]; 2769 } 2770 2771 IR_ASSERT(op1_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 2772 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 2773 op1_reg = IR_REG_NUM(op1_reg); 2774 ir_emit_load(ctx, type, op1_reg, op1); 2775 } 2776 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 2777 op2_reg = IR_REG_NUM(op2_reg); 2778 if (op1 != op2) { 2779 ir_emit_load(ctx, type, op2_reg, op2); 2780 } 2781 } 2782 if (type == IR_DOUBLE) { 2783 | fcmp Rd(op1_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST) 2784 } else { 2785 IR_ASSERT(type == IR_FLOAT); 2786 | fcmp Rs(op1_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST) 2787 } 2788 return op; 2789} 2790 2791static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 2792{ 2793 ir_backend_data *data = ctx->data; 2794 dasm_State **Dst = &data->dasm_state; 2795 ir_op op = ir_emit_cmp_fp_common(ctx, def, insn); 2796 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 2797//??? 
ir_reg tmp_reg = ctx->regs[def][3]; // TODO: take into account vs flag 2798 2799 IR_ASSERT(def_reg != IR_REG_NONE); 2800 switch (op) { 2801 default: 2802 IR_ASSERT(0 && "NIY binary op"); 2803 case IR_EQ: 2804 | cset Rw(def_reg), eq 2805 break; 2806 case IR_NE: 2807 | cset Rw(def_reg), ne 2808 break; 2809 case IR_LT: 2810 | cset Rw(def_reg), mi 2811 break; 2812 case IR_GE: 2813 | cset Rw(def_reg), ge 2814 break; 2815 case IR_LE: 2816 | cset Rw(def_reg), ls 2817 break; 2818 case IR_GT: 2819 | cset Rw(def_reg), gt 2820 break; 2821 case IR_ULT: 2822 | cset Rw(def_reg), lt 2823 break; 2824 case IR_UGE: 2825 | cset Rw(def_reg), hs 2826 break; 2827 case IR_ULE: 2828 | cset Rw(def_reg), le 2829 break; 2830 case IR_UGT: 2831 | cset Rw(def_reg), hi 2832 break; 2833 } 2834 if (IR_REG_SPILLED(ctx->regs[def][0])) { 2835 ir_emit_store(ctx, insn->type, def, def_reg); 2836 } 2837} 2838 2839static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def) 2840{ 2841 uint32_t true_block, false_block, next_block; 2842 ir_backend_data *data = ctx->data; 2843 dasm_State **Dst = &data->dasm_state; 2844 2845 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 2846 if (true_block != next_block) { 2847 | b =>true_block 2848 } 2849} 2850 2851static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def) 2852{ 2853 uint32_t true_block, false_block, next_block; 2854 ir_backend_data *data = ctx->data; 2855 dasm_State **Dst = &data->dasm_state; 2856 2857 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 2858 if (false_block != next_block) { 2859 | b =>false_block 2860 } 2861} 2862 2863static void ir_emit_jz(ir_ctx *ctx, uint8_t op, uint32_t b, ir_type type, ir_reg reg) 2864{ 2865 uint32_t true_block, false_block, next_block; 2866 ir_backend_data *data = ctx->data; 2867 dasm_State **Dst = &data->dasm_state; 2868 2869 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 2870 if (true_block == next_block) { 2871 IR_ASSERT(op < 
IR_LT); 2872 op ^= 1; // reverse 2873 true_block = false_block; 2874 false_block = 0; 2875 } else if (false_block == next_block) { 2876 false_block = 0; 2877 } 2878 2879 if (op == IR_EQ) { 2880 if (ir_type_size[type] == 8) { 2881 | cbz Rx(reg), =>true_block 2882 } else { 2883 | cbz Rw(reg), =>true_block 2884 } 2885 } else { 2886 IR_ASSERT(op == IR_NE); 2887 if (ir_type_size[type] == 8) { 2888 | cbnz Rx(reg), =>true_block 2889 } else { 2890 | cbnz Rw(reg), =>true_block 2891 } 2892 } 2893 if (false_block) { 2894 | b =>false_block 2895 } 2896} 2897 2898static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp) 2899{ 2900 uint32_t true_block, false_block, next_block; 2901 ir_backend_data *data = ctx->data; 2902 dasm_State **Dst = &data->dasm_state; 2903 2904 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 2905 if (true_block == next_block) { 2906 /* swap to avoid unconditional JMP */ 2907 if (int_cmp || op == IR_EQ || op == IR_NE) { 2908 op ^= 1; // reverse 2909 } else { 2910 op ^= 5; // reverse 2911 } 2912 true_block = false_block; 2913 false_block = 0; 2914 } else if (false_block == next_block) { 2915 false_block = 0; 2916 } 2917 2918 if (int_cmp) { 2919 switch (op) { 2920 default: 2921 IR_ASSERT(0 && "NIY binary op"); 2922 case IR_EQ: 2923 | beq =>true_block 2924 break; 2925 case IR_NE: 2926 | bne =>true_block 2927 break; 2928 case IR_LT: 2929 | blt =>true_block 2930 break; 2931 case IR_GE: 2932 | bge =>true_block 2933 break; 2934 case IR_LE: 2935 | ble =>true_block 2936 break; 2937 case IR_GT: 2938 | bgt =>true_block 2939 break; 2940 case IR_ULT: 2941 | blo =>true_block 2942 break; 2943 case IR_UGE: 2944 | bhs =>true_block 2945 break; 2946 case IR_ULE: 2947 | bls =>true_block 2948 break; 2949 case IR_UGT: 2950 | bhi =>true_block 2951 break; 2952 } 2953 } else { 2954 switch (op) { 2955 default: 2956 IR_ASSERT(0 && "NIY binary op"); 2957 case IR_EQ: 2958 | beq =>true_block 2959 break; 2960 case 
IR_NE: 2961 | bne =>true_block 2962 break; 2963 case IR_LT: 2964 | bmi =>true_block 2965 break; 2966 case IR_GE: 2967 | bge =>true_block 2968 break; 2969 case IR_LE: 2970 | bls =>true_block 2971 break; 2972 case IR_GT: 2973 | bgt =>true_block 2974 break; 2975 case IR_ULT: 2976 | blt =>true_block 2977 break; 2978 case IR_UGE: 2979 | bhs =>true_block 2980 break; 2981 case IR_ULE: 2982 | ble =>true_block 2983 break; 2984 case IR_UGT: 2985 | bhi =>true_block 2986 break; 2987// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; 2988// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; 2989// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; 2990// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; 2991 } 2992 } 2993 if (false_block) { 2994 | b =>false_block 2995 } 2996} 2997 2998static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 2999{ 3000 ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; 3001 ir_op op = cmp_insn->op; 3002 ir_type type = ctx->ir_base[cmp_insn->op1].type; 3003 ir_ref op1 = cmp_insn->op1; 3004 ir_ref op2 = cmp_insn->op2; 3005 ir_reg op1_reg = ctx->regs[insn->op2][1]; 3006 ir_reg op2_reg = ctx->regs[insn->op2][2]; 3007 3008 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3009 op1_reg = IR_REG_NUM(op1_reg); 3010 ir_emit_load(ctx, type, op1_reg, op1); 3011 } 3012 if (op1_reg != IR_REG_NONE && IR_IS_CONST_REF(op1)) { 3013 ir_emit_load(ctx, type, op1_reg, op1); 3014 } 3015 if (op2_reg != IR_REG_NONE) { 3016 if (IR_REG_SPILLED(op2_reg)) { 3017 op2_reg = IR_REG_NUM(op2_reg); 3018 if (op1 != op2) { 3019 ir_emit_load(ctx, type, op2_reg, op2); 3020 } 3021 } 3022 if (IR_IS_CONST_REF(op2)) { 3023 ir_emit_load(ctx, type, op2_reg, op2); 3024 } 3025 } 3026 if (IR_IS_CONST_REF(op2) 3027 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) 3028 && ctx->ir_base[op2].val.u64 == 0) { 3029 if (op == IR_ULT) { 3030 /* always false */ 3031 ir_emit_jmp_false(ctx, b, def); 3032 return; 
3033 } else if (op == IR_UGE) { 3034 /* always true */ 3035 ir_emit_jmp_true(ctx, b, def); 3036 return; 3037 } else if (op == IR_ULE) { 3038 op = IR_EQ; 3039 } else if (op == IR_UGT) { 3040 op = IR_NE; 3041 } 3042 if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { 3043 ir_emit_jz(ctx, op, b, type, op1_reg); 3044 return; 3045 } 3046 } 3047 ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); 3048 ir_emit_jcc(ctx, op, b, def, insn, 1); 3049} 3050 3051static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 3052{ 3053 ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); 3054 ir_emit_jcc(ctx, op, b, def, insn, 0); 3055} 3056 3057static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 3058{ 3059 ir_type type = ctx->ir_base[insn->op2].type; 3060 ir_reg op2_reg = ctx->regs[def][2]; 3061 ir_backend_data *data = ctx->data; 3062 dasm_State **Dst = &data->dasm_state; 3063 3064 if (IR_IS_CONST_REF(insn->op2)) { 3065 uint32_t true_block, false_block, next_block; 3066 3067 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 3068 if (ir_const_is_true(&ctx->ir_base[insn->op2])) { 3069 if (true_block != next_block) { 3070 | b =>true_block 3071 } 3072 } else { 3073 if (false_block != next_block) { 3074 | b =>false_block 3075 } 3076 } 3077 return; 3078 } 3079 IR_ASSERT(op2_reg != IR_REG_NONE); 3080 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 3081 op2_reg = IR_REG_NUM(op2_reg); 3082 ir_emit_load(ctx, type, op2_reg, insn->op2); 3083 } 3084 | ASM_REG_IMM_OP cmp, type, op2_reg, 0 3085 ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); 3086} 3087 3088static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3089{ 3090 ir_backend_data *data = ctx->data; 3091 dasm_State **Dst = &data->dasm_state; 3092 ir_type type = insn->type; 3093 ir_ref op1 = insn->op1; 3094 ir_ref op2 = insn->op2; 3095 ir_ref op3 = insn->op3; 3096 ir_type op1_type = 
ctx->ir_base[op1].type; 3097 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3098 ir_reg op1_reg = ctx->regs[def][1]; 3099 ir_reg op2_reg = ctx->regs[def][2]; 3100 ir_reg op3_reg = ctx->regs[def][3]; 3101 3102 IR_ASSERT(def_reg != IR_REG_NONE); 3103 3104 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 3105 op2_reg = IR_REG_NUM(op2_reg); 3106 ir_emit_load(ctx, type, op2_reg, op2); 3107 if (op1 == op2) { 3108 op1_reg = op2_reg; 3109 } 3110 if (op3 == op2) { 3111 op3_reg = op2_reg; 3112 } 3113 } 3114 if (op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) { 3115 op3_reg = IR_REG_NUM(op3_reg); 3116 ir_emit_load(ctx, type, op3_reg, op3); 3117 if (op1 == op2) { 3118 op1_reg = op3_reg; 3119 } 3120 } 3121 if (op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { 3122 op1_reg = IR_REG_NUM(op1_reg); 3123 ir_emit_load(ctx, op1_type, op1_reg, op1); 3124 } 3125 3126 if (IR_IS_TYPE_INT(op1_type)) { 3127 | ASM_REG_IMM_OP cmp, op1_type, op1_reg, 0 3128 } else{ 3129 | ASM_FP_REG_IMM_OP fcmp, op1_type, op1_reg, 0.0 3130 } 3131 3132 if (IR_IS_TYPE_INT(type)) { 3133 if (ir_type_size[type] == 8) { 3134 | csel Rx(def_reg), Rx(op2_reg), Rx(op3_reg), ne 3135 } else { 3136 | csel Rw(def_reg), Rw(op2_reg), Rw(op3_reg), ne 3137 } 3138 } else{ 3139 if (type == IR_DOUBLE) { 3140 | fcsel Rd(def_reg-IR_REG_FP_FIRST), Rd(op2_reg-IR_REG_FP_FIRST), Rd(op3_reg-IR_REG_FP_FIRST), ne 3141 } else { 3142 | fcsel Rs(def_reg-IR_REG_FP_FIRST), Rs(op2_reg-IR_REG_FP_FIRST), Rs(op3_reg-IR_REG_FP_FIRST), ne 3143 } 3144 } 3145 3146 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3147 ir_emit_store(ctx, type, def, def_reg); 3148 } 3149} 3150 3151static void ir_emit_return_void(ir_ctx *ctx) 3152{ 3153 ir_backend_data *data = ctx->data; 3154 dasm_State **Dst = &data->dasm_state; 3155 3156 ir_emit_epilogue(ctx); 3157 | ret 3158} 3159 3160static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3161{ 3162 ir_reg op2_reg = ctx->regs[ref][2]; 3163 3164 if (op2_reg 
!= IR_REG_INT_RET1) { 3165 ir_type type = ctx->ir_base[insn->op2].type; 3166 3167 if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { 3168 ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); 3169 } else { 3170 ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); 3171 } 3172 } 3173 ir_emit_return_void(ctx); 3174} 3175 3176static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3177{ 3178 ir_reg op2_reg = ctx->regs[ref][2]; 3179 ir_type type = ctx->ir_base[insn->op2].type; 3180 3181 if (op2_reg != IR_REG_FP_RET1) { 3182 if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { 3183 ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); 3184 } else { 3185 ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); 3186 } 3187 } 3188 ir_emit_return_void(ctx); 3189} 3190 3191static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3192{ 3193 ir_type dst_type = insn->type; 3194 ir_type src_type = ctx->ir_base[insn->op1].type; 3195 ir_backend_data *data = ctx->data; 3196 dasm_State **Dst = &data->dasm_state; 3197 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3198 ir_reg op1_reg = ctx->regs[def][1]; 3199 3200 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3201 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3202 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 3203 IR_ASSERT(def_reg != IR_REG_NONE); 3204 if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) { 3205 op1_reg = IR_REG_NUM(op1_reg); 3206 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3207 } 3208 3209 if (op1_reg != IR_REG_NONE) { 3210 if (ir_type_size[src_type] == 1) { 3211 if (ir_type_size[dst_type] == 2) { 3212 | sxtb Rw(def_reg), Rw(op1_reg) 3213 } else if (ir_type_size[dst_type] == 4) { 3214 | sxtb Rw(def_reg), Rw(op1_reg) 3215 } else { 3216 IR_ASSERT(ir_type_size[dst_type] == 8); 3217 | sxtb Rx(def_reg), Rx(op1_reg) 3218 } 3219 } else if (ir_type_size[src_type] == 2) { 3220 if (ir_type_size[dst_type] == 4) { 3221 | sxth Rw(def_reg), Rw(op1_reg) 3222 } else { 3223 
IR_ASSERT(ir_type_size[dst_type] == 8); 3224 | sxth Rx(def_reg), Rx(op1_reg) 3225 } 3226 } else { 3227 IR_ASSERT(ir_type_size[src_type] == 4); 3228 IR_ASSERT(ir_type_size[dst_type] == 8); 3229 | sxtw Rx(def_reg), Rw(op1_reg) 3230 } 3231 } else if (IR_IS_CONST_REF(insn->op1)) { 3232 IR_ASSERT(0); 3233 } else { 3234 ir_reg fp; 3235 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3236 3237 if (ir_type_size[src_type] == 1) { 3238 if (ir_type_size[dst_type] == 2) { 3239 | ldrsb Rw(def_reg), [Rx(fp), #offset] 3240 } else if (ir_type_size[dst_type] == 4) { 3241 | ldrsb Rw(def_reg), [Rx(fp), #offset] 3242 } else { 3243 IR_ASSERT(ir_type_size[dst_type] == 8); 3244 | ldrsb Rx(def_reg), [Rx(fp), #offset] 3245 } 3246 } else if (ir_type_size[src_type] == 2) { 3247 if (ir_type_size[dst_type] == 4) { 3248 | ldrsh Rw(def_reg), [Rx(fp), #offset] 3249 } else { 3250 IR_ASSERT(ir_type_size[dst_type] == 8); 3251 | ldrsh Rx(def_reg), [Rx(fp), #offset] 3252 } 3253 } else { 3254 IR_ASSERT(ir_type_size[src_type] == 4); 3255 IR_ASSERT(ir_type_size[dst_type] == 8); 3256 | ldrsw Rx(def_reg), [Rx(fp), #offset] 3257 } 3258 } 3259 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3260 ir_emit_store(ctx, dst_type, def, def_reg); 3261 } 3262} 3263 3264static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3265{ 3266 ir_type dst_type = insn->type; 3267 ir_type src_type = ctx->ir_base[insn->op1].type; 3268 ir_backend_data *data = ctx->data; 3269 dasm_State **Dst = &data->dasm_state; 3270 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3271 ir_reg op1_reg = ctx->regs[def][1]; 3272 3273 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3274 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3275 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 3276 IR_ASSERT(def_reg != IR_REG_NONE); 3277 if ((op1_reg != IR_REG_NONE) && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1))) { 3278 op1_reg = IR_REG_NUM(op1_reg); 3279 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3280 } 3281 3282 if (op1_reg != 
IR_REG_NONE) { 3283 if (ir_type_size[src_type] == 1) { 3284 | uxtb Rw(def_reg), Rw(op1_reg) 3285 } else if (ir_type_size[src_type] == 2) { 3286 | uxth Rw(def_reg), Rw(op1_reg) 3287 } else { 3288 | mov Rw(def_reg), Rw(op1_reg) 3289 } 3290 } else if (IR_IS_CONST_REF(insn->op1)) { 3291 IR_ASSERT(0); 3292 } else { 3293 ir_reg fp; 3294 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3295 3296 if (ir_type_size[src_type] == 1) { 3297 | ldrb Rw(def_reg), [Rx(fp), #offset] 3298 } else if (ir_type_size[src_type] == 2) { 3299 | ldrh Rw(def_reg), [Rx(fp), #offset] 3300 } else { 3301 IR_ASSERT(ir_type_size[src_type] == 4); 3302 IR_ASSERT(ir_type_size[dst_type] == 8); 3303 | ldr Rw(def_reg), [Rx(fp), #offset] 3304 } 3305 } 3306 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3307 ir_emit_store(ctx, dst_type, def, def_reg); 3308 } 3309} 3310 3311static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3312{ 3313 ir_backend_data *data = ctx->data; 3314 dasm_State **Dst = &data->dasm_state; 3315 ir_type dst_type = insn->type; 3316 ir_type src_type = ctx->ir_base[insn->op1].type; 3317 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3318 ir_reg op1_reg = ctx->regs[def][1]; 3319 3320 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3321 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3322 IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); 3323 IR_ASSERT(def_reg != IR_REG_NONE); 3324 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3325 op1_reg = IR_REG_NUM(op1_reg); 3326 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3327 } 3328 if (op1_reg != IR_REG_NONE) { 3329 if (ir_type_size[dst_type] == 1) { 3330 | and Rw(def_reg), Rw(op1_reg), #0xff 3331 } else if (ir_type_size[dst_type] == 2) { 3332 | and Rw(def_reg), Rw(op1_reg), #0xffff 3333 } else if (op1_reg != def_reg) { 3334 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 3335 } 3336 } else { 3337 ir_emit_load(ctx, dst_type, def_reg, insn->op1); 3338 } 3339 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3340 ir_emit_store(ctx, 
dst_type, def, def_reg); 3341 } 3342} 3343 3344static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3345{ 3346 ir_type dst_type = insn->type; 3347 ir_type src_type = ctx->ir_base[insn->op1].type; 3348 ir_backend_data *data = ctx->data; 3349 dasm_State **Dst = &data->dasm_state; 3350 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3351 ir_reg op1_reg = ctx->regs[def][1]; 3352 3353 IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); 3354 IR_ASSERT(def_reg != IR_REG_NONE); 3355 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3356 op1_reg = IR_REG_NUM(op1_reg); 3357 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3358 } 3359 if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { 3360 if (op1_reg != IR_REG_NONE) { 3361 if (IR_REG_SPILLED(op1_reg)) { 3362 op1_reg = IR_REG_NUM(op1_reg); 3363 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3364 } 3365 if (op1_reg != def_reg) { 3366 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 3367 } 3368 } else { 3369 ir_emit_load(ctx, dst_type, def_reg, insn->op1); 3370 } 3371 } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { 3372 if (op1_reg != IR_REG_NONE) { 3373 if (IR_REG_SPILLED(op1_reg)) { 3374 op1_reg = IR_REG_NUM(op1_reg); 3375 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3376 } 3377 if (op1_reg != def_reg) { 3378 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 3379 } 3380 } else { 3381 ir_emit_load(ctx, dst_type, def_reg, insn->op1); 3382 } 3383 } else if (IR_IS_TYPE_FP(src_type)) { 3384 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3385 if (op1_reg != IR_REG_NONE) { 3386 if (IR_REG_SPILLED(op1_reg)) { 3387 op1_reg = IR_REG_NUM(op1_reg); 3388 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3389 } 3390 if (src_type == IR_DOUBLE) { 3391 | fmov Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3392 } else { 3393 IR_ASSERT(src_type == IR_FLOAT); 3394 | fmov Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3395 } 3396 } else if (IR_IS_CONST_REF(insn->op1)) { 3397 IR_ASSERT(0); //??? 
3398 } else { 3399 ir_reg fp; 3400 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3401 3402 if (src_type == IR_DOUBLE) { 3403 | ldr Rx(def_reg), [Rx(fp), #offset] 3404 } else { 3405 IR_ASSERT(src_type == IR_FLOAT); 3406 | ldr Rw(def_reg), [Rx(fp), #offset] 3407 } 3408 } 3409 } else if (IR_IS_TYPE_FP(dst_type)) { 3410 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3411 if (op1_reg != IR_REG_NONE) { 3412 if (IR_REG_SPILLED(op1_reg)) { 3413 op1_reg = IR_REG_NUM(op1_reg); 3414 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3415 } 3416 if (dst_type == IR_DOUBLE) { 3417 | fmov Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3418 } else { 3419 IR_ASSERT(dst_type == IR_FLOAT); 3420 | fmov Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3421 } 3422 } else if (IR_IS_CONST_REF(insn->op1)) { 3423 IR_ASSERT(0); //??? 3424 } else { 3425 ir_reg fp; 3426 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op1, &fp); 3427 3428 if (dst_type == IR_DOUBLE) { 3429 | ldr Rd(def_reg), [Rx(fp), #offset] 3430 } else { 3431 IR_ASSERT(src_type == IR_FLOAT); 3432 | ldr Rs(def_reg), [Rx(fp), #offset] 3433 } 3434 } 3435 } 3436 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3437 ir_emit_store(ctx, dst_type, def, def_reg); 3438 } 3439} 3440 3441static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3442{ 3443 ir_type dst_type = insn->type; 3444 ir_type src_type = ctx->ir_base[insn->op1].type; 3445 ir_backend_data *data = ctx->data; 3446 dasm_State **Dst = &data->dasm_state; 3447 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3448 ir_reg op1_reg = ctx->regs[def][1]; 3449 3450 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 3451 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 3452 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 3453 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { 3454 op1_reg = IR_REG_NUM(op1_reg); 3455 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3456 } 3457 3458 if (ir_type_size[src_type] == 8) { 3459 if (IR_IS_TYPE_SIGNED(src_type)) { 3460 if (dst_type == IR_DOUBLE) { 
3461 | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3462 } else { 3463 IR_ASSERT(dst_type == IR_FLOAT); 3464 | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3465 } 3466 } else { 3467 if (dst_type == IR_DOUBLE) { 3468 | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3469 } else { 3470 IR_ASSERT(dst_type == IR_FLOAT); 3471 | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rx(op1_reg) 3472 } 3473 } 3474 } else { 3475 if (IR_IS_TYPE_SIGNED(src_type)) { 3476 if (ir_type_size[src_type] == 2) { 3477 ir_emit_fix_type(ctx, IR_I16, op1_reg); 3478 } else if (ir_type_size[src_type] == 1) { 3479 ir_emit_fix_type(ctx, IR_I8, op1_reg); 3480 } 3481 if (dst_type == IR_DOUBLE) { 3482 | scvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3483 } else { 3484 IR_ASSERT(dst_type == IR_FLOAT); 3485 | scvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3486 } 3487 } else { 3488 if (ir_type_size[src_type] == 2) { 3489 ir_emit_fix_type(ctx, IR_U16, op1_reg); 3490 } else if (ir_type_size[src_type] == 1) { 3491 ir_emit_fix_type(ctx, IR_U8, op1_reg); 3492 } 3493 if (dst_type == IR_DOUBLE) { 3494 | ucvtf Rd(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3495 } else { 3496 IR_ASSERT(dst_type == IR_FLOAT); 3497 | ucvtf Rs(def_reg-IR_REG_FP_FIRST), Rw(op1_reg) 3498 } 3499 } 3500 } 3501 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3502 ir_emit_store(ctx, dst_type, def, def_reg); 3503 } 3504} 3505 3506static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3507{ 3508 ir_type dst_type = insn->type; 3509 ir_type src_type = ctx->ir_base[insn->op1].type; 3510 ir_backend_data *data = ctx->data; 3511 dasm_State **Dst = &data->dasm_state; 3512 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3513 ir_reg op1_reg = ctx->regs[def][1]; 3514 3515 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 3516 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 3517 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 3518 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { 3519 op1_reg = IR_REG_NUM(op1_reg); 3520 ir_emit_load(ctx, src_type, op1_reg, 
insn->op1); 3521 } 3522 if (ir_type_size[dst_type] == 8) { 3523 if (IR_IS_TYPE_SIGNED(dst_type)) { 3524 if (src_type == IR_DOUBLE) { 3525 | fcvtzs Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3526 } else { 3527 IR_ASSERT(src_type == IR_FLOAT); 3528 | fcvtzs Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3529 } 3530 } else { 3531 if (src_type == IR_DOUBLE) { 3532 | fcvtzu Rx(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3533 } else { 3534 IR_ASSERT(src_type == IR_FLOAT); 3535 | fcvtzu Rx(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3536 } 3537 } 3538 } else { 3539 if (IR_IS_TYPE_SIGNED(dst_type)) { 3540 if (src_type == IR_DOUBLE) { 3541 | fcvtzs Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3542 } else { 3543 IR_ASSERT(src_type == IR_FLOAT); 3544 | fcvtzs Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3545 } 3546 } else { 3547 if (src_type == IR_DOUBLE) { 3548 | fcvtzu Rw(def_reg), Rd(op1_reg-IR_REG_FP_FIRST) 3549 } else { 3550 IR_ASSERT(src_type == IR_FLOAT); 3551 | fcvtzu Rw(def_reg), Rs(op1_reg-IR_REG_FP_FIRST) 3552 } 3553 } 3554 } 3555 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3556 ir_emit_store(ctx, dst_type, def, def_reg); 3557 } 3558} 3559 3560static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3561{ 3562 ir_type dst_type = insn->type; 3563 ir_type src_type = ctx->ir_base[insn->op1].type; 3564 ir_backend_data *data = ctx->data; 3565 dasm_State **Dst = &data->dasm_state; 3566 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3567 ir_reg op1_reg = ctx->regs[def][1]; 3568 3569 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 3570 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 3571 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 3572 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) { 3573 op1_reg = IR_REG_NUM(op1_reg); 3574 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 3575 } 3576 if (src_type == dst_type) { 3577 if (op1_reg != def_reg) { 3578 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 3579 } 3580 } else if (src_type == IR_DOUBLE) { 3581 | fcvt Rs(def_reg-IR_REG_FP_FIRST), 
Rd(op1_reg-IR_REG_FP_FIRST) 3582 } else { 3583 IR_ASSERT(src_type == IR_FLOAT); 3584 | fcvt Rd(def_reg-IR_REG_FP_FIRST), Rs(op1_reg-IR_REG_FP_FIRST) 3585 } 3586 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3587 ir_emit_store(ctx, dst_type, def, def_reg); 3588 } 3589} 3590 3591static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3592{ 3593 ir_ref type = insn->type; 3594 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3595 ir_reg op1_reg = ctx->regs[def][1]; 3596 3597 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 3598 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3599 op1_reg = IR_REG_NUM(op1_reg); 3600 ir_emit_load(ctx, type, op1_reg, insn->op1); 3601 } 3602 if (def_reg == op1_reg) { 3603 /* same reg */ 3604 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 3605 ir_emit_mov(ctx, type, def_reg, op1_reg); 3606 } else if (def_reg != IR_REG_NONE) { 3607 ir_emit_load(ctx, type, def_reg, insn->op1); 3608 } else if (op1_reg != IR_REG_NONE) { 3609 ir_emit_store(ctx, type, def, op1_reg); 3610 } else { 3611 IR_ASSERT(0); 3612 } 3613 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 3614 ir_emit_store(ctx, type, def, def_reg); 3615 } 3616} 3617 3618static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3619{ 3620 ir_type type = insn->type; 3621 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3622 ir_reg op1_reg = ctx->regs[def][1]; 3623 3624 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 3625 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3626 op1_reg = IR_REG_NUM(op1_reg); 3627 ir_emit_load(ctx, type, op1_reg, insn->op1); 3628 } 3629 if (def_reg == op1_reg) { 3630 /* same reg */ 3631 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 3632 ir_emit_fp_mov(ctx, type, def_reg, op1_reg); 3633 } else if (def_reg != IR_REG_NONE) { 3634 ir_emit_load(ctx, type, def_reg, insn->op1); 3635 } else if (op1_reg != IR_REG_NONE) { 3636 ir_emit_store(ctx, type, def, op1_reg); 
3637 } else { 3638 IR_ASSERT(0); 3639 } 3640 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 3641 ir_emit_store(ctx, type, def, def_reg); 3642 } 3643} 3644 3645static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3646{ 3647 ir_backend_data *data = ctx->data; 3648 dasm_State **Dst = &data->dasm_state; 3649 ir_ref type = insn->type; 3650 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3651 int32_t offset; 3652 ir_reg fp; 3653 3654 IR_ASSERT(def_reg != IR_REG_NONE); 3655 offset = ir_var_spill_slot(ctx, insn->op1, &fp); 3656 | add Rx(def_reg), Rx(fp), #offset 3657 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3658 ir_emit_store(ctx, type, def, def_reg); 3659 } 3660} 3661 3662static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3663{ 3664 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 3665 ir_ref type = insn->type; 3666 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3667 ir_reg fp; 3668 int32_t offset; 3669 ir_mem mem; 3670 3671 IR_ASSERT(var_insn->op == IR_VAR); 3672 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3673 offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); 3674 mem = IR_MEM_BO(fp, offset); 3675 if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { 3676 return; // fake load 3677 } 3678 IR_ASSERT(def_reg != IR_REG_NONE); 3679 ir_emit_load_mem(ctx, type, def_reg, mem); 3680 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3681 ir_emit_store(ctx, type, def, def_reg); 3682 } 3683} 3684 3685static void ir_emit_vstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3686{ 3687 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 3688 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 3689 ir_ref type = val_insn->type; 3690 ir_reg op3_reg = ctx->regs[ref][3]; 3691 ir_reg fp; 3692 int32_t offset; 3693 ir_mem mem; 3694 3695 IR_ASSERT(var_insn->op == IR_VAR); 3696 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3697 offset = IR_SPILL_POS_TO_OFFSET(var_insn->op3); 3698 IR_ASSERT(op3_reg != IR_REG_NONE); 3699 if (IR_REG_SPILLED(op3_reg) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { 3700 return; // fake store 3701 } 3702 if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { 3703 op3_reg = IR_REG_NUM(op3_reg); 3704 ir_emit_load(ctx, type, op3_reg, insn->op3); 3705 } 3706 mem = IR_MEM_BO(fp, offset); 3707 ir_emit_store_mem(ctx, type, mem, op3_reg); 3708} 3709 3710static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) 3711{ 3712 ir_insn *addr_insn = &ctx->ir_base[ref]; 3713 ir_reg reg; 3714 3715 IR_ASSERT(addr_insn->op == IR_ADD); 3716 IR_ASSERT(!IR_IS_CONST_REF(addr_insn->op1) && IR_IS_CONST_REF(addr_insn->op2)); 3717 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[addr_insn->op2].op)); 3718 if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) { 3719 reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 1); 3720 } else { 3721 reg = ctx->regs[ref][1]; 3722 } 3723 if (IR_REG_SPILLED(reg)) { 3724 reg = IR_REG_NUM(reg); 3725 ir_emit_load(ctx, IR_ADDR, reg, addr_insn->op1); 3726 } 3727 return IR_MEM_BO(reg, ctx->ir_base[addr_insn->op2].val.i32); 3728} 3729 3730static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3731{ 3732 ir_ref type = insn->type; 3733 ir_reg op2_reg = ctx->regs[def][2]; 3734 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3735 3736 if (ctx->use_lists[def].count == 1) { 3737 /* dead load */ 3738 return; 3739 } 3740 IR_ASSERT(def_reg != IR_REG_NONE); 3741 if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { 3742 ir_mem mem = ir_fuse_addr(ctx, def, insn->op2); 3743 3744 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 3745 if (!ir_may_avoid_spill_load(ctx, def, def)) { 3746 ir_emit_load_mem_int(ctx, type, def_reg, mem); 3747 } 3748 /* avoid load to the same location (valid only when register is not reused) */ 3749 return; 3750 } 3751 
ir_emit_load_mem_int(ctx, type, def_reg, mem); 3752 } else { 3753 if (op2_reg == IR_REG_NONE) { 3754 op2_reg = def_reg; 3755 } 3756 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 3757 op2_reg = IR_REG_NUM(op2_reg); 3758 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3759 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3760 } 3761 ir_emit_load_mem_int(ctx, type, def_reg, IR_MEM_B(op2_reg)); 3762 } 3763 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3764 ir_emit_store(ctx, type, def, def_reg); 3765 } 3766} 3767 3768static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3769{ 3770 ir_ref type = insn->type; 3771 ir_reg op2_reg = ctx->regs[def][2]; 3772 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3773 3774 if (ctx->use_lists[def].count == 1) { 3775 /* dead load */ 3776 return; 3777 } 3778 IR_ASSERT(def_reg != IR_REG_NONE); 3779 if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { 3780 ir_mem mem = ir_fuse_addr(ctx, def, insn->op2); 3781 3782 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 3783 if (!ir_may_avoid_spill_load(ctx, def, def)) { 3784 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 3785 } 3786 /* avoid load to the same location (valid only when register is not reused) */ 3787 return; 3788 } 3789 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 3790 } else { 3791 if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2))) { 3792 op2_reg = IR_REG_NUM(op2_reg); 3793 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3794 } 3795 if (op2_reg == IR_REG_NONE) { 3796 op2_reg = def_reg; 3797 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3798 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3799 } 3800 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_B(op2_reg)); 3801 } 3802 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3803 ir_emit_store(ctx, type, def, def_reg); 3804 } 3805} 3806 3807static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3808{ 3809 
ir_insn *val_insn = &ctx->ir_base[insn->op3]; 3810 ir_ref type = val_insn->type; 3811 ir_reg op2_reg = ctx->regs[ref][2]; 3812 ir_reg op3_reg = ctx->regs[ref][3]; 3813 3814 if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { 3815 ir_mem mem = ir_fuse_addr(ctx, ref, insn->op2); 3816 3817 if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 3818 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 3819 op3_reg = IR_REG_NUM(op3_reg); 3820 ir_emit_load(ctx, type, op3_reg, insn->op3); 3821 } 3822 /* avoid store to the same location */ 3823 return; 3824 } 3825 if (op3_reg == IR_REG_NONE) { 3826 IR_ASSERT(IR_IS_CONST_REF(insn->op3) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op3].op) && ctx->ir_base[insn->op3].val.i64 == 0); 3827 op3_reg = IR_REG_ZR; 3828 } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { 3829 op3_reg = IR_REG_NUM(op3_reg); 3830 ir_emit_load(ctx, type, op3_reg, insn->op3); 3831 } 3832 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 3833 } else { 3834 IR_ASSERT(op2_reg != IR_REG_NONE); 3835 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 3836 op2_reg = IR_REG_NUM(op2_reg); 3837 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3838 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3839 } 3840 if (op3_reg == IR_REG_NONE) { 3841 IR_ASSERT(IR_IS_CONST_REF(insn->op3) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op3].op) && ctx->ir_base[insn->op3].val.i64 == 0); 3842 op3_reg = IR_REG_ZR; 3843 } else if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { 3844 op3_reg = IR_REG_NUM(op3_reg); 3845 ir_emit_load(ctx, type, op3_reg, insn->op3); 3846 } 3847 ir_emit_store_mem_int(ctx, type, IR_MEM_B(op2_reg), op3_reg); 3848 } 3849} 3850 3851static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3852{ 3853 ir_ref type = ctx->ir_base[insn->op3].type; 3854 ir_reg op2_reg = ctx->regs[ref][2]; 3855 ir_reg op3_reg = ctx->regs[ref][3]; 3856 3857 
IR_ASSERT(op3_reg != IR_REG_NONE); 3858 if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { 3859 ir_mem mem = ir_fuse_addr(ctx, ref, insn->op2); 3860 3861 if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 3862 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 3863 op3_reg = IR_REG_NUM(op3_reg); 3864 ir_emit_load(ctx, type, op3_reg, insn->op3); 3865 } 3866 /* avoid store to the same location */ 3867 return; 3868 } 3869 if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { 3870 op3_reg = IR_REG_NUM(op3_reg); 3871 ir_emit_load(ctx, type, op3_reg, insn->op3); 3872 } 3873 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 3874 } else { 3875 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 3876 op2_reg = IR_REG_NUM(op2_reg); 3877 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 3878 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 3879 } 3880 if (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(insn->op3)) { 3881 op3_reg = IR_REG_NUM(op3_reg); 3882 ir_emit_load(ctx, type, op3_reg, insn->op3); 3883 } 3884 ir_emit_store_mem_fp(ctx, type, IR_MEM_B(op2_reg), op3_reg); 3885 } 3886} 3887 3888static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3889{ 3890 ir_reg src_reg = insn->op2; 3891 ir_type type = insn->type; 3892 3893 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { 3894 if (ctx->vregs[def] 3895 && ctx->live_intervals[ctx->vregs[def]] 3896 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { 3897 ir_emit_store(ctx, type, def, src_reg); 3898 } 3899 } else { 3900 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3901 3902 if (def_reg == IR_REG_NONE) { 3903 /* op3 is used as a flag that the value is already stored in memory. 
3904 * If op3 is set we don't have to store the value once again (in case of spilling) 3905 */ 3906 if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) { 3907 ir_emit_store(ctx, type, def, src_reg); 3908 } 3909 } else { 3910 if (src_reg != def_reg) { 3911 if (IR_IS_TYPE_INT(type)) { 3912 ir_emit_mov(ctx, type, def_reg, src_reg); 3913 } else { 3914 IR_ASSERT(IR_IS_TYPE_FP(type)); 3915 ir_emit_fp_mov(ctx, type, def_reg, src_reg); 3916 } 3917 } 3918 if (IR_REG_SPILLED(ctx->regs[def][0]) 3919 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) { 3920 ir_emit_store(ctx, type, def, def_reg); 3921 } 3922 } 3923 } 3924} 3925 3926static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 3927{ 3928 ir_ref type = ctx->ir_base[insn->op2].type; 3929 ir_reg op2_reg = ctx->regs[ref][2]; 3930 ir_reg dst_reg = insn->op3; 3931 3932 if (op2_reg != IR_REG_NONE) { 3933 if (IR_REG_SPILLED(op2_reg)) { 3934 op2_reg = IR_REG_NUM(op2_reg); 3935 ir_emit_load(ctx, type, op2_reg, insn->op2); 3936 } 3937 if (op2_reg != dst_reg) { 3938 if (IR_IS_TYPE_INT(type)) { 3939 ir_emit_mov(ctx, type, dst_reg, op2_reg); 3940 } else { 3941 IR_ASSERT(IR_IS_TYPE_FP(type)); 3942 ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); 3943 } 3944 } 3945 } else { 3946 ir_emit_load(ctx, type, dst_reg, insn->op2); 3947 } 3948} 3949 3950static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3951{ 3952 ir_backend_data *data = ctx->data; 3953 dasm_State **Dst = &data->dasm_state; 3954 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3955 3956 if (IR_IS_CONST_REF(insn->op2)) { 3957 ir_insn *val = &ctx->ir_base[insn->op2]; 3958 int32_t size = val->val.i32; 3959 3960 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 3961 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 3962 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); 3963 3964 if (ctx->flags2 & IR_HAS_CALLS) { 3965 /* Stack must be 16 byte aligned */ 3966 size = IR_ALIGNED_SIZE(size, 
16); 3967 } else { 3968 size = IR_ALIGNED_SIZE(size, 8); 3969 } 3970 if (aarch64_may_encode_imm12(size)) { 3971 | sub sp, sp, #size 3972 } else { 3973 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, size); 3974 | sub sp, sp, Rx(IR_REG_INT_TMP) 3975 } 3976 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 3977 ctx->call_stack_size += size; 3978 } 3979 } else { 3980 int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 16 : 8; 3981 ir_reg op2_reg = ctx->regs[def][2]; 3982 ir_type type = ctx->ir_base[insn->op2].type; 3983 3984 IR_ASSERT(ctx->flags & IR_FUNCTION); 3985 IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); 3986 IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 3987 if (IR_REG_SPILLED(op2_reg)) { 3988 op2_reg = IR_REG_NUM(op2_reg); 3989 ir_emit_load(ctx, type, op2_reg, insn->op2); 3990 } 3991 | add Rx(def_reg), Rx(op2_reg), #(alignment-1) 3992 | and Rx(def_reg), Rx(def_reg), #(~(alignment-1)) 3993 | sub sp, sp, Rx(def_reg); 3994 } 3995 if (def_reg != IR_REG_NONE) { 3996 | mov Rx(def_reg), sp 3997 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3998 ir_emit_store(ctx, insn->type, def, def_reg); 3999 } 4000 } else { 4001 ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); 4002 } 4003} 4004 4005static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4006{ 4007 ir_backend_data *data = ctx->data; 4008 dasm_State **Dst = &data->dasm_state; 4009 4010 if (IR_IS_CONST_REF(insn->op2)) { 4011 ir_insn *val = &ctx->ir_base[insn->op2]; 4012 int32_t size = val->val.i32; 4013 4014 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 4015 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4016 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); 4017 4018 if (ctx->flags2 & IR_HAS_CALLS) { 4019 /* Stack must be 16 byte aligned */ 4020 size = IR_ALIGNED_SIZE(size, 16); 4021 } else { 4022 size = IR_ALIGNED_SIZE(size, 8); 4023 } 4024 | add sp, sp, #size 4025 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 4026 ctx->call_stack_size -= size; 4027 } 4028 } else { 4029// int32_t alignment = 
(ctx->flags2 & IR_HAS_CALLS) ? 16 : 8; 4030 ir_reg op2_reg = ctx->regs[def][2]; 4031 ir_type type = ctx->ir_base[insn->op2].type; 4032 4033 IR_ASSERT(ctx->flags & IR_FUNCTION); 4034 IR_ASSERT(op2_reg != IR_REG_NONE); 4035 if (IR_REG_SPILLED(op2_reg)) { 4036 op2_reg = IR_REG_NUM(op2_reg); 4037 ir_emit_load(ctx, type, op2_reg, insn->op2); 4038 } 4039 4040 // TODO: alignment 4041 4042 | add sp, sp, Rx(op2_reg); 4043 } 4044} 4045 4046static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) 4047{ 4048 ir_backend_data *data = ctx->data; 4049 dasm_State **Dst = &data->dasm_state; 4050 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4051 4052 if (ctx->flags & IR_USE_FRAME_POINTER) { 4053 | mov Rx(def_reg), Rx(IR_REG_X29) 4054 } else { 4055 | add Rx(def_reg), Rx(IR_REG_X31), #(ctx->stack_frame_size + ctx->call_stack_size) 4056 } 4057 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4058 ir_emit_store(ctx, IR_ADDR, def, def_reg); 4059 } 4060} 4061 4062static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4063{ 4064#ifdef __APPLE__ 4065 ir_backend_data *data = ctx->data; 4066 dasm_State **Dst = &data->dasm_state; 4067 ir_reg fp; 4068 int arg_area_offset; 4069 ir_reg op2_reg = ctx->regs[def][2]; 4070 ir_reg tmp_reg = ctx->regs[def][3]; 4071 4072 IR_ASSERT(tmp_reg != IR_REG_NONE); 4073 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 4074 op2_reg = IR_REG_NUM(op2_reg); 4075 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4076 } 4077 4078 if (ctx->flags & IR_USE_FRAME_POINTER) { 4079 fp = IR_REG_FRAME_POINTER; 4080 arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; 4081 } else { 4082 fp = IR_REG_STACK_POINTER; 4083 arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; 4084 } 4085 | add Rx(tmp_reg), Rx(fp), #arg_area_offset 4086 if (op2_reg != IR_REG_NONE) { 4087 | str Rx(tmp_reg), [Rx(op2_reg)] 4088 } else { 4089 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg); 4090 4091 | str 
Rx(tmp_reg), [Rx(op2_reg), #offset] 4092 } 4093#else 4094 ir_backend_data *data = ctx->data; 4095 dasm_State **Dst = &data->dasm_state; 4096 ir_reg fp; 4097 int reg_save_area_offset; 4098 int overflow_arg_area_offset; 4099 ir_reg op2_reg = ctx->regs[def][2]; 4100 ir_reg tmp_reg = ctx->regs[def][3]; 4101 4102 IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4103 if (IR_REG_SPILLED(op2_reg)) { 4104 op2_reg = IR_REG_NUM(op2_reg); 4105 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4106 } 4107 4108 if (ctx->flags & IR_USE_FRAME_POINTER) { 4109 fp = IR_REG_FRAME_POINTER; 4110 reg_save_area_offset = ctx->locals_area_size + sizeof(void*) * 2; 4111 overflow_arg_area_offset = ctx->stack_frame_size + sizeof(void*) * 2 + ctx->param_stack_size; 4112 } else { 4113 fp = IR_REG_STACK_POINTER; 4114 reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; 4115 overflow_arg_area_offset = ctx->call_stack_size + ctx->stack_frame_size + ctx->param_stack_size; 4116 } 4117 4118 /* Set va_list.stack */ 4119 | add Rx(tmp_reg), Rx(fp), #overflow_arg_area_offset 4120 | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] 4121 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 4122 reg_save_area_offset += sizeof(void*) * IR_REG_INT_ARGS; 4123 /* Set va_list.gr_top */ 4124 if (overflow_arg_area_offset != reg_save_area_offset) { 4125 | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset 4126 } 4127 | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)] 4128 /* Set va_list.gr_offset */ 4129 | movn Rw(tmp_reg), #~(0 - (sizeof(void*) * (IR_REG_INT_ARGS - ctx->gp_reg_params))) 4130 | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] 4131 } else { 4132 /* Set va_list.gr_offset */ 4133 | str wzr, [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] 4134 } 4135 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 4136 reg_save_area_offset += 16 * IR_REG_FP_ARGS; 4137 
/* Set va_list.vr_top */ 4138 if (overflow_arg_area_offset != reg_save_area_offset) { 4139 | add Rx(tmp_reg), Rx(fp), #reg_save_area_offset 4140 } 4141 | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)] 4142 /* Set va_list.vr_offset */ 4143 | movn Rw(tmp_reg), #~(0 - (16 * (IR_REG_FP_ARGS - ctx->fp_reg_params))) 4144 | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] 4145 } else { 4146 /* Set va_list.vr_offset */ 4147 | str wzr, [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] 4148 } 4149#endif 4150} 4151 4152static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4153{ 4154 IR_ASSERT(0 && "NIY va_copy"); 4155} 4156 4157static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4158{ 4159#ifdef __APPLE__ 4160 ir_backend_data *data = ctx->data; 4161 dasm_State **Dst = &data->dasm_state; 4162 ir_type type = insn->type; 4163 ir_reg def_reg = ctx->regs[def][0]; 4164 ir_reg op2_reg = ctx->regs[def][2]; 4165 ir_reg tmp_reg = ctx->regs[def][3]; 4166 4167 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4168 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 4169 op2_reg = IR_REG_NUM(op2_reg); 4170 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4171 } 4172 | ldr Rx(tmp_reg), [Rx(op2_reg)] 4173 ir_emit_load_mem(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); 4174 | add Rx(tmp_reg), Rx(tmp_reg), #IR_MAX(ir_type_size[type], sizeof(void*)) 4175 if (op2_reg != IR_REG_NONE) { 4176 | str Rx(tmp_reg), [Rx(op2_reg)] 4177 } else { 4178 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg); 4179 4180 | str Rx(tmp_reg), [Rx(op2_reg), #offset] 4181 } 4182 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4183 ir_emit_store(ctx, type, def, def_reg); 4184 } 4185#else 4186 ir_backend_data *data = ctx->data; 4187 dasm_State **Dst = &data->dasm_state; 4188 ir_type type = insn->type; 4189 ir_reg def_reg = ctx->regs[def][0]; 4190 ir_reg op2_reg = ctx->regs[def][2]; 4191 ir_reg tmp_reg = ctx->regs[def][3]; 4192 4193 
IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4194 if (IR_REG_SPILLED(op2_reg)) { 4195 op2_reg = IR_REG_NUM(op2_reg); 4196 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4197 } 4198 if (IR_IS_TYPE_INT(type)) { 4199 | ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] 4200 | cmp Rw(tmp_reg), wzr 4201 | bge >1 4202 | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, gr_top)] 4203 | sxtw Rx(tmp_reg), Rw(tmp_reg) 4204 | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) 4205 | ldr Rx(def_reg), [Rx(IR_REG_INT_TMP)] 4206 | add Rw(tmp_reg), Rw(tmp_reg), #sizeof(void*) 4207 | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, gr_offset)] 4208 | b >2 4209 |1: 4210 | ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] 4211 | ldr Rx(def_reg), [Rx(tmp_reg)] 4212 | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) 4213 | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] 4214 |2: 4215 } else { 4216 | ldr Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] 4217 | cmp Rw(tmp_reg), wzr 4218 | bge >1 4219 | ldr Rx(IR_REG_INT_TMP), [Rx(op2_reg), #offsetof(ir_va_list, vr_top)] 4220 | sxtw Rx(tmp_reg), Rw(tmp_reg) 4221 | add Rx(IR_REG_INT_TMP), Rx(tmp_reg), Rx(IR_REG_INT_TMP) 4222 | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(IR_REG_INT_TMP)] 4223 | add Rw(tmp_reg), Rw(tmp_reg), #16 4224 | str Rw(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, vr_offset)] 4225 | b >2 4226 |1: 4227 | ldr Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] 4228 | ldr Rd(def_reg-IR_REG_FP_FIRST), [Rx(tmp_reg)] 4229 | add Rx(tmp_reg), Rx(tmp_reg), #sizeof(void*) 4230 | str Rx(tmp_reg), [Rx(op2_reg), #offsetof(ir_va_list, stack)] 4231 |2: 4232 } 4233 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4234 ir_emit_store(ctx, type, def, def_reg); 4235 } 4236#endif 4237} 4238 4239static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 4240{ 4241 ir_backend_data *data = ctx->data; 4242 dasm_State **Dst = 
&data->dasm_state; 4243 ir_type type; 4244 ir_block *bb; 4245 ir_insn *use_insn, *val; 4246 uint32_t n, *p, use_block; 4247 int i; 4248 int label, default_label = 0; 4249 int count = 0; 4250 ir_val min, max; 4251 ir_reg op1_reg, op2_reg, tmp_reg; 4252 4253 type = ctx->ir_base[insn->op2].type; 4254 if (IR_IS_TYPE_SIGNED(type)) { 4255 min.u64 = 0x7fffffffffffffff; 4256 max.u64 = 0x8000000000000000; 4257 } else { 4258 min.u64 = 0xffffffffffffffff; 4259 max.u64 = 0x0; 4260 } 4261 4262 bb = &ctx->cfg_blocks[b]; 4263 p = &ctx->cfg_edges[bb->successors]; 4264 for (n = bb->successors_count; n != 0; p++, n--) { 4265 use_block = *p; 4266 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 4267 if (use_insn->op == IR_CASE_VAL) { 4268 val = &ctx->ir_base[use_insn->op2]; 4269 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4270 if (IR_IS_TYPE_SIGNED(type)) { 4271 IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); 4272 min.i64 = IR_MIN(min.i64, val->val.i64); 4273 max.i64 = IR_MAX(max.i64, val->val.i64); 4274 } else { 4275 IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); 4276 min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); 4277 max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); 4278 } 4279 count++; 4280 } else { 4281 IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); 4282 default_label = ir_skip_empty_target_blocks(ctx, use_block); 4283 } 4284 } 4285 4286 op1_reg = ctx->regs[def][1]; 4287 op2_reg = ctx->regs[def][2]; 4288 tmp_reg = ctx->regs[def][3]; 4289 4290 IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4291 if (IR_REG_SPILLED(op2_reg)) { 4292 op2_reg = IR_REG_NUM(op2_reg); 4293 ir_emit_load(ctx, type, op2_reg, insn->op2); 4294 } else if (IR_IS_CONST_REF(insn->op2)) { 4295 ir_emit_load(ctx, type, op2_reg, insn->op2); 4296 } 4297 4298 /* Generate a table jmp or a sequence of calls */ 4299 if ((max.i64-min.i64) < count * 8) { 4300 int *labels = ir_mem_malloc(sizeof(int) * (max.i64 - min.i64 + 1)); 4301 4302 for (i = 0; i <= (max.i64 - min.i64); i++) { 4303 labels[i] = default_label; 4304 
} 4305 p = &ctx->cfg_edges[bb->successors]; 4306 for (n = bb->successors_count; n != 0; p++, n--) { 4307 use_block = *p; 4308 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 4309 if (use_insn->op == IR_CASE_VAL) { 4310 val = &ctx->ir_base[use_insn->op2]; 4311 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4312 label = ir_skip_empty_target_blocks(ctx, use_block); 4313 labels[val->val.i64 - min.i64] = label; 4314 } 4315 } 4316 4317 if (aarch64_may_encode_imm12(max.i64)) { 4318 | ASM_REG_IMM_OP cmp, type, op2_reg, max.i64 4319 } else { 4320 ir_emit_load_imm_int(ctx, type, tmp_reg, max.i64); 4321 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg 4322 } 4323 if (IR_IS_TYPE_SIGNED(type)) { 4324 | bgt =>default_label 4325 } else { 4326 | bhi =>default_label 4327 } 4328 4329 if (op1_reg == IR_REG_NONE) { 4330 op1_reg = op2_reg; 4331 } 4332 if (aarch64_may_encode_imm12(min.i64)) { 4333 | ASM_REG_REG_IMM_OP subs, type, op1_reg, op2_reg, min.i64 4334 } else { 4335 ir_emit_load_imm_int(ctx, type, tmp_reg, min.i64); 4336 | ASM_REG_REG_REG_OP subs, type, op1_reg, op2_reg, tmp_reg 4337 } 4338 if (IR_IS_TYPE_SIGNED(type)) { 4339 | blt =>default_label 4340 } else { 4341 | blo =>default_label 4342 } 4343 | adr Rx(tmp_reg), >1 4344 | ldr Rx(tmp_reg), [Rx(tmp_reg), Rx(op1_reg), lsl #3] 4345 | br Rx(tmp_reg) 4346 |.jmp_table 4347 if (!data->jmp_table_label) { 4348 data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; 4349 |=>data->jmp_table_label: 4350 } 4351 |.align 8 4352 |1: 4353 for (i = 0; i <= (max.i64 - min.i64); i++) { 4354 int b = labels[i]; 4355 ir_block *bb = &ctx->cfg_blocks[b]; 4356 ir_insn *insn = &ctx->ir_base[bb->end]; 4357 4358 if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { 4359 ir_ref prev = ctx->prev_ref[bb->end]; 4360 if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { 4361 prev = ctx->prev_ref[prev]; 4362 } 4363 if (prev == bb->start) { 4364 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 4365 4366 | .addr &addr 
4367 if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { 4368 bb->flags |= IR_BB_EMPTY; 4369 } 4370 continue; 4371 } 4372 } 4373 | .addr =>b 4374 } 4375 |.code 4376 ir_mem_free(labels); 4377 } else { 4378 p = &ctx->cfg_edges[bb->successors]; 4379 for (n = bb->successors_count; n != 0; p++, n--) { 4380 use_block = *p; 4381 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 4382 if (use_insn->op == IR_CASE_VAL) { 4383 val = &ctx->ir_base[use_insn->op2]; 4384 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 4385 label = ir_skip_empty_target_blocks(ctx, use_block); 4386 if (aarch64_may_encode_imm12(val->val.i64)) { 4387 | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i64 4388 } else { 4389 ir_emit_load_imm_int(ctx, type, tmp_reg, val->val.i64); 4390 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg 4391 4392 } 4393 | beq =>label 4394 } 4395 } 4396 if (default_label) { 4397 | b =>default_label 4398 } 4399 } 4400} 4401 4402static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) 4403{ 4404 int j, n; 4405 ir_type type; 4406 int int_param = 0; 4407 int fp_param = 0; 4408 int int_reg_params_count = IR_REG_INT_ARGS; 4409 int fp_reg_params_count = IR_REG_FP_ARGS; 4410 int32_t used_stack = 0; 4411#ifdef __APPLE__ 4412 const ir_proto_t *proto = ir_call_proto(ctx, insn); 4413 int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? 
proto->params_count + 2 : insn->inputs_count; 4414#endif 4415 4416 n = insn->inputs_count; 4417 for (j = 3; j <= n; j++) { 4418 type = ctx->ir_base[ir_insn_op(insn, j)].type; 4419#ifdef __APPLE__ 4420 if (j > last_named_input) { 4421 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 4422 } else 4423#endif 4424 if (IR_IS_TYPE_INT(type)) { 4425 if (int_param >= int_reg_params_count) { 4426 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 4427 } 4428 int_param++; 4429 } else { 4430 IR_ASSERT(IR_IS_TYPE_FP(type)); 4431 if (fp_param >= fp_reg_params_count) { 4432 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 4433 } 4434 fp_param++; 4435 } 4436 } 4437 4438 return used_stack; 4439} 4440 4441static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) 4442{ 4443 ir_backend_data *data = ctx->data; 4444 dasm_State **Dst = &data->dasm_state; 4445 int j, n; 4446 ir_ref arg; 4447 ir_insn *arg_insn; 4448 uint8_t type; 4449 ir_reg src_reg, dst_reg; 4450 int int_param = 0; 4451 int fp_param = 0; 4452 int count = 0; 4453 int int_reg_params_count = IR_REG_INT_ARGS; 4454 int fp_reg_params_count = IR_REG_FP_ARGS; 4455 const int8_t *int_reg_params = _ir_int_reg_params; 4456 const int8_t *fp_reg_params = _ir_fp_reg_params; 4457 int32_t used_stack, stack_offset = 0; 4458 ir_copy *copies; 4459 bool do_pass3 = 0; 4460 /* For temporaries we may use any scratch registers except for registers used for parameters */ 4461 ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ 4462 4463 n = insn->inputs_count; 4464 if (n < 3) { 4465 return 0; 4466 } 4467 4468 if (tmp_reg == IR_REG_NONE) { 4469 tmp_reg = IR_REG_IP0; 4470 } 4471 4472 if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK)) { 4473 // TODO: support for preallocated stack 4474 used_stack = 0; 4475 } else { 4476 used_stack = ir_call_used_stack(ctx, insn); 4477 /* Stack must be 16 byte aligned */ 4478 used_stack = IR_ALIGNED_SIZE(used_stack, 16); 
4479 if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size) { 4480 used_stack = 0; 4481 } else { 4482 ctx->call_stack_size += used_stack; 4483 if (used_stack) { 4484 if (insn->op == IR_TAILCALL && !(ctx->flags & IR_USE_FRAME_POINTER)) { 4485 ctx->flags |= IR_USE_FRAME_POINTER; 4486 | stp x29, x30, [sp, # (-(ctx->stack_frame_size+16))]! 4487 | mov x29, sp 4488 } 4489 | sub sp, sp, #used_stack 4490 } 4491 } 4492 } 4493 4494#ifdef __APPLE__ 4495 const ir_proto_t *proto = ir_call_proto(ctx, insn); 4496 int last_named_input = (proto && (proto->flags & IR_VARARG_FUNC)) ? proto->params_count + 2 : insn->inputs_count; 4497#endif 4498 4499 /* 1. move all register arguments that should be passed through stack 4500 * and collect arguments that should be passed through registers */ 4501 copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); 4502 for (j = 3; j <= n; j++) { 4503 arg = ir_insn_op(insn, j); 4504 src_reg = ir_get_alocated_reg(ctx, def, j); 4505 arg_insn = &ctx->ir_base[arg]; 4506 type = arg_insn->type; 4507#ifdef __APPLE__ 4508 if (j > last_named_input) { 4509 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4510 } else 4511#endif 4512 if (IR_IS_TYPE_INT(type)) { 4513 if (int_param < int_reg_params_count) { 4514 dst_reg = int_reg_params[int_param]; 4515 } else { 4516 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4517 } 4518 int_param++; 4519 } else { 4520 IR_ASSERT(IR_IS_TYPE_FP(type)); 4521 if (fp_param < fp_reg_params_count) { 4522 dst_reg = fp_reg_params[fp_param]; 4523 } else { 4524 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4525 } 4526 fp_param++; 4527 } 4528 if (dst_reg != IR_REG_NONE) { 4529 if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { 4530 /* delay CONST->REG and MEM->REG moves to third pass */ 4531 do_pass3 = 1; 4532 } else { 4533 IR_ASSERT(src_reg != IR_REG_NONE); 4534 if (IR_REG_SPILLED(src_reg)) { 4535 src_reg = IR_REG_NUM(src_reg); 4536 ir_emit_load(ctx, type, src_reg, arg); 4537 } 4538 if 
(src_reg != dst_reg) { 4539 /* delay REG->REG moves to second pass */ 4540 copies[count].type = type; 4541 copies[count].from = src_reg; 4542 copies[count].to = dst_reg; 4543 count++; 4544 } 4545 } 4546 } else { 4547 /* Pass register arguments to stack (REG->MEM moves) */ 4548 if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { 4549 ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 4550 } else { 4551 do_pass3 = 1; 4552 } 4553 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 4554 } 4555 } 4556 4557 /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ 4558 if (count) { 4559 ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); 4560 } 4561 ir_mem_free(copies); 4562 4563 /* 3. move the remaining memory and immediate values */ 4564 if (do_pass3) { 4565 stack_offset = 0; 4566 int_param = 0; 4567 fp_param = 0; 4568 for (j = 3; j <= n; j++) { 4569 arg = ir_insn_op(insn, j); 4570 src_reg = ir_get_alocated_reg(ctx, def, j); 4571 arg_insn = &ctx->ir_base[arg]; 4572 type = arg_insn->type; 4573#ifdef __APPLE__ 4574 if (j > last_named_input) { 4575 dst_reg = IR_REG_NONE; /* pass argument through stack */ 4576 } else 4577#endif 4578 if (IR_IS_TYPE_INT(type)) { 4579 if (int_param < int_reg_params_count) { 4580 dst_reg = int_reg_params[int_param]; 4581 } else { 4582 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 4583 } 4584 int_param++; 4585 } else { 4586 IR_ASSERT(IR_IS_TYPE_FP(type)); 4587 if (fp_param < fp_reg_params_count) { 4588 dst_reg = fp_reg_params[fp_param]; 4589 } else { 4590 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 4591 } 4592 fp_param++; 4593 } 4594 if (dst_reg != IR_REG_NONE) { 4595 if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { 4596 if (IR_IS_CONST_REF(arg) && IR_IS_TYPE_INT(type)) { 4597 if (ir_type_size[type] == 1) { 4598 type = IR_ADDR; 4599 } 4600 } 4601 ir_emit_load(ctx, type, dst_reg, 
arg); 4602 } 4603 } else { 4604 if (IR_IS_TYPE_INT(type)) { 4605 if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) { 4606 IR_ASSERT(tmp_reg != IR_REG_NONE); 4607 ir_emit_load(ctx, type, tmp_reg, arg); 4608 if (IR_IS_CONST_REF(arg)) { 4609 type = IR_ADDR; //TODO: ??? 4610 } 4611 ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_reg); 4612 } else if (IR_REG_SPILLED(src_reg)) { 4613 src_reg = IR_REG_NUM(src_reg); 4614 ir_emit_load(ctx, type, src_reg, arg); 4615 ir_emit_store_mem_int(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 4616 } 4617 } else { 4618 if (IR_IS_CONST_REF(arg)) { 4619 ir_emit_load(ctx, type, tmp_fp_reg, arg); 4620 ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_fp_reg); 4621 } else if (src_reg == IR_REG_NONE) { 4622 IR_ASSERT(tmp_fp_reg != IR_REG_NONE); 4623 ir_emit_load(ctx, type, tmp_fp_reg, arg); 4624 ir_emit_store_mem_fp(ctx, IR_DOUBLE, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), tmp_fp_reg); 4625 } else if (IR_REG_SPILLED(src_reg)) { 4626 src_reg = IR_REG_NUM(src_reg); 4627 ir_emit_load(ctx, type, src_reg, arg); 4628 ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 4629 } 4630 } 4631 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 4632 } 4633 } 4634 } 4635 return used_stack; 4636} 4637 4638static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) 4639{ 4640 ir_backend_data *data = ctx->data; 4641 dasm_State **Dst = &data->dasm_state; 4642 ir_reg def_reg; 4643 4644 if (IR_IS_CONST_REF(insn->op2)) { 4645 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 4646 4647 if (aarch64_may_use_b(ctx->code_buffer, addr)) { 4648 | bl &addr 4649 } else { 4650 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 4651 | blr Rx(IR_REG_INT_TMP) 4652 } 4653 } else { 4654 ir_reg op2_reg = ctx->regs[def][2]; 4655 4656 IR_ASSERT(op2_reg != IR_REG_NONE); 4657 if 
(IR_REG_SPILLED(op2_reg)) { 4658 op2_reg = IR_REG_NUM(op2_reg); 4659 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4660 } 4661 | blr Rx(op2_reg) 4662 } 4663 4664 if (used_stack) { 4665 | add sp, sp, #used_stack 4666 ctx->call_stack_size -= used_stack; 4667 } 4668 4669 if (insn->type != IR_VOID) { 4670 if (IR_IS_TYPE_INT(insn->type)) { 4671 def_reg = IR_REG_NUM(ctx->regs[def][0]); 4672 if (def_reg != IR_REG_NONE) { 4673 if (def_reg != IR_REG_INT_RET1) { 4674 ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); 4675 } 4676 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4677 ir_emit_store(ctx, insn->type, def, def_reg); 4678 } 4679 } else if (ctx->use_lists[def].count > 1) { 4680 ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); 4681 } 4682 } else { 4683 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 4684 def_reg = IR_REG_NUM(ctx->regs[def][0]); 4685 if (def_reg != IR_REG_NONE) { 4686 if (def_reg != IR_REG_FP_RET1) { 4687 ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); 4688 } 4689 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4690 ir_emit_store(ctx, insn->type, def, def_reg); 4691 } 4692 } else if (ctx->use_lists[def].count > 1) { 4693 ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); 4694 } 4695 } 4696 } 4697} 4698 4699static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4700{ 4701 int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); 4702 ir_emit_call_ex(ctx, def, insn, used_stack); 4703} 4704 4705static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4706{ 4707 ir_backend_data *data = ctx->data; 4708 dasm_State **Dst = &data->dasm_state; 4709 int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); 4710 4711 if (used_stack != 0) { 4712 ir_emit_call_ex(ctx, def, insn, used_stack); 4713 ir_emit_return_void(ctx); 4714 return; 4715 } 4716 4717 ir_emit_epilogue(ctx); 4718 4719 if (IR_IS_CONST_REF(insn->op2)) { 4720 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 4721 4722 if 
(aarch64_may_use_b(ctx->code_buffer, addr)) { 4723 | b &addr 4724 } else { 4725 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 4726 | br Rx(IR_REG_INT_TMP) 4727 } 4728 } else { 4729 ir_reg op2_reg = ctx->regs[def][2]; 4730 4731 IR_ASSERT(op2_reg != IR_REG_NONE); 4732 if (IR_REG_SPILLED(op2_reg)) { 4733 op2_reg = IR_REG_NUM(op2_reg); 4734 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4735 } 4736 | br Rx(op2_reg) 4737 } 4738} 4739 4740static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4741{ 4742 ir_backend_data *data = ctx->data; 4743 dasm_State **Dst = &data->dasm_state; 4744 ir_reg op2_reg = ctx->regs[def][2]; 4745 4746 if (op2_reg != IR_REG_NONE) { 4747 if (IR_REG_SPILLED(op2_reg)) { 4748 op2_reg = IR_REG_NUM(op2_reg); 4749 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 4750 } 4751 | br Rx(op2_reg) 4752 } else if (IR_IS_CONST_REF(insn->op2)) { 4753 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 4754 4755 if (aarch64_may_use_b(ctx->code_buffer, addr)) { 4756 | b &addr 4757 } else { 4758 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 4759 | br Rx(IR_REG_INT_TMP) 4760 } 4761 } else { 4762 IR_ASSERT(0); 4763 } 4764} 4765 4766static void ir_emit_guard(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4767{ 4768 ir_backend_data *data = ctx->data; 4769 dasm_State **Dst = &data->dasm_state; 4770 ir_reg op2_reg = ctx->regs[def][2]; 4771 ir_type type = ctx->ir_base[insn->op2].type; 4772 4773 IR_ASSERT(IR_IS_TYPE_INT(type)); 4774 if (IR_IS_CONST_REF(insn->op2)) { 4775 bool is_true = ir_ref_is_true(ctx, insn->op2); 4776 4777 if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { 4778 if (IR_IS_CONST_REF(insn->op3)) { 4779 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 4780 4781 if (aarch64_may_use_b(ctx->code_buffer, addr)) { 4782 | b &addr 4783 } else { 4784 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 4785 | br Rx(IR_REG_INT_TMP) 4786 } 4787 } 
else { 4788 IR_ASSERT(0); 4789 } 4790 } 4791 return; 4792 } 4793 4794 IR_ASSERT(op2_reg != IR_REG_NONE); 4795 if (IR_REG_SPILLED(op2_reg)) { 4796 op2_reg = IR_REG_NUM(op2_reg); 4797 ir_emit_load(ctx, type, op2_reg, insn->op2); 4798 } 4799 4800 if (IR_IS_CONST_REF(insn->op3)) { 4801 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 4802 4803 if (insn->op == IR_GUARD) { 4804 if (ir_type_size[type] == 8) { 4805 | cbz Rx(op2_reg), &addr 4806 } else { 4807 | cbz Rw(op2_reg), &addr 4808 } 4809 } else { 4810 if (ir_type_size[type] == 8) { 4811 | cbnz Rx(op2_reg), &addr 4812 } else { 4813 | cbnz Rw(op2_reg), &addr 4814 } 4815 } 4816 } else { 4817 IR_ASSERT(0); 4818 } 4819} 4820 4821static void ir_emit_guard_jz(ir_ctx *ctx, uint8_t op, void *addr, ir_type type, ir_reg reg) 4822{ 4823 ir_backend_data *data = ctx->data; 4824 dasm_State **Dst = &data->dasm_state; 4825 4826 if (op == IR_EQ) { 4827 if (ir_type_size[type] == 8) { 4828 | cbnz Rx(reg), &addr 4829 } else { 4830 | cbnz Rw(reg), &addr 4831 } 4832 } else { 4833 IR_ASSERT(op == IR_NE); 4834 if (ir_type_size[type] == 8) { 4835 | cbz Rx(reg), &addr 4836 } else { 4837 | cbz Rw(reg), &addr 4838 } 4839 } 4840} 4841 4842static void ir_emit_guard_jcc(ir_ctx *ctx, uint8_t op, void *addr, bool int_cmp) 4843{ 4844 ir_backend_data *data = ctx->data; 4845 dasm_State **Dst = &data->dasm_state; 4846 4847 if (int_cmp) { 4848 switch (op) { 4849 default: 4850 IR_ASSERT(0 && "NIY binary op"); 4851 case IR_EQ: 4852 | beq &addr 4853 break; 4854 case IR_NE: 4855 | bne &addr 4856 break; 4857 case IR_LT: 4858 | blt &addr 4859 break; 4860 case IR_GE: 4861 | bge &addr 4862 break; 4863 case IR_LE: 4864 | ble &addr 4865 break; 4866 case IR_GT: 4867 | bgt &addr 4868 break; 4869 case IR_ULT: 4870 | blo &addr 4871 break; 4872 case IR_UGE: 4873 | bhs &addr 4874 break; 4875 case IR_ULE: 4876 | bls &addr 4877 break; 4878 case IR_UGT: 4879 | bhi &addr 4880 break; 4881 } 4882 } else { 4883 switch (op) { 4884 default: 4885 IR_ASSERT(0 && "NIY 
binary op"); 4886 case IR_EQ: 4887 | beq &addr 4888 break; 4889 case IR_NE: 4890 | bne &addr 4891 break; 4892 case IR_LT: 4893 | bmi &addr 4894 break; 4895 case IR_GE: 4896 | bge &addr 4897 break; 4898 case IR_LE: 4899 | bls &addr 4900 break; 4901 case IR_GT: 4902 | bgt &addr 4903 break; 4904// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; 4905// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; 4906// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; 4907// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; 4908 } 4909 } 4910} 4911 4912static void ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 4913{ 4914 ir_backend_data *data = ctx->data; 4915 dasm_State **Dst = &data->dasm_state; 4916 ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; 4917 ir_op op = cmp_insn->op; 4918 ir_type type = ctx->ir_base[cmp_insn->op1].type; 4919 ir_ref op1 = cmp_insn->op1; 4920 ir_ref op2 = cmp_insn->op2; 4921 ir_reg op1_reg = ctx->regs[insn->op2][1]; 4922 ir_reg op2_reg = ctx->regs[insn->op2][2]; 4923 void *addr; 4924 4925 if (op1_reg != IR_REG_NONE && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { 4926 op1_reg = IR_REG_NUM(op1_reg); 4927 ir_emit_load(ctx, type, op1_reg, op1); 4928 } 4929 if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) { 4930 op2_reg = IR_REG_NUM(op2_reg); 4931 if (op1 != op2) { 4932 ir_emit_load(ctx, type, op2_reg, op2); 4933 } 4934 } 4935 4936 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 4937 4938 if (IR_IS_CONST_REF(op2) 4939 && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) 4940 && ctx->ir_base[op2].val.u64 == 0) { 4941 if (op == IR_ULT) { 4942 /* always false */ 4943 if (aarch64_may_use_b(ctx->code_buffer, addr)) { 4944 | b &addr 4945 } else { 4946 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 4947 | br Rx(IR_REG_INT_TMP) 4948 } 4949 return; 4950 } else if (op == IR_UGE) { 4951 /* always true */ 4952 return; 4953 } 
else if (op == IR_ULE) { 4954 op = IR_EQ; 4955 } else if (op == IR_UGT) { 4956 op = IR_NE; 4957 } 4958 if (op1_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { 4959 if (insn->op == IR_GUARD_NOT) { 4960 op ^= 1; // reverse 4961 } 4962 ir_emit_guard_jz(ctx, op, addr, type, op1_reg); 4963 return; 4964 } 4965 } 4966 ir_emit_cmp_int_common(ctx, type, op1_reg, op1, op2_reg, op2); 4967 4968 if (insn->op == IR_GUARD) { 4969 op ^= 1; // reverse 4970 } 4971 4972 ir_emit_guard_jcc(ctx, op, addr, 1); 4973} 4974 4975static void ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 4976{ 4977 ir_op op = ir_emit_cmp_fp_common(ctx, insn->op2, &ctx->ir_base[insn->op2]); 4978 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 4979 4980 if (insn->op == IR_GUARD) { 4981 op ^= 1; // reverse 4982 } 4983 ir_emit_guard_jcc(ctx, op, addr, 0); 4984} 4985 4986static void ir_emit_guard_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4987{ 4988 ir_backend_data *data = ctx->data; 4989 dasm_State **Dst = &data->dasm_state; 4990 ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; 4991 ir_insn *math_insn = &ctx->ir_base[overflow_insn->op1]; 4992 ir_type type = math_insn->type; 4993 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 4994 4995 IR_ASSERT(IR_IS_TYPE_INT(type)); 4996 if (math_insn->op == IR_MUL_OV) { 4997 if (insn->op == IR_GUARD) { 4998 | beq &addr 4999 } else { 5000 | bne &addr 5001 } 5002 } else if (IR_IS_TYPE_SIGNED(type)) { 5003 if (insn->op == IR_GUARD) { 5004 | bvc &addr 5005 } else { 5006 | bvs &addr 5007 } 5008 } else { 5009 if (insn->op == IR_GUARD) { 5010 | bcc &addr 5011 } else { 5012 | bcs &addr 5013 } 5014 } 5015} 5016 5017static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5018{ 5019 ir_backend_data *data = ctx->data; 5020 dasm_State **Dst = &data->dasm_state; 5021 uint32_t code; 5022 ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); 5023 5024 if (ctx->use_lists[def].count == 1) { 5025 /* dead load */ 5026 return; 
5027 } 5028 5029||#ifdef __APPLE__ 5030|| code = 0xd53bd060 | reg; // TODO: hard-coded: mrs reg, tpidrro_el0 5031| .long code 5032| and Rx(reg), Rx(reg), #0xfffffffffffffff8 5033|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op2, TMP1 5034|//??? MEM_ACCESS_64_WITH_UOFFSET_64 ldr, Rx(reg), Rx(reg), #insn->op3, TMP1 5035||#else 5036|| code = 0xd53bd040 | reg; // TODO: hard-coded: mrs reg, tpidr_el0 5037| .long code 5038||//??? IR_ASSERT(insn->op2 <= LDR_STR_PIMM64); 5039| ldr Rx(reg), [Rx(reg), #insn->op2] 5040||#endif 5041 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5042 ir_emit_store(ctx, IR_ADDR, def, reg); 5043 } 5044} 5045 5046static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5047{ 5048 ir_backend_data *data = ctx->data; 5049 dasm_State **Dst = &data->dasm_state; 5050 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5051 5052 IR_ASSERT(def_reg != IR_REG_NONE); 5053 5054 | stp d30, d31, [sp, #-16]! 5055 | stp d28, d29, [sp, #-16]! 5056 | stp d26, d27, [sp, #-16]! 5057 | stp d24, d25, [sp, #-16]! 5058 | stp d22, d23, [sp, #-16]! 5059 | stp d20, d21, [sp, #-16]! 5060 | stp d18, d19, [sp, #-16]! 5061 | stp d16, d17, [sp, #-16]! 5062 | stp d14, d15, [sp, #-16]! 5063 | stp d12, d13, [sp, #-16]! 5064 | stp d10, d11, [sp, #-16]! 5065 | stp d8, d9, [sp, #-16]! 5066 | stp d6, d7, [sp, #-16]! 5067 | stp d4, d5, [sp, #-16]! 5068 | stp d2, d3, [sp, #-16]! 5069 | stp d0, d1, [sp, #-16]! 5070 5071 | str x30, [sp, #-16]! 5072 | stp x28, x29, [sp, #-16]! 5073 | stp x26, x27, [sp, #-16]! 5074 | stp x24, x25, [sp, #-16]! 5075 | stp x22, x23, [sp, #-16]! 5076 | stp x20, x21, [sp, #-16]! 5077 | stp x18, x19, [sp, #-16]! 5078 | stp x16, x17, [sp, #-16]! 5079 | stp x14, x15, [sp, #-16]! 5080 | stp x12, x13, [sp, #-16]! 5081 | stp x10, x11, [sp, #-16]! 5082 | stp x8, x9, [sp, #-16]! 5083 | stp x6, x7, [sp, #-16]! 5084 | stp x4, x5, [sp, #-16]! 5085 | stp x2, x3, [sp, #-16]! 5086 | stp x0, x1, [sp, #-16]! 
5087 5088 | mov Rx(IR_REG_INT_ARG2), sp 5089 | add Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_ARG2), #(32*8+32*8) 5090 | str Rx(IR_REG_INT_ARG1), [sp, #(31*8)] 5091 | mov Rx(IR_REG_INT_ARG1), Rx(IR_REG_INT_TMP) 5092 5093 if (IR_IS_CONST_REF(insn->op2)) { 5094 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 5095 5096 if (aarch64_may_use_b(ctx->code_buffer, addr)) { 5097 | bl &addr 5098 } else { 5099 ir_emit_load_imm_int(ctx, IR_ADDR, IR_REG_INT_TMP, (intptr_t)addr); 5100 | blr Rx(IR_REG_INT_TMP) 5101 } 5102 } else { 5103 IR_ASSERT(0); 5104 } 5105 5106 | add sp, sp, #(32*8+32*8) 5107 5108 if (def_reg != IR_REG_INT_RET1) { 5109 ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); 5110 } 5111 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5112 ir_emit_store(ctx, insn->type, def, def_reg); 5113 } 5114} 5115 5116static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) 5117{ 5118 ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 5119 5120 IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); 5121 5122 if (IR_IS_TYPE_INT(type)) { 5123 if (from_reg != IR_REG_NONE) { 5124 if (to_reg != IR_REG_NONE) { 5125 ir_emit_mov(ctx, type, to_reg, from_reg); 5126 } else { 5127 ir_emit_store(ctx, type, to, from_reg); 5128 } 5129 } else { 5130 ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset)); 5131 } 5132 } else { 5133 if (from_reg != IR_REG_NONE) { 5134 if (to_reg != IR_REG_NONE) { 5135 ir_emit_fp_mov(ctx, type, to_reg, from_reg); 5136 } else { 5137 ir_emit_store(ctx, type, to, from_reg); 5138 } 5139 } else { 5140 ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset)); 5141 } 5142 } 5143} 5144 5145static void ir_emit_load_params(ir_ctx *ctx) 5146{ 5147 ir_use_list *use_list = &ctx->use_lists[1]; 5148 ir_insn *insn; 5149 ir_ref i, n, *p, use; 5150 int int_param_num = 0; 5151 int fp_param_num = 0; 5152 ir_reg src_reg; 5153 ir_reg dst_reg; 5154 // TODO: 
Calling convention specific 5155 int int_reg_params_count = IR_REG_INT_ARGS; 5156 int fp_reg_params_count = IR_REG_FP_ARGS; 5157 const int8_t *int_reg_params = _ir_int_reg_params; 5158 const int8_t *fp_reg_params = _ir_fp_reg_params; 5159 int32_t stack_offset = 0; 5160 5161 if (ctx->flags & IR_USE_FRAME_POINTER) { 5162 stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ 5163 } else { 5164 stack_offset = ctx->stack_frame_size + ctx->call_stack_size; 5165 } 5166 n = use_list->count; 5167 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 5168 use = *p; 5169 insn = &ctx->ir_base[use]; 5170 if (insn->op == IR_PARAM) { 5171 if (IR_IS_TYPE_INT(insn->type)) { 5172 if (int_param_num < int_reg_params_count) { 5173 src_reg = int_reg_params[int_param_num]; 5174 } else { 5175 src_reg = IR_REG_NONE; 5176 } 5177 int_param_num++; 5178 } else { 5179 if (fp_param_num < fp_reg_params_count) { 5180 src_reg = fp_reg_params[fp_param_num]; 5181 } else { 5182 src_reg = IR_REG_NONE; 5183 } 5184 fp_param_num++; 5185 } 5186 if (ctx->vregs[use]) { 5187 dst_reg = IR_REG_NUM(ctx->regs[use][0]); 5188 IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || 5189 stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + 5190 ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-ctx->stack_frame_size : ctx->call_stack_size)); 5191 if (src_reg != dst_reg) { 5192 ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); 5193 } 5194 if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { 5195 ir_emit_store(ctx, insn->type, use, dst_reg); 5196 } 5197 } 5198 if (src_reg == IR_REG_NONE) { 5199 if (sizeof(void*) == 8) { 5200 stack_offset += sizeof(void*); 5201 } else { 5202 stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 5203 } 5204 } 5205 } 5206 } 5207} 5208 5209static ir_reg ir_get_free_reg(ir_type type, ir_regset available) 5210{ 5211 if (IR_IS_TYPE_INT(type)) { 5212 available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); 5213 } else { 5214 IR_ASSERT(IR_IS_TYPE_FP(type)); 5215 available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); 5216 } 5217 IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); 5218 return IR_REGSET_FIRST(available); 5219} 5220 5221static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) 5222{ 5223 ir_backend_data *data = ctx->data; 5224 ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; 5225 5226 if (to == 0) { 5227 if (IR_IS_TYPE_INT(type)) { 5228 if (ctx->regs[ref][0] == IR_REG_NONE) { 5229 ctx->regs[ref][0] = IR_REG_X0; 5230 } 5231 } else { 5232 IR_ASSERT(IR_IS_TYPE_FP(type)); 5233 if (ctx->regs[ref][1] == IR_REG_NONE) { 5234 ctx->regs[ref][1] = IR_REG_V0; 5235 } 5236 } 5237 } else if (from != 0) { 5238 if (IR_IS_TYPE_INT(type)) { 5239 if (ctx->regs[ref][0] == IR_REG_NONE) { 5240 ctx->regs[ref][0] = IR_REG_X0; 5241 } 5242 } else { 5243 IR_ASSERT(IR_IS_TYPE_FP(type)); 5244 if (ctx->regs[ref][1] == IR_REG_NONE) { 5245 ctx->regs[ref][1] = IR_REG_V0; 5246 } 5247 } 5248 } 5249 return 1; 5250} 5251 5252static void ir_fix_param_spills(ir_ctx *ctx) 5253{ 5254 ir_use_list *use_list = &ctx->use_lists[1]; 5255 ir_insn *insn; 5256 ir_ref i, n, *p, use; 5257 int int_param_num = 0; 5258 int fp_param_num = 0; 5259 ir_reg src_reg; 5260 // TODO: Calling convention 
specific 5261 int int_reg_params_count = IR_REG_INT_ARGS; 5262 int fp_reg_params_count = IR_REG_FP_ARGS; 5263 const int8_t *int_reg_params = _ir_int_reg_params; 5264 const int8_t *fp_reg_params = _ir_fp_reg_params; 5265 int32_t stack_offset = 0; 5266 int32_t param_stack_size = 0; 5267 5268 if (ctx->flags & IR_USE_FRAME_POINTER) { 5269 /* skip old frame pointer and return address */ 5270 stack_offset = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); 5271 } else { 5272 stack_offset = ctx->stack_frame_size; 5273 } 5274 n = use_list->count; 5275 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 5276 use = *p; 5277 insn = &ctx->ir_base[use]; 5278 if (insn->op == IR_PARAM) { 5279 if (IR_IS_TYPE_INT(insn->type)) { 5280 if (int_param_num < int_reg_params_count) { 5281 src_reg = int_reg_params[int_param_num]; 5282 } else { 5283 src_reg = IR_REG_NONE; 5284 } 5285 int_param_num++; 5286 } else { 5287 if (fp_param_num < fp_reg_params_count) { 5288 src_reg = fp_reg_params[fp_param_num]; 5289 } else { 5290 src_reg = IR_REG_NONE; 5291 } 5292 fp_param_num++; 5293 } 5294 if (src_reg == IR_REG_NONE) { 5295 if (ctx->vregs[use]) { 5296 ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; 5297 if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) 5298 && ival->stack_spill_pos == -1 5299 && (ival->next || ival->reg == IR_REG_NONE)) { 5300 ival->stack_spill_pos = stack_offset; 5301 } 5302 } 5303 if (sizeof(void*) == 8) { 5304 stack_offset += sizeof(void*); 5305 param_stack_size += sizeof(void*); 5306 } else { 5307 stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 5308 param_stack_size += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 5309 } 5310 } 5311 } 5312 } 5313 5314 ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); 5315 ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); 5316 ctx->param_stack_size = param_stack_size; 5317} 5318 5319static void ir_allocate_unique_spill_slots(ir_ctx *ctx) 5320{ 
5321 uint32_t b; 5322 ir_block *bb; 5323 ir_insn *insn; 5324 ir_ref i, n, j, *p; 5325 uint32_t *rule, insn_flags; 5326 ir_backend_data *data = ctx->data; 5327 ir_regset available = 0; 5328 ir_target_constraints constraints; 5329 uint32_t def_flags; 5330 ir_reg reg; 5331 5332 ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); 5333 memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); 5334 5335 /* vregs + tmp + fixed + SRATCH + ALL */ 5336 ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); 5337 5338 if (!ctx->arena) { 5339 ctx->arena = ir_arena_create(16 * 1024); 5340 } 5341 5342 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { 5343 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 5344 for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { 5345 switch (ctx->rules ? *rule : insn->op) { 5346 case IR_START: 5347 case IR_BEGIN: 5348 case IR_END: 5349 case IR_IF_TRUE: 5350 case IR_IF_FALSE: 5351 case IR_CASE_VAL: 5352 case IR_CASE_DEFAULT: 5353 case IR_MERGE: 5354 case IR_LOOP_BEGIN: 5355 case IR_LOOP_END: 5356 break; 5357 default: 5358 def_flags = ir_get_target_constraints(ctx, i, &constraints); 5359 if (ctx->rules 5360 && *rule != IR_CMP_AND_BRANCH_INT 5361 && *rule != IR_CMP_AND_BRANCH_FP 5362 && *rule != IR_GUARD_CMP_INT 5363 && *rule != IR_GUARD_CMP_FP) { 5364 available = IR_REGSET_SCRATCH; 5365 } 5366 if (ctx->vregs[i]) { 5367 reg = constraints.def_reg; 5368 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 5369 IR_REGSET_EXCL(available, reg); 5370 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 5371 } else if (def_flags & IR_USE_MUST_BE_IN_REG) { 5372 if (insn->op == IR_VLOAD 5373 && ctx->live_intervals[ctx->vregs[i]] 5374 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { 5375 /* pass */ 5376 } else if (insn->op != IR_PARAM) { 5377 reg = ir_get_free_reg(insn->type, available); 5378 IR_REGSET_EXCL(available, reg); 
5379 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 5380 } 5381 } 5382 if (!ctx->live_intervals[ctx->vregs[i]]) { 5383 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 5384 memset(ival, 0, sizeof(ir_live_interval)); 5385 ctx->live_intervals[ctx->vregs[i]] = ival; 5386 ival->type = insn->type; 5387 ival->reg = IR_REG_NONE; 5388 ival->vreg = ctx->vregs[i]; 5389 ival->stack_spill_pos = -1; 5390 if (insn->op == IR_PARAM && reg == IR_REG_NONE) { 5391 ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; 5392 } else { 5393 ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); 5394 } 5395 } else if (insn->op == IR_PARAM) { 5396 IR_ASSERT(0 && "unexpected PARAM"); 5397 return; 5398 } 5399 } else if (insn->op == IR_VAR) { 5400 ir_use_list *use_list = &ctx->use_lists[i]; 5401 ir_ref n = use_list->count; 5402 5403 if (n > 0) { 5404 int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); 5405 ir_ref i, *p, use; 5406 ir_insn *use_insn; 5407 5408 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 5409 use = *p; 5410 use_insn = &ctx->ir_base[use]; 5411 if (use_insn->op == IR_VLOAD) { 5412 if (ctx->vregs[use] 5413 && !ctx->live_intervals[ctx->vregs[use]]) { 5414 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 5415 memset(ival, 0, sizeof(ir_live_interval)); 5416 ctx->live_intervals[ctx->vregs[use]] = ival; 5417 ival->type = insn->type; 5418 ival->reg = IR_REG_NONE; 5419 ival->vreg = ctx->vregs[use]; 5420 ival->stack_spill_pos = stack_spill_pos; 5421 } 5422 } else if (use_insn->op == IR_VSTORE) { 5423 if (!IR_IS_CONST_REF(use_insn->op3) 5424 && ctx->vregs[use_insn->op3] 5425 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { 5426 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 5427 memset(ival, 0, sizeof(ir_live_interval)); 5428 ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; 5429 ival->type = insn->type; 5430 
ival->reg = IR_REG_NONE; 5431 ival->vreg = ctx->vregs[use_insn->op3]; 5432 ival->stack_spill_pos = stack_spill_pos; 5433 } 5434 } 5435 } 5436 } 5437 } 5438 5439 insn_flags = ir_op_flags[insn->op]; 5440 n = constraints.tmps_count; 5441 if (n) { 5442 do { 5443 n--; 5444 if (constraints.tmp_regs[n].type) { 5445 ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); 5446 IR_REGSET_EXCL(available, reg); 5447 ctx->regs[i][constraints.tmp_regs[n].num] = reg; 5448 } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { 5449 available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); 5450 } else { 5451 IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); 5452 } 5453 } while (n); 5454 } 5455 n = insn->inputs_count; 5456 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { 5457 ir_ref input = *p; 5458 if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { 5459 if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { 5460 ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); 5461 ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; 5462 } else { 5463 uint8_t use_flags = IR_USE_FLAGS(def_flags, j); 5464 ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; 5465 5466 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 5467 IR_REGSET_EXCL(available, reg); 5468 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 5469 } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { 5470 ctx->regs[i][j] = ctx->regs[i][1]; 5471 } else if (use_flags & IR_USE_MUST_BE_IN_REG) { 5472 reg = ir_get_free_reg(ctx->ir_base[input].type, available); 5473 IR_REGSET_EXCL(available, reg); 5474 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 5475 } 5476 } 5477 } 5478 } 5479 break; 5480 } 5481 n = ir_insn_len(insn); 5482 i += n; 5483 insn += n; 5484 rule += n; 5485 } 5486 if (bb->flags & IR_BB_DESSA_MOVES) { 5487 data->dessa_from_block = b; 5488 ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); 5489 } 5490 } 5491 5492 ctx->used_preserved_regs = ctx->fixed_save_regset; 5493 ctx->flags |= IR_NO_STACK_COMBINE; 5494 ir_fix_stack_frame(ctx); 5495} 5496 5497static void ir_preallocate_call_stack(ir_ctx *ctx) 5498{ 5499 int call_stack_size, peak_call_stack_size = 0; 5500 ir_ref i, n; 5501 ir_insn *insn; 5502 5503 for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { 5504 if (insn->op == IR_CALL) { 5505 call_stack_size = ir_call_used_stack(ctx, insn); 5506 if (call_stack_size > peak_call_stack_size) { 5507 peak_call_stack_size = call_stack_size; 5508 } 5509 } 5510 n = ir_insn_len(insn); 5511 i += n; 5512 insn += n; 5513 } 5514 if (peak_call_stack_size) { 5515 ctx->call_stack_size = peak_call_stack_size; 5516 ctx->flags |= IR_PREALLOCATED_STACK; 5517 } 5518} 5519 5520void ir_fix_stack_frame(ir_ctx *ctx) 5521{ 5522 uint32_t additional_size = 0; 5523 5524 ctx->locals_area_size = ctx->stack_frame_size; 5525 5526 if (ctx->used_preserved_regs) { 5527 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 5528 ir_reg reg; 5529 (void) reg; 5530 5531 IR_REGSET_FOREACH(used_preserved_regs, reg) { 5532 additional_size += sizeof(void*); 5533 } IR_REGSET_FOREACH_END(); 5534 } 5535 5536 if ((ctx->flags & 
IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { 5537 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 5538 additional_size += sizeof(void*) * IR_REG_INT_ARGS; 5539 } 5540 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 5541 additional_size += 16 * IR_REG_FP_ARGS; 5542 } 5543 } 5544 5545 ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); 5546 ctx->stack_frame_size += additional_size; 5547 ctx->stack_frame_alignment = 0; 5548 ctx->call_stack_size = 0; 5549 5550 if ((ctx->flags2 & IR_HAS_CALLS) && !(ctx->flags & IR_FUNCTION)) { 5551 while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { 5552 ctx->stack_frame_size += sizeof(void*); 5553 ctx->stack_frame_alignment += sizeof(void*); 5554 } 5555 } else if (ctx->flags2 & IR_HAS_CALLS) { 5556 ctx->flags |= IR_USE_FRAME_POINTER; 5557 /* Stack must be 16 byte aligned */ 5558 if (!(ctx->flags & IR_FUNCTION)) { 5559 while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { 5560 ctx->stack_frame_size += sizeof(void*); 5561 ctx->stack_frame_alignment += sizeof(void*); 5562 } 5563 } else if (ctx->flags & IR_USE_FRAME_POINTER) { 5564 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { 5565 ctx->stack_frame_size += sizeof(void*); 5566 ctx->stack_frame_alignment += sizeof(void*); 5567 } 5568 } else { 5569 if (!(ctx->flags & IR_NO_STACK_COMBINE)) { 5570 ir_preallocate_call_stack(ctx); 5571 } 5572 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size, 16) != 5573 ctx->stack_frame_size + ctx->call_stack_size) { 5574 ctx->stack_frame_size += sizeof(void*); 5575 ctx->stack_frame_alignment += sizeof(void*); 5576 } 5577 } 5578 } 5579 5580 ir_fix_param_spills(ctx); 5581} 5582 5583static void* dasm_labels[ir_lb_MAX]; 5584 5585/* Veneers support (TODO: avid global variable usage) */ 5586static 
ir_ctx *ir_current_ctx; 5587 5588void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) 5589{ 5590 uint32_t b, n, target; 5591 ir_block *bb; 5592 ir_ref i; 5593 ir_insn *insn; 5594 uint32_t *rule; 5595 ir_backend_data data; 5596 dasm_State **Dst; 5597 int ret; 5598 void *entry; 5599 size_t size; 5600 5601 data.ra_data.unused_slot_4 = 0; 5602 data.ra_data.unused_slot_2 = 0; 5603 data.ra_data.unused_slot_1 = 0; 5604 data.ra_data.handled = NULL; 5605 data.rodata_label = 0; 5606 data.jmp_table_label = 0; 5607 ctx->data = &data; 5608 5609 if (!ctx->live_intervals) { 5610 ctx->stack_frame_size = 0; 5611 ctx->stack_frame_alignment = 0; 5612 ctx->call_stack_size = 0; 5613 ctx->used_preserved_regs = 0; 5614 ir_allocate_unique_spill_slots(ctx); 5615 } 5616 5617 if (ctx->fixed_stack_frame_size != -1) { 5618 if (ctx->fixed_stack_red_zone) { 5619 IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); 5620 } 5621 if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { 5622 // TODO: report error to caller 5623#ifdef IR_DEBUG_MESSAGES 5624 fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", 5625 __FILE__, __LINE__); 5626#endif 5627 ctx->data = NULL; 5628 ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; 5629 return NULL; 5630 } 5631 ctx->stack_frame_size = ctx->fixed_stack_frame_size; 5632 ctx->call_stack_size = ctx->fixed_call_stack_size; 5633 ctx->stack_frame_alignment = 0; 5634 } 5635 5636 Dst = &data.dasm_state; 5637 data.dasm_state = NULL; 5638 dasm_init(&data.dasm_state, DASM_MAXSECTION); 5639 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); 5640 dasm_setup(&data.dasm_state, dasm_actions); 5641 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry + exit_table label */ 5642 dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count + 1); 5643 data.emit_constants = 
ir_bitset_malloc(ctx->consts_count); 5644 5645 if (!(ctx->flags & IR_SKIP_PROLOGUE)) { 5646 ir_emit_prologue(ctx); 5647 } 5648 if (ctx->flags & IR_FUNCTION) { 5649 ir_emit_load_params(ctx); 5650 } 5651 5652 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { 5653 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 5654 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { 5655 continue; 5656 } 5657 |=>b: 5658 5659 i = bb->start; 5660 insn = ctx->ir_base + i; 5661 if (bb->flags & IR_BB_ENTRY) { 5662 uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; 5663 5664 |=>label: 5665 ir_emit_prologue(ctx); 5666 ctx->entries[insn->op3] = i; 5667 } 5668 5669 /* skip first instruction */ 5670 n = ir_insn_len(insn); 5671 i += n; 5672 insn += n; 5673 rule = ctx->rules + i; 5674 5675 while (i <= bb->end) { 5676 if (!((*rule) & (IR_FUSED|IR_SKIPPED))) 5677 switch (*rule) { 5678 case IR_VAR: 5679 case IR_PARAM: 5680 case IR_PI: 5681 case IR_PHI: 5682 case IR_SNAPSHOT: 5683 case IR_VA_END: 5684 break; 5685 case IR_MUL_PWR2: 5686 case IR_DIV_PWR2: 5687 case IR_MOD_PWR2: 5688 ir_emit_mul_div_mod_pwr2(ctx, i, insn); 5689 break; 5690 case IR_SDIV_PWR2: 5691 ir_emit_sdiv_pwr2(ctx, i, insn); 5692 break; 5693 case IR_SMOD_PWR2: 5694 ir_emit_smod_pwr2(ctx, i, insn); 5695 break; 5696 case IR_SHIFT: 5697 ir_emit_shift(ctx, i, insn); 5698 break; 5699 case IR_SHIFT_CONST: 5700 ir_emit_shift_const(ctx, i, insn); 5701 break; 5702 case IR_CTPOP: 5703 ir_emit_ctpop(ctx, i, insn); 5704 break; 5705 case IR_OP_INT: 5706 ir_emit_op_int(ctx, i, insn); 5707 break; 5708 case IR_OP_FP: 5709 ir_emit_op_fp(ctx, i, insn); 5710 break; 5711 case IR_BINOP_INT: 5712 ir_emit_binop_int(ctx, i, insn); 5713 break; 5714 case IR_BINOP_FP: 5715 ir_emit_binop_fp(ctx, i, insn); 5716 break; 5717 case IR_CMP_INT: 5718 ir_emit_cmp_int(ctx, i, insn); 5719 break; 5720 case IR_CMP_FP: 5721 ir_emit_cmp_fp(ctx, i, insn); 5722 break; 5723 case IR_SEXT: 5724 
ir_emit_sext(ctx, i, insn); 5725 break; 5726 case IR_ZEXT: 5727 ir_emit_zext(ctx, i, insn); 5728 break; 5729 case IR_TRUNC: 5730 ir_emit_trunc(ctx, i, insn); 5731 break; 5732 case IR_BITCAST: 5733 case IR_PROTO: 5734 ir_emit_bitcast(ctx, i, insn); 5735 break; 5736 case IR_INT2FP: 5737 ir_emit_int2fp(ctx, i, insn); 5738 break; 5739 case IR_FP2INT: 5740 ir_emit_fp2int(ctx, i, insn); 5741 break; 5742 case IR_FP2FP: 5743 ir_emit_fp2fp(ctx, i, insn); 5744 break; 5745 case IR_COPY_INT: 5746 ir_emit_copy_int(ctx, i, insn); 5747 break; 5748 case IR_COPY_FP: 5749 ir_emit_copy_fp(ctx, i, insn); 5750 break; 5751 case IR_CMP_AND_BRANCH_INT: 5752 ir_emit_cmp_and_branch_int(ctx, b, i, insn); 5753 break; 5754 case IR_CMP_AND_BRANCH_FP: 5755 ir_emit_cmp_and_branch_fp(ctx, b, i, insn); 5756 break; 5757 case IR_GUARD_CMP_INT: 5758 ir_emit_guard_cmp_int(ctx, b, i, insn); 5759 break; 5760 case IR_GUARD_CMP_FP: 5761 ir_emit_guard_cmp_fp(ctx, b, i, insn); 5762 break; 5763 case IR_IF_INT: 5764 ir_emit_if_int(ctx, b, i, insn); 5765 break; 5766 case IR_COND: 5767 ir_emit_cond(ctx, i, insn); 5768 break; 5769 case IR_SWITCH: 5770 ir_emit_switch(ctx, b, i, insn); 5771 break; 5772 case IR_MIN_MAX_INT: 5773 ir_emit_min_max_int(ctx, i, insn); 5774 break; 5775 case IR_OVERFLOW: 5776 ir_emit_overflow(ctx, i, insn); 5777 break; 5778 case IR_OVERFLOW_AND_BRANCH: 5779 ir_emit_overflow_and_branch(ctx, b, i, insn); 5780 break; 5781 case IR_END: 5782 case IR_LOOP_END: 5783 if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { 5784 ir_emit_osr_entry_loads(ctx, b, bb); 5785 } 5786 if (bb->flags & IR_BB_DESSA_MOVES) { 5787 ir_emit_dessa_moves(ctx, b, bb); 5788 } 5789 do { 5790 ir_ref succ = ctx->cfg_edges[bb->successors]; 5791 5792 if (UNEXPECTED(bb->successors_count == 2)) { 5793 if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { 5794 succ = ctx->cfg_edges[bb->successors + 1]; 5795 } else { 5796 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 5797 } 5798 } else { 5799 
IR_ASSERT(bb->successors_count == 1); 5800 } 5801 target = ir_skip_empty_target_blocks(ctx, succ); 5802 if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { 5803 | b =>target 5804 } 5805 } while (0); 5806 break; 5807 case IR_RETURN_VOID: 5808 ir_emit_return_void(ctx); 5809 break; 5810 case IR_RETURN_INT: 5811 ir_emit_return_int(ctx, i, insn); 5812 break; 5813 case IR_RETURN_FP: 5814 ir_emit_return_fp(ctx, i, insn); 5815 break; 5816 case IR_CALL: 5817 ir_emit_call(ctx, i, insn); 5818 break; 5819 case IR_TAILCALL: 5820 ir_emit_tailcall(ctx, i, insn); 5821 break; 5822 case IR_IJMP: 5823 ir_emit_ijmp(ctx, i, insn); 5824 break; 5825 case IR_REG_BINOP_INT: 5826 ir_emit_reg_binop_int(ctx, i, insn); 5827 break; 5828 case IR_VADDR: 5829 ir_emit_vaddr(ctx, i, insn); 5830 break; 5831 case IR_VLOAD: 5832 ir_emit_vload(ctx, i, insn); 5833 break; 5834 case IR_VSTORE: 5835 ir_emit_vstore(ctx, i, insn); 5836 break; 5837 case IR_RLOAD: 5838 ir_emit_rload(ctx, i, insn); 5839 break; 5840 case IR_RSTORE: 5841 ir_emit_rstore(ctx, i, insn); 5842 break; 5843 case IR_LOAD_INT: 5844 ir_emit_load_int(ctx, i, insn); 5845 break; 5846 case IR_LOAD_FP: 5847 ir_emit_load_fp(ctx, i, insn); 5848 break; 5849 case IR_STORE_INT: 5850 ir_emit_store_int(ctx, i, insn); 5851 break; 5852 case IR_STORE_FP: 5853 ir_emit_store_fp(ctx, i, insn); 5854 break; 5855 case IR_ALLOCA: 5856 ir_emit_alloca(ctx, i, insn); 5857 break; 5858 case IR_VA_START: 5859 ir_emit_va_start(ctx, i, insn); 5860 break; 5861 case IR_VA_COPY: 5862 ir_emit_va_copy(ctx, i, insn); 5863 break; 5864 case IR_VA_ARG: 5865 ir_emit_va_arg(ctx, i, insn); 5866 break; 5867 case IR_AFREE: 5868 ir_emit_afree(ctx, i, insn); 5869 break; 5870 case IR_FRAME_ADDR: 5871 ir_emit_frame_addr(ctx, i); 5872 break; 5873 case IR_EXITCALL: 5874 ir_emit_exitcall(ctx, i, insn); 5875 break; 5876 case IR_GUARD: 5877 case IR_GUARD_NOT: 5878 ir_emit_guard(ctx, i, insn); 5879 break; 5880 case IR_GUARD_OVERFLOW: 5881 
ir_emit_guard_overflow(ctx, i, insn); 5882 break; 5883 case IR_TLS: 5884 ir_emit_tls(ctx, i, insn); 5885 break; 5886 case IR_TRAP: 5887 | brk 5888 break; 5889 default: 5890 IR_ASSERT(0 && "NIY rule/instruction"); 5891 ir_mem_free(data.emit_constants); 5892 dasm_free(&data.dasm_state); 5893 ctx->data = NULL; 5894 ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; 5895 return NULL; 5896 } 5897 n = ir_insn_len(insn); 5898 i += n; 5899 insn += n; 5900 rule += n; 5901 } 5902 } 5903 5904 if (ctx->deoptimization_exits) { 5905 uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count; 5906 5907 |=>exit_table_label: 5908 for (i = 0; i < ctx->deoptimization_exits; i++) { 5909 const void *exit_addr = ctx->get_exit_addr(i); 5910 5911 if (!exit_addr) { 5912 ctx->data = NULL; 5913 return 0; 5914 } 5915 | b &exit_addr 5916 } 5917 } 5918 5919 if (data.rodata_label) { 5920 |.rodata 5921 } 5922 IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) { 5923 insn = &ctx->ir_base[-i]; 5924 if (IR_IS_TYPE_FP(insn->type)) { 5925 int label = ctx->cfg_blocks_count + i; 5926 5927 if (!data.rodata_label) { 5928 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 5929 5930 |.rodata 5931 |=>data.rodata_label: 5932 } 5933 if (insn->type == IR_DOUBLE) { 5934 |.align 8 5935 |=>label: 5936 |.long insn->val.u32, insn->val.u32_hi 5937 } else { 5938 IR_ASSERT(insn->type == IR_FLOAT); 5939 |.align 4 5940 |=>label: 5941 |.long insn->val.u32 5942 } 5943 } else if (insn->op == IR_STR) { 5944 int label = ctx->cfg_blocks_count + i; 5945 const char *str = ir_get_str(ctx, insn->val.str); 5946 int i = 0; 5947 5948 if (!data.rodata_label) { 5949 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 5950 5951 |.rodata 5952 |=>data.rodata_label: 5953 } 5954 |.align 8 5955 |=>label: 5956 while (1) { 5957 char c; 5958 uint32_t w = 0; 5959 int j; 5960 5961 for (j = 0; j < 4; j++) { 5962 c = str[i]; 5963 if (!c) { 5964 break; 
5965 } 5966 w |= c << (8 * j); 5967 i++; 5968 } 5969 | .long w 5970 if (!c) { 5971 break; 5972 } 5973 } 5974 5975 } else { 5976 IR_ASSERT(0); 5977 } 5978 } IR_BITSET_FOREACH_END(); 5979 if (data.rodata_label) { 5980 |.code 5981 } 5982 ir_mem_free(data.emit_constants); 5983 5984 if (ctx->status) { 5985 dasm_free(&data.dasm_state); 5986 ctx->data = NULL; 5987 return NULL; 5988 } 5989 5990 ret = dasm_link(&data.dasm_state, size_ptr); 5991 if (ret != DASM_S_OK) { 5992 IR_ASSERT(0); 5993 dasm_free(&data.dasm_state); 5994 ctx->data = NULL; 5995 ctx->status = IR_ERROR_LINK; 5996 return NULL; 5997 } 5998 size = *size_ptr; 5999 6000 if (ctx->code_buffer) { 6001 entry = ctx->code_buffer->pos; 6002 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 6003 if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) { 6004 ctx->data = NULL; 6005 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 6006 return NULL; 6007 } 6008 ctx->code_buffer->pos = (char*)entry + size; 6009 } else { 6010 entry = ir_mem_mmap(size); 6011 if (!entry) { 6012 dasm_free(&data.dasm_state); 6013 ctx->data = NULL; 6014 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 6015 return NULL; 6016 } 6017 ir_mem_unprotect(entry, size); 6018 } 6019 6020 if (ctx->deoptimization_exits) { 6021 uint32_t exit_table_label = ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count; 6022 6023 ctx->deoptimization_exits_base = (const void*)((char*)entry + dasm_getpclabel(&data.dasm_state, exit_table_label)); 6024 } 6025 6026 ir_current_ctx = ctx; 6027 ret = dasm_encode(&data.dasm_state, entry); 6028 if (ret != DASM_S_OK) { 6029 IR_ASSERT(0); 6030 dasm_free(&data.dasm_state); 6031 if (ctx->code_buffer) { 6032 if (ctx->code_buffer->pos == (char*)entry + size) { 6033 /* rollback */ 6034 ctx->code_buffer->pos = (char*)entry - size; 6035 } 6036 } else { 6037 ir_mem_unmap(entry, size); 6038 } 6039 ctx->data = NULL; 6040 ctx->status = IR_ERROR_ENCODE; 6041 return NULL; 6042 } 6043 6044 if (data.jmp_table_label) { 
6045 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); 6046 ctx->jmp_table_offset = offset; 6047 } else { 6048 ctx->jmp_table_offset = 0; 6049 } 6050 if (data.rodata_label) { 6051 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); 6052 ctx->rodata_offset = offset; 6053 } else { 6054 ctx->rodata_offset = 0; 6055 } 6056 6057 if (ctx->entries_count) { 6058 /* For all entries */ 6059 i = ctx->entries_count; 6060 do { 6061 ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; 6062 uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); 6063 insn->op3 = offset; 6064 } while (i != 0); 6065 } 6066 6067 dasm_free(&data.dasm_state); 6068 6069 if (ctx->code_buffer) { 6070 size = (char*)ctx->code_buffer->pos - (char*)entry; 6071 } 6072 6073 ir_mem_flush(entry, size); 6074 6075 if (!ctx->code_buffer) { 6076 ir_mem_protect(entry, size); 6077 } 6078 6079 ctx->data = NULL; 6080 return entry; 6081} 6082 6083const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr) 6084{ 6085 void *entry; 6086 size_t size; 6087 uint32_t i; 6088 dasm_State **Dst, *dasm_state; 6089 int ret; 6090 6091 IR_ASSERT(code_buffer); 6092 IR_ASSERT(aarch64_may_use_b(code_buffer, exit_addr)); 6093 6094 Dst = &dasm_state; 6095 dasm_state = NULL; 6096 dasm_init(&dasm_state, DASM_MAXSECTION); 6097 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 6098 dasm_setup(&dasm_state, dasm_actions); 6099 6100 | bl >2 6101 |1: 6102 for (i = 1; i < exit_points_per_group; i++) { 6103 | bl >2 6104 } 6105 |2: 6106 | adr Rx(IR_REG_INT_TMP), <1 6107 | sub Rx(IR_REG_INT_TMP), lr, Rx(IR_REG_INT_TMP) 6108 | lsr Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #2 6109 if (first_exit_point) { 6110 | add Rx(IR_REG_INT_TMP), Rx(IR_REG_INT_TMP), #first_exit_point 6111 } 6112 | b &exit_addr 6113 6114 ret = dasm_link(&dasm_state, &size); 6115 if (ret 
!= DASM_S_OK) { 6116 IR_ASSERT(0); 6117 dasm_free(&dasm_state); 6118 return NULL; 6119 } 6120 6121 entry = code_buffer->pos; 6122 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 6123 if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { 6124 return NULL; 6125 } 6126 code_buffer->pos = (char*)entry + size; 6127 6128 ir_current_ctx = NULL; 6129 ret = dasm_encode(&dasm_state, entry); 6130 if (ret != DASM_S_OK) { 6131 IR_ASSERT(0); 6132 dasm_free(&dasm_state); 6133 if (code_buffer->pos == (char*)entry + size) { 6134 /* rollback */ 6135 code_buffer->pos = (char*)entry - size; 6136 } 6137 return NULL; 6138 } 6139 6140 dasm_free(&dasm_state); 6141 6142 ir_mem_flush(entry, size); 6143 6144 *size_ptr = size; 6145 return entry; 6146} 6147 6148static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset) 6149{ 6150 ir_ctx *ctx = ir_current_ctx; 6151 const void *addr, *veneer = NULL; 6152 ptrdiff_t na; 6153 int n, m; 6154 6155 IR_ASSERT(ctx && ctx->code_buffer); 6156 6157 if ((ins >> 16) == DASM_REL_A) { 6158 addr = (void*)((((ptrdiff_t)(*(b-1))) << 32) | (unsigned int)(*(b-2))); 6159 if (ctx->get_veneer) { 6160 veneer = ctx->get_veneer(ctx, addr); 6161 } 6162 } else { 6163 IR_ASSERT(0 && "too long jmp distance"); 6164 return 0; 6165 } 6166 6167 if (veneer) { 6168 na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; 6169 n = (int)na; 6170 6171 /* check if we can jump to veneer */ 6172 if ((ptrdiff_t)n != na) { 6173 /* pass */ 6174 } else if (!(ins & 0xf800)) { /* B, BL */ 6175 if ((n & 3) == 0 && ((n+0x08000000) >> 28) == 0) { 6176 return n; 6177 } 6178 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ 6179 if ((n & 3) == 0 && ((n+0x00100000) >> 21) == 0) { 6180 return n; 6181 } 6182 } else if ((ins & 0x3000) == 0x2000) { /* ADR */ 6183 /* pass */ 6184 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ 6185 /* pass */ 6186 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ 6187 if ((n & 3) == 0 && ((n+0x00008000) 
>> 16) == 0) { 6188 return n; 6189 } 6190 } 6191 } 6192 6193 veneer = ctx->code_buffer->pos; 6194 if ((char*)ctx->code_buffer->end - (char*)veneer < 4 ) { 6195 IR_ASSERT(0 && "too long jmp distance" && "jit buffer overflow"); 6196 return 0; /* jit_buffer_size overflow */ 6197 } 6198 6199 na = (ptrdiff_t)veneer - (ptrdiff_t)cp + 4; 6200 n = (int)na; 6201 6202 /* check if we can jump to veneer */ 6203 if ((ptrdiff_t)n != na) { 6204 IR_ASSERT(0 && "too long jmp distance"); 6205 return 0; 6206 } else if (!(ins & 0xf800)) { /* B, BL */ 6207 if ((n & 3) != 0 || ((n+0x08000000) >> 28) != 0) { 6208 IR_ASSERT(0 && "too long jmp distance"); 6209 return 0; 6210 } 6211 } else if ((ins & 0x800)) { /* B.cond, CBZ, CBNZ, LDR* literal */ 6212 if ((n & 3) != 0 || ((n+0x00100000) >> 21) != 0) { 6213 IR_ASSERT(0 && "too long jmp distance"); 6214 return 0; 6215 } 6216 } else if ((ins & 0x3000) == 0x2000) { /* ADR */ 6217 IR_ASSERT(0 && "too long jmp distance"); 6218 return 0; 6219 } else if ((ins & 0x3000) == 0x3000) { /* ADRP */ 6220 IR_ASSERT(0 && "too long jmp distance"); 6221 return 0; 6222 } else if ((ins & 0x1000)) { /* TBZ, TBNZ */ 6223 if ((n & 3) != 0 || ((n+0x00008000) >> 16) != 0) { 6224 IR_ASSERT(0 && "too long jmp distance"); 6225 return 0; 6226 } 6227 } else if ((ins & 0x8000)) { /* absolute */ 6228 IR_ASSERT(0 && "too long jmp distance"); 6229 return 0; 6230 } else { 6231 IR_ASSERT(0 && "too long jmp distance"); 6232 return 0; 6233 } 6234 6235 /* check if we can use B to jump from veneer */ 6236 na = (ptrdiff_t)cp + offset - (ptrdiff_t)veneer - 4; 6237 m = (int)na; 6238 if ((ptrdiff_t)m != na) { 6239 IR_ASSERT(0 && "too long jmp distance"); 6240 return 0; 6241 } else if ((m & 3) != 0 || ((m+0x08000000) >> 28) != 0) { 6242 IR_ASSERT(0 && "too long jmp distance"); 6243 return 0; 6244 } 6245 6246 if (!ctx->set_veneer || !ctx->set_veneer(ctx, addr, veneer)) { 6247 IR_ASSERT(0 && "too long jmp distance"); 6248 return 0; 6249 } 6250 6251 /* generate B instruction */ 6252 
*(uint32_t*)veneer = 0x14000000 | ((m >> 2) & 0x03ffffff); 6253 ctx->code_buffer->pos = (char*)ctx->code_buffer->pos + 4; 6254 6255 return n; 6256} 6257 6258bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr) 6259{ 6260 return !aarch64_may_use_b(code_buffer, addr); 6261} 6262 6263void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr) 6264{ 6265 void *entry; 6266 size_t size; 6267 dasm_State **Dst, *dasm_state; 6268 int ret; 6269 6270 Dst = &dasm_state; 6271 dasm_state = NULL; 6272 dasm_init(&dasm_state, DASM_MAXSECTION); 6273 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 6274 dasm_setup(&dasm_state, dasm_actions); 6275 6276 |.code 6277 | movz Rx(IR_REG_INT_TMP), #((uint64_t)(addr) & 0xffff) 6278 | movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 16) & 0xffff), lsl #16 6279 | movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 32) & 0xffff), lsl #32 6280 | movk Rx(IR_REG_INT_TMP), #(((uint64_t)(addr) >> 48) & 0xffff), lsl #48 6281 | br Rx(IR_REG_INT_TMP) 6282 6283 ret = dasm_link(&dasm_state, &size); 6284 if (ret != DASM_S_OK) { 6285 IR_ASSERT(0); 6286 dasm_free(&dasm_state); 6287 return NULL; 6288 } 6289 6290 entry = code_buffer->pos; 6291 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 4); 6292 if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { 6293 dasm_free(&dasm_state); 6294 return NULL; 6295 } 6296 6297 ret = dasm_encode(&dasm_state, entry); 6298 if (ret != DASM_S_OK) { 6299 dasm_free(&dasm_state); 6300 return NULL; 6301 } 6302 6303 *size_ptr = size; 6304 code_buffer->pos = (char*)code_buffer->pos + size; 6305 6306 dasm_free(&dasm_state); 6307 ir_mem_flush(entry, size); 6308 6309 return entry; 6310} 6311 6312void ir_fix_thunk(void *thunk_entry, void *addr) 6313{ 6314 uint32_t *code = thunk_entry; 6315 IR_ASSERT((code[0] & 0xffe00000) == 0xd2800000 6316 && (code[1] & 0xffe00000) == 0xf2a00000 6317 && (code[2] & 0xffe00000) == 0xf2c00000 6318 && (code[3] & 0xffe00000) == 0xf2e00000 6319 && (code[4] & 0xfffffc1f) 
== 0xd61f0000); 6320 6321 code[0] = (code[0] & 0xffe0001f) | (uint32_t)((uint64_t)(addr) & 0xffff) << 5; 6322 code[1] = (code[1] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 16) & 0xffff) << 5; 6323 code[2] = (code[2] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 32) & 0xffff) << 5; 6324 code[3] = (code[3] & 0xffe0001f) | (uint32_t)(((uint64_t)(addr) >> 48) & 0xffff) << 5; 6325 6326 ir_mem_flush(code, sizeof(uint32_t) * 4); 6327} 6328