/*
 * IR - Lightweight JIT Compilation Framework
 * (x86/x86_64 native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov
 */

|.if X64
|.arch x64
|.else
|.arch x86
|.endif

|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 16

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;
# define IR_MEM_VAL(loc) ((loc).v)
#else
typedef uint64_t ir_mem;
# define IR_MEM_VAL(loc) (loc)
#endif

#define IR_MEM_OFFSET(loc) ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff))
#define IR_MEM_BASE(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff))
#define IR_MEM_INDEX(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff))
#define IR_MEM_SCALE(loc) ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff))

#define IR_MEM_O(addr) IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 1)
#define IR_MEM_B(base) IR_MEM(base, 0, IR_REG_NONE, 1)
#define IR_MEM_BO(base, offset) IR_MEM(base, offset, IR_REG_NONE, 1)

IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t scale)
{
	ir_mem mem;

	IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST));
	IR_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8);
#ifdef IR_DEBUG
	mem.v =
#else
	mem =
#endif
		((uint64_t)(uint32_t)offset |
		((uint64_t)(uint8_t)base << 32) |
		((uint64_t)(uint8_t)index << 40) |
		((uint64_t)(uint8_t)scale << 48));
	return mem;
}

#define IR_IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1)))
#define IR_IS_SIGNED_NEG_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= -2147483647))
#define IR_IS_UNSIGNED_32BIT(val) (((uintptr_t)(val)) <= 0xffffffff)
#define IR_IS_32BIT(type, val) (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64))

#define IR_IS_FP_ZERO(insn) ((insn.type == IR_DOUBLE) ? (insn.val.u64 == 0) : (insn.val.u32 == 0))

#define IR_MAY_USE_32BIT_ADDR(code_buffer, addr) \
	((code_buffer) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - (char*)(code_buffer)->start) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)(code_buffer)->end)))

#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ?
\ ((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \ ((offset) + ctx->call_stack_size)) |.macro ASM_EXPAND_OP_MEM, MACRO, op, type, op1 || do { || int32_t offset = IR_MEM_OFFSET(op1); || int32_t base = IR_MEM_BASE(op1); || int32_t index = IR_MEM_INDEX(op1); || int32_t scale = IR_MEM_SCALE(op1); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | MACRO op, type, [offset] || } else { | MACRO op, type, [Ra(base)+offset] || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)*8+offset] || } else { | MACRO op, type, [Ra(base)+Ra(index)*8+offset] || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)*4+offset] || } else { | MACRO op, type, [Ra(base)+Ra(index)*4+offset] || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)*2+offset] || } else { | MACRO op, type, [Ra(base)+Ra(index)*2+offset] || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)+offset] || } else { | MACRO op, type, [Ra(base)+Ra(index)+offset] || } || } || } while (0); |.endmacro |.macro ASM_EXPAND_OP1_MEM, MACRO, op, type, op1, op2 || do { || int32_t offset = IR_MEM_OFFSET(op1); || int32_t base = IR_MEM_BASE(op1); || int32_t index = IR_MEM_INDEX(op1); || int32_t scale = IR_MEM_SCALE(op1); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | MACRO op, type, [offset], op2 || } else { | MACRO op, type, [Ra(base)+offset], op2 || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)*8+offset], op2 || } else { | MACRO op, type, [Ra(base)+Ra(index)*8+offset], op2 || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)*4+offset], op2 || } else { | MACRO op, type, [Ra(base)+Ra(index)*4+offset], op2 || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)*2+offset], op2 || } else { | MACRO op, type, [Ra(base)+Ra(index)*2+offset], op2 || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | MACRO op, type, [Ra(index)+offset], op2 || } else { | MACRO op, type, [Ra(base)+Ra(index)+offset], op2 || } || } || } while (0); |.endmacro |.macro ASM_EXPAND_OP2_MEM, MACRO, op, type, op1, op2 || do { || int32_t offset = IR_MEM_OFFSET(op2); || int32_t base = IR_MEM_BASE(op2); || int32_t index = IR_MEM_INDEX(op2); || int32_t scale = IR_MEM_SCALE(op2); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [offset] || } else { | MACRO op, type, op1, [Ra(base)+offset] || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)*8+offset] || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset] || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)*4+offset] || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset] || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)*2+offset] || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset] || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)+offset] || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)+offset] || } || } || } while (0); |.endmacro |.macro ASM_EXPAND_OP2_MEM_3, MACRO, op, type, op1, op2, op3 || do { || int32_t offset = IR_MEM_OFFSET(op2); || int32_t base = IR_MEM_BASE(op2); || int32_t index = IR_MEM_INDEX(op2); || int32_t scale = IR_MEM_SCALE(op2); || if 
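/*
 * Illustrative sketch (comment only, not compiled): ir_mem packs a complete
 * x86 addressing mode into 64 bits - the 32-bit displacement in bits 0..31,
 * the base register id in bits 32..39, the index register id in bits 40..47
 * and the scale (1, 2, 4 or 8) in bits 48..55.  The ASM_EXPAND_*_MEM macros
 * decode those fields again and select the matching DynAsm operand form.
 * Assuming a hypothetical stand-alone check:
 *
 *     ir_mem m = IR_MEM_BO(IR_REG_RBP, -8);      // [rbp-8]
 *     IR_ASSERT(IR_MEM_BASE(m) == IR_REG_RBP);
 *     IR_ASSERT(IR_MEM_OFFSET(m) == -8);
 *     IR_ASSERT(IR_MEM_INDEX(m) == IR_REG_NONE);
 *     IR_ASSERT(IR_MEM_SCALE(m) == 1);
 *
 * which ASM_EXPAND_OP_MEM would expand through the "[Ra(base)+offset]" arm.
 */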
(index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [offset], op3 || } else { | MACRO op, type, op1, [Ra(base)+offset], op3 || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)*8+offset], op3 || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset], op3 || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)*4+offset], op3 || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset], op3 || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)*2+offset], op3 || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset], op3 || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | MACRO op, type, op1, [Ra(index)+offset], op3 || } else { | MACRO op, type, op1, [Ra(base)+Ra(index)+offset], op3 || } || } || } while (0); |.endmacro |.macro ASM_EXPAND_OP3_MEM, MACRO, op, type, op1, op2, op3 || do { || int32_t offset = IR_MEM_OFFSET(op3); || int32_t base = IR_MEM_BASE(op3); || int32_t index = IR_MEM_INDEX(op3); || int32_t scale = IR_MEM_SCALE(op3); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, op2, [offset] || } else { | MACRO op, type, op1, op2, [Ra(base)+offset] || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, op2, [Ra(index)*8+offset] || } else { | MACRO op, type, op1, op2, [Ra(base)+Ra(index)*8+offset] || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, op2, [Ra(index)*4+offset] || } else { | MACRO op, type, op1, op2, [Ra(base)+Ra(index)*4+offset] || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | MACRO op, type, op1, op2, [Ra(index)*2+offset] || } else { | MACRO op, type, op1, op2, [Ra(base)+Ra(index)*2+offset] || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | MACRO op, type, op1, op2, [Ra(index)+offset] || } else { | MACRO op, type, op1, op2, [Ra(base)+Ra(index)+offset] || } || } || } while (0); |.endmacro |.macro ASM_EXPAND_TYPE_MEM, op, type, op1 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op byte op1 || break; || case 2: | op word op1 || break; || case 4: | op dword op1 || break; |.if X64 || case 8: | op qword op1 || break; |.endif || } |.endmacro |.macro ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op byte op1, Rb(op2) || break; || case 2: | op word op1, Rw(op2) || break; || case 4: | op dword op1, Rd(op2) || break; |.if X64 || case 8: | op qword op1, Rq(op2) || break; |.endif || } |.endmacro |.macro ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op byte op1, op2 || break; || case 2: | op word op1, op2 || break; || case 4: | op dword op1, op2 || break; |.if X64 || case 8: | op qword op1, op2 || break; |.endif || } |.endmacro |.macro ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op byte op1, (op2 & 0xff) || break; || case 2: | op word op1, (op2 & 0xffff) || break; || case 4: | op dword op1, op2 || break; |.if X64 || case 8: | op qword op1, op2 || break; |.endif || } |.endmacro |.macro ASM_EXPAND_TYPE_REG_MEM, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op Rb(op1), byte op2 || break; || case 2: | op Rw(op1), word op2 || break; || case 4: | op Rd(op1), dword op2 || 
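/*
 * Sketch of the operand-size dispatch: the ASM_EXPAND_TYPE_* helpers pick the
 * operand width from ir_type_size[type] - 1 -> byte/Rb, 2 -> word/Rw,
 * 4 -> dword/Rd and, only on x86_64 (|.if X64), 8 -> qword/Rq.  For example a
 * 16-bit store routed through ASM_MEM_REG_OP comes out roughly as
 *
 *     mov word [rbx+0x10], cx
 *
 * while the same rule for a 64-bit value would use "qword ..., rcx" (and is
 * unreachable on 32-bit builds).  The registers and offset here are
 * hypothetical.
 */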
break; |.if X64 || case 8: | op Rq(op1), qword op2 || break; |.endif || } |.endmacro |.macro ASM_TMEM_OP, op, type, op1 || do { || int32_t offset = IR_MEM_OFFSET(op1); || int32_t base = IR_MEM_BASE(op1); || int32_t index = IR_MEM_INDEX(op1); || int32_t scale = IR_MEM_SCALE(op1); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | op type [offset] || } else { | op type [Ra(base)+offset] || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | op type [Ra(index)*8+offset] || } else { | op type [Ra(base)+Ra(index)*8+offset] || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | op type [Ra(index)*4+offset] || } else { | op type [Ra(base)+Ra(index)*4+offset] || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | op type [Ra(index)*2+offset] || } else { | op type [Ra(base)+Ra(index)*2+offset] || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | op type [Ra(index)+offset] || } else { | op type [Ra(base)+Ra(index)+offset] || } || } || } while (0); |.endmacro |.macro ASM_TXT_TMEM_OP, op, op1, type, op2 || do { || int32_t offset = IR_MEM_OFFSET(op2); || int32_t base = IR_MEM_BASE(op2); || int32_t index = IR_MEM_INDEX(op2); || int32_t scale = IR_MEM_SCALE(op2); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | op op1, type [offset] || } else { | op op1, type [Ra(base)+offset] || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | op op1, type [Ra(index)*8+offset] || } else { | op op1, type [Ra(base)+Ra(index)*8+offset] || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | op op1, type [Ra(index)*4+offset] || } else { | op op1, type [Ra(base)+Ra(index)*4+offset] || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | op op1, type [Ra(index)*2+offset] || } else { | op op1, type [Ra(base)+Ra(index)*2+offset] || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | op op1, type [Ra(index)+offset] || } else { | op op1, type [Ra(base)+Ra(index)+offset] || } || } || } while (0); |.endmacro |.macro ASM_TMEM_TXT_OP, op, type, op1, op2 || do { || int32_t offset = IR_MEM_OFFSET(op1); || int32_t base = IR_MEM_BASE(op1); || int32_t index = IR_MEM_INDEX(op1); || int32_t scale = IR_MEM_SCALE(op1); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | op type [offset], op2 || } else { | op type [Ra(base)+offset], op2 || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | op type [Ra(index)*8+offset], op2 || } else { | op type [Ra(base)+Ra(index)*8+offset], op2 || } || } else if (scale == 4) { || if (base == IR_REG_NONE) { | op type [Ra(index)*4+offset], op2 || } else { | op type [Ra(base)+Ra(index)*4+offset], op2 || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | op type [Ra(index)*2+offset], op2 || } else { | op type [Ra(base)+Ra(index)*2+offset], op2 || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | op type [Ra(index)+offset], op2 || } else { | op type [Ra(base)+Ra(index)+offset], op2 || } || } || } while (0); |.endmacro |.macro ASM_TXT_TXT_TMEM_OP, op, op1, op2, type, op3 || do { || int32_t offset = IR_MEM_OFFSET(op3); || int32_t base = IR_MEM_BASE(op3); || int32_t index = IR_MEM_INDEX(op3); || int32_t scale = IR_MEM_SCALE(op3); || if (index == IR_REG_NONE) { || if (base == IR_REG_NONE) { | op op1, op2, type [offset] || } else { | op op1, op2, type [Ra(base)+offset] || } || } else if (scale == 8) { || if (base == IR_REG_NONE) { | op op1, op2, type [Ra(index)*8+offset] || } else { | op op1, op2, type [Ra(base)+Ra(index)*8+offset] || } 
|| } else if (scale == 4) { || if (base == IR_REG_NONE) { | op op1, op2, type [Ra(index)*4+offset] || } else { | op op1, op2, type [Ra(base)+Ra(index)*4+offset] || } || } else if (scale == 2) { || if (base == IR_REG_NONE) { | op op1, op2, type [Ra(index)*2+offset] || } else { | op op1, op2, type [Ra(base)+Ra(index)*2+offset] || } || } else { || IR_ASSERT(scale == 1); || if (base == IR_REG_NONE) { | op op1, op2, type [Ra(index)+offset] || } else { | op op1, op2, type [Ra(base)+Ra(index)+offset] || } || } || } while (0); |.endmacro |.macro ASM_REG_OP, op, type, op1 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op Rb(op1) || break; || case 2: | op Rw(op1) || break; || case 4: | op Rd(op1) || break; |.if X64 || case 8: | op Rq(op1) || break; |.endif || } |.endmacro |.macro ASM_MEM_OP, op, type, op1 | ASM_EXPAND_OP_MEM ASM_EXPAND_TYPE_MEM, op, type, op1 |.endmacro |.macro ASM_REG_REG_OP, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op Rb(op1), Rb(op2) || break; || case 2: | op Rw(op1), Rw(op2) || break; || case 4: | op Rd(op1), Rd(op2) || break; |.if X64 || case 8: | op Rq(op1), Rq(op2) || break; |.endif || } |.endmacro |.macro ASM_REG_REG_OP2, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: || case 2: | op Rw(op1), Rw(op2) || break; || case 4: | op Rd(op1), Rd(op2) || break; |.if X64 || case 8: | op Rq(op1), Rq(op2) || break; |.endif || } |.endmacro |.macro ASM_REG_TXT_OP, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op Rb(op1), op2 || break; || case 2: | op Rw(op1), op2 || break; || case 4: | op Rd(op1), op2 || break; |.if X64 || case 8: | op Rq(op1), op2 || break; |.endif || } |.endmacro |.macro ASM_REG_IMM_OP, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 1: | op Rb(op1), (op2 & 0xff) || break; || case 2: | op Rw(op1), (op2 & 0xffff) || break; || case 4: | op Rd(op1), op2 || break; |.if X64 || case 8: | op Rq(op1), op2 || break; |.endif || } |.endmacro |.macro ASM_MEM_REG_OP, op, type, op1, op2 | ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2 |.endmacro |.macro ASM_MEM_TXT_OP, op, type, op1, op2 | ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2 |.endmacro |.macro ASM_MEM_IMM_OP, op, type, op1, op2 | ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2 |.endmacro |.macro ASM_REG_MEM_OP, op, type, op1, op2 | ASM_EXPAND_OP2_MEM ASM_REG_TXT_OP, op, type, op1, op2 |.endmacro |.macro ASM_REG_REG_MUL, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 2: | op Rw(op1), Rw(op2) || break; || case 4: | op Rd(op1), Rd(op2) || break; |.if X64 || case 8: | op Rq(op1), Rq(op2) || break; |.endif || } |.endmacro |.macro ASM_REG_IMM_MUL, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 2: | op Rw(op1), op2 || break; || case 4: | op Rd(op1), op2 || break; |.if X64 || case 8: | op Rq(op1), op2 || break; |.endif || } |.endmacro |.macro ASM_REG_TXT_MUL, op, type, op1, op2 || switch (ir_type_size[type]) { || default: || IR_ASSERT(0); || case 2: | op Rw(op1), op2 || break; || case 4: | op Rd(op1), op2 || break; |.if X64 || case 8: | op Rq(op1), op2 || break; |.endif || } |.endmacro |.macro ASM_REG_MEM_MUL, op, type, op1, op2 | ASM_EXPAND_OP2_MEM ASM_REG_TXT_MUL, op, type, op1, op2 |.endmacro |.macro ASM_REG_TXT_TXT_MUL, op, type, op1, op2, op3 || switch (ir_type_size[type]) { || default: 
|| IR_ASSERT(0); || case 2: | op Rw(op1), op2, op3 || break; || case 4: | op Rd(op1), op2, op3 || break; |.if X64 || case 8: | op Rq(op1), op2, op3 || break; |.endif || } |.endmacro |.macro ASM_REG_MEM_TXT_MUL, op, type, op1, op2, op3 | ASM_EXPAND_OP2_MEM_3 ASM_REG_TXT_TXT_MUL, imul, type, op1, op2, op3 |.endmacro |.macro ASM_SSE2_REG_REG_OP, op, type, op1, op2 || if (type == IR_DOUBLE) { | op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) || } else { || IR_ASSERT(type == IR_FLOAT); | op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) || } |.endmacro |.macro ASM_SSE2_REG_TXT_OP, op, type, op1, op2 || if (type == IR_DOUBLE) { | op..d xmm(op1-IR_REG_FP_FIRST), qword op2 || } else { || IR_ASSERT(type == IR_FLOAT); | op..s xmm(op1-IR_REG_FP_FIRST), dword op2 || } |.endmacro |.macro ASM_SSE2_REG_MEM_OP, op, type, op1, op2 | ASM_EXPAND_OP2_MEM ASM_SSE2_REG_TXT_OP, op, type, op1, op2 |.endmacro |.macro ASM_AVX_REG_REG_REG_OP, op, type, op1, op2, op3 || if (type == IR_DOUBLE) { | op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST) || } else { || IR_ASSERT(type == IR_FLOAT); | op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST) || } |.endmacro |.macro ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3 || if (type == IR_DOUBLE) { | op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), qword op3 || } else { || IR_ASSERT(type == IR_FLOAT); | op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), dword op3 || } |.endmacro |.macro ASM_AVX_REG_REG_MEM_OP, op, type, op1, op2, op3 | ASM_EXPAND_OP3_MEM ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3 |.endmacro |.macro ASM_FP_REG_REG_OP, op, type, op1, op2 || if (ctx->mflags & IR_X86_AVX) { | ASM_SSE2_REG_REG_OP v..op, type, op1, op2 || } else { | ASM_SSE2_REG_REG_OP op, type, op1, op2 || } |.endmacro |.macro ASM_FP_TXT_REG_OP, op, type, dst, src || if (type == IR_DOUBLE) { || if (ctx->mflags & IR_X86_AVX) { | v..op..d qword dst, xmm(src-IR_REG_FP_FIRST) || } else { | op..d qword dst, xmm(src-IR_REG_FP_FIRST) || } || } else { || IR_ASSERT(type == IR_FLOAT); || if (ctx->mflags & IR_X86_AVX) { | v..op..s dword dst, xmm(src-IR_REG_FP_FIRST) || } else { | op..s dword dst, xmm(src-IR_REG_FP_FIRST) || } || } |.endmacro |.macro ASM_FP_MEM_REG_OP, op, type, op1, op2 | ASM_EXPAND_OP1_MEM ASM_FP_TXT_REG_OP, op, type, op1, op2 |.endmacro |.macro ASM_FP_REG_TXT_OP, op, type, op1, op2 || if (ctx->mflags & IR_X86_AVX) { | ASM_SSE2_REG_TXT_OP v..op, type, op1, op2 || } else { | ASM_SSE2_REG_TXT_OP op, type, op1, op2 || } |.endmacro |.macro ASM_FP_REG_MEM_OP, op, type, op1, op2 || if (ctx->mflags & IR_X86_AVX) { | ASM_SSE2_REG_MEM_OP v..op, type, op1, op2 || } else { | ASM_SSE2_REG_MEM_OP op, type, op1, op2 || } |.endmacro |.macro ASM_SSE2_REG_REG_TXT_OP, op, type, op1, op2, op3 || if (type == IR_DOUBLE) { | op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), op3 || } else { || IR_ASSERT(type == IR_FLOAT); | op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), op3 || } |.endmacro |.macro ASM_SSE2_REG_REG_REG_TXT_OP, op, type, op1, op2, op3, op4 || if (type == IR_DOUBLE) { | op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST), op4 || } else { || IR_ASSERT(type == IR_FLOAT); | op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST), op4 || } |.endmacro |.macro ASM_FP_REG_REG_TXT_OP, op, type, op1, op2, op3 || if (ctx->mflags & IR_X86_AVX) { | ASM_SSE2_REG_REG_REG_TXT_OP v..op, type, op1, op2, op3 || } else { | 
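/*
 * Sketch of the FP instruction selection: the "..d"/"..s" suffix chooses the
 * double or single precision mnemonic from the IR type, and the ASM_FP_*
 * wrappers switch to the VEX-encoded form by prepending "v" when ctx->mflags
 * has IR_X86_AVX set.  With hypothetical register variables:
 *
 *     ASM_SSE2_REG_REG_OP adds, IR_DOUBLE, dst, src      -> addsd xmm(dst), xmm(src)
 *     ASM_AVX_REG_REG_REG_OP adds, IR_DOUBLE, d, s1, s2  -> vaddsd xmm(d), xmm(s1), xmm(s2)
 */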
ASM_SSE2_REG_REG_TXT_OP op, type, op1, op2, op3 || } |.endmacro typedef struct _ir_backend_data { ir_reg_alloc_data ra_data; uint32_t dessa_from_block; dasm_State *dasm_state; ir_bitset emit_constants; int rodata_label, jmp_table_label; bool double_neg_const; bool float_neg_const; bool double_abs_const; bool float_abs_const; bool double_zero_const; } ir_backend_data; #define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ #name64, #define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \ #name32, #define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \ #name16, #define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \ #name8, #define IR_FP_REG_NAME(code, name) \ #name, static const char *_ir_reg_name[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME) IR_FP_REGS(IR_FP_REG_NAME) }; static const char *_ir_reg_name32[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME32) }; static const char *_ir_reg_name16[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME16) }; static const char *_ir_reg_name8[IR_REG_NUM] = { IR_GP_REGS(IR_GP_REG_NAME8) }; /* Calling Convention */ #ifdef _WIN64 static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { IR_REG_INT_ARG1, IR_REG_INT_ARG2, IR_REG_INT_ARG3, IR_REG_INT_ARG4, }; static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { IR_REG_FP_ARG1, IR_REG_FP_ARG2, IR_REG_FP_ARG3, IR_REG_FP_ARG4, }; #elif defined(IR_TARGET_X64) static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { IR_REG_INT_ARG1, IR_REG_INT_ARG2, IR_REG_INT_ARG3, IR_REG_INT_ARG4, IR_REG_INT_ARG5, IR_REG_INT_ARG6, }; static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { IR_REG_FP_ARG1, IR_REG_FP_ARG2, IR_REG_FP_ARG3, IR_REG_FP_ARG4, IR_REG_FP_ARG5, IR_REG_FP_ARG6, IR_REG_FP_ARG7, IR_REG_FP_ARG8, }; #else static const int8_t *_ir_int_reg_params = NULL; static const int8_t *_ir_fp_reg_params = NULL; static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = { IR_REG_INT_FCARG1, IR_REG_INT_FCARG2, }; static const int8_t *_ir_fp_fc_reg_params = NULL; #endif const char *ir_reg_name(int8_t reg, ir_type type) { if (reg >= IR_REG_NUM) { if (reg == IR_REG_SCRATCH) { return "SCRATCH"; } else { IR_ASSERT(reg == IR_REG_ALL); return "ALL"; } } IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); if (type == IR_VOID) { type = (reg < IR_REG_FP_FIRST) ? 
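/*
 * Sketch: ir_reg_name() picks the textual name from the width-specific table,
 * falling back to the 64-bit/FP names for 8-byte and floating-point types.
 * Assuming the usual x86_64 register ids:
 *
 *     ir_reg_name(IR_REG_RAX, IR_I64)     -> "rax"
 *     ir_reg_name(IR_REG_RAX, IR_I32)     -> "eax"
 *     ir_reg_name(IR_REG_RAX, IR_I16)     -> "ax"
 *     ir_reg_name(IR_REG_RAX, IR_I8)      -> "al"
 *     ir_reg_name(IR_REG_XMM0, IR_DOUBLE) -> "xmm0"
 */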
IR_ADDR : IR_DOUBLE; } if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) { return _ir_reg_name[reg]; } else if (ir_type_size[type] == 4) { return _ir_reg_name32[reg]; } else if (ir_type_size[type] == 2) { return _ir_reg_name16[reg]; } else { IR_ASSERT(ir_type_size[type] == 1); return _ir_reg_name8[reg]; } } #define IR_RULES(_) \ _(CMP_INT) \ _(CMP_FP) \ _(MUL_INT) \ _(DIV_INT) \ _(MOD_INT) \ _(TEST_INT) \ _(SETCC_INT) \ _(TESTCC_INT) \ _(LEA_OB) \ _(LEA_SI) \ _(LEA_SIB) \ _(LEA_IB) \ _(LEA_SI_O) \ _(LEA_SIB_O) \ _(LEA_IB_O) \ _(LEA_I_OB) \ _(LEA_OB_I) \ _(LEA_OB_SI) \ _(LEA_SI_OB) \ _(LEA_B_SI) \ _(LEA_SI_B) \ _(INC) \ _(DEC) \ _(MUL_PWR2) \ _(DIV_PWR2) \ _(MOD_PWR2) \ _(SDIV_PWR2) \ _(SMOD_PWR2) \ _(BOOL_NOT_INT) \ _(ABS_INT) \ _(OP_INT) \ _(OP_FP) \ _(IMUL3) \ _(BINOP_INT) \ _(BINOP_SSE2) \ _(BINOP_AVX) \ _(SHIFT) \ _(SHIFT_CONST) \ _(COPY_INT) \ _(COPY_FP) \ _(CMP_AND_STORE_INT) \ _(CMP_AND_BRANCH_INT) \ _(CMP_AND_BRANCH_FP) \ _(TEST_AND_BRANCH_INT) \ _(JCC_INT) \ _(COND_CMP_INT) \ _(COND_CMP_FP) \ _(GUARD_CMP_INT) \ _(GUARD_CMP_FP) \ _(GUARD_TEST_INT) \ _(GUARD_JCC_INT) \ _(GUARD_OVERFLOW) \ _(OVERFLOW_AND_BRANCH) \ _(MIN_MAX_INT) \ _(MEM_OP_INT) \ _(MEM_INC) \ _(MEM_DEC) \ _(MEM_MUL_PWR2) \ _(MEM_DIV_PWR2) \ _(MEM_MOD_PWR2) \ _(MEM_BINOP_INT) \ _(MEM_SHIFT) \ _(MEM_SHIFT_CONST) \ _(REG_BINOP_INT) \ _(VSTORE_INT) \ _(VSTORE_FP) \ _(LOAD_INT) \ _(LOAD_FP) \ _(STORE_INT) \ _(STORE_FP) \ _(IF_INT) \ _(RETURN_VOID) \ _(RETURN_INT) \ _(RETURN_FP) \ _(BIT_COUNT) \ _(SSE_SQRT) \ _(SSE_RINT) \ _(SSE_FLOOR) \ _(SSE_CEIL) \ _(SSE_TRUNC) \ _(SSE_NEARBYINT) \ #define IR_RULE_ENUM(name) IR_ ## name, #define IR_STATIC_ALLOCA (IR_SKIPPED | IR_FUSED | IR_SIMPLE | IR_ALLOCA) enum _ir_rule { IR_FIRST_RULE = IR_LAST_OP, IR_RULES(IR_RULE_ENUM) IR_LAST_RULE }; #define IR_RULE_NAME(name) #name, const char *ir_rule_name[IR_LAST_OP] = { NULL, IR_RULES(IR_RULE_NAME) NULL }; static bool ir_may_fuse_addr(ir_ctx *ctx, const ir_insn *addr_insn) { if (sizeof(void*) == 4) { return 1; } else if (IR_IS_SYM_CONST(addr_insn->op)) { void *addr = ir_sym_addr(ctx, addr_insn); if (!addr) { return 0; } return IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr); } else { return IR_IS_SIGNED_32BIT(addr_insn->val.i64); } } static bool ir_may_fuse_imm(ir_ctx *ctx, const ir_insn *val_insn) { if (val_insn->type == IR_ADDR) { if (sizeof(void*) == 4) { return 1; } else if (IR_IS_SYM_CONST(val_insn->op)) { void *addr = ir_sym_addr(ctx, val_insn); if (!addr) { return 0; } return IR_IS_SIGNED_32BIT((intptr_t)addr); } else { return IR_IS_SIGNED_32BIT(val_insn->val.i64); } } else { return (ir_type_size[val_insn->type] <= 4 || IR_IS_SIGNED_32BIT(val_insn->val.i64)); } } /* register allocation */ static int ir_add_const_tmp_reg(ir_ctx *ctx, ir_ref ref, uint32_t num, int n, ir_target_constraints *constraints) { IR_ASSERT(IR_IS_CONST_REF(ref)); const ir_insn *val_insn = &ctx->ir_base[ref]; if (!ir_may_fuse_imm(ctx, val_insn)) { constraints->tmp_regs[n] = IR_TMP_REG(num, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } return n; } int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints) { uint32_t rule = ir_rule(ctx, ref); const ir_insn *insn; int n = 0; int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; constraints->def_reg = IR_REG_NONE; constraints->hints_count = 0; switch (rule & IR_RULE_MASK) { case IR_BINOP_INT: insn = &ctx->ir_base[ref]; if (rule & IR_FUSED) { if (ctx->ir_base[insn->op1].op == IR_RLOAD) { flags = IR_OP1_MUST_BE_IN_REG | 
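/*
 * Sketch of the immediate-fusion rule: ir_may_fuse_imm() reports whether a
 * constant may be encoded directly as an x86 immediate operand; on x86_64
 * only values that fit a sign-extended 32-bit immediate qualify, anything
 * larger gets a temporary register reserved via ir_add_const_tmp_reg().
 * With hypothetical values:
 *
 *     ADD(x, 1000)        -> add reg, 1000                       (fused)
 *     ADD(x, 0x123456789) -> mov tmp, 0x123456789; add reg, tmp  (not fused)
 *
 * Pointer-sized constants always fuse on 32-bit builds.
 */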
IR_OP2_SHOULD_BE_IN_REG; } else { flags = IR_OP2_MUST_BE_IN_REG; } } else { flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; } if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_IMUL3: flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; break; case IR_SHIFT: if (rule & IR_FUSED) { flags = IR_OP2_MUST_BE_IN_REG; } else { flags = IR_DEF_REUSES_OP1_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; } constraints->hints[1] = IR_REG_NONE; constraints->hints[2] = IR_REG_RCX; constraints->hints_count = 3; constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RCX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; break; case IR_MUL_INT: /* %rax - used as input and result */ constraints->def_reg = IR_REG_RAX; constraints->hints[1] = IR_REG_RAX; constraints->hints_count = 2; flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_USE_SUB_REF, IR_DEF_SUB_REF); constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 2; break; case IR_DIV_INT: /* %rax - used as input and result */ constraints->def_reg = IR_REG_RAX; constraints->hints[1] = IR_REG_RAX; constraints->hints_count = 2; flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 2; goto op2_const; case IR_MOD_INT: constraints->def_reg = IR_REG_RDX; constraints->hints[1] = IR_REG_RAX; constraints->hints_count = 2; flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 2; goto op2_const; case IR_MIN_MAX_INT: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; op2_const: insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_CMP_INT: case IR_TEST_INT: insn = &ctx->ir_base[ref]; flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; if (IR_IS_CONST_REF(insn->op1)) { const ir_insn *val_insn = &ctx->ir_base[insn->op1]; constraints->tmp_regs[0] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } else if (ir_rule(ctx, insn->op1) & IR_FUSED) { flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; } if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_CMP_FP: insn = &ctx->ir_base[ref]; if (!(rule & IR_FUSED)) { 
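/*
 * Note on the IR_MUL_INT/IR_DIV_INT/IR_MOD_INT constraints above: the x86
 * one-operand mul/imul and div/idiv instructions implicitly use rax and rdx,
 * so op1 is hinted into rax, the result is defined in rax (product/quotient)
 * or rdx (remainder for MOD), and the other register of the pair is reserved
 * as a clobbered scratch.  Roughly, a signed 32-bit division is emitted as:
 *
 *     mov eax, <op1>
 *     cdq                 ; sign-extend eax into edx
 *     idiv <op2>          ; quotient -> eax, remainder -> edx
 */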
constraints->tmp_regs[0] = IR_TMP_REG(3, IR_BOOL, IR_DEF_SUB_REF, IR_SAVE_SUB_REF); n = 1; } flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; if (IR_IS_CONST_REF(insn->op1)) { const ir_insn *val_insn = &ctx->ir_base[insn->op1]; constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_BINOP_AVX: flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op1)) { constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } break; case IR_COND: insn = &ctx->ir_base[ref]; if (!IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; break; } IR_FALLTHROUGH; case IR_COND_CMP_INT: insn = &ctx->ir_base[ref]; if (IR_IS_TYPE_INT(insn->type)) { if (IR_IS_CONST_REF(insn->op3) || ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; } else if (IR_IS_CONST_REF(insn->op2) || ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; } else { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; } } else { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; } break; case IR_COND_CMP_FP: flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; break; case IR_VSTORE_INT: flags = IR_OP3_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op3)) { n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints); } else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_STORE_INT: flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op2)) { n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); } if (IR_IS_CONST_REF(insn->op3)) { n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints); } else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) { constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_VSTORE_FP: flags = IR_OP3_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op3)) { insn = &ctx->ir_base[insn->op3]; constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } break; case IR_LOAD_FP: case IR_LOAD_INT: case IR_MEM_OP_INT: case IR_MEM_INC: case IR_MEM_DEC: case IR_MEM_MUL_PWR2: case IR_MEM_DIV_PWR2: case IR_MEM_MOD_PWR2: case IR_MEM_BINOP_INT: case IR_MEM_SHIFT: case IR_MEM_SHIFT_CONST: case IR_CMP_AND_STORE_INT: flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op2)) { n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); } break; case IR_STORE_FP: flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op2)) { n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); } if 
(IR_IS_CONST_REF(insn->op3)) { insn = &ctx->ir_base[insn->op3]; constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } break; case IR_SWITCH: flags = IR_OP2_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (IR_IS_CONST_REF(insn->op2)) { insn = &ctx->ir_base[insn->op2]; constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } /* we need a temporary regeset in case MIN CASE value is not zero or some CASE VAL can't fit into 32-bit */ constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; break; case IR_CALL: insn = &ctx->ir_base[ref]; if (IR_IS_TYPE_INT(insn->type)) { constraints->def_reg = IR_REG_INT_RET1; #ifdef IR_REG_FP_RET1 } else { constraints->def_reg = IR_REG_FP_RET1; #endif } constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); n = 1; IR_FALLTHROUGH; case IR_TAILCALL: insn = &ctx->ir_base[ref]; if (insn->inputs_count > 2) { constraints->hints[2] = IR_REG_NONE; constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); if (!IR_IS_CONST_REF(insn->op2)) { constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); n++; } } flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; break; case IR_BINOP_SSE2: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; break; case IR_SHIFT_CONST: case IR_INC: case IR_DEC: case IR_MUL_PWR2: case IR_DIV_PWR2: case IR_OP_INT: case IR_OP_FP: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; break; case IR_MOD_PWR2: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; insn = &ctx->ir_base[ref]; if (ir_type_size[insn->type] == 8) { int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; if (!IR_IS_SIGNED_32BIT(offset)) { constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } } break; case IR_SMOD_PWR2: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; insn = &ctx->ir_base[ref]; if (ir_type_size[insn->type] == 8) { int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; if (!IR_IS_SIGNED_32BIT(offset)) { constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } } constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); n++; break; case IR_SDIV_PWR2: flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; insn = &ctx->ir_base[ref]; if (ir_type_size[insn->type] == 8) { int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; if (!IR_IS_SIGNED_32BIT(offset)) { constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n++; } } break; case IR_BIT_COUNT: insn = &ctx->ir_base[ref]; if (ir_type_size[insn->type] == 1) { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; } else { flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; } if (IR_IS_CONST_REF(insn->op1)) { constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; } break; case IR_CTPOP: flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; insn = &ctx->ir_base[ref]; constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); n = 1; if (ir_type_size[insn->type] == 8) { constraints->tmp_regs[1] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); n = 2; } break; case IR_COPY_INT: case IR_COPY_FP: case IR_SEXT: 
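/*
 * Note on IR_SDIV_PWR2/IR_SMOD_PWR2 above: unsigned division by a power of
 * two is a plain shift/and, but the signed forms need a rounding fix-up,
 * since an arithmetic shift rounds toward negative infinity while integer
 * division truncates toward zero.  The usual sequence adds (2^k - 1) to
 * negative dividends before shifting; a sketch for x / 8 (k = 3, 64-bit):
 *
 *     mov  tmp, x
 *     sar  tmp, 63        ; tmp = (x < 0) ? -1 : 0
 *     shr  tmp, 61        ; tmp = (x < 0) ?  7 : 0
 *     add  x, tmp
 *     sar  x, 3
 *
 * The temporary registers reserved here exist for this correction and for
 * the 64-bit case where 2^k - 1 does not fit into an imm32.
 */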
case IR_ZEXT: case IR_TRUNC: case IR_BITCAST: case IR_PROTO: case IR_FP2FP: flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; break; case IR_ABS_INT: flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; break; case IR_PARAM: constraints->def_reg = ir_get_param_reg(ctx, ref); flags = 0; break; case IR_PI: case IR_PHI: flags = IR_USE_SHOULD_BE_IN_REG; break; case IR_RLOAD: constraints->def_reg = ctx->ir_base[ref].op2; flags = IR_USE_SHOULD_BE_IN_REG; break; case IR_EXITCALL: flags = IR_USE_MUST_BE_IN_REG; constraints->def_reg = IR_REG_INT_RET1; break; case IR_IF_INT: case IR_GUARD: case IR_GUARD_NOT: flags = IR_OP2_SHOULD_BE_IN_REG; break; case IR_IJMP: flags = IR_OP2_SHOULD_BE_IN_REG; break; case IR_RSTORE: flags = IR_OP3_SHOULD_BE_IN_REG; break; case IR_RETURN_INT: flags = IR_OP2_SHOULD_BE_IN_REG; constraints->hints[2] = IR_REG_INT_RET1; constraints->hints_count = 3; break; case IR_RETURN_FP: #ifdef IR_REG_FP_RET1 flags = IR_OP2_SHOULD_BE_IN_REG; constraints->hints[2] = IR_REG_FP_RET1; constraints->hints_count = 3; #endif break; case IR_SNAPSHOT: flags = 0; break; case IR_VA_START: flags = IR_OP2_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; break; case IR_VA_ARG: flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); n = 1; break; case IR_VA_COPY: flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); n = 1; break; } constraints->tmps_count = n; return flags; } /* instruction selection */ static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref); static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root); static void ir_swap_ops(ir_insn *insn) { SWAP_REFS(insn->op1, insn->op2); } static bool ir_match_try_revert_lea_to_add(ir_ctx *ctx, ir_ref ref) { ir_insn *insn = &ctx->ir_base[ref]; /* TODO: This optimization makes sense only if the other operand is killed */ if (insn->op1 == insn->op2) { /* pass */ } else if (ir_match_try_fuse_load(ctx, insn->op2, ref)) { ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP; return 1; } else if (ir_match_try_fuse_load(ctx, insn->op1, ref)) { /* swap for better load fusion */ ir_swap_ops(insn); ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP; return 1; } return 0; } static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref) { if (!IR_IS_CONST_REF(addr_ref)) { uint32_t rule = ctx->rules[addr_ref]; if (!rule) { ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref); } if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B) { ir_use_list *use_list; ir_ref j; if (rule == IR_LEA_IB && ir_match_try_revert_lea_to_add(ctx, addr_ref)) { return; } use_list = &ctx->use_lists[addr_ref]; j = use_list->count; if (j > 1) { /* check if address is used only in LOAD and STORE */ ir_ref *p = &ctx->use_edges[use_list->refs]; do { ir_insn *insn = &ctx->ir_base[*p]; if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) { return; } p++; } while (--j); } ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | rule; } } } static bool ir_match_may_fuse_SI(ir_ctx *ctx, ir_ref ref, ir_ref use) { ir_insn *op2_insn, *insn = &ctx->ir_base[use]; if (insn->op == IR_ADD) { if (insn->op1 == ref) { if (IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { if (ir_may_fuse_addr(ctx, op2_insn)) { return 1; // LEA_SI_O } } else if 
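/*
 * Sketch of address fusion: ir_match_fuse_addr() re-marks an address
 * computation that matched one of the LEA_* rules as IR_FUSED | IR_SIMPLE
 * when it is consumed only as a LOAD/STORE address, so the computation folds
 * into the memory operand and no separate "lea" is emitted:
 *
 *     a = ADD(base, 16)        ; matches LEA_OB
 *     v = LOAD(a)
 *
 *     unfused: lea rax, [rbx+16]      fused: mov eax, [rbx+16]
 *              mov eax, [rax]
 */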
(IR_IS_SIGNED_32BIT(op2_insn->val.i64)) { return 1; // LEA_SI_O } } else if (insn->op2 != ref) { return 1; // LEA_SI_B or LEA_SI_OB } } else if (insn->op2 == ref && insn->op1 != insn->op2) { return 1; // LEA_B_SI or LEA_OB_SI } } return 0; } static bool ir_match_fuse_addr_all_useges(ir_ctx *ctx, ir_ref ref) { uint32_t rule = ctx->rules[ref]; ir_use_list *use_list; ir_ref n, *p, use; if (rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { return 1; } else if (!rule) { ir_insn *insn = &ctx->ir_base[ref]; IR_ASSERT(IR_IS_TYPE_INT(insn->type) && ir_type_size[insn->type] >= 4); if (insn->op == IR_MUL && IR_IS_CONST_REF(insn->op2)) { insn = &ctx->ir_base[insn->op2]; if (!IR_IS_SYM_CONST(insn->op) && (insn->val.u64 == 2 || insn->val.u64 == 4 || insn->val.u64 == 8)) { ctx->rules[ref] = IR_LEA_SI; use_list = &ctx->use_lists[ref]; n = use_list->count; IR_ASSERT(n > 1); p = &ctx->use_edges[use_list->refs]; for (; n > 0; p++, n--) { use = *p; if (!ir_match_may_fuse_SI(ctx, ref, use)) { return 0; } } return 1; } } } return 0; } /* A naive check if there is a STORE or CALL between this LOAD and the fusion root */ static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root) { if (ref + 1 != root) { ir_ref pos = ctx->prev_ref[root]; do { ir_insn *insn = &ctx->ir_base[pos]; if (insn->op == IR_STORE) { // TODO: check if LOAD and STORE addresses may alias return 1; } else if (insn->op == IR_CALL) { return 1; } pos = ctx->prev_ref[pos]; } while (ref != pos); } return 0; } static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) { if (ir_in_same_block(ctx, ref) && ctx->ir_base[ref].op == IR_LOAD) { if (ctx->use_lists[ref].count == 2 && !ir_match_has_mem_deps(ctx, ref, root)) { ir_ref addr_ref = ctx->ir_base[ref].op2; ir_insn *addr_insn = &ctx->ir_base[addr_ref]; if (IR_IS_CONST_REF(addr_ref)) { if (ir_may_fuse_addr(ctx, addr_insn)) { ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; return; } } else { ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; ir_match_fuse_addr(ctx, addr_ref); return; } } } } static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root) { ir_insn *insn = &ctx->ir_base[ref]; if (ir_in_same_block(ctx, ref) && insn->op == IR_LOAD) { if (ctx->use_lists[ref].count == 2 && !ir_match_has_mem_deps(ctx, ref, root)) { ir_ref addr_ref = ctx->ir_base[ref].op2; ir_insn *addr_insn = &ctx->ir_base[addr_ref]; if (IR_IS_CONST_REF(addr_ref)) { if (ir_may_fuse_addr(ctx, addr_insn)) { ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; return 1; } } else { ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD; ir_match_fuse_addr(ctx, addr_ref); return 1; } } } else if (insn->op == IR_PARAM) { if (ctx->use_lists[ref].count == 1 && ir_get_param_reg(ctx, ref) == IR_REG_NONE) { return 1; } } else if (ctx->ir_base[ref].op == IR_VLOAD) { return 1; } return 0; } static void ir_match_fuse_load_commutative_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) { if (IR_IS_CONST_REF(insn->op2) && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) { return; } else if (ir_match_try_fuse_load(ctx, insn->op2, root)) { return; } else if (ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); } } static void ir_match_fuse_load_commutative_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root) { if (!IR_IS_CONST_REF(insn->op2) && !ir_match_try_fuse_load(ctx, insn->op2, root) && (IR_IS_CONST_REF(insn->op1) || ir_match_try_fuse_load(ctx, insn->op1, root))) { ir_swap_ops(insn); } } static void ir_match_fuse_load_cmp_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) { if (IR_IS_CONST_REF(insn->op2) && ir_may_fuse_imm(ctx, 
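/*
 * Sketch of load fusion: ir_match_fuse_load() folds a LOAD into its single
 * consumer (turning the consumer's operand into a memory operand) only when
 * it is safe - the LOAD is in the same block, has no other users, and
 * ir_match_has_mem_deps() finds no STORE or CALL between the LOAD and the
 * fusion root that could clobber the loaded location:
 *
 *     v = LOAD(a); r = ADD(x, v)               -> add reg_x, [a]
 *     v = LOAD(a); STORE(b, y); r = ADD(x, v)  -> LOAD stays separate
 */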
&ctx->ir_base[insn->op2])) { ir_match_fuse_load(ctx, insn->op1, root); } else if (!ir_match_try_fuse_load(ctx, insn->op2, root) && ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); if (insn->op != IR_EQ && insn->op != IR_NE) { insn->op ^= 3; } } } static void ir_match_fuse_load_test_int(ir_ctx *ctx, ir_insn *insn, ir_ref root) { if (IR_IS_CONST_REF(insn->op2) && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) { ir_match_fuse_load(ctx, insn->op1, root); } else if (!ir_match_try_fuse_load(ctx, insn->op2, root) && ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); } } static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root) { if (insn->op != IR_EQ && insn->op != IR_NE) { if (insn->op == IR_LT || insn->op == IR_LE) { /* swap operands to avoid P flag check */ ir_swap_ops(insn); insn->op ^= 3; } ir_match_fuse_load(ctx, insn->op2, root); } else if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) { /* pass */ } else if (ir_match_try_fuse_load(ctx, insn->op2, root)) { /* pass */ } else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); if (insn->op != IR_EQ && insn->op != IR_NE) { insn->op ^= 3; } } } static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct) { if (direct) { if (insn->op == IR_LT || insn->op == IR_LE) { /* swap operands to avoid P flag check */ ir_swap_ops(insn); insn->op ^= 3; } } else { if (insn->op == IR_GT || insn->op == IR_GE) { /* swap operands to avoid P flag check */ ir_swap_ops(insn); insn->op ^= 3; } } if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) { /* pass */ } else if (ir_match_try_fuse_load(ctx, insn->op2, root)) { /* pass */ } else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) { ir_swap_ops(insn); if (insn->op != IR_EQ && insn->op != IR_NE) { insn->op ^= 3; } } } #define STR_EQUAL(name, name_len, str) (name_len == strlen(str) && memcmp(name, str, strlen(str)) == 0) #define IR_IS_FP_FUNC_1(proto, _type) (proto->params_count == 1 && \ proto->param_types[0] == _type && \ proto->ret_type == _type) static uint32_t ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func) { const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto); if (proto->flags & IR_BUILTIN_FUNC) { size_t name_len; const char *name = ir_get_strl(ctx, func->val.name, &name_len); if (STR_EQUAL(name, name_len, "sqrt")) { if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { return IR_SSE_SQRT; } } else if (STR_EQUAL(name, name_len, "sqrtf")) { if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { return IR_SSE_SQRT; } } else if (STR_EQUAL(name, name_len, "rint")) { if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { return IR_SSE_RINT; } } else if (STR_EQUAL(name, name_len, "rintf")) { if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { return IR_SSE_RINT; } } else if (STR_EQUAL(name, name_len, "floor")) { if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { return IR_SSE_FLOOR; } } else if (STR_EQUAL(name, name_len, "floorf")) { if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { return IR_SSE_FLOOR; } } else if (STR_EQUAL(name, name_len, "ceil")) { if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { return IR_SSE_CEIL; } } else if (STR_EQUAL(name, name_len, "ceilf")) { if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { return IR_SSE_CEIL; } } else if (STR_EQUAL(name, name_len, "trunc")) { if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { return IR_SSE_TRUNC; } } else if 
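/*
 * Note on the compare matchers above: when only the left operand can be
 * fused, the operands are swapped and the comparison is flipped with
 * "insn->op ^= 3"; this relies on LT/GE/LE/GT (and the unsigned variants)
 * occupying consecutive opcode slots so that the XOR maps LT<->GT and
 * GE<->LE, e.g. LT(a, b) becomes GT(b, a).  For floating point, LT/LE are
 * also rewritten to GT/GE with swapped operands because ucomisd/ucomiss
 * signal "unordered" with PF=1 (and CF=ZF=1): testing "above"/"above or
 * equal" after the swap is false for NaNs without an extra parity check,
 * while testing "below" would need one.
 */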
(STR_EQUAL(name, name_len, "truncf")) { if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { return IR_SSE_TRUNC; } } else if (STR_EQUAL(name, name_len, "nearbyint")) { if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { return IR_SSE_NEARBYINT; } } else if (STR_EQUAL(name, name_len, "nearbyintf")) { if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { return IR_SSE_NEARBYINT; } } } return 0; } static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) { ir_insn *op2_insn; ir_insn *insn = &ctx->ir_base[ref]; uint32_t store_rule; ir_op load_op; switch (insn->op) { case IR_EQ: case IR_NE: case IR_LT: case IR_GE: case IR_LE: case IR_GT: case IR_ULT: case IR_UGE: case IR_ULE: case IR_UGT: if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { if (IR_IS_CONST_REF(insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op) && ctx->ir_base[insn->op2].val.i64 == 0 && insn->op1 == ref - 1) { /* previous instruction */ ir_insn *op1_insn = &ctx->ir_base[insn->op1]; if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) { /* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */ ir_match_fuse_load_test_int(ctx, op1_insn, ref); ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT; return IR_TESTCC_INT; } else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && (insn->op == IR_EQ || insn->op == IR_NE))) { /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); ctx->rules[insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op1_insn->op2, ref); ctx->rules[insn->op1] = IR_BINOP_INT; } return IR_SETCC_INT; } } ir_match_fuse_load_cmp_int(ctx, insn, ref); return IR_CMP_INT; } else { ir_match_fuse_load_cmp_fp(ctx, insn, ref); return IR_CMP_FP; } break; case IR_ADD: case IR_SUB: if (IR_IS_TYPE_INT(insn->type)) { if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_CONST_REF(insn->op1)) { // const // TODO: add support for sym+offset ??? } else if (IR_IS_SYM_CONST(op2_insn->op)) { if (insn->op == IR_ADD && ir_may_fuse_addr(ctx, op2_insn)) { goto lea; } /* pass */ } else if (op2_insn->val.i64 == 0) { // return IR_COPY_INT; } else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) || (ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) { lea: if (ctx->use_lists[insn->op1].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op1)) { uint32_t rule = ctx->rules[insn->op1]; if (!rule) { ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); } if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { /* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; return IR_LEA_SI_O; } else if (rule == IR_LEA_SIB) { /* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB; return IR_LEA_SIB_O; } else if (rule == IR_LEA_IB) { /* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... 
LEA [X+Y+im32] */ ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB; return IR_LEA_IB_O; } } /* ADD(X, imm32) => LEA [X+imm32] */ return IR_LEA_OB; } else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) { if (insn->op == IR_ADD) { if (op2_insn->val.i64 == 1) { /* ADD(_, 1) => INC */ return IR_INC; } else { /* ADD(_, -1) => DEC */ return IR_DEC; } } else { if (op2_insn->val.i64 == 1) { /* SUB(_, 1) => DEC */ return IR_DEC; } else { /* SUB(_, -1) => INC */ return IR_INC; } } } } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) { if (insn->op1 != insn->op2) { if (ctx->use_lists[insn->op1].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op1)) { uint32_t rule =ctx->rules[insn->op1]; if (!rule) { ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); } if (rule == IR_LEA_OB) { ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; if (ctx->use_lists[insn->op2].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op2)) { rule = ctx->rules[insn->op2]; if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); } if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; return IR_LEA_OB_SI; } } /* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */ return IR_LEA_OB_I; } else if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; if (ctx->use_lists[insn->op2].count == 1) { rule = ctx->rules[insn->op2]; if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); } if (rule == IR_LEA_OB) { /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; return IR_LEA_SI_OB; } } /* x = MUL(X, 2|4|8) ... ADD(x, Y) => SKIP ... LEA */ return IR_LEA_SI_B; } } if (ctx->use_lists[insn->op2].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op2)) { uint32_t rule = ctx->rules[insn->op2]; if (!rule) { ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); } if (rule == IR_LEA_OB) { ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... LEA */ return IR_LEA_I_OB; } else if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; /* x = MUL(X, 2|4|8) ... ADD(Y, x) => SKIP ... 
LEA */ return IR_LEA_B_SI; } } } /* ADD(X, Y) => LEA [X + Y] */ return IR_LEA_IB; } binop_int: if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, insn, ref); return IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, insn->op2, ref); return IR_BINOP_INT; } } else { binop_fp: if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_fp(ctx, insn, ref); if (ctx->mflags & IR_X86_AVX) { return IR_BINOP_AVX; } else { return IR_BINOP_SSE2 | IR_MAY_SWAP; } } else { ir_match_fuse_load(ctx, insn->op2, ref); if (ctx->mflags & IR_X86_AVX) { return IR_BINOP_AVX; } else { return IR_BINOP_SSE2; } } } break; case IR_MUL: if (IR_IS_TYPE_INT(insn->type)) { if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { // 0 } else if (op2_insn->val.u64 == 1) { // return IR_COPY_INT; } else if (ir_type_size[insn->type] >= 4 && (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) { /* MUL(X, 2|4|8) => LEA [X*2|4|8] */ return IR_LEA_SI; } else if (ir_type_size[insn->type] >= 4 && (op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) { /* MUL(X, 3|5|9) => LEA [X+X*2|4|8] */ return IR_LEA_SIB; } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { /* MUL(X, PWR2) => SHL */ return IR_MUL_PWR2; } else if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1 && IR_IS_SIGNED_32BIT(op2_insn->val.i64) && !IR_IS_CONST_REF(insn->op1)) { /* MUL(_, imm32) => IMUL */ ir_match_fuse_load(ctx, insn->op1, ref); return IR_IMUL3; } } /* Prefer IMUL over MUL because it's more flexible and uses less registers ??? 
*/ // if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { if (ir_type_size[insn->type] != 1) { goto binop_int; } ir_match_fuse_load(ctx, insn->op2, ref); return IR_MUL_INT; } else { goto binop_fp; } break; case IR_ADD_OV: case IR_SUB_OV: IR_ASSERT(IR_IS_TYPE_INT(insn->type)); goto binop_int; case IR_MUL_OV: IR_ASSERT(IR_IS_TYPE_INT(insn->type)); if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (!IR_IS_SYM_CONST(op2_insn->op) && IR_IS_SIGNED_32BIT(op2_insn->val.i64) && !IR_IS_CONST_REF(insn->op1)) { /* MUL(_, imm32) => IMUL */ ir_match_fuse_load(ctx, insn->op1, ref); return IR_IMUL3; } } goto binop_int; } ir_match_fuse_load(ctx, insn->op2, ref); return IR_MUL_INT; case IR_DIV: if (IR_IS_TYPE_INT(insn->type)) { if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 1) { // return IR_COPY_INT; } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { /* DIV(X, PWR2) => SHR */ if (IR_IS_TYPE_UNSIGNED(insn->type)) { return IR_DIV_PWR2; } else { return IR_SDIV_PWR2; } } } ir_match_fuse_load(ctx, insn->op2, ref); return IR_DIV_INT; } else { goto binop_fp; } break; case IR_MOD: if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { /* MOD(X, PWR2) => AND */ if (IR_IS_TYPE_UNSIGNED(insn->type)) { return IR_MOD_PWR2; } else { return IR_SMOD_PWR2; } } } ir_match_fuse_load(ctx, insn->op2, ref); return IR_MOD_INT; case IR_BSWAP: case IR_NOT: if (insn->type == IR_BOOL) { IR_ASSERT(IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)); // TODO: IR_BOOL_NOT_FP return IR_BOOL_NOT_INT; } else { IR_ASSERT(IR_IS_TYPE_INT(insn->type)); return IR_OP_INT; } break; case IR_NEG: if (IR_IS_TYPE_INT(insn->type)) { return IR_OP_INT; } else { return IR_OP_FP; } case IR_ABS: if (IR_IS_TYPE_INT(insn->type)) { return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax } else { return IR_OP_FP; } case IR_OR: if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { // return IR_COPY_INT; } else if (op2_insn->val.i64 == -1) { // -1 } } goto binop_int; case IR_AND: if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.i64 == 0) { // 0 } else if (op2_insn->val.i64 == -1) { // return IR_COPY_INT; } } goto binop_int; case IR_XOR: if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } } goto binop_int; case IR_SHL: if (IR_IS_CONST_REF(insn->op2)) { if (ctx->flags & IR_OPT_CODEGEN) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { // return IR_COPY_INT; } else if (ir_type_size[insn->type] >= 4) { if 
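/*
 * Sketch of the constant-multiply strength reduction performed here:
 *
 *     MUL(x, 2|4|8)   -> IR_LEA_SI    e.g. x*4  -> lea eax, [rdi*4]
 *     MUL(x, 3|5|9)   -> IR_LEA_SIB   e.g. x*5  -> lea eax, [rdi+rdi*4]
 *     MUL(x, 2^k)     -> IR_MUL_PWR2  e.g. x*16 -> shl eax, 4
 *     MUL(x, imm32)   -> IR_IMUL3     e.g. imul eax, edi, 1000   (signed)
 *
 * DIV/MOD by a power of two likewise become shifts/ands (with the signed
 * fix-up noted earlier), and the op2 checks just below leave room for
 * turning SHL by 1/2/3 into a scaled-index lea.  Registers shown are
 * hypothetical.
 */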
(op2_insn->val.u64 == 1) { // lea [op1*2] } else if (op2_insn->val.u64 == 2) { // lea [op1*4] } else if (op2_insn->val.u64 == 3) { // lea [op1*8] } } } return IR_SHIFT_CONST; } return IR_SHIFT; case IR_SHR: case IR_SAR: case IR_ROL: case IR_ROR: if (IR_IS_CONST_REF(insn->op2)) { if (ctx->flags & IR_OPT_CODEGEN) { op2_insn = &ctx->ir_base[insn->op2]; if (IR_IS_SYM_CONST(op2_insn->op)) { /* pass */ } else if (IR_IS_CONST_REF(insn->op1)) { // const } else if (op2_insn->val.u64 == 0) { // return IR_COPY_INT; } } return IR_SHIFT_CONST; } return IR_SHIFT; case IR_MIN: case IR_MAX: if (IR_IS_TYPE_INT(insn->type)) { return IR_MIN_MAX_INT | IR_MAY_SWAP; } else { goto binop_fp; } break; case IR_COPY: if (IR_IS_TYPE_INT(insn->type)) { return IR_COPY_INT | IR_MAY_REUSE; } else { return IR_COPY_FP | IR_MAY_REUSE; } break; case IR_CALL: if (IR_IS_CONST_REF(insn->op2)) { const ir_insn *func = &ctx->ir_base[insn->op2]; if (func->op == IR_FUNC && func->proto) { uint32_t rule = ir_match_builtin_call(ctx, func); if (rule) { return rule; } } } ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT; #ifndef IR_REG_FP_RET1 if (IR_IS_TYPE_FP(insn->type)) { ctx->flags2 |= IR_HAS_FP_RET_SLOT; } #endif IR_FALLTHROUGH; case IR_TAILCALL: case IR_IJMP: ir_match_fuse_load(ctx, insn->op2, ref); return insn->op; case IR_VAR: return IR_SKIPPED | IR_VAR; case IR_PARAM: return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM; case IR_ALLOCA: /* alloca() may be used only in functions */ if (ctx->flags & IR_FUNCTION) { if (IR_IS_CONST_REF(insn->op2) && ctx->cfg_map[ref] == 1) { ir_insn *val = &ctx->ir_base[insn->op2]; if (!IR_IS_SYM_CONST(val->op)) { return IR_STATIC_ALLOCA; } } ctx->flags |= IR_USE_FRAME_POINTER; ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT; } return IR_ALLOCA; case IR_VSTORE: if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { store_rule = IR_VSTORE_INT; load_op = IR_VLOAD; store_int: if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op3) && (ctx->use_lists[insn->op3].count == 1 || (ctx->use_lists[insn->op3].count == 2 && (ctx->ir_base[insn->op3].op == IR_ADD_OV || ctx->ir_base[insn->op3].op == IR_SUB_OV)))) { ir_insn *op_insn = &ctx->ir_base[insn->op3]; uint32_t rule = ctx->rules[insn->op3]; if (!rule) { ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3); } if (((rule & IR_RULE_MASK) == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = BINOP(l, _) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; if (!IR_IS_CONST_REF(op_insn->op2) && ctx->rules[op_insn->op2] == (IR_FUSED|IR_SIMPLE|IR_LOAD)) { ctx->rules[op_insn->op2] = IR_LOAD_INT; } return IR_MEM_BINOP_INT; } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) && insn->op1 == op_insn->op2 && ctx->ir_base[op_insn->op2].op == load_op && ctx->ir_base[op_insn->op2].op2 == insn->op2 && ctx->use_lists[op_insn->op2].count == 2) { /* l = LOAD(_, a) ... v = BINOP(_, l) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... 
MEM_BINOP */ ir_swap_ops(op_insn); ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_BINOP_INT; } } else if (rule == IR_INC) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = INC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_INC */ ctx->rules[insn->op3] = IR_SKIPPED | IR_INC; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_INC; } } else if (rule == IR_DEC) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2){ /* l = LOAD(_, a) ... v = DEC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DEC */ ctx->rules[insn->op3] = IR_SKIPPED | IR_DEC; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_DEC; } } else if (rule == IR_MUL_PWR2) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = MUL_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MUL_PWR2 */ ctx->rules[insn->op3] = IR_SKIPPED | IR_MUL_PWR2; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_MUL_PWR2; } } else if (rule == IR_DIV_PWR2) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = DIV_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DIV_PWR2 */ ctx->rules[insn->op3] = IR_SKIPPED | IR_DIV_PWR2; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_DIV_PWR2; } } else if (rule == IR_MOD_PWR2) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = MOD_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MOD_PWR2 */ ctx->rules[insn->op3] = IR_SKIPPED | IR_MOD_PWR2; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_MOD_PWR2; } } else if (rule == IR_SHIFT) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = SHIFT(l, _) ... STORE(l, a, v) => SKIP ... SKIP_SHIFT ... MEM_SHIFT */ ctx->rules[insn->op3] = IR_FUSED | IR_SHIFT; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_SHIFT; } } else if (rule == IR_SHIFT_CONST) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = SHIFT(l, CONST) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_SHIFT_CONST */ ctx->rules[insn->op3] = IR_SKIPPED | IR_SHIFT_CONST; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_SHIFT_CONST; } } else if (rule == IR_OP_INT && op_insn->op != IR_BSWAP) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == load_op && ctx->ir_base[op_insn->op1].op2 == insn->op2 && ctx->use_lists[op_insn->op1].count == 2) { /* l = LOAD(_, a) ... v = OP(l) ... STORE(l, a, v) => SKIP ... SKIP ... 
MEM_OP */ ctx->rules[insn->op3] = IR_SKIPPED | IR_OP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; return IR_MEM_OP_INT; } } else if (rule == IR_CMP_INT && load_op == IR_LOAD) { /* c = CMP(_, _) ... STORE(c) => SKIP_CMP ... CMP_AND_STORE_INT */ ctx->rules[insn->op3] = IR_FUSED | IR_CMP_INT; return IR_CMP_AND_STORE_INT; } } return store_rule; } else { return IR_VSTORE_FP; } break; case IR_LOAD: ir_match_fuse_addr(ctx, insn->op2); if (IR_IS_TYPE_INT(insn->type)) { return IR_LOAD_INT; } else { return IR_LOAD_FP; } break; case IR_STORE: ir_match_fuse_addr(ctx, insn->op2); if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { store_rule = IR_STORE_INT; load_op = IR_LOAD; goto store_int; } else { return IR_STORE_FP; } break; case IR_RLOAD: if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { return IR_SKIPPED | IR_RLOAD; } return IR_RLOAD; case IR_RSTORE: if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { if ((ctx->flags & IR_OPT_CODEGEN) && ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { ir_insn *op_insn = &ctx->ir_base[insn->op2]; if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || // op_insn->op == IR_MUL || op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) { if (insn->op1 == op_insn->op1 && ctx->ir_base[op_insn->op1].op == IR_RLOAD && ctx->ir_base[op_insn->op1].op2 == insn->op3 && ctx->use_lists[op_insn->op1].count == 2) { /* l = RLOAD(r) ... v = BINOP(l, _) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; return IR_REG_BINOP_INT; } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) && insn->op1 == op_insn->op2 && ctx->ir_base[op_insn->op2].op == IR_RLOAD && ctx->ir_base[op_insn->op2].op2 == insn->op3 && ctx->use_lists[op_insn->op2].count == 2) { /* l = RLOAD(r) ... v = BINOP(x, l) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ ir_swap_ops(op_insn); ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; return IR_REG_BINOP_INT; } } } } ir_match_fuse_load(ctx, insn->op2, ref); return IR_RSTORE; case IR_START: case IR_BEGIN: case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: case IR_UNREACHABLE: return IR_SKIPPED | insn->op; case IR_RETURN: if (!insn->op2) { return IR_RETURN_VOID; } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { return IR_RETURN_INT; } else { return IR_RETURN_FP; } case IR_IF: if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { if (IR_IS_CONST_REF(op2_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) && ctx->ir_base[op2_insn->op2].val.i64 == 0 && op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) { /* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... 
TEST_AND_BRANCH */ ir_match_fuse_load_test_int(ctx, op1_insn, ref); ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT; ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP; return IR_TEST_AND_BRANCH_INT; } else if (insn->op2 == ref - 1 && /* previous instruction */ ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && (op2_insn->op == IR_EQ || op2_insn->op == IR_NE)))) { /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */ if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op1_insn->op2, ref); ctx->rules[op2_insn->op1] = IR_BINOP_INT; } ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_JCC_INT; } } /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_CMP_AND_BRANCH_INT; } else { /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, 1); ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; return IR_CMP_AND_BRANCH_FP; } } else if (op2_insn->op == IR_AND) { /* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */ ir_match_fuse_load_test_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; return IR_TEST_AND_BRANCH_INT; } else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) { /* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ... OVERFLOW_AND_BRANCH */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; return IR_OVERFLOW_AND_BRANCH; } } if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { if (insn->op2 == ref - 1) { /* previous instruction */ op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op == IR_ADD || op2_insn->op == IR_SUB || // op2_insn->op == IR_MUL || op2_insn->op == IR_OR || op2_insn->op == IR_AND || op2_insn->op == IR_XOR) { /* v = BINOP(_, _); IF(v) => BINOP; JCC */ if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op2_insn->op2, ref); ctx->rules[insn->op2] = IR_BINOP_INT; } return IR_JCC_INT; } } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op1 == ref - 1 /* previous instruction */ && insn->op2 == ref - 2 /* previous instruction */ && ctx->use_lists[insn->op2].count == 2 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { ir_insn *store_insn = &ctx->ir_base[insn->op1]; if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) { ir_insn *op_insn = &ctx->ir_base[insn->op2]; if (op_insn->op == IR_ADD || op_insn->op == IR_SUB || // op_insn->op == IR_MUL || op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) { if (ctx->ir_base[op_insn->op1].op == IR_LOAD && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { if (ir_in_same_block(ctx, op_insn->op1) && ctx->use_lists[op_insn->op1].count == 2 && store_insn->op1 == op_insn->op1) { /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; ir_match_fuse_addr(ctx, store_insn->op2); ctx->rules[insn->op1] = IR_MEM_BINOP_INT; return IR_JCC_INT; } } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) && ctx->ir_base[op_insn->op2].op == 
IR_LOAD && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { if (ir_in_same_block(ctx, op_insn->op2) && ctx->use_lists[op_insn->op2].count == 2 && store_insn->op1 == op_insn->op2) { /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ ir_swap_ops(op_insn); ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; ir_match_fuse_addr(ctx, store_insn->op2); ctx->rules[insn->op1] = IR_MEM_BINOP_INT; return IR_JCC_INT; } } } } } ir_match_fuse_load(ctx, insn->op2, ref); return IR_IF_INT; } else { IR_ASSERT(0 && "NIY IR_IF_FP"); break; } case IR_COND: if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) { ir_insn *op1_insn = &ctx->ir_base[insn->op1]; if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UGT) { if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) { ir_match_fuse_load_cmp_int(ctx, op1_insn, ref); ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT; return IR_COND_CMP_INT; } else { ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref, 1); ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP; return IR_COND_CMP_FP; } } } return IR_COND; case IR_GUARD: case IR_GUARD_NOT: if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { op2_insn = &ctx->ir_base[insn->op2]; if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP && (insn->op2 == ref - 1 || (insn->op2 == ctx->prev_ref[ref] - 1 && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { if (IR_IS_CONST_REF(op2_insn->op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) && ctx->ir_base[op2_insn->op2].val.i64 == 0) { if (op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; } else { ir_match_fuse_load(ctx, op1_insn->op2, ref); ctx->rules[op2_insn->op1] = IR_BINOP_INT; } /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... 
GUARD_JCC */ ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_GUARD_JCC_INT; } } else if ((ctx->flags & IR_OPT_CODEGEN) && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */ && ir_in_same_block(ctx, op2_insn->op1) && ctx->use_lists[op2_insn->op1].count == 2) { ir_insn *store_insn = &ctx->ir_base[insn->op2 - 1]; if (store_insn->op == IR_STORE && store_insn->op3 == op2_insn->op1) { ir_insn *op_insn = &ctx->ir_base[op2_insn->op1]; if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) || /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ ((op_insn->op == IR_ADD || op_insn->op == IR_SUB) && (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { if (ctx->ir_base[op_insn->op1].op == IR_LOAD && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { if (ir_in_same_block(ctx, op_insn->op1) && ctx->use_lists[op_insn->op1].count == 2 && store_insn->op1 == op_insn->op1) { /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */ ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; ir_match_fuse_addr(ctx, store_insn->op2); ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; return IR_GUARD_JCC_INT; } } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) && ctx->ir_base[op_insn->op2].op == IR_LOAD && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { if (ir_in_same_block(ctx, op_insn->op2) && ctx->use_lists[op_insn->op2].count == 2 && store_insn->op1 == op_insn->op2) { /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ ir_swap_ops(op_insn); ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; ir_match_fuse_addr(ctx, store_insn->op2); ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; return IR_GUARD_JCC_INT; } } } } } } /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; return IR_GUARD_CMP_INT; } else { /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT); ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; return IR_GUARD_CMP_FP; } } else if (op2_insn->op == IR_AND) { // TODO: OR, XOR. etc /* c = AND(_, _) ... GUARD(c) => SKIP_TEST ... GUARD_TEST */ ir_match_fuse_load_test_int(ctx, op2_insn, ref); ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; return IR_GUARD_TEST_INT; } else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) { /* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ... GUARD_OVERFLOW */ ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; return IR_GUARD_OVERFLOW; } } ir_match_fuse_load(ctx, insn->op2, ref); return insn->op; case IR_INT2FP: if (ir_type_size[ctx->ir_base[insn->op1].type] > (IR_IS_TYPE_SIGNED(ctx->ir_base[insn->op1].type) ? 
2 : 4)) { ir_match_fuse_load(ctx, insn->op1, ref); } return insn->op; case IR_SEXT: case IR_ZEXT: case IR_FP2INT: case IR_FP2FP: ir_match_fuse_load(ctx, insn->op1, ref); return insn->op; case IR_TRUNC: case IR_PROTO: ir_match_fuse_load(ctx, insn->op1, ref); return insn->op | IR_MAY_REUSE; case IR_BITCAST: ir_match_fuse_load(ctx, insn->op1, ref); if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { return insn->op | IR_MAY_REUSE; } else { return insn->op; } case IR_CTLZ: case IR_CTTZ: ir_match_fuse_load(ctx, insn->op1, ref); return IR_BIT_COUNT; case IR_CTPOP: ir_match_fuse_load(ctx, insn->op1, ref); return (ctx->mflags & IR_X86_BMI1) ? IR_BIT_COUNT : IR_CTPOP; case IR_VA_START: ctx->flags2 |= IR_HAS_VA_START; if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) { ir_use_list *use_list = &ctx->use_lists[insn->op2]; ir_ref *p, n = use_list->count; for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { ir_insn *use_insn = &ctx->ir_base[*p]; if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) { } else if (use_insn->op == IR_VA_COPY) { if (use_insn->op3 == insn->op2) { ctx->flags2 |= IR_HAS_VA_COPY; } } else if (use_insn->op == IR_VA_ARG) { if (use_insn->op2 == insn->op2) { if (IR_IS_TYPE_INT(use_insn->type)) { ctx->flags2 |= IR_HAS_VA_ARG_GP; } else { IR_ASSERT(IR_IS_TYPE_FP(use_insn->type)); ctx->flags2 |= IR_HAS_VA_ARG_FP; } } } else if (*p > ref) { /* diriect va_list access */ ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; } } } return IR_VA_START; case IR_VA_END: return IR_SKIPPED | IR_NOP; case IR_VADDR: if (ctx->use_lists[ref].count > 0) { ir_use_list *use_list = &ctx->use_lists[ref]; ir_ref *p, n = use_list->count; for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { if (ctx->ir_base[*p].op != IR_VA_END) { return IR_STATIC_ALLOCA; } } } return IR_SKIPPED | IR_NOP; default: break; } return insn->op; } static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) { if (rule == IR_LEA_IB) { ir_match_try_revert_lea_to_add(ctx, ref); } } /* code generation */ static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg) { int32_t offset; IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; IR_ASSERT(offset != -1); if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { IR_ASSERT(ctx->spill_base != IR_REG_NONE); *reg = ctx->spill_base; return offset; } *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; return IR_SPILL_POS_TO_OFFSET(offset); } static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v) { int32_t offset; ir_reg base; IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]); offset = ctx->live_intervals[v]->stack_spill_pos; IR_ASSERT(offset != -1); if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { IR_ASSERT(ctx->spill_base != IR_REG_NONE); return IR_MEM_BO(ctx->spill_base, offset); } base = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = IR_SPILL_POS_TO_OFFSET(offset); return IR_MEM_BO(base, offset); } static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref) { IR_ASSERT(!IR_IS_CONST_REF(ref)); return ir_vreg_spill_slot(ctx, ctx->vregs[ref]); } static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem) { ir_mem m = ir_ref_spill_slot(ctx, ref); return IR_MEM_VAL(m) == IR_MEM_VAL(mem); } static ir_mem ir_var_spill_slot(ir_ctx *ctx, ir_ref ref) { ir_insn *var_insn = &ctx->ir_base[ref]; ir_reg reg; IR_ASSERT(var_insn->op == IR_VAR); reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; return IR_MEM_BO(reg, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); } static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use) { ir_live_interval *ival; IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); ival = ctx->live_intervals[ctx->vregs[ref]]; while (ival) { ir_use_pos *use_pos = ival->use_pos; while (use_pos) { if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) { return !use_pos->next || use_pos->next->op_num == 0; } use_pos = use_pos->next; } ival = ival->next; } return 0; } static void ir_emit_mov_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (ir_type_size[type] == 8) { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (IR_IS_UNSIGNED_32BIT(val)) { | mov Rd(reg), (uint32_t)val // zero extended load } else if (IR_IS_SIGNED_32BIT(val)) { | mov Rq(reg), (int32_t)val // sign extended load } else if (type == IR_ADDR && IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, (intptr_t)val)) { | lea Ra(reg), [&val] } else { | mov64 Ra(reg), val } |.endif } else { | ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load } } static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; IR_ASSERT(IR_IS_TYPE_INT(type)); if (val == 0) { | ASM_REG_REG_OP xor, type, reg, reg } else { ir_emit_mov_imm_int(ctx, type, reg, val); } } static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_REG_MEM_OP mov, type, reg, mem } static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *insn = &ctx->ir_base[src]; int label; if (type == IR_FLOAT && insn->val.u32 == 0) { if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) } else { | xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) } } else if (type == IR_DOUBLE && insn->val.u64 == 0) { if (ctx->mflags & IR_X86_AVX) { | vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) } else { | xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) } } else { label = ir_const_label(ctx, src); | ASM_FP_REG_TXT_OP movs, type, reg, [=>label] } } static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_FP_REG_MEM_OP movs, type, reg, mem } static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) { if (IR_IS_TYPE_INT(type)) { ir_emit_load_mem_int(ctx, type, reg, mem); } else { ir_emit_load_mem_fp(ctx, type, reg, mem); } } static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) { 
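	/* Materialize the address of a statically allocated stack slot (rule IR_STATIC_ALLOCA).
	 * The slot lives at a fixed offset from the frame/stack pointer, so its address is simply
	 * base + offset: a plain register move when the offset is zero, a single LEA otherwise.
	 * Illustrative sketch (register names arbitrary): a slot at [rbp-24] would yield
	 * "lea rax, [rbp-24]", a slot at offset 0 just "mov rax, rbp". */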
ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); if (offset == 0) { | mov Ra(reg), Ra(base) } else { | lea Ra(reg), [Ra(base)+offset] } } static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) { if (IR_IS_CONST_REF(src)) { if (IR_IS_TYPE_INT(type)) { ir_insn *insn = &ctx->ir_base[src]; if (insn->op == IR_SYM || insn->op == IR_FUNC) { void *addr = ir_sym_val(ctx, insn); ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr); } else if (insn->op == IR_STR) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int label = ir_const_label(ctx, src); | lea Ra(reg), aword [=>label] } else { ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); } } else { ir_emit_load_imm_fp(ctx, type, reg, src); } } else if (ctx->vregs[src]) { ir_emit_load_mem(ctx, type, reg, ir_ref_spill_slot(ctx, src)); } else { ir_load_local_addr(ctx, reg, src); } } static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_MEM_REG_OP mov, type, mem, reg } static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_FP_MEM_REG_OP movs, type, mem, reg } static void ir_emit_store_mem_imm(ir_ctx *ctx, ir_type type, ir_mem mem, int32_t imm) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_MEM_IMM_OP mov, type, mem, imm } static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, bool is_arg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *val_insn = &ctx->ir_base[src]; IR_ASSERT(IR_IS_CONST_REF(src)); if (val_insn->op == IR_STR) { int label = ir_const_label(ctx, src); IR_ASSERT(tmp_reg != IR_REG_NONE); |.if X64 | lea Ra(tmp_reg), aword [=>label] || ir_emit_store_mem_int(ctx, type, mem, tmp_reg); |.else | ASM_TMEM_TXT_OP mov, aword, mem, =>label |.endif } else { int64_t val = val_insn->val.i64; if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) { val = (int64_t)(intptr_t)ir_sym_val(ctx, val_insn); } if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(val)) { if (is_arg && ir_type_size[type] < 4) { type = IR_U32; } ir_emit_store_mem_imm(ctx, type, mem, val); } else { IR_ASSERT(tmp_reg != IR_REG_NONE); tmp_reg = IR_REG_NUM(tmp_reg); ir_emit_load_imm_int(ctx, type, tmp_reg, val); ir_emit_store_mem_int(ctx, type, mem, tmp_reg); } } } static void ir_emit_store_mem_fp_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, ir_reg tmp_fp_reg) { ir_val *val = &ctx->ir_base[src].val; if (type == IR_FLOAT) { ir_emit_store_mem_imm(ctx, IR_U32, mem, val->i32); } else if (sizeof(void*) == 8 && val->i64 == 0) { ir_emit_store_mem_imm(ctx, IR_U64, mem, 0); } else if (sizeof(void*) == 8 && tmp_reg != IR_REG_NONE) { ir_emit_load_imm_int(ctx, IR_U64, tmp_reg, val->i64); ir_emit_store_mem_int(ctx, IR_U64, mem, tmp_reg); } else { tmp_fp_reg = IR_REG_NUM(tmp_fp_reg); ir_emit_load(ctx, type, tmp_fp_reg, src); ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg); } } static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_store_mem_int(ctx, type, mem, reg); } else { 
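		/* FLOAT/DOUBLE values take the scalar FP path: ir_emit_store_mem_fp emits the movss/movsd family via ASM_FP_MEM_REG_OP. */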
ir_emit_store_mem_fp(ctx, type, mem, reg); } } static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) { IR_ASSERT(dst >= 0); ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg); } static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_REG_REG_OP mov, type, dst, src } #define IR_HAVE_SWAP_INT static void ir_emit_swap(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_REG_REG_OP xchg, type, dst, src } static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (ir_type_size[type] > 2) { | ASM_REG_REG_OP mov, type, dst, src } else if (ir_type_size[type] == 2) { if (IR_IS_TYPE_SIGNED(type)) { | movsx Rd(dst), Rw(src) } else { | movzx Rd(dst), Rw(src) } } else /* if (ir_type_size[type] == 1) */ { if (IR_IS_TYPE_SIGNED(type)) { | movsx Rd(dst), Rb(src) } else { | movzx Rd(dst), Rb(src) } } } static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; | ASM_FP_REG_REG_OP movap, type, dst, src } static ir_mem ir_fuse_addr_const(ir_ctx *ctx, ir_ref ref) { ir_mem mem; ir_insn *addr_insn = &ctx->ir_base[ref]; IR_ASSERT(IR_IS_CONST_REF(ref)); if (IR_IS_SYM_CONST(addr_insn->op)) { void *addr = ir_sym_val(ctx, addr_insn); IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT((intptr_t)addr)); mem = IR_MEM_O((int32_t)(intptr_t)addr); } else { IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)); mem = IR_MEM_O(addr_insn->val.i32); } return mem; } static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) { uint32_t rule = ctx->rules[ref]; ir_insn *insn = &ctx->ir_base[ref]; ir_insn *op1_insn, *op2_insn, *offset_insn; ir_ref base_reg_ref, index_reg_ref; ir_reg base_reg = IR_REG_NONE, index_reg; int32_t offset = 0, scale; IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_OB && (rule & IR_RULE_MASK) <= IR_LEA_SI_B) || rule == IR_STATIC_ALLOCA); switch (rule & IR_RULE_MASK) { default: IR_ASSERT(0); case IR_LEA_OB: offset_insn = insn; if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { base_reg_ref = ref * sizeof(ir_ref) + 1; } index_reg_ref = IR_UNUSED; scale = 1; break; case IR_LEA_SI: scale = ctx->ir_base[insn->op2].val.i32; index_reg_ref = ref * sizeof(ir_ref) + 1; base_reg_ref = IR_UNUSED; offset_insn = NULL; break; case IR_LEA_SIB: base_reg_ref = index_reg_ref = ref * sizeof(ir_ref) + 1; scale = ctx->ir_base[insn->op2].val.i32 - 1; offset_insn = NULL; break; case IR_LEA_IB: if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 2; } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 1; } else { base_reg_ref = ref * sizeof(ir_ref) + 1; index_reg_ref = ref * sizeof(ir_ref) + 2; } offset_insn = NULL; scale = 1; break; case IR_LEA_OB_I: op1_insn = &ctx->ir_base[insn->op1]; offset_insn = op1_insn; scale = 1; if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; } else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 2; } else { base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; index_reg_ref = ref * sizeof(ir_ref) + 2; } break; case IR_LEA_I_OB: op2_insn = &ctx->ir_base[insn->op2]; offset_insn = op2_insn; scale = 1; if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; } else if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; index_reg_ref = ref * sizeof(ir_ref) + 1; } else { base_reg_ref = ref * sizeof(ir_ref) + 1; index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; } break; case IR_LEA_SI_O: index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; op1_insn = &ctx->ir_base[insn->op1]; scale = ctx->ir_base[op1_insn->op2].val.i32; offset_insn = insn; base_reg_ref = IR_UNUSED; break; case IR_LEA_SIB_O: base_reg_ref = index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; op1_insn = &ctx->ir_base[insn->op1]; scale = ctx->ir_base[op1_insn->op2].val.i32 - 1; offset_insn = insn; break; case IR_LEA_IB_O: base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; index_reg_ref = insn->op1 * sizeof(ir_ref) + 2; offset_insn = insn; scale = 1; break; case IR_LEA_OB_SI: index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; op1_insn = &ctx->ir_base[insn->op1]; offset_insn = op1_insn; op2_insn = &ctx->ir_base[insn->op2]; scale = ctx->ir_base[op2_insn->op2].val.i32; if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; } break; case IR_LEA_SI_OB: index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; op1_insn = &ctx->ir_base[insn->op1]; scale = ctx->ir_base[op1_insn->op2].val.i32; op2_insn = &ctx->ir_base[insn->op2]; offset_insn = op2_insn; if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { base_reg_ref = insn->op2 * sizeof(ir_ref) + 1; } break; case IR_LEA_B_SI: if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { base_reg_ref = ref * sizeof(ir_ref) + 1; } index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; op2_insn = &ctx->ir_base[insn->op2]; scale = ctx->ir_base[op2_insn->op2].val.i32; offset_insn = NULL; break; case IR_LEA_SI_B: index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = IR_UNUSED; } else { base_reg_ref = ref * sizeof(ir_ref) + 2; } op1_insn = &ctx->ir_base[insn->op1]; scale = ctx->ir_base[op1_insn->op2].val.i32; offset_insn = NULL; break; case IR_ALLOCA: offset = IR_SPILL_POS_TO_OFFSET(insn->op3); base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; base_reg_ref = index_reg_ref = IR_UNUSED; scale = 1; offset_insn = NULL; break; } if (offset_insn) { ir_insn *addr_insn = &ctx->ir_base[offset_insn->op2]; if (IR_IS_SYM_CONST(addr_insn->op)) { void *addr = ir_sym_val(ctx, addr_insn); IR_ASSERT(sizeof(void*) != 8 || IR_IS_SIGNED_32BIT((intptr_t)addr)); offset += (int64_t)(intptr_t)(addr); } else { if (offset_insn->op == IR_SUB) { offset = -addr_insn->val.i32; } else { offset += addr_insn->val.i32; } } } if (base_reg_ref) { if (UNEXPECTED(ctx->rules[base_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) { base_reg = ir_get_fused_reg(ctx, root, base_reg_ref); } else { base_reg = ((int8_t*)ctx->regs)[base_reg_ref]; } IR_ASSERT(base_reg != IR_REG_NONE); if (IR_REG_SPILLED(base_reg)) { base_reg = IR_REG_NUM(base_reg); ir_emit_load(ctx, insn->type, base_reg, ((ir_ref*)ctx->ir_base)[base_reg_ref]); } } index_reg = IR_REG_NONE; if (index_reg_ref) { if (base_reg_ref && ((ir_ref*)ctx->ir_base)[index_reg_ref] == ((ir_ref*)ctx->ir_base)[base_reg_ref]) { index_reg = base_reg; } else { if (UNEXPECTED(ctx->rules[index_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) { index_reg = ir_get_fused_reg(ctx, root, index_reg_ref); } else { index_reg = ((int8_t*)ctx->regs)[index_reg_ref]; } IR_ASSERT(index_reg != IR_REG_NONE); if (IR_REG_SPILLED(index_reg)) { index_reg = IR_REG_NUM(index_reg); ir_emit_load(ctx, insn->type, index_reg, ((ir_ref*)ctx->ir_base)[index_reg_ref]); } } } return IR_MEM(base_reg, offset, index_reg, scale); } static ir_mem ir_fuse_mem(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *mem_insn, ir_reg reg) { if (reg != IR_REG_NONE) { if (IR_REG_SPILLED(reg)) { reg = IR_REG_NUM(reg); ir_emit_load(ctx, IR_ADDR, reg, mem_insn->op2); } return IR_MEM_B(reg); } else if (IR_IS_CONST_REF(mem_insn->op2)) { return ir_fuse_addr_const(ctx, mem_insn->op2); } else { return ir_fuse_addr(ctx, root, mem_insn->op2); } } static ir_mem ir_fuse_load(ir_ctx *ctx, ir_ref root, ir_ref ref) { ir_insn *load_insn = &ctx->ir_base[ref]; ir_reg reg; IR_ASSERT(load_insn->op == IR_LOAD); if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) { reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2); } else { reg = ctx->regs[ref][2]; } return ir_fuse_mem(ctx, root, ref, load_insn, reg); } static int32_t ir_fuse_imm(ir_ctx *ctx, ir_ref ref) { ir_insn *val_insn = &ctx->ir_base[ref]; IR_ASSERT(IR_IS_CONST_REF(ref)); if (IR_IS_SYM_CONST(val_insn->op)) { void *addr = ir_sym_val(ctx, val_insn); IR_ASSERT(IR_IS_SIGNED_32BIT((intptr_t)addr)); return (int32_t)(intptr_t)addr; } else { IR_ASSERT(IR_IS_SIGNED_32BIT(val_insn->val.i32)); return val_insn->val.i32; } } static void ir_emit_load_ex(ir_ctx *ctx, ir_type type, ir_reg 
reg, ir_ref src, ir_ref root) { if (IR_IS_CONST_REF(src)) { if (IR_IS_TYPE_INT(type)) { ir_insn *insn = &ctx->ir_base[src]; if (insn->op == IR_SYM || insn->op == IR_FUNC) { void *addr = ir_sym_val(ctx, insn); ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr); } else if (insn->op == IR_STR) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int label = ir_const_label(ctx, src); | lea Ra(reg), aword [=>label] } else { ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); } } else { ir_emit_load_imm_fp(ctx, type, reg, src); } } else if (ir_rule(ctx, src) == IR_STATIC_ALLOCA) { ir_load_local_addr(ctx, reg, src); } else { ir_mem mem; if (ir_rule(ctx, src) & IR_FUSED) { mem = ir_fuse_load(ctx, root, src); } else { mem = ir_ref_spill_slot(ctx, src); } ir_emit_load_mem(ctx, type, reg, mem); } } static void ir_emit_prologue(ir_ctx *ctx) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int offset = ctx->stack_frame_size + ctx->call_stack_size; if (ctx->flags & IR_USE_FRAME_POINTER) { | push Ra(IR_REG_RBP) | mov Ra(IR_REG_RBP), Ra(IR_REG_RSP) } if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) { int i; ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP); for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { if (IR_REGSET_IN(used_preserved_regs, i)) { offset -= sizeof(void*); | push Ra(i) } } } if (ctx->stack_frame_size + ctx->call_stack_size) { if (ctx->fixed_stack_red_zone) { IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); } else if (offset) { | sub Ra(IR_REG_RSP), offset } } if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) { ir_reg fp; int i; ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP); if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; offset -= ctx->stack_frame_size + ctx->call_stack_size; } else { fp = IR_REG_STACK_POINTER; } for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { if (IR_REGSET_IN(used_preserved_regs, i)) { offset -= sizeof(void*); if (ctx->mflags & IR_X86_AVX) { | vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) } else { | movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) } } } } if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { #if defined(_WIN64) ir_reg fp; int offset; if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; offset = sizeof(void*) * 2; } else { fp = IR_REG_STACK_POINTER; offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); } | mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1) | mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2) | mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3) | mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4) #elif defined(IR_TARGET_X64) |.if X64 const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; uint32_t i; ir_reg fp; int offset; if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); } else { fp = IR_REG_STACK_POINTER; offset = ctx->locals_area_size + ctx->call_stack_size; } if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { /* skip named args */ offset += sizeof(void*) * ctx->gp_reg_params; for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { | mov qword [Ra(fp)+offset], Rq(int_reg_params[i]) offset += sizeof(void*); } } if ((ctx->flags2 
& (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { | test al, al | je >1 /* skip named args */ offset += 16 * ctx->fp_reg_params; for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { | movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST) offset += 16; } |1: } |.endif #endif } } static void ir_emit_epilogue(ir_ctx *ctx) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) { int i; int offset; ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; offset = 0; } else { fp = IR_REG_STACK_POINTER; offset = ctx->stack_frame_size + ctx->call_stack_size; } for (i = 0; i < IR_REG_NUM; i++) { if (IR_REGSET_IN(used_preserved_regs, i)) { if (i < IR_REG_FP_FIRST) { offset -= sizeof(void*); } else { offset -= sizeof(void*); if (ctx->mflags & IR_X86_AVX) { | vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] } else { | movsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] } } } } } if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) { int i; ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP); int offset; if (ctx->flags & IR_USE_FRAME_POINTER) { offset = 0; } else { offset = ctx->stack_frame_size + ctx->call_stack_size; } if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) { int i; ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP); for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) { if (IR_REGSET_IN(used_preserved_regs, i)) { offset -= sizeof(void*); } } } if (ctx->flags & IR_USE_FRAME_POINTER) { | lea Ra(IR_REG_RSP), [Ra(IR_REG_RBP)+offset] } else if (offset) { | add Ra(IR_REG_RSP), offset } for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) { if (IR_REGSET_IN(used_preserved_regs, i)) { | pop Ra(i) } } if (ctx->flags & IR_USE_FRAME_POINTER) { | pop Ra(IR_REG_RBP) } } else if (ctx->flags & IR_USE_FRAME_POINTER) { | mov Ra(IR_REG_RSP), Ra(IR_REG_RBP) | pop Ra(IR_REG_RBP) } else if (ctx->stack_frame_size + ctx->call_stack_size) { if (ctx->fixed_stack_red_zone) { IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); } else { | add Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) } } } static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } if (op1 == op2) { op2_reg = def_reg; } } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: case IR_ADD_OV: | ASM_REG_REG_OP add, type, def_reg, op2_reg break; case IR_SUB: case IR_SUB_OV: | 
ASM_REG_REG_OP sub, type, def_reg, op2_reg break; case IR_MUL: case IR_MUL_OV: | ASM_REG_REG_MUL imul, type, def_reg, op2_reg break; case IR_OR: | ASM_REG_REG_OP or, type, def_reg, op2_reg break; case IR_AND: | ASM_REG_REG_OP and, type, def_reg, op2_reg break; case IR_XOR: | ASM_REG_REG_OP xor, type, def_reg, op2_reg break; } } else if (IR_IS_CONST_REF(op2)) { int32_t val = ir_fuse_imm(ctx, op2); switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: case IR_ADD_OV: | ASM_REG_IMM_OP add, type, def_reg, val break; case IR_SUB: case IR_SUB_OV: | ASM_REG_IMM_OP sub, type, def_reg, val break; case IR_MUL: case IR_MUL_OV: | ASM_REG_IMM_MUL imul, type, def_reg, val break; case IR_OR: | ASM_REG_IMM_OP or, type, def_reg, val break; case IR_AND: | ASM_REG_IMM_OP and, type, def_reg, val break; case IR_XOR: | ASM_REG_IMM_OP xor, type, def_reg, val break; } } else { ir_mem mem; if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: case IR_ADD_OV: | ASM_REG_MEM_OP add, type, def_reg, mem break; case IR_SUB: case IR_SUB_OV: | ASM_REG_MEM_OP sub, type, def_reg, mem break; case IR_MUL: case IR_MUL_OV: | ASM_REG_MEM_MUL imul, type, def_reg, mem break; case IR_OR: | ASM_REG_MEM_OP or, type, def_reg, mem break; case IR_AND: | ASM_REG_MEM_OP and, type, def_reg, mem break; case IR_XOR: | ASM_REG_MEM_OP xor, type, def_reg, mem break; } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; int32_t val = ir_fuse_imm(ctx, op2); IR_ASSERT(def_reg != IR_REG_NONE); IR_ASSERT(!IR_IS_CONST_REF(op1)); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } switch (ir_type_size[type]) { default: IR_ASSERT(0); case 2: | imul Rw(def_reg), Rw(op1_reg), val break; case 4: | imul Rd(def_reg), Rd(op1_reg), val break; |.if X64 || case 8: | imul Rq(def_reg), Rq(op1_reg), val || break; |.endif } } else { ir_mem mem; if (ir_rule(ctx, op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op1); } else { mem = ir_ref_spill_slot(ctx, op1); } | ASM_REG_MEM_TXT_MUL imul, type, def_reg, mem, val } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } } if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } if (op1 == op2) { return; } | ASM_REG_REG_OP cmp, type, def_reg, op2_reg if (insn->op == IR_MIN) { if 
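/* def_reg already holds op1; after the CMP above, a conditional move pulls in op2 only when it is the better value: MIN uses cmovg (signed) or cmova (unsigned) to replace def_reg when it compares greater than op2, MAX uses cmovl/cmovb symmetrically, keeping the sequence branch-free. */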
(IR_IS_TYPE_SIGNED(type)) { | ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg } else { | ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg } } else { IR_ASSERT(insn->op == IR_MAX); if (IR_IS_TYPE_SIGNED(type)) { | ASM_REG_REG_OP2 cmovl, type, def_reg, op2_reg } else { | ASM_REG_REG_OP2 cmovb, type, def_reg, op2_reg } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_type type = ctx->ir_base[insn->op1].type; IR_ASSERT(def_reg != IR_REG_NONE); IR_ASSERT(IR_IS_TYPE_INT(type)); if (IR_IS_TYPE_SIGNED(type)) { | seto Rb(def_reg) } else { | setc Rb(def_reg) } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; ir_type type = ctx->ir_base[overflow_insn->op1].type; uint32_t true_block, false_block; bool reverse = 0; ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (true_block == next_block) { reverse = 1; true_block = false_block; false_block = 0; } else if (false_block == next_block) { false_block = 0; } if (IR_IS_TYPE_SIGNED(type)) { if (reverse) { | jno =>true_block } else { | jo =>true_block } } else { if (reverse) { | jnc =>true_block } else { | jc =>true_block } } if (false_block) { | jmp =>false_block } } static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *op_insn = &ctx->ir_base[insn->op3]; ir_type type = op_insn->type; ir_ref op2 = op_insn->op2; ir_reg op2_reg = ctx->regs[insn->op3][2]; ir_mem mem; if (insn->op == IR_STORE) { mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); } else { IR_ASSERT(insn->op == IR_VSTORE); mem = ir_var_spill_slot(ctx, insn->op2); } if (op2_reg == IR_REG_NONE) { int32_t val = ir_fuse_imm(ctx, op2); switch (op_insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: case IR_ADD_OV: | ASM_MEM_IMM_OP add, type, mem, val break; case IR_SUB: case IR_SUB_OV: | ASM_MEM_IMM_OP sub, type, mem, val break; case IR_OR: | ASM_MEM_IMM_OP or, type, mem, val break; case IR_AND: | ASM_MEM_IMM_OP and, type, mem, val break; case IR_XOR: | ASM_MEM_IMM_OP xor, type, mem, val break; } } else { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); } switch (op_insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: case IR_ADD_OV: | ASM_MEM_REG_OP add, type, mem, op2_reg break; case IR_SUB: case IR_SUB_OV: | ASM_MEM_REG_OP sub, type, mem, op2_reg break; case IR_OR: | ASM_MEM_REG_OP or, type, mem, op2_reg break; case IR_AND: | ASM_MEM_REG_OP and, type, mem, op2_reg break; case IR_XOR: | ASM_MEM_REG_OP xor, type, mem, op2_reg break; } } } static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *op_insn = &ctx->ir_base[insn->op2]; ir_type type = op_insn->type; ir_ref op2 = op_insn->op2; ir_reg op2_reg = ctx->regs[insn->op2][2]; ir_reg reg; IR_ASSERT(insn->op == IR_RSTORE); reg = insn->op3; if (op2_reg == IR_REG_NONE) { int32_t val = ir_fuse_imm(ctx, op2); switch (op_insn->op) { default: IR_ASSERT(0 && 
"NIY binary op"); case IR_ADD: | ASM_REG_IMM_OP add, type, reg, val break; case IR_SUB: | ASM_REG_IMM_OP sub, type, reg, val break; case IR_OR: | ASM_REG_IMM_OP or, type, reg, val break; case IR_AND: | ASM_REG_IMM_OP and, type, reg, val break; case IR_XOR: | ASM_REG_IMM_OP xor, type, reg, val break; } } else { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); } switch (op_insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_REG_REG_OP add, type, reg, op2_reg break; case IR_SUB: | ASM_REG_REG_OP sub, type, reg, op2_reg break; case IR_OR: | ASM_REG_REG_OP or, type, reg, op2_reg break; case IR_AND: | ASM_REG_REG_OP and, type, reg, op2_reg break; case IR_XOR: | ASM_REG_REG_OP xor, type, reg, op2_reg break; } } } static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(IR_IS_CONST_REF(insn->op2)); IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } } if (insn->op == IR_MUL) { uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); if (shift == 1) { | ASM_REG_REG_OP add, type, def_reg, def_reg } else { | ASM_REG_IMM_OP shl, type, def_reg, shift } } else if (insn->op == IR_DIV) { uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); | ASM_REG_IMM_OP shr, type, def_reg, shift } else { IR_ASSERT(insn->op == IR_MOD); uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; |.if X64 || if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) { || ir_reg op2_reg = ctx->regs[def][2]; || || op2_reg = IR_REG_NUM(op2_reg); || ir_emit_load_imm_int(ctx, type, op2_reg, mask); | ASM_REG_REG_OP and, type, def_reg, op2_reg || } else { |.endif | ASM_REG_IMM_OP and, type, def_reg, mask |.if X64 || } |.endif } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; IR_ASSERT(shift != 0); IR_ASSERT(IR_IS_CONST_REF(insn->op2)); IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); IR_ASSERT(op1_reg != IR_REG_NONE && def_reg != IR_REG_NONE && op1_reg != def_reg); if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (shift == 1) { |.if X64 || if (ir_type_size[type] == 8) { | mov Rq(def_reg), Rq(op1_reg) | ASM_REG_IMM_OP shr, type, def_reg, 63 | add Rq(def_reg), Rq(op1_reg) || } else { |.endif | mov Rd(def_reg), Rd(op1_reg) | ASM_REG_IMM_OP shr, type, def_reg, (ir_type_size[type]*8-1) | add Rd(def_reg), Rd(op1_reg) |.if X64 || } |.endif } else { |.if X64 || if (ir_type_size[type] == 8) { || ir_reg op2_reg = ctx->regs[def][2]; || || if (op2_reg != IR_REG_NONE) { || op2_reg = IR_REG_NUM(op2_reg); || ir_emit_load_imm_int(ctx, 
type, op2_reg, offset); | lea Rq(def_reg), [Rq(op1_reg)+Rq(op2_reg)] || } else { | lea Rq(def_reg), [Rq(op1_reg)+(int32_t)offset] || } || } else { |.endif | lea Rd(def_reg), [Rd(op1_reg)+(int32_t)offset] |.if X64 || } |.endif | ASM_REG_REG_OP test, type, op1_reg, op1_reg | ASM_REG_REG_OP2 cmovns, type, def_reg, op1_reg } | ASM_REG_IMM_OP sar, type, def_reg, shift if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg tmp_reg = ctx->regs[def][3]; uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; IR_ASSERT(shift != 0); IR_ASSERT(IR_IS_CONST_REF(insn->op2)); IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } } if (tmp_reg != op1_reg) { ir_emit_mov(ctx, type, tmp_reg, def_reg); } if (shift == 1) { | ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-1) } else { | ASM_REG_IMM_OP sar, type, tmp_reg, (ir_type_size[type]*8-1) | ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-shift) } | ASM_REG_REG_OP add, type, def_reg, tmp_reg |.if X64 || if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) { || ir_reg op2_reg = ctx->regs[def][2]; || || op2_reg = IR_REG_NUM(op2_reg); || ir_emit_load_imm_int(ctx, type, op2_reg, mask); | ASM_REG_REG_OP and, type, def_reg, op2_reg || } else { |.endif | ASM_REG_IMM_OP and, type, def_reg, mask |.if X64 || } |.endif | ASM_REG_REG_OP sub, type, def_reg, tmp_reg if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *op_insn = &ctx->ir_base[insn->op3]; ir_type type = op_insn->type; ir_mem mem; IR_ASSERT(IR_IS_CONST_REF(op_insn->op2)); IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op)); if (insn->op == IR_STORE) { mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); } else { IR_ASSERT(insn->op == IR_VSTORE); mem = ir_var_spill_slot(ctx, insn->op2); } if (op_insn->op == IR_MUL) { uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); | ASM_MEM_IMM_OP shl, type, mem, shift } else if (op_insn->op == IR_DIV) { uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); | ASM_MEM_IMM_OP shr, type, mem, shift } else { IR_ASSERT(op_insn->op == IR_MOD); uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1; IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); | ASM_MEM_IMM_OP and, type, mem, mask } } static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = 
IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, insn->op1); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } if (op2_reg != IR_REG_RCX) { if (op1_reg == IR_REG_RCX) { ir_emit_mov(ctx, type, def_reg, op1_reg); op1_reg = def_reg; } if (op2_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); } else { ir_emit_load(ctx, type, IR_REG_RCX, insn->op2); } } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, insn->op1); } } switch (insn->op) { default: IR_ASSERT(0); case IR_SHL: | ASM_REG_TXT_OP shl, insn->type, def_reg, cl break; case IR_SHR: | ASM_REG_TXT_OP shr, insn->type, def_reg, cl break; case IR_SAR: | ASM_REG_TXT_OP sar, insn->type, def_reg, cl break; case IR_ROL: | ASM_REG_TXT_OP rol, insn->type, def_reg, cl break; case IR_ROR: | ASM_REG_TXT_OP ror, insn->type, def_reg, cl break; } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *op_insn = &ctx->ir_base[insn->op3]; ir_type type = op_insn->type; ir_ref op2 = op_insn->op2; ir_reg op2_reg = ctx->regs[insn->op3][2]; ir_mem mem; if (insn->op == IR_STORE) { mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); } else { IR_ASSERT(insn->op == IR_VSTORE); mem = ir_var_spill_slot(ctx, insn->op2); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); } if (op2_reg != IR_REG_RCX) { if (op2_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); } else { ir_emit_load(ctx, type, IR_REG_RCX, op2); } } switch (op_insn->op) { default: IR_ASSERT(0); case IR_SHL: | ASM_MEM_TXT_OP shl, type, mem, cl break; case IR_SHR: | ASM_MEM_TXT_OP shr, type, mem, cl break; case IR_SAR: | ASM_MEM_TXT_OP sar, type, mem, cl break; case IR_ROL: | ASM_MEM_TXT_OP rol, type, mem, cl break; case IR_ROR: | ASM_MEM_TXT_OP ror, type, mem, cl break; } } static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int32_t shift; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); shift = ctx->ir_base[insn->op2].val.i32; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } } switch (insn->op) { default: IR_ASSERT(0); case IR_SHL: | ASM_REG_IMM_OP shl, insn->type, def_reg, shift break; case IR_SHR: | ASM_REG_IMM_OP shr, insn->type, def_reg, shift break; case IR_SAR: | ASM_REG_IMM_OP sar, insn->type, def_reg, shift break; case IR_ROL: | ASM_REG_IMM_OP rol, insn->type, def_reg, shift break; case IR_ROR: | ASM_REG_IMM_OP ror, insn->type, def_reg, shift break; } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = 
&data->dasm_state; ir_insn *op_insn = &ctx->ir_base[insn->op3]; ir_type type = op_insn->type; int32_t shift; ir_mem mem; IR_ASSERT(IR_IS_CONST_REF(op_insn->op2)); IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op)); IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[op_insn->op2].val.i64)); shift = ctx->ir_base[op_insn->op2].val.i32; if (insn->op == IR_STORE) { mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); } else { IR_ASSERT(insn->op == IR_VSTORE); mem = ir_var_spill_slot(ctx, insn->op2); } switch (op_insn->op) { default: IR_ASSERT(0); case IR_SHL: | ASM_MEM_IMM_OP shl, type, mem, shift break; case IR_SHR: | ASM_MEM_IMM_OP shr, type, mem, shift break; case IR_SAR: | ASM_MEM_IMM_OP sar, type, mem, shift break; case IR_ROL: | ASM_MEM_IMM_OP rol, type, mem, shift break; case IR_ROR: | ASM_MEM_IMM_OP ror, type, mem, shift break; } } static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } } if (rule == IR_INC) { | ASM_REG_OP inc, insn->type, def_reg } else if (rule == IR_DEC) { | ASM_REG_OP dec, insn->type, def_reg } else if (insn->op == IR_NOT) { | ASM_REG_OP not, insn->type, def_reg } else if (insn->op == IR_NEG) { | ASM_REG_OP neg, insn->type, def_reg } else { IR_ASSERT(insn->op == IR_BSWAP); switch (ir_type_size[insn->type]) { default: IR_ASSERT(0); case 4: | bswap Rd(def_reg) break; case 8: IR_ASSERT(sizeof(void*) == 8); |.if X64 | bswap Rq(def_reg) |.endif break; } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_bit_count(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } switch (ir_type_size[insn->type]) { default: IR_ASSERT(0); case 2: if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | lzcnt Rw(def_reg), Rw(op1_reg) } else { | bsr Rw(def_reg), Rw(op1_reg) | xor Rw(def_reg), 0xf } } else if (insn->op == IR_CTTZ) { if (ctx->mflags & IR_X86_BMI1) { | tzcnt Rw(def_reg), Rw(op1_reg) } else { | bsf Rw(def_reg), Rw(op1_reg) } } else { IR_ASSERT(insn->op == IR_CTPOP); | popcnt Rw(def_reg), Rw(op1_reg) } break; case 1: | movzx Rd(op1_reg), Rb(op1_reg) if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | lzcnt Rd(def_reg), Rd(op1_reg) | sub Rd(def_reg), 24 } else { | bsr Rd(def_reg), Rd(op1_reg) | xor Rw(def_reg), 0x7 } break; } IR_FALLTHROUGH; case 4: if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | lzcnt Rd(def_reg), Rd(op1_reg) } else { | bsr Rd(def_reg), Rd(op1_reg) | xor Rw(def_reg), 0x1f } } else if (insn->op == IR_CTTZ) { if (ctx->mflags & IR_X86_BMI1) { | tzcnt Rd(def_reg), Rd(op1_reg) } else { | bsf Rd(def_reg), Rd(op1_reg) } } else { IR_ASSERT(insn->op == IR_CTPOP); | popcnt Rd(def_reg), 
Rd(op1_reg) } break; |.if X64 case 8: if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | lzcnt Rq(def_reg), Rq(op1_reg) } else { | bsr Rq(def_reg), Rq(op1_reg) | xor Rw(def_reg), 0x3f } } else if (insn->op == IR_CTTZ) { if (ctx->mflags & IR_X86_BMI1) { | tzcnt Rq(def_reg), Rq(op1_reg) } else { | bsf Rq(def_reg), Rq(op1_reg) } } else { IR_ASSERT(insn->op == IR_CTPOP); | popcnt Rq(def_reg), Rq(op1_reg) } break; |.endif } } else { ir_mem mem; if (ir_rule(ctx, op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op1); } else { mem = ir_ref_spill_slot(ctx, op1); } switch (ir_type_size[insn->type]) { default: IR_ASSERT(0); case 2: if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | ASM_TXT_TMEM_OP lzcnt, Rw(def_reg), word, mem } else { | ASM_TXT_TMEM_OP bsr, Rw(def_reg), word, mem | xor Rw(def_reg), 0xf } } else if (insn->op == IR_CTTZ) { if (ctx->mflags & IR_X86_BMI1) { | ASM_TXT_TMEM_OP tzcnt, Rw(def_reg), word, mem } else { | ASM_TXT_TMEM_OP bsf, Rw(def_reg), word, mem } } else { | ASM_TXT_TMEM_OP popcnt, Rw(def_reg), word, mem } break; case 4: if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | ASM_TXT_TMEM_OP lzcnt, Rd(def_reg), dword, mem } else { | ASM_TXT_TMEM_OP bsr, Rd(def_reg), dword, mem | xor Rw(def_reg), 0x1f } } else if (insn->op == IR_CTTZ) { if (ctx->mflags & IR_X86_BMI1) { | ASM_TXT_TMEM_OP tzcnt, Rd(def_reg), dword, mem } else { | ASM_TXT_TMEM_OP bsf, Rd(def_reg), dword, mem } } else { | ASM_TXT_TMEM_OP popcnt, Rd(def_reg), dword, mem } break; |.if X64 case 8: if (insn->op == IR_CTLZ) { if (ctx->mflags & IR_X86_BMI1) { | ASM_TXT_TMEM_OP lzcnt, Rq(def_reg), qword, mem } else { | ASM_TXT_TMEM_OP bsr, Rq(def_reg), qword, mem | xor Rw(def_reg), 0x3f } } else if (insn->op == IR_CTTZ) { if (ctx->mflags & IR_X86_BMI1) { | ASM_TXT_TMEM_OP tzcnt, Rq(def_reg), qword, mem } else { | ASM_TXT_TMEM_OP bsf, Rq(def_reg), qword, mem } } else { | ASM_TXT_TMEM_OP popcnt, Rq(def_reg), qword, mem } break; |.endif } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg tmp_reg = ctx->regs[def][2]; |.if X64 || ir_reg const_reg = ctx->regs[def][3]; |.endif IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); if (op1_reg == IR_REG_NONE) { ir_emit_load(ctx, type, def_reg, op1); if (ir_type_size[insn->type] == 1) { | movzx Rd(def_reg), Rb(def_reg) } else if (ir_type_size[insn->type] == 2) { | movzx Rd(def_reg), Rw(def_reg) } } else { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } switch (ir_type_size[insn->type]) { default: IR_ASSERT(0); case 1: | movzx Rd(def_reg), Rb(op1_reg) break; case 2: | movzx Rd(def_reg), Rw(op1_reg) break; case 4: | mov Rd(def_reg), Rd(op1_reg) break; |.if X64 || case 8: | mov Rq(def_reg), Rq(op1_reg) || break; |.endif } } switch (ir_type_size[insn->type]) { default: IR_ASSERT(0); case 1: | mov Rd(tmp_reg), Rd(def_reg) | shr Rd(def_reg), 1 | and Rd(def_reg), 0x55 | sub Rd(tmp_reg), Rd(def_reg) | mov Rd(def_reg), Rd(tmp_reg) | and Rd(def_reg), 0x33 | shr Rd(tmp_reg), 2 | and Rd(tmp_reg), 0x33 | add Rd(tmp_reg), Rd(def_reg) | mov Rd(def_reg), Rd(tmp_reg) | shr Rd(def_reg), 4 | add Rd(def_reg), Rd(tmp_reg) | and Rd(def_reg), 0x0f break; case 2: | mov Rd(tmp_reg), Rd(def_reg) | shr 
Rd(def_reg), 1 | and Rd(def_reg), 0x5555 | sub Rd(tmp_reg), Rd(def_reg) | mov Rd(def_reg), Rd(tmp_reg) | and Rd(def_reg), 0x3333 | shr Rd(tmp_reg), 2 | and Rd(tmp_reg), 0x3333 | add Rd(tmp_reg), Rd(def_reg) | mov Rd(def_reg), Rd(tmp_reg) | shr Rd(def_reg), 4 | add Rd(def_reg), Rd(tmp_reg) | and Rd(def_reg), 0x0f0f | mov Rd(tmp_reg), Rd(def_reg) | shr Rd(tmp_reg), 8 | and Rd(def_reg), 0x0f | add Rd(def_reg), Rd(tmp_reg) break; case 4: | mov Rd(tmp_reg), Rd(def_reg) | shr Rd(def_reg), 1 | and Rd(def_reg), 0x55555555 | sub Rd(tmp_reg), Rd(def_reg) | mov Rd(def_reg), Rd(tmp_reg) | and Rd(def_reg), 0x33333333 | shr Rd(tmp_reg), 2 | and Rd(tmp_reg), 0x33333333 | add Rd(tmp_reg), Rd(def_reg) | mov Rd(def_reg), Rd(tmp_reg) | shr Rd(def_reg), 4 | add Rd(def_reg), Rd(tmp_reg) | and Rd(def_reg), 0x0f0f0f0f | imul Rd(def_reg), 0x01010101 | shr Rd(def_reg), 24 break; |.if X64 || case 8: || IR_ASSERT(const_reg != IR_REG_NONE); | mov Rq(tmp_reg), Rq(def_reg) | shr Rq(def_reg), 1 | mov64 Rq(const_reg), 0x5555555555555555 | and Rq(def_reg), Rq(const_reg) | sub Rq(tmp_reg), Rq(def_reg) | mov Rq(def_reg), Rq(tmp_reg) | mov64 Rq(const_reg), 0x3333333333333333 | and Rq(def_reg), Rq(const_reg) | shr Rq(tmp_reg), 2 | and Rq(tmp_reg), Rq(const_reg) | add Rq(tmp_reg), Rq(def_reg) | mov Rq(def_reg), Rq(tmp_reg) | shr Rq(def_reg), 4 | add Rq(def_reg), Rq(tmp_reg) | mov64 Rq(const_reg), 0x0f0f0f0f0f0f0f0f | and Rq(def_reg), Rq(const_reg) | mov64 Rq(const_reg), 0x0101010101010101 | imul Rq(def_reg), Rq(const_reg) | shr Rq(def_reg), 56 || break; |.endif } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *op_insn = &ctx->ir_base[insn->op3]; ir_type type = op_insn->type; ir_mem mem; if (insn->op == IR_STORE) { mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); } else { IR_ASSERT(insn->op == IR_VSTORE); mem = ir_var_spill_slot(ctx, insn->op2); } if (rule == IR_MEM_INC) { | ASM_MEM_OP inc, type, mem } else if (rule == IR_MEM_DEC) { | ASM_MEM_OP dec, type, mem } else if (op_insn->op == IR_NOT) { | ASM_MEM_OP not, type, mem } else { IR_ASSERT(op_insn->op == IR_NEG); | ASM_MEM_OP neg, type, mem } } static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } IR_ASSERT(def_reg != op1_reg); ir_emit_mov(ctx, insn->type, def_reg, op1_reg); | ASM_REG_OP neg, insn->type, def_reg | ASM_REG_REG_OP2 cmovs, type, def_reg, op1_reg if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = ctx->ir_base[insn->op1].type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op1_reg != IR_REG_NONE) { | ASM_REG_REG_OP test, type,
op1_reg, op1_reg } else { ir_mem mem = ir_ref_spill_slot(ctx, op1); | ASM_MEM_IMM_OP cmp, type, mem, 0 } | sete Rb(def_reg) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; ir_mem mem; if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op1_reg != IR_REG_RAX) { if (op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg); } else { ir_emit_load(ctx, type, IR_REG_RAX, op1); } } if (op2_reg == IR_REG_NONE && op1 == op2) { op2_reg = IR_REG_RAX; } else if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); } } else if (IR_IS_CONST_REF(op2) && (insn->op == IR_MUL || insn->op == IR_MUL_OV)) { op2_reg = IR_REG_RDX; ir_emit_load(ctx, type, op2_reg, op2); } if (insn->op == IR_MUL || insn->op == IR_MUL_OV) { if (IR_IS_TYPE_SIGNED(insn->type)) { if (op2_reg != IR_REG_NONE) { | ASM_REG_OP imul, type, op2_reg } else { if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_MEM_OP imul, type, mem } } else { if (op2_reg != IR_REG_NONE) { | ASM_REG_OP mul, type, op2_reg } else { if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_MEM_OP mul, type, mem } } } else { if (IR_IS_TYPE_SIGNED(type)) { if (ir_type_size[type] == 8) { | cqo } else if (ir_type_size[type] == 4) { | cdq } else if (ir_type_size[type] == 2) { | cwd } else { | movsx ax, al } if (op2_reg != IR_REG_NONE) { | ASM_REG_OP idiv, type, op2_reg } else { if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_MEM_OP idiv, type, mem } } else { if (ir_type_size[type] == 1) { | movzx ax, al } else { | ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX } if (op2_reg != IR_REG_NONE) { | ASM_REG_OP div, type, op2_reg } else { if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_MEM_OP div, type, mem } } } if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) { if (def_reg != IR_REG_NONE) { if (def_reg != IR_REG_RAX) { ir_emit_mov(ctx, type, def_reg, IR_REG_RAX); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } else { ir_emit_store(ctx, type, def, IR_REG_RAX); } } else { IR_ASSERT(insn->op == IR_MOD); if (ir_type_size[type] == 1) { if (def_reg != IR_REG_NONE) { | mov al, ah if (def_reg != IR_REG_RAX) { | mov Rb(def_reg), al } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } else { ir_reg fp; int32_t offset = ir_ref_spill_slot_offset(ctx, def, &fp); //????? 
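/* For 8-bit DIV/IDIV the remainder is returned in AH, so it can be stored to the spill slot directly without moving it through AL first. */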
| mov byte [Ra(fp)+offset], ah } } else { if (def_reg != IR_REG_NONE) { if (def_reg != IR_REG_RDX) { ir_emit_mov(ctx, type, def_reg, IR_REG_RDX); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } else { ir_emit_store(ctx, type, def, IR_REG_RDX); } } } } static void ir_rodata(ir_ctx *ctx) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; |.rodata if (!data->rodata_label) { int label = data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; |=>label: } } static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_fp_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } } if (insn->op == IR_NEG) { if (insn->type == IR_DOUBLE) { if (!data->double_neg_const) { data->double_neg_const = 1; ir_rodata(ctx); |.align 16 |->double_neg_const: |.dword 0, 0x80000000, 0, 0 |.code } if (ctx->mflags & IR_X86_AVX) { | vxorpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] } else { | xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] } } else { IR_ASSERT(insn->type == IR_FLOAT); if (!data->float_neg_const) { data->float_neg_const = 1; ir_rodata(ctx); |.align 16 |->float_neg_const: |.dword 0x80000000, 0, 0, 0 |.code } if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] } else { | xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] } } } else { IR_ASSERT(insn->op == IR_ABS); if (insn->type == IR_DOUBLE) { if (!data->double_abs_const) { data->double_abs_const = 1; ir_rodata(ctx); |.align 16 |->double_abs_const: |.dword 0xffffffff, 0x7fffffff, 0, 0 |.code } if (ctx->mflags & IR_X86_AVX) { | vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] } else { | andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] } } else { IR_ASSERT(insn->type == IR_FLOAT); if (!data->float_abs_const) { data->float_abs_const = 1; ir_rodata(ctx); |.align 16 |->float_abs_const: |.dword 0x7fffffff, 0, 0, 0 |.code } if (ctx->mflags & IR_X86_AVX) { | vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] } else { | andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] } } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (def_reg != op1_reg) { if (op1_reg != IR_REG_NONE) { ir_emit_fp_mov(ctx, type, def_reg, op1_reg); } else { ir_emit_load(ctx, type, def_reg, op1); } if (op1 == op2) { op2_reg = def_reg; } } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = 
IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_SSE2_REG_REG_OP adds, type, def_reg, op2_reg break; case IR_SUB: | ASM_SSE2_REG_REG_OP subs, type, def_reg, op2_reg break; case IR_MUL: | ASM_SSE2_REG_REG_OP muls, type, def_reg, op2_reg break; case IR_DIV: | ASM_SSE2_REG_REG_OP divs, type, def_reg, op2_reg break; case IR_MIN: | ASM_SSE2_REG_REG_OP mins, type, def_reg, op2_reg break; case IR_MAX: | ASM_SSE2_REG_REG_OP maxs, type, def_reg, op2_reg break; } } else if (IR_IS_CONST_REF(op2)) { int label = ir_const_label(ctx, op2); switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_SSE2_REG_TXT_OP adds, type, def_reg, [=>label] break; case IR_SUB: | ASM_SSE2_REG_TXT_OP subs, type, def_reg, [=>label] break; case IR_MUL: | ASM_SSE2_REG_TXT_OP muls, type, def_reg, [=>label] break; case IR_DIV: | ASM_SSE2_REG_TXT_OP divs, type, def_reg, [=>label] break; case IR_MIN: | ASM_SSE2_REG_TXT_OP mins, type, def_reg, [=>label] break; case IR_MAX: | ASM_SSE2_REG_TXT_OP maxs, type, def_reg, [=>label] break; } } else { ir_mem mem; if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_SSE2_REG_MEM_OP adds, type, def_reg, mem break; case IR_SUB: | ASM_SSE2_REG_MEM_OP subs, type, def_reg, mem break; case IR_MUL: | ASM_SSE2_REG_MEM_OP muls, type, def_reg, mem break; case IR_DIV: | ASM_SSE2_REG_MEM_OP divs, type, def_reg, mem break; case IR_MIN: | ASM_SSE2_REG_MEM_OP mins, type, def_reg, mem break; case IR_MAX: | ASM_SSE2_REG_MEM_OP maxs, type, def_reg, mem break; } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_AVX_REG_REG_REG_OP vadds, type, def_reg, op1_reg, op2_reg break; case IR_SUB: | ASM_AVX_REG_REG_REG_OP vsubs, type, def_reg, op1_reg, op2_reg break; case IR_MUL: | ASM_AVX_REG_REG_REG_OP vmuls, type, def_reg, op1_reg, op2_reg break; case IR_DIV: | ASM_AVX_REG_REG_REG_OP vdivs, type, def_reg, op1_reg, op2_reg break; case IR_MIN: | ASM_AVX_REG_REG_REG_OP vmins, type, def_reg, op1_reg, op2_reg break; case IR_MAX: | ASM_AVX_REG_REG_REG_OP vmaxs, type, def_reg, op1_reg, op2_reg break; } } else if (IR_IS_CONST_REF(op2)) { int label = ir_const_label(ctx, op2); switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_AVX_REG_REG_TXT_OP vadds, type, def_reg, op1_reg, [=>label] break; case IR_SUB: | ASM_AVX_REG_REG_TXT_OP vsubs, type, def_reg, op1_reg, [=>label] break; case IR_MUL: | ASM_AVX_REG_REG_TXT_OP vmuls, type, def_reg, op1_reg, [=>label] break; case IR_DIV: | ASM_AVX_REG_REG_TXT_OP vdivs, type, def_reg, op1_reg, [=>label] break; case 
IR_MIN: | ASM_AVX_REG_REG_TXT_OP vmins, type, def_reg, op1_reg, [=>label] break; case IR_MAX: | ASM_AVX_REG_REG_TXT_OP vmaxs, type, def_reg, op1_reg, [=>label] break; } } else { ir_mem mem; if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } switch (insn->op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_ADD: | ASM_AVX_REG_REG_MEM_OP vadds, type, def_reg, op1_reg, mem break; case IR_SUB: | ASM_AVX_REG_REG_MEM_OP vsubs, type, def_reg, op1_reg, mem break; case IR_MUL: | ASM_AVX_REG_REG_MEM_OP vmuls, type, def_reg, op1_reg, mem break; case IR_DIV: | ASM_AVX_REG_REG_MEM_OP vdivs, type, def_reg, op1_reg, mem break; case IR_MIN: | ASM_AVX_REG_REG_MEM_OP vmins, type, def_reg, op1_reg, mem break; case IR_MAX: | ASM_AVX_REG_REG_MEM_OP vmaxs, type, def_reg, op1_reg, mem break; } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_ref root, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op1_reg != IR_REG_NONE) { if (op2_reg != IR_REG_NONE) { | ASM_REG_REG_OP cmp, type, op1_reg, op2_reg } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { | ASM_REG_REG_OP test, type, op1_reg, op1_reg } else if (IR_IS_CONST_REF(op2)) { int32_t val = ir_fuse_imm(ctx, op2); | ASM_REG_IMM_OP cmp, type, op1_reg, val } else { ir_mem mem; if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, root, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_REG_MEM_OP cmp, type, op1_reg, mem } } else if (IR_IS_CONST_REF(op1)) { IR_ASSERT(0); } else { ir_mem mem; if (ir_rule(ctx, op1) & IR_FUSED) { mem = ir_fuse_load(ctx, root, op1); } else { mem = ir_ref_spill_slot(ctx, op1); } if (op2_reg != IR_REG_NONE) { | ASM_MEM_REG_OP cmp, type, mem, op2_reg } else { int32_t val = ir_fuse_imm(ctx, op2); | ASM_MEM_IMM_OP cmp, type, mem, val } } } static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *cmp_insn) { ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_ref op1 = cmp_insn->op1; ir_ref op2 = cmp_insn->op2; ir_reg op1_reg = ctx->regs[ref][1]; ir_reg op2_reg = ctx->regs[ref][2]; if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } ir_emit_cmp_int_common(ctx, type, root, cmp_insn, op1_reg, op1, op2_reg, op2); } static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | sete Rb(def_reg) break; case IR_NE: | setne Rb(def_reg) break; case IR_LT: | setl Rb(def_reg) break; case IR_GE: | setge Rb(def_reg) break; case IR_LE: | setle Rb(def_reg) break; case IR_GT: | setg Rb(def_reg) break; case IR_ULT: | setb Rb(def_reg) break; case IR_UGE: | setae Rb(def_reg) break; case IR_ULE: | setbe Rb(def_reg) break; case IR_UGT: | seta Rb(def_reg) break; } } static void _ir_emit_setcc_int_mem(ir_ctx *ctx, uint8_t op, ir_mem mem) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | ASM_TMEM_OP sete, byte, mem break; case 
IR_NE: | ASM_TMEM_OP setne, byte, mem break; case IR_LT: | ASM_TMEM_OP setl, byte, mem break; case IR_GE: | ASM_TMEM_OP setge, byte, mem break; case IR_LE: | ASM_TMEM_OP setle, byte, mem break; case IR_GT: | ASM_TMEM_OP setg, byte, mem break; case IR_ULT: | ASM_TMEM_OP setb, byte, mem break; case IR_UGE: | ASM_TMEM_OP setae, byte, mem break; case IR_ULE: | ASM_TMEM_OP setbe, byte, mem break; case IR_UGT: | ASM_TMEM_OP seta, byte, mem break; } } static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = ctx->ir_base[insn->op1].type; ir_op op = insn->op; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { if (op == IR_ULT) { /* always false */ | xor Ra(def_reg), Ra(def_reg) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } return; } else if (op == IR_UGE) { /* always true */ | ASM_REG_IMM_OP mov, insn->type, def_reg, 1 if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } return; } else if (op == IR_ULE) { op = IR_EQ; } else if (op == IR_UGT) { op = IR_NE; } } ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2); _ir_emit_setcc_int(ctx, op, def_reg); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op op) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *binop_insn = &ctx->ir_base[ref]; ir_type type = binop_insn->type; ir_ref op1 = binop_insn->op1; ir_ref op2 = binop_insn->op2; ir_reg op1_reg = ctx->regs[ref][1]; ir_reg op2_reg = ctx->regs[ref][2]; IR_ASSERT(binop_insn->op == IR_AND); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } | ASM_REG_REG_OP test, type, op1_reg, op2_reg } else if (IR_IS_CONST_REF(op2)) { int32_t val = ir_fuse_imm(ctx, op2); if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) { | test Rb(op1_reg), Rb(op1_reg) } else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) { if (op1_reg == IR_REG_RAX) { | test ah, ah } else if (op1_reg == IR_REG_RBX) { | test bh, bh } else if (op1_reg == IR_REG_RCX) { | test ch, ch } else if (op1_reg == IR_REG_RDX) { | test dh, dh } else { IR_ASSERT(0); } } else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) { | test Rw(op1_reg), Rw(op1_reg) } else if ((op == IR_EQ || op == IR_NE) && val == -1) { | test Rd(op1_reg), Rd(op1_reg) } else { | ASM_REG_IMM_OP test, type, op1_reg, val } } else { ir_mem mem; if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, root, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_REG_MEM_OP test, type, op1_reg, mem } } else if 
(IR_IS_CONST_REF(op1)) { IR_ASSERT(0); } else { ir_mem mem; if (ir_rule(ctx, op1) & IR_FUSED) { mem = ir_fuse_load(ctx, root, op1); } else { mem = ir_ref_spill_slot(ctx, op1); } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } | ASM_MEM_REG_OP test, type, mem, op2_reg } else { IR_ASSERT(!IR_IS_CONST_REF(op1)); int32_t val = ir_fuse_imm(ctx, op2); | ASM_MEM_IMM_OP test, type, mem, val } } } static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); IR_ASSERT(def_reg != IR_REG_NONE); ir_emit_test_int_common(ctx, def, insn->op1, insn->op); _ir_emit_setcc_int(ctx, insn->op, def_reg); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); IR_ASSERT(def_reg != IR_REG_NONE); _ir_emit_setcc_int(ctx, insn->op, def_reg); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_op op = cmp_insn->op; ir_ref op1, op2; ir_reg op1_reg, op2_reg; op1 = cmp_insn->op1; op2 = cmp_insn->op2; op1_reg = ctx->regs[cmp_ref][1]; op2_reg = ctx->regs[cmp_ref][2]; if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { ir_reg tmp_reg; SWAP_REFS(op1, op2); tmp_reg = op1_reg; op1_reg = op2_reg; op2_reg = tmp_reg; } IR_ASSERT(op1_reg != IR_REG_NONE); if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } | ASM_FP_REG_REG_OP ucomis, type, op1_reg, op2_reg } else if (IR_IS_CONST_REF(op2)) { int label = ir_const_label(ctx, op2); | ASM_FP_REG_TXT_OP ucomis, type, op1_reg, [=>label] } else { ir_mem mem; if (ir_rule(ctx, op2) & IR_FUSED) { mem = ir_fuse_load(ctx, root, op2); } else { mem = ir_ref_spill_slot(ctx, op2); } | ASM_FP_REG_MEM_OP ucomis, type, op1_reg, mem } return op; } static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn); ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg tmp_reg = ctx->regs[def][3]; IR_ASSERT(def_reg != IR_REG_NONE); switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | setnp Rb(def_reg) | mov Rd(tmp_reg), 0 | cmovne Rd(def_reg), Rd(tmp_reg) break; case IR_NE: | setp Rb(def_reg) | mov Rd(tmp_reg), 1 | cmovne Rd(def_reg), Rd(tmp_reg) break; case IR_LT: | setnp Rb(def_reg) | mov Rd(tmp_reg), 0 | cmovae Rd(def_reg), Rd(tmp_reg) break; case IR_GE: | setae Rb(def_reg) break; case IR_LE: | setnp Rb(def_reg) | mov Rd(tmp_reg), 0 | cmova Rd(def_reg), Rd(tmp_reg) break; case IR_GT: | seta Rb(def_reg) break; case IR_ULT: | setb Rb(def_reg) break; case IR_UGE: | setp Rb(def_reg) | mov Rd(tmp_reg), 1 | cmovae Rd(def_reg), Rd(tmp_reg) break; case IR_ULE: | setbe Rb(def_reg) break; case IR_UGT: | setp Rb(def_reg) | mov Rd(tmp_reg), 1 | cmova Rd(def_reg), Rd(tmp_reg) break; } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, 
def_reg); } } static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) { uint32_t true_block, false_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (true_block != next_block) { | jmp =>true_block } } static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) { uint32_t true_block, false_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (false_block != next_block) { | jmp =>false_block } } static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp) { uint32_t true_block, false_block; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (true_block == next_block) { /* swap to avoid unconditional JMP */ if (int_cmp || op == IR_EQ || op == IR_NE) { op ^= 1; // reverse } else { op ^= 5; // reverse } true_block = false_block; false_block = 0; } else if (false_block == next_block) { false_block = 0; } if (int_cmp) { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | je =>true_block break; case IR_NE: | jne =>true_block break; case IR_LT: | jl =>true_block break; case IR_GE: | jge =>true_block break; case IR_LE: | jle =>true_block break; case IR_GT: | jg =>true_block break; case IR_ULT: | jb =>true_block break; case IR_UGE: | jae =>true_block break; case IR_ULE: | jbe =>true_block break; case IR_UGT: | ja =>true_block break; } } else { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: if (!false_block) { | jp >1 | je =>true_block |1: } else { | jp =>false_block | je =>true_block } break; case IR_NE: | jne =>true_block | jp =>true_block break; case IR_LT: if (!false_block) { | jp >1 | jb =>true_block |1: } else { | jp =>false_block | jb =>true_block } break; case IR_GE: | jae =>true_block break; case IR_LE: if (!false_block) { | jp >1 | jbe =>true_block |1: } else { | jp =>false_block | jbe =>true_block } break; case IR_GT: | ja =>true_block break; case IR_ULT: | jb =>true_block break; case IR_UGE: | jp =>true_block | jae =>true_block break; case IR_ULE: | jbe =>true_block break; case IR_UGT: | jp =>true_block | ja =>true_block break; } } if (false_block) { | jmp =>false_block } } static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; ir_op op = cmp_insn->op; ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_ref op1 = cmp_insn->op1; ir_ref op2 = cmp_insn->op2; ir_reg op1_reg = ctx->regs[insn->op2][1]; ir_reg op2_reg = ctx->regs[insn->op2][2]; if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { if (op == IR_ULT) { /* always false */ ir_emit_jmp_false(ctx, b, def, next_block); return; } else if (op == IR_UGE) { /* always true */ ir_emit_jmp_true(ctx, b, def, next_block); return; } else if (op == IR_ULE) { op = IR_EQ; } else if (op == IR_UGT) { op = IR_NE; } } bool same_comparison = 0; ir_insn *prev_insn = &ctx->ir_base[insn->op1]; if 
(prev_insn->op == IR_IF_TRUE || prev_insn->op == IR_IF_FALSE) { if (ir_rule(ctx, prev_insn->op1) == IR_CMP_AND_BRANCH_INT) { prev_insn = &ctx->ir_base[prev_insn->op1]; prev_insn = &ctx->ir_base[prev_insn->op2]; if (prev_insn->op1 == cmp_insn->op1 && prev_insn->op2 == cmp_insn->op2) { same_comparison = true; } } } if (!same_comparison) { ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2); } ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); } static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_ref op2 = insn->op2; ir_op op = ctx->ir_base[op2].op; if (op >= IR_EQ && op <= IR_UGT) { op2 = ctx->ir_base[op2].op1; } else { IR_ASSERT(op == IR_AND); op = IR_NE; } ir_emit_test_int_common(ctx, def, op2, op); ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); } static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); ir_emit_jcc(ctx, b, def, insn, next_block, op, 0); } static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_type type = ctx->ir_base[insn->op2].type; ir_reg op2_reg = ctx->regs[def][2]; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } | ASM_REG_REG_OP test, type, op2_reg, op2_reg } else if (IR_IS_CONST_REF(insn->op2)) { uint32_t true_block, false_block; ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (ir_const_is_true(&ctx->ir_base[insn->op2])) { if (true_block != next_block) { | jmp =>true_block } } else { if (false_block != next_block) { | jmp =>false_block } } return; } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { uint32_t true_block, false_block; ir_get_true_false_blocks(ctx, b, &true_block, &false_block); if (true_block != next_block) { | jmp =>true_block } return; } else { ir_mem mem; if (ir_rule(ctx, insn->op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op2); } else { mem = ir_ref_spill_slot(ctx, insn->op2); } | ASM_MEM_IMM_OP cmp, type, mem, 0 } ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1); } static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op1 = insn->op1; ir_ref op2 = insn->op2; ir_ref op3 = insn->op3; ir_type op1_type = ctx->ir_base[op1].type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; ir_reg op3_reg = ctx->regs[def][3]; IR_ASSERT(def_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); if (op1 == op2) { op1_reg = op2_reg; } if (op3 == op2) { op3_reg = op2_reg; } } if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, op3); if (op1 == op2) { op1_reg = op3_reg; } } if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, op1_type, op1_reg, op1); } if (IR_IS_TYPE_INT(op1_type)) { if (op1_reg != IR_REG_NONE) { | ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg } else { ir_mem mem = ir_ref_spill_slot(ctx, op1); | ASM_MEM_IMM_OP cmp, op1_type, mem, 0 } if 
(IR_IS_TYPE_INT(type)) { IR_ASSERT(op2_reg != IR_REG_NONE || op3_reg != IR_REG_NONE); if (op3_reg != IR_REG_NONE) { if (op3_reg == def_reg) { IR_ASSERT(op2_reg != IR_REG_NONE); | ASM_REG_REG_OP2 cmovne, type, def_reg, op2_reg } else { if (op2_reg != IR_REG_NONE) { if (def_reg != op2_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op2_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op2_reg); } } } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) { /* prevent "xor" and flags clobbering */ ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64); } else { ir_emit_load_ex(ctx, type, def_reg, op2, def); } | ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg } } else { IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg); if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) { /* prevent "xor" and flags clobbering */ ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64); } else { ir_emit_load_ex(ctx, type, def_reg, op3, def); } | ASM_REG_REG_OP2 cmovne, type, def_reg, op2_reg } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } return; } | je >2 } else { if (!data->double_zero_const) { data->double_zero_const = 1; ir_rodata(ctx); |.align 16 |->double_zero_const: |.dword 0, 0 |.code } | ASM_FP_REG_TXT_OP ucomis, op1_type, op1_reg, [->double_zero_const] | jp >1 | je >2 |1: } if (op2_reg != IR_REG_NONE) { if (def_reg != op2_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op2_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op2_reg); } } } else { ir_emit_load_ex(ctx, type, def_reg, op2, def); } | jmp >3 |2: if (op3_reg != IR_REG_NONE) { if (def_reg != op3_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op3_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op3_reg); } } } else { ir_emit_load_ex(ctx, type, def_reg, op3, def); } |3: if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_cond_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op2 = insn->op2; ir_ref op3 = insn->op3; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op2_reg = ctx->regs[def][2]; ir_reg op3_reg = ctx->regs[def][3]; ir_op op; if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); if (op3 == op2) { op3_reg = op2_reg; } } if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, op3); } ir_emit_cmp_int_common2(ctx, def, insn->op1, &ctx->ir_base[insn->op1]); op = ctx->ir_base[insn->op1].op; if (IR_IS_TYPE_INT(type)) { if (op3_reg != IR_REG_NONE) { if (op3_reg == def_reg) { IR_ASSERT(op2_reg != IR_REG_NONE); op3_reg = op2_reg; op ^= 1; // reverse } else { if (op2_reg != IR_REG_NONE) { if (def_reg != op2_reg) { // if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op2_reg); // } else { // ir_emit_fp_mov(ctx, type, def_reg, op2_reg); // } } } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) { /* prevent "xor" and flags clobbering */ ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64); } else { ir_emit_load_ex(ctx, type, def_reg, op2, def); } } } else { IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg); if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) { /* prevent "xor" and flags clobbering */ 
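/* (a plain "mov reg, imm" keeps EFLAGS intact, whereas the "xor reg, reg" idiom for loading zero would destroy the comparison result consumed by the CMOVcc emitted below) */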
ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64); } else { ir_emit_load_ex(ctx, type, def_reg, op3, def); } op3_reg = op2_reg; op ^= 1; // reverse } switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | ASM_REG_REG_OP2 cmovne, type, def_reg, op3_reg break; case IR_NE: | ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg break; case IR_LT: | ASM_REG_REG_OP2 cmovge, type, def_reg, op3_reg break; case IR_GE: | ASM_REG_REG_OP2 cmovl, type, def_reg, op3_reg break; case IR_LE: | ASM_REG_REG_OP2 cmovg, type, def_reg, op3_reg break; case IR_GT: | ASM_REG_REG_OP2 cmovle, type, def_reg, op3_reg break; case IR_ULT: | ASM_REG_REG_OP2 cmovae, type, def_reg, op3_reg break; case IR_UGE: | ASM_REG_REG_OP2 cmovb, type, def_reg, op3_reg break; case IR_ULE: | ASM_REG_REG_OP2 cmova, type, def_reg, op3_reg break; case IR_UGT: | ASM_REG_REG_OP2 cmovbe, type, def_reg, op3_reg break; } } else { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jne >2 break; case IR_NE: | je >2 break; case IR_LT: | jge >2 break; case IR_GE: | jl >2 break; case IR_LE: | jg >2 break; case IR_GT: | jle >2 break; case IR_ULT: | jae >2 break; case IR_UGE: | jb >2 break; case IR_ULE: | ja >2 break; case IR_UGT: | jbe >2 break; } |1: if (op2_reg != IR_REG_NONE) { if (def_reg != op2_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op2_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op2_reg); } } } else { ir_emit_load_ex(ctx, type, def_reg, op2, def); } | jmp >3 |2: if (op3_reg != IR_REG_NONE) { if (def_reg != op3_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op3_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op3_reg); } } } else { ir_emit_load_ex(ctx, type, def_reg, op3, def); } |3: } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_cond_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_ref op2 = insn->op2; ir_ref op3 = insn->op3; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op2_reg = ctx->regs[def][2]; ir_reg op3_reg = ctx->regs[def][3]; ir_op op; if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, op2); if (op3 == op2) { op3_reg = op2_reg; } } if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, op3); } op = ir_emit_cmp_fp_common(ctx, def, insn->op1, &ctx->ir_base[insn->op1]); switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jne >2 | jp >2 break; case IR_NE: | jp >1 | je >2 break; case IR_LT: | jp >2 | jae >2 break; case IR_GE: | jb >2 break; case IR_LE: | jp >2 | ja >2 break; case IR_GT: | jbe >2 break; case IR_ULT: | jae >2 break; case IR_UGE: | jp >1 | jb >2 break; case IR_ULE: | ja >2 break; case IR_UGT: | jp >1 | jbe >2 break; } |1: if (op2_reg != IR_REG_NONE) { if (def_reg != op2_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op2_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op2_reg); } } } else { ir_emit_load_ex(ctx, type, def_reg, op2, def); } | jmp >3 |2: if (op3_reg != IR_REG_NONE) { if (def_reg != op3_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, op3_reg); } else { ir_emit_fp_mov(ctx, type, def_reg, op3_reg); } } } else { ir_emit_load_ex(ctx, type, def_reg, op3, def); } |3: if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } 
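/* Return sequence: emit the epilogue and RET. On 32-bit fastcall targets the callee pops its own stack arguments, hence the "ret imm16" form with param_stack_size. */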
static void ir_emit_return_void(ir_ctx *ctx) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_emit_epilogue(ctx); #ifdef IR_TARGET_X86 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) { | ret ctx->param_stack_size return; } #endif | ret } static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_reg op2_reg = ctx->regs[ref][2]; if (op2_reg != IR_REG_INT_RET1) { ir_type type = ctx->ir_base[insn->op2].type; if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); } else { ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); } } ir_emit_return_void(ctx); } static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_reg op2_reg = ctx->regs[ref][2]; ir_type type = ctx->ir_base[insn->op2].type; #ifdef IR_REG_FP_RET1 if (op2_reg != IR_REG_FP_RET1) { if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); } else { ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); } } #else ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { ir_reg fp; int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp); if (type == IR_DOUBLE) { | fld qword [Ra(fp)+offset] } else { IR_ASSERT(type == IR_FLOAT); | fld dword [Ra(fp)+offset] } } else { int32_t offset = ctx->ret_slot; ir_reg fp; IR_ASSERT(offset != -1); offset = IR_SPILL_POS_TO_OFFSET(offset); fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg); if (type == IR_DOUBLE) { | fld qword [Ra(fp)+offset] } else { IR_ASSERT(type == IR_FLOAT); | fld dword [Ra(fp)+offset] } } #endif ir_emit_return_void(ctx); } static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(IR_IS_TYPE_INT(src_type)); IR_ASSERT(IR_IS_TYPE_INT(dst_type)); IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (ir_type_size[src_type] == 1) { if (ir_type_size[dst_type] == 2) { | movsx Rw(def_reg), Rb(op1_reg) } else if (ir_type_size[dst_type] == 4) { | movsx Rd(def_reg), Rb(op1_reg) } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | movsx Rq(def_reg), Rb(op1_reg) |.endif } } else if (ir_type_size[src_type] == 2) { if (ir_type_size[dst_type] == 4) { | movsx Rd(def_reg), Rw(op1_reg) } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | movsx Rq(def_reg), Rw(op1_reg) |.endif } } else { IR_ASSERT(ir_type_size[src_type] == 4); IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | movsxd Rq(def_reg), Rd(op1_reg) |.endif } } else if (IR_IS_CONST_REF(insn->op1)) { IR_ASSERT(0); } else { ir_mem mem; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } if (ir_type_size[src_type] == 1) { if (ir_type_size[dst_type] == 2) { | ASM_TXT_TMEM_OP movsx, Rw(def_reg), byte, mem } else if (ir_type_size[dst_type] == 4) { | 
ASM_TXT_TMEM_OP movsx, Rd(def_reg), byte, mem } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | ASM_TXT_TMEM_OP movsx, Rq(def_reg), byte, mem |.endif } } else if (ir_type_size[src_type] == 2) { if (ir_type_size[dst_type] == 4) { | ASM_TXT_TMEM_OP movsx, Rd(def_reg), word, mem } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | ASM_TXT_TMEM_OP movsx, Rq(def_reg), word, mem |.endif } } else { IR_ASSERT(ir_type_size[src_type] == 4); IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | ASM_TXT_TMEM_OP movsxd, Rq(def_reg), dword, mem |.endif } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(IR_IS_TYPE_INT(src_type)); IR_ASSERT(IR_IS_TYPE_INT(dst_type)); IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (ir_type_size[src_type] == 1) { if (ir_type_size[dst_type] == 2) { | movzx Rw(def_reg), Rb(op1_reg) } else if (ir_type_size[dst_type] == 4) { | movzx Rd(def_reg), Rb(op1_reg) } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | movzx Rq(def_reg), Rb(op1_reg) |.endif } } else if (ir_type_size[src_type] == 2) { if (ir_type_size[dst_type] == 4) { | movzx Rd(def_reg), Rw(op1_reg) } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | movzx Rq(def_reg), Rw(op1_reg) |.endif } } else { IR_ASSERT(ir_type_size[src_type] == 4); IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 /* Avoid zero extension to the same register. This may be not always safe ??? 
*/ if (op1_reg != def_reg) { | mov Rd(def_reg), Rd(op1_reg) } |.endif } } else if (IR_IS_CONST_REF(insn->op1)) { IR_ASSERT(0); } else { ir_mem mem; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } if (ir_type_size[src_type] == 1) { if (ir_type_size[dst_type] == 2) { | ASM_TXT_TMEM_OP movzx, Rw(def_reg), byte, mem } else if (ir_type_size[dst_type] == 4) { | ASM_TXT_TMEM_OP movzx, Rd(def_reg), byte, mem } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | ASM_TXT_TMEM_OP movzx, Rq(def_reg), byte, mem |.endif } } else if (ir_type_size[src_type] == 2) { if (ir_type_size[dst_type] == 4) { | ASM_TXT_TMEM_OP movzx, Rd(def_reg), word, mem } else { IR_ASSERT(ir_type_size[dst_type] == 8); IR_ASSERT(sizeof(void*) == 8); |.if X64 | ASM_TXT_TMEM_OP movzx, Rq(def_reg), word, mem |.endif } } else { IR_ASSERT(ir_type_size[src_type] == 4); IR_ASSERT(ir_type_size[dst_type] == 8); |.if X64 | ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem |.endif } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(IR_IS_TYPE_INT(src_type)); IR_ASSERT(IR_IS_TYPE_INT(dst_type)); IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (op1_reg != def_reg) { ir_emit_mov(ctx, dst_type, def_reg, op1_reg); } } else { ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); IR_ASSERT(def_reg != IR_REG_NONE); if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (op1_reg != def_reg) { ir_emit_mov(ctx, dst_type, def_reg, op1_reg); } } else { ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); } } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (op1_reg != def_reg) { ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); } } else { ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); } } else if (IR_IS_TYPE_FP(src_type)) { IR_ASSERT(IR_IS_TYPE_INT(dst_type)); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (src_type == IR_DOUBLE) { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (ctx->mflags & IR_X86_AVX) { | vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { | movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } |.endif } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vmovd 
Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { | movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } } else if (IR_IS_CONST_REF(insn->op1)) { ir_insn *_insn = &ctx->ir_base[insn->op1]; IR_ASSERT(!IR_IS_SYM_CONST(_insn->op)); if (src_type == IR_DOUBLE) { IR_ASSERT(sizeof(void*) == 8); |.if X64 | mov64 Rq(def_reg), _insn->val.i64 |.endif } else { IR_ASSERT(src_type == IR_FLOAT); | mov Rd(def_reg), _insn->val.i32 } } else { ir_mem mem; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } if (src_type == IR_DOUBLE) { IR_ASSERT(sizeof(void*) == 8); |.if X64 | ASM_TXT_TMEM_OP mov, Rq(def_reg), qword, mem |.endif } else { IR_ASSERT(src_type == IR_FLOAT); | ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem } } } else if (IR_IS_TYPE_FP(dst_type)) { IR_ASSERT(IR_IS_TYPE_INT(src_type)); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (dst_type == IR_DOUBLE) { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (ctx->mflags & IR_X86_AVX) { | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) } else { | movd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) } |.endif } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) } else { | movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) } } } else if (IR_IS_CONST_REF(insn->op1)) { int label = ir_const_label(ctx, insn->op1); | ASM_FP_REG_TXT_OP movs, dst_type, def_reg, [=>label] } else { ir_mem mem; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } | ASM_FP_REG_MEM_OP movs, dst_type, def_reg, mem } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(IR_IS_TYPE_INT(src_type)); IR_ASSERT(IR_IS_TYPE_FP(dst_type)); IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE) { bool src64 = 0; if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (IR_IS_TYPE_SIGNED(src_type)) { if (ir_type_size[src_type] < 4) { |.if X64 || if (ir_type_size[src_type] == 1) { | movsx Rq(op1_reg), Rb(op1_reg) || } else { | movsx Rq(op1_reg), Rw(op1_reg) || } || src64 = 1; |.else || if (ir_type_size[src_type] == 1) { | movsx Rd(op1_reg), Rb(op1_reg) || } else { | movsx Rd(op1_reg), Rw(op1_reg) || } |.endif } else if (ir_type_size[src_type] > 4) { src64 = 1; } } else { if (ir_type_size[src_type] < 8) { |.if X64 || if (ir_type_size[src_type] == 1) { | movzx Rq(op1_reg), Rb(op1_reg) || } else if (ir_type_size[src_type] == 2) { | movzx Rq(op1_reg), Rw(op1_reg) || } || src64 = 1; |.else || if (ir_type_size[src_type] == 1) { | movzx Rd(op1_reg), Rb(op1_reg) || } else if (ir_type_size[src_type] == 2) { | movzx Rd(op1_reg), Rw(op1_reg) || } |.endif } else { // TODO: uint64_t -> double src64 = 1; } } if (!src64) { if (dst_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) } else { | pxor 
xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) } } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) } } } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (dst_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) } } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) } } |.endif } } else { ir_mem mem; bool src64 = ir_type_size[src_type] == 8; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } if (!src64) { if (dst_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem } } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), dword, mem } } } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (dst_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), qword, mem } } else { IR_ASSERT(dst_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem } else { | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) | ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem } } |.endif } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_backend_data *data = ctx->data; dasm_State **Dst = 
&data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; bool dst64 = 0; IR_ASSERT(IR_IS_TYPE_FP(src_type)); IR_ASSERT(IR_IS_TYPE_INT(dst_type)); IR_ASSERT(def_reg != IR_REG_NONE); if (IR_IS_TYPE_SIGNED(dst_type) ? ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) { // TODO: we might need to perform truncation from 32/64 bit integer dst64 = 1; } if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (!dst64) { if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vcvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { | cvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vcvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { | cvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vcvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { | cvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vcvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } else { | cvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) } } |.endif } } else if (IR_IS_CONST_REF(insn->op1)) { int label = ir_const_label(ctx, insn->op1); if (!dst64) { if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vcvttsd2si Rd(def_reg), qword [=>label] } else { | cvttsd2si Rd(def_reg), qword [=>label] } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vcvttss2si Rd(def_reg), dword [=>label] } else { | cvttss2si Rd(def_reg), dword [=>label] } } } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vcvttsd2si Rq(def_reg), qword [=>label] } else { | cvttsd2si Rq(def_reg), qword [=>label] } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vcvttss2si Rq(def_reg), dword [=>label] } else { | cvttss2si Rq(def_reg), dword [=>label] } } |.endif } } else { ir_mem mem; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } if (!dst64) { if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | ASM_TXT_TMEM_OP vcvttsd2si, Rd(def_reg), qword, mem } else { | ASM_TXT_TMEM_OP cvttsd2si, Rd(def_reg), qword, mem } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | ASM_TXT_TMEM_OP vcvttss2si, Rd(def_reg), dword, mem } else { | ASM_TXT_TMEM_OP cvttss2si, Rd(def_reg), dword, mem } } } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | ASM_TXT_TMEM_OP vcvttsd2si, Rq(def_reg), qword, mem } else { | ASM_TXT_TMEM_OP cvttsd2si, Rq(def_reg), qword, mem } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | ASM_TXT_TMEM_OP vcvttss2si, Rq(def_reg), dword, mem } else { | ASM_TXT_TMEM_OP cvttss2si, Rq(def_reg), dword, mem } } |.endif } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type dst_type = insn->type; ir_type src_type = ctx->ir_base[insn->op1].type; ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; 
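/* FP2FP converts between IR_FLOAT and IR_DOUBLE using cvtss2sd/cvtsd2ss (the v* forms when AVX is enabled); when the source and destination types are the same it reduces to a plain FP register move. */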
IR_ASSERT(IR_IS_TYPE_FP(src_type)); IR_ASSERT(IR_IS_TYPE_FP(dst_type)); IR_ASSERT(def_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, src_type, op1_reg, insn->op1); } if (src_type == dst_type) { if (op1_reg != def_reg) { ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); } } else if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) } else { | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) } else { | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) } } } else if (IR_IS_CONST_REF(insn->op1)) { int label = ir_const_label(ctx, insn->op1); if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] } else { | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] } else { | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] } } } else { ir_mem mem; if (ir_rule(ctx, insn->op1) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op1); } else { mem = ir_ref_spill_slot(ctx, insn->op1); } if (src_type == IR_DOUBLE) { if (ctx->mflags & IR_X86_AVX) { | ASM_TXT_TXT_TMEM_OP vcvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem } else { | ASM_TXT_TMEM_OP cvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem } } else { IR_ASSERT(src_type == IR_FLOAT); if (ctx->mflags & IR_X86_AVX) { | ASM_TXT_TXT_TMEM_OP vcvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem } else { | ASM_TXT_TMEM_OP cvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem } } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, dst_type, def, def_reg); } } static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_ref type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, insn->op1); } if (def_reg == op1_reg) { /* same reg */ } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op1_reg); } else if (def_reg != IR_REG_NONE) { ir_emit_load(ctx, type, def_reg, insn->op1); } else if (op1_reg != IR_REG_NONE) { ir_emit_store(ctx, type, def, op1_reg); } else { IR_ASSERT(0); } if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_type type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg op1_reg = ctx->regs[def][1]; IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, insn->op1); } if (def_reg == op1_reg) { /* same reg */ } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { ir_emit_fp_mov(ctx, type, def_reg, op1_reg); } else if (def_reg != IR_REG_NONE) { ir_emit_load(ctx, type, 
def_reg, insn->op1); } else if (op1_reg != IR_REG_NONE) { ir_emit_store(ctx, type, def, op1_reg); } else { IR_ASSERT(0); } if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_ref type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_mem mem; int32_t offset; ir_reg fp; IR_ASSERT(def_reg != IR_REG_NONE); mem = ir_var_spill_slot(ctx, insn->op1); fp = IR_MEM_BASE(mem); offset = IR_MEM_OFFSET(mem); | lea Ra(def_reg), aword [Ra(fp)+offset] if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_ref type = insn->type; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_reg fp; ir_mem mem; IR_ASSERT(var_insn->op == IR_VAR); fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { return; // fake load } IR_ASSERT(def_reg != IR_REG_NONE); ir_emit_load_mem(ctx, type, def_reg, mem); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_insn *val_insn = &ctx->ir_base[insn->op3]; ir_ref type = val_insn->type; ir_reg op3_reg = ctx->regs[ref][3]; ir_reg fp; ir_mem mem; IR_ASSERT(var_insn->op == IR_VAR); fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) && !IR_IS_CONST_REF(insn->op3) && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { return; // fake store } if (IR_IS_CONST_REF(insn->op3)) { ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); } else { IR_ASSERT(op3_reg != IR_REG_NONE); if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, insn->op3); } ir_emit_store_mem_int(ctx, type, mem, op3_reg); } } static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_insn *var_insn = &ctx->ir_base[insn->op2]; ir_ref type = ctx->ir_base[insn->op3].type; ir_reg op3_reg = ctx->regs[ref][3]; ir_reg fp; ir_mem mem; IR_ASSERT(var_insn->op == IR_VAR); fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) && !IR_IS_CONST_REF(insn->op3) && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { return; // fake store } if (IR_IS_CONST_REF(insn->op3)) { ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); } else { IR_ASSERT(op3_reg != IR_REG_NONE); if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, insn->op3); } ir_emit_store_mem_fp(ctx, type, mem, op3_reg); } } static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_ref type = insn->type; ir_reg op2_reg = ctx->regs[def][2]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_mem mem; if (ctx->use_lists[def].count == 1) { /* dead load */ return; } IR_ASSERT(def_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } mem = IR_MEM_B(op2_reg); } else if (IR_IS_CONST_REF(insn->op2)) { mem = ir_fuse_addr_const(ctx, insn->op2); } else { IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); mem = ir_fuse_addr(ctx, def, insn->op2); if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { if (!ir_may_avoid_spill_load(ctx, def, def)) { ir_emit_load_mem_int(ctx, type, def_reg, mem); } /* avoid load to the same location (valid only when register is not reused) */ return; } } ir_emit_load_mem_int(ctx, type, def_reg, mem); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_ref type = insn->type; ir_reg op2_reg = ctx->regs[def][2]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_mem mem; if (ctx->use_lists[def].count == 1) { /* dead load */ return; } IR_ASSERT(def_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } mem = IR_MEM_B(op2_reg); } else if (IR_IS_CONST_REF(insn->op2)) { mem = ir_fuse_addr_const(ctx, insn->op2); } else { IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); mem = ir_fuse_addr(ctx, def, insn->op2); if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { if (!ir_may_avoid_spill_load(ctx, def, def)) { ir_emit_load_mem_fp(ctx, type, def_reg, mem); } /* avoid load to the same location (valid only when register is not reused) */ return; } } ir_emit_load_mem_fp(ctx, type, def_reg, mem); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_insn *val_insn = &ctx->ir_base[insn->op3]; ir_ref type = val_insn->type; ir_reg op2_reg = ctx->regs[ref][2]; ir_reg op3_reg = ctx->regs[ref][3]; ir_mem mem; if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } mem = IR_MEM_B(op2_reg); } else if (IR_IS_CONST_REF(insn->op2)) { mem = ir_fuse_addr_const(ctx, insn->op2); } else { IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); mem = ir_fuse_addr(ctx, ref, insn->op2); if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA && 
ir_is_same_spill_slot(ctx, insn->op3, mem)) { if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, insn->op3); } /* avoid store to the same location */ return; } } if (IR_IS_CONST_REF(insn->op3)) { ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); } else { IR_ASSERT(op3_reg != IR_REG_NONE); if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, insn->op3); } ir_emit_store_mem_int(ctx, type, mem, op3_reg); } } static void ir_emit_cmp_and_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_reg addr_reg = ctx->regs[ref][2]; ir_mem mem; ir_insn *cmp_insn = &ctx->ir_base[insn->op3]; ir_op op = cmp_insn->op; ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_ref op1 = cmp_insn->op1; ir_ref op2 = cmp_insn->op2; ir_reg op1_reg = ctx->regs[insn->op3][1]; ir_reg op2_reg = ctx->regs[insn->op3][2]; if (addr_reg != IR_REG_NONE) { if (IR_REG_SPILLED(addr_reg)) { addr_reg = IR_REG_NUM(addr_reg); IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); ir_emit_load(ctx, IR_ADDR, addr_reg, insn->op2); } mem = IR_MEM_B(addr_reg); } else if (IR_IS_CONST_REF(insn->op2)) { mem = ir_fuse_addr_const(ctx, insn->op2); } else { IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); mem = ir_fuse_addr(ctx, ref, insn->op2); } if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } ir_emit_cmp_int_common(ctx, type, ref, cmp_insn, op1_reg, op1, op2_reg, op2); _ir_emit_setcc_int_mem(ctx, op, mem); } static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_ref type = ctx->ir_base[insn->op3].type; ir_reg op2_reg = ctx->regs[ref][2]; ir_reg op3_reg = ctx->regs[ref][3]; ir_mem mem; IR_ASSERT(op3_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } mem = IR_MEM_B(op2_reg); } else if (IR_IS_CONST_REF(insn->op2)) { mem = ir_fuse_addr_const(ctx, insn->op2); } else { IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); mem = ir_fuse_addr(ctx, ref, insn->op2); if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA && ir_is_same_spill_slot(ctx, insn->op3, mem)) { if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, insn->op3); } /* avoid store to the same location */ return; } } if (IR_IS_CONST_REF(insn->op3)) { ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); } else { IR_ASSERT(op3_reg != IR_REG_NONE); if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, type, op3_reg, insn->op3); } ir_emit_store_mem_fp(ctx, type, mem, op3_reg); } } static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_reg src_reg = insn->op2; ir_type type = insn->type; if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { if (ctx->vregs[def] && ctx->live_intervals[ctx->vregs[def]] && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { ir_emit_store(ctx, type, def, src_reg); } } else { ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); if (def_reg == IR_REG_NONE) { /* op3 is used as a flag that the value is already stored in memory. 
* If op3 is set we don't have to store the value once again (in case of spilling) */ if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) { ir_emit_store(ctx, type, def, src_reg); } } else { if (src_reg != def_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, def_reg, src_reg); } else { IR_ASSERT(IR_IS_TYPE_FP(type)); ir_emit_fp_mov(ctx, type, def_reg, src_reg); } } if (IR_REG_SPILLED(ctx->regs[def][0]) && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) { ir_emit_store(ctx, type, def, def_reg); } } } } static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) { ir_ref type = ctx->ir_base[insn->op2].type; ir_reg op2_reg = ctx->regs[ref][2]; ir_reg dst_reg = insn->op3; if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } if (op2_reg != dst_reg) { if (IR_IS_TYPE_INT(type)) { ir_emit_mov(ctx, type, dst_reg, op2_reg); } else { IR_ASSERT(IR_IS_TYPE_FP(type)); ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); } } } else { ir_emit_load_ex(ctx, type, dst_reg, insn->op2, ref); } } static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); if (ctx->use_lists[def].count == 1) { /* dead alloca */ return; } if (IR_IS_CONST_REF(insn->op2)) { ir_insn *val = &ctx->ir_base[insn->op2]; int32_t size = val->val.i32; IR_ASSERT(IR_IS_TYPE_INT(val->type)); IR_ASSERT(!IR_IS_SYM_CONST(val->op)); IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); /* Stack must be 16 byte aligned */ size = IR_ALIGNED_SIZE(size, 16); | ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size if (!(ctx->flags & IR_USE_FRAME_POINTER)) { ctx->call_stack_size += size; } } else { int32_t alignment = 16; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; IR_ASSERT(ctx->flags & IR_FUNCTION); IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); IR_ASSERT(def_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } if (def_reg != op2_reg) { if (op2_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, def_reg, op2_reg); } else { ir_emit_load(ctx, type, def_reg, insn->op2); } } | ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1) | ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1) | ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg } if (def_reg != IR_REG_NONE) { | mov Ra(def_reg), Ra(IR_REG_RSP) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } else { ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); } } static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; if (IR_IS_CONST_REF(insn->op2)) { ir_insn *val = &ctx->ir_base[insn->op2]; int32_t size = val->val.i32; IR_ASSERT(IR_IS_TYPE_INT(val->type)); IR_ASSERT(!IR_IS_SYM_CONST(val->op)); IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); /* Stack must be 16 byte aligned */ size = IR_ALIGNED_SIZE(size, 16); | ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size if (!(ctx->flags & IR_USE_FRAME_POINTER)) { ctx->call_stack_size -= size; } } else { // int32_t alignment = 16; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; 
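/* Dynamic AFREE: the size to release is only known at run-time (op2), so the stack is adjusted with a plain "add rsp, reg"; unlike the constant case above, no 16-byte re-alignment of the value is emitted here (see the TODO below). */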
IR_ASSERT(ctx->flags & IR_FUNCTION); if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } // TODO: alignment ??? | ASM_REG_REG_OP add, IR_ADDR, IR_REG_RSP, op2_reg } } static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); | mov Ra(def_reg), Ra(IR_REG_RSP) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, IR_ADDR, def, def_reg); } } static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg op2_reg = ctx->regs[def][2]; IR_ASSERT(op2_reg != IR_REG_NONE); if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } | mov Ra(IR_REG_RSP), Ra(op2_reg) } static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); if (ctx->flags & IR_USE_FRAME_POINTER) { | mov Ra(def_reg), Ra(IR_REG_RBP) } else { | lea Ra(def_reg), [Ra(IR_REG_RSP)+(ctx->stack_frame_size + ctx->call_stack_size)] } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, IR_ADDR, def, def_reg); } } static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) { #if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg fp; int arg_area_offset; ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; IR_ASSERT(tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); } if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; } else { fp = IR_REG_STACK_POINTER; arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; } | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) #elif defined(IR_TARGET_X64) |.if X64 ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg fp; int reg_save_area_offset; int overflow_arg_area_offset; ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; bool have_reg_save_area = 0; int32_t offset; IR_ASSERT(tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); } if (ctx->flags & IR_USE_FRAME_POINTER) { fp = IR_REG_FRAME_POINTER; reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; } else { fp = IR_REG_STACK_POINTER; reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; } if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] have_reg_save_area = 1; /* Set va_list.gp_offset */ | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params } else { reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS; /* Set va_list.gp_offset */ | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * IR_REG_INT_ARGS } if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { if (!have_reg_save_area) { | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] have_reg_save_area = 1; } /* Set va_list.fp_offset */ | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params } else { /* Set va_list.fp_offset */ | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS } if (have_reg_save_area) { /* Set va_list.reg_save_area */ | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) } | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] /* Set va_list.overflow_arg_area */ | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) |.endif #else IR_ASSERT(0 && "NIY va_start"); #endif } static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) { #if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg tmp_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; ir_reg op3_reg = ctx->regs[def][3]; int32_t op2_offset, op3_offset; IR_ASSERT(tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } op2_offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); } if (op3_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); } op3_offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); } | mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset] | mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg) #elif defined(IR_TARGET_X64) |.if X64 ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg tmp_reg = ctx->regs[def][1]; ir_reg op2_reg = ctx->regs[def][2]; ir_reg op3_reg = ctx->regs[def][3]; int32_t op2_offset, op3_offset; IR_ASSERT(tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } op2_offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); } if (op3_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); } op3_offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); } | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))] | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, fp_offset))] | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, fp_offset))], Ra(tmp_reg) | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, overflow_arg_area))] | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, reg_save_area))] | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) |.endif #else IR_ASSERT(0 && "NIY va_copy"); #endif } static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) { #if defined(_WIN64) || defined(IR_TARGET_X86) ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_reg def_reg = ctx->regs[def][0]; ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); } | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } #elif defined(IR_TARGET_X64) |.if X64 ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type = insn->type; ir_reg def_reg = ctx->regs[def][0]; ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; int32_t offset; IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE); if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } offset = 0; } else { IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); } if (IR_IS_TYPE_INT(type)) { | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))] | cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS | jge >1 | add Rd(tmp_reg), sizeof(void*) | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] | jmp >2 |1: | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] | add Ra(tmp_reg), sizeof(void*) | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) |2: | mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)] } else { | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))] | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS | jge >1 | add Rd(tmp_reg), 16 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg) | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); | jmp >2 |1: | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); | add Ra(tmp_reg), 8 | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) |2: } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } |.endif #else IR_ASSERT(0 && "NIY va_arg"); #endif } static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type; ir_block *bb; ir_insn *use_insn, *val; uint32_t n, *p, use_block; int i; int label, default_label = 0; int count = 0; ir_val min, max; ir_reg op2_reg = ctx->regs[def][2]; ir_reg tmp_reg = ctx->regs[def][3]; type = ctx->ir_base[insn->op2].type; IR_ASSERT(tmp_reg != IR_REG_NONE); if (IR_IS_TYPE_SIGNED(type)) { min.u64 = 0x7fffffffffffffff; max.u64 = 0x8000000000000000; } else { min.u64 = 0xffffffffffffffff; max.u64 = 0x0; } bb = &ctx->cfg_blocks[b]; p = &ctx->cfg_edges[bb->successors]; for (n = bb->successors_count; n != 0; p++, n--) { use_block = *p; use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; if (use_insn->op == IR_CASE_VAL) { val = &ctx->ir_base[use_insn->op2]; IR_ASSERT(!IR_IS_SYM_CONST(val->op)); if (IR_IS_TYPE_SIGNED(type)) { IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); min.i64 
= IR_MIN(min.i64, val->val.i64); max.i64 = IR_MAX(max.i64, val->val.i64); } else { IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); } count++; } else { IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); default_label = ir_skip_empty_target_blocks(ctx, use_block); } } IR_ASSERT(op2_reg != IR_REG_NONE); if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } /* Generate a table jmp or a sequence of comparisons */ if (count > 2 && (max.i64-min.i64) < count * 8) { int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); for (i = 0; i <= (max.i64 - min.i64); i++) { labels[i] = default_label; } p = &ctx->cfg_edges[bb->successors]; for (n = bb->successors_count; n != 0; p++, n--) { use_block = *p; use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; if (use_insn->op == IR_CASE_VAL) { val = &ctx->ir_base[use_insn->op2]; IR_ASSERT(!IR_IS_SYM_CONST(val->op)); label = ir_skip_empty_target_blocks(ctx, use_block); labels[val->val.i64 - min.i64] = label; } } switch (ir_type_size[type]) { default: IR_ASSERT(0 && "Unsupported type size"); case 1: if (IR_IS_TYPE_SIGNED(type)) { | movsx Ra(op2_reg), Rb(op2_reg) } else { | movzx Ra(op2_reg), Rb(op2_reg) } break; case 2: if (IR_IS_TYPE_SIGNED(type)) { | movsx Ra(op2_reg), Rw(op2_reg) } else { | movzx Ra(op2_reg), Rw(op2_reg) } break; case 4: |.if X64 if (IR_IS_TYPE_SIGNED(type)) { | movsxd Ra(op2_reg), Rd(op2_reg) } else { | mov Rd(op2_reg), Rd(op2_reg) } break; || case 8: |.endif break; } if (min.i64 != 0) { int64_t offset = -min.i64; if (IR_IS_SIGNED_32BIT(offset)) { | lea Ra(tmp_reg), [Ra(op2_reg)+(int32_t)offset] } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 | mov64 Rq(tmp_reg), offset | add Ra(tmp_reg), Ra(op2_reg) |.endif } if (default_label) { offset = max.i64 - min.i64; IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); | cmp Ra(tmp_reg), (int32_t)offset | ja =>default_label } |.if X64 if (ctx->code_buffer && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->start) && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->end)) { | jmp aword [Ra(tmp_reg)*8+>1] } else { int64_t offset = -min.i64; IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); offset *= 8; IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); | lea Ra(tmp_reg), aword [>1] | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+offset] } |.else | jmp aword [Ra(tmp_reg)*4+>1] |.endif } else { if (default_label) { int64_t offset = max.i64; IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); | cmp Ra(op2_reg), (int32_t)offset | ja =>default_label } |.if X64 if (ctx->code_buffer && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->start) && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->end)) { | jmp aword [Ra(op2_reg)*8+>1] } else { | lea Ra(tmp_reg), aword [>1] | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8] } |.else | jmp aword [Ra(op2_reg)*4+>1] |.endif } |.jmp_table if (!data->jmp_table_label) { data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; |=>data->jmp_table_label: } |.align aword |1: for (i = 0; i <= (max.i64 - min.i64); i++) { int b = labels[i]; if (b) { ir_block *bb = &ctx->cfg_blocks[b]; ir_insn *insn = &ctx->ir_base[bb->end]; if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { ir_ref prev = ctx->prev_ref[bb->end]; if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { prev = ctx->prev_ref[prev]; } if (prev == bb->start) { void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); | .aword &addr if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { bb->flags |= IR_BB_EMPTY; } 
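/* The case block is a bare IJMP to a known address, so its jump-table entry stores that address directly and the block itself can be dropped as empty. */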
continue; } } | .aword =>b } else { | .aword 0 } } |.code ir_mem_free(labels); } else { p = &ctx->cfg_edges[bb->successors]; for (n = bb->successors_count; n != 0; p++, n--) { use_block = *p; use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; if (use_insn->op == IR_CASE_VAL) { val = &ctx->ir_base[use_insn->op2]; IR_ASSERT(!IR_IS_SYM_CONST(val->op)); label = ir_skip_empty_target_blocks(ctx, use_block); if (IR_IS_32BIT(type, val->val)) { | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 } else { IR_ASSERT(sizeof(void*) == 8); |.if X64 | mov64 Ra(tmp_reg), val->val.i64 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg |.endif } | je =>label } } if (default_label) { | jmp =>default_label } } } static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) { int j, n; ir_type type; int int_param = 0; int fp_param = 0; int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; int32_t used_stack = 0; #ifdef IR_HAVE_FASTCALL if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { int_reg_params_count = IR_REG_INT_FCARGS; fp_reg_params_count = IR_REG_FP_FCARGS; } #endif n = insn->inputs_count; for (j = 3; j <= n; j++) { type = ctx->ir_base[ir_insn_op(insn, j)].type; if (IR_IS_TYPE_INT(type)) { if (int_param >= int_reg_params_count) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); } int_param++; #ifdef _WIN64 /* WIN64 calling convention uses a common counter for int and fp registers */ fp_param++; #endif } else { IR_ASSERT(IR_IS_TYPE_FP(type)); if (fp_param >= fp_reg_params_count) { used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); } fp_param++; #ifdef _WIN64 /* WIN64 calling convention uses a common counter for int and fp registers */ int_param++; #endif } } /* Reserve "home space" or "shadow store" for register arguments (used by the Windows x64 ABI) */ used_stack += IR_SHADOW_ARGS; return used_stack; } static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int j, n; ir_ref arg; ir_insn *arg_insn; uint8_t type; ir_reg src_reg, dst_reg; int int_param = 0; int fp_param = 0; int count = 0; int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t used_stack, stack_offset = IR_SHADOW_ARGS; ir_copy *copies; bool do_pass3 = 0; /* For temporaries we may use any scratch registers except for registers used for parameters */ ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ n = insn->inputs_count; if (n < 3) { return 0; } if (tmp_reg == IR_REG_NONE) { tmp_reg = IR_REG_RAX; } #ifdef IR_HAVE_FASTCALL if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { int_reg_params_count = IR_REG_INT_FCARGS; fp_reg_params_count = IR_REG_FP_FCARGS; int_reg_params = _ir_int_fc_reg_params; fp_reg_params = _ir_fp_fc_reg_params; } #endif if (insn->op == IR_CALL && (ctx->flags & IR_PREALLOCATED_STACK) #ifdef IR_HAVE_FASTCALL && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ #endif ) { // TODO: support for preallocated stack used_stack = 0; } else { used_stack = ir_call_used_stack(ctx, insn); if (IR_SHADOW_ARGS && insn->op == IR_TAILCALL && used_stack == IR_SHADOW_ARGS) { used_stack = 0; } if (ctx->fixed_call_stack_size && used_stack <= ctx->fixed_call_stack_size #ifdef IR_HAVE_FASTCALL && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ #endif ) { used_stack = 0; } 
else { /* Stack must be 16 byte aligned */ int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); ctx->call_stack_size += aligned_stack; if (aligned_stack) { | sub Ra(IR_REG_RSP), aligned_stack } } } /* 1. move all register arguments that should be passed through the stack * and collect arguments that should be passed through registers */ copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); for (j = 3; j <= n; j++) { arg = ir_insn_op(insn, j); src_reg = ir_get_alocated_reg(ctx, def, j); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; if (IR_IS_TYPE_INT(type)) { if (int_param < int_reg_params_count) { dst_reg = int_reg_params[int_param]; } else { dst_reg = IR_REG_NONE; /* pass argument through stack */ } int_param++; #ifdef _WIN64 /* WIN64 calling convention uses a common counter for int and fp registers */ fp_param++; #endif } else { IR_ASSERT(IR_IS_TYPE_FP(type)); if (fp_param < fp_reg_params_count) { dst_reg = fp_reg_params[fp_param]; } else { dst_reg = IR_REG_NONE; /* pass argument through stack */ } fp_param++; #ifdef _WIN64 /* WIN64 calling convention uses a common counter for int and fp registers */ int_param++; #endif } if (dst_reg != IR_REG_NONE) { if (src_reg == IR_REG_NONE) { /* delay CONST->REG and MEM->REG moves to third pass */ do_pass3 = 1; } else { if (IR_REG_SPILLED(src_reg)) { src_reg = IR_REG_NUM(src_reg); ir_emit_load(ctx, type, src_reg, arg); } if (src_reg != dst_reg) { /* delay REG->REG moves to second pass */ copies[count].type = type; copies[count].from = src_reg; copies[count].to = dst_reg; count++; } } } else { /* Pass register arguments to the stack (REG->MEM moves) */ if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); } else { do_pass3 = 1; } stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); } } /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ if (count) { ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); } ir_mem_free(copies); /* 3. 
move the remaining memory and immediate values */ if (do_pass3) { stack_offset = IR_SHADOW_ARGS; int_param = 0; fp_param = 0; for (j = 3; j <= n; j++) { arg = ir_insn_op(insn, j); src_reg = ir_get_alocated_reg(ctx, def, j); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; if (IR_IS_TYPE_INT(type)) { if (int_param < int_reg_params_count) { dst_reg = int_reg_params[int_param]; } else { dst_reg = IR_REG_NONE; /* argument already passed through the stack */ } int_param++; #ifdef _WIN64 /* WIN64 calling convention uses a common counter for int and fp registers */ fp_param++; #endif } else { IR_ASSERT(IR_IS_TYPE_FP(type)); if (fp_param < fp_reg_params_count) { dst_reg = fp_reg_params[fp_param]; } else { dst_reg = IR_REG_NONE; /* argument already passed through the stack */ } fp_param++; #ifdef _WIN64 /* WIN64 calling convention uses a common counter for int and fp registers */ int_param++; #endif } if (dst_reg != IR_REG_NONE) { if (src_reg == IR_REG_NONE) { if (IR_IS_TYPE_INT(type)) { if (IR_IS_CONST_REF(arg)) { if (type == IR_I8 || type == IR_I16) { type = IR_I32; } else if (type == IR_U8 || type == IR_U16) { type = IR_U32; } ir_emit_load(ctx, type, dst_reg, arg); } else if (ctx->vregs[arg]) { ir_mem mem = ir_ref_spill_slot(ctx, arg); if (ir_type_size[type] > 2) { ir_emit_load_mem_int(ctx, type, dst_reg, mem); } else if (ir_type_size[type] == 2) { if (type == IR_I16) { | ASM_TXT_TMEM_OP movsx, Rd(dst_reg), word, mem } else { | ASM_TXT_TMEM_OP movzx, Rd(dst_reg), word, mem } } else { IR_ASSERT(ir_type_size[type] == 1); if (type == IR_I8) { | ASM_TXT_TMEM_OP movsx, Rd(dst_reg), byte, mem } else { | ASM_TXT_TMEM_OP movzx, Rd(dst_reg), byte, mem } } } else { ir_load_local_addr(ctx, dst_reg, arg); } } else { ir_emit_load(ctx, type, dst_reg, arg); } } } else { ir_mem mem = IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset); if (IR_IS_TYPE_INT(type)) { if (IR_IS_CONST_REF(arg)) { ir_emit_store_mem_int_const(ctx, type, mem, arg, tmp_reg, 1); } else if (src_reg == IR_REG_NONE) { IR_ASSERT(tmp_reg != IR_REG_NONE); ir_emit_load(ctx, type, tmp_reg, arg); ir_emit_store_mem_int(ctx, type, mem, tmp_reg); } else if (IR_REG_SPILLED(src_reg)) { src_reg = IR_REG_NUM(src_reg); ir_emit_load(ctx, type, src_reg, arg); ir_emit_store_mem_int(ctx, type, mem, src_reg); } } else { if (IR_IS_CONST_REF(arg)) { ir_emit_store_mem_fp_const(ctx, type, mem, arg, tmp_reg, tmp_fp_reg); } else if (src_reg == IR_REG_NONE) { IR_ASSERT(tmp_fp_reg != IR_REG_NONE); ir_emit_load(ctx, type, tmp_fp_reg, arg); ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg); } else if (IR_REG_SPILLED(src_reg)) { src_reg = IR_REG_NUM(src_reg); ir_emit_load(ctx, type, src_reg, arg); ir_emit_store_mem_fp(ctx, type, mem, src_reg); } } stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); } } } #ifdef _WIN64 /* WIN64 calling convention requires duplication of parameters passed in FP registers into GP ones */ if (ir_is_vararg(ctx, insn)) { n = IR_MIN(n, IR_MAX_REG_ARGS + 2); for (j = 3; j <= n; j++) { arg = ir_insn_op(insn, j); arg_insn = &ctx->ir_base[arg]; type = arg_insn->type; if (IR_IS_TYPE_FP(type)) { src_reg = fp_reg_params[j-3]; dst_reg = int_reg_params[j-3]; |.if X64 if (ctx->mflags & IR_X86_AVX) { | vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) } else { | movd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) } |.endif } } } #endif #ifdef IR_REG_VARARG_FP_REGS /* set hidden argument to specify the number of vector registers used */ if (ir_is_vararg(ctx, insn)) { fp_param = IR_MIN(fp_param, fp_reg_params_count); | mov Rd(IR_REG_VARARG_FP_REGS), fp_param } #endif 
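/* The un-aligned number of stack bytes consumed by outgoing arguments is returned to the caller; ir_emit_call_ex() re-aligns it to 16 bytes and releases it after the call (for fastcall only the alignment padding is popped, since the callee removes its own arguments). */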
return used_stack; } static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg; if (IR_IS_CONST_REF(insn->op2)) { void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | call aword &addr } else { |.if X64 || ir_reg tmp_reg = IR_REG_RAX; #ifdef IR_REG_VARARG_FP_REGS || if (ir_is_vararg(ctx, insn)) { || tmp_reg = IR_REG_R11; || } #endif || if (IR_IS_SIGNED_32BIT(addr)) { | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 || } else { | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 || } | call Rq(tmp_reg) |.endif } } else { ir_reg op2_reg = ctx->regs[def][2]; if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } | call Ra(op2_reg) } else { ir_mem mem; if (ir_rule(ctx, insn->op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op2); } else { mem = ir_ref_spill_slot(ctx, insn->op2); } | ASM_TMEM_OP call, aword, mem } } if (used_stack) { int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); ctx->call_stack_size -= aligned_stack; if (ir_is_fastcall(ctx, insn)) { aligned_stack -= used_stack; if (aligned_stack) { | add Ra(IR_REG_RSP), aligned_stack } } else { | add Ra(IR_REG_RSP), aligned_stack } } if (insn->type != IR_VOID) { if (IR_IS_TYPE_INT(insn->type)) { def_reg = IR_REG_NUM(ctx->regs[def][0]); if (def_reg != IR_REG_NONE) { if (def_reg != IR_REG_INT_RET1) { ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } else if (ctx->use_lists[def].count > 1) { ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); } } else { IR_ASSERT(IR_IS_TYPE_FP(insn->type)); def_reg = IR_REG_NUM(ctx->regs[def][0]); #ifdef IR_REG_FP_RET1 if (def_reg != IR_REG_NONE) { if (def_reg != IR_REG_FP_RET1) { ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } else if (ctx->use_lists[def].count > 1) { ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); } #else if (ctx->use_lists[def].count > 1) { int32_t offset; ir_reg fp; if (def_reg == IR_REG_NONE) { offset = ir_ref_spill_slot_offset(ctx, def, &fp); if (insn->type == IR_DOUBLE) { | fstp qword [Ra(fp)+offset] } else { IR_ASSERT(insn->type == IR_FLOAT); | fstp dword [Ra(fp)+offset] } } else { offset = ctx->ret_slot; IR_ASSERT(offset != -1); offset = IR_SPILL_POS_TO_OFFSET(offset); fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; if (insn->type == IR_DOUBLE) { | fstp qword [Ra(fp)+offset] } else { IR_ASSERT(insn->type == IR_FLOAT); | fstp dword [Ra(fp)+offset] } ir_emit_load_mem_fp(ctx, insn->type, def_reg, IR_MEM_BO(fp, offset)); if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } } #endif } } } static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) { int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); ir_emit_call_ex(ctx, def, insn, used_stack); } static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); if (used_stack != 0) { ir_emit_call_ex(ctx, def, insn, used_stack); ir_emit_return_void(ctx); return; } ir_emit_epilogue(ctx); if (IR_IS_CONST_REF(insn->op2)) { void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | jmp aword &addr } else { |.if X64 || ir_reg tmp_reg = IR_REG_RAX; #ifdef IR_REG_VARARG_FP_REGS || if (ir_is_vararg(ctx, insn)) { || tmp_reg = IR_REG_R11; || } #endif || if (IR_IS_SIGNED_32BIT(addr)) { | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 || } else { | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 || } | jmp Rq(tmp_reg) |.endif } } else { ir_reg op2_reg = ctx->regs[def][2]; if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } | jmp Ra(op2_reg) } else { ir_mem mem; if (ir_rule(ctx, insn->op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op2); } else { mem = ir_ref_spill_slot(ctx, insn->op2); } | ASM_TMEM_OP jmp, aword, mem } } } static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg op2_reg = ctx->regs[def][2]; if (IR_IS_CONST_REF(insn->op2)) { void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | jmp aword &addr } else { |.if X64 if (IR_IS_SIGNED_32BIT(addr)) { | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 } else { | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 } | jmp rax |.endif } } else if (ir_rule(ctx, insn->op2) & IR_FUSED) { ir_mem mem = ir_fuse_load(ctx, def, insn->op2); | ASM_TMEM_OP jmp, aword, mem } else if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); } | jmp Ra(op2_reg) } else { ir_mem mem = ir_ref_spill_slot(ctx, insn->op2); | ASM_TMEM_OP jmp, aword, mem } } static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *next_insn = &ctx->ir_base[def + 1]; if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) { ir_block *bb = &ctx->cfg_blocks[b]; uint32_t target; if (!(bb->flags & IR_BB_DESSA_MOVES)) { target = ctx->cfg_edges[bb->successors]; if (UNEXPECTED(bb->successors_count == 2)) { if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) { target = ctx->cfg_edges[bb->successors + 1]; } else { IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); } } else { IR_ASSERT(bb->successors_count == 1); } target = ir_skip_empty_target_blocks(ctx, target); if (target != next_block) { if 
(int_cmp) { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jne =>target break; case IR_NE: | je =>target break; case IR_LT: | jge =>target break; case IR_GE: | jl =>target break; case IR_LE: | jg =>target break; case IR_GT: | jle =>target break; case IR_ULT: | jae =>target break; case IR_UGE: | jb =>target break; case IR_ULE: | ja =>target break; case IR_UGT: | jbe =>target break; } } else { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jne =>target | jp =>target break; case IR_NE: | jp &addr | je =>target break; case IR_LT: | jae =>target break; case IR_GE: | jp &addr | jb =>target break; case IR_LE: | ja =>target break; case IR_GT: | jp &addr | jbe =>target break; } } | jmp &addr return 1; } } } else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) { void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, target_addr)) { if (int_cmp) { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jne &target_addr break; case IR_NE: | je &target_addr break; case IR_LT: | jge &target_addr break; case IR_GE: | jl &target_addr break; case IR_LE: | jg &target_addr break; case IR_GT: | jle &target_addr break; case IR_ULT: | jae &target_addr break; case IR_UGE: | jb &target_addr break; case IR_ULE: | ja &target_addr break; case IR_UGT: | jbe &target_addr break; } } else { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jne &target_addr | jp &target_addr break; case IR_NE: | jp &addr | je &target_addr break; case IR_LT: | jae &target_addr break; case IR_GE: | jp &addr | jb &target_addr break; case IR_LE: | ja &target_addr break; case IR_GT: | jp &addr | jbe &target_addr break; } } | jmp &addr return 1; } } if (int_cmp) { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | je &addr break; case IR_NE: | jne &addr break; case IR_LT: | jl &addr break; case IR_GE: | jge &addr break; case IR_LE: | jle &addr break; case IR_GT: | jg &addr break; case IR_ULT: | jb &addr break; case IR_UGE: | jae &addr break; case IR_ULE: | jbe &addr break; case IR_UGT: | ja &addr break; } } else { switch (op) { default: IR_ASSERT(0 && "NIY binary op"); case IR_EQ: | jp >1 | je &addr |1: break; case IR_NE: | jne &addr | jp &addr break; case IR_LT: | jp >1 | jb &addr |1: break; case IR_GE: | jae &addr break; case IR_LE: | jp >1 | jbe &addr |1: break; case IR_GT: | ja &addr break; // case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; // case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; // case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; // case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; } } return 0; } static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg op2_reg = ctx->regs[def][2]; ir_type type = ctx->ir_base[insn->op2].type; void *addr; IR_ASSERT(IR_IS_TYPE_INT(type)); if (IR_IS_CONST_REF(insn->op2)) { bool is_true = ir_ref_is_true(ctx, insn->op2); if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | jmp aword &addr } else { |.if X64 if (IR_IS_SIGNED_32BIT(addr)) { | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 } else { | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 } | jmp 
aword [rax] |.endif } } return 0; } if (op2_reg != IR_REG_NONE) { if (IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); ir_emit_load(ctx, type, op2_reg, insn->op2); } | ASM_REG_REG_OP test, type, op2_reg, op2_reg } else { ir_mem mem; if (ir_rule(ctx, insn->op2) & IR_FUSED) { mem = ir_fuse_load(ctx, def, insn->op2); } else { mem = ir_ref_spill_slot(ctx, insn->op2); } | ASM_MEM_IMM_OP cmp, type, mem, 0 } addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { ir_op op; if (insn->op == IR_GUARD) { op = IR_EQ; } else { op = IR_NE; } return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); } else { |.if X64 if (insn->op == IR_GUARD) { | je >1 } else { | jne >1 } |.cold_code |1: if (IR_IS_SIGNED_32BIT(addr)) { | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 } else { | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 } | jmp aword [rax] |.code |.endif return 0; } } static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; ir_op op = cmp_insn->op; ir_type type = ctx->ir_base[cmp_insn->op1].type; ir_ref op1 = cmp_insn->op1; ir_ref op2 = cmp_insn->op2; ir_reg op1_reg = ctx->regs[insn->op2][1]; ir_reg op2_reg = ctx->regs[insn->op2][2]; void *addr; if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { op1_reg = IR_REG_NUM(op1_reg); ir_emit_load(ctx, type, op1_reg, op1); } if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { op2_reg = IR_REG_NUM(op2_reg); if (op1 != op2) { ir_emit_load(ctx, type, op2_reg, op2); } } addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { if (op == IR_ULT) { /* always false */ if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | jmp aword &addr } else { |.if X64 if (IR_IS_SIGNED_32BIT(addr)) { | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 } else { | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 } | jmp aword [rax] |.endif } return 0; } else if (op == IR_UGE) { /* always true */ return 0; } else if (op == IR_ULE) { op = IR_EQ; } else if (op == IR_UGT) { op = IR_NE; } } ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2); if (insn->op == IR_GUARD) { op ^= 1; // reverse } return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); } static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); if (insn->op == IR_GUARD) { op ^= 1; // reverse } return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0); } static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); ir_op op = (insn->op == IR_GUARD) ? 
IR_EQ : IR_NE; ir_emit_test_int_common(ctx, def, insn->op2, op); return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); } static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) { void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); ir_op op = ctx->ir_base[insn->op2].op; if (insn->op == IR_GUARD) { op ^= 1; // reverse } return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); } static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_type type; void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); type = ctx->ir_base[ctx->ir_base[insn->op2].op1].type; IR_ASSERT(IR_IS_TYPE_INT(type)); if (IR_IS_TYPE_SIGNED(type)) { if (insn->op == IR_GUARD) { | jno &addr } else { | jo &addr } } else { if (insn->op == IR_GUARD) { | jnc &addr } else { | jc &addr } } return 0; } static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); ir_mem mem = ir_fuse_addr(ctx, def, def); IR_ASSERT(def_reg != IR_REG_NONE); if (ir_type_size[type] == 4) { if (IR_MEM_BASE(mem) == def_reg && IR_MEM_OFFSET(mem) == 0 && IR_MEM_SCALE(mem) == 1 && IR_MEM_INDEX(mem) != IR_REG_NONE) { ir_reg reg = IR_MEM_INDEX(mem); | add Rd(def_reg), Rd(reg) } else if (IR_MEM_INDEX(mem) == def_reg && IR_MEM_OFFSET(mem) == 0 && IR_MEM_SCALE(mem) == 1 && IR_MEM_BASE(mem) != IR_REG_NONE) { ir_reg reg = IR_MEM_BASE(mem); | add Rd(def_reg), Rd(reg) } else { | ASM_TXT_TMEM_OP lea, Rd(def_reg), dword, mem } } else { if (IR_MEM_BASE(mem) == def_reg && IR_MEM_OFFSET(mem) == 0 && IR_MEM_SCALE(mem) == 1 && IR_MEM_INDEX(mem) != IR_REG_NONE) { ir_reg reg = IR_MEM_INDEX(mem); | add Ra(def_reg), Ra(reg) } else if (IR_MEM_INDEX(mem) == def_reg && IR_MEM_OFFSET(mem) == 0 && IR_MEM_SCALE(mem) == 1 && IR_MEM_BASE(mem) != IR_REG_NONE) { ir_reg reg = IR_MEM_BASE(mem); | add Ra(def_reg), Ra(reg) } else { | ASM_TXT_TMEM_OP lea, Ra(def_reg), aword, mem } } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, type, def, def_reg); } } static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); if (ctx->use_lists[def].count == 1) { /* dead load */ return; } |.if X64WIN | gs | mov Ra(reg), aword [0x58] | mov Ra(reg), aword [Ra(reg)+insn->op2] | mov Ra(reg), aword [Ra(reg)+insn->op3] |.elif WIN | fs | mov Ra(reg), aword [0x2c] | mov Ra(reg), aword [Ra(reg)+insn->op2] | mov Ra(reg), aword [Ra(reg)+insn->op3] |.elif X64APPLE | gs || if (insn->op3 == IR_NULL) { | mov Ra(reg), aword [insn->op2] || } else { | mov Ra(reg), aword [insn->op2] | mov Ra(reg), aword [Ra(reg)+insn->op3] || } |.elif X64 | fs || if (insn->op3 == IR_NULL) { | mov Ra(reg), aword [insn->op2] || } else { | mov Ra(reg), [0x8] | mov Ra(reg), aword [Ra(reg)+insn->op2] | mov Ra(reg), aword [Ra(reg)+insn->op3] || } |.else | gs || if (insn->op3 == IR_NULL) { | mov Ra(reg), aword [insn->op2] || } else { | mov Ra(reg), [0x4] | mov Ra(reg), aword [Ra(reg)+insn->op2] | mov Ra(reg), aword [Ra(reg)+insn->op3] || } | .endif if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, IR_ADDR, def, reg); } } static void ir_emit_sse_sqrt(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg op3_reg = 
ctx->regs[def][3]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); IR_ASSERT(IR_IS_TYPE_FP(insn->type)); IR_ASSERT(def_reg != IR_REG_NONE && op3_reg != IR_REG_NONE); if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); } | ASM_FP_REG_REG_OP sqrts, insn->type, def_reg, op3_reg if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_sse_round(ir_ctx *ctx, ir_ref def, ir_insn *insn, int round_op) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg op3_reg = ctx->regs[def][3]; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); IR_ASSERT(IR_IS_TYPE_FP(insn->type)); IR_ASSERT(def_reg != IR_REG_NONE && op3_reg != IR_REG_NONE); if (IR_REG_SPILLED(op3_reg)) { op3_reg = IR_REG_NUM(op3_reg); ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); } if (ctx->mflags & IR_X86_AVX) { | ASM_SSE2_REG_REG_REG_TXT_OP vrounds, insn->type, def_reg, def_reg, op3_reg, round_op } else { | ASM_SSE2_REG_REG_TXT_OP rounds, insn->type, def_reg, op3_reg, round_op } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) { ir_backend_data *data = ctx->data; dasm_State **Dst = &data->dasm_state; ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); IR_ASSERT(def_reg != IR_REG_NONE); |.if X64 | sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */ | mov aword [rsp+0*8], rax | mov aword [rsp+1*8], rcx | mov aword [rsp+2*8], rdx | mov aword [rsp+3*8], rbx | mov aword [rsp+5*8], rbp | mov aword [rsp+6*8], rsi | mov aword [rsp+7*8], rdi | mov aword [rsp+8*8], r8 | mov aword [rsp+9*8], r9 | mov aword [rsp+10*8], r10 | mov aword [rsp+11*8], r11 | mov aword [rsp+12*8], r12 | mov aword [rsp+13*8], r13 | mov aword [rsp+14*8], r14 | mov aword [rsp+15*8], r15 | movsd qword [rsp+16*8+0*8], xmm0 | movsd qword [rsp+16*8+1*8], xmm1 | movsd qword [rsp+16*8+2*8], xmm2 | movsd qword [rsp+16*8+3*8], xmm3 | movsd qword [rsp+16*8+4*8], xmm4 | movsd qword [rsp+16*8+5*8], xmm5 | movsd qword [rsp+16*8+6*8], xmm6 | movsd qword [rsp+16*8+7*8], xmm7 | movsd qword [rsp+16*8+8*8], xmm8 | movsd qword [rsp+16*8+9*8], xmm9 | movsd qword [rsp+16*8+10*8], xmm10 | movsd qword [rsp+16*8+11*8], xmm11 | movsd qword [rsp+16*8+12*8], xmm12 | movsd qword [rsp+16*8+13*8], xmm13 | movsd qword [rsp+16*8+14*8], xmm14 | movsd qword [rsp+16*8+15*8], xmm15 | | mov Ra(IR_REG_INT_ARG2), rsp | lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16] | mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1) | mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8] |.if X64WIN | sub rsp, 32 /* shadow space */ |.endif |.else | sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */ | mov aword [esp+0*4], eax | mov aword [esp+1*4], ecx | mov aword [esp+2*4], edx | mov aword [esp+3*4], ebx | mov aword [esp+5*4], ebp | mov aword [esp+6*4], esi | mov aword [esp+7*4], edi | movsd qword [esp+8*4+0*8], xmm0 | movsd qword [esp+8*4+1*8], xmm1 | movsd qword [esp+8*4+2*8], xmm2 | movsd qword [esp+8*4+3*8], xmm3 | movsd qword [esp+8*4+4*8], xmm4 | movsd qword [esp+8*4+5*8], xmm5 | movsd qword [esp+8*4+6*8], xmm6 | movsd qword [esp+8*4+7*8], xmm7 | | mov Ra(IR_REG_INT_FCARG2), esp | lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16] | mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1) | mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12] |.endif if (IR_IS_CONST_REF(insn->op2)) { void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { | call aword &addr } else 
{ |.if X64 if (IR_IS_SIGNED_32BIT(addr)) { | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 } else { | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 } | call rax |.endif } } else { IR_ASSERT(0); } // restore SP |.if X64WIN | add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ |.elif X64 | add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */ |.else | add esp, 8*4+8*8+16 /* CPU regs + SSE regs */ |.endif if (def_reg != IR_REG_INT_RET1) { ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); } if (IR_REG_SPILLED(ctx->regs[def][0])) { ir_emit_store(ctx, insn->type, def, def_reg); } } static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) { ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); if (IR_IS_TYPE_INT(type)) { if (from_reg != IR_REG_NONE) { if (to_reg != IR_REG_NONE) { ir_emit_mov(ctx, type, to_reg, from_reg); } else { ir_emit_store(ctx, type, to, from_reg); } } else { ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset)); } } else { if (from_reg != IR_REG_NONE) { if (to_reg != IR_REG_NONE) { ir_emit_fp_mov(ctx, type, to_reg, from_reg); } else { ir_emit_store(ctx, type, to, from_reg); } } else { ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset)); } } } static void ir_emit_load_params(ir_ctx *ctx) { ir_use_list *use_list = &ctx->use_lists[1]; ir_insn *insn; ir_ref i, n, *p, use; int int_param_num = 0; int fp_param_num = 0; ir_reg src_reg; ir_reg dst_reg; // TODO: Calling convention specific int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t stack_offset = 0; #ifdef IR_TARGET_X86 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { int_reg_params_count = IR_REG_INT_FCARGS; fp_reg_params_count = IR_REG_FP_FCARGS; int_reg_params = _ir_int_fc_reg_params; fp_reg_params = _ir_fp_fc_reg_params; } #endif if (ctx->flags & IR_USE_FRAME_POINTER) { stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ } else { stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ } n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { if (int_param_num < int_reg_params_count) { src_reg = int_reg_params[int_param_num]; } else { src_reg = IR_REG_NONE; } int_param_num++; #ifdef _WIN64 /* WIN64 calling convention use common couter for int and fp registers */ fp_param_num++; #endif } else { if (fp_param_num < fp_reg_params_count) { src_reg = fp_reg_params[fp_param_num]; } else { src_reg = IR_REG_NONE; } fp_param_num++; #ifdef _WIN64 /* WIN64 calling convention use common couter for int and fp registers */ int_param_num++; #endif } if (ctx->vregs[use]) { dst_reg = IR_REG_NUM(ctx->regs[use][0]); IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-(ctx->stack_frame_size - ctx->stack_frame_alignment) : ctx->call_stack_size)); if (src_reg != dst_reg) { ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); } if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { ir_emit_store(ctx, insn->type, use, dst_reg); } } if (src_reg == IR_REG_NONE) { if (sizeof(void*) == 8) { stack_offset += sizeof(void*); } else { stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); } } } } } static ir_reg ir_get_free_reg(ir_type type, ir_regset available) { if (IR_IS_TYPE_INT(type)) { available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); } else { IR_ASSERT(IR_IS_TYPE_FP(type)); available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); } IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); return IR_REGSET_FIRST(available); } static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) { ir_backend_data *data = ctx->data; ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; if (to == 0) { if (IR_IS_TYPE_INT(type)) { if (ctx->regs[ref][0] == IR_REG_NONE) { ctx->regs[ref][0] = IR_REG_RAX; } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); if (ctx->regs[ref][1] == IR_REG_NONE) { ctx->regs[ref][1] = IR_REG_XMM0; } } } else if (from != 0) { if (IR_IS_TYPE_INT(type)) { if (ctx->regs[ref][0] == IR_REG_NONE) { ctx->regs[ref][0] = IR_REG_RAX; } } else { IR_ASSERT(IR_IS_TYPE_FP(type)); if (ctx->regs[ref][1] == IR_REG_NONE) { ctx->regs[ref][1] = IR_REG_XMM0; } } } return 1; } static void ir_fix_param_spills(ir_ctx *ctx) { ir_use_list *use_list = &ctx->use_lists[1]; ir_insn *insn; ir_ref i, n, *p, use; int int_param_num = 0; int fp_param_num = 0; ir_reg src_reg; // TODO: Calling convention specific int int_reg_params_count = IR_REG_INT_ARGS; int fp_reg_params_count = IR_REG_FP_ARGS; const int8_t *int_reg_params = _ir_int_reg_params; const int8_t *fp_reg_params = _ir_fp_reg_params; int32_t stack_start = 0; int32_t stack_offset = 0; #ifdef IR_TARGET_X86 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { int_reg_params_count = IR_REG_INT_FCARGS; fp_reg_params_count = IR_REG_FP_FCARGS; int_reg_params = _ir_int_fc_reg_params; fp_reg_params = _ir_fp_fc_reg_params; } #endif if (ctx->flags & IR_USE_FRAME_POINTER) { /* skip old frame pointer and return address */ stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); } else { /* skip return address */ stack_start = sizeof(void*) + ctx->stack_frame_size; } n = use_list->count; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; insn = &ctx->ir_base[use]; if (insn->op == IR_PARAM) { if (IR_IS_TYPE_INT(insn->type)) { if (int_param_num < int_reg_params_count) { src_reg = int_reg_params[int_param_num]; } else { src_reg = IR_REG_NONE; } int_param_num++; #ifdef _WIN64 /* WIN64 calling convention use common couter for int and fp registers */ fp_param_num++; #endif } else { if (fp_param_num < fp_reg_params_count) { src_reg = fp_reg_params[fp_param_num]; } else { src_reg = IR_REG_NONE; } fp_param_num++; #ifdef _WIN64 /* WIN64 calling convention use common couter for int and fp registers */ int_param_num++; #endif } if (src_reg == IR_REG_NONE) { if (ctx->vregs[use]) { ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) && ival->stack_spill_pos == -1 && (ival->next || ival->reg == IR_REG_NONE)) { ival->stack_spill_pos = stack_start + stack_offset; } } if (sizeof(void*) == 8) { stack_offset += sizeof(void*); } else { stack_offset += IR_MAX(sizeof(void*), 
ir_type_size[insn->type]); } } } } #ifdef _WIN64 /* WIN64 uses shadow area for registers */ stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*); #endif ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); ctx->param_stack_size = stack_offset; } static void ir_allocate_unique_spill_slots(ir_ctx *ctx) { uint32_t b; ir_block *bb; ir_insn *insn; ir_ref i, n, j, *p; uint32_t *rule, insn_flags; ir_backend_data *data = ctx->data; ir_regset available = 0; ir_target_constraints constraints; uint32_t def_flags; ir_reg reg; #ifndef IR_REG_FP_RET1 if (ctx->flags2 & IR_HAS_FP_RET_SLOT) { ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data); } else { ctx->ret_slot = -1; } #endif ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); /* vregs + tmp + fixed + SCRATCH + ALL */ ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); if (!ctx->arena) { ctx->arena = ir_arena_create(16 * 1024); } for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { switch (ctx->rules ? *rule : insn->op) { case IR_START: case IR_BEGIN: case IR_END: case IR_IF_TRUE: case IR_IF_FALSE: case IR_CASE_VAL: case IR_CASE_DEFAULT: case IR_MERGE: case IR_LOOP_BEGIN: case IR_LOOP_END: break; #ifndef IR_REG_FP_RET1 case IR_CALL: if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); } #endif IR_FALLTHROUGH; default: def_flags = ir_get_target_constraints(ctx, i, &constraints); if (ctx->rules && *rule != IR_CMP_AND_BRANCH_INT && *rule != IR_CMP_AND_BRANCH_FP && *rule != IR_TEST_AND_BRANCH_INT && *rule != IR_GUARD_CMP_INT && *rule != IR_GUARD_CMP_FP) { available = IR_REGSET_SCRATCH; } if (ctx->vregs[i]) { reg = constraints.def_reg; if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { IR_REGSET_EXCL(available, reg); ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; } else if (def_flags & IR_USE_MUST_BE_IN_REG) { if (insn->op == IR_VLOAD && ctx->live_intervals[ctx->vregs[i]] && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { /* pass */ } else if (insn->op != IR_PARAM) { reg = ir_get_free_reg(insn->type, available); IR_REGSET_EXCL(available, reg); ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; } } if (!ctx->live_intervals[ctx->vregs[i]]) { ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); memset(ival, 0, sizeof(ir_live_interval)); ctx->live_intervals[ctx->vregs[i]] = ival; ival->type = insn->type; ival->reg = IR_REG_NONE; ival->vreg = ctx->vregs[i]; ival->stack_spill_pos = -1; if (insn->op == IR_PARAM && reg == IR_REG_NONE) { ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; } else { ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); } } else if (insn->op == IR_PARAM) { IR_ASSERT(0 && "unexpected PARAM"); return; } } else if (insn->op == IR_VAR) { ir_use_list *use_list = &ctx->use_lists[i]; ir_ref n = use_list->count; if (n > 0) { int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); ir_ref i, *p, use; ir_insn
*use_insn; for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { use = *p; use_insn = &ctx->ir_base[use]; if (use_insn->op == IR_VLOAD) { if (ctx->vregs[use] && !ctx->live_intervals[ctx->vregs[use]]) { ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); memset(ival, 0, sizeof(ir_live_interval)); ctx->live_intervals[ctx->vregs[use]] = ival; ival->type = insn->type; ival->reg = IR_REG_NONE; ival->vreg = ctx->vregs[use]; ival->stack_spill_pos = stack_spill_pos; } } else if (use_insn->op == IR_VSTORE) { if (!IR_IS_CONST_REF(use_insn->op3) && ctx->vregs[use_insn->op3] && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); memset(ival, 0, sizeof(ir_live_interval)); ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; ival->type = insn->type; ival->reg = IR_REG_NONE; ival->vreg = ctx->vregs[use_insn->op3]; ival->stack_spill_pos = stack_spill_pos; } } } } } insn_flags = ir_op_flags[insn->op]; n = constraints.tmps_count; if (n) { do { n--; if (constraints.tmp_regs[n].type) { ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); ir_ref *ops = insn->ops; IR_REGSET_EXCL(available, reg); if (constraints.tmp_regs[n].num > 0 && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { /* rematerialization */ reg |= IR_REG_SPILL_LOAD; } ctx->regs[i][constraints.tmp_regs[n].num] = reg; } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); } else { IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); } } while (n); } n = insn->inputs_count; for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { ir_ref input = *p; if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; } else { uint8_t use_flags = IR_USE_FLAGS(def_flags, j); ir_reg reg = (j < constraints.hints_count) ? 
constraints.hints[j] : IR_REG_NONE; if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { IR_REGSET_EXCL(available, reg); ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { ctx->regs[i][j] = ctx->regs[i][1]; } else if (use_flags & IR_USE_MUST_BE_IN_REG) { reg = ir_get_free_reg(ctx->ir_base[input].type, available); IR_REGSET_EXCL(available, reg); ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; } } } } break; } n = ir_insn_len(insn); i += n; insn += n; rule += n; } if (bb->flags & IR_BB_DESSA_MOVES) { data->dessa_from_block = b; ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); } } ctx->used_preserved_regs = ctx->fixed_save_regset; ctx->flags |= IR_NO_STACK_COMBINE; ir_fix_stack_frame(ctx); } static void ir_preallocate_call_stack(ir_ctx *ctx) { int call_stack_size, peak_call_stack_size = 0; ir_ref i, n; ir_insn *insn; for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { if (insn->op == IR_CALL) { call_stack_size = ir_call_used_stack(ctx, insn); if (call_stack_size > peak_call_stack_size #ifdef IR_HAVE_FASTCALL && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ #endif ) { peak_call_stack_size = call_stack_size; } } n = ir_insn_len(insn); i += n; insn += n; } if (peak_call_stack_size) { ctx->call_stack_size = peak_call_stack_size; ctx->flags |= IR_PREALLOCATED_STACK; } } void ir_fix_stack_frame(ir_ctx *ctx) { uint32_t additional_size = 0; ctx->locals_area_size = ctx->stack_frame_size; #if defined(IR_TARGET_X64) && !defined(_WIN64) if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { ctx->flags2 |= IR_16B_FRAME_ALIGNMENT; ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16); ctx->locals_area_size = ctx->stack_frame_size; if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { additional_size += sizeof(void*) * IR_REG_INT_ARGS; } if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { additional_size += 16 * IR_REG_FP_ARGS; } } #endif if (ctx->used_preserved_regs) { ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; ir_reg reg; (void) reg; IR_REGSET_FOREACH(used_preserved_regs, reg) { additional_size += sizeof(void*); } IR_REGSET_FOREACH_END(); } ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); ctx->stack_frame_size += additional_size; ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) { /* Stack must be 16 byte aligned */ if (!(ctx->flags & IR_FUNCTION)) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { ctx->stack_frame_size += sizeof(void*); ctx->stack_frame_alignment += sizeof(void*); } } else if (ctx->flags & IR_USE_FRAME_POINTER) { while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { ctx->stack_frame_size += sizeof(void*); ctx->stack_frame_alignment += sizeof(void*); } } else { if (!(ctx->flags & IR_NO_STACK_COMBINE)) { ir_preallocate_call_stack(ctx); } while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { ctx->stack_frame_size += sizeof(void*); ctx->stack_frame_alignment += sizeof(void*); } } } ir_fix_param_spills(ctx); } static void* dasm_labels[ir_lb_MAX]; static uint32_t _ir_next_block(ir_ctx *ctx, uint32_t _b) { uint32_t b = ctx->cfg_schedule[++_b]; /* Check for empty ENTRY block */ 
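	/* Advance through the schedule, skipping blocks that are IR_BB_EMPTY but not IR_BB_START:
	 * they emit no code, so the next non-empty block is the real fall-through target.
	 * The schedule list is terminated by 0. */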
while (b && ((ctx->cfg_blocks[b].flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY)) { b = ctx->cfg_schedule[++_b]; } return b; } void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) { uint32_t _b, b, n, target; ir_block *bb; ir_ref i; ir_insn *insn; uint32_t *rule; ir_backend_data data; dasm_State **Dst; int ret; void *entry; size_t size; data.ra_data.unused_slot_4 = 0; data.ra_data.unused_slot_2 = 0; data.ra_data.unused_slot_1 = 0; data.ra_data.handled = NULL; data.rodata_label = 0; data.jmp_table_label = 0; data.double_neg_const = 0; data.float_neg_const = 0; data.double_abs_const = 0; data.float_abs_const = 0; data.double_zero_const = 0; ctx->data = &data; if (!ctx->live_intervals) { ctx->stack_frame_size = 0; ctx->stack_frame_alignment = 0; ctx->call_stack_size = 0; ctx->used_preserved_regs = 0; ir_allocate_unique_spill_slots(ctx); } if (ctx->fixed_stack_frame_size != -1) { if (ctx->fixed_stack_red_zone) { IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); } if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { // TODO: report error to caller #ifdef IR_DEBUG_MESSAGES fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", __FILE__, __LINE__); #endif ctx->data = NULL; ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; return NULL; } ctx->stack_frame_size = ctx->fixed_stack_frame_size; ctx->call_stack_size = ctx->fixed_call_stack_size; ctx->stack_frame_alignment = 0; } Dst = &data.dasm_state; data.dasm_state = NULL; dasm_init(&data.dasm_state, DASM_MAXSECTION); dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); dasm_setup(&data.dasm_state, dasm_actions); /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); data.emit_constants = ir_bitset_malloc(ctx->consts_count); if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { |.if X64 | endbr64 |.else | endbr32 |.endif } if (!(ctx->flags & IR_SKIP_PROLOGUE)) { ir_emit_prologue(ctx); } if (ctx->flags & IR_FUNCTION) { ir_emit_load_params(ctx); } if (UNEXPECTED(!ctx->cfg_schedule)) { uint32_t *list = ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2)); for (b = 0; b <= ctx->cfg_blocks_count; b++) { list[b] = b; } list[ctx->cfg_blocks_count + 1] = 0; } for (_b = 1; _b <= ctx->cfg_blocks_count; _b++) { b = ctx->cfg_schedule[_b]; bb = &ctx->cfg_blocks[b]; IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { continue; } if (bb->flags & IR_BB_ALIGN_LOOP) { | .align IR_LOOP_ALIGNMENT } |=>b: i = bb->start; insn = ctx->ir_base + i; if (bb->flags & IR_BB_ENTRY) { uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; |=>label: if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) { |.if X64 | endbr64 |.else | endbr32 |.endif } ir_emit_prologue(ctx); ctx->entries[insn->op3] = i; } /* skip first instruction */ n = ir_insn_len(insn); i += n; insn += n; rule = ctx->rules + i; while (i <= bb->end) { if (!((*rule) & (IR_FUSED|IR_SKIPPED))) switch ((*rule) & IR_RULE_MASK) { case IR_VAR: case IR_PARAM: case IR_PI: case IR_PHI: case IR_SNAPSHOT: case IR_VA_END: break; case IR_LEA_OB: case IR_LEA_SI: case IR_LEA_SIB: case IR_LEA_IB: case IR_LEA_OB_I: case IR_LEA_I_OB: case IR_LEA_SI_O: case IR_LEA_SIB_O: case IR_LEA_IB_O: case IR_LEA_OB_SI: case IR_LEA_SI_OB: 
case IR_LEA_B_SI: case IR_LEA_SI_B: ir_emit_lea(ctx, i, insn->type); break; case IR_MUL_PWR2: case IR_DIV_PWR2: case IR_MOD_PWR2: ir_emit_mul_div_mod_pwr2(ctx, i, insn); break; case IR_SDIV_PWR2: ir_emit_sdiv_pwr2(ctx, i, insn); break; case IR_SMOD_PWR2: ir_emit_smod_pwr2(ctx, i, insn); break; case IR_SHIFT: ir_emit_shift(ctx, i, insn); break; case IR_SHIFT_CONST: ir_emit_shift_const(ctx, i, insn); break; case IR_BIT_COUNT: ir_emit_bit_count(ctx, i, insn); break; case IR_CTPOP: ir_emit_ctpop(ctx, i, insn); break; case IR_INC: case IR_DEC: case IR_OP_INT: ir_emit_op_int(ctx, i, insn, *rule); break; case IR_ABS_INT: ir_emit_abs_int(ctx, i, insn); break; case IR_BOOL_NOT_INT: ir_emit_bool_not_int(ctx, i, insn); break; case IR_OP_FP: ir_emit_op_fp(ctx, i, insn); break; case IR_IMUL3: ir_emit_imul3(ctx, i, insn); break; case IR_BINOP_INT: ir_emit_binop_int(ctx, i, insn); break; case IR_BINOP_SSE2: ir_emit_binop_sse2(ctx, i, insn); break; case IR_BINOP_AVX: ir_emit_binop_avx(ctx, i, insn); break; case IR_MUL_INT: case IR_DIV_INT: case IR_MOD_INT: ir_emit_mul_div_mod(ctx, i, insn); break; case IR_CMP_INT: ir_emit_cmp_int(ctx, i, insn); break; case IR_TESTCC_INT: ir_emit_testcc_int(ctx, i, insn); break; case IR_SETCC_INT: ir_emit_setcc_int(ctx, i, insn); break; case IR_CMP_FP: ir_emit_cmp_fp(ctx, i, insn); break; case IR_SEXT: ir_emit_sext(ctx, i, insn); break; case IR_ZEXT: ir_emit_zext(ctx, i, insn); break; case IR_TRUNC: ir_emit_trunc(ctx, i, insn); break; case IR_BITCAST: case IR_PROTO: ir_emit_bitcast(ctx, i, insn); break; case IR_INT2FP: ir_emit_int2fp(ctx, i, insn); break; case IR_FP2INT: ir_emit_fp2int(ctx, i, insn); break; case IR_FP2FP: ir_emit_fp2fp(ctx, i, insn); break; case IR_COPY_INT: ir_emit_copy_int(ctx, i, insn); break; case IR_COPY_FP: ir_emit_copy_fp(ctx, i, insn); break; case IR_CMP_AND_STORE_INT: ir_emit_cmp_and_store_int(ctx, i, insn); break; case IR_CMP_AND_BRANCH_INT: ir_emit_cmp_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); break; case IR_CMP_AND_BRANCH_FP: ir_emit_cmp_and_branch_fp(ctx, b, i, insn, _ir_next_block(ctx, _b)); break; case IR_TEST_AND_BRANCH_INT: ir_emit_test_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); break; case IR_JCC_INT: { ir_op op = ctx->ir_base[insn->op2].op; if (op == IR_ADD || op == IR_SUB || // op == IR_MUL || op == IR_OR || op == IR_AND || op == IR_XOR) { op = IR_NE; } else { IR_ASSERT(op >= IR_EQ && op <= IR_UGT); } ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1); } break; case IR_GUARD_CMP_INT: if (ir_emit_guard_cmp_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { goto next_block; } break; case IR_GUARD_CMP_FP: if (ir_emit_guard_cmp_fp(ctx, b, i, insn, _ir_next_block(ctx, _b))) { goto next_block; } break; case IR_GUARD_TEST_INT: if (ir_emit_guard_test_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { goto next_block; } break; case IR_GUARD_JCC_INT: if (ir_emit_guard_jcc_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { goto next_block; } break; case IR_IF_INT: ir_emit_if_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); break; case IR_COND: ir_emit_cond(ctx, i, insn); break; case IR_COND_CMP_INT: ir_emit_cond_cmp_int(ctx, i, insn); break; case IR_COND_CMP_FP: ir_emit_cond_cmp_fp(ctx, i, insn); break; case IR_SWITCH: ir_emit_switch(ctx, b, i, insn); break; case IR_MIN_MAX_INT: ir_emit_min_max_int(ctx, i, insn); break; case IR_OVERFLOW: ir_emit_overflow(ctx, i, insn); break; case IR_OVERFLOW_AND_BRANCH: ir_emit_overflow_and_branch(ctx, b, i, insn, _ir_next_block(ctx, _b)); break; case IR_END: case IR_LOOP_END: if 
(bb->flags & IR_BB_OSR_ENTRY_LOADS) { ir_emit_osr_entry_loads(ctx, b, bb); } if (bb->flags & IR_BB_DESSA_MOVES) { ir_emit_dessa_moves(ctx, b, bb); } do { ir_ref succ = ctx->cfg_edges[bb->successors]; if (UNEXPECTED(bb->successors_count == 2)) { if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { succ = ctx->cfg_edges[bb->successors + 1]; } else { IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); } } else { IR_ASSERT(bb->successors_count == 1); } target = ir_skip_empty_target_blocks(ctx, succ); if (target != _ir_next_block(ctx, _b)) { | jmp =>target } } while (0); break; case IR_RETURN_VOID: ir_emit_return_void(ctx); break; case IR_RETURN_INT: ir_emit_return_int(ctx, i, insn); break; case IR_RETURN_FP: ir_emit_return_fp(ctx, i, insn); break; case IR_CALL: ir_emit_call(ctx, i, insn); break; case IR_TAILCALL: ir_emit_tailcall(ctx, i, insn); break; case IR_IJMP: ir_emit_ijmp(ctx, i, insn); break; case IR_MEM_OP_INT: case IR_MEM_INC: case IR_MEM_DEC: ir_emit_mem_op_int(ctx, i, insn, *rule); break; case IR_MEM_BINOP_INT: ir_emit_mem_binop_int(ctx, i, insn); break; case IR_MEM_MUL_PWR2: case IR_MEM_DIV_PWR2: case IR_MEM_MOD_PWR2: ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn); break; case IR_MEM_SHIFT: ir_emit_mem_shift(ctx, i, insn); break; case IR_MEM_SHIFT_CONST: ir_emit_mem_shift_const(ctx, i, insn); break; case IR_REG_BINOP_INT: ir_emit_reg_binop_int(ctx, i, insn); break; case IR_VADDR: ir_emit_vaddr(ctx, i, insn); break; case IR_VLOAD: ir_emit_vload(ctx, i, insn); break; case IR_VSTORE_INT: ir_emit_vstore_int(ctx, i, insn); break; case IR_VSTORE_FP: ir_emit_vstore_fp(ctx, i, insn); break; case IR_RLOAD: ir_emit_rload(ctx, i, insn); break; case IR_RSTORE: ir_emit_rstore(ctx, i, insn); break; case IR_LOAD_INT: ir_emit_load_int(ctx, i, insn); break; case IR_LOAD_FP: ir_emit_load_fp(ctx, i, insn); break; case IR_STORE_INT: ir_emit_store_int(ctx, i, insn); break; case IR_STORE_FP: ir_emit_store_fp(ctx, i, insn); break; case IR_ALLOCA: ir_emit_alloca(ctx, i, insn); break; case IR_VA_START: ir_emit_va_start(ctx, i, insn); break; case IR_VA_COPY: ir_emit_va_copy(ctx, i, insn); break; case IR_VA_ARG: ir_emit_va_arg(ctx, i, insn); break; case IR_AFREE: ir_emit_afree(ctx, i, insn); break; case IR_BLOCK_BEGIN: ir_emit_block_begin(ctx, i, insn); break; case IR_BLOCK_END: ir_emit_block_end(ctx, i, insn); break; case IR_FRAME_ADDR: ir_emit_frame_addr(ctx, i); break; case IR_EXITCALL: ir_emit_exitcall(ctx, i, insn); break; case IR_GUARD: case IR_GUARD_NOT: if (ir_emit_guard(ctx, b, i, insn, _ir_next_block(ctx, _b))) { goto next_block; } break; case IR_GUARD_OVERFLOW: if (ir_emit_guard_overflow(ctx, b, i, insn)) { goto next_block; } break; case IR_SSE_SQRT: ir_emit_sse_sqrt(ctx, i, insn); break; case IR_SSE_RINT: ir_emit_sse_round(ctx, i, insn, 4); break; case IR_SSE_FLOOR: ir_emit_sse_round(ctx, i, insn, 9); break; case IR_SSE_CEIL: ir_emit_sse_round(ctx, i, insn, 10); break; case IR_SSE_TRUNC: ir_emit_sse_round(ctx, i, insn, 11); break; case IR_SSE_NEARBYINT: ir_emit_sse_round(ctx, i, insn, 12); break; case IR_TLS: ir_emit_tls(ctx, i, insn); break; case IR_TRAP: | int3 break; default: IR_ASSERT(0 && "NIY rule/instruction"); ir_mem_free(data.emit_constants); dasm_free(&data.dasm_state); ctx->data = NULL; ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; return NULL; } n = ir_insn_len(insn); i += n; insn += n; rule += n; } next_block:; } if (data.rodata_label) { |.rodata } IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) { insn = &ctx->ir_base[-i]; if 
(IR_IS_TYPE_FP(insn->type)) { int label = ctx->cfg_blocks_count + i; if (!data.rodata_label) { data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; |.rodata |=>data.rodata_label: } if (insn->type == IR_DOUBLE) { |.align 8 |=>label: |.dword insn->val.u32, insn->val.u32_hi } else { IR_ASSERT(insn->type == IR_FLOAT); |.align 4 |=>label: |.dword insn->val.u32 } } else if (insn->op == IR_STR) { int label = ctx->cfg_blocks_count + i; const char *str = ir_get_str(ctx, insn->val.str); int i = 0; if (!data.rodata_label) { data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; |.rodata |=>data.rodata_label: } |.align 8 |=>label: while (str[i]) { char c = str[i]; |.byte c i++; } |.byte 0 } else { IR_ASSERT(0); } } IR_BITSET_FOREACH_END(); if (data.rodata_label) { |.code } ir_mem_free(data.emit_constants); if (ctx->status) { dasm_free(&data.dasm_state); ctx->data = NULL; return NULL; } ret = dasm_link(&data.dasm_state, size_ptr); if (ret != DASM_S_OK) { IR_ASSERT(0); dasm_free(&data.dasm_state); ctx->data = NULL; ctx->status = IR_ERROR_LINK; return NULL; } size = *size_ptr; if (ctx->code_buffer) { entry = ctx->code_buffer->pos; entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) { ctx->data = NULL; ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; return NULL; } ctx->code_buffer->pos = (char*)entry + size; } else { entry = ir_mem_mmap(size); if (!entry) { dasm_free(&data.dasm_state); ctx->data = NULL; ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; return NULL; } ir_mem_unprotect(entry, size); } ret = dasm_encode(&data.dasm_state, entry); if (ret != DASM_S_OK) { IR_ASSERT(0); dasm_free(&data.dasm_state); if (ctx->code_buffer) { if (ctx->code_buffer->pos == (char*)entry + size) { /* rollback */ ctx->code_buffer->pos = (char*)entry - size; } } else { ir_mem_unmap(entry, size); } ctx->data = NULL; ctx->status = IR_ERROR_ENCODE; return NULL; } if (data.jmp_table_label) { uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); ctx->jmp_table_offset = offset; } else { ctx->jmp_table_offset = 0; } if (data.rodata_label) { uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); ctx->rodata_offset = offset; } else { ctx->rodata_offset = 0; } if (ctx->entries_count) { /* For all entries */ i = ctx->entries_count; do { ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); insn->op3 = offset; } while (i != 0); } dasm_free(&data.dasm_state); ir_mem_flush(entry, size); #if defined(__GNUC__) if ((ctx->flags & IR_GEN_CACHE_DEMOTE) && (ctx->mflags & IR_X86_CLDEMOTE)) { uintptr_t start = (uintptr_t)entry; uintptr_t p = (uintptr_t)start & ~0x3F; do { /* _cldemote(p); */ asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); p += 64; } while (p < start + size); } #endif if (!ctx->code_buffer) { ir_mem_protect(entry, size); } ctx->data = NULL; return entry; } const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr) { void *entry; size_t size; uint32_t i; dasm_State **Dst, *dasm_state; int ret; IR_ASSERT(code_buffer); IR_ASSERT(sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(code_buffer, exit_addr)); Dst = &dasm_state; dasm_state = NULL; dasm_init(&dasm_state, DASM_MAXSECTION); dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); dasm_setup(&dasm_state, dasm_actions); for (i = 0; i < exit_points_per_group - 
1; i++) { | push byte i | .byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1 } | push byte i |// 1: | add aword [r4], first_exit_point | jmp aword &exit_addr ret = dasm_link(&dasm_state, &size); if (ret != DASM_S_OK) { IR_ASSERT(0); dasm_free(&dasm_state); return NULL; } entry = code_buffer->pos; entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { return NULL; } code_buffer->pos = (char*)entry + size; ret = dasm_encode(&dasm_state, entry); if (ret != DASM_S_OK) { IR_ASSERT(0); dasm_free(&dasm_state); if (code_buffer->pos == (char*)entry + size) { /* rollback */ code_buffer->pos = (char*)entry - size; } return NULL; } dasm_free(&dasm_state); ir_mem_flush(entry, size); *size_ptr = size; return entry; } bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr) { return sizeof(void*) == 8 && !IR_MAY_USE_32BIT_ADDR(code_buffer, addr); } void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr) { void *entry; size_t size; dasm_State **Dst, *dasm_state; int ret; Dst = &dasm_state; dasm_state = NULL; dasm_init(&dasm_state, DASM_MAXSECTION); dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); dasm_setup(&dasm_state, dasm_actions); |.code |.if X64 | jmp aword [>1] |1: | .aword &addr |.else | jmp &addr |.endif ret = dasm_link(&dasm_state, &size); if (ret != DASM_S_OK) { IR_ASSERT(0); dasm_free(&dasm_state); return NULL; } if (size > (size_t)((char*)code_buffer->end - (char*)code_buffer->pos)) { dasm_free(&dasm_state); return NULL; } entry = code_buffer->pos; ret = dasm_encode(&dasm_state, entry); if (ret != DASM_S_OK) { dasm_free(&dasm_state); return NULL; } *size_ptr = size; code_buffer->pos = (char*)code_buffer->pos + size; dasm_free(&dasm_state); ir_mem_flush(entry, size); return entry; } void ir_fix_thunk(void *thunk_entry, void *addr) { unsigned char *code = thunk_entry; if (sizeof(void*) == 8 && !IR_IS_SIGNED_32BIT(((unsigned char*)addr - (code + 5)))) { int32_t *offset_ptr; void **addr_ptr; IR_ASSERT(code[0] == 0xff && code[1] == 0x25); offset_ptr = (int32_t*)(code + 2); addr_ptr = (void**)(code + 6 + *offset_ptr); *addr_ptr = addr; } else { int32_t *addr_ptr; code[0] = 0xe9; addr_ptr = (int32_t*)(code + 1); *addr_ptr = (int32_t)(intptr_t)(void*)((unsigned char*)addr - (code + 5)); } }
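/*
 * Thunk layout notes: on x86_64 ir_emit_thunk() emits an indirect "jmp aword [>1]"
 * (0xff 0x25, RIP-relative) followed by an 8-byte address literal, while on 32-bit
 * x86 it emits a direct rel32 "jmp &addr".  ir_fix_thunk() re-targets an existing
 * thunk in place: when the new address is out of rel32 range it rewrites the 8-byte
 * literal behind the indirect jump, otherwise it overwrites the thunk with a direct
 * 0xe9 (rel32) jump.
 */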