/*
 * IR - Lightweight JIT Compilation Framework
 * (x86/x86_64 native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

|.if X64
|.arch x64
|.else
|.arch x86
|.endif

|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 16

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

# define IR_MEM_VAL(loc) ((loc).v)
#else
typedef uint64_t ir_mem;

# define IR_MEM_VAL(loc) (loc)
#endif

#define IR_MEM_OFFSET(loc) ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff))
#define IR_MEM_BASE(loc)   ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff))
#define IR_MEM_INDEX(loc)  ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff))
#define IR_MEM_SCALE(loc)  ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff))

#define IR_MEM_O(addr)          IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 1)
#define IR_MEM_B(base)          IR_MEM(base, 0, IR_REG_NONE, 1)
#define IR_MEM_BO(base, offset) IR_MEM(base, offset, IR_REG_NONE, 1)

IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t scale)
{
	ir_mem mem;
	IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST));
	IR_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8);
#ifdef IR_DEBUG
	mem.v =
#else
	mem =
#endif
		((uint64_t)(uint32_t)offset |
		((uint64_t)(uint8_t)base << 32) |
		((uint64_t)(uint8_t)index << 40) |
		((uint64_t)(uint8_t)scale << 48));
	return mem;
}
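/*
 * Illustrative note (not part of the original sources): an ir_mem value packs a
 * full x86 addressing mode into 64 bits - the 32-bit displacement in bits 0..31,
 * the base register byte in bits 32..39, the index register byte in bits 40..47
 * and the scale in bits 48..55.  So, for example, something like
 * IR_MEM(IR_REG_RBP, -8, IR_REG_NONE, 1) would describe the operand [rbp-8],
 * and IR_MEM_OFFSET()/IR_MEM_BASE() recover -8 and the base register from it.
 */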
#define IR_IS_SIGNED_32BIT(val)     ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1)))
#define IR_IS_SIGNED_NEG_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= -2147483647))
#define IR_IS_UNSIGNED_32BIT(val)   (((uintptr_t)(val)) <= 0xffffffff)
#define IR_IS_32BIT(type, val)      (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64))
#define IR_IS_FP_ZERO(insn)         ((insn.type == IR_DOUBLE) ? (insn.val.u64 == 0) : (insn.val.u32 == 0))
#define IR_MAY_USE_32BIT_ADDR(code_buffer, addr) \
	((code_buffer) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - (char*)(code_buffer)->start) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)(code_buffer)->end)))

#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ? \
		((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \
		((offset) + ctx->call_stack_size))
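/*
 * Note (added for readability): the ASM_EXPAND_OP_MEM / ASM_EXPAND_OP1_MEM /
 * ASM_EXPAND_OP2_MEM / ASM_EXPAND_OP3_MEM macros that follow take a packed
 * ir_mem operand and dispatch, at run time, to the matching DynAsm addressing
 * form ([offset], [base+offset], [index*scale+offset] or
 * [base+index*scale+offset]).  The numeric suffix only says which macro
 * argument carries the ir_mem value; the remaining operands are passed through
 * to the wrapped MACRO unchanged.
 */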
|.macro ASM_EXPAND_OP_MEM, MACRO, op, type, op1
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base   = IR_MEM_BASE(op1);
||		int32_t index  = IR_MEM_INDEX(op1);
||		int32_t scale  = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [offset]
||			} else {
|				MACRO op, type, [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*8+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*4+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*2+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP1_MEM, MACRO, op, type, op1, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base   = IR_MEM_BASE(op1);
||		int32_t index  = IR_MEM_INDEX(op1);
||		int32_t scale  = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+offset], op2
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*8+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*8+offset], op2
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*4+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*4+offset], op2
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*2+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*2+offset], op2
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)+offset], op2
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP2_MEM, MACRO, op, type, op1, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op2);
||		int32_t base   = IR_MEM_BASE(op2);
||		int32_t index  = IR_MEM_INDEX(op2);
||		int32_t scale  = IR_MEM_SCALE(op2);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*8+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*4+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*2+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP2_MEM_3, MACRO, op, type, op1, op2, op3
||	do {
||		int32_t offset = IR_MEM_OFFSET(op2);
||		int32_t base   = IR_MEM_BASE(op2);
||		int32_t index  = IR_MEM_INDEX(op2);
||		int32_t scale  = IR_MEM_SCALE(op2);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+offset], op3
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*8+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset], op3
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*4+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset], op3
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*2+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset], op3
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)+offset], op3
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP3_MEM, MACRO, op, type, op1, op2, op3
||	do {
||		int32_t offset = IR_MEM_OFFSET(op3);
||		int32_t base   = IR_MEM_BASE(op3);
||		int32_t index  = IR_MEM_INDEX(op3);
||		int32_t scale  = IR_MEM_SCALE(op3);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)*8+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)*4+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)*2+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM, op, type, op1
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1
||			break;
||		case 2:
|			op word op1
||			break;
||		case 4:
|			op dword op1
||			break;
|.if X64
||		case 8:
|			op qword op1
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1, Rb(op2)
||			break;
||		case 2:
|			op word op1, Rw(op2)
||			break;
||		case 4:
|			op dword op1, Rd(op2)
||			break;
|.if X64
||		case 8:
|			op qword op1, Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1, op2
||			break;
||		case 2:
|			op word op1, op2
||			break;
||		case 4:
|			op dword op1, op2
||			break;
|.if X64
||		case 8:
|			op qword op1, op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1, (op2 & 0xff)
||			break;
||		case 2:
|			op word op1, (op2 & 0xffff)
||			break;
||		case 4:
|			op dword op1, op2
||			break;
|.if X64
||		case 8:
|			op qword op1, op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_REG_MEM, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), byte op2
||			break;
||		case 2:
|			op Rw(op1), word op2
||			break;
||		case 4:
|			op Rd(op1), dword op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), qword op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_TMEM_OP, op, type, op1
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base   = IR_MEM_BASE(op1);
||		int32_t index  = IR_MEM_INDEX(op1);
||		int32_t scale  = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op type [offset]
||			} else {
|				op type [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*8+offset]
||			} else {
|				op type [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*4+offset]
||			} else {
|				op type [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*2+offset]
||			} else {
|				op type [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)+offset]
||			} else {
|				op type [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_TXT_TMEM_OP, op, op1, type, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op2);
||		int32_t base   = IR_MEM_BASE(op2);
||		int32_t index  = IR_MEM_INDEX(op2);
||		int32_t scale  = IR_MEM_SCALE(op2);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op op1, type [offset]
||			} else {
|				op op1, type [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)*8+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)*4+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)*2+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_TMEM_TXT_OP, op, type, op1, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base   = IR_MEM_BASE(op1);
||		int32_t index  = IR_MEM_INDEX(op1);
||		int32_t scale  = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op type [offset], op2
||			} else {
|				op type [Ra(base)+offset], op2
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*8+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)*8+offset], op2
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*4+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)*4+offset], op2
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*2+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)*2+offset], op2
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)+offset], op2
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_TXT_TXT_TMEM_OP, op, op1, op2, type, op3
||	do {
||		int32_t offset = IR_MEM_OFFSET(op3);
||		int32_t base   = IR_MEM_BASE(op3);
||		int32_t index  = IR_MEM_INDEX(op3);
||		int32_t scale  = IR_MEM_SCALE(op3);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [offset]
||			} else {
|				op op1, op2, type [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)*8+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)*4+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)*2+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_REG_OP, op, type, op1
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1)
||			break;
||		case 2:
|			op Rw(op1)
||			break;
||		case 4:
|			op Rd(op1)
||			break;
|.if X64
||		case 8:
|			op Rq(op1)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_MEM_OP, op, type, op1
|	ASM_EXPAND_OP_MEM ASM_EXPAND_TYPE_MEM, op, type, op1
|.endmacro

|.macro ASM_REG_REG_OP, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), Rb(op2)
||			break;
||		case 2:
|			op Rw(op1), Rw(op2)
||			break;
||		case 4:
|			op Rd(op1), Rd(op2)
||			break;
|.if X64
||		case 8:
|			op Rq(op1), Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_REG_OP2, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
||		case 2:
|			op Rw(op1), Rw(op2)
||			break;
||		case 4:
|			op Rd(op1), Rd(op2)
||			break;
|.if X64
||		case 8:
|			op Rq(op1), Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_TXT_OP, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), op2
||			break;
||		case 2:
|			op Rw(op1), op2
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro
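/*
 * Note (added for readability): the ASM_REG_*_OP macros pick the operand width
 * from ir_type_size[type]: Rb() names the 8-bit register, Rw() the 16-bit,
 * Rd() the 32-bit and Rq() the 64-bit one (the latter only on x86_64).  So an
 * invocation such as "| ASM_REG_REG_OP mov, IR_U16, dst, src" ends up
 * assembling the 16-bit form "mov Rw(dst), Rw(src)".
 */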
|.macro ASM_REG_IMM_OP, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), (op2 & 0xff)
||			break;
||		case 2:
|			op Rw(op1), (op2 & 0xffff)
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_MEM_REG_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2
|.endmacro

|.macro ASM_MEM_TXT_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2
|.endmacro

|.macro ASM_MEM_IMM_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2
|.endmacro

|.macro ASM_REG_MEM_OP, op, type, op1, op2
|	ASM_EXPAND_OP2_MEM ASM_REG_TXT_OP, op, type, op1, op2
|.endmacro

|.macro ASM_REG_REG_MUL, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), Rw(op2)
||			break;
||		case 4:
|			op Rd(op1), Rd(op2)
||			break;
|.if X64
||		case 8:
|			op Rq(op1), Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_IMM_MUL, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), op2
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_TXT_MUL, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), op2
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_MEM_MUL, op, type, op1, op2
|	ASM_EXPAND_OP2_MEM ASM_REG_TXT_MUL, op, type, op1, op2
|.endmacro

|.macro ASM_REG_TXT_TXT_MUL, op, type, op1, op2, op3
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), op2, op3
||			break;
||		case 4:
|			op Rd(op1), op2, op3
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2, op3
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_MEM_TXT_MUL, op, type, op1, op2, op3
|	ASM_EXPAND_OP2_MEM_3 ASM_REG_TXT_TXT_MUL, imul, type, op1, op2, op3
|.endmacro

|.macro ASM_SSE2_REG_REG_OP, op, type, op1, op2
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST)
||	}
|.endmacro

|.macro ASM_SSE2_REG_TXT_OP, op, type, op1, op2
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), qword op2
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), dword op2
||	}
|.endmacro

|.macro ASM_SSE2_REG_MEM_OP, op, type, op1, op2
|	ASM_EXPAND_OP2_MEM ASM_SSE2_REG_TXT_OP, op, type, op1, op2
|.endmacro
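/*
 * Note (added for readability): for floating point, these macros paste a type
 * suffix onto the mnemonic: "op..d" selects the scalar-double form and "op..s"
 * the scalar-single form.  The ASM_FP_* wrappers further below additionally
 * check ctx->mflags & IR_X86_AVX at run time and prepend "v.." so that the AVX
 * (VEX-encoded) variant is emitted instead of the legacy SSE2 one.
 */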
|.macro ASM_AVX_REG_REG_REG_OP, op, type, op1, op2, op3
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST)
||	}
|.endmacro

|.macro ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), qword op3
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), dword op3
||	}
|.endmacro

|.macro ASM_AVX_REG_REG_MEM_OP, op, type, op1, op2, op3
|	ASM_EXPAND_OP3_MEM ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3
|.endmacro

|.macro ASM_FP_REG_REG_OP, op, type, op1, op2
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_REG_OP v..op, type, op1, op2
||	} else {
|		ASM_SSE2_REG_REG_OP op, type, op1, op2
||	}
|.endmacro

|.macro ASM_FP_TXT_REG_OP, op, type, dst, src
||	if (type == IR_DOUBLE) {
||		if (ctx->mflags & IR_X86_AVX) {
|			v..op..d qword dst, xmm(src-IR_REG_FP_FIRST)
||		} else {
|			op..d qword dst, xmm(src-IR_REG_FP_FIRST)
||		}
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
||		if (ctx->mflags & IR_X86_AVX) {
|			v..op..s dword dst, xmm(src-IR_REG_FP_FIRST)
||		} else {
|			op..s dword dst, xmm(src-IR_REG_FP_FIRST)
||		}
||	}
|.endmacro

|.macro ASM_FP_MEM_REG_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_FP_TXT_REG_OP, op, type, op1, op2
|.endmacro

|.macro ASM_FP_REG_TXT_OP, op, type, op1, op2
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_TXT_OP v..op, type, op1, op2
||	} else {
|		ASM_SSE2_REG_TXT_OP op, type, op1, op2
||	}
|.endmacro

|.macro ASM_FP_REG_MEM_OP, op, type, op1, op2
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_MEM_OP v..op, type, op1, op2
||	} else {
|		ASM_SSE2_REG_MEM_OP op, type, op1, op2
||	}
|.endmacro

|.macro ASM_SSE2_REG_REG_TXT_OP, op, type, op1, op2, op3
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), op3
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), op3
||	}
|.endmacro

|.macro ASM_SSE2_REG_REG_REG_TXT_OP, op, type, op1, op2, op3, op4
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST), op4
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST), op4
||	}
|.endmacro

|.macro ASM_FP_REG_REG_TXT_OP, op, type, op1, op2, op3
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_REG_REG_TXT_OP v..op, type, op1, op2, op3
||	} else {
|		ASM_SSE2_REG_REG_TXT_OP op, type, op1, op2, op3
||	}
|.endmacro

typedef struct _ir_backend_data {
	ir_reg_alloc_data  ra_data;
	uint32_t           dessa_from_block;
	dasm_State        *dasm_state;
	ir_bitset          emit_constants;
	int                rodata_label, jmp_table_label;
	bool               double_neg_const;
	bool               float_neg_const;
	bool               double_abs_const;
	bool               float_abs_const;
	bool               double_zero_const;
} ir_backend_data;

#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \
	#name64,
#define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \
	#name32,
#define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \
	#name16,
#define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \
	#name8,
#define IR_FP_REG_NAME(code, name) \
	#name,

static const char *_ir_reg_name[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME)
	IR_FP_REGS(IR_FP_REG_NAME)
};

static const char *_ir_reg_name32[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME32)
};

static const char *_ir_reg_name16[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME16)
};
static const char *_ir_reg_name8[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME8)
};

/* Calling Convention */
#ifdef _WIN64

static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
};

static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
};

#elif defined(IR_TARGET_X64)

static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
	IR_REG_INT_ARG5,
	IR_REG_INT_ARG6,
};

static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
	IR_REG_FP_ARG5,
	IR_REG_FP_ARG6,
	IR_REG_FP_ARG7,
	IR_REG_FP_ARG8,
};

#else

static const int8_t *_ir_int_reg_params = NULL;
static const int8_t *_ir_fp_reg_params = NULL;
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = {
	IR_REG_INT_FCARG1,
	IR_REG_INT_FCARG2,
};
static const int8_t *_ir_fp_fc_reg_params = NULL;

#endif

const char *ir_reg_name(int8_t reg, ir_type type)
{
	if (reg >= IR_REG_NUM) {
		if (reg == IR_REG_SCRATCH) {
			return "SCRATCH";
		} else {
			IR_ASSERT(reg == IR_REG_ALL);
			return "ALL";
		}
	}
	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
	if (type == IR_VOID) {
		type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE;
	}
	if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) {
		return _ir_reg_name[reg];
	} else if (ir_type_size[type] == 4) {
		return _ir_reg_name32[reg];
	} else if (ir_type_size[type] == 2) {
		return _ir_reg_name16[reg];
	} else {
		IR_ASSERT(ir_type_size[type] == 1);
		return _ir_reg_name8[reg];
	}
}
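/*
 * Note (added for readability): the x86-specific instruction-selection rules
 * listed below are appended after the generic IR opcodes (IR_FIRST_RULE starts
 * at IR_LAST_OP), so an entry in ctx->rules[] can hold either a plain opcode or
 * one of these backend rules, possibly combined with modifier bits such as
 * IR_FUSED or IR_SIMPLE.
 */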
#define IR_RULES(_) \
	_(CMP_INT) \
	_(CMP_FP) \
	_(MUL_INT) \
	_(DIV_INT) \
	_(MOD_INT) \
	_(TEST_INT) \
	_(SETCC_INT) \
	_(TESTCC_INT) \
	_(LEA_OB) \
	_(LEA_SI) \
	_(LEA_SIB) \
	_(LEA_IB) \
	_(LEA_SI_O) \
	_(LEA_SIB_O) \
	_(LEA_IB_O) \
	_(LEA_I_OB) \
	_(LEA_OB_I) \
	_(LEA_OB_SI) \
	_(LEA_SI_OB) \
	_(LEA_B_SI) \
	_(LEA_SI_B) \
	_(INC) \
	_(DEC) \
	_(MUL_PWR2) \
	_(DIV_PWR2) \
	_(MOD_PWR2) \
	_(SDIV_PWR2) \
	_(SMOD_PWR2) \
	_(BOOL_NOT_INT) \
	_(ABS_INT) \
	_(OP_INT) \
	_(OP_FP) \
	_(IMUL3) \
	_(BINOP_INT) \
	_(BINOP_SSE2) \
	_(BINOP_AVX) \
	_(SHIFT) \
	_(SHIFT_CONST) \
	_(COPY_INT) \
	_(COPY_FP) \
	_(CMP_AND_STORE_INT) \
	_(CMP_AND_BRANCH_INT) \
	_(CMP_AND_BRANCH_FP) \
	_(TEST_AND_BRANCH_INT) \
	_(JCC_INT) \
	_(COND_CMP_INT) \
	_(COND_CMP_FP) \
	_(GUARD_CMP_INT) \
	_(GUARD_CMP_FP) \
	_(GUARD_TEST_INT) \
	_(GUARD_JCC_INT) \
	_(GUARD_OVERFLOW) \
	_(OVERFLOW_AND_BRANCH) \
	_(MIN_MAX_INT) \
	_(MEM_OP_INT) \
	_(MEM_INC) \
	_(MEM_DEC) \
	_(MEM_MUL_PWR2) \
	_(MEM_DIV_PWR2) \
	_(MEM_MOD_PWR2) \
	_(MEM_BINOP_INT) \
	_(MEM_SHIFT) \
	_(MEM_SHIFT_CONST) \
	_(REG_BINOP_INT) \
	_(VSTORE_INT) \
	_(VSTORE_FP) \
	_(LOAD_INT) \
	_(LOAD_FP) \
	_(STORE_INT) \
	_(STORE_FP) \
	_(IF_INT) \
	_(RETURN_VOID) \
	_(RETURN_INT) \
	_(RETURN_FP) \
	_(BIT_COUNT) \
	_(SSE_SQRT) \
	_(SSE_RINT) \
	_(SSE_FLOOR) \
	_(SSE_CEIL) \
	_(SSE_TRUNC) \
	_(SSE_NEARBYINT) \

#define IR_RULE_ENUM(name) IR_ ## name,

#define IR_STATIC_ALLOCA (IR_SKIPPED | IR_FUSED | IR_SIMPLE | IR_ALLOCA)

enum _ir_rule {
	IR_FIRST_RULE = IR_LAST_OP,
	IR_RULES(IR_RULE_ENUM)
	IR_LAST_RULE
};

#define IR_RULE_NAME(name) #name,
const char *ir_rule_name[IR_LAST_OP] = {
	NULL,
	IR_RULES(IR_RULE_NAME)
	NULL
};

static bool ir_may_fuse_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
	if (sizeof(void*) == 4) {
		return 1;
	} else if (IR_IS_SYM_CONST(addr_insn->op)) {
		void *addr = ir_sym_addr(ctx, addr_insn);

		if (!addr) {
			return 0;
		}
		return IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr);
	} else {
		return IR_IS_SIGNED_32BIT(addr_insn->val.i64);
	}
}

static bool ir_may_fuse_imm(ir_ctx *ctx, const ir_insn *val_insn)
{
	if (val_insn->type == IR_ADDR) {
		if (sizeof(void*) == 4) {
			return 1;
		} else if (IR_IS_SYM_CONST(val_insn->op)) {
			void *addr = ir_sym_addr(ctx, val_insn);

			if (!addr) {
				return 0;
			}
			return IR_IS_SIGNED_32BIT((intptr_t)addr);
		} else {
			return IR_IS_SIGNED_32BIT(val_insn->val.i64);
		}
	} else {
		return (ir_type_size[val_insn->type] <= 4 || IR_IS_SIGNED_32BIT(val_insn->val.i64));
	}
}
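/*
 * Note (added for readability): on x86_64 most ALU instructions only accept a
 * sign-extended 32-bit immediate, so ir_may_fuse_imm()/ir_may_fuse_addr() let a
 * constant stay inline only when it fits into int32_t; e.g. 0x7fffffff can be
 * fused, while a 64-bit operand like 0x80000000 has to be materialized into a
 * temporary register first (which is what ir_add_const_tmp_reg() below
 * arranges for).
 */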
/* register allocation */
static int ir_add_const_tmp_reg(ir_ctx *ctx, ir_ref ref, uint32_t num, int n, ir_target_constraints *constraints)
{
	IR_ASSERT(IR_IS_CONST_REF(ref));
	const ir_insn *val_insn = &ctx->ir_base[ref];

	if (!ir_may_fuse_imm(ctx, val_insn)) {
		constraints->tmp_regs[n] = IR_TMP_REG(num, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
		n++;
	}
	return n;
}

int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
{
	uint32_t rule = ir_rule(ctx, ref);
	const ir_insn *insn;
	int n = 0;
	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;

	constraints->def_reg = IR_REG_NONE;
	constraints->hints_count = 0;
	switch (rule & IR_RULE_MASK) {
		case IR_BINOP_INT:
			insn = &ctx->ir_base[ref];
			if (rule & IR_FUSED) {
				if (ctx->ir_base[insn->op1].op == IR_RLOAD) {
					flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
				} else {
					flags = IR_OP2_MUST_BE_IN_REG;
				}
			} else {
				flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_IMUL3:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_SHIFT:
			if (rule & IR_FUSED) {
				flags = IR_OP2_MUST_BE_IN_REG;
			} else {
				flags = IR_DEF_REUSES_OP1_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			}
			constraints->hints[1] = IR_REG_NONE;
			constraints->hints[2] = IR_REG_RCX;
			constraints->hints_count = 3;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RCX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_MUL_INT:
			/* %rax - used as input and result */
			constraints->def_reg = IR_REG_RAX;
			constraints->hints[1] = IR_REG_RAX;
			constraints->hints_count = 2;
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 2;
			break;
		case IR_DIV_INT:
			/* %rax - used as input and result */
			constraints->def_reg = IR_REG_RAX;
			constraints->hints[1] = IR_REG_RAX;
			constraints->hints_count = 2;
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 2;
			goto op2_const;
		case IR_MOD_INT:
			constraints->def_reg = IR_REG_RDX;
			constraints->hints[1] = IR_REG_RAX;
			constraints->hints_count = 2;
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 2;
			goto op2_const;
		case IR_MIN_MAX_INT:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
op2_const:
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_INT:
		case IR_TEST_INT:
			insn = &ctx->ir_base[ref];
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[0] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			} else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			} else if (ir_rule(ctx, insn->op1) & IR_FUSED) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_FP:
			insn = &ctx->ir_base[ref];
			if (!(rule & IR_FUSED)) {
				constraints->tmp_regs[0] = IR_TMP_REG(3, IR_BOOL, IR_DEF_SUB_REF, IR_SAVE_SUB_REF);
				n = 1;
			}
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_BINOP_AVX:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_COND:
			insn = &ctx->ir_base[ref];
			if (!IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
				break;
			}
			IR_FALLTHROUGH;
		case IR_COND_CMP_INT:
			insn = &ctx->ir_base[ref];
			if (IR_IS_TYPE_INT(insn->type)) {
				if (IR_IS_CONST_REF(insn->op3) || ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
					constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
					n = 1;
				} else if (IR_IS_CONST_REF(insn->op2) || ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
					constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
					n = 1;
				} else {
					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
				}
			} else {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			}
			break;
		case IR_COND_CMP_FP:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_VSTORE_INT:
			flags = IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints);
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_STORE_INT:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints);
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_VSTORE_FP:
			flags = IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_LOAD_FP:
		case IR_LOAD_INT:
		case IR_MEM_OP_INT:
		case IR_MEM_INC:
		case IR_MEM_DEC:
		case IR_MEM_MUL_PWR2:
		case IR_MEM_DIV_PWR2:
		case IR_MEM_MOD_PWR2:
		case IR_MEM_BINOP_INT:
		case IR_MEM_SHIFT:
		case IR_MEM_SHIFT_CONST:
		case IR_CMP_AND_STORE_INT:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			}
			break;
		case IR_STORE_FP:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SWITCH:
			flags = IR_OP2_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			/* we need a temporary register in case the MIN CASE value is not zero or some CASE VAL can't fit into 32-bit */
			constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n++;
			break;
		case IR_CALL:
			insn = &ctx->ir_base[ref];
			if (IR_IS_TYPE_INT(insn->type)) {
				constraints->def_reg = IR_REG_INT_RET1;
#ifdef IR_REG_FP_RET1
			} else {
				constraints->def_reg = IR_REG_FP_RET1;
#endif
			}
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			IR_FALLTHROUGH;
		case IR_TAILCALL:
			insn = &ctx->ir_base[ref];
			if (insn->inputs_count > 2) {
				constraints->hints[2] = IR_REG_NONE;
				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
				if (!IR_IS_CONST_REF(insn->op2)) {
					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
					n++;
				}
			}
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_BINOP_SSE2:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			break;
		case IR_SHIFT_CONST:
		case IR_INC:
		case IR_DEC:
		case IR_MUL_PWR2:
		case IR_DIV_PWR2:
		case IR_OP_INT:
		case IR_OP_FP:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_MOD_PWR2:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 8) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!IR_IS_SIGNED_32BIT(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_SMOD_PWR2:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 8) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!IR_IS_SIGNED_32BIT(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n++;
			break;
		case IR_SDIV_PWR2:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 8) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!IR_IS_SIGNED_32BIT(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_BIT_COUNT:
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 1) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			} else {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			}
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_CTPOP:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			if (ir_type_size[insn->type] == 8) {
				constraints->tmp_regs[1] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
				n = 2;
			}
			break;
		case IR_COPY_INT:
		case IR_COPY_FP:
		case IR_SEXT:
		case IR_ZEXT:
		case IR_TRUNC:
		case IR_BITCAST:
		case IR_PROTO:
		case IR_FP2FP:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_ABS_INT:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			break;
		case IR_PARAM:
			constraints->def_reg = ir_get_param_reg(ctx, ref);
			flags = 0;
			break;
		case IR_PI:
		case IR_PHI:
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_RLOAD:
			constraints->def_reg = ctx->ir_base[ref].op2;
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_EXITCALL:
			flags = IR_USE_MUST_BE_IN_REG;
			constraints->def_reg = IR_REG_INT_RET1;
			break;
		case IR_IF_INT:
		case IR_GUARD:
		case IR_GUARD_NOT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			break;
		case IR_IJMP:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			break;
		case IR_RSTORE:
			flags = IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_RETURN_INT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_INT_RET1;
			constraints->hints_count = 3;
			break;
		case IR_RETURN_FP:
#ifdef IR_REG_FP_RET1
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_FP_RET1;
			constraints->hints_count = 3;
#endif
			break;
		case IR_SNAPSHOT:
			flags = 0;
			break;
		case IR_VA_START:
			flags = IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_VA_ARG:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_VA_COPY:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
	}
	constraints->tmps_count = n;

	return flags;
}

/* instruction selection */
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref);
static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root);

static void ir_swap_ops(ir_insn *insn)
{
	SWAP_REFS(insn->op1, insn->op2);
}

static bool ir_match_try_revert_lea_to_add(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *insn = &ctx->ir_base[ref];

	/* TODO: This optimization makes sense only if the other operand is killed */
	if (insn->op1 == insn->op2) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, ref)) {
		ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP;
		return 1;
	} else if (ir_match_try_fuse_load(ctx, insn->op1, ref)) {
		/* swap for better load fusion */
		ir_swap_ops(insn);
		ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP;
		return 1;
	}
	return 0;
}

static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref)
{
	if (!IR_IS_CONST_REF(addr_ref)) {
		uint32_t rule = ctx->rules[addr_ref];

		if (!rule) {
			ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref);
		}
		if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B) {
			ir_use_list *use_list;
			ir_ref j;

			if (rule == IR_LEA_IB && ir_match_try_revert_lea_to_add(ctx, addr_ref)) {
				return;
			}

			use_list = &ctx->use_lists[addr_ref];
			j = use_list->count;
			if (j > 1) {
				/* check if address is used only in LOAD and STORE */
				ir_ref *p = &ctx->use_edges[use_list->refs];

				do {
					ir_insn *insn = &ctx->ir_base[*p];
					if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
						return;
					}
					p++;
				} while (--j);
			}
			ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | rule;
		}
	}
}

static bool ir_match_may_fuse_SI(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_insn *op2_insn, *insn = &ctx->ir_base[use];

	if (insn->op == IR_ADD) {
		if (insn->op1 == ref) {
			if (IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					if (ir_may_fuse_addr(ctx, op2_insn)) {
						return 1; // LEA_SI_O
					}
				} else if (IR_IS_SIGNED_32BIT(op2_insn->val.i64)) {
					return 1; // LEA_SI_O
				}
			} else if (insn->op2 != ref) {
				return 1; // LEA_SI_B or LEA_SI_OB
			}
		} else if (insn->op2 == ref && insn->op1 != insn->op2) {
			return 1; // LEA_B_SI or LEA_OB_SI
		}
	}
	return 0;
}

static bool ir_match_fuse_addr_all_useges(ir_ctx *ctx, ir_ref ref)
{
	uint32_t rule = ctx->rules[ref];
	ir_use_list *use_list;
	ir_ref n, *p, use;

	if (rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) {
		return 1;
	} else if (!rule) {
		ir_insn *insn = &ctx->ir_base[ref];

		IR_ASSERT(IR_IS_TYPE_INT(insn->type) && ir_type_size[insn->type] >= 4);
		if (insn->op == IR_MUL
		 && IR_IS_CONST_REF(insn->op2)) {
			insn = &ctx->ir_base[insn->op2];
			if (!IR_IS_SYM_CONST(insn->op)
			 && (insn->val.u64 == 2 || insn->val.u64 == 4 || insn->val.u64 == 8)) {
				ctx->rules[ref] = IR_LEA_SI;

				use_list = &ctx->use_lists[ref];
				n = use_list->count;
				IR_ASSERT(n > 1);
				p = &ctx->use_edges[use_list->refs];
				for (; n > 0; p++, n--) {
					use = *p;
					if (!ir_match_may_fuse_SI(ctx, ref, use)) {
						return 0;
					}
				}

				return 1;
			}
		}
	}

	return 0;
}

/* A naive check if there is a STORE or CALL between this LOAD and the fusion root */
static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	if (ref + 1 != root) {
		ir_ref pos = ctx->prev_ref[root];

		do {
			ir_insn *insn = &ctx->ir_base[pos];

			if (insn->op == IR_STORE) {
				// TODO: check if LOAD and STORE addresses may alias
				return 1;
			} else if (insn->op == IR_CALL) {
				return 1;
			}
			pos = ctx->prev_ref[pos];
		} while (ref != pos);
	}
	return 0;
}
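/*
 * Note (added for readability): a LOAD may only be fused into its user when no
 * instruction between them can modify memory.  ir_match_has_mem_deps() above is
 * deliberately conservative: it walks the instructions between the LOAD and the
 * fusion root and rejects fusion as soon as any STORE or CALL is seen, without
 * trying to prove that the addresses cannot alias.
 */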
static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	if (ir_in_same_block(ctx, ref)
	 && ctx->ir_base[ref].op == IR_LOAD) {
		if (ctx->use_lists[ref].count == 2
		 && !ir_match_has_mem_deps(ctx, ref, root)) {
			ir_ref addr_ref = ctx->ir_base[ref].op2;
			ir_insn *addr_insn = &ctx->ir_base[addr_ref];

			if (IR_IS_CONST_REF(addr_ref)) {
				if (ir_may_fuse_addr(ctx, addr_insn)) {
					ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
					return;
				}
			} else {
				ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
				ir_match_fuse_addr(ctx, addr_ref);
				return;
			}
		}
	}
}

static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	ir_insn *insn = &ctx->ir_base[ref];

	if (ir_in_same_block(ctx, ref)
	 && insn->op == IR_LOAD) {
		if (ctx->use_lists[ref].count == 2
		 && !ir_match_has_mem_deps(ctx, ref, root)) {
			ir_ref addr_ref = ctx->ir_base[ref].op2;
			ir_insn *addr_insn = &ctx->ir_base[addr_ref];

			if (IR_IS_CONST_REF(addr_ref)) {
				if (ir_may_fuse_addr(ctx, addr_insn)) {
					ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
					return 1;
				}
			} else {
				ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
				ir_match_fuse_addr(ctx, addr_ref);
				return 1;
			}
		}
	} else if (insn->op == IR_PARAM) {
		if (ctx->use_lists[ref].count == 1
		 && ir_get_param_reg(ctx, ref) == IR_REG_NONE) {
			return 1;
		}
	} else if (ctx->ir_base[ref].op == IR_VLOAD) {
		return 1;
	}
	return 0;
}

static void ir_match_fuse_load_commutative_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		return;
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		return;
	} else if (ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
	}
}

static void ir_match_fuse_load_commutative_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (!IR_IS_CONST_REF(insn->op2)
	 && !ir_match_try_fuse_load(ctx, insn->op2, root)
	 && (IR_IS_CONST_REF(insn->op1) || ir_match_try_fuse_load(ctx, insn->op1, root))) {
		ir_swap_ops(insn);
	}
}

static void ir_match_fuse_load_cmp_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		ir_match_fuse_load(ctx, insn->op1, root);
	} else if (!ir_match_try_fuse_load(ctx, insn->op2, root)
	 && ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}

static void ir_match_fuse_load_test_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		ir_match_fuse_load(ctx, insn->op1, root);
	} else if (!ir_match_try_fuse_load(ctx, insn->op2, root)
	 && ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
	}
}

static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (insn->op != IR_EQ && insn->op != IR_NE) {
		if (insn->op == IR_LT || insn->op == IR_LE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
		ir_match_fuse_load(ctx, insn->op2, root);
	} else if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		/* pass */
	} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}
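/*
 * Note (added for readability): "insn->op ^= 3" mirrors a comparison after its
 * operands have been swapped; it appears to rely on the opcode numbering in
 * ir.h, where the ordered comparisons are laid out so that XOR-ing the code
 * with 3 exchanges LT<->GT and GE<->LE (and likewise for the unsigned
 * variants).  For FP compares the swap is done so that the emitted comparison
 * can use a condition that does not need an extra parity-flag check for NaN.
 */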
static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct)
{
	if (direct) {
		if (insn->op == IR_LT || insn->op == IR_LE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
	} else {
		if (insn->op == IR_GT || insn->op == IR_GE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
	}
	if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		/* pass */
	} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}

#define STR_EQUAL(name, name_len, str) (name_len == strlen(str) && memcmp(name, str, strlen(str)) == 0)

#define IR_IS_FP_FUNC_1(proto, _type) (proto->params_count == 1 && \
		proto->param_types[0] == _type && \
		proto->ret_type == _type)

static uint32_t ir_match_builtin_call(ir_ctx *ctx, const ir_insn *func)
{
	const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

	if (proto->flags & IR_BUILTIN_FUNC) {
		size_t name_len;
		const char *name = ir_get_strl(ctx, func->val.name, &name_len);

		if (STR_EQUAL(name, name_len, "sqrt")) {
			if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) {
				return IR_SSE_SQRT;
			}
		} else if (STR_EQUAL(name, name_len, "sqrtf")) {
			if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) {
				return IR_SSE_SQRT;
			}
		} else if (STR_EQUAL(name, name_len, "rint")) {
			if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) {
				return IR_SSE_RINT;
			}
		} else if (STR_EQUAL(name, name_len, "rintf")) {
			if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) {
				return IR_SSE_RINT;
			}
		} else if (STR_EQUAL(name, name_len, "floor")) {
			if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) {
				return IR_SSE_FLOOR;
			}
		} else if (STR_EQUAL(name, name_len, "floorf")) {
			if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) {
				return IR_SSE_FLOOR;
			}
		} else if (STR_EQUAL(name, name_len, "ceil")) {
			if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) {
				return IR_SSE_CEIL;
			}
		} else if (STR_EQUAL(name, name_len, "ceilf")) {
			if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) {
				return IR_SSE_CEIL;
			}
		} else if (STR_EQUAL(name, name_len, "trunc")) {
			if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) {
				return IR_SSE_TRUNC;
			}
		} else if (STR_EQUAL(name, name_len, "truncf")) {
			if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) {
				return IR_SSE_TRUNC;
			}
		} else if (STR_EQUAL(name, name_len, "nearbyint")) {
			if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) {
				return IR_SSE_NEARBYINT;
			}
		} else if (STR_EQUAL(name, name_len, "nearbyintf")) {
			if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) {
				return IR_SSE_NEARBYINT;
			}
		}
	}

	return 0;
}
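/*
 * Note (added for readability): ir_match_builtin_call() above lets calls to the
 * recognized libm helpers (sqrt/sqrtf, rint/rintf, floor/floorf, ceil/ceilf,
 * trunc/truncf, nearbyint/nearbyintf) be selected as the dedicated SSE_* rules,
 * so they can be emitted as inline instructions instead of real function calls.
 */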
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *op2_insn;
	ir_insn *insn = &ctx->ir_base[ref];
	uint32_t store_rule;
	ir_op load_op;

	switch (insn->op) {
		case IR_EQ:
		case IR_NE:
		case IR_LT:
		case IR_GE:
		case IR_LE:
		case IR_GT:
		case IR_ULT:
		case IR_UGE:
		case IR_ULE:
		case IR_UGT:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				if (IR_IS_CONST_REF(insn->op2)
				 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)
				 && ctx->ir_base[insn->op2].val.i64 == 0
				 && insn->op1 == ref - 1) { /* previous instruction */
					ir_insn *op1_insn = &ctx->ir_base[insn->op1];

					if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) {
						/* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */
						ir_match_fuse_load_test_int(ctx, op1_insn, ref);
						ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT;
						return IR_TESTCC_INT;
					} else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
							/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
							((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
								(insn->op == IR_EQ || insn->op == IR_NE))) {
						/* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */
						if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
							ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
							ctx->rules[insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
						} else {
							ir_match_fuse_load(ctx, op1_insn->op2, ref);
							ctx->rules[insn->op1] = IR_BINOP_INT;
						}
						return IR_SETCC_INT;
					}
				}
				ir_match_fuse_load_cmp_int(ctx, insn, ref);
				return IR_CMP_INT;
			} else {
				ir_match_fuse_load_cmp_fp(ctx, insn, ref);
				return IR_CMP_FP;
			}
			break;
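		/*
		 * Note (added for readability): the ADD/SUB case below tries hard to turn
		 * integer additions into LEA forms - IR_LEA_OB for reg+imm32, IR_LEA_IB
		 * for reg+reg, IR_LEA_SI/IR_LEA_SIB for scaled-index variants, and the
		 * combined *_O/_OB/_SI rules when several such nodes can be fused into a
		 * single address expression - since LEA performs the whole address
		 * arithmetic in one instruction without clobbering the flags.
		 */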
LEA [X+Y+im32] */ 1977 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB; 1978 return IR_LEA_IB_O; 1979 } 1980 } 1981 /* ADD(X, imm32) => LEA [X+imm32] */ 1982 return IR_LEA_OB; 1983 } else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) { 1984 if (insn->op == IR_ADD) { 1985 if (op2_insn->val.i64 == 1) { 1986 /* ADD(_, 1) => INC */ 1987 return IR_INC; 1988 } else { 1989 /* ADD(_, -1) => DEC */ 1990 return IR_DEC; 1991 } 1992 } else { 1993 if (op2_insn->val.i64 == 1) { 1994 /* SUB(_, 1) => DEC */ 1995 return IR_DEC; 1996 } else { 1997 /* SUB(_, -1) => INC */ 1998 return IR_INC; 1999 } 2000 } 2001 } 2002 } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) { 2003 if (insn->op1 != insn->op2) { 2004 if (ctx->use_lists[insn->op1].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op1)) { 2005 uint32_t rule =ctx->rules[insn->op1]; 2006 if (!rule) { 2007 ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); 2008 } 2009 if (rule == IR_LEA_OB) { 2010 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; 2011 if (ctx->use_lists[insn->op2].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op2)) { 2012 rule = ctx->rules[insn->op2]; 2013 if (!rule) { 2014 ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); 2015 } 2016 if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { 2017 /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */ 2018 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 2019 return IR_LEA_OB_SI; 2020 } 2021 } 2022 /* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */ 2023 return IR_LEA_OB_I; 2024 } else if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { 2025 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 2026 if (ctx->use_lists[insn->op2].count == 1) { 2027 rule = ctx->rules[insn->op2]; 2028 if (!rule) { 2029 ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); 2030 } 2031 if (rule == IR_LEA_OB) { 2032 /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */ 2033 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; 2034 return IR_LEA_SI_OB; 2035 } 2036 } 2037 /* x = MUL(X, 2|4|8) ... ADD(x, Y) => SKIP ... LEA */ 2038 return IR_LEA_SI_B; 2039 } 2040 } 2041 if (ctx->use_lists[insn->op2].count == 1 || ir_match_fuse_addr_all_useges(ctx, insn->op2)) { 2042 uint32_t rule = ctx->rules[insn->op2]; 2043 if (!rule) { 2044 ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); 2045 } 2046 if (rule == IR_LEA_OB) { 2047 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; 2048 /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... LEA */ 2049 return IR_LEA_I_OB; 2050 } else if (rule == IR_LEA_SI || rule == (IR_FUSED | IR_SIMPLE | IR_LEA_SI)) { 2051 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 2052 /* x = MUL(X, 2|4|8) ... ADD(Y, x) => SKIP ... 
LEA */ 2053 return IR_LEA_B_SI; 2054 } 2055 } 2056 } 2057 /* ADD(X, Y) => LEA [X + Y] */ 2058 return IR_LEA_IB; 2059 } 2060binop_int: 2061 if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { 2062 ir_match_fuse_load_commutative_int(ctx, insn, ref); 2063 return IR_BINOP_INT | IR_MAY_SWAP; 2064 } else { 2065 ir_match_fuse_load(ctx, insn->op2, ref); 2066 return IR_BINOP_INT; 2067 } 2068 } else { 2069binop_fp: 2070 if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { 2071 ir_match_fuse_load_commutative_fp(ctx, insn, ref); 2072 if (ctx->mflags & IR_X86_AVX) { 2073 return IR_BINOP_AVX; 2074 } else { 2075 return IR_BINOP_SSE2 | IR_MAY_SWAP; 2076 } 2077 } else { 2078 ir_match_fuse_load(ctx, insn->op2, ref); 2079 if (ctx->mflags & IR_X86_AVX) { 2080 return IR_BINOP_AVX; 2081 } else { 2082 return IR_BINOP_SSE2; 2083 } 2084 } 2085 } 2086 break; 2087 case IR_MUL: 2088 if (IR_IS_TYPE_INT(insn->type)) { 2089 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2090 op2_insn = &ctx->ir_base[insn->op2]; 2091 if (IR_IS_SYM_CONST(op2_insn->op)) { 2092 /* pass */ 2093 } else if (IR_IS_CONST_REF(insn->op1)) { 2094 // const 2095 } else if (op2_insn->val.u64 == 0) { 2096 // 0 2097 } else if (op2_insn->val.u64 == 1) { 2098 // return IR_COPY_INT; 2099 } else if (ir_type_size[insn->type] >= 4 && 2100 (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) { 2101 /* MUL(X, 2|4|8) => LEA [X*2|4|8] */ 2102 return IR_LEA_SI; 2103 } else if (ir_type_size[insn->type] >= 4 && 2104 (op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) { 2105 /* MUL(X, 3|5|9) => LEA [X+X*2|4|8] */ 2106 return IR_LEA_SIB; 2107 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 2108 /* MUL(X, PWR2) => SHL */ 2109 return IR_MUL_PWR2; 2110 } else if (IR_IS_TYPE_SIGNED(insn->type) 2111 && ir_type_size[insn->type] != 1 2112 && IR_IS_SIGNED_32BIT(op2_insn->val.i64) 2113 && !IR_IS_CONST_REF(insn->op1)) { 2114 /* MUL(_, imm32) => IMUL */ 2115 ir_match_fuse_load(ctx, insn->op1, ref); 2116 return IR_IMUL3; 2117 } 2118 } 2119 /* Prefer IMUL over MUL because it's more flexible and uses less registers ??? 
*/ 2120// if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { 2121 if (ir_type_size[insn->type] != 1) { 2122 goto binop_int; 2123 } 2124 ir_match_fuse_load(ctx, insn->op2, ref); 2125 return IR_MUL_INT; 2126 } else { 2127 goto binop_fp; 2128 } 2129 break; 2130 case IR_ADD_OV: 2131 case IR_SUB_OV: 2132 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 2133 goto binop_int; 2134 case IR_MUL_OV: 2135 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 2136 if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { 2137 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2138 op2_insn = &ctx->ir_base[insn->op2]; 2139 if (!IR_IS_SYM_CONST(op2_insn->op) 2140 && IR_IS_SIGNED_32BIT(op2_insn->val.i64) 2141 && !IR_IS_CONST_REF(insn->op1)) { 2142 /* MUL(_, imm32) => IMUL */ 2143 ir_match_fuse_load(ctx, insn->op1, ref); 2144 return IR_IMUL3; 2145 } 2146 } 2147 goto binop_int; 2148 } 2149 ir_match_fuse_load(ctx, insn->op2, ref); 2150 return IR_MUL_INT; 2151 case IR_DIV: 2152 if (IR_IS_TYPE_INT(insn->type)) { 2153 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2154 op2_insn = &ctx->ir_base[insn->op2]; 2155 if (IR_IS_SYM_CONST(op2_insn->op)) { 2156 /* pass */ 2157 } else if (IR_IS_CONST_REF(insn->op1)) { 2158 // const 2159 } else if (op2_insn->val.u64 == 1) { 2160 // return IR_COPY_INT; 2161 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 2162 /* DIV(X, PWR2) => SHR */ 2163 if (IR_IS_TYPE_UNSIGNED(insn->type)) { 2164 return IR_DIV_PWR2; 2165 } else { 2166 return IR_SDIV_PWR2; 2167 } 2168 } 2169 } 2170 ir_match_fuse_load(ctx, insn->op2, ref); 2171 return IR_DIV_INT; 2172 } else { 2173 goto binop_fp; 2174 } 2175 break; 2176 case IR_MOD: 2177 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2178 op2_insn = &ctx->ir_base[insn->op2]; 2179 if (IR_IS_SYM_CONST(op2_insn->op)) { 2180 /* pass */ 2181 } else if (IR_IS_CONST_REF(insn->op1)) { 2182 // const 2183 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 2184 /* MOD(X, PWR2) => AND */ 2185 if (IR_IS_TYPE_UNSIGNED(insn->type)) { 2186 return IR_MOD_PWR2; 2187 } else { 2188 return IR_SMOD_PWR2; 2189 } 2190 } 2191 } 2192 ir_match_fuse_load(ctx, insn->op2, ref); 2193 return IR_MOD_INT; 2194 case IR_BSWAP: 2195 case IR_NOT: 2196 if (insn->type == IR_BOOL) { 2197 IR_ASSERT(IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)); // TODO: IR_BOOL_NOT_FP 2198 return IR_BOOL_NOT_INT; 2199 } else { 2200 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 2201 return IR_OP_INT; 2202 } 2203 break; 2204 case IR_NEG: 2205 if (IR_IS_TYPE_INT(insn->type)) { 2206 return IR_OP_INT; 2207 } else { 2208 return IR_OP_FP; 2209 } 2210 case IR_ABS: 2211 if (IR_IS_TYPE_INT(insn->type)) { 2212 return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax 2213 } else { 2214 return IR_OP_FP; 2215 } 2216 case IR_OR: 2217 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2218 op2_insn = &ctx->ir_base[insn->op2]; 2219 if (IR_IS_SYM_CONST(op2_insn->op)) { 2220 /* pass */ 2221 } else if (IR_IS_CONST_REF(insn->op1)) { 2222 // const 2223 } else if (op2_insn->val.i64 == 0) { 2224 // return IR_COPY_INT; 2225 } else if (op2_insn->val.i64 == -1) { 2226 // -1 2227 } 2228 } 2229 goto binop_int; 2230 case IR_AND: 2231 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2232 op2_insn = &ctx->ir_base[insn->op2]; 2233 if (IR_IS_SYM_CONST(op2_insn->op)) { 2234 /* pass */ 2235 } else if (IR_IS_CONST_REF(insn->op1)) { 2236 // const 2237 } else if (op2_insn->val.i64 == 0) { 2238 // 0 2239 } else if (op2_insn->val.i64 == -1) { 2240 // 
return IR_COPY_INT; 2241 } 2242 } 2243 goto binop_int; 2244 case IR_XOR: 2245 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2246 op2_insn = &ctx->ir_base[insn->op2]; 2247 if (IR_IS_SYM_CONST(op2_insn->op)) { 2248 /* pass */ 2249 } else if (IR_IS_CONST_REF(insn->op1)) { 2250 // const 2251 } 2252 } 2253 goto binop_int; 2254 case IR_SHL: 2255 if (IR_IS_CONST_REF(insn->op2)) { 2256 if (ctx->flags & IR_OPT_CODEGEN) { 2257 op2_insn = &ctx->ir_base[insn->op2]; 2258 if (IR_IS_SYM_CONST(op2_insn->op)) { 2259 /* pass */ 2260 } else if (IR_IS_CONST_REF(insn->op1)) { 2261 // const 2262 } else if (op2_insn->val.u64 == 0) { 2263 // return IR_COPY_INT; 2264 } else if (ir_type_size[insn->type] >= 4) { 2265 if (op2_insn->val.u64 == 1) { 2266 // lea [op1*2] 2267 } else if (op2_insn->val.u64 == 2) { 2268 // lea [op1*4] 2269 } else if (op2_insn->val.u64 == 3) { 2270 // lea [op1*8] 2271 } 2272 } 2273 } 2274 return IR_SHIFT_CONST; 2275 } 2276 return IR_SHIFT; 2277 case IR_SHR: 2278 case IR_SAR: 2279 case IR_ROL: 2280 case IR_ROR: 2281 if (IR_IS_CONST_REF(insn->op2)) { 2282 if (ctx->flags & IR_OPT_CODEGEN) { 2283 op2_insn = &ctx->ir_base[insn->op2]; 2284 if (IR_IS_SYM_CONST(op2_insn->op)) { 2285 /* pass */ 2286 } else if (IR_IS_CONST_REF(insn->op1)) { 2287 // const 2288 } else if (op2_insn->val.u64 == 0) { 2289 // return IR_COPY_INT; 2290 } 2291 } 2292 return IR_SHIFT_CONST; 2293 } 2294 return IR_SHIFT; 2295 case IR_MIN: 2296 case IR_MAX: 2297 if (IR_IS_TYPE_INT(insn->type)) { 2298 return IR_MIN_MAX_INT | IR_MAY_SWAP; 2299 } else { 2300 goto binop_fp; 2301 } 2302 break; 2303 case IR_COPY: 2304 if (IR_IS_TYPE_INT(insn->type)) { 2305 return IR_COPY_INT | IR_MAY_REUSE; 2306 } else { 2307 return IR_COPY_FP | IR_MAY_REUSE; 2308 } 2309 break; 2310 case IR_CALL: 2311 if (IR_IS_CONST_REF(insn->op2)) { 2312 const ir_insn *func = &ctx->ir_base[insn->op2]; 2313 2314 if (func->op == IR_FUNC && func->proto) { 2315 uint32_t rule = ir_match_builtin_call(ctx, func); 2316 2317 if (rule) { 2318 return rule; 2319 } 2320 } 2321 } 2322 ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT; 2323#ifndef IR_REG_FP_RET1 2324 if (IR_IS_TYPE_FP(insn->type)) { 2325 ctx->flags2 |= IR_HAS_FP_RET_SLOT; 2326 } 2327#endif 2328 IR_FALLTHROUGH; 2329 case IR_TAILCALL: 2330 case IR_IJMP: 2331 ir_match_fuse_load(ctx, insn->op2, ref); 2332 return insn->op; 2333 case IR_VAR: 2334 return IR_SKIPPED | IR_VAR; 2335 case IR_PARAM: 2336 return ctx->use_lists[ref].count > 0 ? 
IR_PARAM : IR_SKIPPED | IR_PARAM; 2337 case IR_ALLOCA: 2338 /* alloca() may be used only in functions */ 2339 if (ctx->flags & IR_FUNCTION) { 2340 if (IR_IS_CONST_REF(insn->op2) && ctx->cfg_map[ref] == 1) { 2341 ir_insn *val = &ctx->ir_base[insn->op2]; 2342 2343 if (!IR_IS_SYM_CONST(val->op)) { 2344 return IR_STATIC_ALLOCA; 2345 } 2346 } 2347 ctx->flags |= IR_USE_FRAME_POINTER; 2348 ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT; 2349 } 2350 return IR_ALLOCA; 2351 case IR_VSTORE: 2352 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { 2353 store_rule = IR_VSTORE_INT; 2354 load_op = IR_VLOAD; 2355store_int: 2356 if ((ctx->flags & IR_OPT_CODEGEN) 2357 && ir_in_same_block(ctx, insn->op3) 2358 && (ctx->use_lists[insn->op3].count == 1 || 2359 (ctx->use_lists[insn->op3].count == 2 2360 && (ctx->ir_base[insn->op3].op == IR_ADD_OV || 2361 ctx->ir_base[insn->op3].op == IR_SUB_OV)))) { 2362 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 2363 uint32_t rule = ctx->rules[insn->op3]; 2364 2365 if (!rule) { 2366 ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3); 2367 } 2368 if (((rule & IR_RULE_MASK) == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { 2369 if (insn->op1 == op_insn->op1 2370 && ctx->ir_base[op_insn->op1].op == load_op 2371 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2372 && ctx->use_lists[op_insn->op1].count == 2) { 2373 /* l = LOAD(_, a) ... v = BINOP(l, _) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ 2374 ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; 2375 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2376 if (!IR_IS_CONST_REF(op_insn->op2) 2377 && ctx->rules[op_insn->op2] == (IR_FUSED|IR_SIMPLE|IR_LOAD)) { 2378 ctx->rules[op_insn->op2] = IR_LOAD_INT; 2379 } 2380 return IR_MEM_BINOP_INT; 2381 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2382 && insn->op1 == op_insn->op2 2383 && ctx->ir_base[op_insn->op2].op == load_op 2384 && ctx->ir_base[op_insn->op2].op2 == insn->op2 2385 && ctx->use_lists[op_insn->op2].count == 2) { 2386 /* l = LOAD(_, a) ... v = BINOP(_, l) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ 2387 ir_swap_ops(op_insn); 2388 ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; 2389 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2390 return IR_MEM_BINOP_INT; 2391 } 2392 } else if (rule == IR_INC) { 2393 if (insn->op1 == op_insn->op1 2394 && ctx->ir_base[op_insn->op1].op == load_op 2395 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2396 && ctx->use_lists[op_insn->op1].count == 2) { 2397 /* l = LOAD(_, a) ... v = INC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_INC */ 2398 ctx->rules[insn->op3] = IR_SKIPPED | IR_INC; 2399 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2400 return IR_MEM_INC; 2401 } 2402 } else if (rule == IR_DEC) { 2403 if (insn->op1 == op_insn->op1 2404 && ctx->ir_base[op_insn->op1].op == load_op 2405 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2406 && ctx->use_lists[op_insn->op1].count == 2){ 2407 /* l = LOAD(_, a) ... v = DEC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DEC */ 2408 ctx->rules[insn->op3] = IR_SKIPPED | IR_DEC; 2409 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2410 return IR_MEM_DEC; 2411 } 2412 } else if (rule == IR_MUL_PWR2) { 2413 if (insn->op1 == op_insn->op1 2414 && ctx->ir_base[op_insn->op1].op == load_op 2415 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2416 && ctx->use_lists[op_insn->op1].count == 2) { 2417 /* l = LOAD(_, a) ... v = MUL_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... 
MEM_MUL_PWR2 */ 2418 ctx->rules[insn->op3] = IR_SKIPPED | IR_MUL_PWR2; 2419 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2420 return IR_MEM_MUL_PWR2; 2421 } 2422 } else if (rule == IR_DIV_PWR2) { 2423 if (insn->op1 == op_insn->op1 2424 && ctx->ir_base[op_insn->op1].op == load_op 2425 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2426 && ctx->use_lists[op_insn->op1].count == 2) { 2427 /* l = LOAD(_, a) ... v = DIV_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DIV_PWR2 */ 2428 ctx->rules[insn->op3] = IR_SKIPPED | IR_DIV_PWR2; 2429 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2430 return IR_MEM_DIV_PWR2; 2431 } 2432 } else if (rule == IR_MOD_PWR2) { 2433 if (insn->op1 == op_insn->op1 2434 && ctx->ir_base[op_insn->op1].op == load_op 2435 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2436 && ctx->use_lists[op_insn->op1].count == 2) { 2437 /* l = LOAD(_, a) ... v = MOD_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MOD_PWR2 */ 2438 ctx->rules[insn->op3] = IR_SKIPPED | IR_MOD_PWR2; 2439 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2440 return IR_MEM_MOD_PWR2; 2441 } 2442 } else if (rule == IR_SHIFT) { 2443 if (insn->op1 == op_insn->op1 2444 && ctx->ir_base[op_insn->op1].op == load_op 2445 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2446 && ctx->use_lists[op_insn->op1].count == 2) { 2447 /* l = LOAD(_, a) ... v = SHIFT(l, _) ... STORE(l, a, v) => SKIP ... SKIP_SHIFT ... MEM_SHIFT */ 2448 ctx->rules[insn->op3] = IR_FUSED | IR_SHIFT; 2449 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2450 return IR_MEM_SHIFT; 2451 } 2452 } else if (rule == IR_SHIFT_CONST) { 2453 if (insn->op1 == op_insn->op1 2454 && ctx->ir_base[op_insn->op1].op == load_op 2455 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2456 && ctx->use_lists[op_insn->op1].count == 2) { 2457 /* l = LOAD(_, a) ... v = SHIFT(l, CONST) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_SHIFT_CONST */ 2458 ctx->rules[insn->op3] = IR_SKIPPED | IR_SHIFT_CONST; 2459 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2460 return IR_MEM_SHIFT_CONST; 2461 } 2462 } else if (rule == IR_OP_INT && op_insn->op != IR_BSWAP) { 2463 if (insn->op1 == op_insn->op1 2464 && ctx->ir_base[op_insn->op1].op == load_op 2465 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2466 && ctx->use_lists[op_insn->op1].count == 2) { 2467 /* l = LOAD(_, a) ... v = OP(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_OP */ 2468 ctx->rules[insn->op3] = IR_SKIPPED | IR_OP_INT; 2469 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2470 return IR_MEM_OP_INT; 2471 } 2472 } else if (rule == IR_CMP_INT && load_op == IR_LOAD) { 2473 /* c = CMP(_, _) ... STORE(c) => SKIP_CMP ... 
CMP_AND_STORE_INT */ 2474 ctx->rules[insn->op3] = IR_FUSED | IR_CMP_INT; 2475 return IR_CMP_AND_STORE_INT; 2476 } 2477 } 2478 return store_rule; 2479 } else { 2480 return IR_VSTORE_FP; 2481 } 2482 break; 2483 case IR_LOAD: 2484 ir_match_fuse_addr(ctx, insn->op2); 2485 if (IR_IS_TYPE_INT(insn->type)) { 2486 return IR_LOAD_INT; 2487 } else { 2488 return IR_LOAD_FP; 2489 } 2490 break; 2491 case IR_STORE: 2492 ir_match_fuse_addr(ctx, insn->op2); 2493 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { 2494 store_rule = IR_STORE_INT; 2495 load_op = IR_LOAD; 2496 goto store_int; 2497 } else { 2498 return IR_STORE_FP; 2499 } 2500 break; 2501 case IR_RLOAD: 2502 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { 2503 return IR_SKIPPED | IR_RLOAD; 2504 } 2505 return IR_RLOAD; 2506 case IR_RSTORE: 2507 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2508 if ((ctx->flags & IR_OPT_CODEGEN) 2509 && ir_in_same_block(ctx, insn->op2) 2510 && ctx->use_lists[insn->op2].count == 1 2511 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2512 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 2513 2514 if (op_insn->op == IR_ADD || 2515 op_insn->op == IR_SUB || 2516// op_insn->op == IR_MUL || 2517 op_insn->op == IR_OR || 2518 op_insn->op == IR_AND || 2519 op_insn->op == IR_XOR) { 2520 if (insn->op1 == op_insn->op1 2521 && ctx->ir_base[op_insn->op1].op == IR_RLOAD 2522 && ctx->ir_base[op_insn->op1].op2 == insn->op3 2523 && ctx->use_lists[op_insn->op1].count == 2) { 2524 /* l = RLOAD(r) ... v = BINOP(l, _) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ 2525 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2526 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; 2527 return IR_REG_BINOP_INT; 2528 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2529 && insn->op1 == op_insn->op2 2530 && ctx->ir_base[op_insn->op2].op == IR_RLOAD 2531 && ctx->ir_base[op_insn->op2].op2 == insn->op3 2532 && ctx->use_lists[op_insn->op2].count == 2) { 2533 /* l = RLOAD(r) ... v = BINOP(x, l) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ 2534 ir_swap_ops(op_insn); 2535 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2536 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; 2537 return IR_REG_BINOP_INT; 2538 } 2539 } 2540 } 2541 } 2542 ir_match_fuse_load(ctx, insn->op2, ref); 2543 return IR_RSTORE; 2544 case IR_START: 2545 case IR_BEGIN: 2546 case IR_IF_TRUE: 2547 case IR_IF_FALSE: 2548 case IR_CASE_VAL: 2549 case IR_CASE_DEFAULT: 2550 case IR_MERGE: 2551 case IR_LOOP_BEGIN: 2552 case IR_UNREACHABLE: 2553 return IR_SKIPPED | insn->op; 2554 case IR_RETURN: 2555 if (!insn->op2) { 2556 return IR_RETURN_VOID; 2557 } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2558 return IR_RETURN_INT; 2559 } else { 2560 return IR_RETURN_FP; 2561 } 2562 case IR_IF: 2563 if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { 2564 op2_insn = &ctx->ir_base[insn->op2]; 2565 if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { 2566 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { 2567 if (IR_IS_CONST_REF(op2_insn->op2) 2568 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) 2569 && ctx->ir_base[op2_insn->op2].val.i64 == 0 2570 && op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ 2571 ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; 2572 2573 if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) { 2574 /* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... 
TEST_AND_BRANCH */
								ir_match_fuse_load_test_int(ctx, op1_insn, ref);
								ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT;
								ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP;
								return IR_TEST_AND_BRANCH_INT;
							} else if (insn->op2 == ref - 1 && /* previous instruction */
									((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
										/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
										((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
											(op2_insn->op == IR_EQ || op2_insn->op == IR_NE)))) {
								/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
								if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
									ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
									ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
								} else {
									ir_match_fuse_load(ctx, op1_insn->op2, ref);
									ctx->rules[op2_insn->op1] = IR_BINOP_INT;
								}
								ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
								return IR_JCC_INT;
							}
						}
						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
						ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_CMP_AND_BRANCH_INT;
					} else {
						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, 1);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_CMP_AND_BRANCH_FP;
					}
				} else if (op2_insn->op == IR_AND) {
					/* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */
					ir_match_fuse_load_test_int(ctx, op2_insn, ref);
					ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT;
					return IR_TEST_AND_BRANCH_INT;
				} else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) {
					/* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ...
OVERFLOW_AND_BRANCH */ 2613 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; 2614 return IR_OVERFLOW_AND_BRANCH; 2615 } 2616 } 2617 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2618 if (insn->op2 == ref - 1) { /* previous instruction */ 2619 op2_insn = &ctx->ir_base[insn->op2]; 2620 if (op2_insn->op == IR_ADD || 2621 op2_insn->op == IR_SUB || 2622// op2_insn->op == IR_MUL || 2623 op2_insn->op == IR_OR || 2624 op2_insn->op == IR_AND || 2625 op2_insn->op == IR_XOR) { 2626 2627 /* v = BINOP(_, _); IF(v) => BINOP; JCC */ 2628 if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) { 2629 ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); 2630 ctx->rules[insn->op2] = IR_BINOP_INT | IR_MAY_SWAP; 2631 } else { 2632 ir_match_fuse_load(ctx, op2_insn->op2, ref); 2633 ctx->rules[insn->op2] = IR_BINOP_INT; 2634 } 2635 return IR_JCC_INT; 2636 } 2637 } else if ((ctx->flags & IR_OPT_CODEGEN) 2638 && insn->op1 == ref - 1 /* previous instruction */ 2639 && insn->op2 == ref - 2 /* previous instruction */ 2640 && ctx->use_lists[insn->op2].count == 2 2641 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2642 ir_insn *store_insn = &ctx->ir_base[insn->op1]; 2643 2644 if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) { 2645 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 2646 2647 if (op_insn->op == IR_ADD || 2648 op_insn->op == IR_SUB || 2649// op_insn->op == IR_MUL || 2650 op_insn->op == IR_OR || 2651 op_insn->op == IR_AND || 2652 op_insn->op == IR_XOR) { 2653 if (ctx->ir_base[op_insn->op1].op == IR_LOAD 2654 && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { 2655 if (ir_in_same_block(ctx, op_insn->op1) 2656 && ctx->use_lists[op_insn->op1].count == 2 2657 && store_insn->op1 == op_insn->op1) { 2658 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ 2659 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2660 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2661 ir_match_fuse_addr(ctx, store_insn->op2); 2662 ctx->rules[insn->op1] = IR_MEM_BINOP_INT; 2663 return IR_JCC_INT; 2664 } 2665 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2666 && ctx->ir_base[op_insn->op2].op == IR_LOAD 2667 && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { 2668 if (ir_in_same_block(ctx, op_insn->op2) 2669 && ctx->use_lists[op_insn->op2].count == 2 2670 && store_insn->op1 == op_insn->op2) { 2671 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ 2672 ir_swap_ops(op_insn); 2673 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2674 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2675 ir_match_fuse_addr(ctx, store_insn->op2); 2676 ctx->rules[insn->op1] = IR_MEM_BINOP_INT; 2677 return IR_JCC_INT; 2678 } 2679 } 2680 } 2681 } 2682 } 2683 ir_match_fuse_load(ctx, insn->op2, ref); 2684 return IR_IF_INT; 2685 } else { 2686 IR_ASSERT(0 && "NIY IR_IF_FP"); 2687 break; 2688 } 2689 case IR_COND: 2690 if (!IR_IS_CONST_REF(insn->op1) && ctx->use_lists[insn->op1].count == 1) { 2691 ir_insn *op1_insn = &ctx->ir_base[insn->op1]; 2692 2693 if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UGT) { 2694 if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) { 2695 ir_match_fuse_load_cmp_int(ctx, op1_insn, ref); 2696 ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT; 2697 return IR_COND_CMP_INT; 2698 } else { 2699 ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref, 1); 2700 ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP; 2701 return IR_COND_CMP_FP; 2702 } 2703 } 2704 } 2705 return IR_COND; 2706 case IR_GUARD: 2707 case IR_GUARD_NOT: 2708 if (!IR_IS_CONST_REF(insn->op2) && ctx->use_lists[insn->op2].count == 1) { 
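				/*
				 * GUARD/GUARD_NOT condition fusion: when the guarded value is a
				 * single-use CMP/AND/OVERFLOW, the producing instruction is marked
				 * IR_FUSED so that its flags feed the guard's conditional jump
				 * directly instead of being materialized with SETcc first.
				 * Illustrative sketch only (not taken from this file):
				 *
				 *   c = LT(x, y); GUARD(c, exit)  =>  cmp x, y
				 *                                     jge => deoptimization exit
				 *
				 * The "previous instruction" / prev_ref checks below exist because
				 * the register allocator may otherwise clobber the CMP operands
				 * before the fused GUARD uses them (see the TODO that follows).
				 */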
2709 op2_insn = &ctx->ir_base[insn->op2]; 2710 if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT 2711 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP 2712 && (insn->op2 == ref - 1 || 2713 (insn->op2 == ctx->prev_ref[ref] - 1 2714 && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { 2715 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { 2716 if (IR_IS_CONST_REF(op2_insn->op2) 2717 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) 2718 && ctx->ir_base[op2_insn->op2].val.i64 == 0) { 2719 if (op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ 2720 ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; 2721 2722 if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || 2723 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 2724 ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && 2725 (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { 2726 if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { 2727 ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); 2728 ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; 2729 } else { 2730 ir_match_fuse_load(ctx, op1_insn->op2, ref); 2731 ctx->rules[op2_insn->op1] = IR_BINOP_INT; 2732 } 2733 /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */ 2734 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 2735 return IR_GUARD_JCC_INT; 2736 } 2737 } else if ((ctx->flags & IR_OPT_CODEGEN) 2738 && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */ 2739 && ir_in_same_block(ctx, op2_insn->op1) 2740 && ctx->use_lists[op2_insn->op1].count == 2) { 2741 ir_insn *store_insn = &ctx->ir_base[insn->op2 - 1]; 2742 2743 if (store_insn->op == IR_STORE && store_insn->op3 == op2_insn->op1) { 2744 ir_insn *op_insn = &ctx->ir_base[op2_insn->op1]; 2745 2746 if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) || 2747 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 2748 ((op_insn->op == IR_ADD || op_insn->op == IR_SUB) && 2749 (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { 2750 if (ctx->ir_base[op_insn->op1].op == IR_LOAD 2751 && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { 2752 if (ir_in_same_block(ctx, op_insn->op1) 2753 && ctx->use_lists[op_insn->op1].count == 2 2754 && store_insn->op1 == op_insn->op1) { 2755 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */ 2756 ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; 2757 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2758 ir_match_fuse_addr(ctx, store_insn->op2); 2759 ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; 2760 ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; 2761 return IR_GUARD_JCC_INT; 2762 } 2763 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2764 && ctx->ir_base[op_insn->op2].op == IR_LOAD 2765 && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { 2766 if (ir_in_same_block(ctx, op_insn->op2) 2767 && ctx->use_lists[op_insn->op2].count == 2 2768 && store_insn->op1 == op_insn->op2) { 2769 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ 2770 ir_swap_ops(op_insn); 2771 ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; 2772 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2773 ir_match_fuse_addr(ctx, store_insn->op2); 2774 ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; 2775 ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; 2776 return IR_GUARD_JCC_INT; 2777 } 2778 } 2779 } 2780 } 2781 } 2782 } 2783 /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... 
GUARD_CMP */
						ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_GUARD_CMP_INT;
					} else {
						/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_GUARD_CMP_FP;
					}
				} else if (op2_insn->op == IR_AND) { // TODO: OR, XOR, etc.
					/* c = AND(_, _) ... GUARD(c) => SKIP_TEST ... GUARD_TEST */
					ir_match_fuse_load_test_int(ctx, op2_insn, ref);
					ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT;
					return IR_GUARD_TEST_INT;
				} else if (op2_insn->op == IR_OVERFLOW && ir_in_same_block(ctx, insn->op2)) {
					/* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ... GUARD_OVERFLOW */
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_GUARD_OVERFLOW;
				}
			}
			ir_match_fuse_load(ctx, insn->op2, ref);
			return insn->op;
		case IR_INT2FP:
			if (ir_type_size[ctx->ir_base[insn->op1].type] > (IR_IS_TYPE_SIGNED(ctx->ir_base[insn->op1].type) ? 2 : 4)) {
				ir_match_fuse_load(ctx, insn->op1, ref);
			}
			return insn->op;
		case IR_SEXT:
		case IR_ZEXT:
		case IR_FP2INT:
		case IR_FP2FP:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return insn->op;
		case IR_TRUNC:
		case IR_PROTO:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return insn->op | IR_MAY_REUSE;
		case IR_BITCAST:
			ir_match_fuse_load(ctx, insn->op1, ref);
			if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return insn->op | IR_MAY_REUSE;
			} else {
				return insn->op;
			}
		case IR_CTLZ:
		case IR_CTTZ:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return IR_BIT_COUNT;
		case IR_CTPOP:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return (ctx->mflags & IR_X86_BMI1) ?
IR_BIT_COUNT : IR_CTPOP; 2835 case IR_VA_START: 2836 ctx->flags2 |= IR_HAS_VA_START; 2837 if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) { 2838 ir_use_list *use_list = &ctx->use_lists[insn->op2]; 2839 ir_ref *p, n = use_list->count; 2840 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { 2841 ir_insn *use_insn = &ctx->ir_base[*p]; 2842 if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) { 2843 } else if (use_insn->op == IR_VA_COPY) { 2844 if (use_insn->op3 == insn->op2) { 2845 ctx->flags2 |= IR_HAS_VA_COPY; 2846 } 2847 } else if (use_insn->op == IR_VA_ARG) { 2848 if (use_insn->op2 == insn->op2) { 2849 if (IR_IS_TYPE_INT(use_insn->type)) { 2850 ctx->flags2 |= IR_HAS_VA_ARG_GP; 2851 } else { 2852 IR_ASSERT(IR_IS_TYPE_FP(use_insn->type)); 2853 ctx->flags2 |= IR_HAS_VA_ARG_FP; 2854 } 2855 } 2856 } else if (*p > ref) { 2857 /* diriect va_list access */ 2858 ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; 2859 } 2860 } 2861 } 2862 return IR_VA_START; 2863 case IR_VA_END: 2864 return IR_SKIPPED | IR_NOP; 2865 case IR_VADDR: 2866 if (ctx->use_lists[ref].count > 0) { 2867 ir_use_list *use_list = &ctx->use_lists[ref]; 2868 ir_ref *p, n = use_list->count; 2869 2870 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { 2871 if (ctx->ir_base[*p].op != IR_VA_END) { 2872 return IR_STATIC_ALLOCA; 2873 } 2874 } 2875 } 2876 return IR_SKIPPED | IR_NOP; 2877 default: 2878 break; 2879 } 2880 2881 return insn->op; 2882} 2883 2884static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule) 2885{ 2886 if (rule == IR_LEA_IB) { 2887 ir_match_try_revert_lea_to_add(ctx, ref); 2888 } 2889} 2890 2891/* code generation */ 2892static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg) 2893{ 2894 int32_t offset; 2895 2896 IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); 2897 offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos; 2898 IR_ASSERT(offset != -1); 2899 if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { 2900 IR_ASSERT(ctx->spill_base != IR_REG_NONE); 2901 *reg = ctx->spill_base; 2902 return offset; 2903 } 2904 *reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 2905 return IR_SPILL_POS_TO_OFFSET(offset); 2906} 2907 2908static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v) 2909{ 2910 int32_t offset; 2911 ir_reg base; 2912 2913 IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]); 2914 offset = ctx->live_intervals[v]->stack_spill_pos; 2915 IR_ASSERT(offset != -1); 2916 if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) { 2917 IR_ASSERT(ctx->spill_base != IR_REG_NONE); 2918 return IR_MEM_BO(ctx->spill_base, offset); 2919 } 2920 base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 2921 offset = IR_SPILL_POS_TO_OFFSET(offset); 2922 return IR_MEM_BO(base, offset); 2923} 2924 2925static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref) 2926{ 2927 IR_ASSERT(!IR_IS_CONST_REF(ref)); 2928 return ir_vreg_spill_slot(ctx, ctx->vregs[ref]); 2929} 2930 2931static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem) 2932{ 2933 ir_mem m = ir_ref_spill_slot(ctx, ref); 2934 return IR_MEM_VAL(m) == IR_MEM_VAL(mem); 2935} 2936 2937static ir_mem ir_var_spill_slot(ir_ctx *ctx, ir_ref ref) 2938{ 2939 ir_insn *var_insn = &ctx->ir_base[ref]; 2940 ir_reg reg; 2941 2942 IR_ASSERT(var_insn->op == IR_VAR); 2943 reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 2944 return IR_MEM_BO(reg, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 2945} 2946 2947static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use) 2948{ 2949 ir_live_interval *ival; 2950 2951 IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]); 2952 ival = ctx->live_intervals[ctx->vregs[ref]]; 2953 while (ival) { 2954 ir_use_pos *use_pos = ival->use_pos; 2955 while (use_pos) { 2956 if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) { 2957 return !use_pos->next || use_pos->next->op_num == 0; 2958 } 2959 use_pos = use_pos->next; 2960 } 2961 ival = ival->next; 2962 } 2963 return 0; 2964} 2965 2966static void ir_emit_mov_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) 2967{ 2968 ir_backend_data *data = ctx->data; 2969 dasm_State **Dst = &data->dasm_state; 2970 2971 if (ir_type_size[type] == 8) { 2972 IR_ASSERT(sizeof(void*) == 8); 2973|.if X64 2974 if (IR_IS_UNSIGNED_32BIT(val)) { 2975 | mov Rd(reg), (uint32_t)val // zero extended load 2976 } else if (IR_IS_SIGNED_32BIT(val)) { 2977 | mov Rq(reg), (int32_t)val // sign extended load 2978 } else if (type == IR_ADDR && IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, (intptr_t)val)) { 2979 | lea Ra(reg), [&val] 2980 } else { 2981 | mov64 Ra(reg), val 2982 } 2983|.endif 2984 } else { 2985 | ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load 2986 } 2987} 2988 2989static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val) 2990{ 2991 ir_backend_data *data = ctx->data; 2992 dasm_State **Dst = &data->dasm_state; 2993 2994 IR_ASSERT(IR_IS_TYPE_INT(type)); 2995 if (val == 0) { 2996 | ASM_REG_REG_OP xor, type, reg, reg 2997 } else { 2998 ir_emit_mov_imm_int(ctx, type, reg, val); 2999 } 3000} 3001 3002static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 3003{ 3004 ir_backend_data *data = ctx->data; 3005 dasm_State **Dst = &data->dasm_state; 3006 3007 | ASM_REG_MEM_OP mov, type, reg, mem 3008} 3009 3010static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) 3011{ 3012 ir_backend_data *data = ctx->data; 3013 dasm_State **Dst = &data->dasm_state; 3014 ir_insn *insn = &ctx->ir_base[src]; 3015 int label; 3016 3017 if (type == IR_FLOAT && insn->val.u32 == 0) { 3018 if (ctx->mflags & IR_X86_AVX) { 3019 | vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) 3020 } else { 3021 | xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) 3022 } 3023 } else if (type == IR_DOUBLE && insn->val.u64 == 0) { 3024 if (ctx->mflags & IR_X86_AVX) { 3025 | vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) 3026 } else { 3027 | xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST) 3028 } 3029 } else { 3030 label = ir_const_label(ctx, src); 3031 | ASM_FP_REG_TXT_OP movs, type, reg, [=>label] 3032 } 3033} 3034 3035static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 3036{ 3037 ir_backend_data *data = ctx->data; 3038 dasm_State **Dst = &data->dasm_state; 3039 3040 | ASM_FP_REG_MEM_OP movs, type, reg, mem 3041} 3042 3043static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem) 3044{ 3045 if (IR_IS_TYPE_INT(type)) { 3046 ir_emit_load_mem_int(ctx, type, reg, mem); 3047 } else { 3048 ir_emit_load_mem_fp(ctx, type, reg, mem); 3049 } 3050} 3051 3052static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src) 3053{ 3054 ir_backend_data *data = ctx->data; 3055 dasm_State **Dst = &data->dasm_state; 3056 
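	/*
	 * Materialize the address of a frame-local slot (IR_STATIC_ALLOCA/VAR)
	 * into 'reg'.  The slot sits at a fixed offset from the frame or stack
	 * pointer, so this is either a plain register move (offset 0) or a single
	 * LEA, e.g. "lea rax, [rbp-0x18]" (register names only illustrative).
	 */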
ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3057 int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3); 3058 3059 IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA); 3060 if (offset == 0) { 3061 | mov Ra(reg), Ra(base) 3062 } else { 3063 | lea Ra(reg), [Ra(base)+offset] 3064 } 3065} 3066 3067static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src) 3068{ 3069 if (IR_IS_CONST_REF(src)) { 3070 if (IR_IS_TYPE_INT(type)) { 3071 ir_insn *insn = &ctx->ir_base[src]; 3072 3073 if (insn->op == IR_SYM || insn->op == IR_FUNC) { 3074 void *addr = ir_sym_val(ctx, insn); 3075 ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr); 3076 } else if (insn->op == IR_STR) { 3077 ir_backend_data *data = ctx->data; 3078 dasm_State **Dst = &data->dasm_state; 3079 int label = ir_const_label(ctx, src); 3080 3081 | lea Ra(reg), aword [=>label] 3082 } else { 3083 ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); 3084 } 3085 } else { 3086 ir_emit_load_imm_fp(ctx, type, reg, src); 3087 } 3088 } else if (ctx->vregs[src]) { 3089 ir_emit_load_mem(ctx, type, reg, ir_ref_spill_slot(ctx, src)); 3090 } else { 3091 ir_load_local_addr(ctx, reg, src); 3092 } 3093} 3094 3095static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 3096{ 3097 ir_backend_data *data = ctx->data; 3098 dasm_State **Dst = &data->dasm_state; 3099 3100 | ASM_MEM_REG_OP mov, type, mem, reg 3101} 3102 3103static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 3104{ 3105 ir_backend_data *data = ctx->data; 3106 dasm_State **Dst = &data->dasm_state; 3107 3108 | ASM_FP_MEM_REG_OP movs, type, mem, reg 3109} 3110 3111static void ir_emit_store_mem_imm(ir_ctx *ctx, ir_type type, ir_mem mem, int32_t imm) 3112{ 3113 ir_backend_data *data = ctx->data; 3114 dasm_State **Dst = &data->dasm_state; 3115 3116 | ASM_MEM_IMM_OP mov, type, mem, imm 3117} 3118 3119static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, bool is_arg) 3120{ 3121 ir_backend_data *data = ctx->data; 3122 dasm_State **Dst = &data->dasm_state; 3123 ir_insn *val_insn = &ctx->ir_base[src]; 3124 3125 IR_ASSERT(IR_IS_CONST_REF(src)); 3126 if (val_insn->op == IR_STR) { 3127 int label = ir_const_label(ctx, src); 3128 3129 IR_ASSERT(tmp_reg != IR_REG_NONE); 3130|.if X64 3131 | lea Ra(tmp_reg), aword [=>label] 3132|| ir_emit_store_mem_int(ctx, type, mem, tmp_reg); 3133|.else 3134 | ASM_TMEM_TXT_OP mov, aword, mem, =>label 3135|.endif 3136 } else { 3137 int64_t val = val_insn->val.i64; 3138 3139 if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) { 3140 val = (int64_t)(intptr_t)ir_sym_val(ctx, val_insn); 3141 } 3142 3143 if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(val)) { 3144 if (is_arg && ir_type_size[type] < 4) { 3145 type = IR_U32; 3146 } 3147 ir_emit_store_mem_imm(ctx, type, mem, val); 3148 } else { 3149 IR_ASSERT(tmp_reg != IR_REG_NONE); 3150 tmp_reg = IR_REG_NUM(tmp_reg); 3151 ir_emit_load_imm_int(ctx, type, tmp_reg, val); 3152 ir_emit_store_mem_int(ctx, type, mem, tmp_reg); 3153 } 3154 } 3155} 3156 3157static void ir_emit_store_mem_fp_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, ir_reg tmp_fp_reg) 3158{ 3159 ir_val *val = &ctx->ir_base[src].val; 3160 3161 if (type == IR_FLOAT) { 3162 ir_emit_store_mem_imm(ctx, IR_U32, mem, val->i32); 3163 } else if (sizeof(void*) == 8 && val->i64 == 0) { 3164 ir_emit_store_mem_imm(ctx, IR_U64, mem, 0); 3165 } else if (sizeof(void*) == 8 && tmp_reg != 
IR_REG_NONE) { 3166 ir_emit_load_imm_int(ctx, IR_U64, tmp_reg, val->i64); 3167 ir_emit_store_mem_int(ctx, IR_U64, mem, tmp_reg); 3168 } else { 3169 tmp_fp_reg = IR_REG_NUM(tmp_fp_reg); 3170 ir_emit_load(ctx, type, tmp_fp_reg, src); 3171 ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg); 3172 } 3173} 3174 3175static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg) 3176{ 3177 if (IR_IS_TYPE_INT(type)) { 3178 ir_emit_store_mem_int(ctx, type, mem, reg); 3179 } else { 3180 ir_emit_store_mem_fp(ctx, type, mem, reg); 3181 } 3182} 3183 3184static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg) 3185{ 3186 IR_ASSERT(dst >= 0); 3187 ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg); 3188} 3189 3190static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 3191{ 3192 ir_backend_data *data = ctx->data; 3193 dasm_State **Dst = &data->dasm_state; 3194 3195 | ASM_REG_REG_OP mov, type, dst, src 3196} 3197 3198#define IR_HAVE_SWAP_INT 3199 3200static void ir_emit_swap(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 3201{ 3202 ir_backend_data *data = ctx->data; 3203 dasm_State **Dst = &data->dasm_state; 3204 3205 | ASM_REG_REG_OP xchg, type, dst, src 3206} 3207 3208static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 3209{ 3210 ir_backend_data *data = ctx->data; 3211 dasm_State **Dst = &data->dasm_state; 3212 3213 if (ir_type_size[type] > 2) { 3214 | ASM_REG_REG_OP mov, type, dst, src 3215 } else if (ir_type_size[type] == 2) { 3216 if (IR_IS_TYPE_SIGNED(type)) { 3217 | movsx Rd(dst), Rw(src) 3218 } else { 3219 | movzx Rd(dst), Rw(src) 3220 } 3221 } else /* if (ir_type_size[type] == 1) */ { 3222 if (IR_IS_TYPE_SIGNED(type)) { 3223 | movsx Rd(dst), Rb(src) 3224 } else { 3225 | movzx Rd(dst), Rb(src) 3226 } 3227 } 3228} 3229 3230static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src) 3231{ 3232 ir_backend_data *data = ctx->data; 3233 dasm_State **Dst = &data->dasm_state; 3234 3235 | ASM_FP_REG_REG_OP movap, type, dst, src 3236} 3237 3238static ir_mem ir_fuse_addr_const(ir_ctx *ctx, ir_ref ref) 3239{ 3240 ir_mem mem; 3241 ir_insn *addr_insn = &ctx->ir_base[ref]; 3242 3243 IR_ASSERT(IR_IS_CONST_REF(ref)); 3244 if (IR_IS_SYM_CONST(addr_insn->op)) { 3245 void *addr = ir_sym_val(ctx, addr_insn); 3246 IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT((intptr_t)addr)); 3247 mem = IR_MEM_O((int32_t)(intptr_t)addr); 3248 } else { 3249 IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64)); 3250 mem = IR_MEM_O(addr_insn->val.i32); 3251 } 3252 return mem; 3253} 3254 3255static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref) 3256{ 3257 uint32_t rule = ctx->rules[ref]; 3258 ir_insn *insn = &ctx->ir_base[ref]; 3259 ir_insn *op1_insn, *op2_insn, *offset_insn; 3260 ir_ref base_reg_ref, index_reg_ref; 3261 ir_reg base_reg = IR_REG_NONE, index_reg; 3262 int32_t offset = 0, scale; 3263 3264 IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_OB && 3265 (rule & IR_RULE_MASK) <= IR_LEA_SI_B) || 3266 rule == IR_STATIC_ALLOCA); 3267 switch (rule & IR_RULE_MASK) { 3268 default: 3269 IR_ASSERT(0); 3270 case IR_LEA_OB: 3271 offset_insn = insn; 3272 if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { 3273 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); 3274 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3275 base_reg_ref = IR_UNUSED; 3276 } else { 3277 base_reg_ref = ref * sizeof(ir_ref) + 1; 3278 } 3279 index_reg_ref = IR_UNUSED; 3280 scale = 1; 3281 break; 3282 case IR_LEA_SI: 3283 scale = ctx->ir_base[insn->op2].val.i32; 3284 index_reg_ref = ref * sizeof(ir_ref) + 1; 3285 base_reg_ref = IR_UNUSED; 3286 offset_insn = NULL; 3287 break; 3288 case IR_LEA_SIB: 3289 base_reg_ref = index_reg_ref = ref * sizeof(ir_ref) + 1; 3290 scale = ctx->ir_base[insn->op2].val.i32 - 1; 3291 offset_insn = NULL; 3292 break; 3293 case IR_LEA_IB: 3294 if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { 3295 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); 3296 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3297 base_reg_ref = IR_UNUSED; 3298 index_reg_ref = ref * sizeof(ir_ref) + 2; 3299 } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { 3300 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 3301 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3302 base_reg_ref = IR_UNUSED; 3303 index_reg_ref = ref * sizeof(ir_ref) + 1; 3304 } else { 3305 base_reg_ref = ref * sizeof(ir_ref) + 1; 3306 index_reg_ref = ref * sizeof(ir_ref) + 2; 3307 } 3308 offset_insn = NULL; 3309 scale = 1; 3310 break; 3311 case IR_LEA_OB_I: 3312 op1_insn = &ctx->ir_base[insn->op1]; 3313 offset_insn = op1_insn; 3314 scale = 1; 3315 if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { 3316 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 3317 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3318 base_reg_ref = IR_UNUSED; 3319 index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3320 } else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { 3321 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); 3322 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3323 base_reg_ref = IR_UNUSED; 3324 index_reg_ref = ref * sizeof(ir_ref) + 2; 3325 } else { 3326 base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3327 index_reg_ref = ref * sizeof(ir_ref) + 2; 3328 } 3329 break; 3330 case IR_LEA_I_OB: 3331 op2_insn = &ctx->ir_base[insn->op2]; 3332 offset_insn = op2_insn; 3333 scale = 1; 3334 if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { 3335 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); 3336 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3337 base_reg_ref = IR_UNUSED; 3338 index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; 3339 } else if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) { 3340 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3); 3341 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3342 base_reg_ref = IR_UNUSED; 3343 index_reg_ref = ref * sizeof(ir_ref) + 1; 3344 } else { 3345 base_reg_ref = ref * sizeof(ir_ref) + 1; 3346 index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; 3347 } 3348 break; 3349 case IR_LEA_SI_O: 3350 index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3351 op1_insn = &ctx->ir_base[insn->op1]; 3352 scale = ctx->ir_base[op1_insn->op2].val.i32; 3353 offset_insn = insn; 3354 base_reg_ref = IR_UNUSED; 3355 break; 3356 case IR_LEA_SIB_O: 3357 base_reg_ref = index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3358 op1_insn = &ctx->ir_base[insn->op1]; 3359 scale = ctx->ir_base[op1_insn->op2].val.i32 - 1; 3360 offset_insn = insn; 3361 break; 3362 case IR_LEA_IB_O: 3363 base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3364 index_reg_ref = insn->op1 * sizeof(ir_ref) + 2; 3365 offset_insn = insn; 3366 scale = 1; 3367 break; 3368 case IR_LEA_OB_SI: 3369 index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; 3370 op1_insn = &ctx->ir_base[insn->op1]; 3371 offset_insn = op1_insn; 3372 op2_insn = &ctx->ir_base[insn->op2]; 3373 scale = ctx->ir_base[op2_insn->op2].val.i32; 3374 if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) { 3375 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3); 3376 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3377 base_reg_ref = IR_UNUSED; 3378 } else { 3379 base_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3380 } 3381 break; 3382 case IR_LEA_SI_OB: 3383 index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3384 op1_insn = &ctx->ir_base[insn->op1]; 3385 scale = ctx->ir_base[op1_insn->op2].val.i32; 3386 op2_insn = &ctx->ir_base[insn->op2]; 3387 offset_insn = op2_insn; 3388 if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) { 3389 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3); 3390 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3391 base_reg_ref = IR_UNUSED; 3392 } else { 3393 base_reg_ref = insn->op2 * sizeof(ir_ref) + 1; 3394 } 3395 break; 3396 case IR_LEA_B_SI: 3397 if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) { 3398 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3); 3399 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3400 base_reg_ref = IR_UNUSED; 3401 } else { 3402 base_reg_ref = ref * sizeof(ir_ref) + 1; 3403 } 3404 index_reg_ref = insn->op2 * sizeof(ir_ref) + 1; 3405 op2_insn = &ctx->ir_base[insn->op2]; 3406 scale = ctx->ir_base[op2_insn->op2].val.i32; 3407 offset_insn = NULL; 3408 break; 3409 case IR_LEA_SI_B: 3410 index_reg_ref = insn->op1 * sizeof(ir_ref) + 1; 3411 if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { 3412 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 3413 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3414 base_reg_ref = IR_UNUSED; 3415 } else { 3416 base_reg_ref = ref * sizeof(ir_ref) + 2; 3417 } 3418 op1_insn = &ctx->ir_base[insn->op1]; 3419 scale = ctx->ir_base[op1_insn->op2].val.i32; 3420 offset_insn = NULL; 3421 break; 3422 case IR_ALLOCA: 3423 offset = IR_SPILL_POS_TO_OFFSET(insn->op3); 3424 base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3425 base_reg_ref = index_reg_ref = IR_UNUSED; 3426 scale = 1; 3427 offset_insn = NULL; 3428 break; 3429 } 3430 3431 if (offset_insn) { 3432 ir_insn *addr_insn = &ctx->ir_base[offset_insn->op2]; 3433 3434 if (IR_IS_SYM_CONST(addr_insn->op)) { 3435 void *addr = ir_sym_val(ctx, addr_insn); 3436 IR_ASSERT(sizeof(void*) != 8 || IR_IS_SIGNED_32BIT((intptr_t)addr)); 3437 offset += (int64_t)(intptr_t)(addr); 3438 } else { 3439 if (offset_insn->op == IR_SUB) { 3440 offset = -addr_insn->val.i32; 3441 } else { 3442 offset += addr_insn->val.i32; 3443 } 3444 } 3445 } 3446 3447 if (base_reg_ref) { 3448 if (UNEXPECTED(ctx->rules[base_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) { 3449 base_reg = ir_get_fused_reg(ctx, root, base_reg_ref); 3450 } else { 3451 base_reg = ((int8_t*)ctx->regs)[base_reg_ref]; 3452 } 3453 IR_ASSERT(base_reg != IR_REG_NONE); 3454 if (IR_REG_SPILLED(base_reg)) { 3455 base_reg = IR_REG_NUM(base_reg); 3456 ir_emit_load(ctx, insn->type, base_reg, ((ir_ref*)ctx->ir_base)[base_reg_ref]); 3457 } 3458 } 3459 3460 index_reg = IR_REG_NONE; 3461 if (index_reg_ref) { 3462 if (base_reg_ref 3463 && ((ir_ref*)ctx->ir_base)[index_reg_ref] 3464 == ((ir_ref*)ctx->ir_base)[base_reg_ref]) { 3465 index_reg = base_reg; 3466 } else { 3467 if (UNEXPECTED(ctx->rules[index_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) { 3468 index_reg = ir_get_fused_reg(ctx, root, index_reg_ref); 3469 } else { 3470 index_reg = ((int8_t*)ctx->regs)[index_reg_ref]; 3471 } 3472 IR_ASSERT(index_reg != IR_REG_NONE); 3473 if (IR_REG_SPILLED(index_reg)) { 3474 index_reg = IR_REG_NUM(index_reg); 3475 ir_emit_load(ctx, insn->type, index_reg, ((ir_ref*)ctx->ir_base)[index_reg_ref]); 3476 } 3477 } 3478 } 3479 3480 return IR_MEM(base_reg, offset, index_reg, scale); 3481} 3482 3483static ir_mem ir_fuse_mem(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *mem_insn, ir_reg reg) 3484{ 3485 if (reg != IR_REG_NONE) { 3486 if (IR_REG_SPILLED(reg)) { 3487 reg = IR_REG_NUM(reg); 3488 ir_emit_load(ctx, IR_ADDR, reg, mem_insn->op2); 3489 } 3490 return IR_MEM_B(reg); 3491 } else if (IR_IS_CONST_REF(mem_insn->op2)) { 3492 return ir_fuse_addr_const(ctx, mem_insn->op2); 3493 } else { 3494 return ir_fuse_addr(ctx, root, mem_insn->op2); 3495 } 3496} 3497 3498static ir_mem ir_fuse_load(ir_ctx *ctx, ir_ref root, ir_ref ref) 3499{ 3500 ir_insn *load_insn = &ctx->ir_base[ref]; 3501 ir_reg reg; 3502 3503 IR_ASSERT(load_insn->op == IR_LOAD); 3504 if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) { 3505 reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2); 3506 } else { 3507 reg = ctx->regs[ref][2]; 3508 } 3509 return ir_fuse_mem(ctx, root, ref, load_insn, reg); 3510} 3511 3512static int32_t ir_fuse_imm(ir_ctx *ctx, ir_ref ref) 3513{ 3514 ir_insn *val_insn = &ctx->ir_base[ref]; 3515 3516 IR_ASSERT(IR_IS_CONST_REF(ref)); 3517 if (IR_IS_SYM_CONST(val_insn->op)) { 3518 void *addr = ir_sym_val(ctx, val_insn); 3519 IR_ASSERT(IR_IS_SIGNED_32BIT((intptr_t)addr)); 3520 return (int32_t)(intptr_t)addr; 3521 } else { 3522 IR_ASSERT(IR_IS_SIGNED_32BIT(val_insn->val.i32)); 3523 return val_insn->val.i32; 3524 } 3525} 3526 3527static void ir_emit_load_ex(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src, ir_ref root) 3528{ 3529 if (IR_IS_CONST_REF(src)) { 3530 if (IR_IS_TYPE_INT(type)) { 3531 ir_insn *insn = &ctx->ir_base[src]; 3532 3533 if (insn->op == IR_SYM || insn->op == IR_FUNC) { 3534 void *addr = ir_sym_val(ctx, insn); 3535 ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr); 3536 } else if (insn->op == IR_STR) { 3537 
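			/* A string constant cannot be encoded as an immediate: it is emitted
			 * at a local constant label (ir_const_label()) and its address is
			 * taken with LEA on that label, mirroring ir_emit_load() above. */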
ir_backend_data *data = ctx->data; 3538 dasm_State **Dst = &data->dasm_state; 3539 int label = ir_const_label(ctx, src); 3540 3541 | lea Ra(reg), aword [=>label] 3542 } else { 3543 ir_emit_load_imm_int(ctx, type, reg, insn->val.i64); 3544 } 3545 } else { 3546 ir_emit_load_imm_fp(ctx, type, reg, src); 3547 } 3548 } else if (ir_rule(ctx, src) == IR_STATIC_ALLOCA) { 3549 ir_load_local_addr(ctx, reg, src); 3550 } else { 3551 ir_mem mem; 3552 3553 if (ir_rule(ctx, src) & IR_FUSED) { 3554 mem = ir_fuse_load(ctx, root, src); 3555 } else { 3556 mem = ir_ref_spill_slot(ctx, src); 3557 } 3558 ir_emit_load_mem(ctx, type, reg, mem); 3559 } 3560} 3561 3562static void ir_emit_prologue(ir_ctx *ctx) 3563{ 3564 ir_backend_data *data = ctx->data; 3565 dasm_State **Dst = &data->dasm_state; 3566 int offset = ctx->stack_frame_size + ctx->call_stack_size; 3567 3568 if (ctx->flags & IR_USE_FRAME_POINTER) { 3569 | push Ra(IR_REG_RBP) 3570 | mov Ra(IR_REG_RBP), Ra(IR_REG_RSP) 3571 } 3572 if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) { 3573 int i; 3574 ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP); 3575 3576 for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) { 3577 if (IR_REGSET_IN(used_preserved_regs, i)) { 3578 offset -= sizeof(void*); 3579 | push Ra(i) 3580 } 3581 } 3582 } 3583 if (ctx->stack_frame_size + ctx->call_stack_size) { 3584 if (ctx->fixed_stack_red_zone) { 3585 IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); 3586 } else if (offset) { 3587 | sub Ra(IR_REG_RSP), offset 3588 } 3589 } 3590 if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) { 3591 ir_reg fp; 3592 int i; 3593 ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP); 3594 3595 if (ctx->flags & IR_USE_FRAME_POINTER) { 3596 fp = IR_REG_FRAME_POINTER; 3597 offset -= ctx->stack_frame_size + ctx->call_stack_size; 3598 } else { 3599 fp = IR_REG_STACK_POINTER; 3600 } 3601 for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) { 3602 if (IR_REGSET_IN(used_preserved_regs, i)) { 3603 offset -= sizeof(void*); 3604 if (ctx->mflags & IR_X86_AVX) { 3605 | vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) 3606 } else { 3607 | movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) 3608 } 3609 } 3610 } 3611 } 3612 if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { 3613#if defined(_WIN64) 3614 ir_reg fp; 3615 int offset; 3616 3617 if (ctx->flags & IR_USE_FRAME_POINTER) { 3618 fp = IR_REG_FRAME_POINTER; 3619 offset = sizeof(void*) * 2; 3620 } else { 3621 fp = IR_REG_STACK_POINTER; 3622 offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); 3623 } 3624 | mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1) 3625 | mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2) 3626 | mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3) 3627 | mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4) 3628#elif defined(IR_TARGET_X64) 3629|.if X64 3630 const int8_t *int_reg_params = _ir_int_reg_params; 3631 const int8_t *fp_reg_params = _ir_fp_reg_params; 3632 uint32_t i; 3633 ir_reg fp; 3634 int offset; 3635 3636 if (ctx->flags & IR_USE_FRAME_POINTER) { 3637 fp = IR_REG_FRAME_POINTER; 3638 3639 offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); 3640 } else { 3641 fp = IR_REG_STACK_POINTER; 3642 offset = ctx->locals_area_size + ctx->call_stack_size; 3643 } 3644 3645 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < 
IR_REG_INT_ARGS) { 3646 /* skip named args */ 3647 offset += sizeof(void*) * ctx->gp_reg_params; 3648 for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { 3649 | mov qword [Ra(fp)+offset], Rq(int_reg_params[i]) 3650 offset += sizeof(void*); 3651 } 3652 } 3653 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 3654 | test al, al 3655 | je >1 3656 /* skip named args */ 3657 offset += 16 * ctx->fp_reg_params; 3658 for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { 3659 | movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST) 3660 offset += 16; 3661 } 3662 |1: 3663 } 3664|.endif 3665#endif 3666 } 3667} 3668 3669static void ir_emit_epilogue(ir_ctx *ctx) 3670{ 3671 ir_backend_data *data = ctx->data; 3672 dasm_State **Dst = &data->dasm_state; 3673 3674 if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) { 3675 int i; 3676 int offset; 3677 ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3678 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 3679 3680 if (ctx->flags & IR_USE_FRAME_POINTER) { 3681 fp = IR_REG_FRAME_POINTER; 3682 offset = 0; 3683 } else { 3684 fp = IR_REG_STACK_POINTER; 3685 offset = ctx->stack_frame_size + ctx->call_stack_size; 3686 } 3687 for (i = 0; i < IR_REG_NUM; i++) { 3688 if (IR_REGSET_IN(used_preserved_regs, i)) { 3689 if (i < IR_REG_FP_FIRST) { 3690 offset -= sizeof(void*); 3691 } else { 3692 offset -= sizeof(void*); 3693 if (ctx->mflags & IR_X86_AVX) { 3694 | vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] 3695 } else { 3696 | movsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] 3697 } 3698 } 3699 } 3700 } 3701 } 3702 3703 if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) { 3704 int i; 3705 ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP); 3706 int offset; 3707 3708 if (ctx->flags & IR_USE_FRAME_POINTER) { 3709 offset = 0; 3710 } else { 3711 offset = ctx->stack_frame_size + ctx->call_stack_size; 3712 } 3713 if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) { 3714 int i; 3715 ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP); 3716 3717 for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) { 3718 if (IR_REGSET_IN(used_preserved_regs, i)) { 3719 offset -= sizeof(void*); 3720 } 3721 } 3722 } 3723 if (ctx->flags & IR_USE_FRAME_POINTER) { 3724 | lea Ra(IR_REG_RSP), [Ra(IR_REG_RBP)+offset] 3725 } else if (offset) { 3726 | add Ra(IR_REG_RSP), offset 3727 } 3728 for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) { 3729 if (IR_REGSET_IN(used_preserved_regs, i)) { 3730 | pop Ra(i) 3731 } 3732 } 3733 if (ctx->flags & IR_USE_FRAME_POINTER) { 3734 | pop Ra(IR_REG_RBP) 3735 } 3736 } else if (ctx->flags & IR_USE_FRAME_POINTER) { 3737 | mov Ra(IR_REG_RSP), Ra(IR_REG_RBP) 3738 | pop Ra(IR_REG_RBP) 3739 } else if (ctx->stack_frame_size + ctx->call_stack_size) { 3740 if (ctx->fixed_stack_red_zone) { 3741 IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); 3742 } else { 3743 | add Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) 3744 } 3745 } 3746} 3747 3748static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3749{ 3750 ir_backend_data *data = ctx->data; 3751 dasm_State **Dst = &data->dasm_state; 3752 ir_type type = insn->type; 3753 ir_ref op1 = insn->op1; 3754 ir_ref op2 = insn->op2; 3755 ir_reg def_reg = 
IR_REG_NUM(ctx->regs[def][0]); 3756 ir_reg op1_reg = ctx->regs[def][1]; 3757 ir_reg op2_reg = ctx->regs[def][2]; 3758 3759 IR_ASSERT(def_reg != IR_REG_NONE); 3760 3761 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3762 op1_reg = IR_REG_NUM(op1_reg); 3763 ir_emit_load(ctx, type, op1_reg, op1); 3764 } 3765 if (def_reg != op1_reg) { 3766 if (op1_reg != IR_REG_NONE) { 3767 ir_emit_mov(ctx, type, def_reg, op1_reg); 3768 } else { 3769 ir_emit_load(ctx, type, def_reg, op1); 3770 } 3771 if (op1 == op2) { 3772 op2_reg = def_reg; 3773 } 3774 } 3775 3776 if (op2_reg != IR_REG_NONE) { 3777 if (IR_REG_SPILLED(op2_reg)) { 3778 op2_reg = IR_REG_NUM(op2_reg); 3779 if (op1 != op2) { 3780 ir_emit_load(ctx, type, op2_reg, op2); 3781 } 3782 } 3783 switch (insn->op) { 3784 default: 3785 IR_ASSERT(0 && "NIY binary op"); 3786 case IR_ADD: 3787 case IR_ADD_OV: 3788 | ASM_REG_REG_OP add, type, def_reg, op2_reg 3789 break; 3790 case IR_SUB: 3791 case IR_SUB_OV: 3792 | ASM_REG_REG_OP sub, type, def_reg, op2_reg 3793 break; 3794 case IR_MUL: 3795 case IR_MUL_OV: 3796 | ASM_REG_REG_MUL imul, type, def_reg, op2_reg 3797 break; 3798 case IR_OR: 3799 | ASM_REG_REG_OP or, type, def_reg, op2_reg 3800 break; 3801 case IR_AND: 3802 | ASM_REG_REG_OP and, type, def_reg, op2_reg 3803 break; 3804 case IR_XOR: 3805 | ASM_REG_REG_OP xor, type, def_reg, op2_reg 3806 break; 3807 } 3808 } else if (IR_IS_CONST_REF(op2)) { 3809 int32_t val = ir_fuse_imm(ctx, op2); 3810 3811 switch (insn->op) { 3812 default: 3813 IR_ASSERT(0 && "NIY binary op"); 3814 case IR_ADD: 3815 case IR_ADD_OV: 3816 | ASM_REG_IMM_OP add, type, def_reg, val 3817 break; 3818 case IR_SUB: 3819 case IR_SUB_OV: 3820 | ASM_REG_IMM_OP sub, type, def_reg, val 3821 break; 3822 case IR_MUL: 3823 case IR_MUL_OV: 3824 | ASM_REG_IMM_MUL imul, type, def_reg, val 3825 break; 3826 case IR_OR: 3827 | ASM_REG_IMM_OP or, type, def_reg, val 3828 break; 3829 case IR_AND: 3830 | ASM_REG_IMM_OP and, type, def_reg, val 3831 break; 3832 case IR_XOR: 3833 | ASM_REG_IMM_OP xor, type, def_reg, val 3834 break; 3835 } 3836 } else { 3837 ir_mem mem; 3838 3839 if (ir_rule(ctx, op2) & IR_FUSED) { 3840 mem = ir_fuse_load(ctx, def, op2); 3841 } else { 3842 mem = ir_ref_spill_slot(ctx, op2); 3843 } 3844 switch (insn->op) { 3845 default: 3846 IR_ASSERT(0 && "NIY binary op"); 3847 case IR_ADD: 3848 case IR_ADD_OV: 3849 | ASM_REG_MEM_OP add, type, def_reg, mem 3850 break; 3851 case IR_SUB: 3852 case IR_SUB_OV: 3853 | ASM_REG_MEM_OP sub, type, def_reg, mem 3854 break; 3855 case IR_MUL: 3856 case IR_MUL_OV: 3857 | ASM_REG_MEM_MUL imul, type, def_reg, mem 3858 break; 3859 case IR_OR: 3860 | ASM_REG_MEM_OP or, type, def_reg, mem 3861 break; 3862 case IR_AND: 3863 | ASM_REG_MEM_OP and, type, def_reg, mem 3864 break; 3865 case IR_XOR: 3866 | ASM_REG_MEM_OP xor, type, def_reg, mem 3867 break; 3868 } 3869 } 3870 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3871 ir_emit_store(ctx, type, def, def_reg); 3872 } 3873} 3874 3875static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3876{ 3877 ir_backend_data *data = ctx->data; 3878 dasm_State **Dst = &data->dasm_state; 3879 ir_type type = insn->type; 3880 ir_ref op1 = insn->op1; 3881 ir_ref op2 = insn->op2; 3882 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3883 ir_reg op1_reg = ctx->regs[def][1]; 3884 int32_t val = ir_fuse_imm(ctx, op2); 3885 3886 IR_ASSERT(def_reg != IR_REG_NONE); 3887 IR_ASSERT(!IR_IS_CONST_REF(op1)); 3888 3889 if (op1_reg != IR_REG_NONE) { 3890 if (IR_REG_SPILLED(op1_reg)) { 3891 op1_reg = IR_REG_NUM(op1_reg); 3892 
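/* op1 currently lives in its spill slot; reload it before emitting the three-operand imul */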
ir_emit_load(ctx, type, op1_reg, op1); 3893 } 3894 switch (ir_type_size[type]) { 3895 default: 3896 IR_ASSERT(0); 3897 case 2: 3898 | imul Rw(def_reg), Rw(op1_reg), val 3899 break; 3900 case 4: 3901 | imul Rd(def_reg), Rd(op1_reg), val 3902 break; 3903|.if X64 3904|| case 8: 3905| imul Rq(def_reg), Rq(op1_reg), val 3906|| break; 3907|.endif 3908 } 3909 } else { 3910 ir_mem mem; 3911 3912 if (ir_rule(ctx, op1) & IR_FUSED) { 3913 mem = ir_fuse_load(ctx, def, op1); 3914 } else { 3915 mem = ir_ref_spill_slot(ctx, op1); 3916 } 3917 | ASM_REG_MEM_TXT_MUL imul, type, def_reg, mem, val 3918 } 3919 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3920 ir_emit_store(ctx, type, def, def_reg); 3921 } 3922} 3923 3924static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3925{ 3926 ir_backend_data *data = ctx->data; 3927 dasm_State **Dst = &data->dasm_state; 3928 ir_type type = insn->type; 3929 ir_ref op1 = insn->op1; 3930 ir_ref op2 = insn->op2; 3931 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3932 ir_reg op1_reg = ctx->regs[def][1]; 3933 ir_reg op2_reg = ctx->regs[def][2]; 3934 3935 IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 3936 3937 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3938 op1_reg = IR_REG_NUM(op1_reg); 3939 ir_emit_load(ctx, type, op1_reg, op1); 3940 } 3941 if (def_reg != op1_reg) { 3942 if (op1_reg != IR_REG_NONE) { 3943 ir_emit_mov(ctx, type, def_reg, op1_reg); 3944 } else { 3945 ir_emit_load(ctx, type, def_reg, op1); 3946 } 3947 } 3948 3949 if (IR_REG_SPILLED(op2_reg)) { 3950 op2_reg = IR_REG_NUM(op2_reg); 3951 if (op1 != op2) { 3952 ir_emit_load(ctx, type, op2_reg, op2); 3953 } 3954 } 3955 3956 if (op1 == op2) { 3957 return; 3958 } 3959 3960 | ASM_REG_REG_OP cmp, type, def_reg, op2_reg 3961 if (insn->op == IR_MIN) { 3962 if (IR_IS_TYPE_SIGNED(type)) { 3963 | ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg 3964 } else { 3965 | ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg 3966 } 3967 } else { 3968 IR_ASSERT(insn->op == IR_MAX); 3969 if (IR_IS_TYPE_SIGNED(type)) { 3970 | ASM_REG_REG_OP2 cmovl, type, def_reg, op2_reg 3971 } else { 3972 | ASM_REG_REG_OP2 cmovb, type, def_reg, op2_reg 3973 } 3974 } 3975 3976 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3977 ir_emit_store(ctx, type, def, def_reg); 3978 } 3979} 3980 3981static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3982{ 3983 ir_backend_data *data = ctx->data; 3984 dasm_State **Dst = &data->dasm_state; 3985 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3986 ir_type type = ctx->ir_base[insn->op1].type; 3987 3988 IR_ASSERT(def_reg != IR_REG_NONE); 3989 IR_ASSERT(IR_IS_TYPE_INT(type)); 3990 if (IR_IS_TYPE_SIGNED(type)) { 3991 | seto Rb(def_reg) 3992 } else { 3993 | setc Rb(def_reg) 3994 } 3995 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3996 ir_emit_store(ctx, insn->type, def, def_reg); 3997 } 3998} 3999 4000static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 4001{ 4002 ir_backend_data *data = ctx->data; 4003 dasm_State **Dst = &data->dasm_state; 4004 ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; 4005 ir_type type = ctx->ir_base[overflow_insn->op1].type; 4006 uint32_t true_block, false_block; 4007 bool reverse = 0; 4008 4009 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 4010 if (true_block == next_block) { 4011 reverse = 1; 4012 true_block = false_block; 4013 false_block = 0; 4014 } else if (false_block == next_block) { 4015 false_block = 0; 4016 } 4017 4018 if (IR_IS_TYPE_SIGNED(type)) { 4019 if (reverse) { 
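/* the original true target falls through, so branch to the swapped-in false target on the inverted condition */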
4020 | jno =>true_block 4021 } else { 4022 | jo =>true_block 4023 } 4024 } else { 4025 if (reverse) { 4026 | jnc =>true_block 4027 } else { 4028 | jc =>true_block 4029 } 4030 } 4031 if (false_block) { 4032 | jmp =>false_block 4033 } 4034} 4035 4036static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4037{ 4038 ir_backend_data *data = ctx->data; 4039 dasm_State **Dst = &data->dasm_state; 4040 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4041 ir_type type = op_insn->type; 4042 ir_ref op2 = op_insn->op2; 4043 ir_reg op2_reg = ctx->regs[insn->op3][2]; 4044 ir_mem mem; 4045 4046 if (insn->op == IR_STORE) { 4047 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4048 } else { 4049 IR_ASSERT(insn->op == IR_VSTORE); 4050 mem = ir_var_spill_slot(ctx, insn->op2); 4051 } 4052 4053 if (op2_reg == IR_REG_NONE) { 4054 int32_t val = ir_fuse_imm(ctx, op2); 4055 4056 switch (op_insn->op) { 4057 default: 4058 IR_ASSERT(0 && "NIY binary op"); 4059 case IR_ADD: 4060 case IR_ADD_OV: 4061 | ASM_MEM_IMM_OP add, type, mem, val 4062 break; 4063 case IR_SUB: 4064 case IR_SUB_OV: 4065 | ASM_MEM_IMM_OP sub, type, mem, val 4066 break; 4067 case IR_OR: 4068 | ASM_MEM_IMM_OP or, type, mem, val 4069 break; 4070 case IR_AND: 4071 | ASM_MEM_IMM_OP and, type, mem, val 4072 break; 4073 case IR_XOR: 4074 | ASM_MEM_IMM_OP xor, type, mem, val 4075 break; 4076 } 4077 } else { 4078 if (IR_REG_SPILLED(op2_reg)) { 4079 op2_reg = IR_REG_NUM(op2_reg); 4080 ir_emit_load(ctx, type, op2_reg, op2); 4081 } 4082 switch (op_insn->op) { 4083 default: 4084 IR_ASSERT(0 && "NIY binary op"); 4085 case IR_ADD: 4086 case IR_ADD_OV: 4087 | ASM_MEM_REG_OP add, type, mem, op2_reg 4088 break; 4089 case IR_SUB: 4090 case IR_SUB_OV: 4091 | ASM_MEM_REG_OP sub, type, mem, op2_reg 4092 break; 4093 case IR_OR: 4094 | ASM_MEM_REG_OP or, type, mem, op2_reg 4095 break; 4096 case IR_AND: 4097 | ASM_MEM_REG_OP and, type, mem, op2_reg 4098 break; 4099 case IR_XOR: 4100 | ASM_MEM_REG_OP xor, type, mem, op2_reg 4101 break; 4102 } 4103 } 4104} 4105 4106static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4107{ 4108 ir_backend_data *data = ctx->data; 4109 dasm_State **Dst = &data->dasm_state; 4110 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 4111 ir_type type = op_insn->type; 4112 ir_ref op2 = op_insn->op2; 4113 ir_reg op2_reg = ctx->regs[insn->op2][2]; 4114 ir_reg reg; 4115 4116 IR_ASSERT(insn->op == IR_RSTORE); 4117 reg = insn->op3; 4118 4119 if (op2_reg == IR_REG_NONE) { 4120 int32_t val = ir_fuse_imm(ctx, op2); 4121 4122 switch (op_insn->op) { 4123 default: 4124 IR_ASSERT(0 && "NIY binary op"); 4125 case IR_ADD: 4126 | ASM_REG_IMM_OP add, type, reg, val 4127 break; 4128 case IR_SUB: 4129 | ASM_REG_IMM_OP sub, type, reg, val 4130 break; 4131 case IR_OR: 4132 | ASM_REG_IMM_OP or, type, reg, val 4133 break; 4134 case IR_AND: 4135 | ASM_REG_IMM_OP and, type, reg, val 4136 break; 4137 case IR_XOR: 4138 | ASM_REG_IMM_OP xor, type, reg, val 4139 break; 4140 } 4141 } else { 4142 if (IR_REG_SPILLED(op2_reg)) { 4143 op2_reg = IR_REG_NUM(op2_reg); 4144 ir_emit_load(ctx, type, op2_reg, op2); 4145 } 4146 switch (op_insn->op) { 4147 default: 4148 IR_ASSERT(0 && "NIY binary op"); 4149 case IR_ADD: 4150 | ASM_REG_REG_OP add, type, reg, op2_reg 4151 break; 4152 case IR_SUB: 4153 | ASM_REG_REG_OP sub, type, reg, op2_reg 4154 break; 4155 case IR_OR: 4156 | ASM_REG_REG_OP or, type, reg, op2_reg 4157 break; 4158 case IR_AND: 4159 | ASM_REG_REG_OP and, type, reg, op2_reg 4160 break; 4161 case IR_XOR: 4162 | ASM_REG_REG_OP xor, type, 
reg, op2_reg 4163 break; 4164 } 4165 } 4166} 4167 4168static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4169{ 4170 ir_backend_data *data = ctx->data; 4171 dasm_State **Dst = &data->dasm_state; 4172 ir_type type = insn->type; 4173 ir_ref op1 = insn->op1; 4174 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4175 ir_reg op1_reg = ctx->regs[def][1]; 4176 4177 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 4178 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 4179 IR_ASSERT(def_reg != IR_REG_NONE); 4180 4181 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4182 op1_reg = IR_REG_NUM(op1_reg); 4183 ir_emit_load(ctx, type, op1_reg, op1); 4184 } 4185 if (def_reg != op1_reg) { 4186 if (op1_reg != IR_REG_NONE) { 4187 ir_emit_mov(ctx, type, def_reg, op1_reg); 4188 } else { 4189 ir_emit_load(ctx, type, def_reg, op1); 4190 } 4191 } 4192 if (insn->op == IR_MUL) { 4193 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 4194 4195 if (shift == 1) { 4196 | ASM_REG_REG_OP add, type, def_reg, def_reg 4197 } else { 4198 | ASM_REG_IMM_OP shl, type, def_reg, shift 4199 } 4200 } else if (insn->op == IR_DIV) { 4201 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 4202 4203 | ASM_REG_IMM_OP shr, type, def_reg, shift 4204 } else { 4205 IR_ASSERT(insn->op == IR_MOD); 4206 uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; 4207 4208|.if X64 4209|| if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) { 4210|| ir_reg op2_reg = ctx->regs[def][2]; 4211|| 4212|| op2_reg = IR_REG_NUM(op2_reg); 4213|| ir_emit_load_imm_int(ctx, type, op2_reg, mask); 4214 | ASM_REG_REG_OP and, type, def_reg, op2_reg 4215|| } else { 4216|.endif 4217 | ASM_REG_IMM_OP and, type, def_reg, mask 4218|.if X64 4219|| } 4220|.endif 4221 } 4222 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4223 ir_emit_store(ctx, type, def, def_reg); 4224 } 4225} 4226 4227static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4228{ 4229 ir_backend_data *data = ctx->data; 4230 dasm_State **Dst = &data->dasm_state; 4231 ir_type type = insn->type; 4232 ir_ref op1 = insn->op1; 4233 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4234 ir_reg op1_reg = ctx->regs[def][1]; 4235 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 4236 int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; 4237 4238 IR_ASSERT(shift != 0); 4239 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 4240 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 4241 IR_ASSERT(op1_reg != IR_REG_NONE && def_reg != IR_REG_NONE && op1_reg != def_reg); 4242 4243 if (IR_REG_SPILLED(op1_reg)) { 4244 op1_reg = IR_REG_NUM(op1_reg); 4245 ir_emit_load(ctx, type, op1_reg, op1); 4246 } 4247 4248 if (shift == 1) { 4249|.if X64 4250|| if (ir_type_size[type] == 8) { 4251 | mov Rq(def_reg), Rq(op1_reg) 4252 | ASM_REG_IMM_OP shr, type, def_reg, 63 4253 | add Rq(def_reg), Rq(op1_reg) 4254|| } else { 4255|.endif 4256 | mov Rd(def_reg), Rd(op1_reg) 4257 | ASM_REG_IMM_OP shr, type, def_reg, (ir_type_size[type]*8-1) 4258 | add Rd(def_reg), Rd(op1_reg) 4259|.if X64 4260|| } 4261|.endif 4262 } else { 4263|.if X64 4264|| if (ir_type_size[type] == 8) { 4265|| ir_reg op2_reg = ctx->regs[def][2]; 4266|| 4267|| if (op2_reg != IR_REG_NONE) { 4268|| op2_reg = IR_REG_NUM(op2_reg); 4269|| ir_emit_load_imm_int(ctx, type, op2_reg, offset); 4270 | lea Rq(def_reg), [Rq(op1_reg)+Rq(op2_reg)] 4271|| } else { 4272 | lea Rq(def_reg), [Rq(op1_reg)+(int32_t)offset] 4273|| } 4274|| } else { 4275|.endif 4276 | lea Rd(def_reg), [Rd(op1_reg)+(int32_t)offset] 4277|.if X64 4278|| } 
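|| /* the bias (divisor-1) added by the lea above is kept only for negative dividends (test/cmovns below), so the arithmetic shift rounds toward zero */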
4279|.endif 4280 | ASM_REG_REG_OP test, type, op1_reg, op1_reg 4281 | ASM_REG_REG_OP2 cmovns, type, def_reg, op1_reg 4282 } 4283 | ASM_REG_IMM_OP sar, type, def_reg, shift 4284 4285 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4286 ir_emit_store(ctx, type, def, def_reg); 4287 } 4288} 4289 4290static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4291{ 4292 ir_backend_data *data = ctx->data; 4293 dasm_State **Dst = &data->dasm_state; 4294 ir_type type = insn->type; 4295 ir_ref op1 = insn->op1; 4296 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4297 ir_reg op1_reg = ctx->regs[def][1]; 4298 ir_reg tmp_reg = ctx->regs[def][3]; 4299 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 4300 uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; 4301 4302 IR_ASSERT(shift != 0); 4303 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 4304 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 4305 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg); 4306 4307 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4308 op1_reg = IR_REG_NUM(op1_reg); 4309 ir_emit_load(ctx, type, op1_reg, op1); 4310 } 4311 if (def_reg != op1_reg) { 4312 if (op1_reg != IR_REG_NONE) { 4313 ir_emit_mov(ctx, type, def_reg, op1_reg); 4314 } else { 4315 ir_emit_load(ctx, type, def_reg, op1); 4316 } 4317 } 4318 if (tmp_reg != op1_reg) { 4319 ir_emit_mov(ctx, type, tmp_reg, def_reg); 4320 } 4321 4322 4323 if (shift == 1) { 4324 | ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-1) 4325 } else { 4326 | ASM_REG_IMM_OP sar, type, tmp_reg, (ir_type_size[type]*8-1) 4327 | ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-shift) 4328 } 4329 | ASM_REG_REG_OP add, type, def_reg, tmp_reg 4330 4331|.if X64 4332|| if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) { 4333|| ir_reg op2_reg = ctx->regs[def][2]; 4334|| 4335|| op2_reg = IR_REG_NUM(op2_reg); 4336|| ir_emit_load_imm_int(ctx, type, op2_reg, mask); 4337 | ASM_REG_REG_OP and, type, def_reg, op2_reg 4338|| } else { 4339|.endif 4340 | ASM_REG_IMM_OP and, type, def_reg, mask 4341|.if X64 4342|| } 4343|.endif 4344 4345 | ASM_REG_REG_OP sub, type, def_reg, tmp_reg 4346 4347 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4348 ir_emit_store(ctx, type, def, def_reg); 4349 } 4350} 4351 4352static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4353{ 4354 ir_backend_data *data = ctx->data; 4355 dasm_State **Dst = &data->dasm_state; 4356 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4357 ir_type type = op_insn->type; 4358 ir_mem mem; 4359 4360 IR_ASSERT(IR_IS_CONST_REF(op_insn->op2)); 4361 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op)); 4362 4363 if (insn->op == IR_STORE) { 4364 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4365 } else { 4366 IR_ASSERT(insn->op == IR_VSTORE); 4367 mem = ir_var_spill_slot(ctx, insn->op2); 4368 } 4369 4370 if (op_insn->op == IR_MUL) { 4371 uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); 4372 | ASM_MEM_IMM_OP shl, type, mem, shift 4373 } else if (op_insn->op == IR_DIV) { 4374 uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); 4375 | ASM_MEM_IMM_OP shr, type, mem, shift 4376 } else { 4377 IR_ASSERT(op_insn->op == IR_MOD); 4378 uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1; 4379 IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); 4380 | ASM_MEM_IMM_OP and, type, mem, mask 4381 } 4382} 4383 4384static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4385{ 4386 ir_backend_data *data = ctx->data; 4387 dasm_State **Dst = 
&data->dasm_state; 4388 ir_type type = insn->type; 4389 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4390 ir_reg op1_reg = ctx->regs[def][1]; 4391 ir_reg op2_reg = ctx->regs[def][2]; 4392 4393 IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX); 4394 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4395 op1_reg = IR_REG_NUM(op1_reg); 4396 ir_emit_load(ctx, type, op1_reg, insn->op1); 4397 } 4398 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 4399 op2_reg = IR_REG_NUM(op2_reg); 4400 ir_emit_load(ctx, type, op2_reg, insn->op2); 4401 } 4402 if (op2_reg != IR_REG_RCX) { 4403 if (op1_reg == IR_REG_RCX) { 4404 ir_emit_mov(ctx, type, def_reg, op1_reg); 4405 op1_reg = def_reg; 4406 } 4407 if (op2_reg != IR_REG_NONE) { 4408 ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); 4409 } else { 4410 ir_emit_load(ctx, type, IR_REG_RCX, insn->op2); 4411 } 4412 } 4413 if (def_reg != op1_reg) { 4414 if (op1_reg != IR_REG_NONE) { 4415 ir_emit_mov(ctx, type, def_reg, op1_reg); 4416 } else { 4417 ir_emit_load(ctx, type, def_reg, insn->op1); 4418 } 4419 } 4420 switch (insn->op) { 4421 default: 4422 IR_ASSERT(0); 4423 case IR_SHL: 4424 | ASM_REG_TXT_OP shl, insn->type, def_reg, cl 4425 break; 4426 case IR_SHR: 4427 | ASM_REG_TXT_OP shr, insn->type, def_reg, cl 4428 break; 4429 case IR_SAR: 4430 | ASM_REG_TXT_OP sar, insn->type, def_reg, cl 4431 break; 4432 case IR_ROL: 4433 | ASM_REG_TXT_OP rol, insn->type, def_reg, cl 4434 break; 4435 case IR_ROR: 4436 | ASM_REG_TXT_OP ror, insn->type, def_reg, cl 4437 break; 4438 } 4439 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4440 ir_emit_store(ctx, type, def, def_reg); 4441 } 4442} 4443 4444static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4445{ 4446 ir_backend_data *data = ctx->data; 4447 dasm_State **Dst = &data->dasm_state; 4448 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4449 ir_type type = op_insn->type; 4450 ir_ref op2 = op_insn->op2; 4451 ir_reg op2_reg = ctx->regs[insn->op3][2]; 4452 ir_mem mem; 4453 4454 if (insn->op == IR_STORE) { 4455 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4456 } else { 4457 IR_ASSERT(insn->op == IR_VSTORE); 4458 mem = ir_var_spill_slot(ctx, insn->op2); 4459 } 4460 4461 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 4462 op2_reg = IR_REG_NUM(op2_reg); 4463 ir_emit_load(ctx, type, op2_reg, op2); 4464 } 4465 if (op2_reg != IR_REG_RCX) { 4466 if (op2_reg != IR_REG_NONE) { 4467 ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); 4468 } else { 4469 ir_emit_load(ctx, type, IR_REG_RCX, op2); 4470 } 4471 } 4472 switch (op_insn->op) { 4473 default: 4474 IR_ASSERT(0); 4475 case IR_SHL: 4476 | ASM_MEM_TXT_OP shl, type, mem, cl 4477 break; 4478 case IR_SHR: 4479 | ASM_MEM_TXT_OP shr, type, mem, cl 4480 break; 4481 case IR_SAR: 4482 | ASM_MEM_TXT_OP sar, type, mem, cl 4483 break; 4484 case IR_ROL: 4485 | ASM_MEM_TXT_OP rol, type, mem, cl 4486 break; 4487 case IR_ROR: 4488 | ASM_MEM_TXT_OP ror, type, mem, cl 4489 break; 4490 } 4491} 4492 4493static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4494{ 4495 ir_backend_data *data = ctx->data; 4496 dasm_State **Dst = &data->dasm_state; 4497 int32_t shift; 4498 ir_type type = insn->type; 4499 ir_ref op1 = insn->op1; 4500 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4501 ir_reg op1_reg = ctx->regs[def][1]; 4502 4503 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 4504 IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); 4505 shift = ctx->ir_base[insn->op2].val.i32; 4506 IR_ASSERT(def_reg != IR_REG_NONE); 4507 4508 
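/* the shift count is a known small immediate (validated above); materialize op1 in def_reg and emit a single shift/rotate by that immediate */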
if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4509 op1_reg = IR_REG_NUM(op1_reg); 4510 ir_emit_load(ctx, type, op1_reg, op1); 4511 } 4512 if (def_reg != op1_reg) { 4513 if (op1_reg != IR_REG_NONE) { 4514 ir_emit_mov(ctx, type, def_reg, op1_reg); 4515 } else { 4516 ir_emit_load(ctx, type, def_reg, op1); 4517 } 4518 } 4519 switch (insn->op) { 4520 default: 4521 IR_ASSERT(0); 4522 case IR_SHL: 4523 | ASM_REG_IMM_OP shl, insn->type, def_reg, shift 4524 break; 4525 case IR_SHR: 4526 | ASM_REG_IMM_OP shr, insn->type, def_reg, shift 4527 break; 4528 case IR_SAR: 4529 | ASM_REG_IMM_OP sar, insn->type, def_reg, shift 4530 break; 4531 case IR_ROL: 4532 | ASM_REG_IMM_OP rol, insn->type, def_reg, shift 4533 break; 4534 case IR_ROR: 4535 | ASM_REG_IMM_OP ror, insn->type, def_reg, shift 4536 break; 4537 } 4538 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4539 ir_emit_store(ctx, type, def, def_reg); 4540 } 4541} 4542 4543static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4544{ 4545 ir_backend_data *data = ctx->data; 4546 dasm_State **Dst = &data->dasm_state; 4547 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4548 ir_type type = op_insn->type; 4549 int32_t shift; 4550 ir_mem mem; 4551 4552 IR_ASSERT(IR_IS_CONST_REF(op_insn->op2)); 4553 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op)); 4554 IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[op_insn->op2].val.i64)); 4555 shift = ctx->ir_base[op_insn->op2].val.i32; 4556 if (insn->op == IR_STORE) { 4557 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4558 } else { 4559 IR_ASSERT(insn->op == IR_VSTORE); 4560 mem = ir_var_spill_slot(ctx, insn->op2); 4561 } 4562 4563 switch (op_insn->op) { 4564 default: 4565 IR_ASSERT(0); 4566 case IR_SHL: 4567 | ASM_MEM_IMM_OP shl, type, mem, shift 4568 break; 4569 case IR_SHR: 4570 | ASM_MEM_IMM_OP shr, type, mem, shift 4571 break; 4572 case IR_SAR: 4573 | ASM_MEM_IMM_OP sar, type, mem, shift 4574 break; 4575 case IR_ROL: 4576 | ASM_MEM_IMM_OP rol, type, mem, shift 4577 break; 4578 case IR_ROR: 4579 | ASM_MEM_IMM_OP ror, type, mem, shift 4580 break; 4581 } 4582} 4583 4584static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule) 4585{ 4586 ir_backend_data *data = ctx->data; 4587 dasm_State **Dst = &data->dasm_state; 4588 ir_type type = insn->type; 4589 ir_ref op1 = insn->op1; 4590 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4591 ir_reg op1_reg = ctx->regs[def][1]; 4592 4593 IR_ASSERT(def_reg != IR_REG_NONE); 4594 4595 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4596 op1_reg = IR_REG_NUM(op1_reg); 4597 ir_emit_load(ctx, type, op1_reg, op1); 4598 } 4599 if (def_reg != op1_reg) { 4600 if (op1_reg != IR_REG_NONE) { 4601 ir_emit_mov(ctx, type, def_reg, op1_reg); 4602 } else { 4603 ir_emit_load(ctx, type, def_reg, op1); 4604 } 4605 } 4606 if (rule == IR_INC) { 4607 | ASM_REG_OP inc, insn->type, def_reg 4608 } else if (rule == IR_DEC) { 4609 | ASM_REG_OP dec, insn->type, def_reg 4610 } else if (insn->op == IR_NOT) { 4611 | ASM_REG_OP not, insn->type, def_reg 4612 } else if (insn->op == IR_NEG) { 4613 | ASM_REG_OP neg, insn->type, def_reg 4614 } else { 4615 IR_ASSERT(insn->op == IR_BSWAP); 4616 switch (ir_type_size[insn->type]) { 4617 default: 4618 IR_ASSERT(0); 4619 case 4: 4620 | bswap Rd(def_reg) 4621 break; 4622 case 8: 4623 IR_ASSERT(sizeof(void*) == 8); 4624|.if X64 4625 | bswap Rq(def_reg) 4626|.endif 4627 break; 4628 } 4629 } 4630 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4631 ir_emit_store(ctx, type, def, def_reg); 4632 } 4633} 4634 4635static 
void ir_emit_bit_count(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4636{ 4637 ir_backend_data *data = ctx->data; 4638 dasm_State **Dst = &data->dasm_state; 4639 ir_type type = insn->type; 4640 ir_ref op1 = insn->op1; 4641 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4642 ir_reg op1_reg = ctx->regs[def][1]; 4643 4644 IR_ASSERT(def_reg != IR_REG_NONE); 4645 4646 if (op1_reg != IR_REG_NONE) { 4647 if (IR_REG_SPILLED(op1_reg)) { 4648 op1_reg = IR_REG_NUM(op1_reg); 4649 ir_emit_load(ctx, type, op1_reg, op1); 4650 } 4651 switch (ir_type_size[insn->type]) { 4652 default: 4653 IR_ASSERT(0); 4654 case 2: 4655 if (insn->op == IR_CTLZ) { 4656 if (ctx->mflags & IR_X86_BMI1) { 4657 | lzcnt Rw(def_reg), Rw(op1_reg) 4658 } else { 4659 | bsr Rw(def_reg), Rw(op1_reg) 4660 | xor Rw(def_reg), 0xf 4661 } 4662 } else if (insn->op == IR_CTTZ) { 4663 if (ctx->mflags & IR_X86_BMI1) { 4664 | tzcnt Rw(def_reg), Rw(op1_reg) 4665 } else { 4666 | bsf Rw(def_reg), Rw(op1_reg) 4667 } 4668 } else { 4669 IR_ASSERT(insn->op == IR_CTPOP); 4670 | popcnt Rw(def_reg), Rw(op1_reg) 4671 } 4672 break; 4673 case 1: 4674 | movzx Rd(op1_reg), Rb(op1_reg) 4675 if (insn->op == IR_CTLZ) { 4676 if (ctx->mflags & IR_X86_BMI1) { 4677 | lzcnt Rd(def_reg), Rd(op1_reg) 4678 | sub Rd(def_reg), 24 4679 } else { 4680 | bsr Rd(def_reg), Rd(op1_reg) 4681 | xor Rw(def_reg), 0x7 4682 } 4683 break; 4684 } 4685 IR_FALLTHROUGH; 4686 case 4: 4687 if (insn->op == IR_CTLZ) { 4688 if (ctx->mflags & IR_X86_BMI1) { 4689 | lzcnt Rd(def_reg), Rd(op1_reg) 4690 } else { 4691 | bsr Rd(def_reg), Rd(op1_reg) 4692 | xor Rw(def_reg), 0x1f 4693 } 4694 } else if (insn->op == IR_CTTZ) { 4695 if (ctx->mflags & IR_X86_BMI1) { 4696 | tzcnt Rd(def_reg), Rd(op1_reg) 4697 } else { 4698 | bsf Rd(def_reg), Rd(op1_reg) 4699 } 4700 } else { 4701 IR_ASSERT(insn->op == IR_CTPOP); 4702 | popcnt Rd(def_reg), Rd(op1_reg) 4703 } 4704 break; 4705|.if X64 4706 case 8: 4707 if (insn->op == IR_CTLZ) { 4708 if (ctx->mflags & IR_X86_BMI1) { 4709 | lzcnt Rq(def_reg), Rq(op1_reg) 4710 } else { 4711 | bsr Rq(def_reg), Rq(op1_reg) 4712 | xor Rw(def_reg), 0x3f 4713 } 4714 } else if (insn->op == IR_CTTZ) { 4715 if (ctx->mflags & IR_X86_BMI1) { 4716 | tzcnt Rq(def_reg), Rq(op1_reg) 4717 } else { 4718 | bsf Rq(def_reg), Rq(op1_reg) 4719 } 4720 } else { 4721 IR_ASSERT(insn->op == IR_CTPOP); 4722 | popcnt Rq(def_reg), Rq(op1_reg) 4723 } 4724 break; 4725|.endif 4726 } 4727 } else { 4728 ir_mem mem; 4729 4730 if (ir_rule(ctx, op1) & IR_FUSED) { 4731 mem = ir_fuse_load(ctx, def, op1); 4732 } else { 4733 mem = ir_ref_spill_slot(ctx, op1); 4734 } 4735 switch (ir_type_size[insn->type]) { 4736 default: 4737 IR_ASSERT(0); 4738 case 2: 4739 if (insn->op == IR_CTLZ) { 4740 if (ctx->mflags & IR_X86_BMI1) { 4741 | ASM_TXT_TMEM_OP lzcnt, Rw(def_reg), word, mem 4742 } else { 4743 | ASM_TXT_TMEM_OP bsr, Rw(def_reg), word, mem 4744 | xor Rw(def_reg), 0xf 4745 } 4746 } else if (insn->op == IR_CTTZ) { 4747 if (ctx->mflags & IR_X86_BMI1) { 4748 | ASM_TXT_TMEM_OP tzcnt, Rw(def_reg), word, mem 4749 } else { 4750 | ASM_TXT_TMEM_OP bsf, Rw(def_reg), word, mem 4751 } 4752 } else { 4753 | ASM_TXT_TMEM_OP popcnt, Rw(def_reg), word, mem 4754 } 4755 break; 4756 case 4: 4757 if (insn->op == IR_CTLZ) { 4758 if (ctx->mflags & IR_X86_BMI1) { 4759 | ASM_TXT_TMEM_OP lzcnt, Rd(def_reg), dword, mem 4760 } else { 4761 | ASM_TXT_TMEM_OP bsr, Rd(def_reg), dword, mem 4762 | xor Rw(def_reg), 0x1f 4763 } 4764 } else if (insn->op == IR_CTTZ) { 4765 if (ctx->mflags & IR_X86_BMI1) { 4766 | ASM_TXT_TMEM_OP tzcnt, Rd(def_reg), dword, mem 4767 } 
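/* no BMI1: fall back to bsf */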
else { 4768 | ASM_TXT_TMEM_OP bsf, Rd(def_reg), dword, mem 4769 } 4770 } else { 4771 | ASM_TXT_TMEM_OP popcnt, Rd(def_reg), dword, mem 4772 } 4773 break; 4774|.if X64 4775 case 8: 4776 if (insn->op == IR_CTLZ) { 4777 if (ctx->mflags & IR_X86_BMI1) { 4778 | ASM_TXT_TMEM_OP lzcnt, Rq(def_reg), qword, mem 4779 } else { 4780 | ASM_TXT_TMEM_OP bsr, Rq(def_reg), qword, mem 4781 | xor Rw(def_reg), 0x3f 4782 } 4783 } else if (insn->op == IR_CTTZ) { 4784 if (ctx->mflags & IR_X86_BMI1) { 4785 | ASM_TXT_TMEM_OP tzcnt, Rq(def_reg), qword, mem 4786 } else { 4787 | ASM_TXT_TMEM_OP bsf, Rq(def_reg), qword, mem 4788 } 4789 } else { 4790 | ASM_TXT_TMEM_OP popcnt, Rq(def_reg), qword, mem 4791 } 4792 break; 4793|.endif 4794 } 4795 } 4796 4797 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4798 ir_emit_store(ctx, type, def, def_reg); 4799 } 4800} 4801 4802static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4803{ 4804 ir_backend_data *data = ctx->data; 4805 dasm_State **Dst = &data->dasm_state; 4806 ir_type type = insn->type; 4807 ir_ref op1 = insn->op1; 4808 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4809 ir_reg op1_reg = ctx->regs[def][1]; 4810 ir_reg tmp_reg = ctx->regs[def][2]; 4811|.if X64 4812|| ir_reg const_reg = ctx->regs[def][3]; 4813|.endif 4814 4815 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4816 if (op1_reg == IR_REG_NONE) { 4817 ir_emit_load(ctx, type, def_reg, op1); 4818 if (ir_type_size[insn->type] == 1) { 4819 | movzx Rd(def_reg), Rb(def_reg) 4820 } else if (ir_type_size[insn->type] == 2) { 4821 | movzx Rd(def_reg), Rw(def_reg) 4822 } 4823 } else { 4824 if (IR_REG_SPILLED(op1_reg)) { 4825 op1_reg = IR_REG_NUM(op1_reg); 4826 ir_emit_load(ctx, type, op1_reg, op1); 4827 } 4828 switch (ir_type_size[insn->type]) { 4829 default: 4830 IR_ASSERT(0); 4831 case 1: 4832 | movzx Rd(def_reg), Rb(op1_reg) 4833 break; 4834 case 2: 4835 | movzx Rd(def_reg), Rw(op1_reg) 4836 break; 4837 case 4: 4838 | mov Rd(def_reg), Rd(op1_reg) 4839 break; 4840|.if X64 4841|| case 8: 4842 | mov Rq(def_reg), Rq(op1_reg) 4843|| break; 4844|.endif 4845 } 4846 } 4847 switch (ir_type_size[insn->type]) { 4848 default: 4849 IR_ASSERT(0); 4850 case 1: 4851 | mov Rd(tmp_reg), Rd(def_reg) 4852 | shr Rd(def_reg), 1 4853 | and Rd(def_reg), 0x55 4854 | sub Rd(tmp_reg), Rd(def_reg) 4855 | mov Rd(def_reg), Rd(tmp_reg) 4856 | and Rd(def_reg), 0x33 4857 | shr Rd(tmp_reg), 2 4858 | and Rd(tmp_reg), 0x33 4859 | add Rd(tmp_reg), Rd(def_reg) 4860 | mov Rd(def_reg), Rd(tmp_reg) 4861 | shr Rd(def_reg), 4 4862 | add Rd(def_reg), Rd(tmp_reg) 4863 | and Rd(def_reg), 0x0f 4864 break; 4865 case 2: 4866 | mov Rd(tmp_reg), Rd(def_reg) 4867 | shr Rd(def_reg), 1 4868 | and Rd(def_reg), 0x5555 4869 | sub Rd(tmp_reg), Rd(def_reg) 4870 | mov Rd(def_reg), Rd(tmp_reg) 4871 | and Rd(def_reg), 0x3333 4872 | shr Rd(tmp_reg), 2 4873 | and Rd(tmp_reg), 0x3333 4874 | add Rd(tmp_reg), Rd(def_reg) 4875 | mov Rd(def_reg), Rd(tmp_reg) 4876 | shr Rd(def_reg), 4 4877 | add Rd(def_reg), Rd(tmp_reg) 4878 | and Rd(def_reg), 0x0f0f 4879 | mov Rd(tmp_reg), Rd(def_reg) 4880 | shr Rd(tmp_reg), 8 4881 | and Rd(def_reg), 0x0f 4882 | add Rd(def_reg), Rd(tmp_reg) 4883 break; 4884 case 4: 4885 | mov Rd(tmp_reg), Rd(def_reg) 4886 | shr Rd(def_reg), 1 4887 | and Rd(def_reg), 0x55555555 4888 | sub Rd(tmp_reg), Rd(def_reg) 4889 | mov Rd(def_reg), Rd(tmp_reg) 4890 | and Rd(def_reg), 0x33333333 4891 | shr Rd(tmp_reg), 2 4892 | and Rd(tmp_reg), 0x33333333 4893 | add Rd(tmp_reg), Rd(def_reg) 4894 | mov Rd(def_reg), Rd(tmp_reg) 4895 | shr Rd(def_reg), 4 4896 | add 
Rd(def_reg), Rd(tmp_reg) 4897 | and Rd(def_reg), 0x0f0f0f0f 4898 | imul Rd(def_reg), 0x01010101 4899 | shr Rd(def_reg), 24 4900 break; 4901|.if X64 4902|| case 8: 4903|| IR_ASSERT(const_reg != IR_REG_NONE); 4904 | mov Rq(tmp_reg), Rq(def_reg) 4905 | shr Rq(def_reg), 1 4906 | mov64 Rq(const_reg), 0x5555555555555555 4907 | and Rq(def_reg), Rq(const_reg) 4908 | sub Rq(tmp_reg), Rq(def_reg) 4909 | mov Rq(def_reg), Rq(tmp_reg) 4910 | mov64 Rq(const_reg), 0x3333333333333333 4911 | and Rq(def_reg), Rq(const_reg) 4912 | shr Rq(tmp_reg), 2 4913 | and Rq(tmp_reg), Rq(const_reg) 4914 | add Rq(tmp_reg), Rq(def_reg) 4915 | mov Rq(def_reg), Rq(tmp_reg) 4916 | shr Rq(def_reg), 4 4917 | add Rq(def_reg), Rq(tmp_reg) 4918 | mov64 Rq(const_reg), 0x0f0f0f0f0f0f0f0f 4919 | and Rq(def_reg), Rq(const_reg) 4920 | mov64 Rq(const_reg), 0x0101010101010101 4921 | imul Rq(def_reg), Rq(const_reg) 4922 | shr Rq(def_reg), 56 4923|| break; 4924|.endif 4925 } 4926 4927 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4928 ir_emit_store(ctx, type, def, def_reg); 4929 } 4930} 4931 4932static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule) 4933{ 4934 ir_backend_data *data = ctx->data; 4935 dasm_State **Dst = &data->dasm_state; 4936 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4937 ir_type type = op_insn->type; 4938 ir_mem mem; 4939 4940 if (insn->op == IR_STORE) { 4941 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4942 } else { 4943 IR_ASSERT(insn->op == IR_VSTORE); 4944 mem = ir_var_spill_slot(ctx, insn->op2); 4945 } 4946 4947 if (rule == IR_MEM_INC) { 4948 | ASM_MEM_OP inc, type, mem 4949 } else if (rule == IR_MEM_DEC) { 4950 | ASM_MEM_OP dec, type, mem 4951 } else if (op_insn->op == IR_NOT) { 4952 | ASM_MEM_OP not, type, mem 4953 } else { 4954 IR_ASSERT(op_insn->op == IR_NEG); 4955 | ASM_MEM_OP neg, type, mem 4956 } 4957} 4958 4959static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4960{ 4961 ir_backend_data *data = ctx->data; 4962 dasm_State **Dst = &data->dasm_state; 4963 ir_type type = insn->type; 4964 ir_ref op1 = insn->op1; 4965 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4966 ir_reg op1_reg = ctx->regs[def][1]; 4967 4968 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 4969 4970 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4971 op1_reg = IR_REG_NUM(op1_reg); 4972 ir_emit_load(ctx, type, op1_reg, op1); 4973 } 4974 4975 IR_ASSERT(def_reg != op1_reg); 4976 4977 ir_emit_mov(ctx, insn->type, def_reg, op1_reg); 4978 | ASM_REG_OP neg, insn->type, def_reg 4979 | ASM_REG_REG_OP2 cmovs, type, def_reg, op1_reg 4980 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4981 ir_emit_store(ctx, type, def, def_reg); 4982 } 4983} 4984 4985static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4986{ 4987 ir_backend_data *data = ctx->data; 4988 dasm_State **Dst = &data->dasm_state; 4989 ir_type type = ctx->ir_base[insn->op1].type; 4990 ir_ref op1 = insn->op1; 4991 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4992 ir_reg op1_reg = ctx->regs[def][1]; 4993 4994 IR_ASSERT(def_reg != IR_REG_NONE); 4995 4996 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4997 op1_reg = IR_REG_NUM(op1_reg); 4998 ir_emit_load(ctx, type, op1_reg, op1); 4999 } 5000 5001 if (op1_reg != IR_REG_NONE) { 5002 | ASM_REG_REG_OP test, type, op1_reg, op1_reg 5003 } else { 5004 ir_mem mem = ir_ref_spill_slot(ctx, op1); 5005 5006 | ASM_MEM_IMM_OP cmp, type, mem, 0 5007 } 5008 | sete Rb(def_reg) 5009 5010 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5011 ir_emit_store(ctx, type,
def, def_reg); 5012 } 5013} 5014 5015static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5016{ 5017 ir_backend_data *data = ctx->data; 5018 dasm_State **Dst = &data->dasm_state; 5019 ir_type type = insn->type; 5020 ir_ref op1 = insn->op1; 5021 ir_ref op2 = insn->op2; 5022 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5023 ir_reg op1_reg = ctx->regs[def][1]; 5024 ir_reg op2_reg = ctx->regs[def][2]; 5025 ir_mem mem; 5026 5027 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 5028 op1_reg = IR_REG_NUM(op1_reg); 5029 ir_emit_load(ctx, type, op1_reg, op1); 5030 } 5031 if (op1_reg != IR_REG_RAX) { 5032 if (op1_reg != IR_REG_NONE) { 5033 ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg); 5034 } else { 5035 ir_emit_load(ctx, type, IR_REG_RAX, op1); 5036 } 5037 } 5038 if (op2_reg == IR_REG_NONE && op1 == op2) { 5039 op2_reg = IR_REG_RAX; 5040 } else if (op2_reg != IR_REG_NONE) { 5041 if (IR_REG_SPILLED(op2_reg)) { 5042 op2_reg = IR_REG_NUM(op2_reg); 5043 ir_emit_load(ctx, type, op2_reg, op2); 5044 } 5045 } else if (IR_IS_CONST_REF(op2) 5046 && (insn->op == IR_MUL || insn->op == IR_MUL_OV)) { 5047 op2_reg = IR_REG_RDX; 5048 ir_emit_load(ctx, type, op2_reg, op2); 5049 } 5050 if (insn->op == IR_MUL || insn->op == IR_MUL_OV) { 5051 if (IR_IS_TYPE_SIGNED(insn->type)) { 5052 if (op2_reg != IR_REG_NONE) { 5053 | ASM_REG_OP imul, type, op2_reg 5054 } else { 5055 if (ir_rule(ctx, op2) & IR_FUSED) { 5056 mem = ir_fuse_load(ctx, def, op2); 5057 } else { 5058 mem = ir_ref_spill_slot(ctx, op2); 5059 } 5060 | ASM_MEM_OP imul, type, mem 5061 } 5062 } else { 5063 if (op2_reg != IR_REG_NONE) { 5064 | ASM_REG_OP mul, type, op2_reg 5065 } else { 5066 if (ir_rule(ctx, op2) & IR_FUSED) { 5067 mem = ir_fuse_load(ctx, def, op2); 5068 } else { 5069 mem = ir_ref_spill_slot(ctx, op2); 5070 } 5071 | ASM_MEM_OP mul, type, mem 5072 } 5073 } 5074 } else { 5075 if (IR_IS_TYPE_SIGNED(type)) { 5076 if (ir_type_size[type] == 8) { 5077 | cqo 5078 } else if (ir_type_size[type] == 4) { 5079 | cdq 5080 } else if (ir_type_size[type] == 2) { 5081 | cwd 5082 } else { 5083 | movsx ax, al 5084 } 5085 if (op2_reg != IR_REG_NONE) { 5086 | ASM_REG_OP idiv, type, op2_reg 5087 } else { 5088 if (ir_rule(ctx, op2) & IR_FUSED) { 5089 mem = ir_fuse_load(ctx, def, op2); 5090 } else { 5091 mem = ir_ref_spill_slot(ctx, op2); 5092 } 5093 | ASM_MEM_OP idiv, type, mem 5094 } 5095 } else { 5096 if (ir_type_size[type] == 1) { 5097 | movzx ax, al 5098 } else { 5099 | ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX 5100 } 5101 if (op2_reg != IR_REG_NONE) { 5102 | ASM_REG_OP div, type, op2_reg 5103 } else { 5104 if (ir_rule(ctx, op2) & IR_FUSED) { 5105 mem = ir_fuse_load(ctx, def, op2); 5106 } else { 5107 mem = ir_ref_spill_slot(ctx, op2); 5108 } 5109 | ASM_MEM_OP div, type, mem 5110 } 5111 } 5112 } 5113 5114 if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) { 5115 if (def_reg != IR_REG_NONE) { 5116 if (def_reg != IR_REG_RAX) { 5117 ir_emit_mov(ctx, type, def_reg, IR_REG_RAX); 5118 } 5119 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5120 ir_emit_store(ctx, type, def, def_reg); 5121 } 5122 } else { 5123 ir_emit_store(ctx, type, def, IR_REG_RAX); 5124 } 5125 } else { 5126 IR_ASSERT(insn->op == IR_MOD); 5127 if (ir_type_size[type] == 1) { 5128 if (def_reg != IR_REG_NONE) { 5129 | mov al, ah 5130 if (def_reg != IR_REG_RAX) { 5131 | mov Rb(def_reg), al 5132 } 5133 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5134 ir_emit_store(ctx, type, def, def_reg); 5135 } 5136 } else { 5137 ir_reg fp; 5138 int32_t offset = 
ir_ref_spill_slot_offset(ctx, def, &fp); 5139 5140//????? 5141 | mov byte [Ra(fp)+offset], ah 5142 } 5143 } else { 5144 if (def_reg != IR_REG_NONE) { 5145 if (def_reg != IR_REG_RDX) { 5146 ir_emit_mov(ctx, type, def_reg, IR_REG_RDX); 5147 } 5148 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5149 ir_emit_store(ctx, type, def, def_reg); 5150 } 5151 } else { 5152 ir_emit_store(ctx, type, def, IR_REG_RDX); 5153 } 5154 } 5155 } 5156} 5157 5158static void ir_rodata(ir_ctx *ctx) 5159{ 5160 ir_backend_data *data = ctx->data; 5161 dasm_State **Dst = &data->dasm_state; 5162 5163 |.rodata 5164 if (!data->rodata_label) { 5165 int label = data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 5166 |=>label: 5167 } 5168} 5169 5170static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5171{ 5172 ir_backend_data *data = ctx->data; 5173 dasm_State **Dst = &data->dasm_state; 5174 ir_type type = insn->type; 5175 ir_ref op1 = insn->op1; 5176 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5177 ir_reg op1_reg = ctx->regs[def][1]; 5178 5179 IR_ASSERT(def_reg != IR_REG_NONE); 5180 5181 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 5182 op1_reg = IR_REG_NUM(op1_reg); 5183 ir_emit_load(ctx, type, op1_reg, op1); 5184 } 5185 if (def_reg != op1_reg) { 5186 if (op1_reg != IR_REG_NONE) { 5187 ir_emit_fp_mov(ctx, type, def_reg, op1_reg); 5188 } else { 5189 ir_emit_load(ctx, type, def_reg, op1); 5190 } 5191 } 5192 if (insn->op == IR_NEG) { 5193 if (insn->type == IR_DOUBLE) { 5194 if (!data->double_neg_const) { 5195 data->double_neg_const = 1; 5196 ir_rodata(ctx); 5197 |.align 16 5198 |->double_neg_const: 5199 |.dword 0, 0x80000000, 0, 0 5200 |.code 5201 } 5202 if (ctx->mflags & IR_X86_AVX) { 5203 | vxorpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] 5204 } else { 5205 | xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const] 5206 } 5207 } else { 5208 IR_ASSERT(insn->type == IR_FLOAT); 5209 if (!data->float_neg_const) { 5210 data->float_neg_const = 1; 5211 ir_rodata(ctx); 5212 |.align 16 5213 |->float_neg_const: 5214 |.dword 0x80000000, 0, 0, 0 5215 |.code 5216 } 5217 if (ctx->mflags & IR_X86_AVX) { 5218 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] 5219 } else { 5220 | xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const] 5221 } 5222 } 5223 } else { 5224 IR_ASSERT(insn->op == IR_ABS); 5225 if (insn->type == IR_DOUBLE) { 5226 if (!data->double_abs_const) { 5227 data->double_abs_const = 1; 5228 ir_rodata(ctx); 5229 |.align 16 5230 |->double_abs_const: 5231 |.dword 0xffffffff, 0x7fffffff, 0, 0 5232 |.code 5233 } 5234 if (ctx->mflags & IR_X86_AVX) { 5235 | vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] 5236 } else { 5237 | andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const] 5238 } 5239 } else { 5240 IR_ASSERT(insn->type == IR_FLOAT); 5241 if (!data->float_abs_const) { 5242 data->float_abs_const = 1; 5243 ir_rodata(ctx); 5244 |.align 16 5245 |->float_abs_const: 5246 |.dword 0x7fffffff, 0, 0, 0 5247 |.code 5248 } 5249 if (ctx->mflags & IR_X86_AVX) { 5250 | vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] 5251 } else { 5252 | andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const] 5253 } 5254 } 5255 } 5256 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5257 ir_emit_store(ctx, insn->type, def, def_reg); 5258 } 5259} 5260 5261static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5262{ 5263 ir_backend_data *data = ctx->data; 
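/* scalar SSE2 FP binop: destructive two-operand form; op1 is brought into def_reg first, then op2 is consumed as a register, a constant-pool label or a memory operand */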
5264 dasm_State **Dst = &data->dasm_state; 5265 ir_type type = insn->type; 5266 ir_ref op1 = insn->op1; 5267 ir_ref op2 = insn->op2; 5268 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5269 ir_reg op1_reg = ctx->regs[def][1]; 5270 ir_reg op2_reg = ctx->regs[def][2]; 5271 5272 IR_ASSERT(def_reg != IR_REG_NONE); 5273 5274 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 5275 op1_reg = IR_REG_NUM(op1_reg); 5276 ir_emit_load(ctx, type, op1_reg, op1); 5277 } 5278 if (def_reg != op1_reg) { 5279 if (op1_reg != IR_REG_NONE) { 5280 ir_emit_fp_mov(ctx, type, def_reg, op1_reg); 5281 } else { 5282 ir_emit_load(ctx, type, def_reg, op1); 5283 } 5284 if (op1 == op2) { 5285 op2_reg = def_reg; 5286 } 5287 } 5288 if (op2_reg != IR_REG_NONE) { 5289 if (IR_REG_SPILLED(op2_reg)) { 5290 op2_reg = IR_REG_NUM(op2_reg); 5291 if (op1 != op2) { 5292 ir_emit_load(ctx, type, op2_reg, op2); 5293 } 5294 } 5295 switch (insn->op) { 5296 default: 5297 IR_ASSERT(0 && "NIY binary op"); 5298 case IR_ADD: 5299 | ASM_SSE2_REG_REG_OP adds, type, def_reg, op2_reg 5300 break; 5301 case IR_SUB: 5302 | ASM_SSE2_REG_REG_OP subs, type, def_reg, op2_reg 5303 break; 5304 case IR_MUL: 5305 | ASM_SSE2_REG_REG_OP muls, type, def_reg, op2_reg 5306 break; 5307 case IR_DIV: 5308 | ASM_SSE2_REG_REG_OP divs, type, def_reg, op2_reg 5309 break; 5310 case IR_MIN: 5311 | ASM_SSE2_REG_REG_OP mins, type, def_reg, op2_reg 5312 break; 5313 case IR_MAX: 5314 | ASM_SSE2_REG_REG_OP maxs, type, def_reg, op2_reg 5315 break; 5316 } 5317 } else if (IR_IS_CONST_REF(op2)) { 5318 int label = ir_const_label(ctx, op2); 5319 5320 switch (insn->op) { 5321 default: 5322 IR_ASSERT(0 && "NIY binary op"); 5323 case IR_ADD: 5324 | ASM_SSE2_REG_TXT_OP adds, type, def_reg, [=>label] 5325 break; 5326 case IR_SUB: 5327 | ASM_SSE2_REG_TXT_OP subs, type, def_reg, [=>label] 5328 break; 5329 case IR_MUL: 5330 | ASM_SSE2_REG_TXT_OP muls, type, def_reg, [=>label] 5331 break; 5332 case IR_DIV: 5333 | ASM_SSE2_REG_TXT_OP divs, type, def_reg, [=>label] 5334 break; 5335 case IR_MIN: 5336 | ASM_SSE2_REG_TXT_OP mins, type, def_reg, [=>label] 5337 break; 5338 case IR_MAX: 5339 | ASM_SSE2_REG_TXT_OP maxs, type, def_reg, [=>label] 5340 break; 5341 } 5342 } else { 5343 ir_mem mem; 5344 5345 if (ir_rule(ctx, op2) & IR_FUSED) { 5346 mem = ir_fuse_load(ctx, def, op2); 5347 } else { 5348 mem = ir_ref_spill_slot(ctx, op2); 5349 } 5350 switch (insn->op) { 5351 default: 5352 IR_ASSERT(0 && "NIY binary op"); 5353 case IR_ADD: 5354 | ASM_SSE2_REG_MEM_OP adds, type, def_reg, mem 5355 break; 5356 case IR_SUB: 5357 | ASM_SSE2_REG_MEM_OP subs, type, def_reg, mem 5358 break; 5359 case IR_MUL: 5360 | ASM_SSE2_REG_MEM_OP muls, type, def_reg, mem 5361 break; 5362 case IR_DIV: 5363 | ASM_SSE2_REG_MEM_OP divs, type, def_reg, mem 5364 break; 5365 case IR_MIN: 5366 | ASM_SSE2_REG_MEM_OP mins, type, def_reg, mem 5367 break; 5368 case IR_MAX: 5369 | ASM_SSE2_REG_MEM_OP maxs, type, def_reg, mem 5370 break; 5371 } 5372 } 5373 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5374 ir_emit_store(ctx, insn->type, def, def_reg); 5375 } 5376} 5377 5378static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5379{ 5380 ir_backend_data *data = ctx->data; 5381 dasm_State **Dst = &data->dasm_state; 5382 ir_type type = insn->type; 5383 ir_ref op1 = insn->op1; 5384 ir_ref op2 = insn->op2; 5385 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5386 ir_reg op1_reg = ctx->regs[def][1]; 5387 ir_reg op2_reg = ctx->regs[def][2]; 5388 5389 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 5390 5391 if 
(IR_REG_SPILLED(op1_reg)) { 5392 op1_reg = IR_REG_NUM(op1_reg); 5393 ir_emit_load(ctx, type, op1_reg, op1); 5394 } 5395 if (op2_reg != IR_REG_NONE) { 5396 if (IR_REG_SPILLED(op2_reg)) { 5397 op2_reg = IR_REG_NUM(op2_reg); 5398 if (op1 != op2) { 5399 ir_emit_load(ctx, type, op2_reg, op2); 5400 } 5401 } 5402 switch (insn->op) { 5403 default: 5404 IR_ASSERT(0 && "NIY binary op"); 5405 case IR_ADD: 5406 | ASM_AVX_REG_REG_REG_OP vadds, type, def_reg, op1_reg, op2_reg 5407 break; 5408 case IR_SUB: 5409 | ASM_AVX_REG_REG_REG_OP vsubs, type, def_reg, op1_reg, op2_reg 5410 break; 5411 case IR_MUL: 5412 | ASM_AVX_REG_REG_REG_OP vmuls, type, def_reg, op1_reg, op2_reg 5413 break; 5414 case IR_DIV: 5415 | ASM_AVX_REG_REG_REG_OP vdivs, type, def_reg, op1_reg, op2_reg 5416 break; 5417 case IR_MIN: 5418 | ASM_AVX_REG_REG_REG_OP vmins, type, def_reg, op1_reg, op2_reg 5419 break; 5420 case IR_MAX: 5421 | ASM_AVX_REG_REG_REG_OP vmaxs, type, def_reg, op1_reg, op2_reg 5422 break; 5423 } 5424 } else if (IR_IS_CONST_REF(op2)) { 5425 int label = ir_const_label(ctx, op2); 5426 5427 switch (insn->op) { 5428 default: 5429 IR_ASSERT(0 && "NIY binary op"); 5430 case IR_ADD: 5431 | ASM_AVX_REG_REG_TXT_OP vadds, type, def_reg, op1_reg, [=>label] 5432 break; 5433 case IR_SUB: 5434 | ASM_AVX_REG_REG_TXT_OP vsubs, type, def_reg, op1_reg, [=>label] 5435 break; 5436 case IR_MUL: 5437 | ASM_AVX_REG_REG_TXT_OP vmuls, type, def_reg, op1_reg, [=>label] 5438 break; 5439 case IR_DIV: 5440 | ASM_AVX_REG_REG_TXT_OP vdivs, type, def_reg, op1_reg, [=>label] 5441 break; 5442 case IR_MIN: 5443 | ASM_AVX_REG_REG_TXT_OP vmins, type, def_reg, op1_reg, [=>label] 5444 break; 5445 case IR_MAX: 5446 | ASM_AVX_REG_REG_TXT_OP vmaxs, type, def_reg, op1_reg, [=>label] 5447 break; 5448 } 5449 } else { 5450 ir_mem mem; 5451 5452 if (ir_rule(ctx, op2) & IR_FUSED) { 5453 mem = ir_fuse_load(ctx, def, op2); 5454 } else { 5455 mem = ir_ref_spill_slot(ctx, op2); 5456 } 5457 switch (insn->op) { 5458 default: 5459 IR_ASSERT(0 && "NIY binary op"); 5460 case IR_ADD: 5461 | ASM_AVX_REG_REG_MEM_OP vadds, type, def_reg, op1_reg, mem 5462 break; 5463 case IR_SUB: 5464 | ASM_AVX_REG_REG_MEM_OP vsubs, type, def_reg, op1_reg, mem 5465 break; 5466 case IR_MUL: 5467 | ASM_AVX_REG_REG_MEM_OP vmuls, type, def_reg, op1_reg, mem 5468 break; 5469 case IR_DIV: 5470 | ASM_AVX_REG_REG_MEM_OP vdivs, type, def_reg, op1_reg, mem 5471 break; 5472 case IR_MIN: 5473 | ASM_AVX_REG_REG_MEM_OP vmins, type, def_reg, op1_reg, mem 5474 break; 5475 case IR_MAX: 5476 | ASM_AVX_REG_REG_MEM_OP vmaxs, type, def_reg, op1_reg, mem 5477 break; 5478 } 5479 } 5480 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5481 ir_emit_store(ctx, insn->type, def, def_reg); 5482 } 5483} 5484 5485static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_ref root, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2) 5486{ 5487 ir_backend_data *data = ctx->data; 5488 dasm_State **Dst = &data->dasm_state; 5489 5490 if (op1_reg != IR_REG_NONE) { 5491 if (op2_reg != IR_REG_NONE) { 5492 | ASM_REG_REG_OP cmp, type, op1_reg, op2_reg 5493 } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { 5494 | ASM_REG_REG_OP test, type, op1_reg, op1_reg 5495 } else if (IR_IS_CONST_REF(op2)) { 5496 int32_t val = ir_fuse_imm(ctx, op2); 5497 | ASM_REG_IMM_OP cmp, type, op1_reg, val 5498 } else { 5499 ir_mem mem; 5500 5501 if (ir_rule(ctx, op2) & IR_FUSED) { 5502 mem = ir_fuse_load(ctx, root, op2); 5503 } else { 5504 mem = ir_ref_spill_slot(ctx, op2); 5505 } 
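/* op2 is neither a register nor an immediate here; compare op1 directly against its memory operand (fused load or spill slot) */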
5506 | ASM_REG_MEM_OP cmp, type, op1_reg, mem 5507 } 5508 } else if (IR_IS_CONST_REF(op1)) { 5509 IR_ASSERT(0); 5510 } else { 5511 ir_mem mem; 5512 5513 if (ir_rule(ctx, op1) & IR_FUSED) { 5514 mem = ir_fuse_load(ctx, root, op1); 5515 } else { 5516 mem = ir_ref_spill_slot(ctx, op1); 5517 } 5518 if (op2_reg != IR_REG_NONE) { 5519 | ASM_MEM_REG_OP cmp, type, mem, op2_reg 5520 } else { 5521 int32_t val = ir_fuse_imm(ctx, op2); 5522 | ASM_MEM_IMM_OP cmp, type, mem, val 5523 } 5524 } 5525} 5526 5527static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *cmp_insn) 5528{ 5529 ir_type type = ctx->ir_base[cmp_insn->op1].type; 5530 ir_ref op1 = cmp_insn->op1; 5531 ir_ref op2 = cmp_insn->op2; 5532 ir_reg op1_reg = ctx->regs[ref][1]; 5533 ir_reg op2_reg = ctx->regs[ref][2]; 5534 5535 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 5536 op1_reg = IR_REG_NUM(op1_reg); 5537 ir_emit_load(ctx, type, op1_reg, op1); 5538 } 5539 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 5540 op2_reg = IR_REG_NUM(op2_reg); 5541 if (op1 != op2) { 5542 ir_emit_load(ctx, type, op2_reg, op2); 5543 } 5544 } 5545 5546 ir_emit_cmp_int_common(ctx, type, root, cmp_insn, op1_reg, op1, op2_reg, op2); 5547} 5548 5549static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg) 5550{ 5551 ir_backend_data *data = ctx->data; 5552 dasm_State **Dst = &data->dasm_state; 5553 5554 switch (op) { 5555 default: 5556 IR_ASSERT(0 && "NIY binary op"); 5557 case IR_EQ: 5558 | sete Rb(def_reg) 5559 break; 5560 case IR_NE: 5561 | setne Rb(def_reg) 5562 break; 5563 case IR_LT: 5564 | setl Rb(def_reg) 5565 break; 5566 case IR_GE: 5567 | setge Rb(def_reg) 5568 break; 5569 case IR_LE: 5570 | setle Rb(def_reg) 5571 break; 5572 case IR_GT: 5573 | setg Rb(def_reg) 5574 break; 5575 case IR_ULT: 5576 | setb Rb(def_reg) 5577 break; 5578 case IR_UGE: 5579 | setae Rb(def_reg) 5580 break; 5581 case IR_ULE: 5582 | setbe Rb(def_reg) 5583 break; 5584 case IR_UGT: 5585 | seta Rb(def_reg) 5586 break; 5587 } 5588} 5589 5590static void _ir_emit_setcc_int_mem(ir_ctx *ctx, uint8_t op, ir_mem mem) 5591{ 5592 ir_backend_data *data = ctx->data; 5593 dasm_State **Dst = &data->dasm_state; 5594 5595 5596 switch (op) { 5597 default: 5598 IR_ASSERT(0 && "NIY binary op"); 5599 case IR_EQ: 5600 | ASM_TMEM_OP sete, byte, mem 5601 break; 5602 case IR_NE: 5603 | ASM_TMEM_OP setne, byte, mem 5604 break; 5605 case IR_LT: 5606 | ASM_TMEM_OP setl, byte, mem 5607 break; 5608 case IR_GE: 5609 | ASM_TMEM_OP setge, byte, mem 5610 break; 5611 case IR_LE: 5612 | ASM_TMEM_OP setle, byte, mem 5613 break; 5614 case IR_GT: 5615 | ASM_TMEM_OP setg, byte, mem 5616 break; 5617 case IR_ULT: 5618 | ASM_TMEM_OP setb, byte, mem 5619 break; 5620 case IR_UGE: 5621 | ASM_TMEM_OP setae, byte, mem 5622 break; 5623 case IR_ULE: 5624 | ASM_TMEM_OP setbe, byte, mem 5625 break; 5626 case IR_UGT: 5627 | ASM_TMEM_OP seta, byte, mem 5628 break; 5629 } 5630} 5631 5632static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5633{ 5634 ir_backend_data *data = ctx->data; 5635 dasm_State **Dst = &data->dasm_state; 5636 ir_type type = ctx->ir_base[insn->op1].type; 5637 ir_op op = insn->op; 5638 ir_ref op1 = insn->op1; 5639 ir_ref op2 = insn->op2; 5640 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5641 ir_reg op1_reg = ctx->regs[def][1]; 5642 ir_reg op2_reg = ctx->regs[def][2]; 5643 5644 IR_ASSERT(def_reg != IR_REG_NONE); 5645 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 5646 op1_reg = IR_REG_NUM(op1_reg); 5647 ir_emit_load(ctx, type, op1_reg, 
op1); 5648 } 5649 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 5650 op2_reg = IR_REG_NUM(op2_reg); 5651 if (op1 != op2) { 5652 ir_emit_load(ctx, type, op2_reg, op2); 5653 } 5654 } 5655 if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { 5656 if (op == IR_ULT) { 5657 /* always false */ 5658 | xor Ra(def_reg), Ra(def_reg) 5659 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5660 ir_emit_store(ctx, insn->type, def, def_reg); 5661 } 5662 return; 5663 } else if (op == IR_UGE) { 5664 /* always true */ 5665 | ASM_REG_IMM_OP mov, insn->type, def_reg, 1 5666 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5667 ir_emit_store(ctx, insn->type, def, def_reg); 5668 } 5669 return; 5670 } else if (op == IR_ULE) { 5671 op = IR_EQ; 5672 } else if (op == IR_UGT) { 5673 op = IR_NE; 5674 } 5675 } 5676 ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2); 5677 _ir_emit_setcc_int(ctx, op, def_reg); 5678 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5679 ir_emit_store(ctx, insn->type, def, def_reg); 5680 } 5681} 5682 5683static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op op) 5684{ 5685 ir_backend_data *data = ctx->data; 5686 dasm_State **Dst = &data->dasm_state; 5687 ir_insn *binop_insn = &ctx->ir_base[ref]; 5688 ir_type type = binop_insn->type; 5689 ir_ref op1 = binop_insn->op1; 5690 ir_ref op2 = binop_insn->op2; 5691 ir_reg op1_reg = ctx->regs[ref][1]; 5692 ir_reg op2_reg = ctx->regs[ref][2]; 5693 5694 IR_ASSERT(binop_insn->op == IR_AND); 5695 if (op1_reg != IR_REG_NONE) { 5696 if (IR_REG_SPILLED(op1_reg)) { 5697 op1_reg = IR_REG_NUM(op1_reg); 5698 ir_emit_load(ctx, type, op1_reg, op1); 5699 } 5700 if (op2_reg != IR_REG_NONE) { 5701 if (IR_REG_SPILLED(op2_reg)) { 5702 op2_reg = IR_REG_NUM(op2_reg); 5703 if (op1 != op2) { 5704 ir_emit_load(ctx, type, op2_reg, op2); 5705 } 5706 } 5707 | ASM_REG_REG_OP test, type, op1_reg, op2_reg 5708 } else if (IR_IS_CONST_REF(op2)) { 5709 int32_t val = ir_fuse_imm(ctx, op2); 5710 5711 if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) { 5712 | test Rb(op1_reg), Rb(op1_reg) 5713 } else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) { 5714 if (op1_reg == IR_REG_RAX) { 5715 | test ah, ah 5716 } else if (op1_reg == IR_REG_RBX) { 5717 | test bh, bh 5718 } else if (op1_reg == IR_REG_RCX) { 5719 | test ch, ch 5720 } else if (op1_reg == IR_REG_RDX) { 5721 | test dh, dh 5722 } else { 5723 IR_ASSERT(0); 5724 } 5725 } else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) { 5726 | test Rw(op1_reg), Rw(op1_reg) 5727 } else if ((op == IR_EQ || op == IR_NE) && val == -1) { 5728 | test Rd(op1_reg), Rd(op1_reg) 5729 } else { 5730 | ASM_REG_IMM_OP test, type, op1_reg, val 5731 } 5732 } else { 5733 ir_mem mem; 5734 5735 if (ir_rule(ctx, op2) & IR_FUSED) { 5736 mem = ir_fuse_load(ctx, root, op2); 5737 } else { 5738 mem = ir_ref_spill_slot(ctx, op2); 5739 } 5740 | ASM_REG_MEM_OP test, type, op1_reg, mem 5741 } 5742 } else if (IR_IS_CONST_REF(op1)) { 5743 IR_ASSERT(0); 5744 } else { 5745 ir_mem mem; 5746 5747 if (ir_rule(ctx, op1) & IR_FUSED) { 5748 mem = ir_fuse_load(ctx, root, op1); 5749 } else { 5750 mem = ir_ref_spill_slot(ctx, op1); 5751 } 5752 if (op2_reg != IR_REG_NONE) { 5753 if (IR_REG_SPILLED(op2_reg)) { 5754 op2_reg = IR_REG_NUM(op2_reg); 5755 if (op1 != op2) { 5756 ir_emit_load(ctx, type, op2_reg, op2); 5757 } 5758 } 5759 | ASM_MEM_REG_OP test, type, mem, op2_reg 5760 } else { 5761 IR_ASSERT(!IR_IS_CONST_REF(op1)); 5762 
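/* op1 is a memory operand and op2 has no register, so op2 must be a constant encodable as a 32-bit immediate */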
int32_t val = ir_fuse_imm(ctx, op2); 5763 | ASM_MEM_IMM_OP test, type, mem, val 5764 } 5765 } 5766} 5767 5768static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5769{ 5770 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5771 5772 IR_ASSERT(def_reg != IR_REG_NONE); 5773 ir_emit_test_int_common(ctx, def, insn->op1, insn->op); 5774 _ir_emit_setcc_int(ctx, insn->op, def_reg); 5775 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5776 ir_emit_store(ctx, insn->type, def, def_reg); 5777 } 5778} 5779 5780static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5781{ 5782 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5783 5784 IR_ASSERT(def_reg != IR_REG_NONE); 5785 _ir_emit_setcc_int(ctx, insn->op, def_reg); 5786 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5787 ir_emit_store(ctx, insn->type, def, def_reg); 5788 } 5789} 5790 5791static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn) 5792{ 5793 ir_backend_data *data = ctx->data; 5794 dasm_State **Dst = &data->dasm_state; 5795 ir_type type = ctx->ir_base[cmp_insn->op1].type; 5796 ir_op op = cmp_insn->op; 5797 ir_ref op1, op2; 5798 ir_reg op1_reg, op2_reg; 5799 5800 op1 = cmp_insn->op1; 5801 op2 = cmp_insn->op2; 5802 op1_reg = ctx->regs[cmp_ref][1]; 5803 op2_reg = ctx->regs[cmp_ref][2]; 5804 5805 if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) { 5806 ir_reg tmp_reg; 5807 5808 SWAP_REFS(op1, op2); 5809 tmp_reg = op1_reg; 5810 op1_reg = op2_reg; 5811 op2_reg = tmp_reg; 5812 } 5813 5814 5815 IR_ASSERT(op1_reg != IR_REG_NONE); 5816 if (IR_REG_SPILLED(op1_reg)) { 5817 op1_reg = IR_REG_NUM(op1_reg); 5818 ir_emit_load(ctx, type, op1_reg, op1); 5819 } 5820 if (op2_reg != IR_REG_NONE) { 5821 if (IR_REG_SPILLED(op2_reg)) { 5822 op2_reg = IR_REG_NUM(op2_reg); 5823 if (op1 != op2) { 5824 ir_emit_load(ctx, type, op2_reg, op2); 5825 } 5826 } 5827 | ASM_FP_REG_REG_OP ucomis, type, op1_reg, op2_reg 5828 } else if (IR_IS_CONST_REF(op2)) { 5829 int label = ir_const_label(ctx, op2); 5830 5831 | ASM_FP_REG_TXT_OP ucomis, type, op1_reg, [=>label] 5832 } else { 5833 ir_mem mem; 5834 5835 if (ir_rule(ctx, op2) & IR_FUSED) { 5836 mem = ir_fuse_load(ctx, root, op2); 5837 } else { 5838 mem = ir_ref_spill_slot(ctx, op2); 5839 } 5840 | ASM_FP_REG_MEM_OP ucomis, type, op1_reg, mem 5841 } 5842 return op; 5843} 5844 5845static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5846{ 5847 ir_backend_data *data = ctx->data; 5848 dasm_State **Dst = &data->dasm_state; 5849 ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn); 5850 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5851 ir_reg tmp_reg = ctx->regs[def][3]; 5852 5853 IR_ASSERT(def_reg != IR_REG_NONE); 5854 switch (op) { 5855 default: 5856 IR_ASSERT(0 && "NIY binary op"); 5857 case IR_EQ: 5858 | setnp Rb(def_reg) 5859 | mov Rd(tmp_reg), 0 5860 | cmovne Rd(def_reg), Rd(tmp_reg) 5861 break; 5862 case IR_NE: 5863 | setp Rb(def_reg) 5864 | mov Rd(tmp_reg), 1 5865 | cmovne Rd(def_reg), Rd(tmp_reg) 5866 break; 5867 case IR_LT: 5868 | setnp Rb(def_reg) 5869 | mov Rd(tmp_reg), 0 5870 | cmovae Rd(def_reg), Rd(tmp_reg) 5871 break; 5872 case IR_GE: 5873 | setae Rb(def_reg) 5874 break; 5875 case IR_LE: 5876 | setnp Rb(def_reg) 5877 | mov Rd(tmp_reg), 0 5878 | cmova Rd(def_reg), Rd(tmp_reg) 5879 break; 5880 case IR_GT: 5881 | seta Rb(def_reg) 5882 break; 5883 case IR_ULT: 5884 | setb Rb(def_reg) 5885 break; 5886 case IR_UGE: 5887 | setp Rb(def_reg) 5888 | mov Rd(tmp_reg), 1 5889 | cmovae Rd(def_reg), Rd(tmp_reg) 5890 break; 5891 
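		/* For the remaining predicates: ucomis* reports an unordered result (a NaN
		 * operand) as ZF=1, PF=1, CF=1, while ordered results clear PF.  The
		 * U-prefixed FP predicates are the "unordered or ..." comparisons, so
		 * IR_ULE can rely on SETBE alone (CF|ZF is already 1 for NaN), whereas
		 * IR_UGT needs the SETP/CMOVA pair to force the result to 1 on NaN. */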
case IR_ULE: 5892 | setbe Rb(def_reg) 5893 break; 5894 case IR_UGT: 5895 | setp Rb(def_reg) 5896 | mov Rd(tmp_reg), 1 5897 | cmova Rd(def_reg), Rd(tmp_reg) 5898 break; 5899 } 5900 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5901 ir_emit_store(ctx, insn->type, def, def_reg); 5902 } 5903} 5904 5905static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) 5906{ 5907 uint32_t true_block, false_block; 5908 ir_backend_data *data = ctx->data; 5909 dasm_State **Dst = &data->dasm_state; 5910 5911 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 5912 if (true_block != next_block) { 5913 | jmp =>true_block 5914 } 5915} 5916 5917static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) 5918{ 5919 uint32_t true_block, false_block; 5920 ir_backend_data *data = ctx->data; 5921 dasm_State **Dst = &data->dasm_state; 5922 5923 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 5924 if (false_block != next_block) { 5925 | jmp =>false_block 5926 } 5927} 5928 5929static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp) 5930{ 5931 uint32_t true_block, false_block; 5932 ir_backend_data *data = ctx->data; 5933 dasm_State **Dst = &data->dasm_state; 5934 5935 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 5936 if (true_block == next_block) { 5937 /* swap to avoid unconditional JMP */ 5938 if (int_cmp || op == IR_EQ || op == IR_NE) { 5939 op ^= 1; // reverse 5940 } else { 5941 op ^= 5; // reverse 5942 } 5943 true_block = false_block; 5944 false_block = 0; 5945 } else if (false_block == next_block) { 5946 false_block = 0; 5947 } 5948 5949 if (int_cmp) { 5950 switch (op) { 5951 default: 5952 IR_ASSERT(0 && "NIY binary op"); 5953 case IR_EQ: 5954 | je =>true_block 5955 break; 5956 case IR_NE: 5957 | jne =>true_block 5958 break; 5959 case IR_LT: 5960 | jl =>true_block 5961 break; 5962 case IR_GE: 5963 | jge =>true_block 5964 break; 5965 case IR_LE: 5966 | jle =>true_block 5967 break; 5968 case IR_GT: 5969 | jg =>true_block 5970 break; 5971 case IR_ULT: 5972 | jb =>true_block 5973 break; 5974 case IR_UGE: 5975 | jae =>true_block 5976 break; 5977 case IR_ULE: 5978 | jbe =>true_block 5979 break; 5980 case IR_UGT: 5981 | ja =>true_block 5982 break; 5983 } 5984 } else { 5985 switch (op) { 5986 default: 5987 IR_ASSERT(0 && "NIY binary op"); 5988 case IR_EQ: 5989 if (!false_block) { 5990 | jp >1 5991 | je =>true_block 5992 |1: 5993 } else { 5994 | jp =>false_block 5995 | je =>true_block 5996 } 5997 break; 5998 case IR_NE: 5999 | jne =>true_block 6000 | jp =>true_block 6001 break; 6002 case IR_LT: 6003 if (!false_block) { 6004 | jp >1 6005 | jb =>true_block 6006 |1: 6007 } else { 6008 | jp =>false_block 6009 | jb =>true_block 6010 } 6011 break; 6012 case IR_GE: 6013 | jae =>true_block 6014 break; 6015 case IR_LE: 6016 if (!false_block) { 6017 | jp >1 6018 | jbe =>true_block 6019 |1: 6020 } else { 6021 | jp =>false_block 6022 | jbe =>true_block 6023 } 6024 break; 6025 case IR_GT: 6026 | ja =>true_block 6027 break; 6028 case IR_ULT: 6029 | jb =>true_block 6030 break; 6031 case IR_UGE: 6032 | jp =>true_block 6033 | jae =>true_block 6034 break; 6035 case IR_ULE: 6036 | jbe =>true_block 6037 break; 6038 case IR_UGT: 6039 | jp =>true_block 6040 | ja =>true_block 6041 break; 6042 } 6043 } 6044 if (false_block) { 6045 | jmp =>false_block 6046 } 6047} 6048 6049static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 6050{ 6051 
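	/* Fused compare-and-branch: reload any spilled operands, strength-reduce
	 * unsigned comparisons against the constant 0 (ULT is always false, UGE is
	 * always true, ULE becomes EQ, UGT becomes NE), skip re-emitting the CMP
	 * when the guarding IF already performed the same comparison, and finally
	 * branch through ir_emit_jcc(). */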
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_op op = cmp_insn->op;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_ref op1 = cmp_insn->op1;
	ir_ref op2 = cmp_insn->op2;
	ir_reg op1_reg = ctx->regs[insn->op2][1];
	ir_reg op2_reg = ctx->regs[insn->op2][2];

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}
	if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
		if (op == IR_ULT) {
			/* always false */
			ir_emit_jmp_false(ctx, b, def, next_block);
			return;
		} else if (op == IR_UGE) {
			/* always true */
			ir_emit_jmp_true(ctx, b, def, next_block);
			return;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
	}

	bool same_comparison = 0;
	ir_insn *prev_insn = &ctx->ir_base[insn->op1];
	if (prev_insn->op == IR_IF_TRUE || prev_insn->op == IR_IF_FALSE) {
		if (ir_rule(ctx, prev_insn->op1) == IR_CMP_AND_BRANCH_INT) {
			prev_insn = &ctx->ir_base[prev_insn->op1];
			prev_insn = &ctx->ir_base[prev_insn->op2];
			if (prev_insn->op1 == cmp_insn->op1 && prev_insn->op2 == cmp_insn->op2) {
				same_comparison = true;
			}
		}
	}
	if (!same_comparison) {
		ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2);
	}
	ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
}

static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
	ir_ref op2 = insn->op2;
	ir_op op = ctx->ir_base[op2].op;

	if (op >= IR_EQ && op <= IR_UGT) {
		op2 = ctx->ir_base[op2].op1;
	} else {
		IR_ASSERT(op == IR_AND);
		op = IR_NE;
	}

	ir_emit_test_int_common(ctx, def, op2, op);
	ir_emit_jcc(ctx, b, def, insn, next_block, op, 1);
}

static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
	ir_emit_jcc(ctx, b, def, insn, next_block, op, 0);
}

static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
	ir_type type = ctx->ir_base[insn->op2].type;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, insn->op2);
		}
		| ASM_REG_REG_OP test, type, op2_reg, op2_reg
	} else if (IR_IS_CONST_REF(insn->op2)) {
		uint32_t true_block, false_block;

		ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
		if (ir_const_is_true(&ctx->ir_base[insn->op2])) {
			if (true_block != next_block) {
				| jmp =>true_block
			}
		} else {
			if (false_block != next_block) {
				| jmp =>false_block
			}
		}
		return;
	} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
		uint32_t true_block, false_block;

		ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
		if (true_block != next_block) {
			| jmp
=>true_block 6157 } 6158 return; 6159 } else { 6160 ir_mem mem; 6161 6162 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 6163 mem = ir_fuse_load(ctx, def, insn->op2); 6164 } else { 6165 mem = ir_ref_spill_slot(ctx, insn->op2); 6166 } 6167 | ASM_MEM_IMM_OP cmp, type, mem, 0 6168 } 6169 ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1); 6170} 6171 6172static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6173{ 6174 ir_backend_data *data = ctx->data; 6175 dasm_State **Dst = &data->dasm_state; 6176 ir_type type = insn->type; 6177 ir_ref op1 = insn->op1; 6178 ir_ref op2 = insn->op2; 6179 ir_ref op3 = insn->op3; 6180 ir_type op1_type = ctx->ir_base[op1].type; 6181 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6182 ir_reg op1_reg = ctx->regs[def][1]; 6183 ir_reg op2_reg = ctx->regs[def][2]; 6184 ir_reg op3_reg = ctx->regs[def][3]; 6185 6186 IR_ASSERT(def_reg != IR_REG_NONE); 6187 6188 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6189 op2_reg = IR_REG_NUM(op2_reg); 6190 ir_emit_load(ctx, type, op2_reg, op2); 6191 if (op1 == op2) { 6192 op1_reg = op2_reg; 6193 } 6194 if (op3 == op2) { 6195 op3_reg = op2_reg; 6196 } 6197 } 6198 if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { 6199 op3_reg = IR_REG_NUM(op3_reg); 6200 ir_emit_load(ctx, type, op3_reg, op3); 6201 if (op1 == op2) { 6202 op1_reg = op3_reg; 6203 } 6204 } 6205 if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) { 6206 op1_reg = IR_REG_NUM(op1_reg); 6207 ir_emit_load(ctx, op1_type, op1_reg, op1); 6208 } 6209 6210 if (IR_IS_TYPE_INT(op1_type)) { 6211 if (op1_reg != IR_REG_NONE) { 6212 | ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg 6213 } else { 6214 ir_mem mem = ir_ref_spill_slot(ctx, op1); 6215 6216 | ASM_MEM_IMM_OP cmp, op1_type, mem, 0 6217 } 6218 if (IR_IS_TYPE_INT(type)) { 6219 IR_ASSERT(op2_reg != IR_REG_NONE || op3_reg != IR_REG_NONE); 6220 if (op3_reg != IR_REG_NONE) { 6221 if (op3_reg == def_reg) { 6222 IR_ASSERT(op2_reg != IR_REG_NONE); 6223 | ASM_REG_REG_OP2 cmovne, type, def_reg, op2_reg 6224 } else { 6225 if (op2_reg != IR_REG_NONE) { 6226 if (def_reg != op2_reg) { 6227 if (IR_IS_TYPE_INT(type)) { 6228 ir_emit_mov(ctx, type, def_reg, op2_reg); 6229 } else { 6230 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6231 } 6232 } 6233 } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) { 6234 /* prevent "xor" and flags clobbering */ 6235 ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64); 6236 } else { 6237 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6238 } 6239 | ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg 6240 } 6241 } else { 6242 IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg); 6243 if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) { 6244 /* prevent "xor" and flags clobbering */ 6245 ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64); 6246 } else { 6247 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6248 } 6249 | ASM_REG_REG_OP2 cmovne, type, def_reg, op2_reg 6250 } 6251 6252 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6253 ir_emit_store(ctx, type, def, def_reg); 6254 } 6255 return; 6256 } 6257 | je >2 6258 } else { 6259 if (!data->double_zero_const) { 6260 data->double_zero_const = 1; 6261 ir_rodata(ctx); 6262 |.align 16 6263 |->double_zero_const: 6264 |.dword 0, 0 6265 |.code 6266 } 6267 | ASM_FP_REG_TXT_OP ucomis, op1_type, op1_reg, [->double_zero_const] 6268 | jp >1 6269 | je >2 6270 |1: 6271 } 6272 6273 if (op2_reg != IR_REG_NONE) { 6274 if (def_reg != op2_reg) { 6275 if 
(IR_IS_TYPE_INT(type)) { 6276 ir_emit_mov(ctx, type, def_reg, op2_reg); 6277 } else { 6278 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6279 } 6280 } 6281 } else { 6282 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6283 } 6284 | jmp >3 6285 |2: 6286 if (op3_reg != IR_REG_NONE) { 6287 if (def_reg != op3_reg) { 6288 if (IR_IS_TYPE_INT(type)) { 6289 ir_emit_mov(ctx, type, def_reg, op3_reg); 6290 } else { 6291 ir_emit_fp_mov(ctx, type, def_reg, op3_reg); 6292 } 6293 } 6294 } else { 6295 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6296 } 6297 |3: 6298 6299 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6300 ir_emit_store(ctx, type, def, def_reg); 6301 } 6302} 6303 6304static void ir_emit_cond_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6305{ 6306 ir_backend_data *data = ctx->data; 6307 dasm_State **Dst = &data->dasm_state; 6308 ir_type type = insn->type; 6309 ir_ref op2 = insn->op2; 6310 ir_ref op3 = insn->op3; 6311 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6312 ir_reg op2_reg = ctx->regs[def][2]; 6313 ir_reg op3_reg = ctx->regs[def][3]; 6314 ir_op op; 6315 6316 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6317 op2_reg = IR_REG_NUM(op2_reg); 6318 ir_emit_load(ctx, type, op2_reg, op2); 6319 if (op3 == op2) { 6320 op3_reg = op2_reg; 6321 } 6322 } 6323 if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { 6324 op3_reg = IR_REG_NUM(op3_reg); 6325 ir_emit_load(ctx, type, op3_reg, op3); 6326 } 6327 6328 ir_emit_cmp_int_common2(ctx, def, insn->op1, &ctx->ir_base[insn->op1]); 6329 op = ctx->ir_base[insn->op1].op; 6330 6331 if (IR_IS_TYPE_INT(type)) { 6332 if (op3_reg != IR_REG_NONE) { 6333 if (op3_reg == def_reg) { 6334 IR_ASSERT(op2_reg != IR_REG_NONE); 6335 op3_reg = op2_reg; 6336 op ^= 1; // reverse 6337 } else { 6338 if (op2_reg != IR_REG_NONE) { 6339 if (def_reg != op2_reg) { 6340// if (IR_IS_TYPE_INT(type)) { 6341 ir_emit_mov(ctx, type, def_reg, op2_reg); 6342// } else { 6343// ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6344// } 6345 } 6346 } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) { 6347 /* prevent "xor" and flags clobbering */ 6348 ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64); 6349 } else { 6350 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6351 } 6352 } 6353 } else { 6354 IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg); 6355 if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) { 6356 /* prevent "xor" and flags clobbering */ 6357 ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64); 6358 } else { 6359 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6360 } 6361 op3_reg = op2_reg; 6362 op ^= 1; // reverse 6363 } 6364 6365 switch (op) { 6366 default: 6367 IR_ASSERT(0 && "NIY binary op"); 6368 case IR_EQ: 6369 | ASM_REG_REG_OP2 cmovne, type, def_reg, op3_reg 6370 break; 6371 case IR_NE: 6372 | ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg 6373 break; 6374 case IR_LT: 6375 | ASM_REG_REG_OP2 cmovge, type, def_reg, op3_reg 6376 break; 6377 case IR_GE: 6378 | ASM_REG_REG_OP2 cmovl, type, def_reg, op3_reg 6379 break; 6380 case IR_LE: 6381 | ASM_REG_REG_OP2 cmovg, type, def_reg, op3_reg 6382 break; 6383 case IR_GT: 6384 | ASM_REG_REG_OP2 cmovle, type, def_reg, op3_reg 6385 break; 6386 case IR_ULT: 6387 | ASM_REG_REG_OP2 cmovae, type, def_reg, op3_reg 6388 break; 6389 case IR_UGE: 6390 | ASM_REG_REG_OP2 cmovb, type, def_reg, op3_reg 6391 break; 6392 case IR_ULE: 6393 | ASM_REG_REG_OP2 cmova, type, def_reg, op3_reg 6394 break; 6395 case IR_UGT: 6396 | ASM_REG_REG_OP2 cmovbe, 
type, def_reg, op3_reg 6397 break; 6398 } 6399 } else { 6400 switch (op) { 6401 default: 6402 IR_ASSERT(0 && "NIY binary op"); 6403 case IR_EQ: 6404 | jne >2 6405 break; 6406 case IR_NE: 6407 | je >2 6408 break; 6409 case IR_LT: 6410 | jge >2 6411 break; 6412 case IR_GE: 6413 | jl >2 6414 break; 6415 case IR_LE: 6416 | jg >2 6417 break; 6418 case IR_GT: 6419 | jle >2 6420 break; 6421 case IR_ULT: 6422 | jae >2 6423 break; 6424 case IR_UGE: 6425 | jb >2 6426 break; 6427 case IR_ULE: 6428 | ja >2 6429 break; 6430 case IR_UGT: 6431 | jbe >2 6432 break; 6433 } 6434 |1: 6435 6436 if (op2_reg != IR_REG_NONE) { 6437 if (def_reg != op2_reg) { 6438 if (IR_IS_TYPE_INT(type)) { 6439 ir_emit_mov(ctx, type, def_reg, op2_reg); 6440 } else { 6441 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6442 } 6443 } 6444 } else { 6445 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6446 } 6447 | jmp >3 6448 |2: 6449 if (op3_reg != IR_REG_NONE) { 6450 if (def_reg != op3_reg) { 6451 if (IR_IS_TYPE_INT(type)) { 6452 ir_emit_mov(ctx, type, def_reg, op3_reg); 6453 } else { 6454 ir_emit_fp_mov(ctx, type, def_reg, op3_reg); 6455 } 6456 } 6457 } else { 6458 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6459 } 6460 |3: 6461 } 6462 6463 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6464 ir_emit_store(ctx, type, def, def_reg); 6465 } 6466} 6467 6468static void ir_emit_cond_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6469{ 6470 ir_backend_data *data = ctx->data; 6471 dasm_State **Dst = &data->dasm_state; 6472 ir_type type = insn->type; 6473 ir_ref op2 = insn->op2; 6474 ir_ref op3 = insn->op3; 6475 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6476 ir_reg op2_reg = ctx->regs[def][2]; 6477 ir_reg op3_reg = ctx->regs[def][3]; 6478 ir_op op; 6479 6480 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6481 op2_reg = IR_REG_NUM(op2_reg); 6482 ir_emit_load(ctx, type, op2_reg, op2); 6483 if (op3 == op2) { 6484 op3_reg = op2_reg; 6485 } 6486 } 6487 if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { 6488 op3_reg = IR_REG_NUM(op3_reg); 6489 ir_emit_load(ctx, type, op3_reg, op3); 6490 } 6491 6492 op = ir_emit_cmp_fp_common(ctx, def, insn->op1, &ctx->ir_base[insn->op1]); 6493 6494 switch (op) { 6495 default: 6496 IR_ASSERT(0 && "NIY binary op"); 6497 case IR_EQ: 6498 | jne >2 6499 | jp >2 6500 break; 6501 case IR_NE: 6502 | jp >1 6503 | je >2 6504 break; 6505 case IR_LT: 6506 | jp >2 6507 | jae >2 6508 break; 6509 case IR_GE: 6510 | jb >2 6511 break; 6512 case IR_LE: 6513 | jp >2 6514 | ja >2 6515 break; 6516 case IR_GT: 6517 | jbe >2 6518 break; 6519 case IR_ULT: 6520 | jae >2 6521 break; 6522 case IR_UGE: 6523 | jp >1 6524 | jb >2 6525 break; 6526 case IR_ULE: 6527 | ja >2 6528 break; 6529 case IR_UGT: 6530 | jp >1 6531 | jbe >2 6532 break; 6533 } 6534 |1: 6535 6536 if (op2_reg != IR_REG_NONE) { 6537 if (def_reg != op2_reg) { 6538 if (IR_IS_TYPE_INT(type)) { 6539 ir_emit_mov(ctx, type, def_reg, op2_reg); 6540 } else { 6541 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6542 } 6543 } 6544 } else { 6545 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6546 } 6547 | jmp >3 6548 |2: 6549 if (op3_reg != IR_REG_NONE) { 6550 if (def_reg != op3_reg) { 6551 if (IR_IS_TYPE_INT(type)) { 6552 ir_emit_mov(ctx, type, def_reg, op3_reg); 6553 } else { 6554 ir_emit_fp_mov(ctx, type, def_reg, op3_reg); 6555 } 6556 } 6557 } else { 6558 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6559 } 6560 |3: 6561 6562 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6563 ir_emit_store(ctx, type, def, def_reg); 6564 } 6565} 6566 6567static void 
ir_emit_return_void(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	ir_emit_epilogue(ctx);

#ifdef IR_TARGET_X86
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) {
		| ret ctx->param_stack_size
		return;
	}
#endif

	| ret
}

static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_reg op2_reg = ctx->regs[ref][2];

	if (op2_reg != IR_REG_INT_RET1) {
		ir_type type = ctx->ir_base[insn->op2].type;

		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
		}
	}
	ir_emit_return_void(ctx);
}

static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_reg op2_reg = ctx->regs[ref][2];
	ir_type type = ctx->ir_base[insn->op2].type;

#ifdef IR_REG_FP_RET1
	if (op2_reg != IR_REG_FP_RET1) {
		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
		}
	}
#else
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
		ir_reg fp;
		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);

		if (type == IR_DOUBLE) {
			| fld qword [Ra(fp)+offset]
		} else {
			IR_ASSERT(type == IR_FLOAT);
			| fld dword [Ra(fp)+offset]
		}
	} else {
		int32_t offset = ctx->ret_slot;
		ir_reg fp;

		IR_ASSERT(offset != -1);
		offset = IR_SPILL_POS_TO_OFFSET(offset);
		fp = (ctx->flags & IR_USE_FRAME_POINTER) ?
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 6634 ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg); 6635 if (type == IR_DOUBLE) { 6636 | fld qword [Ra(fp)+offset] 6637 } else { 6638 IR_ASSERT(type == IR_FLOAT); 6639 | fld dword [Ra(fp)+offset] 6640 } 6641 } 6642#endif 6643 ir_emit_return_void(ctx); 6644} 6645 6646static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6647{ 6648 ir_type dst_type = insn->type; 6649 ir_type src_type = ctx->ir_base[insn->op1].type; 6650 ir_backend_data *data = ctx->data; 6651 dasm_State **Dst = &data->dasm_state; 6652 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6653 ir_reg op1_reg = ctx->regs[def][1]; 6654 6655 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6656 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 6657 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 6658 IR_ASSERT(def_reg != IR_REG_NONE); 6659 6660 if (op1_reg != IR_REG_NONE) { 6661 if (IR_REG_SPILLED(op1_reg)) { 6662 op1_reg = IR_REG_NUM(op1_reg); 6663 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6664 } 6665 if (ir_type_size[src_type] == 1) { 6666 if (ir_type_size[dst_type] == 2) { 6667 | movsx Rw(def_reg), Rb(op1_reg) 6668 } else if (ir_type_size[dst_type] == 4) { 6669 | movsx Rd(def_reg), Rb(op1_reg) 6670 } else { 6671 IR_ASSERT(ir_type_size[dst_type] == 8); 6672 IR_ASSERT(sizeof(void*) == 8); 6673|.if X64 6674 | movsx Rq(def_reg), Rb(op1_reg) 6675|.endif 6676 } 6677 } else if (ir_type_size[src_type] == 2) { 6678 if (ir_type_size[dst_type] == 4) { 6679 | movsx Rd(def_reg), Rw(op1_reg) 6680 } else { 6681 IR_ASSERT(ir_type_size[dst_type] == 8); 6682 IR_ASSERT(sizeof(void*) == 8); 6683|.if X64 6684 | movsx Rq(def_reg), Rw(op1_reg) 6685|.endif 6686 } 6687 } else { 6688 IR_ASSERT(ir_type_size[src_type] == 4); 6689 IR_ASSERT(ir_type_size[dst_type] == 8); 6690 IR_ASSERT(sizeof(void*) == 8); 6691|.if X64 6692 | movsxd Rq(def_reg), Rd(op1_reg) 6693|.endif 6694 } 6695 } else if (IR_IS_CONST_REF(insn->op1)) { 6696 IR_ASSERT(0); 6697 } else { 6698 ir_mem mem; 6699 6700 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6701 mem = ir_fuse_load(ctx, def, insn->op1); 6702 } else { 6703 mem = ir_ref_spill_slot(ctx, insn->op1); 6704 } 6705 6706 if (ir_type_size[src_type] == 1) { 6707 if (ir_type_size[dst_type] == 2) { 6708 | ASM_TXT_TMEM_OP movsx, Rw(def_reg), byte, mem 6709 } else if (ir_type_size[dst_type] == 4) { 6710 | ASM_TXT_TMEM_OP movsx, Rd(def_reg), byte, mem 6711 } else { 6712 IR_ASSERT(ir_type_size[dst_type] == 8); 6713 IR_ASSERT(sizeof(void*) == 8); 6714|.if X64 6715 | ASM_TXT_TMEM_OP movsx, Rq(def_reg), byte, mem 6716|.endif 6717 } 6718 } else if (ir_type_size[src_type] == 2) { 6719 if (ir_type_size[dst_type] == 4) { 6720 | ASM_TXT_TMEM_OP movsx, Rd(def_reg), word, mem 6721 } else { 6722 IR_ASSERT(ir_type_size[dst_type] == 8); 6723 IR_ASSERT(sizeof(void*) == 8); 6724|.if X64 6725 | ASM_TXT_TMEM_OP movsx, Rq(def_reg), word, mem 6726|.endif 6727 } 6728 } else { 6729 IR_ASSERT(ir_type_size[src_type] == 4); 6730 IR_ASSERT(ir_type_size[dst_type] == 8); 6731 IR_ASSERT(sizeof(void*) == 8); 6732|.if X64 6733 | ASM_TXT_TMEM_OP movsxd, Rq(def_reg), dword, mem 6734|.endif 6735 } 6736 } 6737 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6738 ir_emit_store(ctx, dst_type, def, def_reg); 6739 } 6740} 6741 6742static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6743{ 6744 ir_type dst_type = insn->type; 6745 ir_type src_type = ctx->ir_base[insn->op1].type; 6746 ir_backend_data *data = ctx->data; 6747 dasm_State **Dst = &data->dasm_state; 6748 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 
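	/* Note: for the 32->64-bit case there is no MOVZX form; a plain 32-bit MOV
	 * is used instead, since writing a 32-bit register implicitly zeroes the
	 * upper half on x86-64 (see the ir_type_size[src_type] == 4 paths below). */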
6749 ir_reg op1_reg = ctx->regs[def][1]; 6750 6751 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6752 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 6753 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 6754 IR_ASSERT(def_reg != IR_REG_NONE); 6755 6756 if (op1_reg != IR_REG_NONE) { 6757 if (IR_REG_SPILLED(op1_reg)) { 6758 op1_reg = IR_REG_NUM(op1_reg); 6759 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6760 } 6761 if (ir_type_size[src_type] == 1) { 6762 if (ir_type_size[dst_type] == 2) { 6763 | movzx Rw(def_reg), Rb(op1_reg) 6764 } else if (ir_type_size[dst_type] == 4) { 6765 | movzx Rd(def_reg), Rb(op1_reg) 6766 } else { 6767 IR_ASSERT(ir_type_size[dst_type] == 8); 6768 IR_ASSERT(sizeof(void*) == 8); 6769|.if X64 6770 | movzx Rq(def_reg), Rb(op1_reg) 6771|.endif 6772 } 6773 } else if (ir_type_size[src_type] == 2) { 6774 if (ir_type_size[dst_type] == 4) { 6775 | movzx Rd(def_reg), Rw(op1_reg) 6776 } else { 6777 IR_ASSERT(ir_type_size[dst_type] == 8); 6778 IR_ASSERT(sizeof(void*) == 8); 6779|.if X64 6780 | movzx Rq(def_reg), Rw(op1_reg) 6781|.endif 6782 } 6783 } else { 6784 IR_ASSERT(ir_type_size[src_type] == 4); 6785 IR_ASSERT(ir_type_size[dst_type] == 8); 6786 IR_ASSERT(sizeof(void*) == 8); 6787|.if X64 6788 /* Avoid zero extension to the same register. This may be not always safe ??? */ 6789 if (op1_reg != def_reg) { 6790 | mov Rd(def_reg), Rd(op1_reg) 6791 } 6792|.endif 6793 } 6794 } else if (IR_IS_CONST_REF(insn->op1)) { 6795 IR_ASSERT(0); 6796 } else { 6797 ir_mem mem; 6798 6799 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6800 mem = ir_fuse_load(ctx, def, insn->op1); 6801 } else { 6802 mem = ir_ref_spill_slot(ctx, insn->op1); 6803 } 6804 6805 if (ir_type_size[src_type] == 1) { 6806 if (ir_type_size[dst_type] == 2) { 6807 | ASM_TXT_TMEM_OP movzx, Rw(def_reg), byte, mem 6808 } else if (ir_type_size[dst_type] == 4) { 6809 | ASM_TXT_TMEM_OP movzx, Rd(def_reg), byte, mem 6810 } else { 6811 IR_ASSERT(ir_type_size[dst_type] == 8); 6812 IR_ASSERT(sizeof(void*) == 8); 6813|.if X64 6814 | ASM_TXT_TMEM_OP movzx, Rq(def_reg), byte, mem 6815|.endif 6816 } 6817 } else if (ir_type_size[src_type] == 2) { 6818 if (ir_type_size[dst_type] == 4) { 6819 | ASM_TXT_TMEM_OP movzx, Rd(def_reg), word, mem 6820 } else { 6821 IR_ASSERT(ir_type_size[dst_type] == 8); 6822 IR_ASSERT(sizeof(void*) == 8); 6823|.if X64 6824 | ASM_TXT_TMEM_OP movzx, Rq(def_reg), word, mem 6825|.endif 6826 } 6827 } else { 6828 IR_ASSERT(ir_type_size[src_type] == 4); 6829 IR_ASSERT(ir_type_size[dst_type] == 8); 6830|.if X64 6831 | ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem 6832|.endif 6833 } 6834 } 6835 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6836 ir_emit_store(ctx, dst_type, def, def_reg); 6837 } 6838} 6839 6840static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6841{ 6842 ir_type dst_type = insn->type; 6843 ir_type src_type = ctx->ir_base[insn->op1].type; 6844 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6845 ir_reg op1_reg = ctx->regs[def][1]; 6846 6847 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6848 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 6849 IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]); 6850 IR_ASSERT(def_reg != IR_REG_NONE); 6851 if (op1_reg != IR_REG_NONE) { 6852 if (IR_REG_SPILLED(op1_reg)) { 6853 op1_reg = IR_REG_NUM(op1_reg); 6854 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6855 } 6856 if (op1_reg != def_reg) { 6857 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 6858 } 6859 } else { 6860 ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); 6861 } 6862 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6863 
ir_emit_store(ctx, dst_type, def, def_reg); 6864 } 6865} 6866 6867static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6868{ 6869 ir_type dst_type = insn->type; 6870 ir_type src_type = ctx->ir_base[insn->op1].type; 6871 ir_backend_data *data = ctx->data; 6872 dasm_State **Dst = &data->dasm_state; 6873 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6874 ir_reg op1_reg = ctx->regs[def][1]; 6875 6876 IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); 6877 IR_ASSERT(def_reg != IR_REG_NONE); 6878 if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { 6879 if (op1_reg != IR_REG_NONE) { 6880 if (IR_REG_SPILLED(op1_reg)) { 6881 op1_reg = IR_REG_NUM(op1_reg); 6882 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6883 } 6884 if (op1_reg != def_reg) { 6885 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 6886 } 6887 } else { 6888 ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); 6889 } 6890 } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { 6891 if (op1_reg != IR_REG_NONE) { 6892 if (IR_REG_SPILLED(op1_reg)) { 6893 op1_reg = IR_REG_NUM(op1_reg); 6894 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6895 } 6896 if (op1_reg != def_reg) { 6897 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 6898 } 6899 } else { 6900 ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); 6901 } 6902 } else if (IR_IS_TYPE_FP(src_type)) { 6903 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 6904 if (op1_reg != IR_REG_NONE) { 6905 if (IR_REG_SPILLED(op1_reg)) { 6906 op1_reg = IR_REG_NUM(op1_reg); 6907 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6908 } 6909 if (src_type == IR_DOUBLE) { 6910 IR_ASSERT(sizeof(void*) == 8); 6911|.if X64 6912 if (ctx->mflags & IR_X86_AVX) { 6913 | vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6914 } else { 6915 | movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6916 } 6917|.endif 6918 } else { 6919 IR_ASSERT(src_type == IR_FLOAT); 6920 if (ctx->mflags & IR_X86_AVX) { 6921 | vmovd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6922 } else { 6923 | movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6924 } 6925 } 6926 } else if (IR_IS_CONST_REF(insn->op1)) { 6927 ir_insn *_insn = &ctx->ir_base[insn->op1]; 6928 IR_ASSERT(!IR_IS_SYM_CONST(_insn->op)); 6929 if (src_type == IR_DOUBLE) { 6930 IR_ASSERT(sizeof(void*) == 8); 6931|.if X64 6932 | mov64 Rq(def_reg), _insn->val.i64 6933|.endif 6934 } else { 6935 IR_ASSERT(src_type == IR_FLOAT); 6936 | mov Rd(def_reg), _insn->val.i32 6937 } 6938 } else { 6939 ir_mem mem; 6940 6941 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6942 mem = ir_fuse_load(ctx, def, insn->op1); 6943 } else { 6944 mem = ir_ref_spill_slot(ctx, insn->op1); 6945 } 6946 6947 if (src_type == IR_DOUBLE) { 6948 IR_ASSERT(sizeof(void*) == 8); 6949|.if X64 6950 | ASM_TXT_TMEM_OP mov, Rq(def_reg), qword, mem 6951|.endif 6952 } else { 6953 IR_ASSERT(src_type == IR_FLOAT); 6954 | ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem 6955 } 6956 } 6957 } else if (IR_IS_TYPE_FP(dst_type)) { 6958 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6959 if (op1_reg != IR_REG_NONE) { 6960 if (IR_REG_SPILLED(op1_reg)) { 6961 op1_reg = IR_REG_NUM(op1_reg); 6962 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6963 } 6964 if (dst_type == IR_DOUBLE) { 6965 IR_ASSERT(sizeof(void*) == 8); 6966|.if X64 6967 if (ctx->mflags & IR_X86_AVX) { 6968 | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 6969 } else { 6970 | movd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 6971 } 6972|.endif 6973 } else { 6974 IR_ASSERT(dst_type == IR_FLOAT); 6975 if (ctx->mflags & IR_X86_AVX) { 6976 | vmovd 
xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 6977 } else { 6978 | movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 6979 } 6980 } 6981 } else if (IR_IS_CONST_REF(insn->op1)) { 6982 int label = ir_const_label(ctx, insn->op1); 6983 6984 | ASM_FP_REG_TXT_OP movs, dst_type, def_reg, [=>label] 6985 } else { 6986 ir_mem mem; 6987 6988 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6989 mem = ir_fuse_load(ctx, def, insn->op1); 6990 } else { 6991 mem = ir_ref_spill_slot(ctx, insn->op1); 6992 } 6993 6994 | ASM_FP_REG_MEM_OP movs, dst_type, def_reg, mem 6995 } 6996 } 6997 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6998 ir_emit_store(ctx, dst_type, def, def_reg); 6999 } 7000} 7001 7002static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7003{ 7004 ir_type dst_type = insn->type; 7005 ir_type src_type = ctx->ir_base[insn->op1].type; 7006 ir_backend_data *data = ctx->data; 7007 dasm_State **Dst = &data->dasm_state; 7008 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7009 ir_reg op1_reg = ctx->regs[def][1]; 7010 7011 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 7012 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 7013 IR_ASSERT(def_reg != IR_REG_NONE); 7014 if (op1_reg != IR_REG_NONE) { 7015 bool src64 = 0; 7016 7017 if (IR_REG_SPILLED(op1_reg)) { 7018 op1_reg = IR_REG_NUM(op1_reg); 7019 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 7020 } 7021 if (IR_IS_TYPE_SIGNED(src_type)) { 7022 if (ir_type_size[src_type] < 4) { 7023|.if X64 7024|| if (ir_type_size[src_type] == 1) { 7025 | movsx Rq(op1_reg), Rb(op1_reg) 7026|| } else { 7027 | movsx Rq(op1_reg), Rw(op1_reg) 7028|| } 7029|| src64 = 1; 7030|.else 7031|| if (ir_type_size[src_type] == 1) { 7032 | movsx Rd(op1_reg), Rb(op1_reg) 7033|| } else { 7034 | movsx Rd(op1_reg), Rw(op1_reg) 7035|| } 7036|.endif 7037 } else if (ir_type_size[src_type] > 4) { 7038 src64 = 1; 7039 } 7040 } else { 7041 if (ir_type_size[src_type] < 8) { 7042|.if X64 7043|| if (ir_type_size[src_type] == 1) { 7044 | movzx Rq(op1_reg), Rb(op1_reg) 7045|| } else if (ir_type_size[src_type] == 2) { 7046 | movzx Rq(op1_reg), Rw(op1_reg) 7047|| } 7048|| src64 = 1; 7049|.else 7050|| if (ir_type_size[src_type] == 1) { 7051 | movzx Rd(op1_reg), Rb(op1_reg) 7052|| } else if (ir_type_size[src_type] == 2) { 7053 | movzx Rd(op1_reg), Rw(op1_reg) 7054|| } 7055|.endif 7056 } else { 7057 // TODO: uint64_t -> double 7058 src64 = 1; 7059 } 7060 } 7061 if (!src64) { 7062 if (dst_type == IR_DOUBLE) { 7063 if (ctx->mflags & IR_X86_AVX) { 7064 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7065 | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7066 } else { 7067 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7068 | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7069 } 7070 } else { 7071 IR_ASSERT(dst_type == IR_FLOAT); 7072 if (ctx->mflags & IR_X86_AVX) { 7073 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7074 | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7075 } else { 7076 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7077 | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7078 } 7079 } 7080 } else { 7081 IR_ASSERT(sizeof(void*) == 8); 7082|.if X64 7083 if (dst_type == IR_DOUBLE) { 7084 if (ctx->mflags & IR_X86_AVX) { 7085 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7086 | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 
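					/* The VXORPS above (and the PXOR in the non-AVX path below)
					 * zero the destination first: (v)cvtsi2sd only writes the low
					 * lanes, so clearing the register first avoids carrying a
					 * false dependency on its previous contents. */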
7087 } else { 7088 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7089 | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 7090 } 7091 } else { 7092 IR_ASSERT(dst_type == IR_FLOAT); 7093 if (ctx->mflags & IR_X86_AVX) { 7094 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7095 | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 7096 } else { 7097 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7098 | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 7099 } 7100 } 7101|.endif 7102 } 7103 } else { 7104 ir_mem mem; 7105 bool src64 = ir_type_size[src_type] == 8; 7106 7107 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 7108 mem = ir_fuse_load(ctx, def, insn->op1); 7109 } else { 7110 mem = ir_ref_spill_slot(ctx, insn->op1); 7111 } 7112 7113 if (!src64) { 7114 if (dst_type == IR_DOUBLE) { 7115 if (ctx->mflags & IR_X86_AVX) { 7116 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7117 | ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7118 } else { 7119 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7120 | ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7121 } 7122 } else { 7123 IR_ASSERT(dst_type == IR_FLOAT); 7124 if (ctx->mflags & IR_X86_AVX) { 7125 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7126 | ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7127 } else { 7128 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7129 | ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7130 } 7131 } 7132 } else { 7133 IR_ASSERT(sizeof(void*) == 8); 7134|.if X64 7135 if (dst_type == IR_DOUBLE) { 7136 if (ctx->mflags & IR_X86_AVX) { 7137 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7138 | ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7139 } else { 7140 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7141 | ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7142 } 7143 } else { 7144 IR_ASSERT(dst_type == IR_FLOAT); 7145 if (ctx->mflags & IR_X86_AVX) { 7146 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7147 | ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7148 } else { 7149 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7150 | ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7151 } 7152 } 7153|.endif 7154 } 7155 } 7156 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7157 ir_emit_store(ctx, dst_type, def, def_reg); 7158 } 7159} 7160 7161static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7162{ 7163 ir_type dst_type = insn->type; 7164 ir_type src_type = ctx->ir_base[insn->op1].type; 7165 ir_backend_data *data = ctx->data; 7166 dasm_State **Dst = &data->dasm_state; 7167 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7168 ir_reg op1_reg = ctx->regs[def][1]; 7169 bool dst64 = 0; 7170 7171 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 7172 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 7173 IR_ASSERT(def_reg != IR_REG_NONE); 7174 if (IR_IS_TYPE_SIGNED(dst_type) ? 
ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) { 7175 // TODO: we might need to perform truncation from 32/64 bit integer 7176 dst64 = 1; 7177 } 7178 if (op1_reg != IR_REG_NONE) { 7179 if (IR_REG_SPILLED(op1_reg)) { 7180 op1_reg = IR_REG_NUM(op1_reg); 7181 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 7182 } 7183 if (!dst64) { 7184 if (src_type == IR_DOUBLE) { 7185 if (ctx->mflags & IR_X86_AVX) { 7186 | vcvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7187 } else { 7188 | cvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7189 } 7190 } else { 7191 IR_ASSERT(src_type == IR_FLOAT); 7192 if (ctx->mflags & IR_X86_AVX) { 7193 | vcvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7194 } else { 7195 | cvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7196 } 7197 } 7198 } else { 7199 IR_ASSERT(sizeof(void*) == 8); 7200|.if X64 7201 if (src_type == IR_DOUBLE) { 7202 if (ctx->mflags & IR_X86_AVX) { 7203 | vcvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7204 } else { 7205 | cvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7206 } 7207 } else { 7208 IR_ASSERT(src_type == IR_FLOAT); 7209 if (ctx->mflags & IR_X86_AVX) { 7210 | vcvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7211 } else { 7212 | cvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7213 } 7214 } 7215|.endif 7216 } 7217 } else if (IR_IS_CONST_REF(insn->op1)) { 7218 int label = ir_const_label(ctx, insn->op1); 7219 7220 if (!dst64) { 7221 if (src_type == IR_DOUBLE) { 7222 if (ctx->mflags & IR_X86_AVX) { 7223 | vcvttsd2si Rd(def_reg), qword [=>label] 7224 } else { 7225 | cvttsd2si Rd(def_reg), qword [=>label] 7226 } 7227 } else { 7228 IR_ASSERT(src_type == IR_FLOAT); 7229 if (ctx->mflags & IR_X86_AVX) { 7230 | vcvttss2si Rd(def_reg), dword [=>label] 7231 } else { 7232 | cvttss2si Rd(def_reg), dword [=>label] 7233 } 7234 } 7235 } else { 7236 IR_ASSERT(sizeof(void*) == 8); 7237|.if X64 7238 if (src_type == IR_DOUBLE) { 7239 if (ctx->mflags & IR_X86_AVX) { 7240 | vcvttsd2si Rq(def_reg), qword [=>label] 7241 } else { 7242 | cvttsd2si Rq(def_reg), qword [=>label] 7243 } 7244 } else { 7245 IR_ASSERT(src_type == IR_FLOAT); 7246 if (ctx->mflags & IR_X86_AVX) { 7247 | vcvttss2si Rq(def_reg), dword [=>label] 7248 } else { 7249 | cvttss2si Rq(def_reg), dword [=>label] 7250 } 7251 } 7252|.endif 7253 } 7254 } else { 7255 ir_mem mem; 7256 7257 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 7258 mem = ir_fuse_load(ctx, def, insn->op1); 7259 } else { 7260 mem = ir_ref_spill_slot(ctx, insn->op1); 7261 } 7262 7263 if (!dst64) { 7264 if (src_type == IR_DOUBLE) { 7265 if (ctx->mflags & IR_X86_AVX) { 7266 | ASM_TXT_TMEM_OP vcvttsd2si, Rd(def_reg), qword, mem 7267 } else { 7268 | ASM_TXT_TMEM_OP cvttsd2si, Rd(def_reg), qword, mem 7269 } 7270 } else { 7271 IR_ASSERT(src_type == IR_FLOAT); 7272 if (ctx->mflags & IR_X86_AVX) { 7273 | ASM_TXT_TMEM_OP vcvttss2si, Rd(def_reg), dword, mem 7274 } else { 7275 | ASM_TXT_TMEM_OP cvttss2si, Rd(def_reg), dword, mem 7276 } 7277 } 7278 } else { 7279 IR_ASSERT(sizeof(void*) == 8); 7280|.if X64 7281 if (src_type == IR_DOUBLE) { 7282 if (ctx->mflags & IR_X86_AVX) { 7283 | ASM_TXT_TMEM_OP vcvttsd2si, Rq(def_reg), qword, mem 7284 } else { 7285 | ASM_TXT_TMEM_OP cvttsd2si, Rq(def_reg), qword, mem 7286 } 7287 } else { 7288 IR_ASSERT(src_type == IR_FLOAT); 7289 if (ctx->mflags & IR_X86_AVX) { 7290 | ASM_TXT_TMEM_OP vcvttss2si, Rq(def_reg), dword, mem 7291 } else { 7292 | ASM_TXT_TMEM_OP cvttss2si, Rq(def_reg), dword, mem 7293 } 7294 } 7295|.endif 7296 } 7297 } 7298 if (IR_REG_SPILLED(ctx->regs[def][0])) { 
7299 ir_emit_store(ctx, dst_type, def, def_reg); 7300 } 7301} 7302 7303static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7304{ 7305 ir_type dst_type = insn->type; 7306 ir_type src_type = ctx->ir_base[insn->op1].type; 7307 ir_backend_data *data = ctx->data; 7308 dasm_State **Dst = &data->dasm_state; 7309 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7310 ir_reg op1_reg = ctx->regs[def][1]; 7311 7312 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 7313 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 7314 IR_ASSERT(def_reg != IR_REG_NONE); 7315 if (op1_reg != IR_REG_NONE) { 7316 if (IR_REG_SPILLED(op1_reg)) { 7317 op1_reg = IR_REG_NUM(op1_reg); 7318 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 7319 } 7320 if (src_type == dst_type) { 7321 if (op1_reg != def_reg) { 7322 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 7323 } 7324 } else if (src_type == IR_DOUBLE) { 7325 if (ctx->mflags & IR_X86_AVX) { 7326 | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7327 } else { 7328 | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7329 } 7330 } else { 7331 IR_ASSERT(src_type == IR_FLOAT); 7332 if (ctx->mflags & IR_X86_AVX) { 7333 | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7334 } else { 7335 | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7336 } 7337 } 7338 } else if (IR_IS_CONST_REF(insn->op1)) { 7339 int label = ir_const_label(ctx, insn->op1); 7340 7341 if (src_type == IR_DOUBLE) { 7342 if (ctx->mflags & IR_X86_AVX) { 7343 | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] 7344 } else { 7345 | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] 7346 } 7347 } else { 7348 IR_ASSERT(src_type == IR_FLOAT); 7349 if (ctx->mflags & IR_X86_AVX) { 7350 | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] 7351 } else { 7352 | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] 7353 } 7354 } 7355 } else { 7356 ir_mem mem; 7357 7358 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 7359 mem = ir_fuse_load(ctx, def, insn->op1); 7360 } else { 7361 mem = ir_ref_spill_slot(ctx, insn->op1); 7362 } 7363 7364 if (src_type == IR_DOUBLE) { 7365 if (ctx->mflags & IR_X86_AVX) { 7366 | ASM_TXT_TXT_TMEM_OP vcvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7367 } else { 7368 | ASM_TXT_TMEM_OP cvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7369 } 7370 } else { 7371 IR_ASSERT(src_type == IR_FLOAT); 7372 if (ctx->mflags & IR_X86_AVX) { 7373 | ASM_TXT_TXT_TMEM_OP vcvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7374 } else { 7375 | ASM_TXT_TMEM_OP cvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7376 } 7377 } 7378 } 7379 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7380 ir_emit_store(ctx, dst_type, def, def_reg); 7381 } 7382} 7383 7384static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7385{ 7386 ir_ref type = insn->type; 7387 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7388 ir_reg op1_reg = ctx->regs[def][1]; 7389 7390 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 7391 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 7392 op1_reg = IR_REG_NUM(op1_reg); 7393 ir_emit_load(ctx, type, op1_reg, insn->op1); 7394 } 7395 if (def_reg == op1_reg) { 7396 /* same reg */ 7397 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 7398 ir_emit_mov(ctx, type, def_reg, op1_reg); 7399 } else if (def_reg != 
IR_REG_NONE) { 7400 ir_emit_load(ctx, type, def_reg, insn->op1); 7401 } else if (op1_reg != IR_REG_NONE) { 7402 ir_emit_store(ctx, type, def, op1_reg); 7403 } else { 7404 IR_ASSERT(0); 7405 } 7406 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 7407 ir_emit_store(ctx, type, def, def_reg); 7408 } 7409} 7410 7411static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7412{ 7413 ir_type type = insn->type; 7414 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7415 ir_reg op1_reg = ctx->regs[def][1]; 7416 7417 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 7418 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 7419 op1_reg = IR_REG_NUM(op1_reg); 7420 ir_emit_load(ctx, type, op1_reg, insn->op1); 7421 } 7422 if (def_reg == op1_reg) { 7423 /* same reg */ 7424 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 7425 ir_emit_fp_mov(ctx, type, def_reg, op1_reg); 7426 } else if (def_reg != IR_REG_NONE) { 7427 ir_emit_load(ctx, type, def_reg, insn->op1); 7428 } else if (op1_reg != IR_REG_NONE) { 7429 ir_emit_store(ctx, type, def, op1_reg); 7430 } else { 7431 IR_ASSERT(0); 7432 } 7433 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 7434 ir_emit_store(ctx, type, def, def_reg); 7435 } 7436} 7437 7438static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7439{ 7440 ir_backend_data *data = ctx->data; 7441 dasm_State **Dst = &data->dasm_state; 7442 ir_ref type = insn->type; 7443 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7444 ir_mem mem; 7445 int32_t offset; 7446 ir_reg fp; 7447 7448 IR_ASSERT(def_reg != IR_REG_NONE); 7449 mem = ir_var_spill_slot(ctx, insn->op1); 7450 fp = IR_MEM_BASE(mem); 7451 offset = IR_MEM_OFFSET(mem); 7452 | lea Ra(def_reg), aword [Ra(fp)+offset] 7453 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7454 ir_emit_store(ctx, type, def, def_reg); 7455 } 7456} 7457 7458static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7459{ 7460 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 7461 ir_ref type = insn->type; 7462 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7463 ir_reg fp; 7464 ir_mem mem; 7465 7466 IR_ASSERT(var_insn->op == IR_VAR); 7467 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7468 mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 7469 if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { 7470 return; // fake load 7471 } 7472 IR_ASSERT(def_reg != IR_REG_NONE); 7473 7474 ir_emit_load_mem(ctx, type, def_reg, mem); 7475 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7476 ir_emit_store(ctx, type, def, def_reg); 7477 } 7478} 7479 7480static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7481{ 7482 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 7483 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 7484 ir_ref type = val_insn->type; 7485 ir_reg op3_reg = ctx->regs[ref][3]; 7486 ir_reg fp; 7487 ir_mem mem; 7488 7489 IR_ASSERT(var_insn->op == IR_VAR); 7490 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7491 mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 7492 if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) 7493 && !IR_IS_CONST_REF(insn->op3) 7494 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7495 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { 7496 return; // fake store 7497 } 7498 if (IR_IS_CONST_REF(insn->op3)) { 7499 ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); 7500 } else { 7501 IR_ASSERT(op3_reg != IR_REG_NONE); 7502 if (IR_REG_SPILLED(op3_reg)) { 7503 op3_reg = IR_REG_NUM(op3_reg); 7504 ir_emit_load(ctx, type, op3_reg, insn->op3); 7505 } 7506 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 7507 } 7508} 7509 7510static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7511{ 7512 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 7513 ir_ref type = ctx->ir_base[insn->op3].type; 7514 ir_reg op3_reg = ctx->regs[ref][3]; 7515 ir_reg fp; 7516 ir_mem mem; 7517 7518 IR_ASSERT(var_insn->op == IR_VAR); 7519 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7520 mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 7521 if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) 7522 && !IR_IS_CONST_REF(insn->op3) 7523 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7524 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { 7525 return; // fake store 7526 } 7527 if (IR_IS_CONST_REF(insn->op3)) { 7528 ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); 7529 } else { 7530 IR_ASSERT(op3_reg != IR_REG_NONE); 7531 if (IR_REG_SPILLED(op3_reg)) { 7532 op3_reg = IR_REG_NUM(op3_reg); 7533 ir_emit_load(ctx, type, op3_reg, insn->op3); 7534 } 7535 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 7536 } 7537} 7538 7539static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7540{ 7541 ir_ref type = insn->type; 7542 ir_reg op2_reg = ctx->regs[def][2]; 7543 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7544 ir_mem mem; 7545 7546 if (ctx->use_lists[def].count == 1) { 7547 /* dead load */ 7548 return; 7549 } 7550 IR_ASSERT(def_reg != IR_REG_NONE); 7551 if (op2_reg != IR_REG_NONE) { 7552 if (IR_REG_SPILLED(op2_reg)) { 7553 op2_reg = IR_REG_NUM(op2_reg); 7554 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7555 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7556 } 7557 mem = IR_MEM_B(op2_reg); 7558 } else if (IR_IS_CONST_REF(insn->op2)) { 7559 mem = ir_fuse_addr_const(ctx, insn->op2); 7560 } else { 7561 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7562 mem = ir_fuse_addr(ctx, def, insn->op2); 7563 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 7564 if (!ir_may_avoid_spill_load(ctx, def, def)) { 7565 ir_emit_load_mem_int(ctx, type, def_reg, mem); 7566 } 7567 /* avoid load to the same location (valid only when register is not reused) */ 7568 return; 7569 } 7570 } 7571 7572 ir_emit_load_mem_int(ctx, type, def_reg, mem); 7573 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7574 ir_emit_store(ctx, type, def, def_reg); 7575 } 7576} 7577 7578static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7579{ 7580 ir_ref type = insn->type; 7581 ir_reg op2_reg = ctx->regs[def][2]; 7582 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7583 ir_mem mem; 7584 7585 if (ctx->use_lists[def].count == 1) { 7586 /* dead load */ 7587 return; 7588 } 7589 IR_ASSERT(def_reg != IR_REG_NONE); 7590 if (op2_reg != IR_REG_NONE) { 7591 if (IR_REG_SPILLED(op2_reg)) { 7592 op2_reg = IR_REG_NUM(op2_reg); 7593 
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7594 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7595 } 7596 mem = IR_MEM_B(op2_reg); 7597 } else if (IR_IS_CONST_REF(insn->op2)) { 7598 mem = ir_fuse_addr_const(ctx, insn->op2); 7599 } else { 7600 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7601 mem = ir_fuse_addr(ctx, def, insn->op2); 7602 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 7603 if (!ir_may_avoid_spill_load(ctx, def, def)) { 7604 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 7605 } 7606 /* avoid load to the same location (valid only when register is not reused) */ 7607 return; 7608 } 7609 } 7610 7611 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 7612 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7613 ir_emit_store(ctx, type, def, def_reg); 7614 } 7615} 7616 7617static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7618{ 7619 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 7620 ir_ref type = val_insn->type; 7621 ir_reg op2_reg = ctx->regs[ref][2]; 7622 ir_reg op3_reg = ctx->regs[ref][3]; 7623 ir_mem mem; 7624 7625 if (op2_reg != IR_REG_NONE) { 7626 if (IR_REG_SPILLED(op2_reg)) { 7627 op2_reg = IR_REG_NUM(op2_reg); 7628 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7629 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7630 } 7631 mem = IR_MEM_B(op2_reg); 7632 } else if (IR_IS_CONST_REF(insn->op2)) { 7633 mem = ir_fuse_addr_const(ctx, insn->op2); 7634 } else { 7635 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7636 mem = ir_fuse_addr(ctx, ref, insn->op2); 7637 if (!IR_IS_CONST_REF(insn->op3) 7638 && IR_REG_SPILLED(op3_reg) 7639 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7640 && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 7641 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 7642 op3_reg = IR_REG_NUM(op3_reg); 7643 ir_emit_load(ctx, type, op3_reg, insn->op3); 7644 } 7645 /* avoid store to the same location */ 7646 return; 7647 } 7648 } 7649 7650 if (IR_IS_CONST_REF(insn->op3)) { 7651 ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); 7652 } else { 7653 IR_ASSERT(op3_reg != IR_REG_NONE); 7654 if (IR_REG_SPILLED(op3_reg)) { 7655 op3_reg = IR_REG_NUM(op3_reg); 7656 ir_emit_load(ctx, type, op3_reg, insn->op3); 7657 } 7658 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 7659 } 7660} 7661 7662static void ir_emit_cmp_and_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7663{ 7664 ir_reg addr_reg = ctx->regs[ref][2]; 7665 ir_mem mem; 7666 ir_insn *cmp_insn = &ctx->ir_base[insn->op3]; 7667 ir_op op = cmp_insn->op; 7668 ir_type type = ctx->ir_base[cmp_insn->op1].type; 7669 ir_ref op1 = cmp_insn->op1; 7670 ir_ref op2 = cmp_insn->op2; 7671 ir_reg op1_reg = ctx->regs[insn->op3][1]; 7672 ir_reg op2_reg = ctx->regs[insn->op3][2]; 7673 7674 if (addr_reg != IR_REG_NONE) { 7675 if (IR_REG_SPILLED(addr_reg)) { 7676 addr_reg = IR_REG_NUM(addr_reg); 7677 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7678 ir_emit_load(ctx, IR_ADDR, addr_reg, insn->op2); 7679 } 7680 mem = IR_MEM_B(addr_reg); 7681 } else if (IR_IS_CONST_REF(insn->op2)) { 7682 mem = ir_fuse_addr_const(ctx, insn->op2); 7683 } else { 7684 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7685 mem = ir_fuse_addr(ctx, ref, insn->op2); 7686 } 7687 7688 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 7689 op1_reg = IR_REG_NUM(op1_reg); 7690 ir_emit_load(ctx, type, op1_reg, op1); 7691 } 7692 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7693 op2_reg = IR_REG_NUM(op2_reg); 7694 if (op1 != op2) { 7695 ir_emit_load(ctx, type, op2_reg, op2); 
7696 } 7697 } 7698 7699 ir_emit_cmp_int_common(ctx, type, ref, cmp_insn, op1_reg, op1, op2_reg, op2); 7700 _ir_emit_setcc_int_mem(ctx, op, mem); 7701} 7702 7703static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7704{ 7705 ir_ref type = ctx->ir_base[insn->op3].type; 7706 ir_reg op2_reg = ctx->regs[ref][2]; 7707 ir_reg op3_reg = ctx->regs[ref][3]; 7708 ir_mem mem; 7709 7710 IR_ASSERT(op3_reg != IR_REG_NONE); 7711 if (op2_reg != IR_REG_NONE) { 7712 if (IR_REG_SPILLED(op2_reg)) { 7713 op2_reg = IR_REG_NUM(op2_reg); 7714 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7715 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7716 } 7717 mem = IR_MEM_B(op2_reg); 7718 } else if (IR_IS_CONST_REF(insn->op2)) { 7719 mem = ir_fuse_addr_const(ctx, insn->op2); 7720 } else { 7721 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7722 mem = ir_fuse_addr(ctx, ref, insn->op2); 7723 if (!IR_IS_CONST_REF(insn->op3) 7724 && IR_REG_SPILLED(op3_reg) 7725 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7726 && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 7727 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 7728 op3_reg = IR_REG_NUM(op3_reg); 7729 ir_emit_load(ctx, type, op3_reg, insn->op3); 7730 } 7731 /* avoid store to the same location */ 7732 return; 7733 } 7734 } 7735 7736 if (IR_IS_CONST_REF(insn->op3)) { 7737 ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); 7738 } else { 7739 IR_ASSERT(op3_reg != IR_REG_NONE); 7740 if (IR_REG_SPILLED(op3_reg)) { 7741 op3_reg = IR_REG_NUM(op3_reg); 7742 ir_emit_load(ctx, type, op3_reg, insn->op3); 7743 } 7744 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 7745 } 7746} 7747 7748static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7749{ 7750 ir_reg src_reg = insn->op2; 7751 ir_type type = insn->type; 7752 7753 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { 7754 if (ctx->vregs[def] 7755 && ctx->live_intervals[ctx->vregs[def]] 7756 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { 7757 ir_emit_store(ctx, type, def, src_reg); 7758 } 7759 } else { 7760 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7761 7762 if (def_reg == IR_REG_NONE) { 7763 /* op3 is used as a flag that the value is already stored in memory. 
7764 * If op3 is set we don't have to store the value once again (in case of spilling) 7765 */ 7766 if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) { 7767 ir_emit_store(ctx, type, def, src_reg); 7768 } 7769 } else { 7770 if (src_reg != def_reg) { 7771 if (IR_IS_TYPE_INT(type)) { 7772 ir_emit_mov(ctx, type, def_reg, src_reg); 7773 } else { 7774 IR_ASSERT(IR_IS_TYPE_FP(type)); 7775 ir_emit_fp_mov(ctx, type, def_reg, src_reg); 7776 } 7777 } 7778 if (IR_REG_SPILLED(ctx->regs[def][0]) 7779 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) { 7780 ir_emit_store(ctx, type, def, def_reg); 7781 } 7782 } 7783 } 7784} 7785 7786static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7787{ 7788 ir_ref type = ctx->ir_base[insn->op2].type; 7789 ir_reg op2_reg = ctx->regs[ref][2]; 7790 ir_reg dst_reg = insn->op3; 7791 7792 if (op2_reg != IR_REG_NONE) { 7793 if (IR_REG_SPILLED(op2_reg)) { 7794 op2_reg = IR_REG_NUM(op2_reg); 7795 ir_emit_load(ctx, type, op2_reg, insn->op2); 7796 } 7797 if (op2_reg != dst_reg) { 7798 if (IR_IS_TYPE_INT(type)) { 7799 ir_emit_mov(ctx, type, dst_reg, op2_reg); 7800 } else { 7801 IR_ASSERT(IR_IS_TYPE_FP(type)); 7802 ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); 7803 } 7804 } 7805 } else { 7806 ir_emit_load_ex(ctx, type, dst_reg, insn->op2, ref); 7807 } 7808} 7809 7810static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7811{ 7812 ir_backend_data *data = ctx->data; 7813 dasm_State **Dst = &data->dasm_state; 7814 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7815 7816 if (ctx->use_lists[def].count == 1) { 7817 /* dead alloca */ 7818 return; 7819 } 7820 if (IR_IS_CONST_REF(insn->op2)) { 7821 ir_insn *val = &ctx->ir_base[insn->op2]; 7822 int32_t size = val->val.i32; 7823 7824 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 7825 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 7826 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); 7827 IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); 7828 7829 /* Stack must be 16 byte aligned */ 7830 size = IR_ALIGNED_SIZE(size, 16); 7831 | ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size 7832 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 7833 ctx->call_stack_size += size; 7834 } 7835 } else { 7836 int32_t alignment = 16; 7837 ir_reg op2_reg = ctx->regs[def][2]; 7838 ir_type type = ctx->ir_base[insn->op2].type; 7839 7840 IR_ASSERT(ctx->flags & IR_FUNCTION); 7841 IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); 7842 IR_ASSERT(def_reg != IR_REG_NONE); 7843 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7844 op2_reg = IR_REG_NUM(op2_reg); 7845 ir_emit_load(ctx, type, op2_reg, insn->op2); 7846 } 7847 if (def_reg != op2_reg) { 7848 if (op2_reg != IR_REG_NONE) { 7849 ir_emit_mov(ctx, type, def_reg, op2_reg); 7850 } else { 7851 ir_emit_load(ctx, type, def_reg, insn->op2); 7852 } 7853 } 7854 7855 | ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1) 7856 | ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1) 7857 | ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg 7858 } 7859 if (def_reg != IR_REG_NONE) { 7860 | mov Ra(def_reg), Ra(IR_REG_RSP) 7861 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7862 ir_emit_store(ctx, insn->type, def, def_reg); 7863 } 7864 } else { 7865 ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); 7866 } 7867} 7868 7869static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7870{ 7871 ir_backend_data *data = ctx->data; 7872 dasm_State **Dst = &data->dasm_state; 7873 7874 if (IR_IS_CONST_REF(insn->op2)) { 7875 ir_insn *val = 
&ctx->ir_base[insn->op2]; 7876 int32_t size = val->val.i32; 7877 7878 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 7879 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 7880 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); 7881 IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); 7882 7883 /* Stack must be 16 byte aligned */ 7884 size = IR_ALIGNED_SIZE(size, 16); 7885 | ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size 7886 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 7887 ctx->call_stack_size -= size; 7888 } 7889 } else { 7890// int32_t alignment = 16; 7891 ir_reg op2_reg = ctx->regs[def][2]; 7892 ir_type type = ctx->ir_base[insn->op2].type; 7893 7894 IR_ASSERT(ctx->flags & IR_FUNCTION); 7895 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7896 op2_reg = IR_REG_NUM(op2_reg); 7897 ir_emit_load(ctx, type, op2_reg, insn->op2); 7898 } 7899 7900 // TODO: alignment ??? 7901 7902 | ASM_REG_REG_OP add, IR_ADDR, IR_REG_RSP, op2_reg 7903 } 7904} 7905 7906static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7907{ 7908 ir_backend_data *data = ctx->data; 7909 dasm_State **Dst = &data->dasm_state; 7910 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7911 7912 | mov Ra(def_reg), Ra(IR_REG_RSP) 7913 7914 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7915 ir_emit_store(ctx, IR_ADDR, def, def_reg); 7916 } 7917} 7918 7919static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7920{ 7921 ir_backend_data *data = ctx->data; 7922 dasm_State **Dst = &data->dasm_state; 7923 ir_reg op2_reg = ctx->regs[def][2]; 7924 7925 IR_ASSERT(op2_reg != IR_REG_NONE); 7926 if (IR_REG_SPILLED(op2_reg)) { 7927 op2_reg = IR_REG_NUM(op2_reg); 7928 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7929 } 7930 7931 | mov Ra(IR_REG_RSP), Ra(op2_reg) 7932} 7933 7934static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) 7935{ 7936 ir_backend_data *data = ctx->data; 7937 dasm_State **Dst = &data->dasm_state; 7938 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7939 7940 if (ctx->flags & IR_USE_FRAME_POINTER) { 7941 | mov Ra(def_reg), Ra(IR_REG_RBP) 7942 } else { 7943 | lea Ra(def_reg), [Ra(IR_REG_RSP)+(ctx->stack_frame_size + ctx->call_stack_size)] 7944 } 7945 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7946 ir_emit_store(ctx, IR_ADDR, def, def_reg); 7947 } 7948} 7949 7950static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7951{ 7952#if defined(_WIN64) || defined(IR_TARGET_X86) 7953 ir_backend_data *data = ctx->data; 7954 dasm_State **Dst = &data->dasm_state; 7955 ir_reg fp; 7956 int arg_area_offset; 7957 ir_reg op2_reg = ctx->regs[def][2]; 7958 ir_reg tmp_reg = ctx->regs[def][3]; 7959 int32_t offset; 7960 7961 IR_ASSERT(tmp_reg != IR_REG_NONE); 7962 if (op2_reg != IR_REG_NONE) { 7963 if (IR_REG_SPILLED(op2_reg)) { 7964 op2_reg = IR_REG_NUM(op2_reg); 7965 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7966 } 7967 offset = 0; 7968 } else { 7969 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 7970 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7971 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 7972 } 7973 7974 if (ctx->flags & IR_USE_FRAME_POINTER) { 7975 fp = IR_REG_FRAME_POINTER; 7976 arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; 7977 } else { 7978 fp = IR_REG_STACK_POINTER; 7979 arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; 7980 } 7981 | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] 7982 | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) 7983#elif defined(IR_TARGET_X64) 7984|.if X64 7985 ir_backend_data *data = ctx->data; 7986 dasm_State **Dst = &data->dasm_state; 7987 ir_reg fp; 7988 int reg_save_area_offset; 7989 int overflow_arg_area_offset; 7990 ir_reg op2_reg = ctx->regs[def][2]; 7991 ir_reg tmp_reg = ctx->regs[def][3]; 7992 bool have_reg_save_area = 0; 7993 int32_t offset; 7994 7995 IR_ASSERT(tmp_reg != IR_REG_NONE); 7996 if (op2_reg != IR_REG_NONE) { 7997 if (IR_REG_SPILLED(op2_reg)) { 7998 op2_reg = IR_REG_NUM(op2_reg); 7999 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8000 } 8001 offset = 0; 8002 } else { 8003 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8004 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8005 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8006 } 8007 8008 if (ctx->flags & IR_USE_FRAME_POINTER) { 8009 fp = IR_REG_FRAME_POINTER; 8010 reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); 8011 overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; 8012 } else { 8013 fp = IR_REG_STACK_POINTER; 8014 reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; 8015 overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; 8016 } 8017 8018 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 8019 | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] 8020 have_reg_save_area = 1; 8021 /* Set va_list.gp_offset */ 8022 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params 8023 } else { 8024 reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS; 8025 /* Set va_list.gp_offset */ 8026 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * IR_REG_INT_ARGS 8027 } 8028 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 8029 if (!have_reg_save_area) { 8030 | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] 8031 have_reg_save_area = 1; 8032 } 8033 /* Set va_list.fp_offset */ 8034 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params 8035 } else { 8036 /* Set va_list.fp_offset */ 8037 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS 8038 } 8039 if (have_reg_save_area) { 8040 /* Set va_list.reg_save_area */ 8041 | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) 8042 } 8043 | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] 8044 /* Set va_list.overflow_arg_area */ 8045 | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8046|.endif 8047#else 8048 IR_ASSERT(0 && "NIY va_start"); 8049#endif 8050} 8051 8052static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8053{ 8054#if defined(_WIN64) || 
defined(IR_TARGET_X86) 8055 ir_backend_data *data = ctx->data; 8056 dasm_State **Dst = &data->dasm_state; 8057 ir_reg tmp_reg = ctx->regs[def][1]; 8058 ir_reg op2_reg = ctx->regs[def][2]; 8059 ir_reg op3_reg = ctx->regs[def][3]; 8060 int32_t op2_offset, op3_offset; 8061 8062 IR_ASSERT(tmp_reg != IR_REG_NONE); 8063 if (op2_reg != IR_REG_NONE) { 8064 if (IR_REG_SPILLED(op2_reg)) { 8065 op2_reg = IR_REG_NUM(op2_reg); 8066 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8067 } 8068 op2_offset = 0; 8069 } else { 8070 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8071 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8072 op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8073 } 8074 if (op3_reg != IR_REG_NONE) { 8075 if (IR_REG_SPILLED(op3_reg)) { 8076 op3_reg = IR_REG_NUM(op3_reg); 8077 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 8078 } 8079 op3_offset = 0; 8080 } else { 8081 IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); 8082 op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8083 op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); 8084 } 8085 | mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset] 8086 | mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg) 8087#elif defined(IR_TARGET_X64) 8088|.if X64 8089 ir_backend_data *data = ctx->data; 8090 dasm_State **Dst = &data->dasm_state; 8091 ir_reg tmp_reg = ctx->regs[def][1]; 8092 ir_reg op2_reg = ctx->regs[def][2]; 8093 ir_reg op3_reg = ctx->regs[def][3]; 8094 int32_t op2_offset, op3_offset; 8095 8096 IR_ASSERT(tmp_reg != IR_REG_NONE); 8097 if (op2_reg != IR_REG_NONE) { 8098 if (IR_REG_SPILLED(op2_reg)) { 8099 op2_reg = IR_REG_NUM(op2_reg); 8100 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8101 } 8102 op2_offset = 0; 8103 } else { 8104 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8105 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8106 op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8107 } 8108 if (op3_reg != IR_REG_NONE) { 8109 if (IR_REG_SPILLED(op3_reg)) { 8110 op3_reg = IR_REG_NUM(op3_reg); 8111 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 8112 } 8113 op3_offset = 0; 8114 } else { 8115 IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); 8116 op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8117 op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); 8118 } 8119 | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))] 8120 | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) 8121 | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, fp_offset))] 8122 | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, fp_offset))], Ra(tmp_reg) 8123 | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, overflow_arg_area))] 8124 | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8125 | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, reg_save_area))] 8126 | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) 8127|.endif 8128#else 8129 IR_ASSERT(0 && "NIY va_copy"); 8130#endif 8131} 8132 8133static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8134{ 8135#if defined(_WIN64) || defined(IR_TARGET_X86) 8136 ir_backend_data *data = ctx->data; 8137 dasm_State **Dst = &data->dasm_state; 8138 ir_type type = insn->type; 8139 ir_reg def_reg = ctx->regs[def][0]; 8140 ir_reg op2_reg = ctx->regs[def][2]; 8141 ir_reg tmp_reg = ctx->regs[def][3]; 8142 int32_t offset; 8143 8144 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 8145 if (op2_reg != IR_REG_NONE) { 8146 if (IR_REG_SPILLED(op2_reg)) { 8147 op2_reg = IR_REG_NUM(op2_reg); 8148 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8149 } 8150 offset = 0; 8151 } else { 8152 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8153 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8154 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8155 } 8156 | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] 8157 ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); 8158 | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) 8159 | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) 8160 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8161 ir_emit_store(ctx, type, def, def_reg); 8162 } 8163#elif defined(IR_TARGET_X64) 8164|.if X64 8165 ir_backend_data *data = ctx->data; 8166 dasm_State **Dst = &data->dasm_state; 8167 ir_type type = insn->type; 8168 ir_reg def_reg = ctx->regs[def][0]; 8169 ir_reg op2_reg = ctx->regs[def][2]; 8170 ir_reg tmp_reg = ctx->regs[def][3]; 8171 int32_t offset; 8172 8173 IR_ASSERT(def_reg != IR_REG_NONE&& tmp_reg != IR_REG_NONE); 8174 if (op2_reg != IR_REG_NONE) { 8175 if (IR_REG_SPILLED(op2_reg)) { 8176 op2_reg = IR_REG_NUM(op2_reg); 8177 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8178 } 8179 offset = 0; 8180 } else { 8181 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8182 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8183 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8184 } 8185 if (IR_IS_TYPE_INT(type)) { 8186 | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))] 8187 | cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS 8188 | jge >1 8189 | add Rd(tmp_reg), sizeof(void*) 8190 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) 8191 | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] 8192 | jmp >2 8193 |1: 8194 | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] 8195 | add Ra(tmp_reg), sizeof(void*) 8196 | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8197 |2: 8198 | mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)] 8199 } else { 8200 | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))] 8201 | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS 8202 | jge >1 8203 | add Rd(tmp_reg), 16 8204 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg) 8205 | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] 8206 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); 8207 | jmp >2 8208 |1: 8209 | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] 8210 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); 8211 | add Ra(tmp_reg), 8 8212 | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8213 |2: 8214 } 8215 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8216 ir_emit_store(ctx, type, def, def_reg); 8217 } 8218|.endif 8219#else 8220 IR_ASSERT(0 && "NIY va_arg"); 8221#endif 8222} 8223 8224static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 8225{ 8226 ir_backend_data *data = ctx->data; 8227 dasm_State **Dst = &data->dasm_state; 8228 ir_type type; 8229 ir_block *bb; 8230 ir_insn *use_insn, *val; 8231 uint32_t n, *p, use_block; 8232 int i; 8233 int label, default_label = 0; 8234 int count = 0; 8235 ir_val min, max; 8236 ir_reg op2_reg = ctx->regs[def][2]; 8237 ir_reg tmp_reg = ctx->regs[def][3]; 8238 8239 type = ctx->ir_base[insn->op2].type; 8240 IR_ASSERT(tmp_reg != IR_REG_NONE); 8241 if (IR_IS_TYPE_SIGNED(type)) { 8242 min.u64 = 0x7fffffffffffffff; 8243 max.u64 = 0x8000000000000000; 8244 } else { 8245 min.u64 = 0xffffffffffffffff; 8246 max.u64 = 0x0; 8247 } 8248 8249 bb = &ctx->cfg_blocks[b]; 8250 p = &ctx->cfg_edges[bb->successors]; 8251 for (n = bb->successors_count; n != 0; p++, n--) { 8252 use_block = *p; 8253 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 8254 if (use_insn->op == IR_CASE_VAL) { 8255 val = &ctx->ir_base[use_insn->op2]; 8256 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 8257 if (IR_IS_TYPE_SIGNED(type)) { 8258 IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); 8259 min.i64 = IR_MIN(min.i64, val->val.i64); 8260 max.i64 = IR_MAX(max.i64, val->val.i64); 8261 } else { 8262 IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); 8263 min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); 8264 max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); 8265 } 8266 count++; 8267 } else { 8268 IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); 8269 default_label = ir_skip_empty_target_blocks(ctx, use_block); 8270 } 8271 } 8272 8273 IR_ASSERT(op2_reg != IR_REG_NONE); 8274 if (IR_REG_SPILLED(op2_reg)) { 8275 op2_reg = IR_REG_NUM(op2_reg); 8276 ir_emit_load(ctx, type, op2_reg, insn->op2); 8277 } 8278 8279 /* Generate a 
table jmp or a seqence of calls */ 8280 if (count > 2 && (max.i64-min.i64) < count * 8) { 8281 int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); 8282 8283 for (i = 0; i <= (max.i64 - min.i64); i++) { 8284 labels[i] = default_label; 8285 } 8286 p = &ctx->cfg_edges[bb->successors]; 8287 for (n = bb->successors_count; n != 0; p++, n--) { 8288 use_block = *p; 8289 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 8290 if (use_insn->op == IR_CASE_VAL) { 8291 val = &ctx->ir_base[use_insn->op2]; 8292 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 8293 label = ir_skip_empty_target_blocks(ctx, use_block); 8294 labels[val->val.i64 - min.i64] = label; 8295 } 8296 } 8297 8298 switch (ir_type_size[type]) { 8299 default: 8300 IR_ASSERT(0 && "Unsupported type size"); 8301 case 1: 8302 if (IR_IS_TYPE_SIGNED(type)) { 8303 | movsx Ra(op2_reg), Rb(op2_reg) 8304 } else { 8305 | movzx Ra(op2_reg), Rb(op2_reg) 8306 } 8307 break; 8308 case 2: 8309 if (IR_IS_TYPE_SIGNED(type)) { 8310 | movsx Ra(op2_reg), Rw(op2_reg) 8311 } else { 8312 | movzx Ra(op2_reg), Rw(op2_reg) 8313 } 8314 break; 8315 case 4: 8316|.if X64 8317 if (IR_IS_TYPE_SIGNED(type)) { 8318 | movsxd Ra(op2_reg), Rd(op2_reg) 8319 } else { 8320 | mov Rd(op2_reg), Rd(op2_reg) 8321 } 8322 break; 8323|| case 8: 8324|.endif 8325 break; 8326 } 8327 8328 if (min.i64 != 0) { 8329 int64_t offset = -min.i64; 8330 8331 if (IR_IS_SIGNED_32BIT(offset)) { 8332 | lea Ra(tmp_reg), [Ra(op2_reg)+(int32_t)offset] 8333 } else { 8334 IR_ASSERT(sizeof(void*) == 8); 8335|.if X64 8336 | mov64 Rq(tmp_reg), offset 8337 | add Ra(tmp_reg), Ra(op2_reg) 8338|.endif 8339 } 8340 if (default_label) { 8341 offset = max.i64 - min.i64; 8342 8343 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8344 | cmp Ra(tmp_reg), (int32_t)offset 8345 | ja =>default_label 8346 } 8347|.if X64 8348 if (ctx->code_buffer 8349 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->start) 8350 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->end)) { 8351 | jmp aword [Ra(tmp_reg)*8+>1] 8352 } else { 8353 int64_t offset = -min.i64; 8354 8355 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8356 offset *= 8; 8357 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8358 | lea Ra(tmp_reg), aword [>1] 8359 | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+offset] 8360 } 8361|.else 8362 | jmp aword [Ra(tmp_reg)*4+>1] 8363|.endif 8364 } else { 8365 if (default_label) { 8366 int64_t offset = max.i64; 8367 8368 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8369 | cmp Ra(op2_reg), (int32_t)offset 8370 | ja =>default_label 8371 } 8372|.if X64 8373 if (ctx->code_buffer 8374 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->start) 8375 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->end)) { 8376 | jmp aword [Ra(op2_reg)*8+>1] 8377 } else { 8378 | lea Ra(tmp_reg), aword [>1] 8379 | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8] 8380 } 8381|.else 8382 | jmp aword [Ra(op2_reg)*4+>1] 8383|.endif 8384 } 8385 8386 |.jmp_table 8387 if (!data->jmp_table_label) { 8388 data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; 8389 |=>data->jmp_table_label: 8390 } 8391 |.align aword 8392 |1: 8393 for (i = 0; i <= (max.i64 - min.i64); i++) { 8394 int b = labels[i]; 8395 if (b) { 8396 ir_block *bb = &ctx->cfg_blocks[b]; 8397 ir_insn *insn = &ctx->ir_base[bb->end]; 8398 8399 if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { 8400 ir_ref prev = ctx->prev_ref[bb->end]; 8401 if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { 8402 prev = ctx->prev_ref[prev]; 8403 } 8404 if (prev == bb->start) { 8405 void *addr = ir_jmp_addr(ctx, insn, 
&ctx->ir_base[insn->op2]); 8406 8407 | .aword &addr 8408 if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { 8409 bb->flags |= IR_BB_EMPTY; 8410 } 8411 continue; 8412 } 8413 } 8414 | .aword =>b 8415 } else { 8416 | .aword 0 8417 } 8418 } 8419 |.code 8420 ir_mem_free(labels); 8421 } else { 8422 p = &ctx->cfg_edges[bb->successors]; 8423 for (n = bb->successors_count; n != 0; p++, n--) { 8424 use_block = *p; 8425 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 8426 if (use_insn->op == IR_CASE_VAL) { 8427 val = &ctx->ir_base[use_insn->op2]; 8428 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 8429 label = ir_skip_empty_target_blocks(ctx, use_block); 8430 if (IR_IS_32BIT(type, val->val)) { 8431 | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 8432 } else { 8433 IR_ASSERT(sizeof(void*) == 8); 8434|.if X64 8435 | mov64 Ra(tmp_reg), val->val.i64 8436 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg 8437|.endif 8438 } 8439 | je =>label 8440 } 8441 } 8442 if (default_label) { 8443 | jmp =>default_label 8444 } 8445 } 8446} 8447 8448static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) 8449{ 8450 int j, n; 8451 ir_type type; 8452 int int_param = 0; 8453 int fp_param = 0; 8454 int int_reg_params_count = IR_REG_INT_ARGS; 8455 int fp_reg_params_count = IR_REG_FP_ARGS; 8456 int32_t used_stack = 0; 8457 8458#ifdef IR_HAVE_FASTCALL 8459 if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { 8460 int_reg_params_count = IR_REG_INT_FCARGS; 8461 fp_reg_params_count = IR_REG_FP_FCARGS; 8462 } 8463#endif 8464 8465 n = insn->inputs_count; 8466 for (j = 3; j <= n; j++) { 8467 type = ctx->ir_base[ir_insn_op(insn, j)].type; 8468 if (IR_IS_TYPE_INT(type)) { 8469 if (int_param >= int_reg_params_count) { 8470 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 8471 } 8472 int_param++; 8473#ifdef _WIN64 8474 /* WIN64 calling convention use common couter for int and fp registers */ 8475 fp_param++; 8476#endif 8477 } else { 8478 IR_ASSERT(IR_IS_TYPE_FP(type)); 8479 if (fp_param >= fp_reg_params_count) { 8480 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 8481 } 8482 fp_param++; 8483#ifdef _WIN64 8484 /* WIN64 calling convention use common couter for int and fp registers */ 8485 int_param++; 8486#endif 8487 } 8488 } 8489 8490 /* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */ 8491 used_stack += IR_SHADOW_ARGS; 8492 8493 return used_stack; 8494} 8495 8496static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) 8497{ 8498 ir_backend_data *data = ctx->data; 8499 dasm_State **Dst = &data->dasm_state; 8500 int j, n; 8501 ir_ref arg; 8502 ir_insn *arg_insn; 8503 uint8_t type; 8504 ir_reg src_reg, dst_reg; 8505 int int_param = 0; 8506 int fp_param = 0; 8507 int count = 0; 8508 int int_reg_params_count = IR_REG_INT_ARGS; 8509 int fp_reg_params_count = IR_REG_FP_ARGS; 8510 const int8_t *int_reg_params = _ir_int_reg_params; 8511 const int8_t *fp_reg_params = _ir_fp_reg_params; 8512 int32_t used_stack, stack_offset = IR_SHADOW_ARGS; 8513 ir_copy *copies; 8514 bool do_pass3 = 0; 8515 /* For temporaries we may use any scratch registers except for registers used for parameters */ 8516 ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ 8517 8518 n = insn->inputs_count; 8519 if (n < 3) { 8520 return 0; 8521 } 8522 8523 if (tmp_reg == IR_REG_NONE) { 8524 tmp_reg = IR_REG_RAX; 8525 } 8526 8527#ifdef IR_HAVE_FASTCALL 8528 if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { 8529 int_reg_params_count = 
IR_REG_INT_FCARGS; 8530 fp_reg_params_count = IR_REG_FP_FCARGS; 8531 int_reg_params = _ir_int_fc_reg_params; 8532 fp_reg_params = _ir_fp_fc_reg_params; 8533 } 8534#endif 8535 8536 if (insn->op == IR_CALL 8537 && (ctx->flags & IR_PREALLOCATED_STACK) 8538#ifdef IR_HAVE_FASTCALL 8539 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 8540#endif 8541 ) { 8542 // TODO: support for preallocated stack 8543 used_stack = 0; 8544 } else { 8545 used_stack = ir_call_used_stack(ctx, insn); 8546 if (IR_SHADOW_ARGS 8547 && insn->op == IR_TAILCALL 8548 && used_stack == IR_SHADOW_ARGS) { 8549 used_stack = 0; 8550 } 8551 if (ctx->fixed_call_stack_size 8552 && used_stack <= ctx->fixed_call_stack_size 8553#ifdef IR_HAVE_FASTCALL 8554 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 8555#endif 8556 ) { 8557 used_stack = 0; 8558 } else { 8559 /* Stack must be 16 byte aligned */ 8560 int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); 8561 ctx->call_stack_size += aligned_stack; 8562 if (aligned_stack) { 8563 | sub Ra(IR_REG_RSP), aligned_stack 8564 } 8565 } 8566 } 8567 8568 /* 1. move all register arguments that should be passed through stack 8569 * and collect arguments that should be passed through registers */ 8570 copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); 8571 for (j = 3; j <= n; j++) { 8572 arg = ir_insn_op(insn, j); 8573 src_reg = ir_get_alocated_reg(ctx, def, j); 8574 arg_insn = &ctx->ir_base[arg]; 8575 type = arg_insn->type; 8576 if (IR_IS_TYPE_INT(type)) { 8577 if (int_param < int_reg_params_count) { 8578 dst_reg = int_reg_params[int_param]; 8579 } else { 8580 dst_reg = IR_REG_NONE; /* pass argument through stack */ 8581 } 8582 int_param++; 8583#ifdef _WIN64 8584 /* WIN64 calling convention use common couter for int and fp registers */ 8585 fp_param++; 8586#endif 8587 } else { 8588 IR_ASSERT(IR_IS_TYPE_FP(type)); 8589 if (fp_param < fp_reg_params_count) { 8590 dst_reg = fp_reg_params[fp_param]; 8591 } else { 8592 dst_reg = IR_REG_NONE; /* pass argument through stack */ 8593 } 8594 fp_param++; 8595#ifdef _WIN64 8596 /* WIN64 calling convention use common couter for int and fp registers */ 8597 int_param++; 8598#endif 8599 } 8600 if (dst_reg != IR_REG_NONE) { 8601 if (src_reg == IR_REG_NONE) { 8602 /* delay CONST->REG and MEM->REG moves to third pass */ 8603 do_pass3 = 1; 8604 } else { 8605 if (IR_REG_SPILLED(src_reg)) { 8606 src_reg = IR_REG_NUM(src_reg); 8607 ir_emit_load(ctx, type, src_reg, arg); 8608 } 8609 if (src_reg != dst_reg) { 8610 /* delay REG->REG moves to second pass */ 8611 copies[count].type = type; 8612 copies[count].from = src_reg; 8613 copies[count].to = dst_reg; 8614 count++; 8615 } 8616 } 8617 } else { 8618 /* Pass register arguments to stack (REG->MEM moves) */ 8619 if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { 8620 ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 8621 } else { 8622 do_pass3 = 1; 8623 } 8624 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 8625 } 8626 } 8627 8628 /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ 8629 if (count) { 8630 ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); 8631 } 8632 ir_mem_free(copies); 8633 8634 /* 3. 
move the remaining memory and immediate values */ 8635 if (do_pass3) { 8636 stack_offset = IR_SHADOW_ARGS; 8637 int_param = 0; 8638 fp_param = 0; 8639 for (j = 3; j <= n; j++) { 8640 arg = ir_insn_op(insn, j); 8641 src_reg = ir_get_alocated_reg(ctx, def, j); 8642 arg_insn = &ctx->ir_base[arg]; 8643 type = arg_insn->type; 8644 if (IR_IS_TYPE_INT(type)) { 8645 if (int_param < int_reg_params_count) { 8646 dst_reg = int_reg_params[int_param]; 8647 } else { 8648 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 8649 } 8650 int_param++; 8651#ifdef _WIN64 8652 /* WIN64 calling convention use common couter for int and fp registers */ 8653 fp_param++; 8654#endif 8655 } else { 8656 IR_ASSERT(IR_IS_TYPE_FP(type)); 8657 if (fp_param < fp_reg_params_count) { 8658 dst_reg = fp_reg_params[fp_param]; 8659 } else { 8660 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 8661 } 8662 fp_param++; 8663#ifdef _WIN64 8664 /* WIN64 calling convention use common couter for int and fp registers */ 8665 int_param++; 8666#endif 8667 } 8668 if (dst_reg != IR_REG_NONE) { 8669 if (src_reg == IR_REG_NONE) { 8670 if (IR_IS_TYPE_INT(type)) { 8671 if (IR_IS_CONST_REF(arg)) { 8672 if (type == IR_I8 || type == IR_I16) { 8673 type = IR_I32; 8674 } else if (type == IR_U8 || type == IR_U16) { 8675 type = IR_U32; 8676 } 8677 ir_emit_load(ctx, type, dst_reg, arg); 8678 } else if (ctx->vregs[arg]) { 8679 ir_mem mem = ir_ref_spill_slot(ctx, arg); 8680 8681 if (ir_type_size[type] > 2) { 8682 ir_emit_load_mem_int(ctx, type, dst_reg, mem); 8683 } else if (ir_type_size[type] == 2) { 8684 if (type == IR_I16) { 8685 | ASM_TXT_TMEM_OP movsx, Rd(dst_reg), word, mem 8686 } else { 8687 | ASM_TXT_TMEM_OP movzx, Rd(dst_reg), word, mem 8688 } 8689 } else { 8690 IR_ASSERT(ir_type_size[type] == 1); 8691 if (type == IR_I8) { 8692 | ASM_TXT_TMEM_OP movsx, Rd(dst_reg), byte, mem 8693 } else { 8694 | ASM_TXT_TMEM_OP movzx, Rd(dst_reg), byte, mem 8695 } 8696 } 8697 } else { 8698 ir_load_local_addr(ctx, dst_reg, arg); 8699 } 8700 } else { 8701 ir_emit_load(ctx, type, dst_reg, arg); 8702 } 8703 } 8704 } else { 8705 ir_mem mem = IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset); 8706 8707 if (IR_IS_TYPE_INT(type)) { 8708 if (IR_IS_CONST_REF(arg)) { 8709 ir_emit_store_mem_int_const(ctx, type, mem, arg, tmp_reg, 1); 8710 } else if (src_reg == IR_REG_NONE) { 8711 IR_ASSERT(tmp_reg != IR_REG_NONE); 8712 ir_emit_load(ctx, type, tmp_reg, arg); 8713 ir_emit_store_mem_int(ctx, type, mem, tmp_reg); 8714 } else if (IR_REG_SPILLED(src_reg)) { 8715 src_reg = IR_REG_NUM(src_reg); 8716 ir_emit_load(ctx, type, src_reg, arg); 8717 ir_emit_store_mem_int(ctx, type, mem, src_reg); 8718 } 8719 } else { 8720 if (IR_IS_CONST_REF(arg)) { 8721 ir_emit_store_mem_fp_const(ctx, type, mem, arg, tmp_reg, tmp_fp_reg); 8722 } else if (src_reg == IR_REG_NONE) { 8723 IR_ASSERT(tmp_fp_reg != IR_REG_NONE); 8724 ir_emit_load(ctx, type, tmp_fp_reg, arg); 8725 ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg); 8726 } else if (IR_REG_SPILLED(src_reg)) { 8727 src_reg = IR_REG_NUM(src_reg); 8728 ir_emit_load(ctx, type, src_reg, arg); 8729 ir_emit_store_mem_fp(ctx, type, mem, src_reg); 8730 } 8731 } 8732 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 8733 } 8734 } 8735 } 8736 8737#ifdef _WIN64 8738 /* WIN64 calling convention requires duplcation of parameters passed in FP register into GP ones */ 8739 if (ir_is_vararg(ctx, insn)) { 8740 n = IR_MIN(n, IR_MAX_REG_ARGS + 2); 8741 for (j = 3; j <= n; j++) { 8742 arg = ir_insn_op(insn, j); 8743 arg_insn = 
&ctx->ir_base[arg]; 8744 type = arg_insn->type; 8745 if (IR_IS_TYPE_FP(type)) { 8746 src_reg = fp_reg_params[j-3]; 8747 dst_reg = int_reg_params[j-3]; 8748|.if X64 8749 if (ctx->mflags & IR_X86_AVX) { 8750 | vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) 8751 } else { 8752 | movd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) 8753 } 8754|.endif 8755 } 8756 } 8757 } 8758#endif 8759#ifdef IR_REG_VARARG_FP_REGS 8760 /* set hidden argument to specify the number of vector registers used */ 8761 if (ir_is_vararg(ctx, insn)) { 8762 fp_param = IR_MIN(fp_param, fp_reg_params_count); 8763 | mov Rd(IR_REG_VARARG_FP_REGS), fp_param 8764 } 8765#endif 8766 8767 return used_stack; 8768} 8769 8770static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) 8771{ 8772 ir_backend_data *data = ctx->data; 8773 dasm_State **Dst = &data->dasm_state; 8774 ir_reg def_reg; 8775 8776 if (IR_IS_CONST_REF(insn->op2)) { 8777 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8778 8779 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8780 | call aword &addr 8781 } else { 8782|.if X64 8783|| ir_reg tmp_reg = IR_REG_RAX; 8784 8785#ifdef IR_REG_VARARG_FP_REGS 8786|| if (ir_is_vararg(ctx, insn)) { 8787|| tmp_reg = IR_REG_R11; 8788|| } 8789#endif 8790|| if (IR_IS_SIGNED_32BIT(addr)) { 8791 | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8792|| } else { 8793 | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8794|| } 8795 | call Rq(tmp_reg) 8796|.endif 8797 } 8798 } else { 8799 ir_reg op2_reg = ctx->regs[def][2]; 8800 8801 if (op2_reg != IR_REG_NONE) { 8802 if (IR_REG_SPILLED(op2_reg)) { 8803 op2_reg = IR_REG_NUM(op2_reg); 8804 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8805 } 8806 | call Ra(op2_reg) 8807 } else { 8808 ir_mem mem; 8809 8810 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8811 mem = ir_fuse_load(ctx, def, insn->op2); 8812 } else { 8813 mem = ir_ref_spill_slot(ctx, insn->op2); 8814 } 8815 8816 | ASM_TMEM_OP call, aword, mem 8817 } 8818 } 8819 8820 if (used_stack) { 8821 int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); 8822 8823 ctx->call_stack_size -= aligned_stack; 8824 if (ir_is_fastcall(ctx, insn)) { 8825 aligned_stack -= used_stack; 8826 if (aligned_stack) { 8827 | add Ra(IR_REG_RSP), aligned_stack 8828 } 8829 } else { 8830 | add Ra(IR_REG_RSP), aligned_stack 8831 } 8832 } 8833 8834 if (insn->type != IR_VOID) { 8835 if (IR_IS_TYPE_INT(insn->type)) { 8836 def_reg = IR_REG_NUM(ctx->regs[def][0]); 8837 if (def_reg != IR_REG_NONE) { 8838 if (def_reg != IR_REG_INT_RET1) { 8839 ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); 8840 } 8841 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8842 ir_emit_store(ctx, insn->type, def, def_reg); 8843 } 8844 } else if (ctx->use_lists[def].count > 1) { 8845 ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); 8846 } 8847 } else { 8848 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 8849 def_reg = IR_REG_NUM(ctx->regs[def][0]); 8850#ifdef IR_REG_FP_RET1 8851 if (def_reg != IR_REG_NONE) { 8852 if (def_reg != IR_REG_FP_RET1) { 8853 ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); 8854 } 8855 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8856 ir_emit_store(ctx, insn->type, def, def_reg); 8857 } 8858 } else if (ctx->use_lists[def].count > 1) { 8859 ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); 8860 } 8861#else 8862 if (ctx->use_lists[def].count > 1) { 8863 int32_t offset; 8864 ir_reg fp; 8865 8866 if (def_reg == IR_REG_NONE) { 8867 offset = ir_ref_spill_slot_offset(ctx, def, 
&fp); 8868 if (insn->type == IR_DOUBLE) { 8869 | fstp qword [Ra(fp)+offset] 8870 } else { 8871 IR_ASSERT(insn->type == IR_FLOAT); 8872 | fstp dword [Ra(fp)+offset] 8873 } 8874 } else { 8875 offset = ctx->ret_slot; 8876 IR_ASSERT(offset != -1); 8877 offset = IR_SPILL_POS_TO_OFFSET(offset); 8878 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8879 if (insn->type == IR_DOUBLE) { 8880 | fstp qword [Ra(fp)+offset] 8881 } else { 8882 IR_ASSERT(insn->type == IR_FLOAT); 8883 | fstp dword [Ra(fp)+offset] 8884 } 8885 ir_emit_load_mem_fp(ctx, insn->type, def_reg, IR_MEM_BO(fp, offset)); 8886 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8887 ir_emit_store(ctx, insn->type, def, def_reg); 8888 } 8889 } 8890 } 8891#endif 8892 } 8893 } 8894} 8895 8896static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8897{ 8898 int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); 8899 ir_emit_call_ex(ctx, def, insn, used_stack); 8900} 8901 8902static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8903{ 8904 ir_backend_data *data = ctx->data; 8905 dasm_State **Dst = &data->dasm_state; 8906 int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); 8907 8908 if (used_stack != 0) { 8909 ir_emit_call_ex(ctx, def, insn, used_stack); 8910 ir_emit_return_void(ctx); 8911 return; 8912 } 8913 8914 ir_emit_epilogue(ctx); 8915 8916 if (IR_IS_CONST_REF(insn->op2)) { 8917 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8918 8919 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8920 | jmp aword &addr 8921 } else { 8922|.if X64 8923|| ir_reg tmp_reg = IR_REG_RAX; 8924 8925#ifdef IR_REG_VARARG_FP_REGS 8926|| if (ir_is_vararg(ctx, insn)) { 8927|| tmp_reg = IR_REG_R11; 8928|| } 8929#endif 8930|| if (IR_IS_SIGNED_32BIT(addr)) { 8931 | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8932|| } else { 8933 | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8934|| } 8935 | jmp Rq(tmp_reg) 8936|.endif 8937 } 8938 } else { 8939 ir_reg op2_reg = ctx->regs[def][2]; 8940 8941 if (op2_reg != IR_REG_NONE) { 8942 if (IR_REG_SPILLED(op2_reg)) { 8943 op2_reg = IR_REG_NUM(op2_reg); 8944 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8945 } 8946 | jmp Ra(op2_reg) 8947 } else { 8948 ir_mem mem; 8949 8950 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8951 mem = ir_fuse_load(ctx, def, insn->op2); 8952 } else { 8953 mem = ir_ref_spill_slot(ctx, insn->op2); 8954 } 8955 | ASM_TMEM_OP jmp, aword, mem 8956 } 8957 } 8958} 8959 8960static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8961{ 8962 ir_backend_data *data = ctx->data; 8963 dasm_State **Dst = &data->dasm_state; 8964 ir_reg op2_reg = ctx->regs[def][2]; 8965 8966 if (IR_IS_CONST_REF(insn->op2)) { 8967 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8968 8969 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8970 | jmp aword &addr 8971 } else { 8972|.if X64 8973 if (IR_IS_SIGNED_32BIT(addr)) { 8974 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8975 } else { 8976 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8977 } 8978 | jmp rax 8979|.endif 8980 } 8981 } else if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8982 ir_mem mem = ir_fuse_load(ctx, def, insn->op2); 8983 | ASM_TMEM_OP jmp, aword, mem 8984 } else if (op2_reg != IR_REG_NONE) { 8985 if (IR_REG_SPILLED(op2_reg)) { 8986 op2_reg = IR_REG_NUM(op2_reg); 8987 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8988 } 8989 | 
jmp Ra(op2_reg) 8990 } else { 8991 ir_mem mem = ir_ref_spill_slot(ctx, insn->op2); 8992 8993 | ASM_TMEM_OP jmp, aword, mem 8994 } 8995} 8996 8997static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp) 8998{ 8999 ir_backend_data *data = ctx->data; 9000 dasm_State **Dst = &data->dasm_state; 9001 ir_insn *next_insn = &ctx->ir_base[def + 1]; 9002 9003 if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) { 9004 ir_block *bb = &ctx->cfg_blocks[b]; 9005 uint32_t target; 9006 9007 if (!(bb->flags & IR_BB_DESSA_MOVES)) { 9008 target = ctx->cfg_edges[bb->successors]; 9009 if (UNEXPECTED(bb->successors_count == 2)) { 9010 if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) { 9011 target = ctx->cfg_edges[bb->successors + 1]; 9012 } else { 9013 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 9014 } 9015 } else { 9016 IR_ASSERT(bb->successors_count == 1); 9017 } 9018 target = ir_skip_empty_target_blocks(ctx, target); 9019 if (target != next_block) { 9020 if (int_cmp) { 9021 switch (op) { 9022 default: 9023 IR_ASSERT(0 && "NIY binary op"); 9024 case IR_EQ: 9025 | jne =>target 9026 break; 9027 case IR_NE: 9028 | je =>target 9029 break; 9030 case IR_LT: 9031 | jge =>target 9032 break; 9033 case IR_GE: 9034 | jl =>target 9035 break; 9036 case IR_LE: 9037 | jg =>target 9038 break; 9039 case IR_GT: 9040 | jle =>target 9041 break; 9042 case IR_ULT: 9043 | jae =>target 9044 break; 9045 case IR_UGE: 9046 | jb =>target 9047 break; 9048 case IR_ULE: 9049 | ja =>target 9050 break; 9051 case IR_UGT: 9052 | jbe =>target 9053 break; 9054 } 9055 } else { 9056 switch (op) { 9057 default: 9058 IR_ASSERT(0 && "NIY binary op"); 9059 case IR_EQ: 9060 | jne =>target 9061 | jp =>target 9062 break; 9063 case IR_NE: 9064 | jp &addr 9065 | je =>target 9066 break; 9067 case IR_LT: 9068 | jae =>target 9069 break; 9070 case IR_GE: 9071 | jp &addr 9072 | jb =>target 9073 break; 9074 case IR_LE: 9075 | ja =>target 9076 break; 9077 case IR_GT: 9078 | jp &addr 9079 | jbe =>target 9080 break; 9081 } 9082 } 9083 | jmp &addr 9084 return 1; 9085 } 9086 } 9087 } else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) { 9088 void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]); 9089 9090 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, target_addr)) { 9091 if (int_cmp) { 9092 switch (op) { 9093 default: 9094 IR_ASSERT(0 && "NIY binary op"); 9095 case IR_EQ: 9096 | jne &target_addr 9097 break; 9098 case IR_NE: 9099 | je &target_addr 9100 break; 9101 case IR_LT: 9102 | jge &target_addr 9103 break; 9104 case IR_GE: 9105 | jl &target_addr 9106 break; 9107 case IR_LE: 9108 | jg &target_addr 9109 break; 9110 case IR_GT: 9111 | jle &target_addr 9112 break; 9113 case IR_ULT: 9114 | jae &target_addr 9115 break; 9116 case IR_UGE: 9117 | jb &target_addr 9118 break; 9119 case IR_ULE: 9120 | ja &target_addr 9121 break; 9122 case IR_UGT: 9123 | jbe &target_addr 9124 break; 9125 } 9126 } else { 9127 switch (op) { 9128 default: 9129 IR_ASSERT(0 && "NIY binary op"); 9130 case IR_EQ: 9131 | jne &target_addr 9132 | jp &target_addr 9133 break; 9134 case IR_NE: 9135 | jp &addr 9136 | je &target_addr 9137 break; 9138 case IR_LT: 9139 | jae &target_addr 9140 break; 9141 case IR_GE: 9142 | jp &addr 9143 | jb &target_addr 9144 break; 9145 case IR_LE: 9146 | ja &target_addr 9147 break; 9148 case IR_GT: 9149 | jp &addr 9150 | jbe &target_addr 9151 break; 9152 } 9153 } 9154 | jmp &addr 9155 return 1; 
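			/* Fused GUARD + constant IJMP: the inverted conditions above (plus the
			 * extra parity checks needed for unordered FP compares) branch straight
			 * to the IJMP target, while the trailing jmp falls back to the guard's
			 * exit address. */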
9156 } 9157 } 9158 9159 if (int_cmp) { 9160 switch (op) { 9161 default: 9162 IR_ASSERT(0 && "NIY binary op"); 9163 case IR_EQ: 9164 | je &addr 9165 break; 9166 case IR_NE: 9167 | jne &addr 9168 break; 9169 case IR_LT: 9170 | jl &addr 9171 break; 9172 case IR_GE: 9173 | jge &addr 9174 break; 9175 case IR_LE: 9176 | jle &addr 9177 break; 9178 case IR_GT: 9179 | jg &addr 9180 break; 9181 case IR_ULT: 9182 | jb &addr 9183 break; 9184 case IR_UGE: 9185 | jae &addr 9186 break; 9187 case IR_ULE: 9188 | jbe &addr 9189 break; 9190 case IR_UGT: 9191 | ja &addr 9192 break; 9193 } 9194 } else { 9195 switch (op) { 9196 default: 9197 IR_ASSERT(0 && "NIY binary op"); 9198 case IR_EQ: 9199 | jp >1 9200 | je &addr 9201 |1: 9202 break; 9203 case IR_NE: 9204 | jne &addr 9205 | jp &addr 9206 break; 9207 case IR_LT: 9208 | jp >1 9209 | jb &addr 9210 |1: 9211 break; 9212 case IR_GE: 9213 | jae &addr 9214 break; 9215 case IR_LE: 9216 | jp >1 9217 | jbe &addr 9218 |1: 9219 break; 9220 case IR_GT: 9221 | ja &addr 9222 break; 9223// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; 9224// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; 9225// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; 9226// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; 9227 } 9228 } 9229 return 0; 9230} 9231 9232static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9233{ 9234 ir_backend_data *data = ctx->data; 9235 dasm_State **Dst = &data->dasm_state; 9236 ir_reg op2_reg = ctx->regs[def][2]; 9237 ir_type type = ctx->ir_base[insn->op2].type; 9238 void *addr; 9239 9240 IR_ASSERT(IR_IS_TYPE_INT(type)); 9241 if (IR_IS_CONST_REF(insn->op2)) { 9242 bool is_true = ir_ref_is_true(ctx, insn->op2); 9243 9244 if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { 9245 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9246 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9247 | jmp aword &addr 9248 } else { 9249|.if X64 9250 if (IR_IS_SIGNED_32BIT(addr)) { 9251 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9252 } else { 9253 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9254 } 9255 | jmp aword [rax] 9256|.endif 9257 } 9258 } 9259 return 0; 9260 } 9261 9262 if (op2_reg != IR_REG_NONE) { 9263 if (IR_REG_SPILLED(op2_reg)) { 9264 op2_reg = IR_REG_NUM(op2_reg); 9265 ir_emit_load(ctx, type, op2_reg, insn->op2); 9266 } 9267 | ASM_REG_REG_OP test, type, op2_reg, op2_reg 9268 } else { 9269 ir_mem mem; 9270 9271 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 9272 mem = ir_fuse_load(ctx, def, insn->op2); 9273 } else { 9274 mem = ir_ref_spill_slot(ctx, insn->op2); 9275 } 9276 | ASM_MEM_IMM_OP cmp, type, mem, 0 9277 } 9278 9279 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9280 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9281 ir_op op; 9282 9283 if (insn->op == IR_GUARD) { 9284 op = IR_EQ; 9285 } else { 9286 op = IR_NE; 9287 } 9288 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9289 } else { 9290|.if X64 9291 if (insn->op == IR_GUARD) { 9292 | je >1 9293 } else { 9294 | jne >1 9295 } 9296 |.cold_code 9297 |1: 9298 if (IR_IS_SIGNED_32BIT(addr)) { 9299 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9300 } else { 9301 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9302 } 9303 | jmp aword [rax] 9304 |.code 9305|.endif 9306 return 0; 9307 } 9308} 9309 9310static bool ir_emit_guard_cmp_int(ir_ctx 
*ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9311{ 9312 ir_backend_data *data = ctx->data; 9313 dasm_State **Dst = &data->dasm_state; 9314 ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; 9315 ir_op op = cmp_insn->op; 9316 ir_type type = ctx->ir_base[cmp_insn->op1].type; 9317 ir_ref op1 = cmp_insn->op1; 9318 ir_ref op2 = cmp_insn->op2; 9319 ir_reg op1_reg = ctx->regs[insn->op2][1]; 9320 ir_reg op2_reg = ctx->regs[insn->op2][2]; 9321 void *addr; 9322 9323 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 9324 op1_reg = IR_REG_NUM(op1_reg); 9325 ir_emit_load(ctx, type, op1_reg, op1); 9326 } 9327 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 9328 op2_reg = IR_REG_NUM(op2_reg); 9329 if (op1 != op2) { 9330 ir_emit_load(ctx, type, op2_reg, op2); 9331 } 9332 } 9333 9334 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9335 if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { 9336 if (op == IR_ULT) { 9337 /* always false */ 9338 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9339 | jmp aword &addr 9340 } else { 9341|.if X64 9342 if (IR_IS_SIGNED_32BIT(addr)) { 9343 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9344 } else { 9345 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9346 } 9347 | jmp aword [rax] 9348|.endif 9349 } 9350 return 0; 9351 } else if (op == IR_UGE) { 9352 /* always true */ 9353 return 0; 9354 } else if (op == IR_ULE) { 9355 op = IR_EQ; 9356 } else if (op == IR_UGT) { 9357 op = IR_NE; 9358 } 9359 } 9360 ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2); 9361 9362 if (insn->op == IR_GUARD) { 9363 op ^= 1; // reverse 9364 } 9365 9366 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9367} 9368 9369static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9370{ 9371 ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); 9372 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9373 9374 if (insn->op == IR_GUARD) { 9375 op ^= 1; // reverse 9376 } 9377 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0); 9378} 9379 9380static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9381{ 9382 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9383 ir_op op = (insn->op == IR_GUARD) ? 
IR_EQ : IR_NE; 9384 9385 ir_emit_test_int_common(ctx, def, insn->op2, op); 9386 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9387} 9388 9389static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9390{ 9391 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9392 ir_op op = ctx->ir_base[insn->op2].op; 9393 9394 if (insn->op == IR_GUARD) { 9395 op ^= 1; // reverse 9396 } 9397 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9398} 9399 9400static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 9401{ 9402 ir_backend_data *data = ctx->data; 9403 dasm_State **Dst = &data->dasm_state; 9404 ir_type type; 9405 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9406 9407 type = ctx->ir_base[ctx->ir_base[insn->op2].op1].type; 9408 9409 IR_ASSERT(IR_IS_TYPE_INT(type)); 9410 if (IR_IS_TYPE_SIGNED(type)) { 9411 if (insn->op == IR_GUARD) { 9412 | jno &addr 9413 } else { 9414 | jo &addr 9415 } 9416 } else { 9417 if (insn->op == IR_GUARD) { 9418 | jnc &addr 9419 } else { 9420 | jc &addr 9421 } 9422 } 9423 return 0; 9424} 9425 9426static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type) 9427{ 9428 ir_backend_data *data = ctx->data; 9429 dasm_State **Dst = &data->dasm_state; 9430 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9431 ir_mem mem = ir_fuse_addr(ctx, def, def); 9432 9433 IR_ASSERT(def_reg != IR_REG_NONE); 9434 if (ir_type_size[type] == 4) { 9435 if (IR_MEM_BASE(mem) == def_reg 9436 && IR_MEM_OFFSET(mem) == 0 9437 && IR_MEM_SCALE(mem) == 1 9438 && IR_MEM_INDEX(mem) != IR_REG_NONE) { 9439 ir_reg reg = IR_MEM_INDEX(mem); 9440 | add Rd(def_reg), Rd(reg) 9441 } else if (IR_MEM_INDEX(mem) == def_reg 9442 && IR_MEM_OFFSET(mem) == 0 9443 && IR_MEM_SCALE(mem) == 1 9444 && IR_MEM_BASE(mem) != IR_REG_NONE) { 9445 ir_reg reg = IR_MEM_BASE(mem); 9446 | add Rd(def_reg), Rd(reg) 9447 } else { 9448 | ASM_TXT_TMEM_OP lea, Rd(def_reg), dword, mem 9449 } 9450 } else { 9451 if (IR_MEM_BASE(mem) == def_reg 9452 && IR_MEM_OFFSET(mem) == 0 9453 && IR_MEM_SCALE(mem) == 1 9454 && IR_MEM_INDEX(mem) != IR_REG_NONE) { 9455 ir_reg reg = IR_MEM_INDEX(mem); 9456 | add Ra(def_reg), Ra(reg) 9457 } else if (IR_MEM_INDEX(mem) == def_reg 9458 && IR_MEM_OFFSET(mem) == 0 9459 && IR_MEM_SCALE(mem) == 1 9460 && IR_MEM_BASE(mem) != IR_REG_NONE) { 9461 ir_reg reg = IR_MEM_BASE(mem); 9462 | add Ra(def_reg), Ra(reg) 9463 } else { 9464 | ASM_TXT_TMEM_OP lea, Ra(def_reg), aword, mem 9465 } 9466 } 9467 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9468 ir_emit_store(ctx, type, def, def_reg); 9469 } 9470} 9471 9472static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) 9473{ 9474 ir_backend_data *data = ctx->data; 9475 dasm_State **Dst = &data->dasm_state; 9476 ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); 9477 9478 if (ctx->use_lists[def].count == 1) { 9479 /* dead load */ 9480 return; 9481 } 9482 9483|.if X64WIN 9484| gs 9485| mov Ra(reg), aword [0x58] 9486| mov Ra(reg), aword [Ra(reg)+insn->op2] 9487| mov Ra(reg), aword [Ra(reg)+insn->op3] 9488|.elif WIN 9489| fs 9490| mov Ra(reg), aword [0x2c] 9491| mov Ra(reg), aword [Ra(reg)+insn->op2] 9492| mov Ra(reg), aword [Ra(reg)+insn->op3] 9493|.elif X64APPLE 9494| gs 9495|| if (insn->op3 == IR_NULL) { 9496| mov Ra(reg), aword [insn->op2] 9497|| } else { 9498| mov Ra(reg), aword [insn->op2] 9499| mov Ra(reg), aword [Ra(reg)+insn->op3] 9500|| } 9501|.elif X64 9502| fs 9503|| if (insn->op3 == IR_NULL) { 9504| mov Ra(reg), aword [insn->op2] 9505|| } else 
{ 9506| mov Ra(reg), [0x8] 9507| mov Ra(reg), aword [Ra(reg)+insn->op2] 9508| mov Ra(reg), aword [Ra(reg)+insn->op3] 9509|| } 9510|.else 9511| gs 9512|| if (insn->op3 == IR_NULL) { 9513| mov Ra(reg), aword [insn->op2] 9514|| } else { 9515| mov Ra(reg), [0x4] 9516| mov Ra(reg), aword [Ra(reg)+insn->op2] 9517| mov Ra(reg), aword [Ra(reg)+insn->op3] 9518|| } 9519| .endif 9520 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9521 ir_emit_store(ctx, IR_ADDR, def, reg); 9522 } 9523} 9524 9525static void ir_emit_sse_sqrt(ir_ctx *ctx, ir_ref def, ir_insn *insn) 9526{ 9527 ir_backend_data *data = ctx->data; 9528 dasm_State **Dst = &data->dasm_state; 9529 ir_reg op3_reg = ctx->regs[def][3]; 9530 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9531 9532 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 9533 IR_ASSERT(def_reg != IR_REG_NONE && op3_reg != IR_REG_NONE); 9534 9535 if (IR_REG_SPILLED(op3_reg)) { 9536 op3_reg = IR_REG_NUM(op3_reg); 9537 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 9538 } 9539 9540 | ASM_FP_REG_REG_OP sqrts, insn->type, def_reg, op3_reg 9541 9542 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9543 ir_emit_store(ctx, insn->type, def, def_reg); 9544 } 9545} 9546 9547static void ir_emit_sse_round(ir_ctx *ctx, ir_ref def, ir_insn *insn, int round_op) 9548{ 9549 ir_backend_data *data = ctx->data; 9550 dasm_State **Dst = &data->dasm_state; 9551 ir_reg op3_reg = ctx->regs[def][3]; 9552 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9553 9554 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 9555 IR_ASSERT(def_reg != IR_REG_NONE && op3_reg != IR_REG_NONE); 9556 9557 if (IR_REG_SPILLED(op3_reg)) { 9558 op3_reg = IR_REG_NUM(op3_reg); 9559 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 9560 } 9561 9562 if (ctx->mflags & IR_X86_AVX) { 9563 | ASM_SSE2_REG_REG_REG_TXT_OP vrounds, insn->type, def_reg, def_reg, op3_reg, round_op 9564 } else { 9565 | ASM_SSE2_REG_REG_TXT_OP rounds, insn->type, def_reg, op3_reg, round_op 9566 } 9567 9568 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9569 ir_emit_store(ctx, insn->type, def, def_reg); 9570 } 9571} 9572 9573static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 9574{ 9575 ir_backend_data *data = ctx->data; 9576 dasm_State **Dst = &data->dasm_state; 9577 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9578 9579 IR_ASSERT(def_reg != IR_REG_NONE); 9580 9581 |.if X64 9582 | sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */ 9583 | mov aword [rsp+0*8], rax 9584 | mov aword [rsp+1*8], rcx 9585 | mov aword [rsp+2*8], rdx 9586 | mov aword [rsp+3*8], rbx 9587 | mov aword [rsp+5*8], rbp 9588 | mov aword [rsp+6*8], rsi 9589 | mov aword [rsp+7*8], rdi 9590 | mov aword [rsp+8*8], r8 9591 | mov aword [rsp+9*8], r9 9592 | mov aword [rsp+10*8], r10 9593 | mov aword [rsp+11*8], r11 9594 | mov aword [rsp+12*8], r12 9595 | mov aword [rsp+13*8], r13 9596 | mov aword [rsp+14*8], r14 9597 | mov aword [rsp+15*8], r15 9598 | movsd qword [rsp+16*8+0*8], xmm0 9599 | movsd qword [rsp+16*8+1*8], xmm1 9600 | movsd qword [rsp+16*8+2*8], xmm2 9601 | movsd qword [rsp+16*8+3*8], xmm3 9602 | movsd qword [rsp+16*8+4*8], xmm4 9603 | movsd qword [rsp+16*8+5*8], xmm5 9604 | movsd qword [rsp+16*8+6*8], xmm6 9605 | movsd qword [rsp+16*8+7*8], xmm7 9606 | movsd qword [rsp+16*8+8*8], xmm8 9607 | movsd qword [rsp+16*8+9*8], xmm9 9608 | movsd qword [rsp+16*8+10*8], xmm10 9609 | movsd qword [rsp+16*8+11*8], xmm11 9610 | movsd qword [rsp+16*8+12*8], xmm12 9611 | movsd qword [rsp+16*8+13*8], xmm13 9612 | movsd qword [rsp+16*8+14*8], xmm14 9613 | movsd qword [rsp+16*8+15*8], xmm15 9614 | 9615 | mov Ra(IR_REG_INT_ARG2), 
rsp 9616 | lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16] 9617 | mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1) 9618 | mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8] 9619 |.if X64WIN 9620 | sub rsp, 32 /* shadow space */ 9621 |.endif 9622 |.else 9623 | sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */ 9624 | mov aword [esp+0*4], eax 9625 | mov aword [esp+1*4], ecx 9626 | mov aword [esp+2*4], edx 9627 | mov aword [esp+3*4], ebx 9628 | mov aword [esp+5*4], ebp 9629 | mov aword [esp+6*4], esi 9630 | mov aword [esp+7*4], edi 9631 | movsd qword [esp+8*4+0*8], xmm0 9632 | movsd qword [esp+8*4+1*8], xmm1 9633 | movsd qword [esp+8*4+2*8], xmm2 9634 | movsd qword [esp+8*4+3*8], xmm3 9635 | movsd qword [esp+8*4+4*8], xmm4 9636 | movsd qword [esp+8*4+5*8], xmm5 9637 | movsd qword [esp+8*4+6*8], xmm6 9638 | movsd qword [esp+8*4+7*8], xmm7 9639 | 9640 | mov Ra(IR_REG_INT_FCARG2), esp 9641 | lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16] 9642 | mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1) 9643 | mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12] 9644 |.endif 9645 9646 if (IR_IS_CONST_REF(insn->op2)) { 9647 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 9648 9649 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9650 | call aword &addr 9651 } else { 9652|.if X64 9653 if (IR_IS_SIGNED_32BIT(addr)) { 9654 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9655 } else { 9656 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9657 } 9658 | call rax 9659|.endif 9660 } 9661 } else { 9662 IR_ASSERT(0); 9663 } 9664 9665 // restore SP 9666 |.if X64WIN 9667 | add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ 9668 |.elif X64 9669 | add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */ 9670 |.else 9671 | add esp, 8*4+8*8+16 /* CPU regs + SSE regs */ 9672 |.endif 9673 9674 if (def_reg != IR_REG_INT_RET1) { 9675 ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); 9676 } 9677 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9678 ir_emit_store(ctx, insn->type, def, def_reg); 9679 } 9680} 9681 9682static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) 9683{ 9684 ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
		IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;

	IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);

	if (IR_IS_TYPE_INT(type)) {
		if (from_reg != IR_REG_NONE) {
			if (to_reg != IR_REG_NONE) {
				ir_emit_mov(ctx, type, to_reg, from_reg);
			} else {
				ir_emit_store(ctx, type, to, from_reg);
			}
		} else {
			ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset));
		}
	} else {
		if (from_reg != IR_REG_NONE) {
			if (to_reg != IR_REG_NONE) {
				ir_emit_fp_mov(ctx, type, to_reg, from_reg);
			} else {
				ir_emit_store(ctx, type, to, from_reg);
			}
		} else {
			ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset));
		}
	}
}

static void ir_emit_load_params(ir_ctx *ctx)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_insn *insn;
	ir_ref i, n, *p, use;
	int int_param_num = 0;
	int fp_param_num = 0;
	ir_reg src_reg;
	ir_reg dst_reg;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_offset = 0;

#ifdef IR_TARGET_X86
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */
	} else {
		stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */
	}
	n = use_list->count;
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (int_param_num < int_reg_params_count) {
					src_reg = int_reg_params[int_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				int_param_num++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param_num++;
#endif
			} else {
				if (fp_param_num < fp_reg_params_count) {
					src_reg = fp_reg_params[fp_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				fp_param_num++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param_num++;
#endif
			}
			if (ctx->vregs[use]) {
				dst_reg = IR_REG_NUM(ctx->regs[use][0]);
				IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE ||
					stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos +
						((ctx->flags & IR_USE_FRAME_POINTER) ?
							-(ctx->stack_frame_size - ctx->stack_frame_alignment) :
							ctx->call_stack_size));
				if (src_reg != dst_reg) {
					ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
				}
				if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
					ir_emit_store(ctx, insn->type, use, dst_reg);
				}
			}
			if (src_reg == IR_REG_NONE) {
				if (sizeof(void*) == 8) {
					stack_offset += sizeof(void*);
				} else {
					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
				}
			}
		}
	}
}

static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
{
	if (IR_IS_TYPE_INT(type)) {
		available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP);
	} else {
		IR_ASSERT(IR_IS_TYPE_FP(type));
		available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP);
	}
	IR_ASSERT(!IR_REGSET_IS_EMPTY(available));
	return IR_REGSET_FIRST(available);
}

static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
	ir_backend_data *data = ctx->data;
	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;

	if (to == 0) {
		if (IR_IS_TYPE_INT(type)) {
			if (ctx->regs[ref][0] == IR_REG_NONE) {
				ctx->regs[ref][0] = IR_REG_RAX;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (ctx->regs[ref][1] == IR_REG_NONE) {
				ctx->regs[ref][1] = IR_REG_XMM0;
			}
		}
	} else if (from != 0) {
		if (IR_IS_TYPE_INT(type)) {
			if (ctx->regs[ref][0] == IR_REG_NONE) {
				ctx->regs[ref][0] = IR_REG_RAX;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (ctx->regs[ref][1] == IR_REG_NONE) {
				ctx->regs[ref][1] = IR_REG_XMM0;
			}
		}
	}
	return 1;
}

static void ir_fix_param_spills(ir_ctx *ctx)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_insn *insn;
	ir_ref i, n, *p, use;
	int int_param_num = 0;
	int fp_param_num = 0;
	ir_reg src_reg;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_start = 0;
	int32_t stack_offset = 0;

#ifdef IR_TARGET_X86
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		/* skip old frame pointer and return address */
		stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
	} else {
		/* skip return address */
		stack_start = sizeof(void*) + ctx->stack_frame_size;
	}
	n = use_list->count;
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (int_param_num < int_reg_params_count) {
					src_reg = int_reg_params[int_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				int_param_num++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param_num++;
#endif
			} else {
				if (fp_param_num < fp_reg_params_count) {
					src_reg = fp_reg_params[fp_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				fp_param_num++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param_num++;
#endif
			}
			if (src_reg == IR_REG_NONE) {
				if (ctx->vregs[use]) {
					ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];
					if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
					 && ival->stack_spill_pos == -1
					 && (ival->next || ival->reg == IR_REG_NONE)) {
						ival->stack_spill_pos = stack_start + stack_offset;
					}
				}
				if (sizeof(void*) == 8) {
					stack_offset += sizeof(void*);
				} else {
					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
				}
			}
		}
	}

#ifdef _WIN64
	/* WIN64 uses a shadow area for registers */
	stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*);
#endif
	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
	ctx->param_stack_size = stack_offset;
}

static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_insn *insn;
	ir_ref i, n, j, *p;
	uint32_t *rule, insn_flags;
	ir_backend_data *data = ctx->data;
	ir_regset available = 0;
	ir_target_constraints constraints;
	uint32_t def_flags;
	ir_reg reg;

#ifndef IR_REG_FP_RET1
	if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data);
	} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data);
	} else {
		ctx->ret_slot = -1;
	}
#endif

	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);

	/* vregs + tmp + fixed + SCRATCH + ALL */
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

	if (!ctx->arena) {
		ctx->arena = ir_arena_create(16 * 1024);
	}

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
			switch (ctx->rules ?
*rule : insn->op) { 9961 case IR_START: 9962 case IR_BEGIN: 9963 case IR_END: 9964 case IR_IF_TRUE: 9965 case IR_IF_FALSE: 9966 case IR_CASE_VAL: 9967 case IR_CASE_DEFAULT: 9968 case IR_MERGE: 9969 case IR_LOOP_BEGIN: 9970 case IR_LOOP_END: 9971 break; 9972#ifndef IR_REG_FP_RET1 9973 case IR_CALL: 9974 if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { 9975 ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); 9976 } 9977#endif 9978 IR_FALLTHROUGH; 9979 default: 9980 def_flags = ir_get_target_constraints(ctx, i, &constraints); 9981 if (ctx->rules 9982 && *rule != IR_CMP_AND_BRANCH_INT 9983 && *rule != IR_CMP_AND_BRANCH_FP 9984 && *rule != IR_TEST_AND_BRANCH_INT 9985 && *rule != IR_GUARD_CMP_INT 9986 && *rule != IR_GUARD_CMP_FP) { 9987 available = IR_REGSET_SCRATCH; 9988 } 9989 if (ctx->vregs[i]) { 9990 reg = constraints.def_reg; 9991 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 9992 IR_REGSET_EXCL(available, reg); 9993 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 9994 } else if (def_flags & IR_USE_MUST_BE_IN_REG) { 9995 if (insn->op == IR_VLOAD 9996 && ctx->live_intervals[ctx->vregs[i]] 9997 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { 9998 /* pass */ 9999 } else if (insn->op != IR_PARAM) { 10000 reg = ir_get_free_reg(insn->type, available); 10001 IR_REGSET_EXCL(available, reg); 10002 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 10003 } 10004 } 10005 if (!ctx->live_intervals[ctx->vregs[i]]) { 10006 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 10007 memset(ival, 0, sizeof(ir_live_interval)); 10008 ctx->live_intervals[ctx->vregs[i]] = ival; 10009 ival->type = insn->type; 10010 ival->reg = IR_REG_NONE; 10011 ival->vreg = ctx->vregs[i]; 10012 ival->stack_spill_pos = -1; 10013 if (insn->op == IR_PARAM && reg == IR_REG_NONE) { 10014 ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; 10015 } else { 10016 ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); 10017 } 10018 } else if (insn->op == IR_PARAM) { 10019 IR_ASSERT(0 && "unexpected PARAM"); 10020 return; 10021 } 10022 } else if (insn->op == IR_VAR) { 10023 ir_use_list *use_list = &ctx->use_lists[i]; 10024 ir_ref n = use_list->count; 10025 10026 if (n > 0) { 10027 int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); 10028 ir_ref i, *p, use; 10029 ir_insn *use_insn; 10030 10031 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 10032 use = *p; 10033 use_insn = &ctx->ir_base[use]; 10034 if (use_insn->op == IR_VLOAD) { 10035 if (ctx->vregs[use] 10036 && !ctx->live_intervals[ctx->vregs[use]]) { 10037 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 10038 memset(ival, 0, sizeof(ir_live_interval)); 10039 ctx->live_intervals[ctx->vregs[use]] = ival; 10040 ival->type = insn->type; 10041 ival->reg = IR_REG_NONE; 10042 ival->vreg = ctx->vregs[use]; 10043 ival->stack_spill_pos = stack_spill_pos; 10044 } 10045 } else if (use_insn->op == IR_VSTORE) { 10046 if (!IR_IS_CONST_REF(use_insn->op3) 10047 && ctx->vregs[use_insn->op3] 10048 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { 10049 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 10050 memset(ival, 0, sizeof(ir_live_interval)); 10051 ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; 10052 ival->type = insn->type; 10053 ival->reg = IR_REG_NONE; 10054 ival->vreg = ctx->vregs[use_insn->op3]; 10055 ival->stack_spill_pos = stack_spill_pos; 10056 
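						/* Like the VLOAD case above, the VSTORE value reuses the VAR's spill slot
						 * (stack_spill_pos), so both resolve to the same frame location and no
						 * separate stack slot is allocated for them. */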
} 10057 } 10058 } 10059 } 10060 } 10061 10062 insn_flags = ir_op_flags[insn->op]; 10063 n = constraints.tmps_count; 10064 if (n) { 10065 do { 10066 n--; 10067 if (constraints.tmp_regs[n].type) { 10068 ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); 10069 ir_ref *ops = insn->ops; 10070 IR_REGSET_EXCL(available, reg); 10071 if (constraints.tmp_regs[n].num > 0 10072 && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { 10073 /* rematerialization */ 10074 reg |= IR_REG_SPILL_LOAD; 10075 } 10076 ctx->regs[i][constraints.tmp_regs[n].num] = reg; 10077 } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { 10078 available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); 10079 } else { 10080 IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); 10081 } 10082 } while (n); 10083 } 10084 n = insn->inputs_count; 10085 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { 10086 ir_ref input = *p; 10087 if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { 10088 if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { 10089 ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); 10090 ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; 10091 } else { 10092 uint8_t use_flags = IR_USE_FLAGS(def_flags, j); 10093 ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; 10094 10095 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 10096 IR_REGSET_EXCL(available, reg); 10097 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 10098 } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { 10099 ctx->regs[i][j] = ctx->regs[i][1]; 10100 } else if (use_flags & IR_USE_MUST_BE_IN_REG) { 10101 reg = ir_get_free_reg(ctx->ir_base[input].type, available); 10102 IR_REGSET_EXCL(available, reg); 10103 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 10104 } 10105 } 10106 } 10107 } 10108 break; 10109 } 10110 n = ir_insn_len(insn); 10111 i += n; 10112 insn += n; 10113 rule += n; 10114 } 10115 if (bb->flags & IR_BB_DESSA_MOVES) { 10116 data->dessa_from_block = b; 10117 ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); 10118 } 10119 } 10120 10121 ctx->used_preserved_regs = ctx->fixed_save_regset; 10122 ctx->flags |= IR_NO_STACK_COMBINE; 10123 ir_fix_stack_frame(ctx); 10124} 10125 10126static void ir_preallocate_call_stack(ir_ctx *ctx) 10127{ 10128 int call_stack_size, peak_call_stack_size = 0; 10129 ir_ref i, n; 10130 ir_insn *insn; 10131 10132 for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { 10133 if (insn->op == IR_CALL) { 10134 call_stack_size = ir_call_used_stack(ctx, insn); 10135 if (call_stack_size > peak_call_stack_size 10136#ifdef IR_HAVE_FASTCALL 10137 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 10138#endif 10139 ) { 10140 peak_call_stack_size = call_stack_size; 10141 } 10142 } 10143 n = ir_insn_len(insn); 10144 i += n; 10145 insn += n; 10146 } 10147 if (peak_call_stack_size) { 10148 ctx->call_stack_size = peak_call_stack_size; 10149 ctx->flags |= IR_PREALLOCATED_STACK; 10150 } 10151} 10152 10153void ir_fix_stack_frame(ir_ctx *ctx) 10154{ 10155 uint32_t additional_size = 0; 10156 10157 ctx->locals_area_size = ctx->stack_frame_size; 10158 10159#if defined(IR_TARGET_X64) && !defined(_WIN64) 10160 if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { 10161 ctx->flags2 |= IR_16B_FRAME_ALIGNMENT; 10162 ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16); 10163 ctx->locals_area_size = ctx->stack_frame_size; 10164 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && 
ctx->gp_reg_params < IR_REG_INT_ARGS) { 10165 additional_size += sizeof(void*) * IR_REG_INT_ARGS; 10166 } 10167 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 10168 additional_size += 16 * IR_REG_FP_ARGS; 10169 } 10170 } 10171#endif 10172 10173 if (ctx->used_preserved_regs) { 10174 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 10175 ir_reg reg; 10176 (void) reg; 10177 10178 IR_REGSET_FOREACH(used_preserved_regs, reg) { 10179 additional_size += sizeof(void*); 10180 } IR_REGSET_FOREACH_END(); 10181 } 10182 10183 ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); 10184 ctx->stack_frame_size += additional_size; 10185 ctx->stack_frame_alignment = 0; 10186 ctx->call_stack_size = 0; 10187 10188 if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) { 10189 /* Stack must be 16 byte aligned */ 10190 if (!(ctx->flags & IR_FUNCTION)) { 10191 while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { 10192 ctx->stack_frame_size += sizeof(void*); 10193 ctx->stack_frame_alignment += sizeof(void*); 10194 } 10195 } else if (ctx->flags & IR_USE_FRAME_POINTER) { 10196 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { 10197 ctx->stack_frame_size += sizeof(void*); 10198 ctx->stack_frame_alignment += sizeof(void*); 10199 } 10200 } else { 10201 if (!(ctx->flags & IR_NO_STACK_COMBINE)) { 10202 ir_preallocate_call_stack(ctx); 10203 } 10204 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != 10205 ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { 10206 ctx->stack_frame_size += sizeof(void*); 10207 ctx->stack_frame_alignment += sizeof(void*); 10208 } 10209 } 10210 } 10211 10212 ir_fix_param_spills(ctx); 10213} 10214 10215static void* dasm_labels[ir_lb_MAX]; 10216 10217static uint32_t _ir_next_block(ir_ctx *ctx, uint32_t _b) 10218{ 10219 uint32_t b = ctx->cfg_schedule[++_b]; 10220 10221 /* Check for empty ENTRY block */ 10222 while (b && ((ctx->cfg_blocks[b].flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY)) { 10223 b = ctx->cfg_schedule[++_b]; 10224 } 10225 return b; 10226} 10227 10228void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) 10229{ 10230 uint32_t _b, b, n, target; 10231 ir_block *bb; 10232 ir_ref i; 10233 ir_insn *insn; 10234 uint32_t *rule; 10235 ir_backend_data data; 10236 dasm_State **Dst; 10237 int ret; 10238 void *entry; 10239 size_t size; 10240 10241 data.ra_data.unused_slot_4 = 0; 10242 data.ra_data.unused_slot_2 = 0; 10243 data.ra_data.unused_slot_1 = 0; 10244 data.ra_data.handled = NULL; 10245 data.rodata_label = 0; 10246 data.jmp_table_label = 0; 10247 data.double_neg_const = 0; 10248 data.float_neg_const = 0; 10249 data.double_abs_const = 0; 10250 data.float_abs_const = 0; 10251 data.double_zero_const = 0; 10252 ctx->data = &data; 10253 10254 if (!ctx->live_intervals) { 10255 ctx->stack_frame_size = 0; 10256 ctx->stack_frame_alignment = 0; 10257 ctx->call_stack_size = 0; 10258 ctx->used_preserved_regs = 0; 10259 ir_allocate_unique_spill_slots(ctx); 10260 } 10261 10262 if (ctx->fixed_stack_frame_size != -1) { 10263 if (ctx->fixed_stack_red_zone) { 10264 IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); 10265 } 10266 if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { 10267 // TODO: report error to caller 10268#ifdef IR_DEBUG_MESSAGES 10269 fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > 
ctx->fixed_stack_frame_size at %s:%d\n", 10270 __FILE__, __LINE__); 10271#endif 10272 ctx->data = NULL; 10273 ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; 10274 return NULL; 10275 } 10276 ctx->stack_frame_size = ctx->fixed_stack_frame_size; 10277 ctx->call_stack_size = ctx->fixed_call_stack_size; 10278 ctx->stack_frame_alignment = 0; 10279 } 10280 10281 Dst = &data.dasm_state; 10282 data.dasm_state = NULL; 10283 dasm_init(&data.dasm_state, DASM_MAXSECTION); 10284 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); 10285 dasm_setup(&data.dasm_state, dasm_actions); 10286 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ 10287 dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); 10288 data.emit_constants = ir_bitset_malloc(ctx->consts_count); 10289 10290 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { 10291 |.if X64 10292 | endbr64 10293 |.else 10294 | endbr32 10295 |.endif 10296 } 10297 10298 if (!(ctx->flags & IR_SKIP_PROLOGUE)) { 10299 ir_emit_prologue(ctx); 10300 } 10301 if (ctx->flags & IR_FUNCTION) { 10302 ir_emit_load_params(ctx); 10303 } 10304 10305 if (UNEXPECTED(!ctx->cfg_schedule)) { 10306 uint32_t *list = ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2)); 10307 for (b = 0; b <= ctx->cfg_blocks_count; b++) { 10308 list[b] = b; 10309 } 10310 list[ctx->cfg_blocks_count + 1] = 0; 10311 } 10312 10313 for (_b = 1; _b <= ctx->cfg_blocks_count; _b++) { 10314 b = ctx->cfg_schedule[_b]; 10315 bb = &ctx->cfg_blocks[b]; 10316 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 10317 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { 10318 continue; 10319 } 10320 if (bb->flags & IR_BB_ALIGN_LOOP) { 10321 | .align IR_LOOP_ALIGNMENT 10322 } 10323 |=>b: 10324 10325 i = bb->start; 10326 insn = ctx->ir_base + i; 10327 if (bb->flags & IR_BB_ENTRY) { 10328 uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; 10329 10330 |=>label: 10331 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) { 10332 |.if X64 10333 | endbr64 10334 |.else 10335 | endbr32 10336 |.endif 10337 } 10338 ir_emit_prologue(ctx); 10339 ctx->entries[insn->op3] = i; 10340 } 10341 10342 /* skip first instruction */ 10343 n = ir_insn_len(insn); 10344 i += n; 10345 insn += n; 10346 rule = ctx->rules + i; 10347 10348 while (i <= bb->end) { 10349 if (!((*rule) & (IR_FUSED|IR_SKIPPED))) 10350 switch ((*rule) & IR_RULE_MASK) { 10351 case IR_VAR: 10352 case IR_PARAM: 10353 case IR_PI: 10354 case IR_PHI: 10355 case IR_SNAPSHOT: 10356 case IR_VA_END: 10357 break; 10358 case IR_LEA_OB: 10359 case IR_LEA_SI: 10360 case IR_LEA_SIB: 10361 case IR_LEA_IB: 10362 case IR_LEA_OB_I: 10363 case IR_LEA_I_OB: 10364 case IR_LEA_SI_O: 10365 case IR_LEA_SIB_O: 10366 case IR_LEA_IB_O: 10367 case IR_LEA_OB_SI: 10368 case IR_LEA_SI_OB: 10369 case IR_LEA_B_SI: 10370 case IR_LEA_SI_B: 10371 ir_emit_lea(ctx, i, insn->type); 10372 break; 10373 case IR_MUL_PWR2: 10374 case IR_DIV_PWR2: 10375 case IR_MOD_PWR2: 10376 ir_emit_mul_div_mod_pwr2(ctx, i, insn); 10377 break; 10378 case IR_SDIV_PWR2: 10379 ir_emit_sdiv_pwr2(ctx, i, insn); 10380 break; 10381 case IR_SMOD_PWR2: 10382 ir_emit_smod_pwr2(ctx, i, insn); 10383 break; 10384 case IR_SHIFT: 10385 ir_emit_shift(ctx, i, insn); 10386 break; 10387 case IR_SHIFT_CONST: 10388 ir_emit_shift_const(ctx, i, insn); 10389 break; 10390 case IR_BIT_COUNT: 10391 ir_emit_bit_count(ctx, i, insn); 10392 
break; 10393 case IR_CTPOP: 10394 ir_emit_ctpop(ctx, i, insn); 10395 break; 10396 case IR_INC: 10397 case IR_DEC: 10398 case IR_OP_INT: 10399 ir_emit_op_int(ctx, i, insn, *rule); 10400 break; 10401 case IR_ABS_INT: 10402 ir_emit_abs_int(ctx, i, insn); 10403 break; 10404 case IR_BOOL_NOT_INT: 10405 ir_emit_bool_not_int(ctx, i, insn); 10406 break; 10407 case IR_OP_FP: 10408 ir_emit_op_fp(ctx, i, insn); 10409 break; 10410 case IR_IMUL3: 10411 ir_emit_imul3(ctx, i, insn); 10412 break; 10413 case IR_BINOP_INT: 10414 ir_emit_binop_int(ctx, i, insn); 10415 break; 10416 case IR_BINOP_SSE2: 10417 ir_emit_binop_sse2(ctx, i, insn); 10418 break; 10419 case IR_BINOP_AVX: 10420 ir_emit_binop_avx(ctx, i, insn); 10421 break; 10422 case IR_MUL_INT: 10423 case IR_DIV_INT: 10424 case IR_MOD_INT: 10425 ir_emit_mul_div_mod(ctx, i, insn); 10426 break; 10427 case IR_CMP_INT: 10428 ir_emit_cmp_int(ctx, i, insn); 10429 break; 10430 case IR_TESTCC_INT: 10431 ir_emit_testcc_int(ctx, i, insn); 10432 break; 10433 case IR_SETCC_INT: 10434 ir_emit_setcc_int(ctx, i, insn); 10435 break; 10436 case IR_CMP_FP: 10437 ir_emit_cmp_fp(ctx, i, insn); 10438 break; 10439 case IR_SEXT: 10440 ir_emit_sext(ctx, i, insn); 10441 break; 10442 case IR_ZEXT: 10443 ir_emit_zext(ctx, i, insn); 10444 break; 10445 case IR_TRUNC: 10446 ir_emit_trunc(ctx, i, insn); 10447 break; 10448 case IR_BITCAST: 10449 case IR_PROTO: 10450 ir_emit_bitcast(ctx, i, insn); 10451 break; 10452 case IR_INT2FP: 10453 ir_emit_int2fp(ctx, i, insn); 10454 break; 10455 case IR_FP2INT: 10456 ir_emit_fp2int(ctx, i, insn); 10457 break; 10458 case IR_FP2FP: 10459 ir_emit_fp2fp(ctx, i, insn); 10460 break; 10461 case IR_COPY_INT: 10462 ir_emit_copy_int(ctx, i, insn); 10463 break; 10464 case IR_COPY_FP: 10465 ir_emit_copy_fp(ctx, i, insn); 10466 break; 10467 case IR_CMP_AND_STORE_INT: 10468 ir_emit_cmp_and_store_int(ctx, i, insn); 10469 break; 10470 case IR_CMP_AND_BRANCH_INT: 10471 ir_emit_cmp_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10472 break; 10473 case IR_CMP_AND_BRANCH_FP: 10474 ir_emit_cmp_and_branch_fp(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10475 break; 10476 case IR_TEST_AND_BRANCH_INT: 10477 ir_emit_test_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10478 break; 10479 case IR_JCC_INT: 10480 { 10481 ir_op op = ctx->ir_base[insn->op2].op; 10482 10483 if (op == IR_ADD || 10484 op == IR_SUB || 10485// op == IR_MUL || 10486 op == IR_OR || 10487 op == IR_AND || 10488 op == IR_XOR) { 10489 op = IR_NE; 10490 } else { 10491 IR_ASSERT(op >= IR_EQ && op <= IR_UGT); 10492 } 10493 ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1); 10494 } 10495 break; 10496 case IR_GUARD_CMP_INT: 10497 if (ir_emit_guard_cmp_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10498 goto next_block; 10499 } 10500 break; 10501 case IR_GUARD_CMP_FP: 10502 if (ir_emit_guard_cmp_fp(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10503 goto next_block; 10504 } 10505 break; 10506 case IR_GUARD_TEST_INT: 10507 if (ir_emit_guard_test_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10508 goto next_block; 10509 } 10510 break; 10511 case IR_GUARD_JCC_INT: 10512 if (ir_emit_guard_jcc_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10513 goto next_block; 10514 } 10515 break; 10516 case IR_IF_INT: 10517 ir_emit_if_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10518 break; 10519 case IR_COND: 10520 ir_emit_cond(ctx, i, insn); 10521 break; 10522 case IR_COND_CMP_INT: 10523 ir_emit_cond_cmp_int(ctx, i, insn); 10524 break; 10525 case IR_COND_CMP_FP: 10526 
ir_emit_cond_cmp_fp(ctx, i, insn); 10527 break; 10528 case IR_SWITCH: 10529 ir_emit_switch(ctx, b, i, insn); 10530 break; 10531 case IR_MIN_MAX_INT: 10532 ir_emit_min_max_int(ctx, i, insn); 10533 break; 10534 case IR_OVERFLOW: 10535 ir_emit_overflow(ctx, i, insn); 10536 break; 10537 case IR_OVERFLOW_AND_BRANCH: 10538 ir_emit_overflow_and_branch(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10539 break; 10540 case IR_END: 10541 case IR_LOOP_END: 10542 if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { 10543 ir_emit_osr_entry_loads(ctx, b, bb); 10544 } 10545 if (bb->flags & IR_BB_DESSA_MOVES) { 10546 ir_emit_dessa_moves(ctx, b, bb); 10547 } 10548 do { 10549 ir_ref succ = ctx->cfg_edges[bb->successors]; 10550 10551 if (UNEXPECTED(bb->successors_count == 2)) { 10552 if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { 10553 succ = ctx->cfg_edges[bb->successors + 1]; 10554 } else { 10555 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 10556 } 10557 } else { 10558 IR_ASSERT(bb->successors_count == 1); 10559 } 10560 target = ir_skip_empty_target_blocks(ctx, succ); 10561 if (target != _ir_next_block(ctx, _b)) { 10562 | jmp =>target 10563 } 10564 } while (0); 10565 break; 10566 case IR_RETURN_VOID: 10567 ir_emit_return_void(ctx); 10568 break; 10569 case IR_RETURN_INT: 10570 ir_emit_return_int(ctx, i, insn); 10571 break; 10572 case IR_RETURN_FP: 10573 ir_emit_return_fp(ctx, i, insn); 10574 break; 10575 case IR_CALL: 10576 ir_emit_call(ctx, i, insn); 10577 break; 10578 case IR_TAILCALL: 10579 ir_emit_tailcall(ctx, i, insn); 10580 break; 10581 case IR_IJMP: 10582 ir_emit_ijmp(ctx, i, insn); 10583 break; 10584 case IR_MEM_OP_INT: 10585 case IR_MEM_INC: 10586 case IR_MEM_DEC: 10587 ir_emit_mem_op_int(ctx, i, insn, *rule); 10588 break; 10589 case IR_MEM_BINOP_INT: 10590 ir_emit_mem_binop_int(ctx, i, insn); 10591 break; 10592 case IR_MEM_MUL_PWR2: 10593 case IR_MEM_DIV_PWR2: 10594 case IR_MEM_MOD_PWR2: 10595 ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn); 10596 break; 10597 case IR_MEM_SHIFT: 10598 ir_emit_mem_shift(ctx, i, insn); 10599 break; 10600 case IR_MEM_SHIFT_CONST: 10601 ir_emit_mem_shift_const(ctx, i, insn); 10602 break; 10603 case IR_REG_BINOP_INT: 10604 ir_emit_reg_binop_int(ctx, i, insn); 10605 break; 10606 case IR_VADDR: 10607 ir_emit_vaddr(ctx, i, insn); 10608 break; 10609 case IR_VLOAD: 10610 ir_emit_vload(ctx, i, insn); 10611 break; 10612 case IR_VSTORE_INT: 10613 ir_emit_vstore_int(ctx, i, insn); 10614 break; 10615 case IR_VSTORE_FP: 10616 ir_emit_vstore_fp(ctx, i, insn); 10617 break; 10618 case IR_RLOAD: 10619 ir_emit_rload(ctx, i, insn); 10620 break; 10621 case IR_RSTORE: 10622 ir_emit_rstore(ctx, i, insn); 10623 break; 10624 case IR_LOAD_INT: 10625 ir_emit_load_int(ctx, i, insn); 10626 break; 10627 case IR_LOAD_FP: 10628 ir_emit_load_fp(ctx, i, insn); 10629 break; 10630 case IR_STORE_INT: 10631 ir_emit_store_int(ctx, i, insn); 10632 break; 10633 case IR_STORE_FP: 10634 ir_emit_store_fp(ctx, i, insn); 10635 break; 10636 case IR_ALLOCA: 10637 ir_emit_alloca(ctx, i, insn); 10638 break; 10639 case IR_VA_START: 10640 ir_emit_va_start(ctx, i, insn); 10641 break; 10642 case IR_VA_COPY: 10643 ir_emit_va_copy(ctx, i, insn); 10644 break; 10645 case IR_VA_ARG: 10646 ir_emit_va_arg(ctx, i, insn); 10647 break; 10648 case IR_AFREE: 10649 ir_emit_afree(ctx, i, insn); 10650 break; 10651 case IR_BLOCK_BEGIN: 10652 ir_emit_block_begin(ctx, i, insn); 10653 break; 10654 case IR_BLOCK_END: 10655 ir_emit_block_end(ctx, i, insn); 10656 break; 10657 case IR_FRAME_ADDR: 10658 
ir_emit_frame_addr(ctx, i); 10659 break; 10660 case IR_EXITCALL: 10661 ir_emit_exitcall(ctx, i, insn); 10662 break; 10663 case IR_GUARD: 10664 case IR_GUARD_NOT: 10665 if (ir_emit_guard(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10666 goto next_block; 10667 } 10668 break; 10669 case IR_GUARD_OVERFLOW: 10670 if (ir_emit_guard_overflow(ctx, b, i, insn)) { 10671 goto next_block; 10672 } 10673 break; 10674 case IR_SSE_SQRT: 10675 ir_emit_sse_sqrt(ctx, i, insn); 10676 break; 10677 case IR_SSE_RINT: 10678 ir_emit_sse_round(ctx, i, insn, 4); 10679 break; 10680 case IR_SSE_FLOOR: 10681 ir_emit_sse_round(ctx, i, insn, 9); 10682 break; 10683 case IR_SSE_CEIL: 10684 ir_emit_sse_round(ctx, i, insn, 10); 10685 break; 10686 case IR_SSE_TRUNC: 10687 ir_emit_sse_round(ctx, i, insn, 11); 10688 break; 10689 case IR_SSE_NEARBYINT: 10690 ir_emit_sse_round(ctx, i, insn, 12); 10691 break; 10692 case IR_TLS: 10693 ir_emit_tls(ctx, i, insn); 10694 break; 10695 case IR_TRAP: 10696 | int3 10697 break; 10698 default: 10699 IR_ASSERT(0 && "NIY rule/instruction"); 10700 ir_mem_free(data.emit_constants); 10701 dasm_free(&data.dasm_state); 10702 ctx->data = NULL; 10703 ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; 10704 return NULL; 10705 } 10706 n = ir_insn_len(insn); 10707 i += n; 10708 insn += n; 10709 rule += n; 10710 } 10711next_block:; 10712 } 10713 10714 if (data.rodata_label) { 10715 |.rodata 10716 } 10717 IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) { 10718 insn = &ctx->ir_base[-i]; 10719 if (IR_IS_TYPE_FP(insn->type)) { 10720 int label = ctx->cfg_blocks_count + i; 10721 10722 if (!data.rodata_label) { 10723 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 10724 10725 |.rodata 10726 |=>data.rodata_label: 10727 } 10728 if (insn->type == IR_DOUBLE) { 10729 |.align 8 10730 |=>label: 10731 |.dword insn->val.u32, insn->val.u32_hi 10732 } else { 10733 IR_ASSERT(insn->type == IR_FLOAT); 10734 |.align 4 10735 |=>label: 10736 |.dword insn->val.u32 10737 } 10738 } else if (insn->op == IR_STR) { 10739 int label = ctx->cfg_blocks_count + i; 10740 const char *str = ir_get_str(ctx, insn->val.str); 10741 int i = 0; 10742 10743 if (!data.rodata_label) { 10744 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 10745 10746 |.rodata 10747 |=>data.rodata_label: 10748 } 10749 |.align 8 10750 |=>label: 10751 while (str[i]) { 10752 char c = str[i]; 10753 10754 |.byte c 10755 i++; 10756 } 10757 |.byte 0 10758 10759 } else { 10760 IR_ASSERT(0); 10761 } 10762 } IR_BITSET_FOREACH_END(); 10763 if (data.rodata_label) { 10764 |.code 10765 } 10766 ir_mem_free(data.emit_constants); 10767 10768 if (ctx->status) { 10769 dasm_free(&data.dasm_state); 10770 ctx->data = NULL; 10771 return NULL; 10772 } 10773 10774 ret = dasm_link(&data.dasm_state, size_ptr); 10775 if (ret != DASM_S_OK) { 10776 IR_ASSERT(0); 10777 dasm_free(&data.dasm_state); 10778 ctx->data = NULL; 10779 ctx->status = IR_ERROR_LINK; 10780 return NULL; 10781 } 10782 size = *size_ptr; 10783 10784 if (ctx->code_buffer) { 10785 entry = ctx->code_buffer->pos; 10786 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 10787 if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) { 10788 ctx->data = NULL; 10789 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 10790 return NULL; 10791 } 10792 ctx->code_buffer->pos = (char*)entry + size; 10793 } else { 10794 entry = ir_mem_mmap(size); 10795 if (!entry) { 10796 dasm_free(&data.dasm_state); 10797 ctx->data = NULL; 10798 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 10799 
return NULL; 10800 } 10801 ir_mem_unprotect(entry, size); 10802 } 10803 10804 ret = dasm_encode(&data.dasm_state, entry); 10805 if (ret != DASM_S_OK) { 10806 IR_ASSERT(0); 10807 dasm_free(&data.dasm_state); 10808 if (ctx->code_buffer) { 10809 if (ctx->code_buffer->pos == (char*)entry + size) { 10810 /* rollback */ 10811 ctx->code_buffer->pos = (char*)entry - size; 10812 } 10813 } else { 10814 ir_mem_unmap(entry, size); 10815 } 10816 ctx->data = NULL; 10817 ctx->status = IR_ERROR_ENCODE; 10818 return NULL; 10819 } 10820 10821 if (data.jmp_table_label) { 10822 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); 10823 ctx->jmp_table_offset = offset; 10824 } else { 10825 ctx->jmp_table_offset = 0; 10826 } 10827 if (data.rodata_label) { 10828 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); 10829 ctx->rodata_offset = offset; 10830 } else { 10831 ctx->rodata_offset = 0; 10832 } 10833 10834 if (ctx->entries_count) { 10835 /* For all entries */ 10836 i = ctx->entries_count; 10837 do { 10838 ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; 10839 uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); 10840 insn->op3 = offset; 10841 } while (i != 0); 10842 } 10843 10844 dasm_free(&data.dasm_state); 10845 10846 ir_mem_flush(entry, size); 10847 10848#if defined(__GNUC__) 10849 if ((ctx->flags & IR_GEN_CACHE_DEMOTE) && (ctx->mflags & IR_X86_CLDEMOTE)) { 10850 uintptr_t start = (uintptr_t)entry; 10851 uintptr_t p = (uintptr_t)start & ~0x3F; 10852 10853 do { 10854 /* _cldemote(p); */ 10855 asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); 10856 p += 64; 10857 } while (p < start + size); 10858 } 10859#endif 10860 10861 if (!ctx->code_buffer) { 10862 ir_mem_protect(entry, size); 10863 } 10864 10865 ctx->data = NULL; 10866 return entry; 10867} 10868 10869const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr) 10870{ 10871 void *entry; 10872 size_t size; 10873 uint32_t i; 10874 dasm_State **Dst, *dasm_state; 10875 int ret; 10876 10877 IR_ASSERT(code_buffer); 10878 IR_ASSERT(sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(code_buffer, exit_addr)); 10879 10880 Dst = &dasm_state; 10881 dasm_state = NULL; 10882 dasm_init(&dasm_state, DASM_MAXSECTION); 10883 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 10884 dasm_setup(&dasm_state, dasm_actions); 10885 10886 for (i = 0; i < exit_points_per_group - 1; i++) { 10887 | push byte i 10888 | .byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1 10889 } 10890 | push byte i 10891 |// 1: 10892 | add aword [r4], first_exit_point 10893 | jmp aword &exit_addr 10894 10895 ret = dasm_link(&dasm_state, &size); 10896 if (ret != DASM_S_OK) { 10897 IR_ASSERT(0); 10898 dasm_free(&dasm_state); 10899 return NULL; 10900 } 10901 10902 entry = code_buffer->pos; 10903 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 10904 if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { 10905 return NULL; 10906 } 10907 code_buffer->pos = (char*)entry + size; 10908 10909 ret = dasm_encode(&dasm_state, entry); 10910 if (ret != DASM_S_OK) { 10911 IR_ASSERT(0); 10912 dasm_free(&dasm_state); 10913 if (code_buffer->pos == (char*)entry + size) { 10914 /* rollback */ 10915 code_buffer->pos = (char*)entry - size; 10916 } 10917 return NULL; 10918 } 10919 10920 dasm_free(&dasm_state); 10921 10922 ir_mem_flush(entry, size); 10923 10924 *size_ptr = size; 10925 return entry; 10926} 10927 
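/*
 * Thunks for far branch targets (explanatory note; the byte layout shown here
 * is derived from ir_emit_thunk()/ir_fix_thunk() below and is illustrative).
 *
 * On x86_64 a thunk is an indirect jump through an 8-byte address slot that
 * immediately follows the instruction (RIP-relative, displacement normally 0):
 *
 *     0:  ff 25 <disp32>          jmp aword [rip+disp32]  ; jump via the slot below
 *     6:  <8-byte target address>
 *
 * On 32-bit x86 a plain "jmp rel32" to the target is emitted instead.
 *
 * ir_fix_thunk() re-targets an existing thunk in place: if the new target is
 * still out of rel32 range it rewrites the 8-byte address slot (found through
 * the stored displacement), otherwise it shortens the thunk to a direct
 * "e9 <rel32>" jump.
 */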
10928bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr) 10929{ 10930 return sizeof(void*) == 8 && !IR_MAY_USE_32BIT_ADDR(code_buffer, addr); 10931} 10932 10933void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr) 10934{ 10935 void *entry; 10936 size_t size; 10937 dasm_State **Dst, *dasm_state; 10938 int ret; 10939 10940 Dst = &dasm_state; 10941 dasm_state = NULL; 10942 dasm_init(&dasm_state, DASM_MAXSECTION); 10943 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 10944 dasm_setup(&dasm_state, dasm_actions); 10945 10946 |.code 10947 |.if X64 10948 | jmp aword [>1] 10949 |1: 10950 | .aword &addr 10951 |.else 10952 | jmp &addr 10953 |.endif 10954 10955 ret = dasm_link(&dasm_state, &size); 10956 if (ret != DASM_S_OK) { 10957 IR_ASSERT(0); 10958 dasm_free(&dasm_state); 10959 return NULL; 10960 } 10961 10962 if (size > (size_t)((char*)code_buffer->end - (char*)code_buffer->pos)) { 10963 dasm_free(&dasm_state); 10964 return NULL; 10965 } 10966 10967 entry = code_buffer->pos; 10968 ret = dasm_encode(&dasm_state, entry); 10969 if (ret != DASM_S_OK) { 10970 dasm_free(&dasm_state); 10971 return NULL; 10972 } 10973 10974 *size_ptr = size; 10975 code_buffer->pos = (char*)code_buffer->pos + size; 10976 10977 dasm_free(&dasm_state); 10978 ir_mem_flush(entry, size); 10979 10980 return entry; 10981} 10982 10983void ir_fix_thunk(void *thunk_entry, void *addr) 10984{ 10985 unsigned char *code = thunk_entry; 10986 10987 if (sizeof(void*) == 8 && !IR_IS_SIGNED_32BIT(((unsigned char*)addr - (code + 5)))) { 10988 int32_t *offset_ptr; 10989 void **addr_ptr; 10990 10991 IR_ASSERT(code[0] == 0xff && code[1] == 0x25); 10992 offset_ptr = (int32_t*)(code + 2); 10993 addr_ptr = (void**)(code + 6 + *offset_ptr); 10994 *addr_ptr = addr; 10995 } else { 10996 int32_t *addr_ptr; 10997 10998 code[0] = 0xe9; 10999 addr_ptr = (int32_t*)(code + 1); 11000 *addr_ptr = (int32_t)(intptr_t)(void*)((unsigned char*)addr - (code + 5)); 11001 } 11002} 11003
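/*
 * Usage sketch (illustrative only, excluded from the build): how a client code
 * generator might combine the thunk helpers above when emitting a branch to a
 * target that may be out of rel32 range. The wrapper name is hypothetical.
 */
#if 0
static void *ir_resolve_call_target(ir_code_buffer *code_buffer, void *addr, size_t *thunk_size)
{
	/* A direct rel32 call/jmp is fine when the target is close enough (or on 32-bit). */
	if (!ir_needs_thunk(code_buffer, addr)) {
		*thunk_size = 0;
		return addr;
	}
	/* Otherwise place a thunk into the code buffer and branch to it instead;
	 * the thunk can later be re-targeted with ir_fix_thunk(). */
	return ir_emit_thunk(code_buffer, addr, thunk_size);
}
#endif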