/*
 * IR - Lightweight JIT Compilation Framework
 * (x86/x86_64 native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

|.if X64
|.arch x64
|.else
|.arch x86
|.endif

|.actionlist dasm_actions
|.globals ir_lb
|.section code, cold_code, rodata, jmp_table

|.define IR_LOOP_ALIGNMENT, 16

#ifdef IR_DEBUG
typedef struct _ir_mem {uint64_t v;} ir_mem;

# define IR_MEM_VAL(loc) ((loc).v)
#else
typedef uint64_t ir_mem;

# define IR_MEM_VAL(loc) (loc)
#endif

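/*
 * An ir_mem value packs a complete x86 addressing mode into 64 bits:
 * bits 0..31 hold the signed displacement, bits 32..39 the base register,
 * bits 40..47 the index register, and bits 48..55 the scale (1, 2, 4 or 8).
 * IR_REG_NONE in the base/index field marks an absent component, so e.g.
 * IR_MEM_BO(IR_REG_RBP, -8) would describe the operand [rbp-8].
 */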
#define IR_MEM_OFFSET(loc) ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff))
#define IR_MEM_BASE(loc)   ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff))
#define IR_MEM_INDEX(loc)  ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff))
#define IR_MEM_SCALE(loc)  ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff))

#define IR_MEM_O(addr)          IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 1)
#define IR_MEM_B(base)          IR_MEM(base, 0, IR_REG_NONE, 1)
#define IR_MEM_BO(base, offset) IR_MEM(base, offset, IR_REG_NONE, 1)

IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t scale)
{
	ir_mem mem;
	IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST));
	IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST));
	IR_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8);
#ifdef IR_DEBUG
	mem.v =
#else
	mem =
#endif
		((uint64_t)(uint32_t)offset |
		((uint64_t)(uint8_t)base << 32) |
		((uint64_t)(uint8_t)index << 40) |
		((uint64_t)(uint8_t)scale << 48));
	return mem;
}

#define IR_IS_SIGNED_32BIT(val)     ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1)))
#define IR_IS_SIGNED_NEG_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= -2147483647))
#define IR_IS_UNSIGNED_32BIT(val)   (((uintptr_t)(val)) <= 0xffffffff)
#define IR_IS_32BIT(type, val)      (IR_IS_TYPE_SIGNED(type) ? IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64))
#define IR_IS_FP_ZERO(insn)         ((insn.type == IR_DOUBLE) ? (insn.val.u64 == 0) : (insn.val.u32 == 0))
#define IR_MAY_USE_32BIT_ADDR(code_buffer, addr) \
	((code_buffer) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - (char*)(code_buffer)->start) && \
	IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)(code_buffer)->end)))

#define IR_SPILL_POS_TO_OFFSET(offset) \
	((ctx->flags & IR_USE_FRAME_POINTER) ? \
		((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \
		((offset) + ctx->call_stack_size))

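/*
 * The ASM_EXPAND_*_MEM macros below decode an ir_mem operand at run time
 * and emit the matching x86 addressing mode. DynAsm selects the encoding
 * from the operand's textual form, so every base/index/scale combination
 * needs its own instruction template; these macros enumerate the templates
 * once so the individual emitters don't have to.
 */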
|.macro ASM_EXPAND_OP_MEM, MACRO, op, type, op1
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base = IR_MEM_BASE(op1);
||		int32_t index = IR_MEM_INDEX(op1);
||		int32_t scale = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [offset]
||			} else {
|				MACRO op, type, [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*8+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*4+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*2+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)+offset]
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP1_MEM, MACRO, op, type, op1, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base = IR_MEM_BASE(op1);
||		int32_t index = IR_MEM_INDEX(op1);
||		int32_t scale = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+offset], op2
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*8+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*8+offset], op2
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*4+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*4+offset], op2
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)*2+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)*2+offset], op2
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, [Ra(index)+offset], op2
||			} else {
|				MACRO op, type, [Ra(base)+Ra(index)+offset], op2
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP2_MEM, MACRO, op, type, op1, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op2);
||		int32_t base = IR_MEM_BASE(op2);
||		int32_t index = IR_MEM_INDEX(op2);
||		int32_t scale = IR_MEM_SCALE(op2);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*8+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*4+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*2+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)+offset]
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP2_MEM_3, MACRO, op, type, op1, op2, op3
||	do {
||		int32_t offset = IR_MEM_OFFSET(op2);
||		int32_t base = IR_MEM_BASE(op2);
||		int32_t index = IR_MEM_INDEX(op2);
||		int32_t scale = IR_MEM_SCALE(op2);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+offset], op3
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*8+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset], op3
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*4+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset], op3
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)*2+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset], op3
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, [Ra(index)+offset], op3
||			} else {
|				MACRO op, type, op1, [Ra(base)+Ra(index)+offset], op3
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_EXPAND_OP3_MEM, MACRO, op, type, op1, op2, op3
||	do {
||		int32_t offset = IR_MEM_OFFSET(op3);
||		int32_t base = IR_MEM_BASE(op3);
||		int32_t index = IR_MEM_INDEX(op3);
||		int32_t scale = IR_MEM_SCALE(op3);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)*8+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)*4+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)*2+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				MACRO op, type, op1, op2, [Ra(index)+offset]
||			} else {
|				MACRO op, type, op1, op2, [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

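/*
 * The ASM_EXPAND_TYPE_* macros dispatch on ir_type_size[type] to pick the
 * operand width (byte/word/dword and, on x64, qword) and the matching
 * register accessor (Rb/Rw/Rd/Rq) for each operand shape.
 */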
|.macro ASM_EXPAND_TYPE_MEM, op, type, op1
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1
||			break;
||		case 2:
|			op word op1
||			break;
||		case 4:
|			op dword op1
||			break;
|.if X64
||		case 8:
|			op qword op1
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1, Rb(op2)
||			break;
||		case 2:
|			op word op1, Rw(op2)
||			break;
||		case 4:
|			op dword op1, Rd(op2)
||			break;
|.if X64
||		case 8:
|			op qword op1, Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1, op2
||			break;
||		case 2:
|			op word op1, op2
||			break;
||		case 4:
|			op dword op1, op2
||			break;
|.if X64
||		case 8:
|			op qword op1, op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op byte op1, (op2 & 0xff)
||			break;
||		case 2:
|			op word op1, (op2 & 0xffff)
||			break;
||		case 4:
|			op dword op1, op2
||			break;
|.if X64
||		case 8:
|			op qword op1, op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_EXPAND_TYPE_REG_MEM, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), byte op2
||			break;
||		case 2:
|			op Rw(op1), word op2
||			break;
||		case 4:
|			op Rd(op1), dword op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), qword op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_TMEM_OP, op, type, op1
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base = IR_MEM_BASE(op1);
||		int32_t index = IR_MEM_INDEX(op1);
||		int32_t scale = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op type [offset]
||			} else {
|				op type [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*8+offset]
||			} else {
|				op type [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*4+offset]
||			} else {
|				op type [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*2+offset]
||			} else {
|				op type [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)+offset]
||			} else {
|				op type [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_TXT_TMEM_OP, op, op1, type, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op2);
||		int32_t base = IR_MEM_BASE(op2);
||		int32_t index = IR_MEM_INDEX(op2);
||		int32_t scale = IR_MEM_SCALE(op2);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op op1, type [offset]
||			} else {
|				op op1, type [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)*8+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)*4+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)*2+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op op1, type [Ra(index)+offset]
||			} else {
|				op op1, type [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_TMEM_TXT_OP, op, type, op1, op2
||	do {
||		int32_t offset = IR_MEM_OFFSET(op1);
||		int32_t base = IR_MEM_BASE(op1);
||		int32_t index = IR_MEM_INDEX(op1);
||		int32_t scale = IR_MEM_SCALE(op1);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op type [offset], op2
||			} else {
|				op type [Ra(base)+offset], op2
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*8+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)*8+offset], op2
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*4+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)*4+offset], op2
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)*2+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)*2+offset], op2
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op type [Ra(index)+offset], op2
||			} else {
|				op type [Ra(base)+Ra(index)+offset], op2
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_TXT_TXT_TMEM_OP, op, op1, op2, type, op3
||	do {
||		int32_t offset = IR_MEM_OFFSET(op3);
||		int32_t base = IR_MEM_BASE(op3);
||		int32_t index = IR_MEM_INDEX(op3);
||		int32_t scale = IR_MEM_SCALE(op3);
||		if (index == IR_REG_NONE) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [offset]
||			} else {
|				op op1, op2, type [Ra(base)+offset]
||			}
||		} else if (scale == 8) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)*8+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)*8+offset]
||			}
||		} else if (scale == 4) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)*4+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)*4+offset]
||			}
||		} else if (scale == 2) {
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)*2+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)*2+offset]
||			}
||		} else {
||			IR_ASSERT(scale == 1);
||			if (base == IR_REG_NONE) {
|				op op1, op2, type [Ra(index)+offset]
||			} else {
|				op op1, op2, type [Ra(base)+Ra(index)+offset]
||			}
||		}
||	} while (0);
|.endmacro

|.macro ASM_REG_OP, op, type, op1
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1)
||			break;
||		case 2:
|			op Rw(op1)
||			break;
||		case 4:
|			op Rd(op1)
||			break;
|.if X64
||		case 8:
|			op Rq(op1)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_MEM_OP, op, type, op1
|	ASM_EXPAND_OP_MEM ASM_EXPAND_TYPE_MEM, op, type, op1
|.endmacro

|.macro ASM_REG_REG_OP, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), Rb(op2)
||			break;
||		case 2:
|			op Rw(op1), Rw(op2)
||			break;
||		case 4:
|			op Rd(op1), Rd(op2)
||			break;
|.if X64
||		case 8:
|			op Rq(op1), Rq(op2)
||			break;
|.endif
||	}
|.endmacro

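/*
 * Variant of ASM_REG_REG_OP that emits a 16-bit form for byte-sized values:
 * the switch below folds case 1 into case 2. This suits instructions that
 * lack an 8-bit register form (cmovcc and two-operand imul, for instance).
 */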
|.macro ASM_REG_REG_OP2, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
||		case 2:
|			op Rw(op1), Rw(op2)
||			break;
||		case 4:
|			op Rd(op1), Rd(op2)
||			break;
|.if X64
||		case 8:
|			op Rq(op1), Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_TXT_OP, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), op2
||			break;
||		case 2:
|			op Rw(op1), op2
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_IMM_OP, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 1:
|			op Rb(op1), (op2 & 0xff)
||			break;
||		case 2:
|			op Rw(op1), (op2 & 0xffff)
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_MEM_REG_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2
|.endmacro

|.macro ASM_MEM_TXT_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2
|.endmacro

|.macro ASM_MEM_IMM_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2
|.endmacro

|.macro ASM_REG_MEM_OP, op, type, op1, op2
|	ASM_EXPAND_OP2_MEM ASM_REG_TXT_OP, op, type, op1, op2
|.endmacro

|.macro ASM_REG_REG_MUL, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), Rw(op2)
||			break;
||		case 4:
|			op Rd(op1), Rd(op2)
||			break;
|.if X64
||		case 8:
|			op Rq(op1), Rq(op2)
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_IMM_MUL, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), op2
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_TXT_MUL, op, type, op1, op2
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), op2
||			break;
||		case 4:
|			op Rd(op1), op2
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_MEM_MUL, op, type, op1, op2
|	ASM_EXPAND_OP2_MEM ASM_REG_TXT_MUL, op, type, op1, op2
|.endmacro

|.macro ASM_REG_TXT_TXT_MUL, op, type, op1, op2, op3
||	switch (ir_type_size[type]) {
||		default:
||			IR_ASSERT(0);
||		case 2:
|			op Rw(op1), op2, op3
||			break;
||		case 4:
|			op Rd(op1), op2, op3
||			break;
|.if X64
||		case 8:
|			op Rq(op1), op2, op3
||			break;
|.endif
||	}
|.endmacro

|.macro ASM_REG_MEM_TXT_MUL, op, type, op1, op2, op3
|	ASM_EXPAND_OP2_MEM_3 ASM_REG_TXT_TXT_MUL, imul, type, op1, op2, op3
|.endmacro

|.macro ASM_SSE2_REG_REG_OP, op, type, op1, op2
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST)
||	}
|.endmacro

|.macro ASM_SSE2_REG_TXT_OP, op, type, op1, op2
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), qword op2
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), dword op2
||	}
|.endmacro

|.macro ASM_SSE2_REG_MEM_OP, op, type, op1, op2
|	ASM_EXPAND_OP2_MEM ASM_SSE2_REG_TXT_OP, op, type, op1, op2
|.endmacro

|.macro ASM_AVX_REG_REG_REG_OP, op, type, op1, op2, op3
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST)
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST)
||	}
|.endmacro

|.macro ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), qword op3
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), dword op3
||	}
|.endmacro

|.macro ASM_AVX_REG_REG_MEM_OP, op, type, op1, op2, op3
|	ASM_EXPAND_OP3_MEM ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3
|.endmacro

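/*
 * The ASM_FP_* wrappers choose between the SSE2 and the VEX-encoded (AVX)
 * form of an instruction at run time: when ctx->mflags has IR_X86_AVX set,
 * the mnemonic is emitted with a "v" prefix via DynAsm token pasting.
 */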
|.macro ASM_FP_REG_REG_OP, op, type, op1, op2
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_REG_OP v..op, type, op1, op2
||	} else {
|		ASM_SSE2_REG_REG_OP op, type, op1, op2
||	}
|.endmacro

|.macro ASM_FP_TXT_REG_OP, op, type, dst, src
||	if (type == IR_DOUBLE) {
||		if (ctx->mflags & IR_X86_AVX) {
|			v..op..d qword dst, xmm(src-IR_REG_FP_FIRST)
||		} else {
|			op..d qword dst, xmm(src-IR_REG_FP_FIRST)
||		}
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
||		if (ctx->mflags & IR_X86_AVX) {
|			v..op..s dword dst, xmm(src-IR_REG_FP_FIRST)
||		} else {
|			op..s dword dst, xmm(src-IR_REG_FP_FIRST)
||		}
||	}
|.endmacro

|.macro ASM_FP_MEM_REG_OP, op, type, op1, op2
|	ASM_EXPAND_OP1_MEM ASM_FP_TXT_REG_OP, op, type, op1, op2
|.endmacro

|.macro ASM_FP_REG_TXT_OP, op, type, op1, op2
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_TXT_OP v..op, type, op1, op2
||	} else {
|		ASM_SSE2_REG_TXT_OP op, type, op1, op2
||	}
|.endmacro

|.macro ASM_FP_REG_MEM_OP, op, type, op1, op2
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_MEM_OP v..op, type, op1, op2
||	} else {
|		ASM_SSE2_REG_MEM_OP op, type, op1, op2
||	}
|.endmacro

|.macro ASM_SSE2_REG_REG_TXT_OP, op, type, op1, op2, op3
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), op3
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), op3
||	}
|.endmacro

|.macro ASM_SSE2_REG_REG_REG_TXT_OP, op, type, op1, op2, op3, op4
||	if (type == IR_DOUBLE) {
|		op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST), op4
||	} else {
||		IR_ASSERT(type == IR_FLOAT);
|		op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST), op4
||	}
|.endmacro

|.macro ASM_FP_REG_REG_TXT_OP, op, type, op1, op2, op3
||	if (ctx->mflags & IR_X86_AVX) {
|		ASM_SSE2_REG_REG_REG_TXT_OP v..op, type, op1, op2, op3
||	} else {
|		ASM_SSE2_REG_REG_TXT_OP op, type, op1, op2, op3
||	}
|.endmacro

typedef struct _ir_backend_data {
	ir_reg_alloc_data  ra_data;
	uint32_t           dessa_from_block;
	dasm_State        *dasm_state;
	ir_bitset          emit_constants;
	int                rodata_label, jmp_table_label;
	bool               double_neg_const;
	bool               float_neg_const;
	bool               double_abs_const;
	bool               float_abs_const;
	bool               double_zero_const;
} ir_backend_data;

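/*
 * Register-name tables used by ir_reg_name(), one per access width.
 * GP registers get 64/32/16/8-bit spellings; FP registers need only one.
 */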
#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \
	#name64,
#define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \
	#name32,
#define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \
	#name16,
#define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \
	#name8,
#define IR_FP_REG_NAME(code, name) \
	#name,

static const char *_ir_reg_name[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME)
	IR_FP_REGS(IR_FP_REG_NAME)
};

static const char *_ir_reg_name32[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME32)
};

static const char *_ir_reg_name16[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME16)
};

static const char *_ir_reg_name8[IR_REG_NUM] = {
	IR_GP_REGS(IR_GP_REG_NAME8)
};

/* Calling Convention */
#ifdef _WIN64

static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
};

static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
};

#elif defined(IR_TARGET_X64)

static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = {
	IR_REG_INT_ARG1,
	IR_REG_INT_ARG2,
	IR_REG_INT_ARG3,
	IR_REG_INT_ARG4,
	IR_REG_INT_ARG5,
	IR_REG_INT_ARG6,
};

static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = {
	IR_REG_FP_ARG1,
	IR_REG_FP_ARG2,
	IR_REG_FP_ARG3,
	IR_REG_FP_ARG4,
	IR_REG_FP_ARG5,
	IR_REG_FP_ARG6,
	IR_REG_FP_ARG7,
	IR_REG_FP_ARG8,
};

#else

static const int8_t *_ir_int_reg_params = NULL;
static const int8_t *_ir_fp_reg_params = NULL;
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = {
	IR_REG_INT_FCARG1,
	IR_REG_INT_FCARG2,
};
static const int8_t *_ir_fp_fc_reg_params = NULL;

#endif

const char *ir_reg_name(int8_t reg, ir_type type)
{
	if (reg >= IR_REG_NUM) {
		if (reg == IR_REG_SCRATCH) {
			return "SCRATCH";
		} else {
			IR_ASSERT(reg == IR_REG_ALL);
			return "ALL";
		}
	}
	IR_ASSERT(reg >= 0 && reg < IR_REG_NUM);
	if (type == IR_VOID) {
		type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE;
	}
	if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) {
		return _ir_reg_name[reg];
	} else if (ir_type_size[type] == 4) {
		return _ir_reg_name32[reg];
	} else if (ir_type_size[type] == 2) {
		return _ir_reg_name16[reg];
	} else {
		IR_ASSERT(ir_type_size[type] == 1);
		return _ir_reg_name8[reg];
	}
}

#define IR_RULES(_) \
	_(CMP_INT) \
	_(CMP_FP) \
	_(MUL_INT) \
	_(DIV_INT) \
	_(MOD_INT) \
	_(TEST_INT) \
	_(SETCC_INT) \
	_(TESTCC_INT) \
	_(LEA_OB) \
	_(LEA_SI) \
	_(LEA_SIB) \
	_(LEA_IB) \
	_(LEA_SI_O) \
	_(LEA_SIB_O) \
	_(LEA_IB_O) \
	_(LEA_I_OB) \
	_(LEA_OB_I) \
	_(LEA_OB_SI) \
	_(LEA_SI_OB) \
	_(LEA_B_SI) \
	_(LEA_SI_B) \
	_(INC) \
	_(DEC) \
	_(MUL_PWR2) \
	_(DIV_PWR2) \
	_(MOD_PWR2) \
	_(SDIV_PWR2) \
	_(SMOD_PWR2) \
	_(BOOL_NOT_INT) \
	_(ABS_INT) \
	_(OP_INT) \
	_(OP_FP) \
	_(IMUL3) \
	_(BINOP_INT) \
	_(BINOP_SSE2) \
	_(BINOP_AVX) \
	_(SHIFT) \
	_(SHIFT_CONST) \
	_(COPY_INT) \
	_(COPY_FP) \
	_(CMP_AND_STORE_INT) \
	_(CMP_AND_BRANCH_INT) \
	_(CMP_AND_BRANCH_FP) \
	_(TEST_AND_BRANCH_INT) \
	_(JCC_INT) \
	_(COND_CMP_INT) \
	_(COND_CMP_FP) \
	_(GUARD_CMP_INT) \
	_(GUARD_CMP_FP) \
	_(GUARD_TEST_INT) \
	_(GUARD_JCC_INT) \
	_(GUARD_OVERFLOW) \
	_(OVERFLOW_AND_BRANCH) \
	_(MIN_MAX_INT) \
	_(MEM_OP_INT) \
	_(MEM_INC) \
	_(MEM_DEC) \
	_(MEM_MUL_PWR2) \
	_(MEM_DIV_PWR2) \
	_(MEM_MOD_PWR2) \
	_(MEM_BINOP_INT) \
	_(MEM_SHIFT) \
	_(MEM_SHIFT_CONST) \
	_(REG_BINOP_INT) \
	_(VSTORE_INT) \
	_(VSTORE_FP) \
	_(LOAD_INT) \
	_(LOAD_FP) \
	_(STORE_INT) \
	_(STORE_FP) \
	_(IF_INT) \
	_(RETURN_VOID) \
	_(RETURN_INT) \
	_(RETURN_FP) \
	_(BIT_COUNT) \
	_(SSE_SQRT) \
	_(SSE_RINT) \
	_(SSE_FLOOR) \
	_(SSE_CEIL) \
	_(SSE_TRUNC) \
	_(SSE_NEARBYINT)

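/*
 * Backend rules extend the opcode space: they start right after the last
 * IR opcode, so ctx->rules[] can hold either a generic IR op or one of the
 * x86-specific matching rules declared above.
 */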
#define IR_RULE_ENUM(name) IR_ ## name,

#define IR_STATIC_ALLOCA (IR_SKIPPED | IR_FUSED | IR_SIMPLE | IR_ALLOCA)

enum _ir_rule {
	IR_FIRST_RULE = IR_LAST_OP,
	IR_RULES(IR_RULE_ENUM)
	IR_LAST_RULE
};

#define IR_RULE_NAME(name) #name,
const char *ir_rule_name[IR_LAST_OP] = {
	NULL,
	IR_RULES(IR_RULE_NAME)
	NULL
};

static bool ir_may_fuse_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
	if (sizeof(void*) == 4) {
		return 1;
	} else if (IR_IS_SYM_CONST(addr_insn->op)) {
		void *addr = ir_sym_addr(ctx, addr_insn);

		if (!addr) {
			return 0;
		}
		return IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr);
	} else {
		return IR_IS_SIGNED_32BIT(addr_insn->val.i64);
	}
}

static bool ir_may_fuse_imm(ir_ctx *ctx, const ir_insn *val_insn)
{
	if (val_insn->type == IR_ADDR) {
		if (sizeof(void*) == 4) {
			return 1;
		} else if (IR_IS_SYM_CONST(val_insn->op)) {
			void *addr = ir_sym_addr(ctx, val_insn);

			if (!addr) {
				return 0;
			}
			return IR_IS_SIGNED_32BIT((intptr_t)addr);
		} else {
			return IR_IS_SIGNED_32BIT(val_insn->val.i64);
		}
	} else {
		return (ir_type_size[val_insn->type] <= 4 || IR_IS_SIGNED_32BIT(val_insn->val.i64));
	}
}

/* register allocation */
static int ir_add_const_tmp_reg(ir_ctx *ctx, ir_ref ref, uint32_t num, int n, ir_target_constraints *constraints)
{
	IR_ASSERT(IR_IS_CONST_REF(ref));
	const ir_insn *val_insn = &ctx->ir_base[ref];

	if (!ir_may_fuse_imm(ctx, val_insn)) {
		constraints->tmp_regs[n] = IR_TMP_REG(num, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
		n++;
	}
	return n;
}

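/*
 * Fill "constraints" for the instruction at "ref": the fixed result
 * register (if any), register hints for the operands, and the scratch and
 * temporary registers the emitter will need. The return value is a set of
 * IR_OP*_MUST/SHOULD_BE_IN_REG flags telling the register allocator where
 * each operand has to live.
 */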
int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, ir_target_constraints *constraints)
{
	uint32_t rule = ir_rule(ctx, ref);
	const ir_insn *insn;
	int n = 0;
	int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;

	constraints->def_reg = IR_REG_NONE;
	constraints->hints_count = 0;
	switch (rule & IR_RULE_MASK) {
		case IR_BINOP_INT:
			insn = &ctx->ir_base[ref];
			if (rule & IR_FUSED) {
				if (ctx->ir_base[insn->op1].op == IR_RLOAD) {
					flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
				} else {
					flags = IR_OP2_MUST_BE_IN_REG;
				}
			} else {
				flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_IMUL3:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_SHIFT:
			if (rule & IR_FUSED) {
				flags = IR_OP2_MUST_BE_IN_REG;
			} else {
				flags = IR_DEF_REUSES_OP1_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			}
			constraints->hints[1] = IR_REG_NONE;
			constraints->hints[2] = IR_REG_RCX;
			constraints->hints_count = 3;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RCX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_MUL_INT:
			/* %rax - used as input and result */
			constraints->def_reg = IR_REG_RAX;
			constraints->hints[1] = IR_REG_RAX;
			constraints->hints_count = 2;
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 2;
			break;
		case IR_DIV_INT:
			/* %rax - used as input and result */
			constraints->def_reg = IR_REG_RAX;
			constraints->hints[1] = IR_REG_RAX;
			constraints->hints_count = 2;
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 2;
			goto op2_const;
		case IR_MOD_INT:
			constraints->def_reg = IR_REG_RDX;
			constraints->hints[1] = IR_REG_RAX;
			constraints->hints_count = 2;
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 2;
			goto op2_const;
		case IR_MIN_MAX_INT:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
op2_const:
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_INT:
		case IR_TEST_INT:
			insn = &ctx->ir_base[ref];
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[0] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			} else if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			} else if (ir_rule(ctx, insn->op1) & IR_FUSED) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			}
			if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(2, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_CMP_FP:
			insn = &ctx->ir_base[ref];
			if (!(rule & IR_FUSED)) {
				constraints->tmp_regs[0] = IR_TMP_REG(3, IR_BOOL, IR_DEF_SUB_REF, IR_SAVE_SUB_REF);
				n = 1;
			}
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			if (IR_IS_CONST_REF(insn->op1)) {
				const ir_insn *val_insn = &ctx->ir_base[insn->op1];
				constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_BINOP_AVX:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_COND:
			insn = &ctx->ir_base[ref];
			if (!IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
				break;
			}
			IR_FALLTHROUGH;
		case IR_COND_CMP_INT:
			insn = &ctx->ir_base[ref];
			if (IR_IS_TYPE_INT(insn->type)) {
				if (IR_IS_CONST_REF(insn->op3)) {
					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
					constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
					n = 1;
				} else if (IR_IS_CONST_REF(insn->op2)) {
					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
					constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
					n = 1;
				} else {
					flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
				}
			} else {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			}
			break;
		case IR_COND_CMP_FP:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_VSTORE_INT:
			flags = IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints);
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_STORE_INT:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints);
			} else if (ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA) {
				constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_VSTORE_FP:
			flags = IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_LOAD_FP:
		case IR_LOAD_INT:
		case IR_MEM_OP_INT:
		case IR_MEM_INC:
		case IR_MEM_DEC:
		case IR_MEM_MUL_PWR2:
		case IR_MEM_DIV_PWR2:
		case IR_MEM_MOD_PWR2:
		case IR_MEM_BINOP_INT:
		case IR_MEM_SHIFT:
		case IR_MEM_SHIFT_CONST:
		case IR_CMP_AND_STORE_INT:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			}
			break;
		case IR_STORE_FP:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints);
			}
			if (IR_IS_CONST_REF(insn->op3)) {
				insn = &ctx->ir_base[insn->op3];
				constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n++;
			}
			break;
		case IR_SWITCH:
			flags = IR_OP2_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (IR_IS_CONST_REF(insn->op2)) {
				insn = &ctx->ir_base[insn->op2];
				constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			/* we need a temporary register in case the MIN CASE value is not zero or some CASE VAL can't fit into 32 bits */
			constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n++;
			break;
		case IR_CALL:
			insn = &ctx->ir_base[ref];
			if (IR_IS_TYPE_INT(insn->type)) {
				constraints->def_reg = IR_REG_INT_RET1;
#ifdef IR_REG_FP_RET1
			} else {
				constraints->def_reg = IR_REG_FP_RET1;
#endif
			}
			constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			IR_FALLTHROUGH;
		case IR_TAILCALL:
			insn = &ctx->ir_base[ref];
			if (insn->inputs_count > 2) {
				constraints->hints[2] = IR_REG_NONE;
				constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints);
				if (!IR_IS_CONST_REF(insn->op2)) {
					constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF);
					n++;
				}
			}
			flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_BINOP_SSE2:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG;
			break;
		case IR_SHIFT_CONST:
		case IR_INC:
		case IR_DEC:
		case IR_MUL_PWR2:
		case IR_DIV_PWR2:
		case IR_OP_INT:
		case IR_OP_FP:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_MOD_PWR2:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 8) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!IR_IS_SIGNED_32BIT(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_SMOD_PWR2:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 8) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!IR_IS_SIGNED_32BIT(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n++;
			break;
		case IR_SDIV_PWR2:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 8) {
				int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;
				if (!IR_IS_SIGNED_32BIT(offset)) {
					constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
					n++;
				}
			}
			break;
		case IR_BIT_COUNT:
			insn = &ctx->ir_base[ref];
			if (ir_type_size[insn->type] == 1) {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			} else {
				flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			}
			if (IR_IS_CONST_REF(insn->op1)) {
				constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
				n = 1;
			}
			break;
		case IR_CTPOP:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			insn = &ctx->ir_base[ref];
			constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			if (ir_type_size[insn->type] == 8) {
				constraints->tmp_regs[1] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF);
				n = 2;
			}
			break;
		case IR_COPY_INT:
		case IR_COPY_FP:
		case IR_SEXT:
		case IR_ZEXT:
		case IR_TRUNC:
		case IR_BITCAST:
		case IR_PROTO:
		case IR_FP2FP:
			flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG;
			break;
		case IR_ABS_INT:
			flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			break;
		case IR_PARAM:
			constraints->def_reg = ir_get_param_reg(ctx, ref);
			flags = 0;
			break;
		case IR_PI:
		case IR_PHI:
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_RLOAD:
			constraints->def_reg = ctx->ir_base[ref].op2;
			flags = IR_USE_SHOULD_BE_IN_REG;
			break;
		case IR_EXITCALL:
			flags = IR_USE_MUST_BE_IN_REG;
			constraints->def_reg = IR_REG_INT_RET1;
			break;
		case IR_IF_INT:
		case IR_GUARD:
		case IR_GUARD_NOT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			break;
		case IR_IJMP:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			break;
		case IR_RSTORE:
			flags = IR_OP3_SHOULD_BE_IN_REG;
			break;
		case IR_RETURN_INT:
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_INT_RET1;
			constraints->hints_count = 3;
			break;
		case IR_RETURN_FP:
#ifdef IR_REG_FP_RET1
			flags = IR_OP2_SHOULD_BE_IN_REG;
			constraints->hints[2] = IR_REG_FP_RET1;
			constraints->hints_count = 3;
#endif
			break;
		case IR_SNAPSHOT:
			flags = 0;
			break;
		case IR_VA_START:
			flags = IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
		case IR_VA_ARG:
			flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
		case IR_VA_COPY:
			flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF);
			n = 1;
			break;
	}
	constraints->tmps_count = n;

	return flags;
}

/* instruction selection */
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref);
static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root);

static void ir_swap_ops(ir_insn *insn)
{
	SWAP_REFS(insn->op1, insn->op2);
}

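/*
 * Try to turn a matched LEA back into a plain ADD (IR_BINOP_INT) when one
 * of the operands can be fused as a memory operand, swapping the operands
 * if that enables the fusion.
 */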
static bool ir_match_try_revert_lea_to_add(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *insn = &ctx->ir_base[ref];

	/* TODO: This optimization makes sense only if the other operand is killed */
	if (insn->op1 == insn->op2) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, ref)) {
		ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP;
		return 1;
	} else if (ir_match_try_fuse_load(ctx, insn->op1, ref)) {
		/* swap for better load fusion */
		ir_swap_ops(insn);
		ctx->rules[ref] = IR_BINOP_INT | IR_MAY_SWAP;
		return 1;
	}
	return 0;
}

static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref)
{
	if (!IR_IS_CONST_REF(addr_ref)) {
		uint32_t rule = ctx->rules[addr_ref];

		if (!rule) {
			ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref);
		}
		if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B) {
			ir_use_list *use_list;
			ir_ref j;

			if (rule == IR_LEA_IB && ir_match_try_revert_lea_to_add(ctx, addr_ref)) {
				return;
			}

			use_list = &ctx->use_lists[addr_ref];
			j = use_list->count;
			if (j > 1) {
				/* check if address is used only in LOAD and STORE */
				ir_ref *p = &ctx->use_edges[use_list->refs];

				do {
					ir_insn *insn = &ctx->ir_base[*p];
					if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
						return;
					}
					p++;
				} while (--j);
			}
			ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | rule;
		}
	}
}

/* A naive check if there is a STORE or CALL between this LOAD and the fusion root */
static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	if (ref + 1 != root) {
		ir_ref pos = ctx->prev_ref[root];

		do {
			ir_insn *insn = &ctx->ir_base[pos];

			if (insn->op == IR_STORE) {
				// TODO: check if LOAD and STORE addresses may alias
				return 1;
			} else if (insn->op == IR_CALL) {
				return 1;
			}
			pos = ctx->prev_ref[pos];
		} while (ref != pos);
	}
	return 0;
}

static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	if (ir_in_same_block(ctx, ref)
	 && ctx->ir_base[ref].op == IR_LOAD) {
		if (ctx->use_lists[ref].count == 2
		 && !ir_match_has_mem_deps(ctx, ref, root)) {
			ir_ref addr_ref = ctx->ir_base[ref].op2;
			ir_insn *addr_insn = &ctx->ir_base[addr_ref];

			if (IR_IS_CONST_REF(addr_ref)) {
				if (ir_may_fuse_addr(ctx, addr_insn)) {
					ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
					return;
				}
			} else {
				ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
				ir_match_fuse_addr(ctx, addr_ref);
				return;
			}
		}
	}
}

static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	ir_insn *insn = &ctx->ir_base[ref];

	if (ir_in_same_block(ctx, ref)
	 && insn->op == IR_LOAD) {
		if (ctx->use_lists[ref].count == 2
		 && !ir_match_has_mem_deps(ctx, ref, root)) {
			ir_ref addr_ref = ctx->ir_base[ref].op2;
			ir_insn *addr_insn = &ctx->ir_base[addr_ref];

			if (IR_IS_CONST_REF(addr_ref)) {
				if (ir_may_fuse_addr(ctx, addr_insn)) {
					ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
					return 1;
				}
			} else {
				ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
				ir_match_fuse_addr(ctx, addr_ref);
				return 1;
			}
		}
	} else if (insn->op == IR_PARAM) {
		if (ctx->use_lists[ref].count == 1
		 && ir_get_param_reg(ctx, ref) == IR_REG_NONE) {
			return 1;
		}
	} else if (ctx->ir_base[ref].op == IR_VLOAD) {
		return 1;
	}
	return 0;
}

static void ir_match_fuse_load_commutative_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		return;
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		return;
	} else if (ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
	}
}

static void ir_match_fuse_load_commutative_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (!IR_IS_CONST_REF(insn->op2)
	 && !ir_match_try_fuse_load(ctx, insn->op2, root)
	 && (IR_IS_CONST_REF(insn->op1) || ir_match_try_fuse_load(ctx, insn->op1, root))) {
		ir_swap_ops(insn);
	}
}

static void ir_match_fuse_load_cmp_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		ir_match_fuse_load(ctx, insn->op1, root);
	} else if (!ir_match_try_fuse_load(ctx, insn->op2, root)
	 && ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}

static void ir_match_fuse_load_test_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		ir_match_fuse_load(ctx, insn->op1, root);
	} else if (!ir_match_try_fuse_load(ctx, insn->op2, root)
	 && ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
	}
}

static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (insn->op != IR_EQ && insn->op != IR_NE) {
		if (insn->op == IR_LT || insn->op == IR_LE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
		ir_match_fuse_load(ctx, insn->op2, root);
	} else if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		/* pass */
	} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}

static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct)
{
	if (direct) {
		if (insn->op == IR_LT || insn->op == IR_LE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
	} else {
		if (insn->op == IR_GT || insn->op == IR_GE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
	}
	if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		/* pass */
	} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}

#define STR_EQUAL(name, name_len, str) (name_len == strlen(str) && memcmp(name, str, strlen(str)) == 0)

#define IR_IS_FP_FUNC_1(proto, _type) (proto->params_count == 1 && \
		proto->param_types[0] == _type && \
		proto->ret_type == _type)

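/*
 * Recognize calls to math builtins (sqrt/rint/floor/ceil/trunc/nearbyint
 * and their float variants) and map them to the corresponding IR_SSE_*
 * rules, so they can be emitted inline instead of through a library call.
 */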
"floor")) { 1791 if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { 1792 return IR_SSE_FLOOR; 1793 } 1794 } else if (STR_EQUAL(name, name_len, "floorf")) { 1795 if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { 1796 return IR_SSE_FLOOR; 1797 } 1798 } else if (STR_EQUAL(name, name_len, "ceil")) { 1799 if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { 1800 return IR_SSE_CEIL; 1801 } 1802 } else if (STR_EQUAL(name, name_len, "ceilf")) { 1803 if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { 1804 return IR_SSE_CEIL; 1805 } 1806 } else if (STR_EQUAL(name, name_len, "trunc")) { 1807 if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { 1808 return IR_SSE_TRUNC; 1809 } 1810 } else if (STR_EQUAL(name, name_len, "truncf")) { 1811 if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { 1812 return IR_SSE_TRUNC; 1813 } 1814 } else if (STR_EQUAL(name, name_len, "nearbyint")) { 1815 if (IR_IS_FP_FUNC_1(proto, IR_DOUBLE)) { 1816 return IR_SSE_NEARBYINT; 1817 } 1818 } else if (STR_EQUAL(name, name_len, "nearbyintf")) { 1819 if (IR_IS_FP_FUNC_1(proto, IR_FLOAT)) { 1820 return IR_SSE_NEARBYINT; 1821 } 1822 } 1823 } 1824 1825 return 0; 1826} 1827 1828static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref) 1829{ 1830 ir_insn *op2_insn; 1831 ir_insn *insn = &ctx->ir_base[ref]; 1832 uint32_t store_rule; 1833 ir_op load_op; 1834 1835 switch (insn->op) { 1836 case IR_EQ: 1837 case IR_NE: 1838 case IR_LT: 1839 case IR_GE: 1840 case IR_LE: 1841 case IR_GT: 1842 case IR_ULT: 1843 case IR_UGE: 1844 case IR_ULE: 1845 case IR_UGT: 1846 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) { 1847 if (IR_IS_CONST_REF(insn->op2) 1848 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op) 1849 && ctx->ir_base[insn->op2].val.i64 == 0 1850 && insn->op1 == ref - 1) { /* previous instruction */ 1851 ir_insn *op1_insn = &ctx->ir_base[insn->op1]; 1852 1853 if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) { 1854 /* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */ 1855 ir_match_fuse_load_test_int(ctx, op1_insn, ref); 1856 ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT; 1857 return IR_TESTCC_INT; 1858 } else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || 1859 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 1860 ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && 1861 (insn->op == IR_EQ || insn->op == IR_NE))) { 1862 /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ 1863 if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { 1864 ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); 1865 ctx->rules[insn->op1] = IR_BINOP_INT | IR_MAY_SWAP; 1866 } else { 1867 ir_match_fuse_load(ctx, op1_insn->op2, ref); 1868 ctx->rules[insn->op1] = IR_BINOP_INT; 1869 } 1870 return IR_SETCC_INT; 1871 } 1872 } 1873 ir_match_fuse_load_cmp_int(ctx, insn, ref); 1874 return IR_CMP_INT; 1875 } else { 1876 ir_match_fuse_load_cmp_fp(ctx, insn, ref); 1877 return IR_CMP_FP; 1878 } 1879 break; 1880 case IR_ADD: 1881 case IR_SUB: 1882 if (IR_IS_TYPE_INT(insn->type)) { 1883 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1884 op2_insn = &ctx->ir_base[insn->op2]; 1885 if (IR_IS_CONST_REF(insn->op1)) { 1886 // const 1887 // TODO: add support for sym+offset ??? 
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *op2_insn;
	ir_insn *insn = &ctx->ir_base[ref];
	uint32_t store_rule;
	ir_op load_op;

	switch (insn->op) {
		case IR_EQ:
		case IR_NE:
		case IR_LT:
		case IR_GE:
		case IR_LE:
		case IR_GT:
		case IR_ULT:
		case IR_UGE:
		case IR_ULE:
		case IR_UGT:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				if (IR_IS_CONST_REF(insn->op2)
				 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)
				 && ctx->ir_base[insn->op2].val.i64 == 0
				 && insn->op1 == ref - 1) { /* previous instruction */
					ir_insn *op1_insn = &ctx->ir_base[insn->op1];

					if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) {
						/* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */
						ir_match_fuse_load_test_int(ctx, op1_insn, ref);
						ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT;
						return IR_TESTCC_INT;
					} else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
							/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
							((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
								(insn->op == IR_EQ || insn->op == IR_NE))) {
						/* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */
						if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
							ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
							ctx->rules[insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
						} else {
							ir_match_fuse_load(ctx, op1_insn->op2, ref);
							ctx->rules[insn->op1] = IR_BINOP_INT;
						}
						return IR_SETCC_INT;
					}
				}
				ir_match_fuse_load_cmp_int(ctx, insn, ref);
				return IR_CMP_INT;
			} else {
				ir_match_fuse_load_cmp_fp(ctx, insn, ref);
				return IR_CMP_FP;
			}
			break;
		case IR_ADD:
		case IR_SUB:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_CONST_REF(insn->op1)) {
						// const
						// TODO: add support for sym+offset ???
					} else if (IR_IS_SYM_CONST(op2_insn->op)) {
						if (insn->op == IR_ADD && ir_may_fuse_addr(ctx, op2_insn)) {
							goto lea;
						}
						/* pass */
					} else if (op2_insn->val.i64 == 0) {
						// return IR_COPY_INT;
					} else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) ||
							(ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) {
lea:
						if (ctx->use_lists[insn->op1].count == 1) {
							uint32_t rule = ctx->rules[insn->op1];

							if (!rule) {
								ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1);
							}
							if (rule == IR_LEA_SI) {
								/* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */
								ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
								return IR_LEA_SI_O;
							} else if (rule == IR_LEA_SIB) {
								/* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */
								ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB;
								return IR_LEA_SIB_O;
							} else if (rule == IR_LEA_IB) {
								/* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... LEA [X+Y+im32] */
								ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB;
								return IR_LEA_IB_O;
							}
						}
						/* ADD(X, imm32) => LEA [X+imm32] */
						return IR_LEA_OB;
					} else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) {
						if (insn->op == IR_ADD) {
							if (op2_insn->val.i64 == 1) {
								/* ADD(_, 1) => INC */
								return IR_INC;
							} else {
								/* ADD(_, -1) => DEC */
								return IR_DEC;
							}
						} else {
							if (op2_insn->val.i64 == 1) {
								/* SUB(_, 1) => DEC */
								return IR_DEC;
							} else {
								/* SUB(_, -1) => INC */
								return IR_INC;
							}
						}
					}
				} else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) {
					if (insn->op1 != insn->op2) {
						if (ctx->use_lists[insn->op1].count == 1) {
							uint32_t rule = ctx->rules[insn->op1];

							if (!rule) {
								ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1);
							}
							if (rule == IR_LEA_OB) {
								ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
								if (ctx->use_lists[insn->op2].count == 1) {
									rule = ctx->rules[insn->op2];
									if (!rule) {
										ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
									}
									if (rule == IR_LEA_SI) {
										/* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */
										ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
										return IR_LEA_OB_SI;
									}
								}
								/* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */
								return IR_LEA_OB_I;
							} else if (rule == IR_LEA_SI) {
								ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
								if (ctx->use_lists[insn->op2].count == 1) {
									rule = ctx->rules[insn->op2];
									if (!rule) {
										ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
									}
									if (rule == IR_LEA_OB) {
										/* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(y, x) => SKIP ... SKIP ... LEA */
										ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
										return IR_LEA_SI_OB;
									}
								}
								/* x = MUL(X, 2|4|8) ... ADD(x, Y) => SKIP ... LEA */
								return IR_LEA_SI_B;
							}
						}
						if (ctx->use_lists[insn->op2].count == 1) {
							uint32_t rule = ctx->rules[insn->op2];
							if (!rule) {
								ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2);
							}
							if (rule == IR_LEA_OB) {
								ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB;
								/* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... LEA */
								return IR_LEA_I_OB;
							} else if (rule == IR_LEA_SI) {
								ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI;
								/* x = MUL(X, 2|4|8) ... ADD(Y, x) => SKIP ... LEA */
								return IR_LEA_B_SI;
							}
						}
					}
					/* ADD(X, Y) => LEA [X + Y] */
					return IR_LEA_IB;
				}
binop_int:
				if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) {
					ir_match_fuse_load_commutative_int(ctx, insn, ref);
					return IR_BINOP_INT | IR_MAY_SWAP;
				} else {
					ir_match_fuse_load(ctx, insn->op2, ref);
					return IR_BINOP_INT;
				}
			} else {
binop_fp:
				if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) {
					ir_match_fuse_load_commutative_fp(ctx, insn, ref);
					if (ctx->mflags & IR_X86_AVX) {
						return IR_BINOP_AVX;
					} else {
						return IR_BINOP_SSE2 | IR_MAY_SWAP;
					}
				} else {
					ir_match_fuse_load(ctx, insn->op2, ref);
					if (ctx->mflags & IR_X86_AVX) {
						return IR_BINOP_AVX;
					} else {
						return IR_BINOP_SSE2;
					}
				}
			}
			break;
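		/*
		 * Illustrative sketch: LEA folds up to "base + index*scale + disp32"
		 * into one instruction, so the chains of ADD/MUL nodes matched above
		 * collapse into a single address computation, e.g.
		 *
		 *     x = ADD(X, 16); z = ADD(x, MUL(Y, 8))
		 *  =>
		 *     lea z, [X + Y*8 + 16]    ; IR_LEA_OB_SI
		 *
		 * This is also why the MUL case below prefers LEA/SHL over a real
		 * multiply for the constants an SIB byte can encode.
		 */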
		case IR_MUL:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// 0
					} else if (op2_insn->val.u64 == 1) {
						// return IR_COPY_INT;
					} else if (ir_type_size[insn->type] >= 4 &&
							(op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || op2_insn->val.u64 == 8)) {
						/* MUL(X, 2|4|8) => LEA [X*2|4|8] */
						return IR_LEA_SI;
					} else if (ir_type_size[insn->type] >= 4 &&
							(op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) {
						/* MUL(X, 3|5|9) => LEA [X+X*2|4|8] */
						return IR_LEA_SIB;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						/* MUL(X, PWR2) => SHL */
						return IR_MUL_PWR2;
					} else if (IR_IS_TYPE_SIGNED(insn->type)
					 && ir_type_size[insn->type] != 1
					 && IR_IS_SIGNED_32BIT(op2_insn->val.i64)
					 && !IR_IS_CONST_REF(insn->op1)) {
						/* MUL(_, imm32) => IMUL */
						ir_match_fuse_load(ctx, insn->op1, ref);
						return IR_IMUL3;
					}
				}
				/* Prefer IMUL over MUL because it's more flexible and uses fewer registers ??? */
//				if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) {
				if (ir_type_size[insn->type] != 1) {
					goto binop_int;
				}
				ir_match_fuse_load(ctx, insn->op2, ref);
				return IR_MUL_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_ADD_OV:
		case IR_SUB_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			goto binop_int;
		case IR_MUL_OV:
			IR_ASSERT(IR_IS_TYPE_INT(insn->type));
			if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (!IR_IS_SYM_CONST(op2_insn->op)
					 && IR_IS_SIGNED_32BIT(op2_insn->val.i64)
					 && !IR_IS_CONST_REF(insn->op1)) {
						/* MUL(_, imm32) => IMUL */
						ir_match_fuse_load(ctx, insn->op1, ref);
						return IR_IMUL3;
					}
				}
				goto binop_int;
			}
			ir_match_fuse_load(ctx, insn->op2, ref);
			return IR_MUL_INT;
		case IR_DIV:
			if (IR_IS_TYPE_INT(insn->type)) {
				if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 1) {
						// return IR_COPY_INT;
					} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
						/* DIV(X, PWR2) => SHR */
						if (IR_IS_TYPE_UNSIGNED(insn->type)) {
							return IR_DIV_PWR2;
						} else {
							return IR_SDIV_PWR2;
						}
					}
				}
				ir_match_fuse_load(ctx, insn->op2, ref);
				return IR_DIV_INT;
			} else {
				goto binop_fp;
			}
			break;
		case IR_MOD:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) {
					/* MOD(X, PWR2) => AND */
					if (IR_IS_TYPE_UNSIGNED(insn->type)) {
						return IR_MOD_PWR2;
					} else {
						return IR_SMOD_PWR2;
					}
				}
			}
			ir_match_fuse_load(ctx, insn->op2, ref);
			return IR_MOD_INT;
		case IR_BSWAP:
		case IR_NOT:
			if (insn->type == IR_BOOL) {
				IR_ASSERT(IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)); // TODO: IR_BOOL_NOT_FP
				return IR_BOOL_NOT_INT;
			} else {
				IR_ASSERT(IR_IS_TYPE_INT(insn->type));
				return IR_OP_INT;
			}
			break;
		case IR_NEG:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_OP_INT;
			} else {
				return IR_OP_FP;
			}
		case IR_ABS:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax
			} else {
				return IR_OP_FP;
			}
		case IR_OR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// return IR_COPY_INT;
				} else if (op2_insn->val.i64 == -1) {
					// -1
				}
			}
			goto binop_int;
		case IR_AND:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				} else if (op2_insn->val.i64 == 0) {
					// 0
				} else if (op2_insn->val.i64 == -1) {
					// return IR_COPY_INT;
				}
			}
			goto binop_int;
		case IR_XOR:
			if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (IR_IS_SYM_CONST(op2_insn->op)) {
					/* pass */
				} else if (IR_IS_CONST_REF(insn->op1)) {
					// const
				}
			}
			goto binop_int;
		case IR_SHL:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// return IR_COPY_INT;
					} else if (ir_type_size[insn->type] >= 4) {
						if (op2_insn->val.u64 == 1) {
							// lea [op1*2]
						} else if (op2_insn->val.u64 == 2) {
							// lea [op1*4]
						} else if (op2_insn->val.u64 == 3) {
							// lea [op1*8]
						}
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_SHR:
		case IR_SAR:
		case IR_ROL:
		case IR_ROR:
			if (IR_IS_CONST_REF(insn->op2)) {
				if (ctx->flags & IR_OPT_CODEGEN) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (IR_IS_SYM_CONST(op2_insn->op)) {
						/* pass */
					} else if (IR_IS_CONST_REF(insn->op1)) {
						// const
					} else if (op2_insn->val.u64 == 0) {
						// return IR_COPY_INT;
					}
				}
				return IR_SHIFT_CONST;
			}
			return IR_SHIFT;
		case IR_MIN:
		case IR_MAX:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_MIN_MAX_INT | IR_MAY_SWAP;
			} else {
				goto binop_fp;
			}
			break;
		case IR_COPY:
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_COPY_INT | IR_MAY_REUSE;
			} else {
				return IR_COPY_FP | IR_MAY_REUSE;
			}
			break;
		case IR_CALL:
			if (IR_IS_CONST_REF(insn->op2)) {
				const ir_insn *func = &ctx->ir_base[insn->op2];

				if (func->op == IR_FUNC && func->proto) {
					uint32_t rule = ir_match_builtin_call(ctx, func);

					if (rule) {
						return rule;
					}
				}
			}
			ctx->flags2 |= IR_HAS_CALLS | IR_16B_FRAME_ALIGNMENT;
#ifndef IR_REG_FP_RET1
			if (IR_IS_TYPE_FP(insn->type)) {
				ctx->flags2 |= IR_HAS_FP_RET_SLOT;
			}
#endif
			IR_FALLTHROUGH;
		case IR_TAILCALL:
		case IR_IJMP:
			ir_match_fuse_load(ctx, insn->op2, ref);
			return insn->op;
		case IR_VAR:
			return IR_SKIPPED | IR_VAR;
		case IR_PARAM:
			return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM;
		case IR_ALLOCA:
			/* alloca() may be used only in functions */
			if (ctx->flags & IR_FUNCTION) {
				if (IR_IS_CONST_REF(insn->op2) && ctx->cfg_map[ref] == 1) {
					ir_insn *val = &ctx->ir_base[insn->op2];

					if (!IR_IS_SYM_CONST(val->op)) {
						return IR_STATIC_ALLOCA;
					}
				}
				ctx->flags |= IR_USE_FRAME_POINTER;
				ctx->flags2 |= IR_HAS_ALLOCA | IR_16B_FRAME_ALIGNMENT;
			}
			return IR_ALLOCA;
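		/*
		 * Illustrative sketch: the VSTORE/STORE cases below try to fuse a
		 * load/op/store triple over the same address into one x86
		 * read-modify-write instruction, e.g.
		 *
		 *     l = VLOAD(a); v = ADD(l, 3); VSTORE(a, v)
		 *  =>
		 *     add dword [a], 3    ; IR_MEM_BINOP_INT
		 *
		 * provided the loaded value has no other uses, so the memory cell
		 * is the only place the intermediate result has to live.
		 */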
		case IR_VSTORE:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
				store_rule = IR_VSTORE_INT;
				load_op = IR_VLOAD;
store_int:
				if ((ctx->flags & IR_OPT_CODEGEN)
				 && ir_in_same_block(ctx, insn->op3)
				 && (ctx->use_lists[insn->op3].count == 1 ||
					(ctx->use_lists[insn->op3].count == 2
					 && (ctx->ir_base[insn->op3].op == IR_ADD_OV ||
						ctx->ir_base[insn->op3].op == IR_SUB_OV)))) {
					ir_insn *op_insn = &ctx->ir_base[insn->op3];
					uint32_t rule = ctx->rules[insn->op3];

					if (!rule) {
						ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3);
					}
					if (((rule & IR_RULE_MASK) == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = BINOP(l, _) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */
							ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							if (!IR_IS_CONST_REF(op_insn->op2)
							 && ctx->rules[op_insn->op2] == (IR_FUSED|IR_SIMPLE|IR_LOAD)) {
								ctx->rules[op_insn->op2] = IR_LOAD_INT;
							}
							return IR_MEM_BINOP_INT;
						} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
						 && insn->op1 == op_insn->op2
						 && ctx->ir_base[op_insn->op2].op == load_op
						 && ctx->ir_base[op_insn->op2].op2 == insn->op2
						 && ctx->use_lists[op_insn->op2].count == 2) {
							/* l = LOAD(_, a) ... v = BINOP(_, l) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */
							ir_swap_ops(op_insn);
							ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_BINOP_INT;
						}
					} else if (rule == IR_INC) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = INC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_INC */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_INC;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_INC;
						}
					} else if (rule == IR_DEC) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = DEC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DEC */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_DEC;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_DEC;
						}
					} else if (rule == IR_MUL_PWR2) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = MUL_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MUL_PWR2 */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_MUL_PWR2;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_MUL_PWR2;
						}
					} else if (rule == IR_DIV_PWR2) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = DIV_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DIV_PWR2 */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_DIV_PWR2;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_DIV_PWR2;
						}
					} else if (rule == IR_MOD_PWR2) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = MOD_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MOD_PWR2 */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_MOD_PWR2;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_MOD_PWR2;
						}
					} else if (rule == IR_SHIFT) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = SHIFT(l, _) ... STORE(l, a, v) => SKIP ... SKIP_SHIFT ... MEM_SHIFT */
							ctx->rules[insn->op3] = IR_FUSED | IR_SHIFT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_SHIFT;
						}
					} else if (rule == IR_SHIFT_CONST) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = SHIFT(l, CONST) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_SHIFT_CONST */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_SHIFT_CONST;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_SHIFT_CONST;
						}
					} else if (rule == IR_OP_INT && op_insn->op != IR_BSWAP) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == load_op
						 && ctx->ir_base[op_insn->op1].op2 == insn->op2
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = LOAD(_, a) ... v = OP(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_OP */
							ctx->rules[insn->op3] = IR_SKIPPED | IR_OP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | load_op;
							return IR_MEM_OP_INT;
						}
					} else if (rule == IR_CMP_INT && load_op == IR_LOAD) {
						/* c = CMP(_, _) ... STORE(c) => SKIP_CMP ... CMP_AND_STORE_INT */
						ctx->rules[insn->op3] = IR_FUSED | IR_CMP_INT;
						return IR_CMP_AND_STORE_INT;
					}
				}
				return store_rule;
			} else {
				return IR_VSTORE_FP;
			}
			break;
		case IR_LOAD:
			ir_match_fuse_addr(ctx, insn->op2);
			if (IR_IS_TYPE_INT(insn->type)) {
				return IR_LOAD_INT;
			} else {
				return IR_LOAD_FP;
			}
			break;
		case IR_STORE:
			ir_match_fuse_addr(ctx, insn->op2);
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) {
				store_rule = IR_STORE_INT;
				load_op = IR_LOAD;
				goto store_int;
			} else {
				return IR_STORE_FP;
			}
			break;
		case IR_RLOAD:
			if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) {
				return IR_SKIPPED | IR_RLOAD;
			}
			return IR_RLOAD;
		case IR_RSTORE:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				if ((ctx->flags & IR_OPT_CODEGEN)
				 && ir_in_same_block(ctx, insn->op2)
				 && ctx->use_lists[insn->op2].count == 1
				 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
					ir_insn *op_insn = &ctx->ir_base[insn->op2];

					if (op_insn->op == IR_ADD ||
						op_insn->op == IR_SUB ||
//						op_insn->op == IR_MUL ||
						op_insn->op == IR_OR ||
						op_insn->op == IR_AND ||
						op_insn->op == IR_XOR) {
						if (insn->op1 == op_insn->op1
						 && ctx->ir_base[op_insn->op1].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op1].op2 == insn->op3
						 && ctx->use_lists[op_insn->op1].count == 2) {
							/* l = RLOAD(r) ... v = BINOP(l, _) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
						 && insn->op1 == op_insn->op2
						 && ctx->ir_base[op_insn->op2].op == IR_RLOAD
						 && ctx->ir_base[op_insn->op2].op2 == insn->op3
						 && ctx->use_lists[op_insn->op2].count == 2) {
							/* l = RLOAD(r) ... v = BINOP(x, l) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */
							ir_swap_ops(op_insn);
							ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
							ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD;
							return IR_REG_BINOP_INT;
						}
					}
				}
			}
			ir_match_fuse_load(ctx, insn->op2, ref);
			return IR_RSTORE;
		case IR_START:
		case IR_BEGIN:
		case IR_IF_TRUE:
		case IR_IF_FALSE:
		case IR_CASE_VAL:
		case IR_CASE_DEFAULT:
		case IR_MERGE:
		case IR_LOOP_BEGIN:
		case IR_UNREACHABLE:
			return IR_SKIPPED | insn->op;
		case IR_RETURN:
			if (!insn->op2) {
				return IR_RETURN_VOID;
			} else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				return IR_RETURN_INT;
			} else {
				return IR_RETURN_FP;
			}
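		/*
		 * Illustrative sketch: a comparison whose only consumer is the IF
		 * below never materializes a boolean; the CMP (or TEST) is emitted
		 * next to the conditional jump instead, e.g.
		 *
		 *     c = LT(x, y); IF(c)
		 *  =>
		 *     cmp x, y
		 *     jl .true_block      ; IR_CMP_AND_BRANCH_INT
		 *
		 * The same idea extends to AND (test+jcc) and OVERFLOW (jo/jc).
		 */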
		case IR_IF:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						if (IR_IS_CONST_REF(op2_insn->op2)
						 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
						 && ctx->ir_base[op2_insn->op2].val.i64 == 0
						 && op2_insn->op1 == insn->op2 - 1) { /* previous instruction */
							ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1];

							if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) {
								/* v = AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */
								ir_match_fuse_load_test_int(ctx, op1_insn, ref);
								ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT;
								ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP;
								return IR_TEST_AND_BRANCH_INT;
							} else if (insn->op2 == ref - 1 && /* previous instruction */
									((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
									/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
									((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
										(op2_insn->op == IR_EQ || op2_insn->op == IR_NE)))) {
								/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */
								if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
									ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
									ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
								} else {
									ir_match_fuse_load(ctx, op1_insn->op2, ref);
									ctx->rules[op2_insn->op1] = IR_BINOP_INT;
								}
								ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
								return IR_JCC_INT;
							}
						}
						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
						ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_CMP_AND_BRANCH_INT;
					} else {
						/* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */
						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, 1);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_CMP_AND_BRANCH_FP;
					}
				} else if (op2_insn->op == IR_AND) {
					/* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */
					ir_match_fuse_load_test_int(ctx, op2_insn, ref);
					ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT;
					return IR_TEST_AND_BRANCH_INT;
				} else if (op2_insn->op == IR_OVERFLOW) {
					/* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ... OVERFLOW_AND_BRANCH */
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_OVERFLOW_AND_BRANCH;
				}
			}
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
				if (insn->op2 == ref - 1 /* previous instruction */
				 && ir_in_same_block(ctx, insn->op2)) {
					op2_insn = &ctx->ir_base[insn->op2];
					if (op2_insn->op == IR_ADD ||
						op2_insn->op == IR_SUB ||
//						op2_insn->op == IR_MUL ||
						op2_insn->op == IR_OR ||
						op2_insn->op == IR_AND ||
						op2_insn->op == IR_XOR) {

						/* v = BINOP(_, _); IF(v) => BINOP; JCC */
						if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
							ir_match_fuse_load_commutative_int(ctx, op2_insn, ref);
							ctx->rules[insn->op2] = IR_BINOP_INT | IR_MAY_SWAP;
						} else {
							ir_match_fuse_load(ctx, op2_insn->op2, ref);
							ctx->rules[insn->op2] = IR_BINOP_INT;
						}
						return IR_JCC_INT;
					}
				} else if ((ctx->flags & IR_OPT_CODEGEN)
				 && insn->op1 == ref - 1 /* previous instruction */
				 && insn->op2 == ref - 2 /* previous instruction */
				 && ir_in_same_block(ctx, insn->op2)
				 && ctx->use_lists[insn->op2].count == 2
				 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) {
					ir_insn *store_insn = &ctx->ir_base[insn->op1];

					if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) {
						ir_insn *op_insn = &ctx->ir_base[insn->op2];

						if (op_insn->op == IR_ADD ||
							op_insn->op == IR_SUB ||
//							op_insn->op == IR_MUL ||
							op_insn->op == IR_OR ||
							op_insn->op == IR_AND ||
							op_insn->op == IR_XOR) {
							if (ctx->ir_base[op_insn->op1].op == IR_LOAD
							 && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) {
								if (ir_in_same_block(ctx, op_insn->op1)
								 && ctx->use_lists[op_insn->op1].count == 2
								 && store_insn->op1 == op_insn->op1) {
									/* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */
									ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
									ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD;
									ir_match_fuse_addr(ctx, store_insn->op2);
									ctx->rules[insn->op1] = IR_MEM_BINOP_INT;
									return IR_JCC_INT;
								}
							} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
							 && ctx->ir_base[op_insn->op2].op == IR_LOAD
							 && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) {
								if (ir_in_same_block(ctx, op_insn->op2)
								 && ctx->use_lists[op_insn->op2].count == 2
								 && store_insn->op1 == op_insn->op2) {
									/* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */
									ir_swap_ops(op_insn);
									ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT;
									ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD;
									ir_match_fuse_addr(ctx, store_insn->op2);
									ctx->rules[insn->op1] = IR_MEM_BINOP_INT;
									return IR_JCC_INT;
								}
							}
						}
					}
				}
				ir_match_fuse_load(ctx, insn->op2, ref);
				return IR_IF_INT;
			} else {
				IR_ASSERT(0 && "NIY IR_IF_FP");
				break;
			}
		case IR_COND:
			if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) {
				ir_insn *op1_insn = &ctx->ir_base[insn->op1];

				if (op1_insn->op >= IR_EQ && op1_insn->op <= IR_UGT) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op1_insn->op1].type)) {
						ir_match_fuse_load_cmp_int(ctx, op1_insn, ref);
						ctx->rules[insn->op1] = IR_FUSED | IR_CMP_INT;
						return IR_COND_CMP_INT;
					} else {
						ir_match_fuse_load_cmp_fp_br(ctx, op1_insn, ref, 1);
						ctx->rules[insn->op1] = IR_FUSED | IR_CMP_FP;
						return IR_COND_CMP_FP;
					}
				}
			}
			return IR_COND;
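		/*
		 * Illustrative sketch: GUARD/GUARD_NOT are side exits - they leave
		 * the compiled code when the condition fails instead of branching
		 * to an ordinary successor block, e.g.
		 *
		 *     c = EQ(x, 0); GUARD(c, exit)
		 *  =>
		 *     test x, x
		 *     jne =>exit          ; IR_GUARD_CMP_INT, jump taken on failure
		 *
		 * so the same CMP/TEST/OVERFLOW fusions as for IF apply, with the
		 * condition inverted for GUARD (versus GUARD_NOT).
		 */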
		case IR_GUARD:
		case IR_GUARD_NOT:
			if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) {
				op2_insn = &ctx->ir_base[insn->op2];
				if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT
				 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP
				 && (insn->op2 == ref - 1 ||
					(insn->op2 == ctx->prev_ref[ref] - 1
					 && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) {
					if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) {
						if (IR_IS_CONST_REF(op2_insn->op2)
						 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op)
						 && ctx->ir_base[op2_insn->op2].val.i64 == 0) {
							if (op2_insn->op1 == insn->op2 - 1) { /* previous instruction */
								ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1];

								if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) ||
									/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
									((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) &&
										(op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) {
									if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) {
										ir_match_fuse_load_commutative_int(ctx, op1_insn, ref);
										ctx->rules[op2_insn->op1] = IR_BINOP_INT | IR_MAY_SWAP;
									} else {
										ir_match_fuse_load(ctx, op1_insn->op2, ref);
										ctx->rules[op2_insn->op1] = IR_BINOP_INT;
									}
									/* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */
									ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
									return IR_GUARD_JCC_INT;
								}
							} else if ((ctx->flags & IR_OPT_CODEGEN)
							 && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */
							 && ir_in_same_block(ctx, op2_insn->op1)
							 && ctx->use_lists[op2_insn->op1].count == 2) {
								ir_insn *store_insn = &ctx->ir_base[insn->op2 - 1];

								if (store_insn->op == IR_STORE && store_insn->op3 == op2_insn->op1) {
									ir_insn *op_insn = &ctx->ir_base[op2_insn->op1];

									if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) ||
										/* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */
										((op_insn->op == IR_ADD || op_insn->op == IR_SUB) &&
											(op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) {
										if (ctx->ir_base[op_insn->op1].op == IR_LOAD
										 && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) {
											if (ir_in_same_block(ctx, op_insn->op1)
											 && ctx->use_lists[op_insn->op1].count == 2
											 && store_insn->op1 == op_insn->op1) {
												/* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */
												ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT;
												ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD;
												ir_match_fuse_addr(ctx, store_insn->op2);
												ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT;
												ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP;
												return IR_GUARD_JCC_INT;
											}
										} else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE)
										 && ctx->ir_base[op_insn->op2].op == IR_LOAD
										 && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) {
											if (ir_in_same_block(ctx, op_insn->op2)
											 && ctx->use_lists[op_insn->op2].count == 2
											 && store_insn->op1 == op_insn->op2) {
												/* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */
												ir_swap_ops(op_insn);
												ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT;
												ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD;
												ir_match_fuse_addr(ctx, store_insn->op2);
												ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT;
												ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP;
												return IR_GUARD_JCC_INT;
											}
										}
									}
								}
							}
						}
						/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
						ir_match_fuse_load_cmp_int(ctx, op2_insn, ref);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT;
						return IR_GUARD_CMP_INT;
					} else {
						/* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */
						ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT);
						ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP;
						return IR_GUARD_CMP_FP;
					}
				} else if (op2_insn->op == IR_AND) { // TODO: OR, XOR, etc.
					/* c = AND(_, _) ... GUARD(c) => SKIP_TEST ... GUARD_TEST */
					ir_match_fuse_load_test_int(ctx, op2_insn, ref);
					ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT;
					return IR_GUARD_TEST_INT;
				} else if (op2_insn->op == IR_OVERFLOW) {
					/* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ... GUARD_OVERFLOW */
					ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW;
					return IR_GUARD_OVERFLOW;
				}
			}
			ir_match_fuse_load(ctx, insn->op2, ref);
			return insn->op;
		case IR_INT2FP:
			if (ir_type_size[ctx->ir_base[insn->op1].type] > (IR_IS_TYPE_SIGNED(ctx->ir_base[insn->op1].type) ? 2 : 4)) {
				ir_match_fuse_load(ctx, insn->op1, ref);
			}
			return insn->op;
		case IR_SEXT:
		case IR_ZEXT:
		case IR_FP2INT:
		case IR_FP2FP:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return insn->op;
		case IR_TRUNC:
		case IR_PROTO:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return insn->op | IR_MAY_REUSE;
		case IR_BITCAST:
			ir_match_fuse_load(ctx, insn->op1, ref);
			if (IR_IS_TYPE_INT(insn->type) && IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				return insn->op | IR_MAY_REUSE;
			} else {
				return insn->op;
			}
		case IR_CTLZ:
		case IR_CTTZ:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return IR_BIT_COUNT;
		case IR_CTPOP:
			ir_match_fuse_load(ctx, insn->op1, ref);
			return (ctx->mflags & IR_X86_BMI1) ? IR_BIT_COUNT : IR_CTPOP;
		case IR_VA_START:
			ctx->flags2 |= IR_HAS_VA_START;
			if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) {
				ir_use_list *use_list = &ctx->use_lists[insn->op2];
				ir_ref *p, n = use_list->count;
				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					ir_insn *use_insn = &ctx->ir_base[*p];
					if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) {
					} else if (use_insn->op == IR_VA_COPY) {
						if (use_insn->op3 == insn->op2) {
							ctx->flags2 |= IR_HAS_VA_COPY;
						}
					} else if (use_insn->op == IR_VA_ARG) {
						if (use_insn->op2 == insn->op2) {
							if (IR_IS_TYPE_INT(use_insn->type)) {
								ctx->flags2 |= IR_HAS_VA_ARG_GP;
							} else {
								IR_ASSERT(IR_IS_TYPE_FP(use_insn->type));
								ctx->flags2 |= IR_HAS_VA_ARG_FP;
							}
						}
					} else if (*p > ref) {
						/* direct va_list access */
						ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP;
					}
				}
			}
			return IR_VA_START;
		case IR_VA_END:
			return IR_SKIPPED | IR_NOP;
		case IR_VADDR:
			if (ctx->use_lists[ref].count > 0) {
				ir_use_list *use_list = &ctx->use_lists[ref];
				ir_ref *p, n = use_list->count;

				for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) {
					if (ctx->ir_base[*p].op != IR_VA_END) {
						return IR_STATIC_ALLOCA;
					}
				}
			}
			return IR_SKIPPED | IR_NOP;
		default:
			break;
	}

	return insn->op;
}

static void ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule)
{
	if (rule == IR_LEA_IB) {
		ir_match_try_revert_lea_to_add(ctx, ref);
	}
}

/* code generation */
static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	int32_t offset;

	IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		*reg = ctx->spill_base;
		return offset;
	}
	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(offset);
}

static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v)
{
	int32_t offset;
	ir_reg base;

	IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]);
	offset = ctx->live_intervals[v]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		return IR_MEM_BO(ctx->spill_base, offset);
	}
	base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	offset = IR_SPILL_POS_TO_OFFSET(offset);
	return IR_MEM_BO(base, offset);
}

static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	return ir_vreg_spill_slot(ctx, ctx->vregs[ref]);
}

static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem)
{
	ir_mem m = ir_ref_spill_slot(ctx, ref);
	return IR_MEM_VAL(m) == IR_MEM_VAL(mem);
}
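
/*
 * Note (illustrative): a spill slot is addressed either relative to the
 * frame pointer (offsets are negative, below the saved registers) or
 * relative to the stack pointer (offsets are positive, above the area
 * reserved for outgoing call arguments) - that is exactly what
 * IR_SPILL_POS_TO_OFFSET selects on. E.g. with a frame pointer and an
 * unaligned 64-byte frame, spill position 8 becomes "qword [rbp-56]".
 */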

static ir_mem ir_var_spill_slot(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *var_insn = &ctx->ir_base[ref];
	ir_reg reg;

	IR_ASSERT(var_insn->op == IR_VAR);
	reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_MEM_BO(reg, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
}

static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_live_interval *ival;

	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	while (ival) {
		ir_use_pos *use_pos = ival->use_pos;
		while (use_pos) {
			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
				return !use_pos->next || use_pos->next->op_num == 0;
			}
			use_pos = use_pos->next;
		}
		ival = ival->next;
	}
	return 0;
}

static void ir_emit_mov_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] == 8) {
		IR_ASSERT(sizeof(void*) == 8);
|.if X64
		if (IR_IS_UNSIGNED_32BIT(val)) {
			| mov Rd(reg), (uint32_t)val // zero extended load
		} else if (IR_IS_SIGNED_32BIT(val)) {
			| mov Rq(reg), (int32_t)val // sign extended load
		} else if (type == IR_ADDR && IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, (intptr_t)val)) {
			| lea Ra(reg), [&val]
		} else {
			| mov64 Ra(reg), val
		}
|.endif
	} else {
		| ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load
	}
}

static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (val == 0) {
		| ASM_REG_REG_OP xor, type, reg, reg
	} else {
		ir_emit_mov_imm_int(ctx, type, reg, val);
	}
}

static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_REG_MEM_OP mov, type, reg, mem
}

static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *insn = &ctx->ir_base[src];
	int label;

	if (type == IR_FLOAT && insn->val.u32 == 0) {
		if (ctx->mflags & IR_X86_AVX) {
			| vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		} else {
			| xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		}
	} else if (type == IR_DOUBLE && insn->val.u64 == 0) {
		if (ctx->mflags & IR_X86_AVX) {
			| vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		} else {
			| xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		}
	} else {
		label = ir_const_label(ctx, src);
		| ASM_FP_REG_TXT_OP movs, type, reg, [=>label]
	}
}

static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_REG_MEM_OP movs, type, reg, mem
}

static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_load_mem_int(ctx, type, reg, mem);
	} else {
		ir_emit_load_mem_fp(ctx, type, reg, mem);
	}
}
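
/*
 * Note (illustrative): ir_emit_mov_imm_int above picks the cheapest
 * encoding for a 64-bit constant -
 *
 *     mov eax, 0x12345678         ; imm32, upper half implicitly zeroed
 *     mov rax, -1                 ; sign-extended imm32
 *     lea rax, [rip+disp32]       ; address within +/-2GB of the code buffer
 *     mov rax, 0x1122334455667788 ; full 10-byte movabs as a last resort
 *
 * and ir_emit_load_imm_int additionally turns 0 into "xor reg, reg",
 * which is shorter and breaks the register dependency.
 */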

static void ir_load_local_addr(ir_ctx *ctx, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	int32_t offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[src].op3);

	IR_ASSERT(ir_rule(ctx, src) == IR_STATIC_ALLOCA);
	if (offset == 0) {
		| mov Ra(reg), Ra(base)
	} else {
		| lea Ra(reg), [Ra(base)+offset]
	}
}

static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	if (IR_IS_CONST_REF(src)) {
		if (IR_IS_TYPE_INT(type)) {
			ir_insn *insn = &ctx->ir_base[src];

			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
				void *addr = ir_sym_val(ctx, insn);
				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
			} else if (insn->op == IR_STR) {
				ir_backend_data *data = ctx->data;
				dasm_State **Dst = &data->dasm_state;
				int label = ir_const_label(ctx, src);

				| lea Ra(reg), aword [=>label]
			} else {
				ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
			}
		} else {
			ir_emit_load_imm_fp(ctx, type, reg, src);
		}
	} else if (ctx->vregs[src]) {
		ir_emit_load_mem(ctx, type, reg, ir_ref_spill_slot(ctx, src));
	} else {
		ir_load_local_addr(ctx, reg, src);
	}
}

static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_MEM_REG_OP mov, type, mem, reg
}

static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_MEM_REG_OP movs, type, mem, reg
}

static void ir_emit_store_mem_imm(ir_ctx *ctx, ir_type type, ir_mem mem, int32_t imm)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_MEM_IMM_OP mov, type, mem, imm
}

static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, bool is_arg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *val_insn = &ctx->ir_base[src];

	IR_ASSERT(IR_IS_CONST_REF(src));
	if (val_insn->op == IR_STR) {
		int label = ir_const_label(ctx, src);

		IR_ASSERT(tmp_reg != IR_REG_NONE);
|.if X64
		| lea Ra(tmp_reg), aword [=>label]
||		ir_emit_store_mem_int(ctx, type, mem, tmp_reg);
|.else
		| ASM_TMEM_TXT_OP mov, aword, mem, =>label
|.endif
	} else {
		int64_t val = val_insn->val.i64;

		if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) {
			val = (int64_t)(intptr_t)ir_sym_val(ctx, val_insn);
		}

		if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(val)) {
			if (is_arg && ir_type_size[type] < 4) {
				type = IR_U32;
			}
			ir_emit_store_mem_imm(ctx, type, mem, val);
		} else {
			IR_ASSERT(tmp_reg != IR_REG_NONE);
			tmp_reg = IR_REG_NUM(tmp_reg);
			ir_emit_load_imm_int(ctx, type, tmp_reg, val);
			ir_emit_store_mem_int(ctx, type, mem, tmp_reg);
		}
	}
}

static void ir_emit_store_mem_fp_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	ir_val *val = &ctx->ir_base[src].val;

	if (type == IR_FLOAT) {
		ir_emit_store_mem_imm(ctx, IR_U32, mem, val->i32);
	} else if (sizeof(void*) == 8 && val->i64 == 0) {
		ir_emit_store_mem_imm(ctx, IR_U64, mem, 0);
	} else if (sizeof(void*) == 8 && tmp_reg != IR_REG_NONE) {
		ir_emit_load_imm_int(ctx, IR_U64, tmp_reg, val->i64);
		ir_emit_store_mem_int(ctx, IR_U64, mem, tmp_reg);
	} else {
		tmp_fp_reg = IR_REG_NUM(tmp_fp_reg);
		ir_emit_load(ctx, type, tmp_fp_reg, src);
		ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg);
	}
}

static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_store_mem_int(ctx, type, mem, reg);
	} else {
		ir_emit_store_mem_fp(ctx, type, mem, reg);
	}
}

static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
{
	IR_ASSERT(dst >= 0);
	ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg);
}

static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_REG_REG_OP mov, type, dst, src
}

#define IR_HAVE_SWAP_INT

static void ir_emit_swap(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_REG_REG_OP xchg, type, dst, src
}

static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] > 2) {
		| ASM_REG_REG_OP mov, type, dst, src
	} else if (ir_type_size[type] == 2) {
		if (IR_IS_TYPE_SIGNED(type)) {
			| movsx Rd(dst), Rw(src)
		} else {
			| movzx Rd(dst), Rw(src)
		}
	} else /* if (ir_type_size[type] == 1) */ {
		if (IR_IS_TYPE_SIGNED(type)) {
			| movsx Rd(dst), Rb(src)
		} else {
			| movzx Rd(dst), Rb(src)
		}
	}
}

static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_REG_REG_OP movap, type, dst, src
}

static ir_mem ir_fuse_addr_const(ir_ctx *ctx, ir_ref ref)
{
	ir_mem mem;
	ir_insn *addr_insn = &ctx->ir_base[ref];

	IR_ASSERT(IR_IS_CONST_REF(ref));
	if (IR_IS_SYM_CONST(addr_insn->op)) {
		void *addr = ir_sym_val(ctx, addr_insn);
		IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT((intptr_t)addr));
		mem = IR_MEM_O((int32_t)(intptr_t)addr);
	} else {
		IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64));
		mem = IR_MEM_O(addr_insn->val.i32);
	}
	return mem;
}
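
/*
 * Note (illustrative): ir_fuse_addr below reverses the matcher's LEA rules
 * at emission time - given the fused instruction tree it resolves the
 * registers of the base and index operands and packs them into an ir_mem,
 * so a consumer such as a LOAD can address memory directly:
 *
 *     IR_LEA_OB     -> [base + disp32]
 *     IR_LEA_SI     -> [index*scale]
 *     IR_LEA_OB_SI  -> [base + index*scale + disp32]
 *
 * Locals proven static (IR_STATIC_ALLOCA) need no register at all: their
 * address folds to a frame-pointer/stack-pointer relative offset.
 */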

static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
{
	uint32_t rule = ctx->rules[ref];
	ir_insn *insn = &ctx->ir_base[ref];
	ir_insn *op1_insn, *op2_insn, *offset_insn;
	ir_ref base_reg_ref, index_reg_ref;
	ir_reg base_reg = IR_REG_NONE, index_reg;
	int32_t offset = 0, scale;

	IR_ASSERT(((rule & IR_RULE_MASK) >= IR_LEA_OB &&
			(rule & IR_RULE_MASK) <= IR_LEA_SI_B) ||
		rule == IR_STATIC_ALLOCA);
	switch (rule & IR_RULE_MASK) {
		default:
			IR_ASSERT(0);
		case IR_LEA_OB:
			offset_insn = insn;
			if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
			} else {
				base_reg_ref = ref * sizeof(ir_ref) + 1;
			}
			index_reg_ref = IR_UNUSED;
			scale = 1;
			break;
		case IR_LEA_SI:
			scale = ctx->ir_base[insn->op2].val.i32;
			index_reg_ref = ref * sizeof(ir_ref) + 1;
			base_reg_ref = IR_UNUSED;
			offset_insn = NULL;
			break;
		case IR_LEA_SIB:
			base_reg_ref = index_reg_ref = ref * sizeof(ir_ref) + 1;
			scale = ctx->ir_base[insn->op2].val.i32 - 1;
			offset_insn = NULL;
			break;
		case IR_LEA_IB:
			if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
				index_reg_ref = ref * sizeof(ir_ref) + 2;
			} else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
				index_reg_ref = ref * sizeof(ir_ref) + 1;
			} else {
				base_reg_ref = ref * sizeof(ir_ref) + 1;
				index_reg_ref = ref * sizeof(ir_ref) + 2;
			}
			offset_insn = NULL;
			scale = 1;
			break;
		case IR_LEA_OB_I:
			op1_insn = &ctx->ir_base[insn->op1];
			offset_insn = op1_insn;
			scale = 1;
			if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
				index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			} else if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
				index_reg_ref = ref * sizeof(ir_ref) + 2;
			} else {
				base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
				index_reg_ref = ref * sizeof(ir_ref) + 2;
			}
			break;
		case IR_LEA_I_OB:
			op2_insn = &ctx->ir_base[insn->op2];
			offset_insn = op2_insn;
			scale = 1;
			if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
				index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			} else if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
				index_reg_ref = ref * sizeof(ir_ref) + 1;
			} else {
				base_reg_ref = ref * sizeof(ir_ref) + 1;
				index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			}
			break;
		case IR_LEA_SI_O:
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32;
			offset_insn = insn;
			base_reg_ref = IR_UNUSED;
			break;
		case IR_LEA_SIB_O:
			base_reg_ref = index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32 - 1;
			offset_insn = insn;
			break;
		case IR_LEA_IB_O:
			base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
			offset_insn = insn;
			scale = 1;
			break;
		case IR_LEA_OB_SI:
			index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			offset_insn = op1_insn;
			op2_insn = &ctx->ir_base[insn->op2];
			scale = ctx->ir_base[op2_insn->op2].val.i32;
			if (ir_rule(ctx, op1_insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op1_insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
			} else {
				base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			}
			break;
		case IR_LEA_SI_OB:
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32;
			op2_insn = &ctx->ir_base[insn->op2];
			offset_insn = op2_insn;
			if (ir_rule(ctx, op2_insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[op2_insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
			} else {
				base_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			}
			break;
		case IR_LEA_B_SI:
			if (ir_rule(ctx, insn->op1) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op1].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
			} else {
				base_reg_ref = ref * sizeof(ir_ref) + 1;
			}
			index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			op2_insn = &ctx->ir_base[insn->op2];
			scale = ctx->ir_base[op2_insn->op2].val.i32;
			offset_insn = NULL;
			break;
		case IR_LEA_SI_B:
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) {
				offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3);
				base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
				base_reg_ref = IR_UNUSED;
			} else {
				base_reg_ref = ref * sizeof(ir_ref) + 2;
			}
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32;
			offset_insn = NULL;
			break;
		case IR_ALLOCA:
			offset = IR_SPILL_POS_TO_OFFSET(insn->op3);
			base_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
			base_reg_ref = index_reg_ref = IR_UNUSED;
			scale = 1;
			offset_insn = NULL;
			break;
	}

	if (offset_insn) {
		ir_insn *addr_insn = &ctx->ir_base[offset_insn->op2];

		if (IR_IS_SYM_CONST(addr_insn->op)) {
			void *addr = ir_sym_val(ctx, addr_insn);
			IR_ASSERT(sizeof(void*) != 8 || IR_IS_SIGNED_32BIT((intptr_t)addr));
			offset += (int64_t)(intptr_t)(addr);
		} else {
			if (offset_insn->op == IR_SUB) {
				offset = -addr_insn->val.i32;
			} else {
				offset += addr_insn->val.i32;
			}
		}
	}

	if (base_reg_ref) {
		if (UNEXPECTED(ctx->rules[base_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) {
			base_reg = ir_get_fused_reg(ctx, root, base_reg_ref);
		} else {
			base_reg = ((int8_t*)ctx->regs)[base_reg_ref];
		}
		IR_ASSERT(base_reg != IR_REG_NONE);
		if (IR_REG_SPILLED(base_reg)) {
			base_reg = IR_REG_NUM(base_reg);
			ir_emit_load(ctx, insn->type, base_reg, ((ir_ref*)ctx->ir_base)[base_reg_ref]);
		}
	}

	index_reg = IR_REG_NONE;
	if (index_reg_ref) {
		if (base_reg_ref
		 && ((ir_ref*)ctx->ir_base)[index_reg_ref]
				== ((ir_ref*)ctx->ir_base)[base_reg_ref]) {
			index_reg = base_reg;
		} else {
			if (UNEXPECTED(ctx->rules[index_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) {
				index_reg = ir_get_fused_reg(ctx, root, index_reg_ref);
			} else {
				index_reg = ((int8_t*)ctx->regs)[index_reg_ref];
			}
			IR_ASSERT(index_reg != IR_REG_NONE);
			if (IR_REG_SPILLED(index_reg)) {
				index_reg = IR_REG_NUM(index_reg);
				ir_emit_load(ctx, insn->type, index_reg, ((ir_ref*)ctx->ir_base)[index_reg_ref]);
			}
		}
	}

	return IR_MEM(base_reg, offset, index_reg, scale);
}

static ir_mem ir_fuse_mem(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *mem_insn, ir_reg reg)
{
	if (reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(reg)) {
			reg = IR_REG_NUM(reg);
			ir_emit_load(ctx, IR_ADDR, reg, mem_insn->op2);
		}
		return IR_MEM_B(reg);
	} else if (IR_IS_CONST_REF(mem_insn->op2)) {
		return ir_fuse_addr_const(ctx, mem_insn->op2);
	} else {
		return ir_fuse_addr(ctx, root, mem_insn->op2);
	}
}

static ir_mem ir_fuse_load(ir_ctx *ctx, ir_ref root, ir_ref ref)
{
	ir_insn *load_insn = &ctx->ir_base[ref];
	ir_reg reg;

	IR_ASSERT(load_insn->op == IR_LOAD);
	if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
		reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
	} else {
		reg = ctx->regs[ref][2];
	}
	return ir_fuse_mem(ctx, root, ref, load_insn, reg);
}

static int32_t ir_fuse_imm(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *val_insn = &ctx->ir_base[ref];

	IR_ASSERT(IR_IS_CONST_REF(ref));
	if (IR_IS_SYM_CONST(val_insn->op)) {
		void *addr = ir_sym_val(ctx, val_insn);
		IR_ASSERT(IR_IS_SIGNED_32BIT((intptr_t)addr));
		return (int32_t)(intptr_t)addr;
	} else {
		IR_ASSERT(IR_IS_SIGNED_32BIT(val_insn->val.i32));
		return val_insn->val.i32;
	}
}

static void ir_emit_load_ex(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src, ir_ref root)
{
	if (IR_IS_CONST_REF(src)) {
		if (IR_IS_TYPE_INT(type)) {
			ir_insn *insn = &ctx->ir_base[src];

			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
				void *addr = ir_sym_val(ctx, insn);
				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
			} else if (insn->op == IR_STR) {
				ir_backend_data *data = ctx->data;
				dasm_State **Dst = &data->dasm_state;
				int label = ir_const_label(ctx, src);

				| lea Ra(reg), aword [=>label]
			} else {
				ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
			}
		} else {
			ir_emit_load_imm_fp(ctx, type, reg, src);
		}
	} else if (ir_rule(ctx, src) == IR_STATIC_ALLOCA) {
		ir_load_local_addr(ctx, reg, src);
	} else {
		ir_mem mem;

		if (ir_rule(ctx, src) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, src);
		} else {
			mem = ir_ref_spill_slot(ctx, src);
		}
		ir_emit_load_mem(ctx, type, reg, mem);
	}
}
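
/*
 * Note (illustrative): the prologue below lays the frame out roughly as
 *
 *     [saved rbp]                  (only with IR_USE_FRAME_POINTER)
 *     [saved preserved GP regs]    - push
 *     [locals / spill area]        - sub rsp, ...
 *     [saved preserved FP regs]    - movsd/vmovsd into the frame
 *     [outgoing call arguments]
 *
 * For vararg functions on x86_64 SysV it additionally dumps the unnamed
 * GP and XMM argument registers into the register-save area; AL carries
 * the number of XMM registers actually used by the caller, hence the
 * "test al, al" guard around the XMM spills.
 */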

static void ir_emit_prologue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int offset = ctx->stack_frame_size + ctx->call_stack_size;

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		| push Ra(IR_REG_RBP)
		| mov Ra(IR_REG_RBP), Ra(IR_REG_RSP)
	}
	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
		int i;
		ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);

		for (i = IR_REG_GP_FIRST; i <= IR_REG_GP_LAST; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				offset -= sizeof(void*);
				| push Ra(i)
			}
		}
	}
	if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else if (offset) {
			| sub Ra(IR_REG_RSP), offset
		}
	}
	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
		ir_reg fp;
		int i;
		ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP);

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset -= ctx->stack_frame_size + ctx->call_stack_size;
		} else {
			fp = IR_REG_STACK_POINTER;
		}
		for (i = IR_REG_FP_FIRST; i <= IR_REG_FP_LAST; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				offset -= sizeof(void*);
				if (ctx->mflags & IR_X86_AVX) {
					| vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
				} else {
					| movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST)
				}
			}
		}
	}
	if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) {
#if defined(_WIN64)
		ir_reg fp;
		int offset;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = sizeof(void*) * 2;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*);
		}
		| mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1)
		| mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2)
		| mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3)
		| mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4)
#elif defined(IR_TARGET_X64)
|.if X64
		const int8_t *int_reg_params = _ir_int_reg_params;
		const int8_t *fp_reg_params = _ir_fp_reg_params;
		uint32_t i;
		ir_reg fp;
		int offset;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size);
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->locals_area_size + ctx->call_stack_size;
		}

		if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) {
			/* skip named args */
			offset += sizeof(void*) * ctx->gp_reg_params;
			for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) {
				| mov qword [Ra(fp)+offset], Rq(int_reg_params[i])
				offset += sizeof(void*);
			}
		}
		if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) {
			| test al, al
			| je >1
			/* skip named args */
			offset += 16 * ctx->fp_reg_params;
			for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) {
				| movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST)
				offset += 16;
			}
			|1:
		}
|.endif
#endif
	}
}

static void ir_emit_epilogue(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_FP)) {
		int i;
		int offset;
		ir_reg fp;
		ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			fp = IR_REG_FRAME_POINTER;
			offset = 0;
		} else {
			fp = IR_REG_STACK_POINTER;
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		for (i = 0; i < IR_REG_NUM; i++) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				if (i < IR_REG_FP_FIRST) {
					offset -= sizeof(void*);
				} else {
					offset -= sizeof(void*);
					if (ctx->mflags & IR_X86_AVX) {
						| vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
					} else {
						| movsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset]
					}
				}
			}
		}
	}

	if (IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP)) {
		int i;
		ir_regset used_preserved_regs = IR_REGSET_INTERSECTION((ir_regset)ctx->used_preserved_regs, IR_REGSET_GP);
		int offset;

		if (ctx->flags & IR_USE_FRAME_POINTER) {
			offset = 0;
		} else {
			offset = ctx->stack_frame_size + ctx->call_stack_size;
		}
		for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				offset -= sizeof(void*);
			}
		}
		if (ctx->flags & IR_USE_FRAME_POINTER) {
			| lea Ra(IR_REG_RSP), [Ra(IR_REG_RBP)+offset]
		} else if (offset) {
			| add Ra(IR_REG_RSP), offset
		}
		for (i = IR_REG_GP_LAST; i >= IR_REG_GP_FIRST; i--) {
			if (IR_REGSET_IN(used_preserved_regs, i)) {
				| pop Ra(i)
			}
		}
		if (ctx->flags & IR_USE_FRAME_POINTER) {
			| pop Ra(IR_REG_RBP)
		}
	} else if (ctx->flags & IR_USE_FRAME_POINTER) {
		| mov Ra(IR_REG_RSP), Ra(IR_REG_RBP)
		| pop Ra(IR_REG_RBP)
	} else if (ctx->stack_frame_size + ctx->call_stack_size) {
		if (ctx->fixed_stack_red_zone) {
			IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone);
		} else {
			| add Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size)
		}
	}
}
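
/*
 * Note (illustrative): x86 integer ALU ops are two-address (dst = dst OP src),
 * so ir_emit_binop_int first gets op1 into the result register and then
 * applies op2 from a register, an imm32, or a fused memory operand:
 *
 *     mov eax, edi        ; only if def_reg != op1_reg
 *     add eax, [rsi+8]    ; op2 fused via ir_fuse_load()
 *
 * The register allocator tries to give def and op1 the same register (and
 * IR_MAY_SWAP lets it swap commutative operands), so the mov usually
 * disappears.
 */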
static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
		if (op1 == op2) {
			op2_reg = def_reg;
		}
	}

	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_REG_REG_OP add, type, def_reg, op2_reg
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_REG_REG_OP sub, type, def_reg, op2_reg
				break;
			case IR_MUL:
			case IR_MUL_OV:
				|	ASM_REG_REG_MUL imul, type, def_reg, op2_reg
				break;
			case IR_OR:
				|	ASM_REG_REG_OP or, type, def_reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_OP and, type, def_reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_OP xor, type, def_reg, op2_reg
				break;
		}
	} else if (IR_IS_CONST_REF(op2)) {
		int32_t val = ir_fuse_imm(ctx, op2);

		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_REG_IMM_OP add, type, def_reg, val
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_REG_IMM_OP sub, type, def_reg, val
				break;
			case IR_MUL:
			case IR_MUL_OV:
				|	ASM_REG_IMM_MUL imul, type, def_reg, val
				break;
			case IR_OR:
				|	ASM_REG_IMM_OP or, type, def_reg, val
				break;
			case IR_AND:
				|	ASM_REG_IMM_OP and, type, def_reg, val
				break;
			case IR_XOR:
				|	ASM_REG_IMM_OP xor, type, def_reg, val
				break;
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_REG_MEM_OP add, type, def_reg, mem
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_REG_MEM_OP sub, type, def_reg, mem
				break;
			case IR_MUL:
			case IR_MUL_OV:
				|	ASM_REG_MEM_MUL imul, type, def_reg, mem
				break;
			case IR_OR:
				|	ASM_REG_MEM_OP or, type, def_reg, mem
				break;
			case IR_AND:
				|	ASM_REG_MEM_OP and, type, def_reg, mem
				break;
			case IR_XOR:
				|	ASM_REG_MEM_OP xor, type, def_reg, mem
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

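/*
 * IMUL is the only x86 integer instruction with a three-operand
 * "reg, reg/mem, imm" form, so multiplication by a small constant can
 * combine the load and the multiply without clobbering op1.
 */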
static void ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	int32_t val = ir_fuse_imm(ctx, op2);

	IR_ASSERT(def_reg != IR_REG_NONE);
	IR_ASSERT(!IR_IS_CONST_REF(op1));

	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		switch (ir_type_size[type]) {
			default:
				IR_ASSERT(0);
			case 2:
				|	imul Rw(def_reg), Rw(op1_reg), val
				break;
			case 4:
				|	imul Rd(def_reg), Rd(op1_reg), val
				break;
|.if X64
||			case 8:
|				imul Rq(def_reg), Rq(op1_reg), val
||				break;
|.endif
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op1);
		} else {
			mem = ir_ref_spill_slot(ctx, op1);
		}
		|	ASM_REG_MEM_TXT_MUL imul, type, def_reg, mem, val
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}

	if (IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}

	if (op1 == op2) {
		return;
	}

	|	ASM_REG_REG_OP cmp, type, def_reg, op2_reg
	if (insn->op == IR_MIN) {
		if (IR_IS_TYPE_SIGNED(type)) {
			|	ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg
		} else {
			|	ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg
		}
	} else {
		IR_ASSERT(insn->op == IR_MAX);
		if (IR_IS_TYPE_SIGNED(type)) {
			|	ASM_REG_REG_OP2 cmovl, type, def_reg, op2_reg
		} else {
			|	ASM_REG_REG_OP2 cmovb, type, def_reg, op2_reg
		}
	}

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_type type = ctx->ir_base[insn->op1].type;

	IR_ASSERT(def_reg != IR_REG_NONE);
	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (IR_IS_TYPE_SIGNED(type)) {
		|	seto Rb(def_reg)
	} else {
		|	setc Rb(def_reg)
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

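/*
 * The _OV arithmetic ops leave the overflow state in FLAGS: OF for
 * signed and CF for unsigned operations. IR_OVERFLOW materializes it
 * with SETO/SETC above; the branch variant below consumes the flags
 * directly via JO/JNO or JC/JNC without producing a boolean.
 */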
static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *overflow_insn = &ctx->ir_base[insn->op2];
	ir_type type = ctx->ir_base[overflow_insn->op1].type;
	uint32_t true_block, false_block;
	bool reverse = 0;

	ir_get_true_false_blocks(ctx, b, &true_block, &false_block);
	if (true_block == next_block) {
		reverse = 1;
		true_block = false_block;
		false_block = 0;
	} else if (false_block == next_block) {
		false_block = 0;
	}

	if (IR_IS_TYPE_SIGNED(type)) {
		if (reverse) {
			|	jno =>true_block
		} else {
			|	jo =>true_block
		}
	} else {
		if (reverse) {
			|	jnc =>true_block
		} else {
			|	jc =>true_block
		}
	}
	if (false_block) {
		|	jmp =>false_block
	}
}

static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op3][2];
	ir_mem mem;

	if (insn->op == IR_STORE) {
		mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]);
	} else {
		IR_ASSERT(insn->op == IR_VSTORE);
		mem = ir_var_spill_slot(ctx, insn->op2);
	}

	if (op2_reg == IR_REG_NONE) {
		int32_t val = ir_fuse_imm(ctx, op2);

		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_MEM_IMM_OP add, type, mem, val
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_MEM_IMM_OP sub, type, mem, val
				break;
			case IR_OR:
				|	ASM_MEM_IMM_OP or, type, mem, val
				break;
			case IR_AND:
				|	ASM_MEM_IMM_OP and, type, mem, val
				break;
			case IR_XOR:
				|	ASM_MEM_IMM_OP xor, type, mem, val
				break;
		}
	} else {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
			case IR_ADD_OV:
				|	ASM_MEM_REG_OP add, type, mem, op2_reg
				break;
			case IR_SUB:
			case IR_SUB_OV:
				|	ASM_MEM_REG_OP sub, type, mem, op2_reg
				break;
			case IR_OR:
				|	ASM_MEM_REG_OP or, type, mem, op2_reg
				break;
			case IR_AND:
				|	ASM_MEM_REG_OP and, type, mem, op2_reg
				break;
			case IR_XOR:
				|	ASM_MEM_REG_OP xor, type, mem, op2_reg
				break;
		}
	}
}

static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op2];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	ir_reg reg;

	IR_ASSERT(insn->op == IR_RSTORE);
	reg = insn->op3;

	if (op2_reg == IR_REG_NONE) {
		int32_t val = ir_fuse_imm(ctx, op2);

		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_IMM_OP add, type, reg, val
				break;
			case IR_SUB:
				|	ASM_REG_IMM_OP sub, type, reg, val
				break;
			case IR_OR:
				|	ASM_REG_IMM_OP or, type, reg, val
				break;
			case IR_AND:
				|	ASM_REG_IMM_OP and, type, reg, val
				break;
			case IR_XOR:
				|	ASM_REG_IMM_OP xor, type, reg, val
				break;
		}
	} else {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, op2);
		}
		switch (op_insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_REG_REG_OP add, type, reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_REG_REG_OP sub, type, reg, op2_reg
				break;
			case IR_OR:
				|	ASM_REG_REG_OP or, type, reg, op2_reg
				break;
			case IR_AND:
				|	ASM_REG_REG_OP and, type, reg, op2_reg
				break;
			case IR_XOR:
				|	ASM_REG_REG_OP xor, type, reg, op2_reg
				break;
		}
	}
}

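/*
 * Strength reduction for constant powers of two: x * 2^n becomes SHL
 * (or ADD for n == 1), unsigned x / 2^n becomes SHR, and unsigned
 * x % 2^n becomes AND with the mask 2^n - 1. A 64-bit mask that does
 * not fit a sign-extended 32-bit immediate is staged in a scratch
 * register first.
 */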
static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}
	if (insn->op == IR_MUL) {
		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);

		if (shift == 1) {
			|	ASM_REG_REG_OP add, type, def_reg, def_reg
		} else {
			|	ASM_REG_IMM_OP shl, type, def_reg, shift
		}
	} else if (insn->op == IR_DIV) {
		uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);

		|	ASM_REG_IMM_OP shr, type, def_reg, shift
	} else {
		IR_ASSERT(insn->op == IR_MOD);
		uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;

|.if X64
||		if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) {
||			ir_reg op2_reg = ctx->regs[def][2];
||
||			op2_reg = IR_REG_NUM(op2_reg);
||			ir_emit_load_imm_int(ctx, type, op2_reg, mask);
|			ASM_REG_REG_OP and, type, def_reg, op2_reg
||		} else {
|.endif
			|	ASM_REG_IMM_OP and, type, def_reg, mask
|.if X64
||		}
|.endif
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
	int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1;

	IR_ASSERT(shift != 0);
	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(op1_reg != IR_REG_NONE && def_reg != IR_REG_NONE && op1_reg != def_reg);

	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	if (shift == 1) {
|.if X64
||		if (ir_type_size[type] == 8) {
|			mov Rq(def_reg), Rq(op1_reg)
|			ASM_REG_IMM_OP shr, type, def_reg, 63
|			add Rq(def_reg), Rq(op1_reg)
||		} else {
|.endif
			|	mov Rd(def_reg), Rd(op1_reg)
			|	ASM_REG_IMM_OP shr, type, def_reg, (ir_type_size[type]*8-1)
			|	add Rd(def_reg), Rd(op1_reg)
|.if X64
||		}
|.endif
	} else {
|.if X64
||		if (ir_type_size[type] == 8) {
||			ir_reg op2_reg = ctx->regs[def][2];
||
||			if (op2_reg != IR_REG_NONE) {
||				op2_reg = IR_REG_NUM(op2_reg);
||				ir_emit_load_imm_int(ctx, type, op2_reg, offset);
|				lea Rq(def_reg), [Rq(op1_reg)+Rq(op2_reg)]
||			} else {
|				lea Rq(def_reg), [Rq(op1_reg)+(int32_t)offset]
||			}
||		} else {
|.endif
			|	lea Rd(def_reg), [Rd(op1_reg)+(int32_t)offset]
|.if X64
||		}
|.endif
		|	ASM_REG_REG_OP test, type, op1_reg, op1_reg
		|	ASM_REG_REG_OP2 cmovns, type, def_reg, op1_reg
	}
	|	ASM_REG_IMM_OP sar, type, def_reg, shift

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

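/*
 * Signed division by 2^n cannot use a plain SAR because SAR rounds
 * toward negative infinity. ir_emit_sdiv_pwr2 above adds a bias of
 * 2^n - 1 for negative inputs only (via CMOVNS, or the extracted sign
 * bit for n == 1) before shifting. ir_emit_smod_pwr2 below uses the
 * same bias to compute the remainder as ((x + bias) & mask) - bias.
 */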
static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg tmp_reg = ctx->regs[def][3];
	uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64);
	uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1;

	IR_ASSERT(shift != 0);
	IR_ASSERT(IR_IS_CONST_REF(insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}
	if (tmp_reg != op1_reg) {
		ir_emit_mov(ctx, type, tmp_reg, def_reg);
	}

	if (shift == 1) {
		|	ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-1)
	} else {
		|	ASM_REG_IMM_OP sar, type, tmp_reg, (ir_type_size[type]*8-1)
		|	ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-shift)
	}
	|	ASM_REG_REG_OP add, type, def_reg, tmp_reg

|.if X64
||	if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) {
||		ir_reg op2_reg = ctx->regs[def][2];
||
||		op2_reg = IR_REG_NUM(op2_reg);
||		ir_emit_load_imm_int(ctx, type, op2_reg, mask);
|		ASM_REG_REG_OP and, type, def_reg, op2_reg
||	} else {
|.endif
		|	ASM_REG_IMM_OP and, type, def_reg, mask
|.if X64
||	}
|.endif

	|	ASM_REG_REG_OP sub, type, def_reg, tmp_reg

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_mem mem;

	IR_ASSERT(IR_IS_CONST_REF(op_insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op));

	if (insn->op == IR_STORE) {
		mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]);
	} else {
		IR_ASSERT(insn->op == IR_VSTORE);
		mem = ir_var_spill_slot(ctx, insn->op2);
	}

	if (op_insn->op == IR_MUL) {
		uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64);
		|	ASM_MEM_IMM_OP shl, type, mem, shift
	} else if (op_insn->op == IR_DIV) {
		uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64);
		|	ASM_MEM_IMM_OP shr, type, mem, shift
	} else {
		IR_ASSERT(op_insn->op == IR_MOD);
		uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1;
		IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask));
		|	ASM_MEM_IMM_OP and, type, mem, mask
	}
}

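/*
 * Variable shift counts must live in CL on x86. The helpers below free
 * RCX first (ir_emit_shift asserts that the result register is not
 * RCX), move the count into RCX if it is anywhere else, and only then
 * emit the shift or rotate.
 */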
static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX);
	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, insn->op1);
	}
	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	}
	if (op2_reg != IR_REG_RCX) {
		if (op1_reg == IR_REG_RCX) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
			op1_reg = def_reg;
		}
		if (op2_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_RCX, insn->op2);
		}
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, insn->op1);
		}
	}
	switch (insn->op) {
		default:
			IR_ASSERT(0);
		case IR_SHL:
			|	ASM_REG_TXT_OP shl, insn->type, def_reg, cl
			break;
		case IR_SHR:
			|	ASM_REG_TXT_OP shr, insn->type, def_reg, cl
			break;
		case IR_SAR:
			|	ASM_REG_TXT_OP sar, insn->type, def_reg, cl
			break;
		case IR_ROL:
			|	ASM_REG_TXT_OP rol, insn->type, def_reg, cl
			break;
		case IR_ROR:
			|	ASM_REG_TXT_OP ror, insn->type, def_reg, cl
			break;
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_ref op2 = op_insn->op2;
	ir_reg op2_reg = ctx->regs[insn->op3][2];
	ir_mem mem;

	if (insn->op == IR_STORE) {
		mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]);
	} else {
		IR_ASSERT(insn->op == IR_VSTORE);
		mem = ir_var_spill_slot(ctx, insn->op2);
	}

	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, op2);
	}
	if (op2_reg != IR_REG_RCX) {
		if (op2_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_RCX, op2);
		}
	}
	switch (op_insn->op) {
		default:
			IR_ASSERT(0);
		case IR_SHL:
			|	ASM_MEM_TXT_OP shl, type, mem, cl
			break;
		case IR_SHR:
			|	ASM_MEM_TXT_OP shr, type, mem, cl
			break;
		case IR_SAR:
			|	ASM_MEM_TXT_OP sar, type, mem, cl
			break;
		case IR_ROL:
			|	ASM_MEM_TXT_OP rol, type, mem, cl
			break;
		case IR_ROR:
			|	ASM_MEM_TXT_OP ror, type, mem, cl
			break;
	}
}

static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int32_t shift;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op));
	IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64));
	shift = ctx->ir_base[insn->op2].val.i32;
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}
	switch (insn->op) {
		default:
			IR_ASSERT(0);
		case IR_SHL:
			|	ASM_REG_IMM_OP shl, insn->type, def_reg, shift
			break;
		case IR_SHR:
			|	ASM_REG_IMM_OP shr, insn->type, def_reg, shift
			break;
		case IR_SAR:
			|	ASM_REG_IMM_OP sar, insn->type, def_reg, shift
			break;
		case IR_ROL:
			|	ASM_REG_IMM_OP rol, insn->type, def_reg, shift
			break;
		case IR_ROR:
			|	ASM_REG_IMM_OP ror, insn->type, def_reg, shift
			break;
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	int32_t shift;
	ir_mem mem;

	IR_ASSERT(IR_IS_CONST_REF(op_insn->op2));
	IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op));
	IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[op_insn->op2].val.i64));
	shift = ctx->ir_base[op_insn->op2].val.i32;
	if (insn->op == IR_STORE) {
		mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]);
	} else {
		IR_ASSERT(insn->op == IR_VSTORE);
		mem = ir_var_spill_slot(ctx, insn->op2);
	}

	switch (op_insn->op) {
		default:
			IR_ASSERT(0);
		case IR_SHL:
			|	ASM_MEM_IMM_OP shl, type, mem, shift
			break;
		case IR_SHR:
			|	ASM_MEM_IMM_OP shr, type, mem, shift
			break;
		case IR_SAR:
			|	ASM_MEM_IMM_OP sar, type, mem, shift
			break;
		case IR_ROL:
			|	ASM_MEM_IMM_OP rol, type, mem, shift
			break;
		case IR_ROR:
			|	ASM_MEM_IMM_OP ror, type, mem, shift
			break;
	}
}

static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}
	if (rule == IR_INC) {
		|	ASM_REG_OP inc, insn->type, def_reg
	} else if (rule == IR_DEC) {
		|	ASM_REG_OP dec, insn->type, def_reg
	} else if (insn->op == IR_NOT) {
		|	ASM_REG_OP not, insn->type, def_reg
	} else if (insn->op == IR_NEG) {
		|	ASM_REG_OP neg, insn->type, def_reg
	} else {
		IR_ASSERT(insn->op == IR_BSWAP);
		switch (ir_type_size[insn->type]) {
			default:
				IR_ASSERT(0);
			case 4:
				|	bswap Rd(def_reg)
				break;
			case 8:
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				|	bswap Rq(def_reg)
|.endif
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

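/*
 * Bit counting: with BMI1 the LZCNT/TZCNT instructions are used
 * directly. Otherwise CTLZ falls back to BSR, whose result is a bit
 * index that is converted to a leading-zero count with XOR width-1, and
 * CTTZ falls back to BSF; note that BSR/BSF leave the destination
 * undefined for a zero input. 8-bit operands are zero-extended first
 * because these instructions have no byte form.
 */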
static void ir_emit_bit_count(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		switch (ir_type_size[insn->type]) {
			default:
				IR_ASSERT(0);
			case 2:
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	lzcnt Rw(def_reg), Rw(op1_reg)
					} else {
						|	bsr Rw(def_reg), Rw(op1_reg)
						|	xor Rw(def_reg), 0xf
					}
				} else if (insn->op == IR_CTTZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	tzcnt Rw(def_reg), Rw(op1_reg)
					} else {
						|	bsf Rw(def_reg), Rw(op1_reg)
					}
				} else {
					IR_ASSERT(insn->op == IR_CTPOP);
					|	popcnt Rw(def_reg), Rw(op1_reg)
				}
				break;
			case 1:
				|	movzx Rd(op1_reg), Rb(op1_reg)
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	lzcnt Rd(def_reg), Rd(op1_reg)
						|	sub Rd(def_reg), 24
					} else {
						|	bsr Rd(def_reg), Rd(op1_reg)
						|	xor Rw(def_reg), 0x7
					}
					break;
				}
				IR_FALLTHROUGH;
			case 4:
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	lzcnt Rd(def_reg), Rd(op1_reg)
					} else {
						|	bsr Rd(def_reg), Rd(op1_reg)
						|	xor Rw(def_reg), 0x1f
					}
				} else if (insn->op == IR_CTTZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	tzcnt Rd(def_reg), Rd(op1_reg)
					} else {
						|	bsf Rd(def_reg), Rd(op1_reg)
					}
				} else {
					IR_ASSERT(insn->op == IR_CTPOP);
					|	popcnt Rd(def_reg), Rd(op1_reg)
				}
				break;
|.if X64
			case 8:
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	lzcnt Rq(def_reg), Rq(op1_reg)
					} else {
						|	bsr Rq(def_reg), Rq(op1_reg)
						|	xor Rw(def_reg), 0x3f
					}
				} else if (insn->op == IR_CTTZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	tzcnt Rq(def_reg), Rq(op1_reg)
					} else {
						|	bsf Rq(def_reg), Rq(op1_reg)
					}
				} else {
					IR_ASSERT(insn->op == IR_CTPOP);
					|	popcnt Rq(def_reg), Rq(op1_reg)
				}
				break;
|.endif
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op1);
		} else {
			mem = ir_ref_spill_slot(ctx, op1);
		}
		switch (ir_type_size[insn->type]) {
			default:
				IR_ASSERT(0);
			case 2:
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	ASM_TXT_TMEM_OP lzcnt, Rw(def_reg), word, mem
					} else {
						|	ASM_TXT_TMEM_OP bsr, Rw(def_reg), word, mem
						|	xor Rw(def_reg), 0xf
					}
				} else if (insn->op == IR_CTTZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	ASM_TXT_TMEM_OP tzcnt, Rw(def_reg), word, mem
					} else {
						|	ASM_TXT_TMEM_OP bsf, Rw(def_reg), word, mem
					}
				} else {
					|	ASM_TXT_TMEM_OP popcnt, Rw(def_reg), word, mem
				}
				break;
			case 4:
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	ASM_TXT_TMEM_OP lzcnt, Rd(def_reg), dword, mem
					} else {
						|	ASM_TXT_TMEM_OP bsr, Rd(def_reg), dword, mem
						|	xor Rw(def_reg), 0x1f
					}
				} else if (insn->op == IR_CTTZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	ASM_TXT_TMEM_OP tzcnt, Rd(def_reg), dword, mem
					} else {
						|	ASM_TXT_TMEM_OP bsf, Rd(def_reg), dword, mem
					}
				} else {
					|	ASM_TXT_TMEM_OP popcnt, Rd(def_reg), dword, mem
				}
				break;
|.if X64
			case 8:
				if (insn->op == IR_CTLZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	ASM_TXT_TMEM_OP lzcnt, Rq(def_reg), qword, mem
					} else {
						|	ASM_TXT_TMEM_OP bsr, Rq(def_reg), qword, mem
						|	xor Rw(def_reg), 0x3f
					}
				} else if (insn->op == IR_CTTZ) {
					if (ctx->mflags & IR_X86_BMI1) {
						|	ASM_TXT_TMEM_OP tzcnt, Rq(def_reg), qword, mem
					} else {
						|	ASM_TXT_TMEM_OP bsf, Rq(def_reg), qword, mem
					}
				} else {
					|	ASM_TXT_TMEM_OP popcnt, Rq(def_reg), qword, mem
				}
				break;
|.endif
		}
	}

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

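/*
 * Portable population count (SWAR, used when POPCNT is unavailable):
 * fold adjacent bit pairs, then nibbles, then bytes with the classic
 * 0x55../0x33../0x0f.. mask sequence; the 32- and 64-bit variants
 * finish with a multiply by 0x0101.. that accumulates all byte sums
 * into the top byte. The 64-bit masks do not fit into immediates and
 * are loaded into a scratch register with mov64.
 */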
static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg tmp_reg = ctx->regs[def][2];
|.if X64
||	ir_reg const_reg = ctx->regs[def][3];
|.endif

	IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE);
	if (op1_reg == IR_REG_NONE) {
		ir_emit_load(ctx, type, def_reg, op1);
		if (ir_type_size[insn->type] == 1) {
			|	movzx Rd(def_reg), Rb(def_reg)
		} else if (ir_type_size[insn->type] == 2) {
			|	movzx Rd(def_reg), Rw(def_reg)
		}
	} else {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		switch (ir_type_size[insn->type]) {
			default:
				IR_ASSERT(0);
			case 1:
				|	movzx Rd(def_reg), Rb(op1_reg)
				break;
			case 2:
				|	movzx Rd(def_reg), Rw(op1_reg)
				break;
			case 4:
				|	mov Rd(def_reg), Rd(op1_reg)
				break;
|.if X64
||			case 8:
|				mov Rq(def_reg), Rq(op1_reg)
||				break;
|.endif
		}
	}
	switch (ir_type_size[insn->type]) {
		default:
			IR_ASSERT(0);
		case 1:
			|	mov Rd(tmp_reg), Rd(def_reg)
			|	shr Rd(def_reg), 1
			|	and Rd(def_reg), 0x55
			|	sub Rd(tmp_reg), Rd(def_reg)
			|	mov Rd(def_reg), Rd(tmp_reg)
			|	and Rd(def_reg), 0x33
			|	shr Rd(tmp_reg), 2
			|	and Rd(tmp_reg), 0x33
			|	add Rd(tmp_reg), Rd(def_reg)
			|	mov Rd(def_reg), Rd(tmp_reg)
			|	shr Rd(def_reg), 4
			|	add Rd(def_reg), Rd(tmp_reg)
			|	and Rd(def_reg), 0x0f
			break;
		case 2:
			|	mov Rd(tmp_reg), Rd(def_reg)
			|	shr Rd(def_reg), 1
			|	and Rd(def_reg), 0x5555
			|	sub Rd(tmp_reg), Rd(def_reg)
			|	mov Rd(def_reg), Rd(tmp_reg)
			|	and Rd(def_reg), 0x3333
			|	shr Rd(tmp_reg), 2
			|	and Rd(tmp_reg), 0x3333
			|	add Rd(tmp_reg), Rd(def_reg)
			|	mov Rd(def_reg), Rd(tmp_reg)
			|	shr Rd(def_reg), 4
			|	add Rd(def_reg), Rd(tmp_reg)
			|	and Rd(def_reg), 0x0f0f
			|	mov Rd(tmp_reg), Rd(def_reg)
			|	shr Rd(tmp_reg), 8
			|	and Rd(def_reg), 0x0f
			|	add Rd(def_reg), Rd(tmp_reg)
			break;
		case 4:
			|	mov Rd(tmp_reg), Rd(def_reg)
			|	shr Rd(def_reg), 1
			|	and Rd(def_reg), 0x55555555
			|	sub Rd(tmp_reg), Rd(def_reg)
			|	mov Rd(def_reg), Rd(tmp_reg)
			|	and Rd(def_reg), 0x33333333
			|	shr Rd(tmp_reg), 2
			|	and Rd(tmp_reg), 0x33333333
			|	add Rd(tmp_reg), Rd(def_reg)
			|	mov Rd(def_reg), Rd(tmp_reg)
			|	shr Rd(def_reg), 4
			|	add Rd(def_reg), Rd(tmp_reg)
			|	and Rd(def_reg), 0x0f0f0f0f
			|	imul Rd(def_reg), 0x01010101
			|	shr Rd(def_reg), 24
			break;
|.if X64
||		case 8:
||			IR_ASSERT(const_reg != IR_REG_NONE);
|			mov Rq(tmp_reg), Rq(def_reg)
|			shr Rq(def_reg), 1
|			mov64 Rq(const_reg), 0x5555555555555555
|			and Rq(def_reg), Rq(const_reg)
|			sub Rq(tmp_reg), Rq(def_reg)
|			mov Rq(def_reg), Rq(tmp_reg)
|			mov64 Rq(const_reg), 0x3333333333333333
|			and Rq(def_reg), Rq(const_reg)
|			shr Rq(tmp_reg), 2
|			and Rq(tmp_reg), Rq(const_reg)
|			add Rq(tmp_reg), Rq(def_reg)
|			mov Rq(def_reg), Rq(tmp_reg)
|			shr Rq(def_reg), 4
|			add Rq(def_reg), Rq(tmp_reg)
|			mov64 Rq(const_reg), 0x0f0f0f0f0f0f0f0f
|			and Rq(def_reg), Rq(const_reg)
|			mov64 Rq(const_reg), 0x0101010101010101
|			imul Rq(def_reg), Rq(const_reg)
|			shr Rq(def_reg), 56
||			break;
|.endif
	}

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *op_insn = &ctx->ir_base[insn->op3];
	ir_type type = op_insn->type;
	ir_mem mem;

	if (insn->op == IR_STORE) {
		mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]);
	} else {
		IR_ASSERT(insn->op == IR_VSTORE);
		mem = ir_var_spill_slot(ctx, insn->op2);
	}

	if (rule == IR_MEM_INC) {
		|	ASM_MEM_OP inc, type, mem
	} else if (rule == IR_MEM_DEC) {
		|	ASM_MEM_OP dec, type, mem
	} else if (op_insn->op == IR_NOT) {
		|	ASM_MEM_OP not, type, mem
	} else {
		IR_ASSERT(op_insn->op == IR_NEG);
		|	ASM_MEM_OP neg, type, mem
	}
}

static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	IR_ASSERT(def_reg != op1_reg);

	ir_emit_mov(ctx, insn->type, def_reg, op1_reg);
	|	ASM_REG_OP neg, insn->type, def_reg
	|	ASM_REG_REG_OP2 cmovs, type, def_reg, op1_reg
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[insn->op1].type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	if (op1_reg != IR_REG_NONE) {
		|	ASM_REG_REG_OP test, type, op1_reg, op1_reg
	} else {
		ir_mem mem = ir_ref_spill_slot(ctx, op1);

		|	ASM_MEM_IMM_OP cmp, type, mem, 0
	}
	|	sete Rb(def_reg)

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_mem mem;

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op1_reg != IR_REG_RAX) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_RAX, op1);
		}
	}
	if (op2_reg == IR_REG_NONE && op1 == op2) {
		op2_reg = IR_REG_RAX;
	} else if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	} else if (IR_IS_CONST_REF(op2)
	 && (insn->op == IR_MUL || insn->op == IR_MUL_OV)) {
		op2_reg = IR_REG_RDX;
		ir_emit_load(ctx, type, op2_reg, op2);
	}
	if (insn->op == IR_MUL || insn->op == IR_MUL_OV) {
		if (IR_IS_TYPE_SIGNED(insn->type)) {
			if (op2_reg != IR_REG_NONE) {
				|	ASM_REG_OP imul, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP imul, type, mem
			}
		} else {
			if (op2_reg != IR_REG_NONE) {
				|	ASM_REG_OP mul, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP mul, type, mem
			}
		}
	} else {
		if (IR_IS_TYPE_SIGNED(type)) {
			if (ir_type_size[type] == 8) {
				|	cqo
			} else if (ir_type_size[type] == 4) {
				|	cdq
			} else if (ir_type_size[type] == 2) {
				|	cwd
			} else {
				|	movsx ax, al
			}
			if (op2_reg != IR_REG_NONE) {
				|	ASM_REG_OP idiv, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP idiv, type, mem
			}
		} else {
			if (ir_type_size[type] == 1) {
				|	movzx ax, al
			} else {
				|	ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX
			}
			if (op2_reg != IR_REG_NONE) {
				|	ASM_REG_OP div, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP div, type, mem
			}
		}
	}

	if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) {
		if (def_reg != IR_REG_NONE) {
			if (def_reg != IR_REG_RAX) {
				ir_emit_mov(ctx, type, def_reg, IR_REG_RAX);
			}
			if (IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, type, def, def_reg);
			}
		} else {
			ir_emit_store(ctx, type, def, IR_REG_RAX);
		}
	} else {
		IR_ASSERT(insn->op == IR_MOD);
		if (ir_type_size[type] == 1) {
			if (def_reg != IR_REG_NONE) {
				|	mov al, ah
				if (def_reg != IR_REG_RAX) {
					|	mov Rb(def_reg), al
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, type, def, def_reg);
				}
			} else {
				ir_reg fp;
				int32_t offset = ir_ref_spill_slot_offset(ctx, def, &fp);

				/* the 8-bit remainder is returned in AH; store it directly into the spill slot */
				|	mov byte [Ra(fp)+offset], ah
			}
		} else {
			if (def_reg != IR_REG_NONE) {
				if (def_reg != IR_REG_RDX) {
					ir_emit_mov(ctx, type, def_reg, IR_REG_RDX);
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, type, def, def_reg);
				}
			} else {
				ir_emit_store(ctx, type, def, IR_REG_RDX);
			}
		}
	}
}

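/*
 * SSE has no FP negation or absolute-value instruction; both are done
 * by XOR-ing or AND-ing the sign bit with a 16-byte mask. The masks are
 * emitted once into the rodata section (lazily, on first use) and
 * referenced by label.
 */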
static void ir_rodata(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	|.rodata
	if (!data->rodata_label) {
		int label = data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
		|=>label:
	}
}

static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
	}
	if (insn->op == IR_NEG) {
		if (insn->type == IR_DOUBLE) {
			if (!data->double_neg_const) {
				data->double_neg_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->double_neg_const:
				|.dword 0, 0x80000000, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vxorpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const]
			} else {
				|	xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const]
			}
		} else {
			IR_ASSERT(insn->type == IR_FLOAT);
			if (!data->float_neg_const) {
				data->float_neg_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->float_neg_const:
				|.dword 0x80000000, 0, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const]
			} else {
				|	xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const]
			}
		}
	} else {
		IR_ASSERT(insn->op == IR_ABS);
		if (insn->type == IR_DOUBLE) {
			if (!data->double_abs_const) {
				data->double_abs_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->double_abs_const:
				|.dword 0xffffffff, 0x7fffffff, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const]
			} else {
				|	andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const]
			}
		} else {
			IR_ASSERT(insn->type == IR_FLOAT);
			if (!data->float_abs_const) {
				data->float_abs_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->float_abs_const:
				|.dword 0x7fffffff, 0, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const]
			} else {
				|	andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const]
			}
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
		if (op1 == op2) {
			op2_reg = def_reg;
		}
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_SSE2_REG_REG_OP adds, type, def_reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_SSE2_REG_REG_OP subs, type, def_reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_SSE2_REG_REG_OP muls, type, def_reg, op2_reg
				break;
			case IR_DIV:
				|	ASM_SSE2_REG_REG_OP divs, type, def_reg, op2_reg
				break;
			case IR_MIN:
				|	ASM_SSE2_REG_REG_OP mins, type, def_reg, op2_reg
				break;
			case IR_MAX:
				|	ASM_SSE2_REG_REG_OP maxs, type, def_reg, op2_reg
				break;
		}
	} else if (IR_IS_CONST_REF(op2)) {
		int label = ir_const_label(ctx, op2);

		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_SSE2_REG_TXT_OP adds, type, def_reg, [=>label]
				break;
			case IR_SUB:
				|	ASM_SSE2_REG_TXT_OP subs, type, def_reg, [=>label]
				break;
			case IR_MUL:
				|	ASM_SSE2_REG_TXT_OP muls, type, def_reg, [=>label]
				break;
			case IR_DIV:
				|	ASM_SSE2_REG_TXT_OP divs, type, def_reg, [=>label]
				break;
			case IR_MIN:
				|	ASM_SSE2_REG_TXT_OP mins, type, def_reg, [=>label]
				break;
			case IR_MAX:
				|	ASM_SSE2_REG_TXT_OP maxs, type, def_reg, [=>label]
				break;
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_SSE2_REG_MEM_OP adds, type, def_reg, mem
				break;
			case IR_SUB:
				|	ASM_SSE2_REG_MEM_OP subs, type, def_reg, mem
				break;
			case IR_MUL:
				|	ASM_SSE2_REG_MEM_OP muls, type, def_reg, mem
				break;
			case IR_DIV:
				|	ASM_SSE2_REG_MEM_OP divs, type, def_reg, mem
				break;
			case IR_MIN:
				|	ASM_SSE2_REG_MEM_OP mins, type, def_reg, mem
				break;
			case IR_MAX:
				|	ASM_SSE2_REG_MEM_OP maxs, type, def_reg, mem
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

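/*
 * The AVX variant uses the three-operand VEX encoding, so op1 does not
 * have to be copied into the destination register first as in the SSE2
 * path above.
 */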
static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_AVX_REG_REG_REG_OP vadds, type, def_reg, op1_reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_AVX_REG_REG_REG_OP vsubs, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_AVX_REG_REG_REG_OP vmuls, type, def_reg, op1_reg, op2_reg
				break;
			case IR_DIV:
				|	ASM_AVX_REG_REG_REG_OP vdivs, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MIN:
				|	ASM_AVX_REG_REG_REG_OP vmins, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MAX:
				|	ASM_AVX_REG_REG_REG_OP vmaxs, type, def_reg, op1_reg, op2_reg
				break;
		}
	} else if (IR_IS_CONST_REF(op2)) {
		int label = ir_const_label(ctx, op2);

		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_AVX_REG_REG_TXT_OP vadds, type, def_reg, op1_reg, [=>label]
				break;
			case IR_SUB:
				|	ASM_AVX_REG_REG_TXT_OP vsubs, type, def_reg, op1_reg, [=>label]
				break;
			case IR_MUL:
				|	ASM_AVX_REG_REG_TXT_OP vmuls, type, def_reg, op1_reg, [=>label]
				break;
			case IR_DIV:
				|	ASM_AVX_REG_REG_TXT_OP vdivs, type, def_reg, op1_reg, [=>label]
				break;
			case IR_MIN:
				|	ASM_AVX_REG_REG_TXT_OP vmins, type, def_reg, op1_reg, [=>label]
				break;
			case IR_MAX:
				|	ASM_AVX_REG_REG_TXT_OP vmaxs, type, def_reg, op1_reg, [=>label]
				break;
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_AVX_REG_REG_MEM_OP vadds, type, def_reg, op1_reg, mem
				break;
			case IR_SUB:
				|	ASM_AVX_REG_REG_MEM_OP vsubs, type, def_reg, op1_reg, mem
				break;
			case IR_MUL:
				|	ASM_AVX_REG_REG_MEM_OP vmuls, type, def_reg, op1_reg, mem
				break;
			case IR_DIV:
				|	ASM_AVX_REG_REG_MEM_OP vdivs, type, def_reg, op1_reg, mem
				break;
			case IR_MIN:
				|	ASM_AVX_REG_REG_MEM_OP vmins, type, def_reg, op1_reg, mem
				break;
			case IR_MAX:
				|	ASM_AVX_REG_REG_MEM_OP vmaxs, type, def_reg, op1_reg, mem
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_ref root, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (op1_reg != IR_REG_NONE) {
		if (op2_reg != IR_REG_NONE) {
			|	ASM_REG_REG_OP cmp, type, op1_reg, op2_reg
		} else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
			|	ASM_REG_REG_OP test, type, op1_reg, op1_reg
		} else if (IR_IS_CONST_REF(op2)) {
			int32_t val = ir_fuse_imm(ctx, op2);
			|	ASM_REG_IMM_OP cmp, type, op1_reg, val
		} else {
			ir_mem mem;

			if (ir_rule(ctx, op2) & IR_FUSED) {
				mem = ir_fuse_load(ctx, root, op2);
			} else {
				mem = ir_ref_spill_slot(ctx, op2);
			}
			|	ASM_REG_MEM_OP cmp, type, op1_reg, mem
		}
	} else if (IR_IS_CONST_REF(op1)) {
		IR_ASSERT(0);
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, op1);
		} else {
			mem = ir_ref_spill_slot(ctx, op1);
		}
		if (op2_reg != IR_REG_NONE) {
			|	ASM_MEM_REG_OP cmp, type, mem, op2_reg
		} else {
			int32_t val = ir_fuse_imm(ctx, op2);
			|	ASM_MEM_IMM_OP cmp, type, mem, val
		}
	}
}

static void ir_emit_cmp_int_common2(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *cmp_insn)
{
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_ref op1 = cmp_insn->op1;
	ir_ref op2 = cmp_insn->op2;
	ir_reg op1_reg = ctx->regs[ref][1];
	ir_reg op2_reg = ctx->regs[ref][2];

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}

	ir_emit_cmp_int_common(ctx, type, root, cmp_insn, op1_reg, op1, op2_reg, op2);
}

static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	switch (op) {
		default:
			IR_ASSERT(0 && "NIY binary op");
		case IR_EQ:
			|	sete Rb(def_reg)
			break;
		case IR_NE:
			|	setne Rb(def_reg)
			break;
		case IR_LT:
			|	setl Rb(def_reg)
			break;
		case IR_GE:
			|	setge Rb(def_reg)
			break;
		case IR_LE:
			|	setle Rb(def_reg)
			break;
		case IR_GT:
			|	setg Rb(def_reg)
			break;
		case IR_ULT:
			|	setb Rb(def_reg)
			break;
		case IR_UGE:
			|	setae Rb(def_reg)
			break;
		case IR_ULE:
			|	setbe Rb(def_reg)
			break;
		case IR_UGT:
			|	seta Rb(def_reg)
			break;
	}
}

static void _ir_emit_setcc_int_mem(ir_ctx *ctx, uint8_t op, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	switch (op) {
		default:
			IR_ASSERT(0 && "NIY binary op");
		case IR_EQ:
			|	ASM_TMEM_OP sete, byte, mem
			break;
		case IR_NE:
			|	ASM_TMEM_OP setne, byte, mem
			break;
		case IR_LT:
			|	ASM_TMEM_OP setl, byte, mem
			break;
		case IR_GE:
			|	ASM_TMEM_OP setge, byte, mem
			break;
		case IR_LE:
			|	ASM_TMEM_OP setle, byte, mem
			break;
		case IR_GT:
			|	ASM_TMEM_OP setg, byte, mem
			break;
		case IR_ULT:
			|	ASM_TMEM_OP setb, byte, mem
			break;
		case IR_UGE:
			|	ASM_TMEM_OP setae, byte, mem
			break;
		case IR_ULE:
			|	ASM_TMEM_OP setbe, byte, mem
			break;
		case IR_UGT:
			|	ASM_TMEM_OP seta, byte, mem
			break;
	}
}

static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[insn->op1].type;
	ir_op op = insn->op;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}
	if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
		if (op == IR_ULT) {
			/* always false */
			|	xor Ra(def_reg), Ra(def_reg)
			if (IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, insn->type, def, def_reg);
			}
			return;
		} else if (op == IR_UGE) {
			/* always true */
			|	ASM_REG_IMM_OP mov, insn->type, def_reg, 1
			if (IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, insn->type, def, def_reg);
			}
			return;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
	}
	ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2);
	_ir_emit_setcc_int(ctx, op, def_reg);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

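/*
 * A fused AND + compare-against-zero becomes TEST. For masks like 0xff,
 * 0xff00 or 0xffff in equality contexts the test is narrowed to a
 * sub-register (the legacy high-byte registers AH/BH/CH/DH serve for
 * 0xff00), which yields a shorter encoding; on 32-bit x86 the byte form
 * is only available for EAX..EDX.
 */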
static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op op)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *binop_insn = &ctx->ir_base[ref];
	ir_type type = binop_insn->type;
	ir_ref op1 = binop_insn->op1;
	ir_ref op2 = binop_insn->op2;
	ir_reg op1_reg = ctx->regs[ref][1];
	ir_reg op2_reg = ctx->regs[ref][2];

	IR_ASSERT(binop_insn->op == IR_AND);
	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		if (op2_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op2_reg)) {
				op2_reg = IR_REG_NUM(op2_reg);
				if (op1 != op2) {
					ir_emit_load(ctx, type, op2_reg, op2);
				}
			}
			|	ASM_REG_REG_OP test, type, op1_reg, op2_reg
		} else if (IR_IS_CONST_REF(op2)) {
			int32_t val = ir_fuse_imm(ctx, op2);

			if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) {
				|	test Rb(op1_reg), Rb(op1_reg)
			} else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) {
				if (op1_reg == IR_REG_RAX) {
					|	test ah, ah
				} else if (op1_reg == IR_REG_RBX) {
					|	test bh, bh
				} else if (op1_reg == IR_REG_RCX) {
					|	test ch, ch
				} else if (op1_reg == IR_REG_RDX) {
					|	test dh, dh
				} else {
					IR_ASSERT(0);
				}
			} else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) {
				|	test Rw(op1_reg), Rw(op1_reg)
			} else if ((op == IR_EQ || op == IR_NE) && val == -1) {
				|	test Rd(op1_reg), Rd(op1_reg)
			} else {
				|	ASM_REG_IMM_OP test, type, op1_reg, val
			}
		} else {
			ir_mem mem;

			if (ir_rule(ctx, op2) & IR_FUSED) {
				mem = ir_fuse_load(ctx, root, op2);
			} else {
				mem = ir_ref_spill_slot(ctx, op2);
			}
			|	ASM_REG_MEM_OP test, type, op1_reg, mem
		}
	} else if (IR_IS_CONST_REF(op1)) {
		IR_ASSERT(0);
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, op1);
		} else {
			mem = ir_ref_spill_slot(ctx, op1);
		}
		if (op2_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op2_reg)) {
				op2_reg = IR_REG_NUM(op2_reg);
				if (op1 != op2) {
					ir_emit_load(ctx, type, op2_reg, op2);
				}
			}
			|	ASM_MEM_REG_OP test, type, mem, op2_reg
		} else {
			IR_ASSERT(!IR_IS_CONST_REF(op1));
			int32_t val = ir_fuse_imm(ctx, op2);
			|	ASM_MEM_IMM_OP test, type, mem, val
		}
	}
}

static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(def_reg != IR_REG_NONE);
	ir_emit_test_int_common(ctx, def, insn->op1, insn->op);
	_ir_emit_setcc_int(ctx, insn->op, def_reg);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(def_reg != IR_REG_NONE);
	_ir_emit_setcc_int(ctx, insn->op, def_reg);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_op op = cmp_insn->op;
	ir_ref op1, op2;
	ir_reg op1_reg, op2_reg;

	op1 = cmp_insn->op1;
	op2 = cmp_insn->op2;
	op1_reg = ctx->regs[cmp_ref][1];
	op2_reg = ctx->regs[cmp_ref][2];

	if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
		ir_reg tmp_reg;

		SWAP_REFS(op1, op2);
		tmp_reg = op1_reg;
		op1_reg = op2_reg;
		op2_reg = tmp_reg;
	}

	IR_ASSERT(op1_reg != IR_REG_NONE);
	if (IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		|	ASM_FP_REG_REG_OP ucomis, type, op1_reg, op2_reg
	} else if (IR_IS_CONST_REF(op2)) {
		int label = ir_const_label(ctx, op2);

		|	ASM_FP_REG_TXT_OP ucomis, type, op1_reg, [=>label]
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		|	ASM_FP_REG_MEM_OP ucomis, type, op1_reg, mem
	}
	return op;
}

static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn);
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg tmp_reg = ctx->regs[def][3];

	IR_ASSERT(def_reg != IR_REG_NONE);
	switch (op) {
		default:
			IR_ASSERT(0 && "NIY binary op");
		case IR_EQ:
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			|	cmovne Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_NE:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmovne Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_LT:
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			|	cmovae Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_GE:
			|	setae Rb(def_reg)
			break;
		case IR_LE:
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			|	cmova Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_GT:
			|	seta Rb(def_reg)
			break;
		case IR_ULT:
			|	setb Rb(def_reg)
			break;
		case IR_UGE:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmovae Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_ULE:
			|	setbe Rb(def_reg)
			break;
		case IR_UGT:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmova Rd(def_reg), Rd(tmp_reg)
			break;
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

case IR_ULE: 5831 | setbe Rb(def_reg) 5832 break; 5833 case IR_UGT: 5834 | setp Rb(def_reg) 5835 | mov Rd(tmp_reg), 1 5836 | cmova Rd(def_reg), Rd(tmp_reg) 5837 break; 5838 } 5839 if (IR_REG_SPILLED(ctx->regs[def][0])) { 5840 ir_emit_store(ctx, insn->type, def, def_reg); 5841 } 5842} 5843 5844static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) 5845{ 5846 uint32_t true_block, false_block; 5847 ir_backend_data *data = ctx->data; 5848 dasm_State **Dst = &data->dasm_state; 5849 5850 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 5851 if (true_block != next_block) { 5852 | jmp =>true_block 5853 } 5854} 5855 5856static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block) 5857{ 5858 uint32_t true_block, false_block; 5859 ir_backend_data *data = ctx->data; 5860 dasm_State **Dst = &data->dasm_state; 5861 5862 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 5863 if (false_block != next_block) { 5864 | jmp =>false_block 5865 } 5866} 5867 5868static void ir_emit_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block, uint8_t op, bool int_cmp) 5869{ 5870 uint32_t true_block, false_block; 5871 ir_backend_data *data = ctx->data; 5872 dasm_State **Dst = &data->dasm_state; 5873 5874 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 5875 if (true_block == next_block) { 5876 /* swap to avoid unconditional JMP */ 5877 if (int_cmp || op == IR_EQ || op == IR_NE) { 5878 op ^= 1; // reverse 5879 } else { 5880 op ^= 5; // reverse 5881 } 5882 true_block = false_block; 5883 false_block = 0; 5884 } else if (false_block == next_block) { 5885 false_block = 0; 5886 } 5887 5888 if (int_cmp) { 5889 switch (op) { 5890 default: 5891 IR_ASSERT(0 && "NIY binary op"); 5892 case IR_EQ: 5893 | je =>true_block 5894 break; 5895 case IR_NE: 5896 | jne =>true_block 5897 break; 5898 case IR_LT: 5899 | jl =>true_block 5900 break; 5901 case IR_GE: 5902 | jge =>true_block 5903 break; 5904 case IR_LE: 5905 | jle =>true_block 5906 break; 5907 case IR_GT: 5908 | jg =>true_block 5909 break; 5910 case IR_ULT: 5911 | jb =>true_block 5912 break; 5913 case IR_UGE: 5914 | jae =>true_block 5915 break; 5916 case IR_ULE: 5917 | jbe =>true_block 5918 break; 5919 case IR_UGT: 5920 | ja =>true_block 5921 break; 5922 } 5923 } else { 5924 switch (op) { 5925 default: 5926 IR_ASSERT(0 && "NIY binary op"); 5927 case IR_EQ: 5928 if (!false_block) { 5929 | jp >1 5930 | je =>true_block 5931 |1: 5932 } else { 5933 | jp =>false_block 5934 | je =>true_block 5935 } 5936 break; 5937 case IR_NE: 5938 | jne =>true_block 5939 | jp =>true_block 5940 break; 5941 case IR_LT: 5942 if (!false_block) { 5943 | jp >1 5944 | jb =>true_block 5945 |1: 5946 } else { 5947 | jp =>false_block 5948 | jb =>true_block 5949 } 5950 break; 5951 case IR_GE: 5952 | jae =>true_block 5953 break; 5954 case IR_LE: 5955 if (!false_block) { 5956 | jp >1 5957 | jbe =>true_block 5958 |1: 5959 } else { 5960 | jp =>false_block 5961 | jbe =>true_block 5962 } 5963 break; 5964 case IR_GT: 5965 | ja =>true_block 5966 break; 5967 case IR_ULT: 5968 | jb =>true_block 5969 break; 5970 case IR_UGE: 5971 | jp =>true_block 5972 | jae =>true_block 5973 break; 5974 case IR_ULE: 5975 | jbe =>true_block 5976 break; 5977 case IR_UGT: 5978 | jp =>true_block 5979 | ja =>true_block 5980 break; 5981 } 5982 } 5983 if (false_block) { 5984 | jmp =>false_block 5985 } 5986} 5987 5988static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 5989{ 5990 
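	/* Fused compare-and-branch: emit the integer CMP and consume the flags
	 * directly with a conditional jump instead of materializing a SETcc
	 * result. The CMP itself is skipped when the preceding block ended with
	 * exactly the same comparison, so the flags are still valid.
	 * For example, "if (a < b)" with both operands in registers (eax/edx
	 * here are purely illustrative) typically lowers to:
	 *
	 *     cmp  eax, edx
	 *     jl   =>true_block
	 *     jmp  =>false_block   ; omitted when false_block == next_block
	 */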
ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; 5991 ir_op op = cmp_insn->op; 5992 ir_type type = ctx->ir_base[cmp_insn->op1].type; 5993 ir_ref op1 = cmp_insn->op1; 5994 ir_ref op2 = cmp_insn->op2; 5995 ir_reg op1_reg = ctx->regs[insn->op2][1]; 5996 ir_reg op2_reg = ctx->regs[insn->op2][2]; 5997 5998 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 5999 op1_reg = IR_REG_NUM(op1_reg); 6000 ir_emit_load(ctx, type, op1_reg, op1); 6001 } 6002 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6003 op2_reg = IR_REG_NUM(op2_reg); 6004 if (op1 != op2) { 6005 ir_emit_load(ctx, type, op2_reg, op2); 6006 } 6007 } 6008 if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { 6009 if (op == IR_ULT) { 6010 /* always false */ 6011 ir_emit_jmp_false(ctx, b, def, next_block); 6012 return; 6013 } else if (op == IR_UGE) { 6014 /* always true */ 6015 ir_emit_jmp_true(ctx, b, def, next_block); 6016 return; 6017 } else if (op == IR_ULE) { 6018 op = IR_EQ; 6019 } else if (op == IR_UGT) { 6020 op = IR_NE; 6021 } 6022 } 6023 6024 bool same_comparison = 0; 6025 ir_insn *prev_insn = &ctx->ir_base[insn->op1]; 6026 if (prev_insn->op == IR_IF_TRUE || prev_insn->op == IR_IF_FALSE) { 6027 if (ir_rule(ctx, prev_insn->op1) == IR_CMP_AND_BRANCH_INT) { 6028 prev_insn = &ctx->ir_base[prev_insn->op1]; 6029 prev_insn = &ctx->ir_base[prev_insn->op2]; 6030 if (prev_insn->op1 == cmp_insn->op1 && prev_insn->op2 == cmp_insn->op2) { 6031 same_comparison = true; 6032 } 6033 } 6034 } 6035 if (!same_comparison) { 6036 ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2); 6037 } 6038 ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); 6039} 6040 6041static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 6042{ 6043 ir_ref op2 = insn->op2; 6044 ir_op op = ctx->ir_base[op2].op; 6045 6046 if (op >= IR_EQ && op <= IR_UGT) { 6047 op2 = ctx->ir_base[op2].op1; 6048 } else { 6049 IR_ASSERT(op == IR_AND); 6050 op = IR_NE; 6051 } 6052 6053 ir_emit_test_int_common(ctx, def, op2, op); 6054 ir_emit_jcc(ctx, b, def, insn, next_block, op, 1); 6055} 6056 6057static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 6058{ 6059 ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); 6060 ir_emit_jcc(ctx, b, def, insn, next_block, op, 0); 6061} 6062 6063static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 6064{ 6065 ir_type type = ctx->ir_base[insn->op2].type; 6066 ir_reg op2_reg = ctx->regs[def][2]; 6067 ir_backend_data *data = ctx->data; 6068 dasm_State **Dst = &data->dasm_state; 6069 6070 if (op2_reg != IR_REG_NONE) { 6071 if (IR_REG_SPILLED(op2_reg)) { 6072 op2_reg = IR_REG_NUM(op2_reg); 6073 ir_emit_load(ctx, type, op2_reg, insn->op2); 6074 } 6075 | ASM_REG_REG_OP test, type, op2_reg, op2_reg 6076 } else if (IR_IS_CONST_REF(insn->op2)) { 6077 uint32_t true_block, false_block; 6078 6079 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 6080 if (ir_const_is_true(&ctx->ir_base[insn->op2])) { 6081 if (true_block != next_block) { 6082 | jmp =>true_block 6083 } 6084 } else { 6085 if (false_block != next_block) { 6086 | jmp =>false_block 6087 } 6088 } 6089 return; 6090 } else if (ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA) { 6091 uint32_t true_block, false_block; 6092 6093 ir_get_true_false_blocks(ctx, b, &true_block, &false_block); 6094 if (true_block != next_block) { 6095 | jmp 
=>true_block
		}
		return;
	} else {
		ir_mem mem;

		if (ir_rule(ctx, insn->op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op2);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op2);
		}
		| ASM_MEM_IMM_OP cmp, type, mem, 0
	}
	ir_emit_jcc(ctx, b, def, insn, next_block, IR_NE, 1);
}

static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_ref op3 = insn->op3;
	ir_type op1_type = ctx->ir_base[op1].type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg op3_reg = ctx->regs[def][3];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, op2);
		if (op1 == op2) {
			op1_reg = op2_reg;
		}
		if (op3 == op2) {
			op3_reg = op2_reg;
		}
	}
	if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) {
		op3_reg = IR_REG_NUM(op3_reg);
		ir_emit_load(ctx, type, op3_reg, op3);
		if (op1 == op3) {
			op1_reg = op3_reg;
		}
	}
	if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, op1_type, op1_reg, op1);
	}

	if (IR_IS_TYPE_INT(op1_type)) {
		if (op1_reg != IR_REG_NONE) {
			| ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg
		} else {
			ir_mem mem = ir_ref_spill_slot(ctx, op1);

			| ASM_MEM_IMM_OP cmp, op1_type, mem, 0
		}
		if (IR_IS_TYPE_INT(type)) {
			IR_ASSERT(op2_reg != IR_REG_NONE || op3_reg != IR_REG_NONE);
			if (op3_reg != IR_REG_NONE) {
				if (op3_reg == def_reg) {
					IR_ASSERT(op2_reg != IR_REG_NONE);
					| ASM_REG_REG_OP2 cmovne, type, def_reg, op2_reg
				} else {
					if (op2_reg != IR_REG_NONE) {
						if (def_reg != op2_reg) {
							if (IR_IS_TYPE_INT(type)) {
								ir_emit_mov(ctx, type, def_reg, op2_reg);
							} else {
								ir_emit_fp_mov(ctx, type, def_reg, op2_reg);
							}
						}
					} else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) {
						/* prevent "xor" and flags clobbering */
						ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64);
					} else {
						ir_emit_load_ex(ctx, type, def_reg, op2, def);
					}
					| ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg
				}
			} else {
				IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg);
				if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) {
					/* prevent "xor" and flags clobbering */
					ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64);
				} else {
					ir_emit_load_ex(ctx, type, def_reg, op3, def);
				}
				| ASM_REG_REG_OP2 cmovne, type, def_reg, op2_reg
			}

			if (IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, type, def, def_reg);
			}
			return;
		}
		| je >2
	} else {
		if (!data->double_zero_const) {
			data->double_zero_const = 1;
			ir_rodata(ctx);
			|.align 16
			|->double_zero_const:
			|.dword 0, 0
			|.code
		}
		| ASM_FP_REG_TXT_OP ucomis, op1_type, op1_reg, [->double_zero_const]
		| jp >1
		| je >2
		|1:
	}

	if (op2_reg != IR_REG_NONE) {
		if (def_reg != op2_reg) {
			if
(IR_IS_TYPE_INT(type)) { 6215 ir_emit_mov(ctx, type, def_reg, op2_reg); 6216 } else { 6217 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6218 } 6219 } 6220 } else { 6221 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6222 } 6223 | jmp >3 6224 |2: 6225 if (op3_reg != IR_REG_NONE) { 6226 if (def_reg != op3_reg) { 6227 if (IR_IS_TYPE_INT(type)) { 6228 ir_emit_mov(ctx, type, def_reg, op3_reg); 6229 } else { 6230 ir_emit_fp_mov(ctx, type, def_reg, op3_reg); 6231 } 6232 } 6233 } else { 6234 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6235 } 6236 |3: 6237 6238 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6239 ir_emit_store(ctx, type, def, def_reg); 6240 } 6241} 6242 6243static void ir_emit_cond_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6244{ 6245 ir_backend_data *data = ctx->data; 6246 dasm_State **Dst = &data->dasm_state; 6247 ir_type type = insn->type; 6248 ir_ref op2 = insn->op2; 6249 ir_ref op3 = insn->op3; 6250 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6251 ir_reg op2_reg = ctx->regs[def][2]; 6252 ir_reg op3_reg = ctx->regs[def][3]; 6253 ir_op op; 6254 6255 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6256 op2_reg = IR_REG_NUM(op2_reg); 6257 ir_emit_load(ctx, type, op2_reg, op2); 6258 if (op3 == op2) { 6259 op3_reg = op2_reg; 6260 } 6261 } 6262 if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { 6263 op3_reg = IR_REG_NUM(op3_reg); 6264 ir_emit_load(ctx, type, op3_reg, op3); 6265 } 6266 6267 ir_emit_cmp_int_common2(ctx, def, insn->op1, &ctx->ir_base[insn->op1]); 6268 op = ctx->ir_base[insn->op1].op; 6269 6270 if (IR_IS_TYPE_INT(type)) { 6271 if (op3_reg != IR_REG_NONE) { 6272 if (op3_reg == def_reg) { 6273 IR_ASSERT(op2_reg != IR_REG_NONE); 6274 op3_reg = op2_reg; 6275 op ^= 1; // reverse 6276 } else { 6277 if (op2_reg != IR_REG_NONE) { 6278 if (def_reg != op2_reg) { 6279// if (IR_IS_TYPE_INT(type)) { 6280 ir_emit_mov(ctx, type, def_reg, op2_reg); 6281// } else { 6282// ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6283// } 6284 } 6285 } else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op)) { 6286 /* prevent "xor" and flags clobbering */ 6287 ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op2].val.i64); 6288 } else { 6289 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6290 } 6291 } 6292 } else { 6293 IR_ASSERT(op2_reg != IR_REG_NONE && op2_reg != def_reg); 6294 if (IR_IS_CONST_REF(op3) && !IR_IS_SYM_CONST(ctx->ir_base[op3].op)) { 6295 /* prevent "xor" and flags clobbering */ 6296 ir_emit_mov_imm_int(ctx, type, def_reg, ctx->ir_base[op3].val.i64); 6297 } else { 6298 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6299 } 6300 op3_reg = op2_reg; 6301 op ^= 1; // reverse 6302 } 6303 6304 switch (op) { 6305 default: 6306 IR_ASSERT(0 && "NIY binary op"); 6307 case IR_EQ: 6308 | ASM_REG_REG_OP2 cmovne, type, def_reg, op3_reg 6309 break; 6310 case IR_NE: 6311 | ASM_REG_REG_OP2 cmove, type, def_reg, op3_reg 6312 break; 6313 case IR_LT: 6314 | ASM_REG_REG_OP2 cmovge, type, def_reg, op3_reg 6315 break; 6316 case IR_GE: 6317 | ASM_REG_REG_OP2 cmovl, type, def_reg, op3_reg 6318 break; 6319 case IR_LE: 6320 | ASM_REG_REG_OP2 cmovg, type, def_reg, op3_reg 6321 break; 6322 case IR_GT: 6323 | ASM_REG_REG_OP2 cmovle, type, def_reg, op3_reg 6324 break; 6325 case IR_ULT: 6326 | ASM_REG_REG_OP2 cmovae, type, def_reg, op3_reg 6327 break; 6328 case IR_UGE: 6329 | ASM_REG_REG_OP2 cmovb, type, def_reg, op3_reg 6330 break; 6331 case IR_ULE: 6332 | ASM_REG_REG_OP2 cmova, type, def_reg, op3_reg 6333 break; 6334 case IR_UGT: 6335 | ASM_REG_REG_OP2 cmovbe, 
type, def_reg, op3_reg 6336 break; 6337 } 6338 } else { 6339 switch (op) { 6340 default: 6341 IR_ASSERT(0 && "NIY binary op"); 6342 case IR_EQ: 6343 | jne >2 6344 break; 6345 case IR_NE: 6346 | je >2 6347 break; 6348 case IR_LT: 6349 | jge >2 6350 break; 6351 case IR_GE: 6352 | jl >2 6353 break; 6354 case IR_LE: 6355 | jg >2 6356 break; 6357 case IR_GT: 6358 | jle >2 6359 break; 6360 case IR_ULT: 6361 | jae >2 6362 break; 6363 case IR_UGE: 6364 | jb >2 6365 break; 6366 case IR_ULE: 6367 | ja >2 6368 break; 6369 case IR_UGT: 6370 | jbe >2 6371 break; 6372 } 6373 |1: 6374 6375 if (op2_reg != IR_REG_NONE) { 6376 if (def_reg != op2_reg) { 6377 if (IR_IS_TYPE_INT(type)) { 6378 ir_emit_mov(ctx, type, def_reg, op2_reg); 6379 } else { 6380 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6381 } 6382 } 6383 } else { 6384 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6385 } 6386 | jmp >3 6387 |2: 6388 if (op3_reg != IR_REG_NONE) { 6389 if (def_reg != op3_reg) { 6390 if (IR_IS_TYPE_INT(type)) { 6391 ir_emit_mov(ctx, type, def_reg, op3_reg); 6392 } else { 6393 ir_emit_fp_mov(ctx, type, def_reg, op3_reg); 6394 } 6395 } 6396 } else { 6397 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6398 } 6399 |3: 6400 } 6401 6402 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6403 ir_emit_store(ctx, type, def, def_reg); 6404 } 6405} 6406 6407static void ir_emit_cond_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6408{ 6409 ir_backend_data *data = ctx->data; 6410 dasm_State **Dst = &data->dasm_state; 6411 ir_type type = insn->type; 6412 ir_ref op2 = insn->op2; 6413 ir_ref op3 = insn->op3; 6414 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6415 ir_reg op2_reg = ctx->regs[def][2]; 6416 ir_reg op3_reg = ctx->regs[def][3]; 6417 ir_op op; 6418 6419 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6420 op2_reg = IR_REG_NUM(op2_reg); 6421 ir_emit_load(ctx, type, op2_reg, op2); 6422 if (op3 == op2) { 6423 op3_reg = op2_reg; 6424 } 6425 } 6426 if (op3_reg != IR_REG_NONE && op3 != op2 && IR_REG_SPILLED(op3_reg)) { 6427 op3_reg = IR_REG_NUM(op3_reg); 6428 ir_emit_load(ctx, type, op3_reg, op3); 6429 } 6430 6431 op = ir_emit_cmp_fp_common(ctx, def, insn->op1, &ctx->ir_base[insn->op1]); 6432 6433 switch (op) { 6434 default: 6435 IR_ASSERT(0 && "NIY binary op"); 6436 case IR_EQ: 6437 | jne >2 6438 | jp >2 6439 break; 6440 case IR_NE: 6441 | jp >1 6442 | je >2 6443 break; 6444 case IR_LT: 6445 | jp >2 6446 | jae >2 6447 break; 6448 case IR_GE: 6449 | jb >2 6450 break; 6451 case IR_LE: 6452 | jp >2 6453 | ja >2 6454 break; 6455 case IR_GT: 6456 | jbe >2 6457 break; 6458 case IR_ULT: 6459 | jae >2 6460 break; 6461 case IR_UGE: 6462 | jp >1 6463 | jb >2 6464 break; 6465 case IR_ULE: 6466 | ja >2 6467 break; 6468 case IR_UGT: 6469 | jp >1 6470 | jbe >2 6471 break; 6472 } 6473 |1: 6474 6475 if (op2_reg != IR_REG_NONE) { 6476 if (def_reg != op2_reg) { 6477 if (IR_IS_TYPE_INT(type)) { 6478 ir_emit_mov(ctx, type, def_reg, op2_reg); 6479 } else { 6480 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 6481 } 6482 } 6483 } else { 6484 ir_emit_load_ex(ctx, type, def_reg, op2, def); 6485 } 6486 | jmp >3 6487 |2: 6488 if (op3_reg != IR_REG_NONE) { 6489 if (def_reg != op3_reg) { 6490 if (IR_IS_TYPE_INT(type)) { 6491 ir_emit_mov(ctx, type, def_reg, op3_reg); 6492 } else { 6493 ir_emit_fp_mov(ctx, type, def_reg, op3_reg); 6494 } 6495 } 6496 } else { 6497 ir_emit_load_ex(ctx, type, def_reg, op3, def); 6498 } 6499 |3: 6500 6501 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6502 ir_emit_store(ctx, type, def, def_reg); 6503 } 6504} 6505 6506static void 
ir_emit_return_void(ir_ctx *ctx) 6507{ 6508 ir_backend_data *data = ctx->data; 6509 dasm_State **Dst = &data->dasm_state; 6510 6511 ir_emit_epilogue(ctx); 6512 6513#ifdef IR_TARGET_X86 6514 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) { 6515 | ret ctx->param_stack_size 6516 return; 6517 } 6518#endif 6519 6520 | ret 6521} 6522 6523static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 6524{ 6525 ir_reg op2_reg = ctx->regs[ref][2]; 6526 6527 if (op2_reg != IR_REG_INT_RET1) { 6528 ir_type type = ctx->ir_base[insn->op2].type; 6529 6530 if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { 6531 ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg); 6532 } else { 6533 ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2); 6534 } 6535 } 6536 ir_emit_return_void(ctx); 6537} 6538 6539static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 6540{ 6541 ir_reg op2_reg = ctx->regs[ref][2]; 6542 ir_type type = ctx->ir_base[insn->op2].type; 6543 6544#ifdef IR_REG_FP_RET1 6545 if (op2_reg != IR_REG_FP_RET1) { 6546 if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) { 6547 ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg); 6548 } else { 6549 ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2); 6550 } 6551 } 6552#else 6553 ir_backend_data *data = ctx->data; 6554 dasm_State **Dst = &data->dasm_state; 6555 6556 if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) { 6557 ir_reg fp; 6558 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp); 6559 6560 if (type == IR_DOUBLE) { 6561 | fld qword [Ra(fp)+offset] 6562 } else { 6563 IR_ASSERT(type == IR_FLOAT); 6564 | fld dword [Ra(fp)+offset] 6565 } 6566 } else { 6567 int32_t offset = ctx->ret_slot; 6568 ir_reg fp; 6569 6570 IR_ASSERT(offset != -1); 6571 offset = IR_SPILL_POS_TO_OFFSET(offset); 6572 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 6573 ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg); 6574 if (type == IR_DOUBLE) { 6575 | fld qword [Ra(fp)+offset] 6576 } else { 6577 IR_ASSERT(type == IR_FLOAT); 6578 | fld dword [Ra(fp)+offset] 6579 } 6580 } 6581#endif 6582 ir_emit_return_void(ctx); 6583} 6584 6585static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6586{ 6587 ir_type dst_type = insn->type; 6588 ir_type src_type = ctx->ir_base[insn->op1].type; 6589 ir_backend_data *data = ctx->data; 6590 dasm_State **Dst = &data->dasm_state; 6591 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6592 ir_reg op1_reg = ctx->regs[def][1]; 6593 6594 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6595 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 6596 IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]); 6597 IR_ASSERT(def_reg != IR_REG_NONE); 6598 6599 if (op1_reg != IR_REG_NONE) { 6600 if (IR_REG_SPILLED(op1_reg)) { 6601 op1_reg = IR_REG_NUM(op1_reg); 6602 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6603 } 6604 if (ir_type_size[src_type] == 1) { 6605 if (ir_type_size[dst_type] == 2) { 6606 | movsx Rw(def_reg), Rb(op1_reg) 6607 } else if (ir_type_size[dst_type] == 4) { 6608 | movsx Rd(def_reg), Rb(op1_reg) 6609 } else { 6610 IR_ASSERT(ir_type_size[dst_type] == 8); 6611 IR_ASSERT(sizeof(void*) == 8); 6612|.if X64 6613 | movsx Rq(def_reg), Rb(op1_reg) 6614|.endif 6615 } 6616 } else if (ir_type_size[src_type] == 2) { 6617 if (ir_type_size[dst_type] == 4) { 6618 | movsx Rd(def_reg), Rw(op1_reg) 6619 } else { 6620 IR_ASSERT(ir_type_size[dst_type] == 8); 6621 IR_ASSERT(sizeof(void*) == 8); 6622|.if X64 6623 | movsx Rq(def_reg), Rw(op1_reg) 6624|.endif 6625 } 6626 } else { 6627 IR_ASSERT(ir_type_size[src_type] == 4); 6628 IR_ASSERT(ir_type_size[dst_type] == 8); 6629 IR_ASSERT(sizeof(void*) == 8); 6630|.if X64 6631 | movsxd Rq(def_reg), Rd(op1_reg) 6632|.endif 6633 } 6634 } else if (IR_IS_CONST_REF(insn->op1)) { 6635 IR_ASSERT(0); 6636 } else { 6637 ir_mem mem; 6638 6639 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6640 mem = ir_fuse_load(ctx, def, insn->op1); 6641 } else { 6642 mem = ir_ref_spill_slot(ctx, insn->op1); 6643 } 6644 6645 if (ir_type_size[src_type] == 1) { 6646 if (ir_type_size[dst_type] == 2) { 6647 | ASM_TXT_TMEM_OP movsx, Rw(def_reg), byte, mem 6648 } else if (ir_type_size[dst_type] == 4) { 6649 | ASM_TXT_TMEM_OP movsx, Rd(def_reg), byte, mem 6650 } else { 6651 IR_ASSERT(ir_type_size[dst_type] == 8); 6652 IR_ASSERT(sizeof(void*) == 8); 6653|.if X64 6654 | ASM_TXT_TMEM_OP movsx, Rq(def_reg), byte, mem 6655|.endif 6656 } 6657 } else if (ir_type_size[src_type] == 2) { 6658 if (ir_type_size[dst_type] == 4) { 6659 | ASM_TXT_TMEM_OP movsx, Rd(def_reg), word, mem 6660 } else { 6661 IR_ASSERT(ir_type_size[dst_type] == 8); 6662 IR_ASSERT(sizeof(void*) == 8); 6663|.if X64 6664 | ASM_TXT_TMEM_OP movsx, Rq(def_reg), word, mem 6665|.endif 6666 } 6667 } else { 6668 IR_ASSERT(ir_type_size[src_type] == 4); 6669 IR_ASSERT(ir_type_size[dst_type] == 8); 6670 IR_ASSERT(sizeof(void*) == 8); 6671|.if X64 6672 | ASM_TXT_TMEM_OP movsxd, Rq(def_reg), dword, mem 6673|.endif 6674 } 6675 } 6676 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6677 ir_emit_store(ctx, dst_type, def, def_reg); 6678 } 6679} 6680 6681static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6682{ 6683 ir_type dst_type = insn->type; 6684 ir_type src_type = ctx->ir_base[insn->op1].type; 6685 ir_backend_data *data = ctx->data; 6686 dasm_State **Dst = &data->dasm_state; 6687 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 
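	/* regs[def][0] is the register assigned to the result (possibly marked
	 * as spilled), regs[def][1] the one assigned to op1; IR_REG_NONE for
	 * op1 means the operand is consumed directly from memory below. */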
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				| movzx Rw(def_reg), Rb(op1_reg)
			} else if (ir_type_size[dst_type] == 4) {
				| movzx Rd(def_reg), Rb(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| movzx Rq(def_reg), Rb(op1_reg)
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				| movzx Rd(def_reg), Rw(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| movzx Rq(def_reg), Rw(op1_reg)
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			/* Avoid zero extension to the same register: any 32-bit write
			 * already clears the upper half. This may not always be safe ??? */
			if (op1_reg != def_reg) {
				| mov Rd(def_reg), Rd(op1_reg)
			}
|.endif
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		IR_ASSERT(0);
	} else {
		ir_mem mem;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}

		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				| ASM_TXT_TMEM_OP movzx, Rw(def_reg), byte, mem
			} else if (ir_type_size[dst_type] == 4) {
				| ASM_TXT_TMEM_OP movzx, Rd(def_reg), byte, mem
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP movzx, Rq(def_reg), byte, mem
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				| ASM_TXT_TMEM_OP movzx, Rd(def_reg), word, mem
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP movzx, Rq(def_reg), word, mem
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
|.if X64
			| ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem
|.endif
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (op1_reg != def_reg) {
			/* truncation only keeps the low bytes, so a plain move at the
			 * destination width is enough */
			ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
		}
	} else {
		ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def);
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
ir_emit_store(ctx, dst_type, def, def_reg); 6803 } 6804} 6805 6806static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6807{ 6808 ir_type dst_type = insn->type; 6809 ir_type src_type = ctx->ir_base[insn->op1].type; 6810 ir_backend_data *data = ctx->data; 6811 dasm_State **Dst = &data->dasm_state; 6812 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6813 ir_reg op1_reg = ctx->regs[def][1]; 6814 6815 IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]); 6816 IR_ASSERT(def_reg != IR_REG_NONE); 6817 if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) { 6818 if (op1_reg != IR_REG_NONE) { 6819 if (IR_REG_SPILLED(op1_reg)) { 6820 op1_reg = IR_REG_NUM(op1_reg); 6821 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6822 } 6823 if (op1_reg != def_reg) { 6824 ir_emit_mov(ctx, dst_type, def_reg, op1_reg); 6825 } 6826 } else { 6827 ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); 6828 } 6829 } else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) { 6830 if (op1_reg != IR_REG_NONE) { 6831 if (IR_REG_SPILLED(op1_reg)) { 6832 op1_reg = IR_REG_NUM(op1_reg); 6833 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6834 } 6835 if (op1_reg != def_reg) { 6836 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 6837 } 6838 } else { 6839 ir_emit_load_ex(ctx, dst_type, def_reg, insn->op1, def); 6840 } 6841 } else if (IR_IS_TYPE_FP(src_type)) { 6842 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 6843 if (op1_reg != IR_REG_NONE) { 6844 if (IR_REG_SPILLED(op1_reg)) { 6845 op1_reg = IR_REG_NUM(op1_reg); 6846 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6847 } 6848 if (src_type == IR_DOUBLE) { 6849 IR_ASSERT(sizeof(void*) == 8); 6850|.if X64 6851 if (ctx->mflags & IR_X86_AVX) { 6852 | vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6853 } else { 6854 | movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6855 } 6856|.endif 6857 } else { 6858 IR_ASSERT(src_type == IR_FLOAT); 6859 if (ctx->mflags & IR_X86_AVX) { 6860 | vmovd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6861 } else { 6862 | movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 6863 } 6864 } 6865 } else if (IR_IS_CONST_REF(insn->op1)) { 6866 ir_insn *_insn = &ctx->ir_base[insn->op1]; 6867 IR_ASSERT(!IR_IS_SYM_CONST(_insn->op)); 6868 if (src_type == IR_DOUBLE) { 6869 IR_ASSERT(sizeof(void*) == 8); 6870|.if X64 6871 | mov64 Rq(def_reg), _insn->val.i64 6872|.endif 6873 } else { 6874 IR_ASSERT(src_type == IR_FLOAT); 6875 | mov Rd(def_reg), _insn->val.i32 6876 } 6877 } else { 6878 ir_mem mem; 6879 6880 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6881 mem = ir_fuse_load(ctx, def, insn->op1); 6882 } else { 6883 mem = ir_ref_spill_slot(ctx, insn->op1); 6884 } 6885 6886 if (src_type == IR_DOUBLE) { 6887 IR_ASSERT(sizeof(void*) == 8); 6888|.if X64 6889 | ASM_TXT_TMEM_OP mov, Rq(def_reg), qword, mem 6890|.endif 6891 } else { 6892 IR_ASSERT(src_type == IR_FLOAT); 6893 | ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem 6894 } 6895 } 6896 } else if (IR_IS_TYPE_FP(dst_type)) { 6897 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6898 if (op1_reg != IR_REG_NONE) { 6899 if (IR_REG_SPILLED(op1_reg)) { 6900 op1_reg = IR_REG_NUM(op1_reg); 6901 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6902 } 6903 if (dst_type == IR_DOUBLE) { 6904 IR_ASSERT(sizeof(void*) == 8); 6905|.if X64 6906 if (ctx->mflags & IR_X86_AVX) { 6907 | vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 6908 } else { 6909 | movd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 6910 } 6911|.endif 6912 } else { 6913 IR_ASSERT(dst_type == IR_FLOAT); 6914 if (ctx->mflags & IR_X86_AVX) { 6915 | vmovd 
xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 6916 } else { 6917 | movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 6918 } 6919 } 6920 } else if (IR_IS_CONST_REF(insn->op1)) { 6921 int label = ir_const_label(ctx, insn->op1); 6922 6923 | ASM_FP_REG_TXT_OP movs, dst_type, def_reg, [=>label] 6924 } else { 6925 ir_mem mem; 6926 6927 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 6928 mem = ir_fuse_load(ctx, def, insn->op1); 6929 } else { 6930 mem = ir_ref_spill_slot(ctx, insn->op1); 6931 } 6932 6933 | ASM_FP_REG_MEM_OP movs, dst_type, def_reg, mem 6934 } 6935 } 6936 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6937 ir_emit_store(ctx, dst_type, def, def_reg); 6938 } 6939} 6940 6941static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6942{ 6943 ir_type dst_type = insn->type; 6944 ir_type src_type = ctx->ir_base[insn->op1].type; 6945 ir_backend_data *data = ctx->data; 6946 dasm_State **Dst = &data->dasm_state; 6947 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6948 ir_reg op1_reg = ctx->regs[def][1]; 6949 6950 IR_ASSERT(IR_IS_TYPE_INT(src_type)); 6951 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 6952 IR_ASSERT(def_reg != IR_REG_NONE); 6953 if (op1_reg != IR_REG_NONE) { 6954 bool src64 = 0; 6955 6956 if (IR_REG_SPILLED(op1_reg)) { 6957 op1_reg = IR_REG_NUM(op1_reg); 6958 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 6959 } 6960 if (IR_IS_TYPE_SIGNED(src_type)) { 6961 if (ir_type_size[src_type] < 4) { 6962|.if X64 6963|| if (ir_type_size[src_type] == 1) { 6964 | movsx Rq(op1_reg), Rb(op1_reg) 6965|| } else { 6966 | movsx Rq(op1_reg), Rw(op1_reg) 6967|| } 6968|| src64 = 1; 6969|.else 6970|| if (ir_type_size[src_type] == 1) { 6971 | movsx Rd(op1_reg), Rb(op1_reg) 6972|| } else { 6973 | movsx Rd(op1_reg), Rw(op1_reg) 6974|| } 6975|.endif 6976 } else if (ir_type_size[src_type] > 4) { 6977 src64 = 1; 6978 } 6979 } else { 6980 if (ir_type_size[src_type] < 8) { 6981|.if X64 6982|| if (ir_type_size[src_type] == 1) { 6983 | movzx Rq(op1_reg), Rb(op1_reg) 6984|| } else if (ir_type_size[src_type] == 2) { 6985 | movzx Rq(op1_reg), Rw(op1_reg) 6986|| } 6987|| src64 = 1; 6988|.else 6989|| if (ir_type_size[src_type] == 1) { 6990 | movzx Rd(op1_reg), Rb(op1_reg) 6991|| } else if (ir_type_size[src_type] == 2) { 6992 | movzx Rd(op1_reg), Rw(op1_reg) 6993|| } 6994|.endif 6995 } else { 6996 // TODO: uint64_t -> double 6997 src64 = 1; 6998 } 6999 } 7000 if (!src64) { 7001 if (dst_type == IR_DOUBLE) { 7002 if (ctx->mflags & IR_X86_AVX) { 7003 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7004 | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7005 } else { 7006 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7007 | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7008 } 7009 } else { 7010 IR_ASSERT(dst_type == IR_FLOAT); 7011 if (ctx->mflags & IR_X86_AVX) { 7012 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7013 | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7014 } else { 7015 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7016 | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg) 7017 } 7018 } 7019 } else { 7020 IR_ASSERT(sizeof(void*) == 8); 7021|.if X64 7022 if (dst_type == IR_DOUBLE) { 7023 if (ctx->mflags & IR_X86_AVX) { 7024 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7025 | vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 
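				/* the vxorps/pxor zeroing above exists to break the false
				 * dependency cvtsi2sd/cvtsi2ss keep on the old contents of
				 * the destination xmm register (they only write the low lanes) */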
7026 } else { 7027 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7028 | cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 7029 } 7030 } else { 7031 IR_ASSERT(dst_type == IR_FLOAT); 7032 if (ctx->mflags & IR_X86_AVX) { 7033 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7034 | vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 7035 } else { 7036 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7037 | cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg) 7038 } 7039 } 7040|.endif 7041 } 7042 } else { 7043 ir_mem mem; 7044 bool src64 = ir_type_size[src_type] == 8; 7045 7046 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 7047 mem = ir_fuse_load(ctx, def, insn->op1); 7048 } else { 7049 mem = ir_ref_spill_slot(ctx, insn->op1); 7050 } 7051 7052 if (!src64) { 7053 if (dst_type == IR_DOUBLE) { 7054 if (ctx->mflags & IR_X86_AVX) { 7055 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7056 | ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7057 } else { 7058 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7059 | ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7060 } 7061 } else { 7062 IR_ASSERT(dst_type == IR_FLOAT); 7063 if (ctx->mflags & IR_X86_AVX) { 7064 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7065 | ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7066 } else { 7067 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7068 | ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7069 } 7070 } 7071 } else { 7072 IR_ASSERT(sizeof(void*) == 8); 7073|.if X64 7074 if (dst_type == IR_DOUBLE) { 7075 if (ctx->mflags & IR_X86_AVX) { 7076 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7077 | ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7078 } else { 7079 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7080 | ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7081 } 7082 } else { 7083 IR_ASSERT(dst_type == IR_FLOAT); 7084 if (ctx->mflags & IR_X86_AVX) { 7085 | vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7086 | ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7087 } else { 7088 | pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST) 7089 | ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7090 } 7091 } 7092|.endif 7093 } 7094 } 7095 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7096 ir_emit_store(ctx, dst_type, def, def_reg); 7097 } 7098} 7099 7100static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7101{ 7102 ir_type dst_type = insn->type; 7103 ir_type src_type = ctx->ir_base[insn->op1].type; 7104 ir_backend_data *data = ctx->data; 7105 dasm_State **Dst = &data->dasm_state; 7106 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7107 ir_reg op1_reg = ctx->regs[def][1]; 7108 bool dst64 = 0; 7109 7110 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 7111 IR_ASSERT(IR_IS_TYPE_INT(dst_type)); 7112 IR_ASSERT(def_reg != IR_REG_NONE); 7113 if (IR_IS_TYPE_SIGNED(dst_type) ? 
ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) { 7114 // TODO: we might need to perform truncation from 32/64 bit integer 7115 dst64 = 1; 7116 } 7117 if (op1_reg != IR_REG_NONE) { 7118 if (IR_REG_SPILLED(op1_reg)) { 7119 op1_reg = IR_REG_NUM(op1_reg); 7120 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 7121 } 7122 if (!dst64) { 7123 if (src_type == IR_DOUBLE) { 7124 if (ctx->mflags & IR_X86_AVX) { 7125 | vcvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7126 } else { 7127 | cvttsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7128 } 7129 } else { 7130 IR_ASSERT(src_type == IR_FLOAT); 7131 if (ctx->mflags & IR_X86_AVX) { 7132 | vcvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7133 } else { 7134 | cvttss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7135 } 7136 } 7137 } else { 7138 IR_ASSERT(sizeof(void*) == 8); 7139|.if X64 7140 if (src_type == IR_DOUBLE) { 7141 if (ctx->mflags & IR_X86_AVX) { 7142 | vcvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7143 } else { 7144 | cvttsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7145 } 7146 } else { 7147 IR_ASSERT(src_type == IR_FLOAT); 7148 if (ctx->mflags & IR_X86_AVX) { 7149 | vcvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7150 } else { 7151 | cvttss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST) 7152 } 7153 } 7154|.endif 7155 } 7156 } else if (IR_IS_CONST_REF(insn->op1)) { 7157 int label = ir_const_label(ctx, insn->op1); 7158 7159 if (!dst64) { 7160 if (src_type == IR_DOUBLE) { 7161 if (ctx->mflags & IR_X86_AVX) { 7162 | vcvttsd2si Rd(def_reg), qword [=>label] 7163 } else { 7164 | cvttsd2si Rd(def_reg), qword [=>label] 7165 } 7166 } else { 7167 IR_ASSERT(src_type == IR_FLOAT); 7168 if (ctx->mflags & IR_X86_AVX) { 7169 | vcvttss2si Rd(def_reg), dword [=>label] 7170 } else { 7171 | cvttss2si Rd(def_reg), dword [=>label] 7172 } 7173 } 7174 } else { 7175 IR_ASSERT(sizeof(void*) == 8); 7176|.if X64 7177 if (src_type == IR_DOUBLE) { 7178 if (ctx->mflags & IR_X86_AVX) { 7179 | vcvttsd2si Rq(def_reg), qword [=>label] 7180 } else { 7181 | cvttsd2si Rq(def_reg), qword [=>label] 7182 } 7183 } else { 7184 IR_ASSERT(src_type == IR_FLOAT); 7185 if (ctx->mflags & IR_X86_AVX) { 7186 | vcvttss2si Rq(def_reg), dword [=>label] 7187 } else { 7188 | cvttss2si Rq(def_reg), dword [=>label] 7189 } 7190 } 7191|.endif 7192 } 7193 } else { 7194 ir_mem mem; 7195 7196 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 7197 mem = ir_fuse_load(ctx, def, insn->op1); 7198 } else { 7199 mem = ir_ref_spill_slot(ctx, insn->op1); 7200 } 7201 7202 if (!dst64) { 7203 if (src_type == IR_DOUBLE) { 7204 if (ctx->mflags & IR_X86_AVX) { 7205 | ASM_TXT_TMEM_OP vcvttsd2si, Rd(def_reg), qword, mem 7206 } else { 7207 | ASM_TXT_TMEM_OP cvttsd2si, Rd(def_reg), qword, mem 7208 } 7209 } else { 7210 IR_ASSERT(src_type == IR_FLOAT); 7211 if (ctx->mflags & IR_X86_AVX) { 7212 | ASM_TXT_TMEM_OP vcvttss2si, Rd(def_reg), dword, mem 7213 } else { 7214 | ASM_TXT_TMEM_OP cvttss2si, Rd(def_reg), dword, mem 7215 } 7216 } 7217 } else { 7218 IR_ASSERT(sizeof(void*) == 8); 7219|.if X64 7220 if (src_type == IR_DOUBLE) { 7221 if (ctx->mflags & IR_X86_AVX) { 7222 | ASM_TXT_TMEM_OP vcvttsd2si, Rq(def_reg), qword, mem 7223 } else { 7224 | ASM_TXT_TMEM_OP cvttsd2si, Rq(def_reg), qword, mem 7225 } 7226 } else { 7227 IR_ASSERT(src_type == IR_FLOAT); 7228 if (ctx->mflags & IR_X86_AVX) { 7229 | ASM_TXT_TMEM_OP vcvttss2si, Rq(def_reg), dword, mem 7230 } else { 7231 | ASM_TXT_TMEM_OP cvttss2si, Rq(def_reg), dword, mem 7232 } 7233 } 7234|.endif 7235 } 7236 } 7237 if (IR_REG_SPILLED(ctx->regs[def][0])) { 
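		/* the converted value also has a spill slot assigned: flush it back */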
7238 ir_emit_store(ctx, dst_type, def, def_reg); 7239 } 7240} 7241 7242static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7243{ 7244 ir_type dst_type = insn->type; 7245 ir_type src_type = ctx->ir_base[insn->op1].type; 7246 ir_backend_data *data = ctx->data; 7247 dasm_State **Dst = &data->dasm_state; 7248 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7249 ir_reg op1_reg = ctx->regs[def][1]; 7250 7251 IR_ASSERT(IR_IS_TYPE_FP(src_type)); 7252 IR_ASSERT(IR_IS_TYPE_FP(dst_type)); 7253 IR_ASSERT(def_reg != IR_REG_NONE); 7254 if (op1_reg != IR_REG_NONE) { 7255 if (IR_REG_SPILLED(op1_reg)) { 7256 op1_reg = IR_REG_NUM(op1_reg); 7257 ir_emit_load(ctx, src_type, op1_reg, insn->op1); 7258 } 7259 if (src_type == dst_type) { 7260 if (op1_reg != def_reg) { 7261 ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg); 7262 } 7263 } else if (src_type == IR_DOUBLE) { 7264 if (ctx->mflags & IR_X86_AVX) { 7265 | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7266 } else { 7267 | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7268 } 7269 } else { 7270 IR_ASSERT(src_type == IR_FLOAT); 7271 if (ctx->mflags & IR_X86_AVX) { 7272 | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7273 } else { 7274 | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST) 7275 } 7276 } 7277 } else if (IR_IS_CONST_REF(insn->op1)) { 7278 int label = ir_const_label(ctx, insn->op1); 7279 7280 if (src_type == IR_DOUBLE) { 7281 if (ctx->mflags & IR_X86_AVX) { 7282 | vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] 7283 } else { 7284 | cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label] 7285 } 7286 } else { 7287 IR_ASSERT(src_type == IR_FLOAT); 7288 if (ctx->mflags & IR_X86_AVX) { 7289 | vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] 7290 } else { 7291 | cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label] 7292 } 7293 } 7294 } else { 7295 ir_mem mem; 7296 7297 if (ir_rule(ctx, insn->op1) & IR_FUSED) { 7298 mem = ir_fuse_load(ctx, def, insn->op1); 7299 } else { 7300 mem = ir_ref_spill_slot(ctx, insn->op1); 7301 } 7302 7303 if (src_type == IR_DOUBLE) { 7304 if (ctx->mflags & IR_X86_AVX) { 7305 | ASM_TXT_TXT_TMEM_OP vcvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7306 } else { 7307 | ASM_TXT_TMEM_OP cvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem 7308 } 7309 } else { 7310 IR_ASSERT(src_type == IR_FLOAT); 7311 if (ctx->mflags & IR_X86_AVX) { 7312 | ASM_TXT_TXT_TMEM_OP vcvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7313 } else { 7314 | ASM_TXT_TMEM_OP cvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 7315 } 7316 } 7317 } 7318 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7319 ir_emit_store(ctx, dst_type, def, def_reg); 7320 } 7321} 7322 7323static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7324{ 7325 ir_ref type = insn->type; 7326 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7327 ir_reg op1_reg = ctx->regs[def][1]; 7328 7329 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 7330 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 7331 op1_reg = IR_REG_NUM(op1_reg); 7332 ir_emit_load(ctx, type, op1_reg, insn->op1); 7333 } 7334 if (def_reg == op1_reg) { 7335 /* same reg */ 7336 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 7337 ir_emit_mov(ctx, type, def_reg, op1_reg); 7338 } else if (def_reg != 
IR_REG_NONE) { 7339 ir_emit_load(ctx, type, def_reg, insn->op1); 7340 } else if (op1_reg != IR_REG_NONE) { 7341 ir_emit_store(ctx, type, def, op1_reg); 7342 } else { 7343 IR_ASSERT(0); 7344 } 7345 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 7346 ir_emit_store(ctx, type, def, def_reg); 7347 } 7348} 7349 7350static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7351{ 7352 ir_type type = insn->type; 7353 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7354 ir_reg op1_reg = ctx->regs[def][1]; 7355 7356 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 7357 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 7358 op1_reg = IR_REG_NUM(op1_reg); 7359 ir_emit_load(ctx, type, op1_reg, insn->op1); 7360 } 7361 if (def_reg == op1_reg) { 7362 /* same reg */ 7363 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 7364 ir_emit_fp_mov(ctx, type, def_reg, op1_reg); 7365 } else if (def_reg != IR_REG_NONE) { 7366 ir_emit_load(ctx, type, def_reg, insn->op1); 7367 } else if (op1_reg != IR_REG_NONE) { 7368 ir_emit_store(ctx, type, def, op1_reg); 7369 } else { 7370 IR_ASSERT(0); 7371 } 7372 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 7373 ir_emit_store(ctx, type, def, def_reg); 7374 } 7375} 7376 7377static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7378{ 7379 ir_backend_data *data = ctx->data; 7380 dasm_State **Dst = &data->dasm_state; 7381 ir_ref type = insn->type; 7382 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7383 ir_mem mem; 7384 int32_t offset; 7385 ir_reg fp; 7386 7387 IR_ASSERT(def_reg != IR_REG_NONE); 7388 mem = ir_var_spill_slot(ctx, insn->op1); 7389 fp = IR_MEM_BASE(mem); 7390 offset = IR_MEM_OFFSET(mem); 7391 | lea Ra(def_reg), aword [Ra(fp)+offset] 7392 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7393 ir_emit_store(ctx, type, def, def_reg); 7394 } 7395} 7396 7397static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7398{ 7399 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 7400 ir_ref type = insn->type; 7401 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7402 ir_reg fp; 7403 ir_mem mem; 7404 7405 IR_ASSERT(var_insn->op == IR_VAR); 7406 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7407 mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 7408 if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) { 7409 return; // fake load 7410 } 7411 IR_ASSERT(def_reg != IR_REG_NONE); 7412 7413 ir_emit_load_mem(ctx, type, def_reg, mem); 7414 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7415 ir_emit_store(ctx, type, def, def_reg); 7416 } 7417} 7418 7419static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7420{ 7421 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 7422 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 7423 ir_ref type = val_insn->type; 7424 ir_reg op3_reg = ctx->regs[ref][3]; 7425 ir_reg fp; 7426 ir_mem mem; 7427 7428 IR_ASSERT(var_insn->op == IR_VAR); 7429 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7430 mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 7431 if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) 7432 && !IR_IS_CONST_REF(insn->op3) 7433 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7434 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { 7435 return; // fake store 7436 } 7437 if (IR_IS_CONST_REF(insn->op3)) { 7438 ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); 7439 } else { 7440 IR_ASSERT(op3_reg != IR_REG_NONE); 7441 if (IR_REG_SPILLED(op3_reg)) { 7442 op3_reg = IR_REG_NUM(op3_reg); 7443 ir_emit_load(ctx, type, op3_reg, insn->op3); 7444 } 7445 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 7446 } 7447} 7448 7449static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7450{ 7451 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 7452 ir_ref type = ctx->ir_base[insn->op3].type; 7453 ir_reg op3_reg = ctx->regs[ref][3]; 7454 ir_reg fp; 7455 ir_mem mem; 7456 7457 IR_ASSERT(var_insn->op == IR_VAR); 7458 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7459 mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3)); 7460 if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg)) 7461 && !IR_IS_CONST_REF(insn->op3) 7462 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7463 && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) { 7464 return; // fake store 7465 } 7466 if (IR_IS_CONST_REF(insn->op3)) { 7467 ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); 7468 } else { 7469 IR_ASSERT(op3_reg != IR_REG_NONE); 7470 if (IR_REG_SPILLED(op3_reg)) { 7471 op3_reg = IR_REG_NUM(op3_reg); 7472 ir_emit_load(ctx, type, op3_reg, insn->op3); 7473 } 7474 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 7475 } 7476} 7477 7478static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7479{ 7480 ir_ref type = insn->type; 7481 ir_reg op2_reg = ctx->regs[def][2]; 7482 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7483 ir_mem mem; 7484 7485 if (ctx->use_lists[def].count == 1) { 7486 /* dead load */ 7487 return; 7488 } 7489 IR_ASSERT(def_reg != IR_REG_NONE); 7490 if (op2_reg != IR_REG_NONE) { 7491 if (IR_REG_SPILLED(op2_reg)) { 7492 op2_reg = IR_REG_NUM(op2_reg); 7493 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7494 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7495 } 7496 mem = IR_MEM_B(op2_reg); 7497 } else if (IR_IS_CONST_REF(insn->op2)) { 7498 mem = ir_fuse_addr_const(ctx, insn->op2); 7499 } else { 7500 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7501 mem = ir_fuse_addr(ctx, def, insn->op2); 7502 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 7503 if (!ir_may_avoid_spill_load(ctx, def, def)) { 7504 ir_emit_load_mem_int(ctx, type, def_reg, mem); 7505 } 7506 /* avoid load to the same location (valid only when register is not reused) */ 7507 return; 7508 } 7509 } 7510 7511 ir_emit_load_mem_int(ctx, type, def_reg, mem); 7512 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7513 ir_emit_store(ctx, type, def, def_reg); 7514 } 7515} 7516 7517static void ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7518{ 7519 ir_ref type = insn->type; 7520 ir_reg op2_reg = ctx->regs[def][2]; 7521 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7522 ir_mem mem; 7523 7524 if (ctx->use_lists[def].count == 1) { 7525 /* dead load */ 7526 return; 7527 } 7528 IR_ASSERT(def_reg != IR_REG_NONE); 7529 if (op2_reg != IR_REG_NONE) { 7530 if (IR_REG_SPILLED(op2_reg)) { 7531 op2_reg = IR_REG_NUM(op2_reg); 7532 
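			/* the pointer operand itself was spill-allocated: reload it
			 * before it is used as the base register of the load below */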
IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7533 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7534 } 7535 mem = IR_MEM_B(op2_reg); 7536 } else if (IR_IS_CONST_REF(insn->op2)) { 7537 mem = ir_fuse_addr_const(ctx, insn->op2); 7538 } else { 7539 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7540 mem = ir_fuse_addr(ctx, def, insn->op2); 7541 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 7542 if (!ir_may_avoid_spill_load(ctx, def, def)) { 7543 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 7544 } 7545 /* avoid load to the same location (valid only when register is not reused) */ 7546 return; 7547 } 7548 } 7549 7550 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 7551 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7552 ir_emit_store(ctx, type, def, def_reg); 7553 } 7554} 7555 7556static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7557{ 7558 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 7559 ir_ref type = val_insn->type; 7560 ir_reg op2_reg = ctx->regs[ref][2]; 7561 ir_reg op3_reg = ctx->regs[ref][3]; 7562 ir_mem mem; 7563 7564 if (op2_reg != IR_REG_NONE) { 7565 if (IR_REG_SPILLED(op2_reg)) { 7566 op2_reg = IR_REG_NUM(op2_reg); 7567 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7568 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7569 } 7570 mem = IR_MEM_B(op2_reg); 7571 } else if (IR_IS_CONST_REF(insn->op2)) { 7572 mem = ir_fuse_addr_const(ctx, insn->op2); 7573 } else { 7574 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7575 mem = ir_fuse_addr(ctx, ref, insn->op2); 7576 if (!IR_IS_CONST_REF(insn->op3) 7577 && IR_REG_SPILLED(op3_reg) 7578 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7579 && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 7580 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 7581 op3_reg = IR_REG_NUM(op3_reg); 7582 ir_emit_load(ctx, type, op3_reg, insn->op3); 7583 } 7584 /* avoid store to the same location */ 7585 return; 7586 } 7587 } 7588 7589 if (IR_IS_CONST_REF(insn->op3)) { 7590 ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); 7591 } else { 7592 IR_ASSERT(op3_reg != IR_REG_NONE); 7593 if (IR_REG_SPILLED(op3_reg)) { 7594 op3_reg = IR_REG_NUM(op3_reg); 7595 ir_emit_load(ctx, type, op3_reg, insn->op3); 7596 } 7597 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 7598 } 7599} 7600 7601static void ir_emit_cmp_and_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7602{ 7603 ir_reg addr_reg = ctx->regs[ref][2]; 7604 ir_mem mem; 7605 ir_insn *cmp_insn = &ctx->ir_base[insn->op3]; 7606 ir_op op = cmp_insn->op; 7607 ir_type type = ctx->ir_base[cmp_insn->op1].type; 7608 ir_ref op1 = cmp_insn->op1; 7609 ir_ref op2 = cmp_insn->op2; 7610 ir_reg op1_reg = ctx->regs[insn->op3][1]; 7611 ir_reg op2_reg = ctx->regs[insn->op3][2]; 7612 7613 if (addr_reg != IR_REG_NONE) { 7614 if (IR_REG_SPILLED(addr_reg)) { 7615 addr_reg = IR_REG_NUM(addr_reg); 7616 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7617 ir_emit_load(ctx, IR_ADDR, addr_reg, insn->op2); 7618 } 7619 mem = IR_MEM_B(addr_reg); 7620 } else if (IR_IS_CONST_REF(insn->op2)) { 7621 mem = ir_fuse_addr_const(ctx, insn->op2); 7622 } else { 7623 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7624 mem = ir_fuse_addr(ctx, ref, insn->op2); 7625 } 7626 7627 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 7628 op1_reg = IR_REG_NUM(op1_reg); 7629 ir_emit_load(ctx, type, op1_reg, op1); 7630 } 7631 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7632 op2_reg = IR_REG_NUM(op2_reg); 7633 if (op1 != op2) { 7634 ir_emit_load(ctx, type, op2_reg, op2); 
7635 } 7636 } 7637 7638 ir_emit_cmp_int_common(ctx, type, ref, cmp_insn, op1_reg, op1, op2_reg, op2); 7639 _ir_emit_setcc_int_mem(ctx, op, mem); 7640} 7641 7642static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7643{ 7644 ir_ref type = ctx->ir_base[insn->op3].type; 7645 ir_reg op2_reg = ctx->regs[ref][2]; 7646 ir_reg op3_reg = ctx->regs[ref][3]; 7647 ir_mem mem; 7648 7649 IR_ASSERT(op3_reg != IR_REG_NONE); 7650 if (op2_reg != IR_REG_NONE) { 7651 if (IR_REG_SPILLED(op2_reg)) { 7652 op2_reg = IR_REG_NUM(op2_reg); 7653 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 7654 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7655 } 7656 mem = IR_MEM_B(op2_reg); 7657 } else if (IR_IS_CONST_REF(insn->op2)) { 7658 mem = ir_fuse_addr_const(ctx, insn->op2); 7659 } else { 7660 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 7661 mem = ir_fuse_addr(ctx, ref, insn->op2); 7662 if (!IR_IS_CONST_REF(insn->op3) 7663 && IR_REG_SPILLED(op3_reg) 7664 && ir_rule(ctx, insn->op3) != IR_STATIC_ALLOCA 7665 && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 7666 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 7667 op3_reg = IR_REG_NUM(op3_reg); 7668 ir_emit_load(ctx, type, op3_reg, insn->op3); 7669 } 7670 /* avoid store to the same location */ 7671 return; 7672 } 7673 } 7674 7675 if (IR_IS_CONST_REF(insn->op3)) { 7676 ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); 7677 } else { 7678 IR_ASSERT(op3_reg != IR_REG_NONE); 7679 if (IR_REG_SPILLED(op3_reg)) { 7680 op3_reg = IR_REG_NUM(op3_reg); 7681 ir_emit_load(ctx, type, op3_reg, insn->op3); 7682 } 7683 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 7684 } 7685} 7686 7687static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7688{ 7689 ir_reg src_reg = insn->op2; 7690 ir_type type = insn->type; 7691 7692 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { 7693 if (ctx->vregs[def] 7694 && ctx->live_intervals[ctx->vregs[def]] 7695 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { 7696 ir_emit_store(ctx, type, def, src_reg); 7697 } 7698 } else { 7699 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7700 7701 if (def_reg == IR_REG_NONE) { 7702 /* op3 is used as a flag that the value is already stored in memory. 
7703 * If op3 is set we don't have to store the value once again (in case of spilling) 7704 */ 7705 if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) { 7706 ir_emit_store(ctx, type, def, src_reg); 7707 } 7708 } else { 7709 if (src_reg != def_reg) { 7710 if (IR_IS_TYPE_INT(type)) { 7711 ir_emit_mov(ctx, type, def_reg, src_reg); 7712 } else { 7713 IR_ASSERT(IR_IS_TYPE_FP(type)); 7714 ir_emit_fp_mov(ctx, type, def_reg, src_reg); 7715 } 7716 } 7717 if (IR_REG_SPILLED(ctx->regs[def][0]) 7718 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) { 7719 ir_emit_store(ctx, type, def, def_reg); 7720 } 7721 } 7722 } 7723} 7724 7725static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 7726{ 7727 ir_ref type = ctx->ir_base[insn->op2].type; 7728 ir_reg op2_reg = ctx->regs[ref][2]; 7729 ir_reg dst_reg = insn->op3; 7730 7731 if (op2_reg != IR_REG_NONE) { 7732 if (IR_REG_SPILLED(op2_reg)) { 7733 op2_reg = IR_REG_NUM(op2_reg); 7734 ir_emit_load(ctx, type, op2_reg, insn->op2); 7735 } 7736 if (op2_reg != dst_reg) { 7737 if (IR_IS_TYPE_INT(type)) { 7738 ir_emit_mov(ctx, type, dst_reg, op2_reg); 7739 } else { 7740 IR_ASSERT(IR_IS_TYPE_FP(type)); 7741 ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); 7742 } 7743 } 7744 } else { 7745 ir_emit_load_ex(ctx, type, dst_reg, insn->op2, ref); 7746 } 7747} 7748 7749static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7750{ 7751 ir_backend_data *data = ctx->data; 7752 dasm_State **Dst = &data->dasm_state; 7753 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7754 7755 if (ctx->use_lists[def].count == 1) { 7756 /* dead alloca */ 7757 return; 7758 } 7759 if (IR_IS_CONST_REF(insn->op2)) { 7760 ir_insn *val = &ctx->ir_base[insn->op2]; 7761 int32_t size = val->val.i32; 7762 7763 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 7764 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 7765 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); 7766 IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); 7767 7768 /* Stack must be 16 byte aligned */ 7769 size = IR_ALIGNED_SIZE(size, 16); 7770 | ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size 7771 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 7772 ctx->call_stack_size += size; 7773 } 7774 } else { 7775 int32_t alignment = 16; 7776 ir_reg op2_reg = ctx->regs[def][2]; 7777 ir_type type = ctx->ir_base[insn->op2].type; 7778 7779 IR_ASSERT(ctx->flags & IR_FUNCTION); 7780 IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); 7781 IR_ASSERT(def_reg != IR_REG_NONE); 7782 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7783 op2_reg = IR_REG_NUM(op2_reg); 7784 ir_emit_load(ctx, type, op2_reg, insn->op2); 7785 } 7786 if (def_reg != op2_reg) { 7787 if (op2_reg != IR_REG_NONE) { 7788 ir_emit_mov(ctx, type, def_reg, op2_reg); 7789 } else { 7790 ir_emit_load(ctx, type, def_reg, insn->op2); 7791 } 7792 } 7793 7794 | ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1) 7795 | ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1) 7796 | ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg 7797 } 7798 if (def_reg != IR_REG_NONE) { 7799 | mov Ra(def_reg), Ra(IR_REG_RSP) 7800 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7801 ir_emit_store(ctx, insn->type, def, def_reg); 7802 } 7803 } else { 7804 ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); 7805 } 7806} 7807 7808static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7809{ 7810 ir_backend_data *data = ctx->data; 7811 dasm_State **Dst = &data->dasm_state; 7812 7813 if (IR_IS_CONST_REF(insn->op2)) { 7814 ir_insn *val = 
&ctx->ir_base[insn->op2]; 7815 int32_t size = val->val.i32; 7816 7817 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 7818 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 7819 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); 7820 IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); 7821 7822 /* Stack must be 16 byte aligned */ 7823 size = IR_ALIGNED_SIZE(size, 16); 7824 | ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size 7825 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 7826 ctx->call_stack_size -= size; 7827 } 7828 } else { 7829// int32_t alignment = 16; 7830 ir_reg op2_reg = ctx->regs[def][2]; 7831 ir_type type = ctx->ir_base[insn->op2].type; 7832 7833 IR_ASSERT(ctx->flags & IR_FUNCTION); 7834 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7835 op2_reg = IR_REG_NUM(op2_reg); 7836 ir_emit_load(ctx, type, op2_reg, insn->op2); 7837 } 7838 7839 // TODO: alignment ??? 7840 7841 | ASM_REG_REG_OP add, IR_ADDR, IR_REG_RSP, op2_reg 7842 } 7843} 7844 7845static void ir_emit_block_begin(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7846{ 7847 ir_backend_data *data = ctx->data; 7848 dasm_State **Dst = &data->dasm_state; 7849 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7850 7851 | mov Ra(def_reg), Ra(IR_REG_RSP) 7852 7853 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7854 ir_emit_store(ctx, IR_ADDR, def, def_reg); 7855 } 7856} 7857 7858static void ir_emit_block_end(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7859{ 7860 ir_backend_data *data = ctx->data; 7861 dasm_State **Dst = &data->dasm_state; 7862 ir_reg op2_reg = ctx->regs[def][2]; 7863 7864 IR_ASSERT(op2_reg != IR_REG_NONE); 7865 if (IR_REG_SPILLED(op2_reg)) { 7866 op2_reg = IR_REG_NUM(op2_reg); 7867 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7868 } 7869 7870 | mov Ra(IR_REG_RSP), Ra(op2_reg) 7871} 7872 7873static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) 7874{ 7875 ir_backend_data *data = ctx->data; 7876 dasm_State **Dst = &data->dasm_state; 7877 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7878 7879 if (ctx->flags & IR_USE_FRAME_POINTER) { 7880 | mov Ra(def_reg), Ra(IR_REG_RBP) 7881 } else { 7882 | lea Ra(def_reg), [Ra(IR_REG_RSP)+(ctx->stack_frame_size + ctx->call_stack_size)] 7883 } 7884 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7885 ir_emit_store(ctx, IR_ADDR, def, def_reg); 7886 } 7887} 7888 7889static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7890{ 7891#if defined(_WIN64) || defined(IR_TARGET_X86) 7892 ir_backend_data *data = ctx->data; 7893 dasm_State **Dst = &data->dasm_state; 7894 ir_reg fp; 7895 int arg_area_offset; 7896 ir_reg op2_reg = ctx->regs[def][2]; 7897 ir_reg tmp_reg = ctx->regs[def][3]; 7898 int32_t offset; 7899 7900 IR_ASSERT(tmp_reg != IR_REG_NONE); 7901 if (op2_reg != IR_REG_NONE) { 7902 if (IR_REG_SPILLED(op2_reg)) { 7903 op2_reg = IR_REG_NUM(op2_reg); 7904 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7905 } 7906 offset = 0; 7907 } else { 7908 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 7909 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7910 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 7911 } 7912 7913 if (ctx->flags & IR_USE_FRAME_POINTER) { 7914 fp = IR_REG_FRAME_POINTER; 7915 arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; 7916 } else { 7917 fp = IR_REG_STACK_POINTER; 7918 arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; 7919 } 7920 | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] 7921 | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) 7922#elif defined(IR_TARGET_X64) 7923|.if X64 7924 ir_backend_data *data = ctx->data; 7925 dasm_State **Dst = &data->dasm_state; 7926 ir_reg fp; 7927 int reg_save_area_offset; 7928 int overflow_arg_area_offset; 7929 ir_reg op2_reg = ctx->regs[def][2]; 7930 ir_reg tmp_reg = ctx->regs[def][3]; 7931 bool have_reg_save_area = 0; 7932 int32_t offset; 7933 7934 IR_ASSERT(tmp_reg != IR_REG_NONE); 7935 if (op2_reg != IR_REG_NONE) { 7936 if (IR_REG_SPILLED(op2_reg)) { 7937 op2_reg = IR_REG_NUM(op2_reg); 7938 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7939 } 7940 offset = 0; 7941 } else { 7942 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 7943 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 7944 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 7945 } 7946 7947 if (ctx->flags & IR_USE_FRAME_POINTER) { 7948 fp = IR_REG_FRAME_POINTER; 7949 reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); 7950 overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; 7951 } else { 7952 fp = IR_REG_STACK_POINTER; 7953 reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; 7954 overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; 7955 } 7956 7957 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 7958 | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] 7959 have_reg_save_area = 1; 7960 /* Set va_list.gp_offset */ 7961 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * ctx->gp_reg_params 7962 } else { 7963 reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS; 7964 /* Set va_list.gp_offset */ 7965 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], sizeof(void*) * IR_REG_INT_ARGS 7966 } 7967 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 7968 if (!have_reg_save_area) { 7969 | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] 7970 have_reg_save_area = 1; 7971 } 7972 /* Set va_list.fp_offset */ 7973 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * ctx->fp_reg_params 7974 } else { 7975 /* Set va_list.fp_offset */ 7976 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS 7977 } 7978 if (have_reg_save_area) { 7979 /* Set va_list.reg_save_area */ 7980 | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) 7981 } 7982 | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] 7983 /* Set va_list.overflow_arg_area */ 7984 | mov qword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 7985|.endif 7986#else 7987 IR_ASSERT(0 && "NIY va_start"); 7988#endif 7989} 7990 7991static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7992{ 7993#if defined(_WIN64) || 
defined(IR_TARGET_X86) 7994 ir_backend_data *data = ctx->data; 7995 dasm_State **Dst = &data->dasm_state; 7996 ir_reg tmp_reg = ctx->regs[def][1]; 7997 ir_reg op2_reg = ctx->regs[def][2]; 7998 ir_reg op3_reg = ctx->regs[def][3]; 7999 int32_t op2_offset, op3_offset; 8000 8001 IR_ASSERT(tmp_reg != IR_REG_NONE); 8002 if (op2_reg != IR_REG_NONE) { 8003 if (IR_REG_SPILLED(op2_reg)) { 8004 op2_reg = IR_REG_NUM(op2_reg); 8005 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8006 } 8007 op2_offset = 0; 8008 } else { 8009 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8010 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8011 op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8012 } 8013 if (op3_reg != IR_REG_NONE) { 8014 if (IR_REG_SPILLED(op3_reg)) { 8015 op3_reg = IR_REG_NUM(op3_reg); 8016 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 8017 } 8018 op3_offset = 0; 8019 } else { 8020 IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); 8021 op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8022 op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); 8023 } 8024 | mov Ra(tmp_reg), aword [Ra(op3_reg)+op3_offset] 8025 | mov aword [Ra(op2_reg)+op2_offset], Ra(tmp_reg) 8026#elif defined(IR_TARGET_X64) 8027|.if X64 8028 ir_backend_data *data = ctx->data; 8029 dasm_State **Dst = &data->dasm_state; 8030 ir_reg tmp_reg = ctx->regs[def][1]; 8031 ir_reg op2_reg = ctx->regs[def][2]; 8032 ir_reg op3_reg = ctx->regs[def][3]; 8033 int32_t op2_offset, op3_offset; 8034 8035 IR_ASSERT(tmp_reg != IR_REG_NONE); 8036 if (op2_reg != IR_REG_NONE) { 8037 if (IR_REG_SPILLED(op2_reg)) { 8038 op2_reg = IR_REG_NUM(op2_reg); 8039 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8040 } 8041 op2_offset = 0; 8042 } else { 8043 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8044 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8045 op2_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8046 } 8047 if (op3_reg != IR_REG_NONE) { 8048 if (IR_REG_SPILLED(op3_reg)) { 8049 op3_reg = IR_REG_NUM(op3_reg); 8050 ir_emit_load(ctx, IR_ADDR, op3_reg, insn->op3); 8051 } 8052 op3_offset = 0; 8053 } else { 8054 IR_ASSERT(ir_rule(ctx, insn->op3) == IR_STATIC_ALLOCA); 8055 op3_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8056 op3_offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op3].op3); 8057 } 8058 | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, gp_offset))] 8059 | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) 8060 | mov Rd(tmp_reg), dword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, fp_offset))] 8061 | mov dword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg) 8062 | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, overflow_arg_area))] 8063 | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8064 | mov Ra(tmp_reg), aword [Ra(op3_reg)+(op3_offset+offsetof(ir_va_list, reg_save_area))] 8065 | mov aword [Ra(op2_reg)+(op2_offset+offsetof(ir_va_list, reg_save_area))], Ra(tmp_reg) 8066|.endif 8067#else 8068 IR_ASSERT(0 && "NIY va_copy"); 8069#endif 8070} 8071 8072static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8073{ 8074#if defined(_WIN64) || defined(IR_TARGET_X86) 8075 ir_backend_data *data = ctx->data; 8076 dasm_State **Dst = &data->dasm_state; 8077 ir_type type = insn->type; 8078 ir_reg def_reg = ctx->regs[def][0]; 8079 ir_reg op2_reg = ctx->regs[def][2]; 8080 ir_reg tmp_reg = ctx->regs[def][3]; 8081 int32_t offset; 8082 8083 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 8084 if (op2_reg != IR_REG_NONE) { 8085 if (IR_REG_SPILLED(op2_reg)) { 8086 op2_reg = IR_REG_NUM(op2_reg); 8087 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8088 } 8089 offset = 0; 8090 } else { 8091 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA); 8092 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8093 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8094 } 8095 | mov Ra(tmp_reg), aword [Ra(op2_reg)+offset] 8096 ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); 8097 | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) 8098 | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) 8099 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8100 ir_emit_store(ctx, type, def, def_reg); 8101 } 8102#elif defined(IR_TARGET_X64) 8103|.if X64 8104 ir_backend_data *data = ctx->data; 8105 dasm_State **Dst = &data->dasm_state; 8106 ir_type type = insn->type; 8107 ir_reg def_reg = ctx->regs[def][0]; 8108 ir_reg op2_reg = ctx->regs[def][2]; 8109 ir_reg tmp_reg = ctx->regs[def][3]; 8110 int32_t offset; 8111 8112 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 8113 if (op2_reg != IR_REG_NONE) { 8114 if (IR_REG_SPILLED(op2_reg)) { 8115 op2_reg = IR_REG_NUM(op2_reg); 8116 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8117 } 8118 offset = 0; 8119 } else { 8120 IR_ASSERT(ir_rule(ctx, insn->op2) == IR_STATIC_ALLOCA);
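 /* The va_list here lives in a statically allocated stack slot, so it is addressed directly off the frame/stack pointer. The ir_va_list fields used by va_start/va_copy/va_arg follow the System V x86-64 va_list layout; a sketch, assuming the definition elsewhere in the IR sources matches the ABI: gp_offset and fp_offset are 32-bit offsets into reg_save_area, overflow_arg_area points at the next stack-passed argument, and reg_save_area at the saved argument registers. */ 8121 op2_reg = (ctx->flags & IR_USE_FRAME_POINTER) ?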
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8122 offset = IR_SPILL_POS_TO_OFFSET(ctx->ir_base[insn->op2].op3); 8123 } 8124 if (IR_IS_TYPE_INT(type)) { 8125 | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))] 8126 | cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS 8127 | jge >1 8128 | add Rd(tmp_reg), sizeof(void*) 8129 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, gp_offset))], Rd(tmp_reg) 8130 | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] 8131 | jmp >2 8132 |1: 8133 | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] 8134 | add Ra(tmp_reg), sizeof(void*) 8135 | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8136 |2: 8137 | mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)] 8138 } else { 8139 | mov Rd(tmp_reg), dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))] 8140 | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS 8141 | jge >1 8142 | add Rd(tmp_reg), 16 8143 | mov dword [Ra(op2_reg)+(offset+offsetof(ir_va_list, fp_offset))], Rd(tmp_reg) 8144 | add Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, reg_save_area))] 8145 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); 8146 | jmp >2 8147 |1: 8148 | mov Ra(tmp_reg), aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))] 8149 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); 8150 | add Ra(tmp_reg), 8 8151 | mov aword [Ra(op2_reg)+(offset+offsetof(ir_va_list, overflow_arg_area))], Ra(tmp_reg) 8152 |2: 8153 } 8154 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8155 ir_emit_store(ctx, type, def, def_reg); 8156 } 8157|.endif 8158#else 8159 IR_ASSERT(0 && "NIY va_arg"); 8160#endif 8161} 8162 8163static void ir_emit_switch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 8164{ 8165 ir_backend_data *data = ctx->data; 8166 dasm_State **Dst = &data->dasm_state; 8167 ir_type type; 8168 ir_block *bb; 8169 ir_insn *use_insn, *val; 8170 uint32_t n, *p, use_block; 8171 int i; 8172 int label, default_label = 0; 8173 int count = 0; 8174 ir_val min, max; 8175 ir_reg op2_reg = ctx->regs[def][2]; 8176 ir_reg tmp_reg = ctx->regs[def][3]; 8177 8178 type = ctx->ir_base[insn->op2].type; 8179 IR_ASSERT(tmp_reg != IR_REG_NONE); 8180 if (IR_IS_TYPE_SIGNED(type)) { 8181 min.u64 = 0x7fffffffffffffff; 8182 max.u64 = 0x8000000000000000; 8183 } else { 8184 min.u64 = 0xffffffffffffffff; 8185 max.u64 = 0x0; 8186 } 8187 8188 bb = &ctx->cfg_blocks[b]; 8189 p = &ctx->cfg_edges[bb->successors]; 8190 for (n = bb->successors_count; n != 0; p++, n--) { 8191 use_block = *p; 8192 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 8193 if (use_insn->op == IR_CASE_VAL) { 8194 val = &ctx->ir_base[use_insn->op2]; 8195 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 8196 if (IR_IS_TYPE_SIGNED(type)) { 8197 IR_ASSERT(IR_IS_TYPE_SIGNED(val->type)); 8198 min.i64 = IR_MIN(min.i64, val->val.i64); 8199 max.i64 = IR_MAX(max.i64, val->val.i64); 8200 } else { 8201 IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type)); 8202 min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64); 8203 max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64); 8204 } 8205 count++; 8206 } else { 8207 IR_ASSERT(use_insn->op == IR_CASE_DEFAULT); 8208 default_label = ir_skip_empty_target_blocks(ctx, use_block); 8209 } 8210 } 8211 8212 IR_ASSERT(op2_reg != IR_REG_NONE); 8213 if (IR_REG_SPILLED(op2_reg)) { 8214 op2_reg = IR_REG_NUM(op2_reg); 8215 ir_emit_load(ctx, type, op2_reg, insn->op2); 8216 } 8217 8218 /* Generate a 
table jmp or a sequence of comparisons */ 8219 if (count > 2 && (max.i64-min.i64) < count * 8) { 8220 int *labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); 8221 8222 for (i = 0; i <= (max.i64 - min.i64); i++) { 8223 labels[i] = default_label; 8224 } 8225 p = &ctx->cfg_edges[bb->successors]; 8226 for (n = bb->successors_count; n != 0; p++, n--) { 8227 use_block = *p; 8228 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 8229 if (use_insn->op == IR_CASE_VAL) { 8230 val = &ctx->ir_base[use_insn->op2]; 8231 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 8232 label = ir_skip_empty_target_blocks(ctx, use_block); 8233 labels[val->val.i64 - min.i64] = label; 8234 } 8235 } 8236 8237 switch (ir_type_size[type]) { 8238 default: 8239 IR_ASSERT(0 && "Unsupported type size"); 8240 case 1: 8241 if (IR_IS_TYPE_SIGNED(type)) { 8242 | movsx Ra(op2_reg), Rb(op2_reg) 8243 } else { 8244 | movzx Ra(op2_reg), Rb(op2_reg) 8245 } 8246 break; 8247 case 2: 8248 if (IR_IS_TYPE_SIGNED(type)) { 8249 | movsx Ra(op2_reg), Rw(op2_reg) 8250 } else { 8251 | movzx Ra(op2_reg), Rw(op2_reg) 8252 } 8253 break; 8254 case 4: 8255|.if X64 8256 if (IR_IS_TYPE_SIGNED(type)) { 8257 | movsxd Ra(op2_reg), Rd(op2_reg) 8258 } else { 8259 | mov Rd(op2_reg), Rd(op2_reg) 8260 } 8261 break; 8262|| case 8: 8263|.endif 8264 break; 8265 } 8266 8267 if (min.i64 != 0) { 8268 int64_t offset = -min.i64; 8269 8270 if (IR_IS_SIGNED_32BIT(offset)) { 8271 | lea Ra(tmp_reg), [Ra(op2_reg)+(int32_t)offset] 8272 } else { 8273 IR_ASSERT(sizeof(void*) == 8); 8274|.if X64 8275 | mov64 Rq(tmp_reg), offset 8276 | add Ra(tmp_reg), Ra(op2_reg) 8277|.endif 8278 } 8279 if (default_label) { 8280 offset = max.i64 - min.i64; 8281 8282 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8283 | cmp Ra(tmp_reg), (int32_t)offset 8284 | ja =>default_label 8285 } 8286|.if X64 8287 if (ctx->code_buffer 8288 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->start) 8289 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->end)) { 8290 | jmp aword [Ra(tmp_reg)*8+>1] 8291 } else { 8292 int64_t offset = -min.i64; 8293 8294 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8295 offset *= 8; 8296 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8297 | lea Ra(tmp_reg), aword [>1] 8298 | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+offset] 8299 } 8300|.else 8301 | jmp aword [Ra(tmp_reg)*4+>1] 8302|.endif 8303 } else { 8304 if (default_label) { 8305 int64_t offset = max.i64; 8306 8307 IR_ASSERT(IR_IS_SIGNED_32BIT(offset)); 8308 | cmp Ra(op2_reg), (int32_t)offset 8309 | ja =>default_label 8310 } 8311|.if X64 8312 if (ctx->code_buffer 8313 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->start) 8314 && IR_IS_SIGNED_32BIT((char*)ctx->code_buffer->end)) { 8315 | jmp aword [Ra(op2_reg)*8+>1] 8316 } else { 8317 | lea Ra(tmp_reg), aword [>1] 8318 | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8] 8319 } 8320|.else 8321 | jmp aword [Ra(op2_reg)*4+>1] 8322|.endif 8323 } 8324 8325 |.jmp_table 8326 if (!data->jmp_table_label) { 8327 data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; 8328 |=>data->jmp_table_label: 8329 } 8330 |.align aword 8331 |1: 8332 for (i = 0; i <= (max.i64 - min.i64); i++) { 8333 int b = labels[i]; 8334 if (b) { 8335 ir_block *bb = &ctx->cfg_blocks[b]; 8336 ir_insn *insn = &ctx->ir_base[bb->end]; 8337 8338 if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { 8339 ir_ref prev = ctx->prev_ref[bb->end]; 8340 if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { 8341 prev = ctx->prev_ref[prev]; 8342 } 8343 if (prev == bb->start) {
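 /* The case block is just an IJMP to a constant address (possibly preceded by a SNAPSHOT): emit the target address straight into the jump table and, unless it is the default case, mark the block empty so no code is generated for it. */ 8344 void *addr = ir_jmp_addr(ctx, insn,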
&ctx->ir_base[insn->op2]); 8345 8346 | .aword &addr 8347 if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { 8348 bb->flags |= IR_BB_EMPTY; 8349 } 8350 continue; 8351 } 8352 } 8353 | .aword =>b 8354 } else { 8355 | .aword 0 8356 } 8357 } 8358 |.code 8359 ir_mem_free(labels); 8360 } else { 8361 p = &ctx->cfg_edges[bb->successors]; 8362 for (n = bb->successors_count; n != 0; p++, n--) { 8363 use_block = *p; 8364 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 8365 if (use_insn->op == IR_CASE_VAL) { 8366 val = &ctx->ir_base[use_insn->op2]; 8367 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 8368 label = ir_skip_empty_target_blocks(ctx, use_block); 8369 if (IR_IS_32BIT(type, val->val)) { 8370 | ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32 8371 } else { 8372 IR_ASSERT(sizeof(void*) == 8); 8373|.if X64 8374 | mov64 Ra(tmp_reg), val->val.i64 8375 | ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg 8376|.endif 8377 } 8378 | je =>label 8379 } 8380 } 8381 if (default_label) { 8382 | jmp =>default_label 8383 } 8384 } 8385} 8386 8387static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn) 8388{ 8389 int j, n; 8390 ir_type type; 8391 int int_param = 0; 8392 int fp_param = 0; 8393 int int_reg_params_count = IR_REG_INT_ARGS; 8394 int fp_reg_params_count = IR_REG_FP_ARGS; 8395 int32_t used_stack = 0; 8396 8397#ifdef IR_HAVE_FASTCALL 8398 if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { 8399 int_reg_params_count = IR_REG_INT_FCARGS; 8400 fp_reg_params_count = IR_REG_FP_FCARGS; 8401 } 8402#endif 8403 8404 n = insn->inputs_count; 8405 for (j = 3; j <= n; j++) { 8406 type = ctx->ir_base[ir_insn_op(insn, j)].type; 8407 if (IR_IS_TYPE_INT(type)) { 8408 if (int_param >= int_reg_params_count) { 8409 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 8410 } 8411 int_param++; 8412#ifdef _WIN64 8413 /* WIN64 calling convention uses a common counter for int and fp registers */ 8414 fp_param++; 8415#endif 8416 } else { 8417 IR_ASSERT(IR_IS_TYPE_FP(type)); 8418 if (fp_param >= fp_reg_params_count) { 8419 used_stack += IR_MAX(sizeof(void*), ir_type_size[type]); 8420 } 8421 fp_param++; 8422#ifdef _WIN64 8423 /* WIN64 calling convention uses a common counter for int and fp registers */ 8424 int_param++; 8425#endif 8426 } 8427 } 8428 8429 /* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */ 8430 used_stack += IR_SHADOW_ARGS; 8431 8432 return used_stack; 8433} 8434 8435static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) 8436{ 8437 ir_backend_data *data = ctx->data; 8438 dasm_State **Dst = &data->dasm_state; 8439 int j, n; 8440 ir_ref arg; 8441 ir_insn *arg_insn; 8442 uint8_t type; 8443 ir_reg src_reg, dst_reg; 8444 int int_param = 0; 8445 int fp_param = 0; 8446 int count = 0; 8447 int int_reg_params_count = IR_REG_INT_ARGS; 8448 int fp_reg_params_count = IR_REG_FP_ARGS; 8449 const int8_t *int_reg_params = _ir_int_reg_params; 8450 const int8_t *fp_reg_params = _ir_fp_reg_params; 8451 int32_t used_stack, stack_offset = IR_SHADOW_ARGS; 8452 ir_copy *copies; 8453 bool do_pass3 = 0; 8454 /* For temporaries we may use any scratch registers except for registers used for parameters */ 8455 ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ 8456 8457 n = insn->inputs_count; 8458 if (n < 3) { 8459 return 0; 8460 } 8461 8462 if (tmp_reg == IR_REG_NONE) { 8463 tmp_reg = IR_REG_RAX; 8464 } 8465 8466#ifdef IR_HAVE_FASTCALL 8467 if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
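 /* 32-bit fastcall passes the leading integer arguments in ECX/EDX, so switch to the fastcall argument-register tables. */ 8468 int_reg_params_count =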
IR_REG_INT_FCARGS; 8469 fp_reg_params_count = IR_REG_FP_FCARGS; 8470 int_reg_params = _ir_int_fc_reg_params; 8471 fp_reg_params = _ir_fp_fc_reg_params; 8472 } 8473#endif 8474 8475 if (insn->op == IR_CALL 8476 && (ctx->flags & IR_PREALLOCATED_STACK) 8477#ifdef IR_HAVE_FASTCALL 8478 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 8479#endif 8480 ) { 8481 // TODO: support for preallocated stack 8482 used_stack = 0; 8483 } else { 8484 used_stack = ir_call_used_stack(ctx, insn); 8485 if (IR_SHADOW_ARGS 8486 && insn->op == IR_TAILCALL 8487 && used_stack == IR_SHADOW_ARGS) { 8488 used_stack = 0; 8489 } 8490 if (ctx->fixed_call_stack_size 8491 && used_stack <= ctx->fixed_call_stack_size 8492#ifdef IR_HAVE_FASTCALL 8493 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 8494#endif 8495 ) { 8496 used_stack = 0; 8497 } else { 8498 /* Stack must be 16 byte aligned */ 8499 int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); 8500 ctx->call_stack_size += aligned_stack; 8501 if (aligned_stack) { 8502 | sub Ra(IR_REG_RSP), aligned_stack 8503 } 8504 } 8505 } 8506 8507 /* 1. move all register arguments that should be passed through stack 8508 * and collect arguments that should be passed through registers */ 8509 copies = ir_mem_malloc((n - 2) * sizeof(ir_copy)); 8510 for (j = 3; j <= n; j++) { 8511 arg = ir_insn_op(insn, j); 8512 src_reg = ir_get_alocated_reg(ctx, def, j); 8513 arg_insn = &ctx->ir_base[arg]; 8514 type = arg_insn->type; 8515 if (IR_IS_TYPE_INT(type)) { 8516 if (int_param < int_reg_params_count) { 8517 dst_reg = int_reg_params[int_param]; 8518 } else { 8519 dst_reg = IR_REG_NONE; /* pass argument through stack */ 8520 } 8521 int_param++; 8522#ifdef _WIN64 8523 /* WIN64 calling convention uses a common counter for int and fp registers */ 8524 fp_param++; 8525#endif 8526 } else { 8527 IR_ASSERT(IR_IS_TYPE_FP(type)); 8528 if (fp_param < fp_reg_params_count) { 8529 dst_reg = fp_reg_params[fp_param]; 8530 } else { 8531 dst_reg = IR_REG_NONE; /* pass argument through stack */ 8532 } 8533 fp_param++; 8534#ifdef _WIN64 8535 /* WIN64 calling convention uses a common counter for int and fp registers */ 8536 int_param++; 8537#endif 8538 } 8539 if (dst_reg != IR_REG_NONE) { 8540 if (src_reg == IR_REG_NONE) { 8541 /* delay CONST->REG and MEM->REG moves to third pass */ 8542 do_pass3 = 1; 8543 } else { 8544 if (IR_REG_SPILLED(src_reg)) { 8545 src_reg = IR_REG_NUM(src_reg); 8546 ir_emit_load(ctx, type, src_reg, arg); 8547 } 8548 if (src_reg != dst_reg) { 8549 /* delay REG->REG moves to second pass */ 8550 copies[count].type = type; 8551 copies[count].from = src_reg; 8552 copies[count].to = dst_reg; 8553 count++; 8554 } 8555 } 8556 } else { 8557 /* Pass register arguments to stack (REG->MEM moves) */ 8558 if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) { 8559 ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg); 8560 } else { 8561 do_pass3 = 1; 8562 } 8563 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 8564 } 8565 } 8566 8567 /* 2. move all arguments that should be passed from one register to another (REG->REG movs) */ 8568 if (count) { 8569 ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg); 8570 } 8571 ir_mem_free(copies); 8572
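 /* ir_parallel_copy orders the collected REG->REG moves and breaks cycles via tmp_reg/tmp_fp_reg, so argument registers that are both sources and destinations are not clobbered. */ 8573 /* 3.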
move the remaining memory and immediate values */ 8574 if (do_pass3) { 8575 stack_offset = IR_SHADOW_ARGS; 8576 int_param = 0; 8577 fp_param = 0; 8578 for (j = 3; j <= n; j++) { 8579 arg = ir_insn_op(insn, j); 8580 src_reg = ir_get_alocated_reg(ctx, def, j); 8581 arg_insn = &ctx->ir_base[arg]; 8582 type = arg_insn->type; 8583 if (IR_IS_TYPE_INT(type)) { 8584 if (int_param < int_reg_params_count) { 8585 dst_reg = int_reg_params[int_param]; 8586 } else { 8587 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 8588 } 8589 int_param++; 8590#ifdef _WIN64 8591 /* WIN64 calling convention uses a common counter for int and fp registers */ 8592 fp_param++; 8593#endif 8594 } else { 8595 IR_ASSERT(IR_IS_TYPE_FP(type)); 8596 if (fp_param < fp_reg_params_count) { 8597 dst_reg = fp_reg_params[fp_param]; 8598 } else { 8599 dst_reg = IR_REG_NONE; /* argument already passed through stack */ 8600 } 8601 fp_param++; 8602#ifdef _WIN64 8603 /* WIN64 calling convention uses a common counter for int and fp registers */ 8604 int_param++; 8605#endif 8606 } 8607 if (dst_reg != IR_REG_NONE) { 8608 if (src_reg == IR_REG_NONE) { 8609 if (IR_IS_TYPE_INT(type)) { 8610 if (IR_IS_CONST_REF(arg)) { 8611 if (type == IR_I8 || type == IR_I16) { 8612 type = IR_I32; 8613 } else if (type == IR_U8 || type == IR_U16) { 8614 type = IR_U32; 8615 } 8616 ir_emit_load(ctx, type, dst_reg, arg); 8617 } else if (ctx->vregs[arg]) { 8618 ir_mem mem = ir_ref_spill_slot(ctx, arg); 8619 8620 if (ir_type_size[type] > 2) { 8621 ir_emit_load_mem_int(ctx, type, dst_reg, mem); 8622 } else if (ir_type_size[type] == 2) { 8623 if (type == IR_I16) { 8624 | ASM_TXT_TMEM_OP movsx, Rd(dst_reg), word, mem 8625 } else { 8626 | ASM_TXT_TMEM_OP movzx, Rd(dst_reg), word, mem 8627 } 8628 } else { 8629 IR_ASSERT(ir_type_size[type] == 1); 8630 if (type == IR_I8) { 8631 | ASM_TXT_TMEM_OP movsx, Rd(dst_reg), byte, mem 8632 } else { 8633 | ASM_TXT_TMEM_OP movzx, Rd(dst_reg), byte, mem 8634 } 8635 } 8636 } else { 8637 ir_load_local_addr(ctx, dst_reg, arg); 8638 } 8639 } else { 8640 ir_emit_load(ctx, type, dst_reg, arg); 8641 } 8642 } 8643 } else { 8644 ir_mem mem = IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset); 8645 8646 if (IR_IS_TYPE_INT(type)) { 8647 if (IR_IS_CONST_REF(arg)) { 8648 ir_emit_store_mem_int_const(ctx, type, mem, arg, tmp_reg, 1); 8649 } else if (src_reg == IR_REG_NONE) { 8650 IR_ASSERT(tmp_reg != IR_REG_NONE); 8651 ir_emit_load(ctx, type, tmp_reg, arg); 8652 ir_emit_store_mem_int(ctx, type, mem, tmp_reg); 8653 } else if (IR_REG_SPILLED(src_reg)) { 8654 src_reg = IR_REG_NUM(src_reg); 8655 ir_emit_load(ctx, type, src_reg, arg); 8656 ir_emit_store_mem_int(ctx, type, mem, src_reg); 8657 } 8658 } else { 8659 if (IR_IS_CONST_REF(arg)) { 8660 ir_emit_store_mem_fp_const(ctx, type, mem, arg, tmp_reg, tmp_fp_reg); 8661 } else if (src_reg == IR_REG_NONE) { 8662 IR_ASSERT(tmp_fp_reg != IR_REG_NONE); 8663 ir_emit_load(ctx, type, tmp_fp_reg, arg); 8664 ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg); 8665 } else if (IR_REG_SPILLED(src_reg)) { 8666 src_reg = IR_REG_NUM(src_reg); 8667 ir_emit_load(ctx, type, src_reg, arg); 8668 ir_emit_store_mem_fp(ctx, type, mem, src_reg); 8669 } 8670 } 8671 stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]); 8672 } 8673 } 8674 } 8675 8676#ifdef _WIN64 8677 /* WIN64 calling convention requires duplication of parameters passed in FP registers into GP ones */ 8678 if (ir_is_vararg(ctx, insn)) { 8679 n = IR_MIN(n, IR_MAX_REG_ARGS + 2); 8680 for (j = 3; j <= n; j++) { 8681 arg = ir_insn_op(insn, j); 8682 arg_insn =
&ctx->ir_base[arg]; 8683 type = arg_insn->type; 8684 if (IR_IS_TYPE_FP(type)) { 8685 src_reg = fp_reg_params[j-3]; 8686 dst_reg = int_reg_params[j-3]; 8687|.if X64 8688 if (ctx->mflags & IR_X86_AVX) { 8689 | vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) 8690 } else { 8691 | movd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST) 8692 } 8693|.endif 8694 } 8695 } 8696 } 8697#endif 8698#ifdef IR_REG_VARARG_FP_REGS 8699 /* set hidden argument to specify the number of vector registers used */ 8700 if (ir_is_vararg(ctx, insn)) { 8701 fp_param = IR_MIN(fp_param, fp_reg_params_count); 8702 | mov Rd(IR_REG_VARARG_FP_REGS), fp_param 8703 } 8704#endif 8705 8706 return used_stack; 8707} 8708 8709static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack) 8710{ 8711 ir_backend_data *data = ctx->data; 8712 dasm_State **Dst = &data->dasm_state; 8713 ir_reg def_reg; 8714 8715 if (IR_IS_CONST_REF(insn->op2)) { 8716 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8717 8718 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8719 | call aword &addr 8720 } else { 8721|.if X64 8722|| ir_reg tmp_reg = IR_REG_RAX; 8723 8724#ifdef IR_REG_VARARG_FP_REGS 8725|| if (ir_is_vararg(ctx, insn)) { 8726|| tmp_reg = IR_REG_R11; 8727|| } 8728#endif 8729|| if (IR_IS_SIGNED_32BIT(addr)) { 8730 | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8731|| } else { 8732 | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8733|| } 8734 | call Rq(tmp_reg) 8735|.endif 8736 } 8737 } else { 8738 ir_reg op2_reg = ctx->regs[def][2]; 8739 8740 if (op2_reg != IR_REG_NONE) { 8741 if (IR_REG_SPILLED(op2_reg)) { 8742 op2_reg = IR_REG_NUM(op2_reg); 8743 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8744 } 8745 | call Ra(op2_reg) 8746 } else { 8747 ir_mem mem; 8748 8749 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8750 mem = ir_fuse_load(ctx, def, insn->op2); 8751 } else { 8752 mem = ir_ref_spill_slot(ctx, insn->op2); 8753 } 8754 8755 | ASM_TMEM_OP call, aword, mem 8756 } 8757 } 8758 8759 if (used_stack) { 8760 int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16); 8761 8762 ctx->call_stack_size -= aligned_stack; 8763 if (ir_is_fastcall(ctx, insn)) { 8764 aligned_stack -= used_stack; 8765 if (aligned_stack) { 8766 | add Ra(IR_REG_RSP), aligned_stack 8767 } 8768 } else { 8769 | add Ra(IR_REG_RSP), aligned_stack 8770 } 8771 } 8772 8773 if (insn->type != IR_VOID) { 8774 if (IR_IS_TYPE_INT(insn->type)) { 8775 def_reg = IR_REG_NUM(ctx->regs[def][0]); 8776 if (def_reg != IR_REG_NONE) { 8777 if (def_reg != IR_REG_INT_RET1) { 8778 ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); 8779 } 8780 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8781 ir_emit_store(ctx, insn->type, def, def_reg); 8782 } 8783 } else if (ctx->use_lists[def].count > 1) { 8784 ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1); 8785 } 8786 } else { 8787 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 8788 def_reg = IR_REG_NUM(ctx->regs[def][0]); 8789#ifdef IR_REG_FP_RET1 8790 if (def_reg != IR_REG_NONE) { 8791 if (def_reg != IR_REG_FP_RET1) { 8792 ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1); 8793 } 8794 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8795 ir_emit_store(ctx, insn->type, def, def_reg); 8796 } 8797 } else if (ctx->use_lists[def].count > 1) { 8798 ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1); 8799 } 8800#else 8801 if (ctx->use_lists[def].count > 1) { 8802 int32_t offset; 8803 ir_reg fp; 8804 8805 if (def_reg == IR_REG_NONE) { 8806 offset = ir_ref_spill_slot_offset(ctx, def, 
&fp); 8807 if (insn->type == IR_DOUBLE) { 8808 | fstp qword [Ra(fp)+offset] 8809 } else { 8810 IR_ASSERT(insn->type == IR_FLOAT); 8811 | fstp dword [Ra(fp)+offset] 8812 } 8813 } else { 8814 offset = ctx->ret_slot; 8815 IR_ASSERT(offset != -1); 8816 offset = IR_SPILL_POS_TO_OFFSET(offset); 8817 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 8818 if (insn->type == IR_DOUBLE) { 8819 | fstp qword [Ra(fp)+offset] 8820 } else { 8821 IR_ASSERT(insn->type == IR_FLOAT); 8822 | fstp dword [Ra(fp)+offset] 8823 } 8824 ir_emit_load_mem_fp(ctx, insn->type, def_reg, IR_MEM_BO(fp, offset)); 8825 if (IR_REG_SPILLED(ctx->regs[def][0])) { 8826 ir_emit_store(ctx, insn->type, def, def_reg); 8827 } 8828 } 8829 } 8830#endif 8831 } 8832 } 8833} 8834 8835static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8836{ 8837 int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); 8838 ir_emit_call_ex(ctx, def, insn, used_stack); 8839} 8840 8841static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8842{ 8843 ir_backend_data *data = ctx->data; 8844 dasm_State **Dst = &data->dasm_state; 8845 int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]); 8846 8847 if (used_stack != 0) { 8848 ir_emit_call_ex(ctx, def, insn, used_stack); 8849 ir_emit_return_void(ctx); 8850 return; 8851 } 8852 8853 ir_emit_epilogue(ctx); 8854 8855 if (IR_IS_CONST_REF(insn->op2)) { 8856 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8857 8858 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8859 | jmp aword &addr 8860 } else { 8861|.if X64 8862|| ir_reg tmp_reg = IR_REG_RAX; 8863 8864#ifdef IR_REG_VARARG_FP_REGS 8865|| if (ir_is_vararg(ctx, insn)) { 8866|| tmp_reg = IR_REG_R11; 8867|| } 8868#endif 8869|| if (IR_IS_SIGNED_32BIT(addr)) { 8870 | mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8871|| } else { 8872 | mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8873|| } 8874 | jmp Rq(tmp_reg) 8875|.endif 8876 } 8877 } else { 8878 ir_reg op2_reg = ctx->regs[def][2]; 8879 8880 if (op2_reg != IR_REG_NONE) { 8881 if (IR_REG_SPILLED(op2_reg)) { 8882 op2_reg = IR_REG_NUM(op2_reg); 8883 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8884 } 8885 | jmp Ra(op2_reg) 8886 } else { 8887 ir_mem mem; 8888 8889 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8890 mem = ir_fuse_load(ctx, def, insn->op2); 8891 } else { 8892 mem = ir_ref_spill_slot(ctx, insn->op2); 8893 } 8894 | ASM_TMEM_OP jmp, aword, mem 8895 } 8896 } 8897} 8898 8899static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8900{ 8901 ir_backend_data *data = ctx->data; 8902 dasm_State **Dst = &data->dasm_state; 8903 ir_reg op2_reg = ctx->regs[def][2]; 8904 8905 if (IR_IS_CONST_REF(insn->op2)) { 8906 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8907 8908 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8909 | jmp aword &addr 8910 } else { 8911|.if X64 8912 if (IR_IS_SIGNED_32BIT(addr)) { 8913 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8914 } else { 8915 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8916 } 8917 | jmp rax 8918|.endif 8919 } 8920 } else if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8921 ir_mem mem = ir_fuse_load(ctx, def, insn->op2); 8922 | ASM_TMEM_OP jmp, aword, mem 8923 } else if (op2_reg != IR_REG_NONE) { 8924 if (IR_REG_SPILLED(op2_reg)) { 8925 op2_reg = IR_REG_NUM(op2_reg); 8926 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8927 } 8928 | 
jmp Ra(op2_reg) 8929 } else { 8930 ir_mem mem = ir_ref_spill_slot(ctx, insn->op2); 8931 8932 | ASM_TMEM_OP jmp, aword, mem 8933 } 8934} 8935 8936static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint32_t next_block, uint8_t op, void *addr, bool int_cmp) 8937{ 8938 ir_backend_data *data = ctx->data; 8939 dasm_State **Dst = &data->dasm_state; 8940 ir_insn *next_insn = &ctx->ir_base[def + 1]; 8941 8942 if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) { 8943 ir_block *bb = &ctx->cfg_blocks[b]; 8944 uint32_t target; 8945 8946 if (!(bb->flags & IR_BB_DESSA_MOVES)) { 8947 target = ctx->cfg_edges[bb->successors]; 8948 if (UNEXPECTED(bb->successors_count == 2)) { 8949 if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) { 8950 target = ctx->cfg_edges[bb->successors + 1]; 8951 } else { 8952 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 8953 } 8954 } else { 8955 IR_ASSERT(bb->successors_count == 1); 8956 } 8957 target = ir_skip_empty_target_blocks(ctx, target); 8958 if (target != next_block) { 8959 if (int_cmp) { 8960 switch (op) { 8961 default: 8962 IR_ASSERT(0 && "NIY binary op"); 8963 case IR_EQ: 8964 | jne =>target 8965 break; 8966 case IR_NE: 8967 | je =>target 8968 break; 8969 case IR_LT: 8970 | jge =>target 8971 break; 8972 case IR_GE: 8973 | jl =>target 8974 break; 8975 case IR_LE: 8976 | jg =>target 8977 break; 8978 case IR_GT: 8979 | jle =>target 8980 break; 8981 case IR_ULT: 8982 | jae =>target 8983 break; 8984 case IR_UGE: 8985 | jb =>target 8986 break; 8987 case IR_ULE: 8988 | ja =>target 8989 break; 8990 case IR_UGT: 8991 | jbe =>target 8992 break; 8993 } 8994 } else { 8995 switch (op) { 8996 default: 8997 IR_ASSERT(0 && "NIY binary op"); 8998 case IR_EQ: 8999 | jne =>target 9000 | jp =>target 9001 break; 9002 case IR_NE: 9003 | jp &addr 9004 | je =>target 9005 break; 9006 case IR_LT: 9007 | jae =>target 9008 break; 9009 case IR_GE: 9010 | jp &addr 9011 | jb =>target 9012 break; 9013 case IR_LE: 9014 | ja =>target 9015 break; 9016 case IR_GT: 9017 | jp &addr 9018 | jbe =>target 9019 break; 9020 } 9021 } 9022 | jmp &addr 9023 return 1; 9024 } 9025 } 9026 } else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) { 9027 void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]); 9028 9029 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, target_addr)) { 9030 if (int_cmp) { 9031 switch (op) { 9032 default: 9033 IR_ASSERT(0 && "NIY binary op"); 9034 case IR_EQ: 9035 | jne &target_addr 9036 break; 9037 case IR_NE: 9038 | je &target_addr 9039 break; 9040 case IR_LT: 9041 | jge &target_addr 9042 break; 9043 case IR_GE: 9044 | jl &target_addr 9045 break; 9046 case IR_LE: 9047 | jg &target_addr 9048 break; 9049 case IR_GT: 9050 | jle &target_addr 9051 break; 9052 case IR_ULT: 9053 | jae &target_addr 9054 break; 9055 case IR_UGE: 9056 | jb &target_addr 9057 break; 9058 case IR_ULE: 9059 | ja &target_addr 9060 break; 9061 case IR_UGT: 9062 | jbe &target_addr 9063 break; 9064 } 9065 } else { 9066 switch (op) { 9067 default: 9068 IR_ASSERT(0 && "NIY binary op"); 9069 case IR_EQ: 9070 | jne &target_addr 9071 | jp &target_addr 9072 break; 9073 case IR_NE: 9074 | jp &addr 9075 | je &target_addr 9076 break; 9077 case IR_LT: 9078 | jae &target_addr 9079 break; 9080 case IR_GE: 9081 | jp &addr 9082 | jb &target_addr 9083 break; 9084 case IR_LE: 9085 | ja &target_addr 9086 break; 9087 case IR_GT: 9088 | jp &addr 9089 | jbe &target_addr 9090 break; 9091 } 9092 } 9093 | jmp &addr 9094 return 1; 
9095 } 9096 } 9097 9098 if (int_cmp) { 9099 switch (op) { 9100 default: 9101 IR_ASSERT(0 && "NIY binary op"); 9102 case IR_EQ: 9103 | je &addr 9104 break; 9105 case IR_NE: 9106 | jne &addr 9107 break; 9108 case IR_LT: 9109 | jl &addr 9110 break; 9111 case IR_GE: 9112 | jge &addr 9113 break; 9114 case IR_LE: 9115 | jle &addr 9116 break; 9117 case IR_GT: 9118 | jg &addr 9119 break; 9120 case IR_ULT: 9121 | jb &addr 9122 break; 9123 case IR_UGE: 9124 | jae &addr 9125 break; 9126 case IR_ULE: 9127 | jbe &addr 9128 break; 9129 case IR_UGT: 9130 | ja &addr 9131 break; 9132 } 9133 } else { 9134 switch (op) { 9135 default: 9136 IR_ASSERT(0 && "NIY binary op"); 9137 case IR_EQ: 9138 | jp >1 9139 | je &addr 9140 |1: 9141 break; 9142 case IR_NE: 9143 | jne &addr 9144 | jp &addr 9145 break; 9146 case IR_LT: 9147 | jp >1 9148 | jb &addr 9149 |1: 9150 break; 9151 case IR_GE: 9152 | jae &addr 9153 break; 9154 case IR_LE: 9155 | jp >1 9156 | jbe &addr 9157 |1: 9158 break; 9159 case IR_GT: 9160 | ja &addr 9161 break; 9162// case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break; 9163// case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break; 9164// case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break; 9165// case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break; 9166 } 9167 } 9168 return 0; 9169} 9170 9171static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9172{ 9173 ir_backend_data *data = ctx->data; 9174 dasm_State **Dst = &data->dasm_state; 9175 ir_reg op2_reg = ctx->regs[def][2]; 9176 ir_type type = ctx->ir_base[insn->op2].type; 9177 void *addr; 9178 9179 IR_ASSERT(IR_IS_TYPE_INT(type)); 9180 if (IR_IS_CONST_REF(insn->op2)) { 9181 bool is_true = ir_ref_is_true(ctx, insn->op2); 9182 9183 if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) { 9184 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9185 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9186 | jmp aword &addr 9187 } else { 9188|.if X64 9189 if (IR_IS_SIGNED_32BIT(addr)) { 9190 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9191 } else { 9192 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9193 } 9194 | jmp aword [rax] 9195|.endif 9196 } 9197 } 9198 return 0; 9199 } 9200 9201 if (op2_reg != IR_REG_NONE) { 9202 if (IR_REG_SPILLED(op2_reg)) { 9203 op2_reg = IR_REG_NUM(op2_reg); 9204 ir_emit_load(ctx, type, op2_reg, insn->op2); 9205 } 9206 | ASM_REG_REG_OP test, type, op2_reg, op2_reg 9207 } else { 9208 ir_mem mem; 9209 9210 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 9211 mem = ir_fuse_load(ctx, def, insn->op2); 9212 } else { 9213 mem = ir_ref_spill_slot(ctx, insn->op2); 9214 } 9215 | ASM_MEM_IMM_OP cmp, type, mem, 0 9216 } 9217 9218 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9219 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9220 ir_op op; 9221 9222 if (insn->op == IR_GUARD) { 9223 op = IR_EQ; 9224 } else { 9225 op = IR_NE; 9226 } 9227 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9228 } else { 9229|.if X64 9230 if (insn->op == IR_GUARD) { 9231 | je >1 9232 } else { 9233 | jne >1 9234 } 9235 |.cold_code 9236 |1: 9237 if (IR_IS_SIGNED_32BIT(addr)) { 9238 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9239 } else { 9240 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9241 } 9242 | jmp aword [rax] 9243 |.code 9244|.endif 9245 return 0; 9246 } 9247} 9248 9249static bool ir_emit_guard_cmp_int(ir_ctx 
*ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9250{ 9251 ir_backend_data *data = ctx->data; 9252 dasm_State **Dst = &data->dasm_state; 9253 ir_insn *cmp_insn = &ctx->ir_base[insn->op2]; 9254 ir_op op = cmp_insn->op; 9255 ir_type type = ctx->ir_base[cmp_insn->op1].type; 9256 ir_ref op1 = cmp_insn->op1; 9257 ir_ref op2 = cmp_insn->op2; 9258 ir_reg op1_reg = ctx->regs[insn->op2][1]; 9259 ir_reg op2_reg = ctx->regs[insn->op2][2]; 9260 void *addr; 9261 9262 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 9263 op1_reg = IR_REG_NUM(op1_reg); 9264 ir_emit_load(ctx, type, op1_reg, op1); 9265 } 9266 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 9267 op2_reg = IR_REG_NUM(op2_reg); 9268 if (op1 != op2) { 9269 ir_emit_load(ctx, type, op2_reg, op2); 9270 } 9271 } 9272 9273 addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9274 if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) { 9275 if (op == IR_ULT) { 9276 /* always false */ 9277 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9278 | jmp aword &addr 9279 } else { 9280|.if X64 9281 if (IR_IS_SIGNED_32BIT(addr)) { 9282 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9283 } else { 9284 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9285 } 9286 | jmp aword [rax] 9287|.endif 9288 } 9289 return 0; 9290 } else if (op == IR_UGE) { 9291 /* always true */ 9292 return 0; 9293 } else if (op == IR_ULE) { 9294 op = IR_EQ; 9295 } else if (op == IR_UGT) { 9296 op = IR_NE; 9297 } 9298 } 9299 ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2); 9300 9301 if (insn->op == IR_GUARD) { 9302 op ^= 1; // reverse 9303 } 9304 9305 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9306} 9307 9308static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9309{ 9310 ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); 9311 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9312 9313 if (insn->op == IR_GUARD) { 9314 op ^= 1; // reverse 9315 } 9316 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 0); 9317} 9318 9319static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9320{ 9321 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9322 ir_op op = (insn->op == IR_GUARD) ? 
IR_EQ : IR_NE; 9323 9324 ir_emit_test_int_common(ctx, def, insn->op2, op); 9325 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9326} 9327 9328static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn, uint32_t next_block) 9329{ 9330 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9331 ir_op op = ctx->ir_base[insn->op2].op; 9332 9333 if (insn->op == IR_GUARD) { 9334 op ^= 1; // reverse 9335 } 9336 return ir_emit_guard_jcc(ctx, b, def, next_block, op, addr, 1); 9337} 9338 9339static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 9340{ 9341 ir_backend_data *data = ctx->data; 9342 dasm_State **Dst = &data->dasm_state; 9343 ir_type type; 9344 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]); 9345 9346 type = ctx->ir_base[ctx->ir_base[insn->op2].op1].type; 9347 9348 IR_ASSERT(IR_IS_TYPE_INT(type)); 9349 if (IR_IS_TYPE_SIGNED(type)) { 9350 if (insn->op == IR_GUARD) { 9351 | jno &addr 9352 } else { 9353 | jo &addr 9354 } 9355 } else { 9356 if (insn->op == IR_GUARD) { 9357 | jnc &addr 9358 } else { 9359 | jc &addr 9360 } 9361 } 9362 return 0; 9363} 9364 9365static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type) 9366{ 9367 ir_backend_data *data = ctx->data; 9368 dasm_State **Dst = &data->dasm_state; 9369 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9370 ir_mem mem = ir_fuse_addr(ctx, def, def); 9371 9372 IR_ASSERT(def_reg != IR_REG_NONE); 9373 if (ir_type_size[type] == 4) { 9374 if (IR_MEM_BASE(mem) == def_reg 9375 && IR_MEM_OFFSET(mem) == 0 9376 && IR_MEM_SCALE(mem) == 1 9377 && IR_MEM_INDEX(mem) != IR_REG_NONE) { 9378 ir_reg reg = IR_MEM_INDEX(mem); 9379 | add Rd(def_reg), Rd(reg) 9380 } else if (IR_MEM_INDEX(mem) == def_reg 9381 && IR_MEM_OFFSET(mem) == 0 9382 && IR_MEM_SCALE(mem) == 1 9383 && IR_MEM_BASE(mem) != IR_REG_NONE) { 9384 ir_reg reg = IR_MEM_BASE(mem); 9385 | add Rd(def_reg), Rd(reg) 9386 } else { 9387 | ASM_TXT_TMEM_OP lea, Rd(def_reg), dword, mem 9388 } 9389 } else { 9390 if (IR_MEM_BASE(mem) == def_reg 9391 && IR_MEM_OFFSET(mem) == 0 9392 && IR_MEM_SCALE(mem) == 1 9393 && IR_MEM_INDEX(mem) != IR_REG_NONE) { 9394 ir_reg reg = IR_MEM_INDEX(mem); 9395 | add Ra(def_reg), Ra(reg) 9396 } else if (IR_MEM_INDEX(mem) == def_reg 9397 && IR_MEM_OFFSET(mem) == 0 9398 && IR_MEM_SCALE(mem) == 1 9399 && IR_MEM_BASE(mem) != IR_REG_NONE) { 9400 ir_reg reg = IR_MEM_BASE(mem); 9401 | add Ra(def_reg), Ra(reg) 9402 } else { 9403 | ASM_TXT_TMEM_OP lea, Ra(def_reg), aword, mem 9404 } 9405 } 9406 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9407 ir_emit_store(ctx, type, def, def_reg); 9408 } 9409} 9410 9411static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn) 9412{ 9413 ir_backend_data *data = ctx->data; 9414 dasm_State **Dst = &data->dasm_state; 9415 ir_reg reg = IR_REG_NUM(ctx->regs[def][0]); 9416 9417 if (ctx->use_lists[def].count == 1) { 9418 /* dead load */ 9419 return; 9420 } 9421 9422|.if X64WIN 9423| gs 9424| mov Ra(reg), aword [0x58] 9425| mov Ra(reg), aword [Ra(reg)+insn->op2] 9426| mov Ra(reg), aword [Ra(reg)+insn->op3] 9427|.elif WIN 9428| fs 9429| mov Ra(reg), aword [0x2c] 9430| mov Ra(reg), aword [Ra(reg)+insn->op2] 9431| mov Ra(reg), aword [Ra(reg)+insn->op3] 9432|.elif X64APPLE 9433| gs 9434|| if (insn->op3 == IR_NULL) { 9435| mov Ra(reg), aword [insn->op2] 9436|| } else { 9437| mov Ra(reg), aword [insn->op2] 9438| mov Ra(reg), aword [Ra(reg)+insn->op3] 9439|| } 9440|.elif X64 9441| fs 9442|| if (insn->op3 == IR_NULL) { 9443| mov Ra(reg), aword [insn->op2] 9444|| } else 
{ 9445| mov Ra(reg), [0x8] 9446| mov Ra(reg), aword [Ra(reg)+insn->op2] 9447| mov Ra(reg), aword [Ra(reg)+insn->op3] 9448|| } 9449|.else 9450| gs 9451|| if (insn->op3 == IR_NULL) { 9452| mov Ra(reg), aword [insn->op2] 9453|| } else { 9454| mov Ra(reg), [0x4] 9455| mov Ra(reg), aword [Ra(reg)+insn->op2] 9456| mov Ra(reg), aword [Ra(reg)+insn->op3] 9457|| } 9458|.endif 9459 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9460 ir_emit_store(ctx, IR_ADDR, def, reg); 9461 } 9462} 9463 9464static void ir_emit_sse_sqrt(ir_ctx *ctx, ir_ref def, ir_insn *insn) 9465{ 9466 ir_backend_data *data = ctx->data; 9467 dasm_State **Dst = &data->dasm_state; 9468 ir_reg op3_reg = ctx->regs[def][3]; 9469 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9470 9471 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 9472 IR_ASSERT(def_reg != IR_REG_NONE && op3_reg != IR_REG_NONE); 9473 9474 if (IR_REG_SPILLED(op3_reg)) { 9475 op3_reg = IR_REG_NUM(op3_reg); 9476 ir_emit_load(ctx, insn->type, op3_reg, insn->op3); 9477 } 9478 9479 | ASM_FP_REG_REG_OP sqrts, insn->type, def_reg, op3_reg 9480 9481 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9482 ir_emit_store(ctx, insn->type, def, def_reg); 9483 } 9484} 9485 9486static void ir_emit_sse_round(ir_ctx *ctx, ir_ref def, ir_insn *insn, int round_op) 9487{ 9488 ir_backend_data *data = ctx->data; 9489 dasm_State **Dst = &data->dasm_state; 9490 ir_reg op3_reg = ctx->regs[def][3]; 9491 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9492 9493 IR_ASSERT(IR_IS_TYPE_FP(insn->type)); 9494 IR_ASSERT(def_reg != IR_REG_NONE && op3_reg != IR_REG_NONE); 9495 9496 if (IR_REG_SPILLED(op3_reg)) { 9497 op3_reg = IR_REG_NUM(op3_reg); 9498 ir_emit_load(ctx, insn->type, op3_reg, insn->op3); 9499 } 9500 9501 if (ctx->mflags & IR_X86_AVX) { 9502 | ASM_SSE2_REG_REG_REG_TXT_OP vrounds, insn->type, def_reg, def_reg, op3_reg, round_op 9503 } else { 9504 | ASM_SSE2_REG_REG_TXT_OP rounds, insn->type, def_reg, op3_reg, round_op 9505 } 9506 9507 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9508 ir_emit_store(ctx, insn->type, def, def_reg); 9509 } 9510} 9511 9512static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn) 9513{ 9514 ir_backend_data *data = ctx->data; 9515 dasm_State **Dst = &data->dasm_state; 9516 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 9517 9518 IR_ASSERT(def_reg != IR_REG_NONE); 9519 9520 |.if X64 9521 | sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */ 9522 | mov aword [rsp+0*8], rax 9523 | mov aword [rsp+1*8], rcx 9524 | mov aword [rsp+2*8], rdx 9525 | mov aword [rsp+3*8], rbx 9526 | mov aword [rsp+5*8], rbp 9527 | mov aword [rsp+6*8], rsi 9528 | mov aword [rsp+7*8], rdi 9529 | mov aword [rsp+8*8], r8 9530 | mov aword [rsp+9*8], r9 9531 | mov aword [rsp+10*8], r10 9532 | mov aword [rsp+11*8], r11 9533 | mov aword [rsp+12*8], r12 9534 | mov aword [rsp+13*8], r13 9535 | mov aword [rsp+14*8], r14 9536 | mov aword [rsp+15*8], r15 9537 | movsd qword [rsp+16*8+0*8], xmm0 9538 | movsd qword [rsp+16*8+1*8], xmm1 9539 | movsd qword [rsp+16*8+2*8], xmm2 9540 | movsd qword [rsp+16*8+3*8], xmm3 9541 | movsd qword [rsp+16*8+4*8], xmm4 9542 | movsd qword [rsp+16*8+5*8], xmm5 9543 | movsd qword [rsp+16*8+6*8], xmm6 9544 | movsd qword [rsp+16*8+7*8], xmm7 9545 | movsd qword [rsp+16*8+8*8], xmm8 9546 | movsd qword [rsp+16*8+9*8], xmm9 9547 | movsd qword [rsp+16*8+10*8], xmm10 9548 | movsd qword [rsp+16*8+11*8], xmm11 9549 | movsd qword [rsp+16*8+12*8], xmm12 9550 | movsd qword [rsp+16*8+13*8], xmm13 9551 | movsd qword [rsp+16*8+14*8], xmm14 9552 | movsd qword [rsp+16*8+15*8], xmm15 9553 |
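 /* Hand the state over to the exit handler: ARG2 points at the register snapshot saved above; the original RSP is reconstructed and stored into the snapshot's RSP slot, and ARG1 is loaded with the value the exit stub pushed on the stack. */ 9554 | mov Ra(IR_REG_INT_ARG2),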
rsp 9555 | lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16] 9556 | mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1) 9557 | mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8] 9558 |.if X64WIN 9559 | sub rsp, 32 /* shadow space */ 9560 |.endif 9561 |.else 9562 | sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */ 9563 | mov aword [esp+0*4], eax 9564 | mov aword [esp+1*4], ecx 9565 | mov aword [esp+2*4], edx 9566 | mov aword [esp+3*4], ebx 9567 | mov aword [esp+5*4], ebp 9568 | mov aword [esp+6*4], esi 9569 | mov aword [esp+7*4], edi 9570 | movsd qword [esp+8*4+0*8], xmm0 9571 | movsd qword [esp+8*4+1*8], xmm1 9572 | movsd qword [esp+8*4+2*8], xmm2 9573 | movsd qword [esp+8*4+3*8], xmm3 9574 | movsd qword [esp+8*4+4*8], xmm4 9575 | movsd qword [esp+8*4+5*8], xmm5 9576 | movsd qword [esp+8*4+6*8], xmm6 9577 | movsd qword [esp+8*4+7*8], xmm7 9578 | 9579 | mov Ra(IR_REG_INT_FCARG2), esp 9580 | lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16] 9581 | mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1) 9582 | mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12] 9583 |.endif 9584 9585 if (IR_IS_CONST_REF(insn->op2)) { 9586 void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]); 9587 9588 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 9589 | call aword &addr 9590 } else { 9591|.if X64 9592 if (IR_IS_SIGNED_32BIT(addr)) { 9593 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 9594 } else { 9595 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 9596 } 9597 | call rax 9598|.endif 9599 } 9600 } else { 9601 IR_ASSERT(0); 9602 } 9603 9604 // restore SP 9605 |.if X64WIN 9606 | add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */ 9607 |.elif X64 9608 | add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */ 9609 |.else 9610 | add esp, 8*4+8*8+16 /* CPU regs + SSE regs */ 9611 |.endif 9612 9613 if (def_reg != IR_REG_INT_RET1) { 9614 ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1); 9615 } 9616 if (IR_REG_SPILLED(ctx->regs[def][0])) { 9617 ir_emit_store(ctx, insn->type, def, def_reg); 9618 } 9619} 9620 9621static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset) 9622{ 9623 ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 9624 9625 IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE); 9626 9627 if (IR_IS_TYPE_INT(type)) { 9628 if (from_reg != IR_REG_NONE) { 9629 if (to_reg != IR_REG_NONE) { 9630 ir_emit_mov(ctx, type, to_reg, from_reg); 9631 } else { 9632 ir_emit_store(ctx, type, to, from_reg); 9633 } 9634 } else { 9635 ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset)); 9636 } 9637 } else { 9638 if (from_reg != IR_REG_NONE) { 9639 if (to_reg != IR_REG_NONE) { 9640 ir_emit_fp_mov(ctx, type, to_reg, from_reg); 9641 } else { 9642 ir_emit_store(ctx, type, to, from_reg); 9643 } 9644 } else { 9645 ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset)); 9646 } 9647 } 9648} 9649 9650static void ir_emit_load_params(ir_ctx *ctx) 9651{ 9652 ir_use_list *use_list = &ctx->use_lists[1]; 9653 ir_insn *insn; 9654 ir_ref i, n, *p, use; 9655 int int_param_num = 0; 9656 int fp_param_num = 0; 9657 ir_reg src_reg; 9658 ir_reg dst_reg; 9659 // TODO: Calling convention specific 9660 int int_reg_params_count = IR_REG_INT_ARGS; 9661 int fp_reg_params_count = IR_REG_FP_ARGS; 9662 const int8_t *int_reg_params = _ir_int_reg_params; 9663 const int8_t *fp_reg_params = _ir_fp_reg_params; 9664 int32_t stack_offset = 0; 9665 9666#ifdef IR_TARGET_X86 9667 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { 9668 int_reg_params_count = IR_REG_INT_FCARGS; 9669 fp_reg_params_count = IR_REG_FP_FCARGS; 9670 int_reg_params = _ir_int_fc_reg_params; 9671 fp_reg_params = _ir_fp_fc_reg_params; 9672 } 9673#endif 9674 9675 if (ctx->flags & IR_USE_FRAME_POINTER) { 9676 stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ 9677 } else { 9678 stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ 9679 } 9680 n = use_list->count; 9681 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 9682 use = *p; 9683 insn = &ctx->ir_base[use]; 9684 if (insn->op == IR_PARAM) { 9685 if (IR_IS_TYPE_INT(insn->type)) { 9686 if (int_param_num < int_reg_params_count) { 9687 src_reg = int_reg_params[int_param_num]; 9688 } else { 9689 src_reg = IR_REG_NONE; 9690 } 9691 int_param_num++; 9692#ifdef _WIN64 9693 /* WIN64 calling convention uses a common counter for int and fp registers */ 9694 fp_param_num++; 9695#endif 9696 } else { 9697 if (fp_param_num < fp_reg_params_count) { 9698 src_reg = fp_reg_params[fp_param_num]; 9699 } else { 9700 src_reg = IR_REG_NONE; 9701 } 9702 fp_param_num++; 9703#ifdef _WIN64 9704 /* WIN64 calling convention uses a common counter for int and fp registers */ 9705 int_param_num++; 9706#endif 9707 } 9708 if (ctx->vregs[use]) { 9709 dst_reg = IR_REG_NUM(ctx->regs[use][0]);
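 /* A PARAM that got neither a register nor a move must already reside in its incoming argument slot; the assertion checks that its spill position coincides with that stack location. */ 9710 IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || 9711 stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + 9712 ((ctx->flags & IR_USE_FRAME_POINTER) ?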
							-(ctx->stack_frame_size - ctx->stack_frame_alignment) :
							ctx->call_stack_size));
				if (src_reg != dst_reg) {
					ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset);
				}
				if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) {
					ir_emit_store(ctx, insn->type, use, dst_reg);
				}
			}
			if (src_reg == IR_REG_NONE) {
				if (sizeof(void*) == 8) {
					stack_offset += sizeof(void*);
				} else {
					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
				}
			}
		}
	}
}

static ir_reg ir_get_free_reg(ir_type type, ir_regset available)
{
	if (IR_IS_TYPE_INT(type)) {
		available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP);
	} else {
		IR_ASSERT(IR_IS_TYPE_FP(type));
		available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP);
	}
	IR_ASSERT(!IR_REGSET_IS_EMPTY(available));
	return IR_REGSET_FIRST(available);
}

static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to)
{
	ir_backend_data *data = ctx->data;
	ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end;

	if (to == 0) {
		if (IR_IS_TYPE_INT(type)) {
			if (ctx->regs[ref][0] == IR_REG_NONE) {
				ctx->regs[ref][0] = IR_REG_RAX;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (ctx->regs[ref][1] == IR_REG_NONE) {
				ctx->regs[ref][1] = IR_REG_XMM0;
			}
		}
	} else if (from != 0) {
		if (IR_IS_TYPE_INT(type)) {
			if (ctx->regs[ref][0] == IR_REG_NONE) {
				ctx->regs[ref][0] = IR_REG_RAX;
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (ctx->regs[ref][1] == IR_REG_NONE) {
				ctx->regs[ref][1] = IR_REG_XMM0;
			}
		}
	}
	return 1;
}

static void ir_fix_param_spills(ir_ctx *ctx)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_insn *insn;
	ir_ref i, n, *p, use;
	int int_param_num = 0;
	int fp_param_num = 0;
	ir_reg src_reg;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_start = 0;
	int32_t stack_offset = 0;

#ifdef IR_TARGET_X86
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	if (ctx->flags & IR_USE_FRAME_POINTER) {
		/* skip old frame pointer and return address */
		stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment);
	} else {
		/* skip return address */
		stack_start = sizeof(void*) + ctx->stack_frame_size;
	}
	n = use_list->count;
	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (int_param_num < int_reg_params_count) {
					src_reg = int_reg_params[int_param_num];
				} else {
					src_reg = IR_REG_NONE;
				}
				int_param_num++;
#ifdef _WIN64
				/* The WIN64 calling convention uses a common counter for int and fp registers */
				fp_param_num++;
#endif
			} else {
				if (fp_param_num < fp_reg_params_count) {
					src_reg = fp_reg_params[fp_param_num];
				} else {
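					/* all FP argument registers are taken: this parameter arrives on the stack */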
					src_reg = IR_REG_NONE;
				}
				fp_param_num++;
#ifdef _WIN64
				/* The WIN64 calling convention uses a common counter for int and fp registers */
				int_param_num++;
#endif
			}
			if (src_reg == IR_REG_NONE) {
				if (ctx->vregs[use]) {
					ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]];
					if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM)
					 && ival->stack_spill_pos == -1
					 && (ival->next || ival->reg == IR_REG_NONE)) {
						ival->stack_spill_pos = stack_start + stack_offset;
					}
				}
				if (sizeof(void*) == 8) {
					stack_offset += sizeof(void*);
				} else {
					stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]);
				}
			}
		}
	}

#ifdef _WIN64
	/* WIN64 reserves a shadow area for the register arguments */
	stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*);
#endif
	ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count);
	ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count);
	ctx->param_stack_size = stack_offset;
}

static void ir_allocate_unique_spill_slots(ir_ctx *ctx)
{
	uint32_t b;
	ir_block *bb;
	ir_insn *insn;
	ir_ref i, n, j, *p;
	uint32_t *rule, insn_flags;
	ir_backend_data *data = ctx->data;
	ir_regset available = 0;
	ir_target_constraints constraints;
	uint32_t def_flags;
	ir_reg reg;

#ifndef IR_REG_FP_RET1
	if (ctx->flags2 & IR_HAS_FP_RET_SLOT) {
		ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data);
	} else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) {
		ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data);
	} else {
		ctx->ret_slot = -1;
	}
#endif

	ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count);
	memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count);

	/* vregs + tmp + fixed + SCRATCH + ALL */
	ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*));

	if (!ctx->arena) {
		ctx->arena = ir_arena_create(16 * 1024);
	}

	for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) {
			switch (ctx->rules ?
*rule : insn->op) { 9900 case IR_START: 9901 case IR_BEGIN: 9902 case IR_END: 9903 case IR_IF_TRUE: 9904 case IR_IF_FALSE: 9905 case IR_CASE_VAL: 9906 case IR_CASE_DEFAULT: 9907 case IR_MERGE: 9908 case IR_LOOP_BEGIN: 9909 case IR_LOOP_END: 9910 break; 9911#ifndef IR_REG_FP_RET1 9912 case IR_CALL: 9913 if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { 9914 ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); 9915 } 9916#endif 9917 IR_FALLTHROUGH; 9918 default: 9919 def_flags = ir_get_target_constraints(ctx, i, &constraints); 9920 if (ctx->rules 9921 && *rule != IR_CMP_AND_BRANCH_INT 9922 && *rule != IR_CMP_AND_BRANCH_FP 9923 && *rule != IR_TEST_AND_BRANCH_INT 9924 && *rule != IR_GUARD_CMP_INT 9925 && *rule != IR_GUARD_CMP_FP) { 9926 available = IR_REGSET_SCRATCH; 9927 } 9928 if (ctx->vregs[i]) { 9929 reg = constraints.def_reg; 9930 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 9931 IR_REGSET_EXCL(available, reg); 9932 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 9933 } else if (def_flags & IR_USE_MUST_BE_IN_REG) { 9934 if (insn->op == IR_VLOAD 9935 && ctx->live_intervals[ctx->vregs[i]] 9936 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { 9937 /* pass */ 9938 } else if (insn->op != IR_PARAM) { 9939 reg = ir_get_free_reg(insn->type, available); 9940 IR_REGSET_EXCL(available, reg); 9941 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 9942 } 9943 } 9944 if (!ctx->live_intervals[ctx->vregs[i]]) { 9945 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 9946 memset(ival, 0, sizeof(ir_live_interval)); 9947 ctx->live_intervals[ctx->vregs[i]] = ival; 9948 ival->type = insn->type; 9949 ival->reg = IR_REG_NONE; 9950 ival->vreg = ctx->vregs[i]; 9951 ival->stack_spill_pos = -1; 9952 if (insn->op == IR_PARAM && reg == IR_REG_NONE) { 9953 ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; 9954 } else { 9955 ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); 9956 } 9957 } else if (insn->op == IR_PARAM) { 9958 IR_ASSERT(0 && "unexpected PARAM"); 9959 return; 9960 } 9961 } else if (insn->op == IR_VAR) { 9962 ir_use_list *use_list = &ctx->use_lists[i]; 9963 ir_ref n = use_list->count; 9964 9965 if (n > 0) { 9966 int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); 9967 ir_ref i, *p, use; 9968 ir_insn *use_insn; 9969 9970 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 9971 use = *p; 9972 use_insn = &ctx->ir_base[use]; 9973 if (use_insn->op == IR_VLOAD) { 9974 if (ctx->vregs[use] 9975 && !ctx->live_intervals[ctx->vregs[use]]) { 9976 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 9977 memset(ival, 0, sizeof(ir_live_interval)); 9978 ctx->live_intervals[ctx->vregs[use]] = ival; 9979 ival->type = insn->type; 9980 ival->reg = IR_REG_NONE; 9981 ival->vreg = ctx->vregs[use]; 9982 ival->stack_spill_pos = stack_spill_pos; 9983 } 9984 } else if (use_insn->op == IR_VSTORE) { 9985 if (!IR_IS_CONST_REF(use_insn->op3) 9986 && ctx->vregs[use_insn->op3] 9987 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { 9988 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 9989 memset(ival, 0, sizeof(ir_live_interval)); 9990 ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; 9991 ival->type = insn->type; 9992 ival->reg = IR_REG_NONE; 9993 ival->vreg = ctx->vregs[use_insn->op3]; 9994 ival->stack_spill_pos = stack_spill_pos; 9995 } 9996 } 9997 } 9998 } 9999 } 10000 10001 insn_flags = 
ir_op_flags[insn->op]; 10002 n = constraints.tmps_count; 10003 if (n) { 10004 do { 10005 n--; 10006 if (constraints.tmp_regs[n].type) { 10007 ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); 10008 ir_ref *ops = insn->ops; 10009 IR_REGSET_EXCL(available, reg); 10010 if (constraints.tmp_regs[n].num > 0 10011 && IR_IS_CONST_REF(ops[constraints.tmp_regs[n].num])) { 10012 /* rematerialization */ 10013 reg |= IR_REG_SPILL_LOAD; 10014 } 10015 ctx->regs[i][constraints.tmp_regs[n].num] = reg; 10016 } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { 10017 available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); 10018 } else { 10019 IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); 10020 } 10021 } while (n); 10022 } 10023 n = insn->inputs_count; 10024 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { 10025 ir_ref input = *p; 10026 if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { 10027 if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { 10028 ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); 10029 ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; 10030 } else { 10031 uint8_t use_flags = IR_USE_FLAGS(def_flags, j); 10032 ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; 10033 10034 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 10035 IR_REGSET_EXCL(available, reg); 10036 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 10037 } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { 10038 ctx->regs[i][j] = ctx->regs[i][1]; 10039 } else if (use_flags & IR_USE_MUST_BE_IN_REG) { 10040 reg = ir_get_free_reg(ctx->ir_base[input].type, available); 10041 IR_REGSET_EXCL(available, reg); 10042 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 10043 } 10044 } 10045 } 10046 } 10047 break; 10048 } 10049 n = ir_insn_len(insn); 10050 i += n; 10051 insn += n; 10052 rule += n; 10053 } 10054 if (bb->flags & IR_BB_DESSA_MOVES) { 10055 data->dessa_from_block = b; 10056 ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); 10057 } 10058 } 10059 10060 ctx->used_preserved_regs = ctx->fixed_save_regset; 10061 ctx->flags |= IR_NO_STACK_COMBINE; 10062 ir_fix_stack_frame(ctx); 10063} 10064 10065static void ir_preallocate_call_stack(ir_ctx *ctx) 10066{ 10067 int call_stack_size, peak_call_stack_size = 0; 10068 ir_ref i, n; 10069 ir_insn *insn; 10070 10071 for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { 10072 if (insn->op == IR_CALL) { 10073 call_stack_size = ir_call_used_stack(ctx, insn); 10074 if (call_stack_size > peak_call_stack_size 10075#ifdef IR_HAVE_FASTCALL 10076 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 10077#endif 10078 ) { 10079 peak_call_stack_size = call_stack_size; 10080 } 10081 } 10082 n = ir_insn_len(insn); 10083 i += n; 10084 insn += n; 10085 } 10086 if (peak_call_stack_size) { 10087 ctx->call_stack_size = peak_call_stack_size; 10088 ctx->flags |= IR_PREALLOCATED_STACK; 10089 } 10090} 10091 10092void ir_fix_stack_frame(ir_ctx *ctx) 10093{ 10094 uint32_t additional_size = 0; 10095 10096 ctx->locals_area_size = ctx->stack_frame_size; 10097 10098#if defined(IR_TARGET_X64) && !defined(_WIN64) 10099 if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { 10100 ctx->flags2 |= IR_16B_FRAME_ALIGNMENT; 10101 ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, 16); 10102 ctx->locals_area_size = ctx->stack_frame_size; 10103 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 10104 additional_size += 
sizeof(void*) * IR_REG_INT_ARGS; 10105 } 10106 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 10107 additional_size += 16 * IR_REG_FP_ARGS; 10108 } 10109 } 10110#endif 10111 10112 if (ctx->used_preserved_regs) { 10113 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 10114 ir_reg reg; 10115 (void) reg; 10116 10117 IR_REGSET_FOREACH(used_preserved_regs, reg) { 10118 additional_size += sizeof(void*); 10119 } IR_REGSET_FOREACH_END(); 10120 } 10121 10122 ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); 10123 ctx->stack_frame_size += additional_size; 10124 ctx->stack_frame_alignment = 0; 10125 ctx->call_stack_size = 0; 10126 10127 if (ctx->flags2 & IR_16B_FRAME_ALIGNMENT) { 10128 /* Stack must be 16 byte aligned */ 10129 if (!(ctx->flags & IR_FUNCTION)) { 10130 while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { 10131 ctx->stack_frame_size += sizeof(void*); 10132 ctx->stack_frame_alignment += sizeof(void*); 10133 } 10134 } else if (ctx->flags & IR_USE_FRAME_POINTER) { 10135 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { 10136 ctx->stack_frame_size += sizeof(void*); 10137 ctx->stack_frame_alignment += sizeof(void*); 10138 } 10139 } else { 10140 if (!(ctx->flags & IR_NO_STACK_COMBINE)) { 10141 ir_preallocate_call_stack(ctx); 10142 } 10143 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != 10144 ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { 10145 ctx->stack_frame_size += sizeof(void*); 10146 ctx->stack_frame_alignment += sizeof(void*); 10147 } 10148 } 10149 } 10150 10151 ir_fix_param_spills(ctx); 10152} 10153 10154static void* dasm_labels[ir_lb_MAX]; 10155 10156static uint32_t _ir_next_block(ir_ctx *ctx, uint32_t _b) 10157{ 10158 uint32_t b = ctx->cfg_schedule[++_b]; 10159 10160 /* Check for empty ENTRY block */ 10161 while (b && ((ctx->cfg_blocks[b].flags & (IR_BB_START|IR_BB_EMPTY)) == IR_BB_EMPTY)) { 10162 b = ctx->cfg_schedule[++_b]; 10163 } 10164 return b; 10165} 10166 10167void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) 10168{ 10169 uint32_t _b, b, n, target; 10170 ir_block *bb; 10171 ir_ref i; 10172 ir_insn *insn; 10173 uint32_t *rule; 10174 ir_backend_data data; 10175 dasm_State **Dst; 10176 int ret; 10177 void *entry; 10178 size_t size; 10179 10180 data.ra_data.unused_slot_4 = 0; 10181 data.ra_data.unused_slot_2 = 0; 10182 data.ra_data.unused_slot_1 = 0; 10183 data.ra_data.handled = NULL; 10184 data.rodata_label = 0; 10185 data.jmp_table_label = 0; 10186 data.double_neg_const = 0; 10187 data.float_neg_const = 0; 10188 data.double_abs_const = 0; 10189 data.float_abs_const = 0; 10190 data.double_zero_const = 0; 10191 ctx->data = &data; 10192 10193 if (!ctx->live_intervals) { 10194 ctx->stack_frame_size = 0; 10195 ctx->stack_frame_alignment = 0; 10196 ctx->call_stack_size = 0; 10197 ctx->used_preserved_regs = 0; 10198 ir_allocate_unique_spill_slots(ctx); 10199 } 10200 10201 if (ctx->fixed_stack_frame_size != -1) { 10202 if (ctx->fixed_stack_red_zone) { 10203 IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); 10204 } 10205 if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { 10206 // TODO: report error to caller 10207#ifdef IR_DEBUG_MESSAGES 10208 fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", 10209 __FILE__, __LINE__); 10210#endif 
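			/* The computed frame does not fit into the caller-imposed fixed
			 * frame, so code generation cannot proceed; report the overflow
			 * through ctx->status instead of silently growing the frame. */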
10211 ctx->data = NULL; 10212 ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; 10213 return NULL; 10214 } 10215 ctx->stack_frame_size = ctx->fixed_stack_frame_size; 10216 ctx->call_stack_size = ctx->fixed_call_stack_size; 10217 ctx->stack_frame_alignment = 0; 10218 } 10219 10220 Dst = &data.dasm_state; 10221 data.dasm_state = NULL; 10222 dasm_init(&data.dasm_state, DASM_MAXSECTION); 10223 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); 10224 dasm_setup(&data.dasm_state, dasm_actions); 10225 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ 10226 dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); 10227 data.emit_constants = ir_bitset_malloc(ctx->consts_count); 10228 10229 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { 10230 |.if X64 10231 | endbr64 10232 |.else 10233 | endbr32 10234 |.endif 10235 } 10236 10237 if (!(ctx->flags & IR_SKIP_PROLOGUE)) { 10238 ir_emit_prologue(ctx); 10239 } 10240 if (ctx->flags & IR_FUNCTION) { 10241 ir_emit_load_params(ctx); 10242 } 10243 10244 if (UNEXPECTED(!ctx->cfg_schedule)) { 10245 uint32_t *list = ctx->cfg_schedule = ir_mem_malloc(sizeof(uint32_t) * (ctx->cfg_blocks_count + 2)); 10246 for (b = 0; b <= ctx->cfg_blocks_count; b++) { 10247 list[b] = b; 10248 } 10249 list[ctx->cfg_blocks_count + 1] = 0; 10250 } 10251 10252 for (_b = 1; _b <= ctx->cfg_blocks_count; _b++) { 10253 b = ctx->cfg_schedule[_b]; 10254 bb = &ctx->cfg_blocks[b]; 10255 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 10256 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { 10257 continue; 10258 } 10259 if (bb->flags & IR_BB_ALIGN_LOOP) { 10260 | .align IR_LOOP_ALIGNMENT 10261 } 10262 |=>b: 10263 10264 i = bb->start; 10265 insn = ctx->ir_base + i; 10266 if (bb->flags & IR_BB_ENTRY) { 10267 uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; 10268 10269 |=>label: 10270 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) { 10271 |.if X64 10272 | endbr64 10273 |.else 10274 | endbr32 10275 |.endif 10276 } 10277 ir_emit_prologue(ctx); 10278 ctx->entries[insn->op3] = i; 10279 } 10280 10281 /* skip first instruction */ 10282 n = ir_insn_len(insn); 10283 i += n; 10284 insn += n; 10285 rule = ctx->rules + i; 10286 10287 while (i <= bb->end) { 10288 if (!((*rule) & (IR_FUSED|IR_SKIPPED))) 10289 switch ((*rule) & IR_RULE_MASK) { 10290 case IR_VAR: 10291 case IR_PARAM: 10292 case IR_PI: 10293 case IR_PHI: 10294 case IR_SNAPSHOT: 10295 case IR_VA_END: 10296 break; 10297 case IR_LEA_OB: 10298 case IR_LEA_SI: 10299 case IR_LEA_SIB: 10300 case IR_LEA_IB: 10301 case IR_LEA_OB_I: 10302 case IR_LEA_I_OB: 10303 case IR_LEA_SI_O: 10304 case IR_LEA_SIB_O: 10305 case IR_LEA_IB_O: 10306 case IR_LEA_OB_SI: 10307 case IR_LEA_SI_OB: 10308 case IR_LEA_B_SI: 10309 case IR_LEA_SI_B: 10310 ir_emit_lea(ctx, i, insn->type); 10311 break; 10312 case IR_MUL_PWR2: 10313 case IR_DIV_PWR2: 10314 case IR_MOD_PWR2: 10315 ir_emit_mul_div_mod_pwr2(ctx, i, insn); 10316 break; 10317 case IR_SDIV_PWR2: 10318 ir_emit_sdiv_pwr2(ctx, i, insn); 10319 break; 10320 case IR_SMOD_PWR2: 10321 ir_emit_smod_pwr2(ctx, i, insn); 10322 break; 10323 case IR_SHIFT: 10324 ir_emit_shift(ctx, i, insn); 10325 break; 10326 case IR_SHIFT_CONST: 10327 ir_emit_shift_const(ctx, i, insn); 10328 break; 10329 case IR_BIT_COUNT: 10330 ir_emit_bit_count(ctx, i, insn); 10331 break; 10332 case IR_CTPOP: 10333 ir_emit_ctpop(ctx, i, insn); 10334 break; 10335 
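				/* The arithmetic/logic rules below map 1:1 onto ir_emit_*()
				 * helpers; each helper reads its allocated operand/result
				 * registers (and spill-load/store flags) from ctx->regs[i]. */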
case IR_INC: 10336 case IR_DEC: 10337 case IR_OP_INT: 10338 ir_emit_op_int(ctx, i, insn, *rule); 10339 break; 10340 case IR_ABS_INT: 10341 ir_emit_abs_int(ctx, i, insn); 10342 break; 10343 case IR_BOOL_NOT_INT: 10344 ir_emit_bool_not_int(ctx, i, insn); 10345 break; 10346 case IR_OP_FP: 10347 ir_emit_op_fp(ctx, i, insn); 10348 break; 10349 case IR_IMUL3: 10350 ir_emit_imul3(ctx, i, insn); 10351 break; 10352 case IR_BINOP_INT: 10353 ir_emit_binop_int(ctx, i, insn); 10354 break; 10355 case IR_BINOP_SSE2: 10356 ir_emit_binop_sse2(ctx, i, insn); 10357 break; 10358 case IR_BINOP_AVX: 10359 ir_emit_binop_avx(ctx, i, insn); 10360 break; 10361 case IR_MUL_INT: 10362 case IR_DIV_INT: 10363 case IR_MOD_INT: 10364 ir_emit_mul_div_mod(ctx, i, insn); 10365 break; 10366 case IR_CMP_INT: 10367 ir_emit_cmp_int(ctx, i, insn); 10368 break; 10369 case IR_TESTCC_INT: 10370 ir_emit_testcc_int(ctx, i, insn); 10371 break; 10372 case IR_SETCC_INT: 10373 ir_emit_setcc_int(ctx, i, insn); 10374 break; 10375 case IR_CMP_FP: 10376 ir_emit_cmp_fp(ctx, i, insn); 10377 break; 10378 case IR_SEXT: 10379 ir_emit_sext(ctx, i, insn); 10380 break; 10381 case IR_ZEXT: 10382 ir_emit_zext(ctx, i, insn); 10383 break; 10384 case IR_TRUNC: 10385 ir_emit_trunc(ctx, i, insn); 10386 break; 10387 case IR_BITCAST: 10388 case IR_PROTO: 10389 ir_emit_bitcast(ctx, i, insn); 10390 break; 10391 case IR_INT2FP: 10392 ir_emit_int2fp(ctx, i, insn); 10393 break; 10394 case IR_FP2INT: 10395 ir_emit_fp2int(ctx, i, insn); 10396 break; 10397 case IR_FP2FP: 10398 ir_emit_fp2fp(ctx, i, insn); 10399 break; 10400 case IR_COPY_INT: 10401 ir_emit_copy_int(ctx, i, insn); 10402 break; 10403 case IR_COPY_FP: 10404 ir_emit_copy_fp(ctx, i, insn); 10405 break; 10406 case IR_CMP_AND_STORE_INT: 10407 ir_emit_cmp_and_store_int(ctx, i, insn); 10408 break; 10409 case IR_CMP_AND_BRANCH_INT: 10410 ir_emit_cmp_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10411 break; 10412 case IR_CMP_AND_BRANCH_FP: 10413 ir_emit_cmp_and_branch_fp(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10414 break; 10415 case IR_TEST_AND_BRANCH_INT: 10416 ir_emit_test_and_branch_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10417 break; 10418 case IR_JCC_INT: 10419 { 10420 ir_op op = ctx->ir_base[insn->op2].op; 10421 10422 if (op == IR_ADD || 10423 op == IR_SUB || 10424// op == IR_MUL || 10425 op == IR_OR || 10426 op == IR_AND || 10427 op == IR_XOR) { 10428 op = IR_NE; 10429 } else { 10430 IR_ASSERT(op >= IR_EQ && op <= IR_UGT); 10431 } 10432 ir_emit_jcc(ctx, b, i, insn, _ir_next_block(ctx, _b), op, 1); 10433 } 10434 break; 10435 case IR_GUARD_CMP_INT: 10436 if (ir_emit_guard_cmp_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10437 goto next_block; 10438 } 10439 break; 10440 case IR_GUARD_CMP_FP: 10441 if (ir_emit_guard_cmp_fp(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10442 goto next_block; 10443 } 10444 break; 10445 case IR_GUARD_TEST_INT: 10446 if (ir_emit_guard_test_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10447 goto next_block; 10448 } 10449 break; 10450 case IR_GUARD_JCC_INT: 10451 if (ir_emit_guard_jcc_int(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10452 goto next_block; 10453 } 10454 break; 10455 case IR_IF_INT: 10456 ir_emit_if_int(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10457 break; 10458 case IR_COND: 10459 ir_emit_cond(ctx, i, insn); 10460 break; 10461 case IR_COND_CMP_INT: 10462 ir_emit_cond_cmp_int(ctx, i, insn); 10463 break; 10464 case IR_COND_CMP_FP: 10465 ir_emit_cond_cmp_fp(ctx, i, insn); 10466 break; 10467 case IR_SWITCH: 10468 ir_emit_switch(ctx, b, 
i, insn); 10469 break; 10470 case IR_MIN_MAX_INT: 10471 ir_emit_min_max_int(ctx, i, insn); 10472 break; 10473 case IR_OVERFLOW: 10474 ir_emit_overflow(ctx, i, insn); 10475 break; 10476 case IR_OVERFLOW_AND_BRANCH: 10477 ir_emit_overflow_and_branch(ctx, b, i, insn, _ir_next_block(ctx, _b)); 10478 break; 10479 case IR_END: 10480 case IR_LOOP_END: 10481 if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { 10482 ir_emit_osr_entry_loads(ctx, b, bb); 10483 } 10484 if (bb->flags & IR_BB_DESSA_MOVES) { 10485 ir_emit_dessa_moves(ctx, b, bb); 10486 } 10487 do { 10488 ir_ref succ = ctx->cfg_edges[bb->successors]; 10489 10490 if (UNEXPECTED(bb->successors_count == 2)) { 10491 if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { 10492 succ = ctx->cfg_edges[bb->successors + 1]; 10493 } else { 10494 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 10495 } 10496 } else { 10497 IR_ASSERT(bb->successors_count == 1); 10498 } 10499 target = ir_skip_empty_target_blocks(ctx, succ); 10500 if (target != _ir_next_block(ctx, _b)) { 10501 | jmp =>target 10502 } 10503 } while (0); 10504 break; 10505 case IR_RETURN_VOID: 10506 ir_emit_return_void(ctx); 10507 break; 10508 case IR_RETURN_INT: 10509 ir_emit_return_int(ctx, i, insn); 10510 break; 10511 case IR_RETURN_FP: 10512 ir_emit_return_fp(ctx, i, insn); 10513 break; 10514 case IR_CALL: 10515 ir_emit_call(ctx, i, insn); 10516 break; 10517 case IR_TAILCALL: 10518 ir_emit_tailcall(ctx, i, insn); 10519 break; 10520 case IR_IJMP: 10521 ir_emit_ijmp(ctx, i, insn); 10522 break; 10523 case IR_MEM_OP_INT: 10524 case IR_MEM_INC: 10525 case IR_MEM_DEC: 10526 ir_emit_mem_op_int(ctx, i, insn, *rule); 10527 break; 10528 case IR_MEM_BINOP_INT: 10529 ir_emit_mem_binop_int(ctx, i, insn); 10530 break; 10531 case IR_MEM_MUL_PWR2: 10532 case IR_MEM_DIV_PWR2: 10533 case IR_MEM_MOD_PWR2: 10534 ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn); 10535 break; 10536 case IR_MEM_SHIFT: 10537 ir_emit_mem_shift(ctx, i, insn); 10538 break; 10539 case IR_MEM_SHIFT_CONST: 10540 ir_emit_mem_shift_const(ctx, i, insn); 10541 break; 10542 case IR_REG_BINOP_INT: 10543 ir_emit_reg_binop_int(ctx, i, insn); 10544 break; 10545 case IR_VADDR: 10546 ir_emit_vaddr(ctx, i, insn); 10547 break; 10548 case IR_VLOAD: 10549 ir_emit_vload(ctx, i, insn); 10550 break; 10551 case IR_VSTORE_INT: 10552 ir_emit_vstore_int(ctx, i, insn); 10553 break; 10554 case IR_VSTORE_FP: 10555 ir_emit_vstore_fp(ctx, i, insn); 10556 break; 10557 case IR_RLOAD: 10558 ir_emit_rload(ctx, i, insn); 10559 break; 10560 case IR_RSTORE: 10561 ir_emit_rstore(ctx, i, insn); 10562 break; 10563 case IR_LOAD_INT: 10564 ir_emit_load_int(ctx, i, insn); 10565 break; 10566 case IR_LOAD_FP: 10567 ir_emit_load_fp(ctx, i, insn); 10568 break; 10569 case IR_STORE_INT: 10570 ir_emit_store_int(ctx, i, insn); 10571 break; 10572 case IR_STORE_FP: 10573 ir_emit_store_fp(ctx, i, insn); 10574 break; 10575 case IR_ALLOCA: 10576 ir_emit_alloca(ctx, i, insn); 10577 break; 10578 case IR_VA_START: 10579 ir_emit_va_start(ctx, i, insn); 10580 break; 10581 case IR_VA_COPY: 10582 ir_emit_va_copy(ctx, i, insn); 10583 break; 10584 case IR_VA_ARG: 10585 ir_emit_va_arg(ctx, i, insn); 10586 break; 10587 case IR_AFREE: 10588 ir_emit_afree(ctx, i, insn); 10589 break; 10590 case IR_BLOCK_BEGIN: 10591 ir_emit_block_begin(ctx, i, insn); 10592 break; 10593 case IR_BLOCK_END: 10594 ir_emit_block_end(ctx, i, insn); 10595 break; 10596 case IR_FRAME_ADDR: 10597 ir_emit_frame_addr(ctx, i); 10598 break; 10599 case IR_EXITCALL: 10600 ir_emit_exitcall(ctx, i, insn); 10601 break; 
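				/* Guard rules return nonzero when they fully consume the
				 * block's control flow (the side exit plus the fall-through
				 * jump); in that case the rest of the block is skipped. */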
10602 case IR_GUARD: 10603 case IR_GUARD_NOT: 10604 if (ir_emit_guard(ctx, b, i, insn, _ir_next_block(ctx, _b))) { 10605 goto next_block; 10606 } 10607 break; 10608 case IR_GUARD_OVERFLOW: 10609 if (ir_emit_guard_overflow(ctx, b, i, insn)) { 10610 goto next_block; 10611 } 10612 break; 10613 case IR_SSE_SQRT: 10614 ir_emit_sse_sqrt(ctx, i, insn); 10615 break; 10616 case IR_SSE_RINT: 10617 ir_emit_sse_round(ctx, i, insn, 4); 10618 break; 10619 case IR_SSE_FLOOR: 10620 ir_emit_sse_round(ctx, i, insn, 9); 10621 break; 10622 case IR_SSE_CEIL: 10623 ir_emit_sse_round(ctx, i, insn, 10); 10624 break; 10625 case IR_SSE_TRUNC: 10626 ir_emit_sse_round(ctx, i, insn, 11); 10627 break; 10628 case IR_SSE_NEARBYINT: 10629 ir_emit_sse_round(ctx, i, insn, 12); 10630 break; 10631 case IR_TLS: 10632 ir_emit_tls(ctx, i, insn); 10633 break; 10634 case IR_TRAP: 10635 | int3 10636 break; 10637 default: 10638 IR_ASSERT(0 && "NIY rule/instruction"); 10639 ir_mem_free(data.emit_constants); 10640 dasm_free(&data.dasm_state); 10641 ctx->data = NULL; 10642 ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; 10643 return NULL; 10644 } 10645 n = ir_insn_len(insn); 10646 i += n; 10647 insn += n; 10648 rule += n; 10649 } 10650next_block:; 10651 } 10652 10653 if (data.rodata_label) { 10654 |.rodata 10655 } 10656 IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) { 10657 insn = &ctx->ir_base[-i]; 10658 if (IR_IS_TYPE_FP(insn->type)) { 10659 int label = ctx->cfg_blocks_count + i; 10660 10661 if (!data.rodata_label) { 10662 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 10663 10664 |.rodata 10665 |=>data.rodata_label: 10666 } 10667 if (insn->type == IR_DOUBLE) { 10668 |.align 8 10669 |=>label: 10670 |.dword insn->val.u32, insn->val.u32_hi 10671 } else { 10672 IR_ASSERT(insn->type == IR_FLOAT); 10673 |.align 4 10674 |=>label: 10675 |.dword insn->val.u32 10676 } 10677 } else if (insn->op == IR_STR) { 10678 int label = ctx->cfg_blocks_count + i; 10679 const char *str = ir_get_str(ctx, insn->val.str); 10680 int i = 0; 10681 10682 if (!data.rodata_label) { 10683 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 10684 10685 |.rodata 10686 |=>data.rodata_label: 10687 } 10688 |.align 8 10689 |=>label: 10690 while (str[i]) { 10691 char c = str[i]; 10692 10693 |.byte c 10694 i++; 10695 } 10696 |.byte 0 10697 10698 } else { 10699 IR_ASSERT(0); 10700 } 10701 } IR_BITSET_FOREACH_END(); 10702 if (data.rodata_label) { 10703 |.code 10704 } 10705 ir_mem_free(data.emit_constants); 10706 10707 if (ctx->status) { 10708 dasm_free(&data.dasm_state); 10709 ctx->data = NULL; 10710 return NULL; 10711 } 10712 10713 ret = dasm_link(&data.dasm_state, size_ptr); 10714 if (ret != DASM_S_OK) { 10715 IR_ASSERT(0); 10716 dasm_free(&data.dasm_state); 10717 ctx->data = NULL; 10718 ctx->status = IR_ERROR_LINK; 10719 return NULL; 10720 } 10721 size = *size_ptr; 10722 10723 if (ctx->code_buffer) { 10724 entry = ctx->code_buffer->pos; 10725 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 10726 if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) { 10727 ctx->data = NULL; 10728 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 10729 return NULL; 10730 } 10731 ctx->code_buffer->pos = (char*)entry + size; 10732 } else { 10733 entry = ir_mem_mmap(size); 10734 if (!entry) { 10735 dasm_free(&data.dasm_state); 10736 ctx->data = NULL; 10737 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 10738 return NULL; 10739 } 10740 ir_mem_unprotect(entry, size); 10741 } 10742 10743 ret = dasm_encode(&data.dasm_state, 
entry); 10744 if (ret != DASM_S_OK) { 10745 IR_ASSERT(0); 10746 dasm_free(&data.dasm_state); 10747 if (ctx->code_buffer) { 10748 if (ctx->code_buffer->pos == (char*)entry + size) { 10749 /* rollback */ 10750 ctx->code_buffer->pos = (char*)entry - size; 10751 } 10752 } else { 10753 ir_mem_unmap(entry, size); 10754 } 10755 ctx->data = NULL; 10756 ctx->status = IR_ERROR_ENCODE; 10757 return NULL; 10758 } 10759 10760 if (data.jmp_table_label) { 10761 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); 10762 ctx->jmp_table_offset = offset; 10763 } else { 10764 ctx->jmp_table_offset = 0; 10765 } 10766 if (data.rodata_label) { 10767 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); 10768 ctx->rodata_offset = offset; 10769 } else { 10770 ctx->rodata_offset = 0; 10771 } 10772 10773 if (ctx->entries_count) { 10774 /* For all entries */ 10775 i = ctx->entries_count; 10776 do { 10777 ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; 10778 uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); 10779 insn->op3 = offset; 10780 } while (i != 0); 10781 } 10782 10783 dasm_free(&data.dasm_state); 10784 10785 ir_mem_flush(entry, size); 10786 10787#if defined(__GNUC__) 10788 if ((ctx->flags & IR_GEN_CACHE_DEMOTE) && (ctx->mflags & IR_X86_CLDEMOTE)) { 10789 uintptr_t start = (uintptr_t)entry; 10790 uintptr_t p = (uintptr_t)start & ~0x3F; 10791 10792 do { 10793 /* _cldemote(p); */ 10794 asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); 10795 p += 64; 10796 } while (p < start + size); 10797 } 10798#endif 10799 10800 if (!ctx->code_buffer) { 10801 ir_mem_protect(entry, size); 10802 } 10803 10804 ctx->data = NULL; 10805 return entry; 10806} 10807 10808const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr) 10809{ 10810 void *entry; 10811 size_t size; 10812 uint32_t i; 10813 dasm_State **Dst, *dasm_state; 10814 int ret; 10815 10816 IR_ASSERT(code_buffer); 10817 IR_ASSERT(sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(code_buffer, exit_addr)); 10818 10819 Dst = &dasm_state; 10820 dasm_state = NULL; 10821 dasm_init(&dasm_state, DASM_MAXSECTION); 10822 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 10823 dasm_setup(&dasm_state, dasm_actions); 10824 10825 for (i = 0; i < exit_points_per_group - 1; i++) { 10826 | push byte i 10827 | .byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1 10828 } 10829 | push byte i 10830 |// 1: 10831 | add aword [r4], first_exit_point 10832 | jmp aword &exit_addr 10833 10834 ret = dasm_link(&dasm_state, &size); 10835 if (ret != DASM_S_OK) { 10836 IR_ASSERT(0); 10837 dasm_free(&dasm_state); 10838 return NULL; 10839 } 10840 10841 entry = code_buffer->pos; 10842 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 10843 if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { 10844 return NULL; 10845 } 10846 code_buffer->pos = (char*)entry + size; 10847 10848 ret = dasm_encode(&dasm_state, entry); 10849 if (ret != DASM_S_OK) { 10850 IR_ASSERT(0); 10851 dasm_free(&dasm_state); 10852 if (code_buffer->pos == (char*)entry + size) { 10853 /* rollback */ 10854 code_buffer->pos = (char*)entry - size; 10855 } 10856 return NULL; 10857 } 10858 10859 dasm_free(&dasm_state); 10860 10861 ir_mem_flush(entry, size); 10862 10863 *size_ptr = size; 10864 return entry; 10865} 10866 10867bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr) 10868{ 10869 return sizeof(void*) == 8 && 
!IR_MAY_USE_32BIT_ADDR(code_buffer, addr); 10870} 10871 10872void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr) 10873{ 10874 void *entry; 10875 size_t size; 10876 dasm_State **Dst, *dasm_state; 10877 int ret; 10878 10879 Dst = &dasm_state; 10880 dasm_state = NULL; 10881 dasm_init(&dasm_state, DASM_MAXSECTION); 10882 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 10883 dasm_setup(&dasm_state, dasm_actions); 10884 10885 |.code 10886 |.if X64 10887 | jmp aword [>1] 10888 |1: 10889 | .aword &addr 10890 |.else 10891 | jmp &addr 10892 |.endif 10893 10894 ret = dasm_link(&dasm_state, &size); 10895 if (ret != DASM_S_OK) { 10896 IR_ASSERT(0); 10897 dasm_free(&dasm_state); 10898 return NULL; 10899 } 10900 10901 if (size > (size_t)((char*)code_buffer->end - (char*)code_buffer->pos)) { 10902 dasm_free(&dasm_state); 10903 return NULL; 10904 } 10905 10906 entry = code_buffer->pos; 10907 ret = dasm_encode(&dasm_state, entry); 10908 if (ret != DASM_S_OK) { 10909 dasm_free(&dasm_state); 10910 return NULL; 10911 } 10912 10913 *size_ptr = size; 10914 code_buffer->pos = (char*)code_buffer->pos + size; 10915 10916 dasm_free(&dasm_state); 10917 ir_mem_flush(entry, size); 10918 10919 return entry; 10920} 10921 10922void ir_fix_thunk(void *thunk_entry, void *addr) 10923{ 10924 unsigned char *code = thunk_entry; 10925 10926 if (sizeof(void*) == 8 && !IR_IS_SIGNED_32BIT(((unsigned char*)addr - (code + 5)))) { 10927 int32_t *offset_ptr; 10928 void **addr_ptr; 10929 10930 IR_ASSERT(code[0] == 0xff && code[1] == 0x25); 10931 offset_ptr = (int32_t*)(code + 2); 10932 addr_ptr = (void**)(code + 6 + *offset_ptr); 10933 *addr_ptr = addr; 10934 } else { 10935 int32_t *addr_ptr; 10936 10937 code[0] = 0xe9; 10938 addr_ptr = (int32_t*)(code + 1); 10939 *addr_ptr = (int32_t)(intptr_t)(void*)((unsigned char*)addr - (code + 5)); 10940 } 10941} 10942
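/*
 * Usage sketch (illustration only, not part of the generator): how the
 * thunk helpers above fit together.  `buf` is a hypothetical, already
 * initialized ir_code_buffer.  On x86_64 a thunk is emitted as
 * `jmp aword [rip+disp32]` (ff 25) followed by an 8-byte target slot, so
 * ir_fix_thunk() retargets it by rewriting the slot; on 32-bit targets
 * (or when the new target is within rel32 range) it becomes a plain
 * `jmp rel32` (e9) and the displacement is recomputed as
 * target - (thunk + 5) instead.
 */
#if 0
static void *example_bind_target(ir_code_buffer *buf, void *target)
{
	size_t size;
	void *thunk = NULL;

	if (ir_needs_thunk(buf, target)) {
		thunk = ir_emit_thunk(buf, target, &size);
		if (thunk) {
			/* later, if `target` moves, repoint the existing thunk */
			ir_fix_thunk(thunk, target);
		}
	}
	return thunk;
}
#endif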