1/* 2 * IR - Lightweight JIT Compilation Framework 3 * (x86/x86_64 native code generator based on DynAsm) 4 * Copyright (C) 2022 Zend by Perforce. 5 * Authors: Dmitry Stogov <dmitry@php.net> 6 */ 7 8|.if X64 9|.arch x64 10|.else 11|.arch x86 12|.endif 13 14|.actionlist dasm_actions 15|.globals ir_lb 16|.section code, cold_code, rodata, jmp_table 17 18#ifdef IR_DEBUG 19typedef struct _ir_mem {uint64_t v;} ir_mem; 20 21# define IR_MEM_VAL(loc) ((loc).v) 22#else 23typedef uint64_t ir_mem; 24 25# define IR_MEM_VAL(loc) (loc) 26#endif 27 28#define IR_MEM_OFFSET(loc) ((int32_t)(IR_MEM_VAL(loc) & 0xffffffff)) 29#define IR_MEM_BASE(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 32) & 0xff)) 30#define IR_MEM_INDEX(loc) ((ir_reg)((IR_MEM_VAL(loc) >> 40) & 0xff)) 31#define IR_MEM_SCALE(loc) ((int32_t)((IR_MEM_VAL(loc) >> 48) & 0xff)) 32 33#define IR_MEM_O(addr) IR_MEM(IR_REG_NONE, addr, IR_REG_NONE, 1) 34#define IR_MEM_B(base) IR_MEM(base, 0, IR_REG_NONE, 1) 35#define IR_MEM_BO(base, offset) IR_MEM(base, offset, IR_REG_NONE, 1) 36 37IR_ALWAYS_INLINE ir_mem IR_MEM(ir_reg base, int32_t offset, ir_reg index, int32_t scale) 38{ 39 ir_mem mem; 40 IR_ASSERT(base == IR_REG_NONE || (base >= IR_REG_GP_FIRST && base <= IR_REG_GP_LAST)); 41 IR_ASSERT(index == IR_REG_NONE || (index >= IR_REG_GP_FIRST && index <= IR_REG_GP_LAST)); 42 IR_ASSERT(scale == 1 || scale == 2 || scale == 4 || scale == 8); 43#ifdef IR_DEBUG 44 mem.v = 45#else 46 mem = 47#endif 48 ((uint64_t)(uint32_t)offset | 49 ((uint64_t)(uint8_t)base << 32) | 50 ((uint64_t)(uint8_t)index << 40) | 51 ((uint64_t)(uint8_t)scale << 48)); 52 return mem; 53} 54 55#define IR_IS_SIGNED_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= (-2147483647 - 1))) 56#define IR_IS_SIGNED_NEG_32BIT(val) ((((intptr_t)(val)) <= 0x7fffffff) && (((intptr_t)(val)) >= -2147483647)) 57#define IR_IS_UNSIGNED_32BIT(val) (((uintptr_t)(val)) <= 0xffffffff) 58#define IR_IS_32BIT(type, val) (IR_IS_TYPE_SIGNED(type) ? 
IR_IS_SIGNED_32BIT((val).i64) : IR_IS_UNSIGNED_32BIT((val).u64)) 59#define IR_IS_FP_ZERO(insn) ((insn.type == IR_DOUBLE) ? (insn.val.u64 == 0) : (insn.val.u32 == 0)) 60#define IR_MAY_USE_32BIT_ADDR(code_buffer, addr) \ 61 ((code_buffer) && \ 62 IR_IS_SIGNED_32BIT((char*)(addr) - (char*)(code_buffer)->start) && \ 63 IR_IS_SIGNED_32BIT((char*)(addr) - ((char*)(code_buffer)->end))) 64 65#define IR_SPILL_POS_TO_OFFSET(offset) \ 66 ((ctx->flags & IR_USE_FRAME_POINTER) ? \ 67 ((offset) - (ctx->stack_frame_size - ctx->stack_frame_alignment)) : \ 68 ((offset) + ctx->call_stack_size)) 69 70|.macro ASM_EXPAND_OP_MEM, MACRO, op, type, op1 71|| do { 72|| int32_t offset = IR_MEM_OFFSET(op1); 73|| int32_t base = IR_MEM_BASE(op1); 74|| int32_t index = IR_MEM_INDEX(op1); 75|| int32_t scale = IR_MEM_SCALE(op1); 76|| if (index == IR_REG_NONE) { 77|| if (base == IR_REG_NONE) { 78| MACRO op, type, [offset] 79|| } else { 80| MACRO op, type, [Ra(base)+offset] 81|| } 82|| } else if (scale == 8) { 83|| if (base == IR_REG_NONE) { 84| MACRO op, type, [Ra(index)*8+offset] 85|| } else { 86| MACRO op, type, [Ra(base)+Ra(index)*8+offset] 87|| } 88|| } else if (scale == 4) { 89|| if (base == IR_REG_NONE) { 90| MACRO op, type, [Ra(index)*4+offset] 91|| } else { 92| MACRO op, type, [Ra(base)+Ra(index)*4+offset] 93|| } 94|| } else if (scale == 2) { 95|| if (base == IR_REG_NONE) { 96| MACRO op, type, [Ra(index)*2+offset] 97|| } else { 98| MACRO op, type, [Ra(base)+Ra(index)*2+offset] 99|| } 100|| } else { 101|| IR_ASSERT(scale == 1); 102|| if (base == IR_REG_NONE) { 103| MACRO op, type, [Ra(index)+offset] 104|| } else { 105| MACRO op, type, [Ra(base)+Ra(index)+offset] 106|| } 107|| } 108|| } while (0); 109|.endmacro 110 111|.macro ASM_EXPAND_OP1_MEM, MACRO, op, type, op1, op2 112|| do { 113|| int32_t offset = IR_MEM_OFFSET(op1); 114|| int32_t base = IR_MEM_BASE(op1); 115|| int32_t index = IR_MEM_INDEX(op1); 116|| int32_t scale = IR_MEM_SCALE(op1); 117|| if (index == IR_REG_NONE) { 118|| if (base == 
IR_REG_NONE) { 119| MACRO op, type, [offset], op2 120|| } else { 121| MACRO op, type, [Ra(base)+offset], op2 122|| } 123|| } else if (scale == 8) { 124|| if (base == IR_REG_NONE) { 125| MACRO op, type, [Ra(index)*8+offset], op2 126|| } else { 127| MACRO op, type, [Ra(base)+Ra(index)*8+offset], op2 128|| } 129|| } else if (scale == 4) { 130|| if (base == IR_REG_NONE) { 131| MACRO op, type, [Ra(index)*4+offset], op2 132|| } else { 133| MACRO op, type, [Ra(base)+Ra(index)*4+offset], op2 134|| } 135|| } else if (scale == 2) { 136|| if (base == IR_REG_NONE) { 137| MACRO op, type, [Ra(index)*2+offset], op2 138|| } else { 139| MACRO op, type, [Ra(base)+Ra(index)*2+offset], op2 140|| } 141|| } else { 142|| IR_ASSERT(scale == 1); 143|| if (base == IR_REG_NONE) { 144| MACRO op, type, [Ra(index)+offset], op2 145|| } else { 146| MACRO op, type, [Ra(base)+Ra(index)+offset], op2 147|| } 148|| } 149|| } while (0); 150|.endmacro 151 152|.macro ASM_EXPAND_OP2_MEM, MACRO, op, type, op1, op2 153|| do { 154|| int32_t offset = IR_MEM_OFFSET(op2); 155|| int32_t base = IR_MEM_BASE(op2); 156|| int32_t index = IR_MEM_INDEX(op2); 157|| int32_t scale = IR_MEM_SCALE(op2); 158|| if (index == IR_REG_NONE) { 159|| if (base == IR_REG_NONE) { 160| MACRO op, type, op1, [offset] 161|| } else { 162| MACRO op, type, op1, [Ra(base)+offset] 163|| } 164|| } else if (scale == 8) { 165|| if (base == IR_REG_NONE) { 166| MACRO op, type, op1, [Ra(index)*8+offset] 167|| } else { 168| MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset] 169|| } 170|| } else if (scale == 4) { 171|| if (base == IR_REG_NONE) { 172| MACRO op, type, op1, [Ra(index)*4+offset] 173|| } else { 174| MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset] 175|| } 176|| } else if (scale == 2) { 177|| if (base == IR_REG_NONE) { 178| MACRO op, type, op1, [Ra(index)*2+offset] 179|| } else { 180| MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset] 181|| } 182|| } else { 183|| IR_ASSERT(scale == 1); 184|| if (base == IR_REG_NONE) { 185| MACRO op, type, 
op1, [Ra(index)+offset] 186|| } else { 187| MACRO op, type, op1, [Ra(base)+Ra(index)+offset] 188|| } 189|| } 190|| } while (0); 191|.endmacro 192 193|.macro ASM_EXPAND_OP2_MEM_3, MACRO, op, type, op1, op2, op3 194|| do { 195|| int32_t offset = IR_MEM_OFFSET(op2); 196|| int32_t base = IR_MEM_BASE(op2); 197|| int32_t index = IR_MEM_INDEX(op2); 198|| int32_t scale = IR_MEM_SCALE(op2); 199|| if (index == IR_REG_NONE) { 200|| if (base == IR_REG_NONE) { 201| MACRO op, type, op1, [offset], op3 202|| } else { 203| MACRO op, type, op1, [Ra(base)+offset], op3 204|| } 205|| } else if (scale == 8) { 206|| if (base == IR_REG_NONE) { 207| MACRO op, type, op1, [Ra(index)*8+offset], op3 208|| } else { 209| MACRO op, type, op1, [Ra(base)+Ra(index)*8+offset], op3 210|| } 211|| } else if (scale == 4) { 212|| if (base == IR_REG_NONE) { 213| MACRO op, type, op1, [Ra(index)*4+offset], op3 214|| } else { 215| MACRO op, type, op1, [Ra(base)+Ra(index)*4+offset], op3 216|| } 217|| } else if (scale == 2) { 218|| if (base == IR_REG_NONE) { 219| MACRO op, type, op1, [Ra(index)*2+offset], op3 220|| } else { 221| MACRO op, type, op1, [Ra(base)+Ra(index)*2+offset], op3 222|| } 223|| } else { 224|| IR_ASSERT(scale == 1); 225|| if (base == IR_REG_NONE) { 226| MACRO op, type, op1, [Ra(index)+offset], op3 227|| } else { 228| MACRO op, type, op1, [Ra(base)+Ra(index)+offset], op3 229|| } 230|| } 231|| } while (0); 232|.endmacro 233 234|.macro ASM_EXPAND_OP3_MEM, MACRO, op, type, op1, op2, op3 235|| do { 236|| int32_t offset = IR_MEM_OFFSET(op3); 237|| int32_t base = IR_MEM_BASE(op3); 238|| int32_t index = IR_MEM_INDEX(op3); 239|| int32_t scale = IR_MEM_SCALE(op3); 240|| if (index == IR_REG_NONE) { 241|| if (base == IR_REG_NONE) { 242| MACRO op, type, op1, op2, [offset] 243|| } else { 244| MACRO op, type, op1, op2, [Ra(base)+offset] 245|| } 246|| } else if (scale == 8) { 247|| if (base == IR_REG_NONE) { 248| MACRO op, type, op1, op2, [Ra(index)*8+offset] 249|| } else { 250| MACRO op, type, op1, op2, 
[Ra(base)+Ra(index)*8+offset] 251|| } 252|| } else if (scale == 4) { 253|| if (base == IR_REG_NONE) { 254| MACRO op, type, op1, op2, [Ra(index)*4+offset] 255|| } else { 256| MACRO op, type, op1, op2, [Ra(base)+Ra(index)*4+offset] 257|| } 258|| } else if (scale == 2) { 259|| if (base == IR_REG_NONE) { 260| MACRO op, type, op1, op2, [Ra(index)*2+offset] 261|| } else { 262| MACRO op, type, op1, op2, [Ra(base)+Ra(index)*2+offset] 263|| } 264|| } else { 265|| IR_ASSERT(scale == 1); 266|| if (base == IR_REG_NONE) { 267| MACRO op, type, op1, op2, [Ra(index)+offset] 268|| } else { 269| MACRO op, type, op1, op2, [Ra(base)+Ra(index)+offset] 270|| } 271|| } 272|| } while (0); 273|.endmacro 274 275|.macro ASM_EXPAND_TYPE_MEM, op, type, op1 276|| switch (ir_type_size[type]) { 277|| default: 278|| IR_ASSERT(0); 279|| case 1: 280| op byte op1 281|| break; 282|| case 2: 283| op word op1 284|| break; 285|| case 4: 286| op dword op1 287|| break; 288|.if X64 289|| case 8: 290| op qword op1 291|| break; 292|.endif 293|| } 294|.endmacro 295 296|.macro ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2 297|| switch (ir_type_size[type]) { 298|| default: 299|| IR_ASSERT(0); 300|| case 1: 301| op byte op1, Rb(op2) 302|| break; 303|| case 2: 304| op word op1, Rw(op2) 305|| break; 306|| case 4: 307| op dword op1, Rd(op2) 308|| break; 309|.if X64 310|| case 8: 311| op qword op1, Rq(op2) 312|| break; 313|.endif 314|| } 315|.endmacro 316 317|.macro ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2 318|| switch (ir_type_size[type]) { 319|| default: 320|| IR_ASSERT(0); 321|| case 1: 322| op byte op1, op2 323|| break; 324|| case 2: 325| op word op1, op2 326|| break; 327|| case 4: 328| op dword op1, op2 329|| break; 330|.if X64 331|| case 8: 332| op qword op1, op2 333|| break; 334|.endif 335|| } 336|.endmacro 337 338|.macro ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2 339|| switch (ir_type_size[type]) { 340|| default: 341|| IR_ASSERT(0); 342|| case 1: 343| op byte op1, (op2 & 0xff) 344|| break; 345|| case 2: 
346| op word op1, (op2 & 0xffff) 347|| break; 348|| case 4: 349| op dword op1, op2 350|| break; 351|.if X64 352|| case 8: 353| op qword op1, op2 354|| break; 355|.endif 356|| } 357|.endmacro 358 359|.macro ASM_EXPAND_TYPE_REG_MEM, op, type, op1, op2 360|| switch (ir_type_size[type]) { 361|| default: 362|| IR_ASSERT(0); 363|| case 1: 364| op Rb(op1), byte op2 365|| break; 366|| case 2: 367| op Rw(op1), word op2 368|| break; 369|| case 4: 370| op Rd(op1), dword op2 371|| break; 372|.if X64 373|| case 8: 374| op Rq(op1), qword op2 375|| break; 376|.endif 377|| } 378|.endmacro 379 380|.macro ASM_TMEM_OP, op, type, op1 381|| do { 382|| int32_t offset = IR_MEM_OFFSET(op1); 383|| int32_t base = IR_MEM_BASE(op1); 384|| int32_t index = IR_MEM_INDEX(op1); 385|| int32_t scale = IR_MEM_SCALE(op1); 386|| if (index == IR_REG_NONE) { 387|| if (base == IR_REG_NONE) { 388| op type [offset] 389|| } else { 390| op type [Ra(base)+offset] 391|| } 392|| } else if (scale == 8) { 393|| if (base == IR_REG_NONE) { 394| op type [Ra(index)*8+offset] 395|| } else { 396| op type [Ra(base)+Ra(index)*8+offset] 397|| } 398|| } else if (scale == 4) { 399|| if (base == IR_REG_NONE) { 400| op type [Ra(index)*4+offset] 401|| } else { 402| op type [Ra(base)+Ra(index)*4+offset] 403|| } 404|| } else if (scale == 2) { 405|| if (base == IR_REG_NONE) { 406| op type [Ra(index)*2+offset] 407|| } else { 408| op type [Ra(base)+Ra(index)*2+offset] 409|| } 410|| } else { 411|| IR_ASSERT(scale == 1); 412|| if (base == IR_REG_NONE) { 413| op type [Ra(index)+offset] 414|| } else { 415| op type [Ra(base)+Ra(index)+offset] 416|| } 417|| } 418|| } while (0); 419|.endmacro 420 421|.macro ASM_TXT_TMEM_OP, op, op1, type, op2 422|| do { 423|| int32_t offset = IR_MEM_OFFSET(op2); 424|| int32_t base = IR_MEM_BASE(op2); 425|| int32_t index = IR_MEM_INDEX(op2); 426|| int32_t scale = IR_MEM_SCALE(op2); 427|| if (index == IR_REG_NONE) { 428|| if (base == IR_REG_NONE) { 429| op op1, type [offset] 430|| } else { 431| op op1, type 
[Ra(base)+offset] 432|| } 433|| } else if (scale == 8) { 434|| if (base == IR_REG_NONE) { 435| op op1, type [Ra(index)*8+offset] 436|| } else { 437| op op1, type [Ra(base)+Ra(index)*8+offset] 438|| } 439|| } else if (scale == 4) { 440|| if (base == IR_REG_NONE) { 441| op op1, type [Ra(index)*4+offset] 442|| } else { 443| op op1, type [Ra(base)+Ra(index)*4+offset] 444|| } 445|| } else if (scale == 2) { 446|| if (base == IR_REG_NONE) { 447| op op1, type [Ra(index)*2+offset] 448|| } else { 449| op op1, type [Ra(base)+Ra(index)*2+offset] 450|| } 451|| } else { 452|| IR_ASSERT(scale == 1); 453|| if (base == IR_REG_NONE) { 454| op op1, type [Ra(index)+offset] 455|| } else { 456| op op1, type [Ra(base)+Ra(index)+offset] 457|| } 458|| } 459|| } while (0); 460|.endmacro 461 462|.macro ASM_TMEM_TXT_OP, op, type, op1, op2 463|| do { 464|| int32_t offset = IR_MEM_OFFSET(op1); 465|| int32_t base = IR_MEM_BASE(op1); 466|| int32_t index = IR_MEM_INDEX(op1); 467|| int32_t scale = IR_MEM_SCALE(op1); 468|| if (index == IR_REG_NONE) { 469|| if (base == IR_REG_NONE) { 470| op type [offset], op2 471|| } else { 472| op type [Ra(base)+offset], op2 473|| } 474|| } else if (scale == 8) { 475|| if (base == IR_REG_NONE) { 476| op type [Ra(index)*8+offset], op2 477|| } else { 478| op type [Ra(base)+Ra(index)*8+offset], op2 479|| } 480|| } else if (scale == 4) { 481|| if (base == IR_REG_NONE) { 482| op type [Ra(index)*4+offset], op2 483|| } else { 484| op type [Ra(base)+Ra(index)*4+offset], op2 485|| } 486|| } else if (scale == 2) { 487|| if (base == IR_REG_NONE) { 488| op type [Ra(index)*2+offset], op2 489|| } else { 490| op type [Ra(base)+Ra(index)*2+offset], op2 491|| } 492|| } else { 493|| IR_ASSERT(scale == 1); 494|| if (base == IR_REG_NONE) { 495| op type [Ra(index)+offset], op2 496|| } else { 497| op type [Ra(base)+Ra(index)+offset], op2 498|| } 499|| } 500|| } while (0); 501|.endmacro 502 503|.macro ASM_TXT_TXT_TMEM_OP, op, op1, op2, type, op3 504|| do { 505|| int32_t offset = 
IR_MEM_OFFSET(op3); 506|| int32_t base = IR_MEM_BASE(op3); 507|| int32_t index = IR_MEM_INDEX(op3); 508|| int32_t scale = IR_MEM_SCALE(op3); 509|| if (index == IR_REG_NONE) { 510|| if (base == IR_REG_NONE) { 511| op op1, op2, type [offset] 512|| } else { 513| op op1, op2, type [Ra(base)+offset] 514|| } 515|| } else if (scale == 8) { 516|| if (base == IR_REG_NONE) { 517| op op1, op2, type [Ra(index)*8+offset] 518|| } else { 519| op op1, op2, type [Ra(base)+Ra(index)*8+offset] 520|| } 521|| } else if (scale == 4) { 522|| if (base == IR_REG_NONE) { 523| op op1, op2, type [Ra(index)*4+offset] 524|| } else { 525| op op1, op2, type [Ra(base)+Ra(index)*4+offset] 526|| } 527|| } else if (scale == 2) { 528|| if (base == IR_REG_NONE) { 529| op op1, op2, type [Ra(index)*2+offset] 530|| } else { 531| op op1, op2, type [Ra(base)+Ra(index)*2+offset] 532|| } 533|| } else { 534|| IR_ASSERT(scale == 1); 535|| if (base == IR_REG_NONE) { 536| op op1, op2, type [Ra(index)+offset] 537|| } else { 538| op op1, op2, type [Ra(base)+Ra(index)+offset] 539|| } 540|| } 541|| } while (0); 542|.endmacro 543 544|.macro ASM_REG_OP, op, type, op1 545|| switch (ir_type_size[type]) { 546|| default: 547|| IR_ASSERT(0); 548|| case 1: 549| op Rb(op1) 550|| break; 551|| case 2: 552| op Rw(op1) 553|| break; 554|| case 4: 555| op Rd(op1) 556|| break; 557|.if X64 558|| case 8: 559| op Rq(op1) 560|| break; 561|.endif 562|| } 563|.endmacro 564 565|.macro ASM_MEM_OP, op, type, op1 566| ASM_EXPAND_OP_MEM ASM_EXPAND_TYPE_MEM, op, type, op1 567|.endmacro 568 569|.macro ASM_REG_REG_OP, op, type, op1, op2 570|| switch (ir_type_size[type]) { 571|| default: 572|| IR_ASSERT(0); 573|| case 1: 574| op Rb(op1), Rb(op2) 575|| break; 576|| case 2: 577| op Rw(op1), Rw(op2) 578|| break; 579|| case 4: 580| op Rd(op1), Rd(op2) 581|| break; 582|.if X64 583|| case 8: 584| op Rq(op1), Rq(op2) 585|| break; 586|.endif 587|| } 588|.endmacro 589 590|.macro ASM_REG_REG_OP2, op, type, op1, op2 591|| switch (ir_type_size[type]) { 592|| 
default: 593|| IR_ASSERT(0); 594|| case 1: 595|| case 2: 596| op Rw(op1), Rw(op2) 597|| break; 598|| case 4: 599| op Rd(op1), Rd(op2) 600|| break; 601|.if X64 602|| case 8: 603| op Rq(op1), Rq(op2) 604|| break; 605|.endif 606|| } 607|.endmacro 608 609|.macro ASM_REG_TXT_OP, op, type, op1, op2 610|| switch (ir_type_size[type]) { 611|| default: 612|| IR_ASSERT(0); 613|| case 1: 614| op Rb(op1), op2 615|| break; 616|| case 2: 617| op Rw(op1), op2 618|| break; 619|| case 4: 620| op Rd(op1), op2 621|| break; 622|.if X64 623|| case 8: 624| op Rq(op1), op2 625|| break; 626|.endif 627|| } 628|.endmacro 629 630|.macro ASM_REG_IMM_OP, op, type, op1, op2 631|| switch (ir_type_size[type]) { 632|| default: 633|| IR_ASSERT(0); 634|| case 1: 635| op Rb(op1), (op2 & 0xff) 636|| break; 637|| case 2: 638| op Rw(op1), (op2 & 0xffff) 639|| break; 640|| case 4: 641| op Rd(op1), op2 642|| break; 643|.if X64 644|| case 8: 645| op Rq(op1), op2 646|| break; 647|.endif 648|| } 649|.endmacro 650 651|.macro ASM_MEM_REG_OP, op, type, op1, op2 652| ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_REG, op, type, op1, op2 653|.endmacro 654 655|.macro ASM_MEM_TXT_OP, op, type, op1, op2 656| ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_TXT, op, type, op1, op2 657|.endmacro 658 659|.macro ASM_MEM_IMM_OP, op, type, op1, op2 660| ASM_EXPAND_OP1_MEM ASM_EXPAND_TYPE_MEM_IMM, op, type, op1, op2 661|.endmacro 662 663|.macro ASM_REG_MEM_OP, op, type, op1, op2 664| ASM_EXPAND_OP2_MEM ASM_REG_TXT_OP, op, type, op1, op2 665|.endmacro 666 667|.macro ASM_REG_REG_MUL, op, type, op1, op2 668|| switch (ir_type_size[type]) { 669|| default: 670|| IR_ASSERT(0); 671|| case 2: 672| op Rw(op1), Rw(op2) 673|| break; 674|| case 4: 675| op Rd(op1), Rd(op2) 676|| break; 677|.if X64 678|| case 8: 679| op Rq(op1), Rq(op2) 680|| break; 681|.endif 682|| } 683|.endmacro 684 685|.macro ASM_REG_IMM_MUL, op, type, op1, op2 686|| switch (ir_type_size[type]) { 687|| default: 688|| IR_ASSERT(0); 689|| case 2: 690| op Rw(op1), op2 691|| break; 692|| 
case 4: 693| op Rd(op1), op2 694|| break; 695|.if X64 696|| case 8: 697| op Rq(op1), op2 698|| break; 699|.endif 700|| } 701|.endmacro 702 703|.macro ASM_REG_TXT_MUL, op, type, op1, op2 704|| switch (ir_type_size[type]) { 705|| default: 706|| IR_ASSERT(0); 707|| case 2: 708| op Rw(op1), op2 709|| break; 710|| case 4: 711| op Rd(op1), op2 712|| break; 713|.if X64 714|| case 8: 715| op Rq(op1), op2 716|| break; 717|.endif 718|| } 719|.endmacro 720 721|.macro ASM_REG_MEM_MUL, op, type, op1, op2 722| ASM_EXPAND_OP2_MEM ASM_REG_TXT_MUL, op, type, op1, op2 723|.endmacro 724 725|.macro ASM_REG_TXT_TXT_MUL, op, type, op1, op2, op3 726|| switch (ir_type_size[type]) { 727|| default: 728|| IR_ASSERT(0); 729|| case 2: 730| op Rw(op1), op2, op3 731|| break; 732|| case 4: 733| op Rd(op1), op2, op3 734|| break; 735|.if X64 736|| case 8: 737| op Rq(op1), op2, op3 738|| break; 739|.endif 740|| } 741|.endmacro 742 743|.macro ASM_REG_MEM_TXT_MUL, op, type, op1, op2, op3 744| ASM_EXPAND_OP2_MEM_3 ASM_REG_TXT_TXT_MUL, imul, type, op1, op2, op3 745|.endmacro 746 747|.macro ASM_SSE2_REG_REG_OP, op, type, op1, op2 748|| if (type == IR_DOUBLE) { 749| op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) 750|| } else { 751|| IR_ASSERT(type == IR_FLOAT); 752| op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST) 753|| } 754|.endmacro 755 756|.macro ASM_SSE2_REG_TXT_OP, op, type, op1, op2 757|| if (type == IR_DOUBLE) { 758| op..d xmm(op1-IR_REG_FP_FIRST), qword op2 759|| } else { 760|| IR_ASSERT(type == IR_FLOAT); 761| op..s xmm(op1-IR_REG_FP_FIRST), dword op2 762|| } 763|.endmacro 764 765|.macro ASM_SSE2_REG_MEM_OP, op, type, op1, op2 766| ASM_EXPAND_OP2_MEM ASM_SSE2_REG_TXT_OP, op, type, op1, op2 767|.endmacro 768 769|.macro ASM_AVX_REG_REG_REG_OP, op, type, op1, op2, op3 770|| if (type == IR_DOUBLE) { 771| op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST) 772|| } else { 773|| IR_ASSERT(type == IR_FLOAT); 774| op..s xmm(op1-IR_REG_FP_FIRST), 
xmm(op2-IR_REG_FP_FIRST), xmm(op3-IR_REG_FP_FIRST) 775|| } 776|.endmacro 777 778|.macro ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3 779|| if (type == IR_DOUBLE) { 780| op..d xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), qword op3 781|| } else { 782|| IR_ASSERT(type == IR_FLOAT); 783| op..s xmm(op1-IR_REG_FP_FIRST), xmm(op2-IR_REG_FP_FIRST), dword op3 784|| } 785|.endmacro 786 787|.macro ASM_AVX_REG_REG_MEM_OP, op, type, op1, op2, op3 788| ASM_EXPAND_OP3_MEM ASM_AVX_REG_REG_TXT_OP, op, type, op1, op2, op3 789|.endmacro 790 791|.macro ASM_FP_REG_REG_OP, op, type, op1, op2 792|| if (ctx->mflags & IR_X86_AVX) { 793| ASM_SSE2_REG_REG_OP v..op, type, op1, op2 794|| } else { 795| ASM_SSE2_REG_REG_OP op, type, op1, op2 796|| } 797|.endmacro 798 799|.macro ASM_FP_TXT_REG_OP, op, type, dst, src 800|| if (type == IR_DOUBLE) { 801|| if (ctx->mflags & IR_X86_AVX) { 802| v..op..d qword dst, xmm(src-IR_REG_FP_FIRST) 803|| } else { 804| op..d qword dst, xmm(src-IR_REG_FP_FIRST) 805|| } 806|| } else { 807|| IR_ASSERT(type == IR_FLOAT); 808|| if (ctx->mflags & IR_X86_AVX) { 809| v..op..s dword dst, xmm(src-IR_REG_FP_FIRST) 810|| } else { 811| op..s dword dst, xmm(src-IR_REG_FP_FIRST) 812|| } 813|| } 814|.endmacro 815 816|.macro ASM_FP_MEM_REG_OP, op, type, op1, op2 817| ASM_EXPAND_OP1_MEM ASM_FP_TXT_REG_OP, op, type, op1, op2 818|.endmacro 819 820|.macro ASM_FP_REG_TXT_OP, op, type, op1, op2 821|| if (ctx->mflags & IR_X86_AVX) { 822| ASM_SSE2_REG_TXT_OP v..op, type, op1, op2 823|| } else { 824| ASM_SSE2_REG_TXT_OP op, type, op1, op2 825|| } 826|.endmacro 827 828|.macro ASM_FP_REG_MEM_OP, op, type, op1, op2 829|| if (ctx->mflags & IR_X86_AVX) { 830| ASM_SSE2_REG_MEM_OP v..op, type, op1, op2 831|| } else { 832| ASM_SSE2_REG_MEM_OP op, type, op1, op2 833|| } 834|.endmacro 835 836typedef struct _ir_backend_data { 837 ir_reg_alloc_data ra_data; 838 uint32_t dessa_from_block; 839 dasm_State *dasm_state; 840 ir_bitset emit_constants; 841 int rodata_label, jmp_table_label; 842 
bool double_neg_const; 843 bool float_neg_const; 844 bool double_abs_const; 845 bool float_abs_const; 846 bool double_zero_const; 847} ir_backend_data; 848 849#define IR_GP_REG_NAME(code, name64, name32, name16, name8, name8h) \ 850 #name64, 851#define IR_GP_REG_NAME32(code, name64, name32, name16, name8, name8h) \ 852 #name32, 853#define IR_GP_REG_NAME16(code, name64, name32, name16, name8, name8h) \ 854 #name16, 855#define IR_GP_REG_NAME8(code, name64, name32, name16, name8, name8h) \ 856 #name8, 857#define IR_FP_REG_NAME(code, name) \ 858 #name, 859 860static const char *_ir_reg_name[IR_REG_NUM] = { 861 IR_GP_REGS(IR_GP_REG_NAME) 862 IR_FP_REGS(IR_FP_REG_NAME) 863}; 864 865static const char *_ir_reg_name32[IR_REG_NUM] = { 866 IR_GP_REGS(IR_GP_REG_NAME32) 867}; 868 869static const char *_ir_reg_name16[IR_REG_NUM] = { 870 IR_GP_REGS(IR_GP_REG_NAME16) 871}; 872 873static const char *_ir_reg_name8[IR_REG_NUM] = { 874 IR_GP_REGS(IR_GP_REG_NAME8) 875}; 876 877/* Calling Convention */ 878#ifdef _WIN64 879 880static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { 881 IR_REG_INT_ARG1, 882 IR_REG_INT_ARG2, 883 IR_REG_INT_ARG3, 884 IR_REG_INT_ARG4, 885}; 886 887static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { 888 IR_REG_FP_ARG1, 889 IR_REG_FP_ARG2, 890 IR_REG_FP_ARG3, 891 IR_REG_FP_ARG4, 892}; 893 894#elif defined(IR_TARGET_X64) 895 896static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS] = { 897 IR_REG_INT_ARG1, 898 IR_REG_INT_ARG2, 899 IR_REG_INT_ARG3, 900 IR_REG_INT_ARG4, 901 IR_REG_INT_ARG5, 902 IR_REG_INT_ARG6, 903}; 904 905static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS] = { 906 IR_REG_FP_ARG1, 907 IR_REG_FP_ARG2, 908 IR_REG_FP_ARG3, 909 IR_REG_FP_ARG4, 910 IR_REG_FP_ARG5, 911 IR_REG_FP_ARG6, 912 IR_REG_FP_ARG7, 913 IR_REG_FP_ARG8, 914}; 915 916#else 917 918static const int8_t *_ir_int_reg_params = NULL; 919static const int8_t *_ir_fp_reg_params = NULL; 920static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS] = { 921 IR_REG_INT_FCARG1, 922 
IR_REG_INT_FCARG2, 923}; 924static const int8_t *_ir_fp_fc_reg_params = NULL; 925 926#endif 927 928const char *ir_reg_name(int8_t reg, ir_type type) 929{ 930 if (reg >= IR_REG_NUM) { 931 if (reg == IR_REG_SCRATCH) { 932 return "SCRATCH"; 933 } else { 934 IR_ASSERT(reg == IR_REG_ALL); 935 return "ALL"; 936 } 937 } 938 IR_ASSERT(reg >= 0 && reg < IR_REG_NUM); 939 if (type == IR_VOID) { 940 type = (reg < IR_REG_FP_FIRST) ? IR_ADDR : IR_DOUBLE; 941 } 942 if (IR_IS_TYPE_FP(type) || ir_type_size[type] == 8) { 943 return _ir_reg_name[reg]; 944 } else if (ir_type_size[type] == 4) { 945 return _ir_reg_name32[reg]; 946 } else if (ir_type_size[type] == 2) { 947 return _ir_reg_name16[reg]; 948 } else { 949 IR_ASSERT(ir_type_size[type] == 1); 950 return _ir_reg_name8[reg]; 951 } 952} 953 954#define IR_RULES(_) \ 955 _(CMP_INT) \ 956 _(CMP_FP) \ 957 _(MUL_INT) \ 958 _(DIV_INT) \ 959 _(MOD_INT) \ 960 _(TEST_INT) \ 961 _(SETCC_INT) \ 962 _(TESTCC_INT) \ 963 _(LEA_OB) \ 964 _(LEA_SI) \ 965 _(LEA_SIB) \ 966 _(LEA_IB) \ 967 _(LEA_SI_O) \ 968 _(LEA_SIB_O) \ 969 _(LEA_IB_O) \ 970 _(LEA_I_OB) \ 971 _(LEA_OB_I) \ 972 _(LEA_OB_SI) \ 973 _(LEA_SI_OB) \ 974 _(LEA_B_SI) \ 975 _(LEA_SI_B) \ 976 _(INC) \ 977 _(DEC) \ 978 _(MUL_PWR2) \ 979 _(DIV_PWR2) \ 980 _(MOD_PWR2) \ 981 _(SDIV_PWR2) \ 982 _(SMOD_PWR2) \ 983 _(BOOL_NOT_INT) \ 984 _(ABS_INT) \ 985 _(OP_INT) \ 986 _(OP_FP) \ 987 _(IMUL3) \ 988 _(BINOP_INT) \ 989 _(BINOP_SSE2) \ 990 _(BINOP_AVX) \ 991 _(SHIFT) \ 992 _(SHIFT_CONST) \ 993 _(COPY_INT) \ 994 _(COPY_FP) \ 995 _(CMP_AND_BRANCH_INT) \ 996 _(CMP_AND_BRANCH_FP) \ 997 _(TEST_AND_BRANCH_INT) \ 998 _(JCC_INT) \ 999 _(GUARD_CMP_INT) \ 1000 _(GUARD_CMP_FP) \ 1001 _(GUARD_TEST_INT) \ 1002 _(GUARD_JCC_INT) \ 1003 _(GUARD_OVERFLOW) \ 1004 _(OVERFLOW_AND_BRANCH) \ 1005 _(MIN_MAX_INT) \ 1006 _(MEM_OP_INT) \ 1007 _(MEM_INC) \ 1008 _(MEM_DEC) \ 1009 _(MEM_MUL_PWR2) \ 1010 _(MEM_DIV_PWR2) \ 1011 _(MEM_MOD_PWR2) \ 1012 _(MEM_BINOP_INT) \ 1013 _(MEM_SHIFT) \ 1014 _(MEM_SHIFT_CONST) \ 1015 
_(REG_BINOP_INT) \ 1016 _(VSTORE_INT) \ 1017 _(VSTORE_FP) \ 1018 _(LOAD_INT) \ 1019 _(LOAD_FP) \ 1020 _(STORE_INT) \ 1021 _(STORE_FP) \ 1022 _(IF_INT) \ 1023 _(RETURN_VOID) \ 1024 _(RETURN_INT) \ 1025 _(RETURN_FP) \ 1026 _(BIT_COUNT) \ 1027 1028#define IR_RULE_ENUM(name) IR_ ## name, 1029 1030enum _ir_rule { 1031 IR_FIRST_RULE = IR_LAST_OP, 1032 IR_RULES(IR_RULE_ENUM) 1033 IR_LAST_RULE 1034}; 1035 1036#define IR_RULE_NAME(name) #name, 1037const char *ir_rule_name[IR_LAST_OP] = { 1038 NULL, 1039 IR_RULES(IR_RULE_NAME) 1040 NULL 1041}; 1042 1043static bool ir_may_fuse_addr(ir_ctx *ctx, const ir_insn *addr_insn) 1044{ 1045 if (sizeof(void*) == 4) { 1046 return 1; 1047 } else if (IR_IS_SYM_CONST(addr_insn->op)) { 1048 void *addr = ir_sym_addr(ctx, addr_insn); 1049 1050 if (!addr) { 1051 return 0; 1052 } 1053 return IR_IS_SIGNED_32BIT((int64_t)(intptr_t)addr); 1054 } else { 1055 return IR_IS_SIGNED_32BIT(addr_insn->val.i64); 1056 } 1057} 1058 1059static bool ir_may_fuse_imm(ir_ctx *ctx, const ir_insn *val_insn) 1060{ 1061 if (val_insn->type == IR_ADDR) { 1062 if (sizeof(void*) == 4) { 1063 return 1; 1064 } else if (IR_IS_SYM_CONST(val_insn->op)) { 1065 void *addr = ir_sym_addr(ctx, val_insn); 1066 1067 if (!addr) { 1068 return 0; 1069 } 1070 return IR_IS_SIGNED_32BIT((intptr_t)addr); 1071 } else { 1072 return IR_IS_SIGNED_32BIT(val_insn->val.i64); 1073 } 1074 } else { 1075 return (ir_type_size[val_insn->type] <= 4 || IR_IS_SIGNED_32BIT(val_insn->val.i64)); 1076 } 1077} 1078 1079/* register allocation */ 1080static int ir_add_const_tmp_reg(ir_ctx *ctx, ir_ref ref, uint32_t num, int n, ir_target_constraints *constraints) 1081{ 1082 IR_ASSERT(IR_IS_CONST_REF(ref)); 1083 const ir_insn *val_insn = &ctx->ir_base[ref]; 1084 1085 if (!ir_may_fuse_imm(ctx, val_insn)) { 1086 constraints->tmp_regs[n] = IR_TMP_REG(num, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1087 n++; 1088 } 1089 return n; 1090} 1091 1092int ir_get_target_constraints(ir_ctx *ctx, ir_ref ref, 
ir_target_constraints *constraints) 1093{ 1094 uint32_t rule = ir_rule(ctx, ref); 1095 const ir_insn *insn; 1096 int n = 0; 1097 int flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; 1098 1099 constraints->def_reg = IR_REG_NONE; 1100 constraints->hints_count = 0; 1101 switch (rule & IR_RULE_MASK) { 1102 case IR_BINOP_INT: 1103 insn = &ctx->ir_base[ref]; 1104 if (rule & IR_FUSED) { 1105 if (ctx->ir_base[insn->op1].op == IR_RLOAD) { 1106 flags = IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1107 } else { 1108 flags = IR_OP2_MUST_BE_IN_REG; 1109 } 1110 } else { 1111 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1112 } 1113 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 1114 n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); 1115 } 1116 break; 1117 case IR_IMUL3: 1118 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1119 break; 1120 case IR_SHIFT: 1121 if (rule & IR_FUSED) { 1122 flags = IR_OP2_MUST_BE_IN_REG; 1123 } else { 1124 flags = IR_DEF_REUSES_OP1_REG | IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1125 } 1126 constraints->hints[1] = IR_REG_NONE; 1127 constraints->hints[2] = IR_REG_RCX; 1128 constraints->hints_count = 3; 1129 constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RCX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1130 n = 1; 1131 break; 1132 case IR_MUL_INT: 1133 /* %rax - used as input and result */ 1134 constraints->def_reg = IR_REG_RAX; 1135 constraints->hints[1] = IR_REG_RAX; 1136 constraints->hints_count = 2; 1137 flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1138 constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_USE_SUB_REF, IR_DEF_SUB_REF); 1139 constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 1140 n = 2; 1141 break; 1142 case IR_DIV_INT: 1143 /* %rax - used as 
input and result */ 1144 constraints->def_reg = IR_REG_RAX; 1145 constraints->hints[1] = IR_REG_RAX; 1146 constraints->hints_count = 2; 1147 flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1148 constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1149 constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 1150 n = 2; 1151 goto op2_const; 1152 case IR_MOD_INT: 1153 constraints->def_reg = IR_REG_RDX; 1154 constraints->hints[1] = IR_REG_RAX; 1155 constraints->hints_count = 2; 1156 flags = IR_USE_SHOULD_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1157 constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_RAX, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1158 constraints->tmp_regs[1] = IR_SCRATCH_REG(IR_REG_RDX, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF); 1159 n = 2; 1160 goto op2_const; 1161 case IR_MIN_MAX_INT: 1162 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; 1163op2_const: 1164 insn = &ctx->ir_base[ref]; 1165 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 1166 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1167 n++; 1168 } 1169 break; 1170 case IR_CMP_INT: 1171 case IR_TEST_INT: 1172 insn = &ctx->ir_base[ref]; 1173 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1174 if (IR_IS_CONST_REF(insn->op1)) { 1175 const ir_insn *val_insn = &ctx->ir_base[insn->op1]; 1176 constraints->tmp_regs[0] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1177 n = 1; 1178 } else if (ir_rule(ctx, insn->op1) & IR_FUSED) { 1179 flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; 1180 } 1181 if (IR_IS_CONST_REF(insn->op2) && insn->op1 != insn->op2) { 1182 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1183 n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); 1184 } 1185 break; 1186 case IR_CMP_FP: 1187 insn = 
&ctx->ir_base[ref]; 1188 if (!(rule & IR_FUSED)) { 1189 constraints->tmp_regs[0] = IR_TMP_REG(3, IR_BOOL, IR_DEF_SUB_REF, IR_SAVE_SUB_REF); 1190 n = 1; 1191 } 1192 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1193 if (IR_IS_CONST_REF(insn->op1)) { 1194 const ir_insn *val_insn = &ctx->ir_base[insn->op1]; 1195 constraints->tmp_regs[n] = IR_TMP_REG(1, val_insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1196 n++; 1197 } 1198 break; 1199 case IR_BINOP_AVX: 1200 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1201 insn = &ctx->ir_base[ref]; 1202 if (IR_IS_CONST_REF(insn->op1)) { 1203 constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1204 n = 1; 1205 } 1206 break; 1207 case IR_VSTORE_INT: 1208 flags = IR_OP3_MUST_BE_IN_REG; 1209 insn = &ctx->ir_base[ref]; 1210 if (IR_IS_CONST_REF(insn->op3)) { 1211 n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints); 1212 } 1213 break; 1214 case IR_STORE_INT: 1215 flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; 1216 insn = &ctx->ir_base[ref]; 1217 if (IR_IS_CONST_REF(insn->op2)) { 1218 n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); 1219 } 1220 if (IR_IS_CONST_REF(insn->op3)) { 1221 n = ir_add_const_tmp_reg(ctx, insn->op3, 3, n, constraints); 1222 } 1223 break; 1224 case IR_VSTORE_FP: 1225 flags = IR_OP3_MUST_BE_IN_REG; 1226 insn = &ctx->ir_base[ref]; 1227 if (IR_IS_CONST_REF(insn->op3)) { 1228 insn = &ctx->ir_base[insn->op3]; 1229 constraints->tmp_regs[0] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1230 n = 1; 1231 } 1232 break; 1233 case IR_LOAD_FP: 1234 case IR_LOAD_INT: 1235 case IR_MEM_OP_INT: 1236 case IR_MEM_INC: 1237 case IR_MEM_DEC: 1238 case IR_MEM_MUL_PWR2: 1239 case IR_MEM_DIV_PWR2: 1240 case IR_MEM_MOD_PWR2: 1241 case IR_MEM_BINOP_INT: 1242 case IR_MEM_SHIFT: 1243 case IR_MEM_SHIFT_CONST: 1244 flags = IR_USE_MUST_BE_IN_REG | IR_OP2_MUST_BE_IN_REG; 1245 insn = &ctx->ir_base[ref]; 
1246 if (IR_IS_CONST_REF(insn->op2)) { 1247 n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); 1248 } 1249 break; 1250 case IR_STORE_FP: 1251 flags = IR_OP2_MUST_BE_IN_REG | IR_OP3_MUST_BE_IN_REG; 1252 insn = &ctx->ir_base[ref]; 1253 if (IR_IS_CONST_REF(insn->op2)) { 1254 n = ir_add_const_tmp_reg(ctx, insn->op2, 2, n, constraints); 1255 } 1256 if (IR_IS_CONST_REF(insn->op3)) { 1257 insn = &ctx->ir_base[insn->op3]; 1258 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1259 n++; 1260 } 1261 break; 1262 case IR_SWITCH: 1263 flags = IR_OP2_MUST_BE_IN_REG; 1264 insn = &ctx->ir_base[ref]; 1265 if (IR_IS_CONST_REF(insn->op2)) { 1266 insn = &ctx->ir_base[insn->op2]; 1267 constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1268 n = 1; 1269 } 1270 if (sizeof(void*) == 8) { 1271 constraints->tmp_regs[n] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1272 n++; 1273 } 1274 break; 1275 case IR_CALL: 1276 insn = &ctx->ir_base[ref]; 1277 if (IR_IS_TYPE_INT(insn->type)) { 1278 constraints->def_reg = IR_REG_INT_RET1; 1279#ifdef IR_REG_FP_RET1 1280 } else { 1281 constraints->def_reg = IR_REG_FP_RET1; 1282#endif 1283 } 1284 constraints->tmp_regs[0] = IR_SCRATCH_REG(IR_REG_SCRATCH, IR_USE_SUB_REF, IR_DEF_SUB_REF); 1285 n = 1; 1286 IR_FALLTHROUGH; 1287 case IR_TAILCALL: 1288 insn = &ctx->ir_base[ref]; 1289 if (insn->inputs_count > 2) { 1290 constraints->hints[2] = IR_REG_NONE; 1291 constraints->hints_count = ir_get_args_regs(ctx, insn, constraints->hints); 1292 if (!IR_IS_CONST_REF(insn->op2)) { 1293 constraints->tmp_regs[n] = IR_TMP_REG(1, IR_ADDR, IR_LOAD_SUB_REF, IR_USE_SUB_REF); 1294 n++; 1295 } 1296 } 1297 flags = IR_USE_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG | IR_OP3_SHOULD_BE_IN_REG; 1298 break; 1299 case IR_BINOP_SSE2: 1300 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG | IR_OP2_SHOULD_BE_IN_REG; 1301 break; 1302 case IR_SHIFT_CONST: 1303 case 
IR_INC: 1304 case IR_DEC: 1305 case IR_MUL_PWR2: 1306 case IR_DIV_PWR2: 1307 case IR_OP_INT: 1308 case IR_OP_FP: 1309 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1310 break; 1311 case IR_MOD_PWR2: 1312 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1313 insn = &ctx->ir_base[ref]; 1314 if (ir_type_size[insn->type] == 8) { 1315 int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; 1316 if (!IR_IS_SIGNED_32BIT(offset)) { 1317 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1318 n++; 1319 } 1320 } 1321 break; 1322 case IR_SMOD_PWR2: 1323 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1324 insn = &ctx->ir_base[ref]; 1325 if (ir_type_size[insn->type] == 8) { 1326 int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; 1327 if (!IR_IS_SIGNED_32BIT(offset)) { 1328 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1329 n++; 1330 } 1331 } 1332 constraints->tmp_regs[n] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); 1333 n++; 1334 break; 1335 case IR_SDIV_PWR2: 1336 flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; 1337 insn = &ctx->ir_base[ref]; 1338 if (ir_type_size[insn->type] == 8) { 1339 int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; 1340 if (!IR_IS_SIGNED_32BIT(offset)) { 1341 constraints->tmp_regs[n] = IR_TMP_REG(2, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1342 n++; 1343 } 1344 } 1345 break; 1346 case IR_BIT_COUNT: 1347 insn = &ctx->ir_base[ref]; 1348 if (ir_type_size[insn->type] == 1) { 1349 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; 1350 } else { 1351 flags = IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1352 } 1353 if (IR_IS_CONST_REF(insn->op1)) { 1354 constraints->tmp_regs[0] = IR_TMP_REG(1, insn->type, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1355 n = 1; 1356 } 1357 break; 1358 case IR_CTPOP: 1359 flags = 
IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1360 insn = &ctx->ir_base[ref]; 1361 constraints->tmp_regs[0] = IR_TMP_REG(2, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); 1362 n = 1; 1363 if (ir_type_size[insn->type] == 8) { 1364 constraints->tmp_regs[1] = IR_TMP_REG(3, insn->type, IR_USE_SUB_REF, IR_SAVE_SUB_REF); 1365 n = 2; 1366 } 1367 break; 1368 case IR_COPY_INT: 1369 case IR_COPY_FP: 1370 case IR_SEXT: 1371 case IR_ZEXT: 1372 case IR_TRUNC: 1373 case IR_BITCAST: 1374 case IR_PROTO: 1375 flags = IR_DEF_REUSES_OP1_REG | IR_USE_MUST_BE_IN_REG | IR_OP1_SHOULD_BE_IN_REG; 1376 break; 1377 case IR_ABS_INT: 1378 flags = IR_DEF_CONFLICTS_WITH_INPUT_REGS | IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG; 1379 break; 1380 case IR_PARAM: 1381 constraints->def_reg = ir_get_param_reg(ctx, ref); 1382 flags = 0; 1383 break; 1384 case IR_PI: 1385 case IR_PHI: 1386 flags = IR_USE_SHOULD_BE_IN_REG; 1387 break; 1388 case IR_RLOAD: 1389 constraints->def_reg = ctx->ir_base[ref].op2; 1390 flags = IR_USE_SHOULD_BE_IN_REG; 1391 break; 1392 case IR_EXITCALL: 1393 flags = IR_USE_MUST_BE_IN_REG; 1394 constraints->def_reg = IR_REG_INT_RET1; 1395 break; 1396 case IR_IF_INT: 1397 case IR_GUARD: 1398 case IR_GUARD_NOT: 1399 flags = IR_OP2_SHOULD_BE_IN_REG; 1400 break; 1401 case IR_IJMP: 1402 flags = IR_OP2_SHOULD_BE_IN_REG; 1403 break; 1404 case IR_RSTORE: 1405 flags = IR_OP3_SHOULD_BE_IN_REG; 1406 break; 1407 case IR_RETURN_INT: 1408 flags = IR_OP2_SHOULD_BE_IN_REG; 1409 constraints->hints[2] = IR_REG_INT_RET1; 1410 constraints->hints_count = 3; 1411 break; 1412 case IR_RETURN_FP: 1413#ifdef IR_REG_FP_RET1 1414 flags = IR_OP2_SHOULD_BE_IN_REG; 1415 constraints->hints[2] = IR_REG_FP_RET1; 1416 constraints->hints_count = 3; 1417#endif 1418 break; 1419 case IR_SNAPSHOT: 1420 flags = 0; 1421 break; 1422 case IR_VA_START: 1423 flags = IR_OP1_MUST_BE_IN_REG; 1424 constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_DEF_SUB_REF); 1425 n = 1; 1426 break; 1427 case IR_VA_ARG: 1428 
			flags = IR_USE_MUST_BE_IN_REG | IR_OP1_MUST_BE_IN_REG;
			constraints->tmp_regs[0] = IR_TMP_REG(3, IR_ADDR, IR_LOAD_SUB_REF, IR_SAVE_SUB_REF);
			n = 1;
			break;
	}
	constraints->tmps_count = n;

	return flags;
}

/* instruction selection */
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref);
static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root);

/* Exchange op1 and op2 of an instruction (used to move the fusible or
 * constant operand into the canonical op2 position). */
static void ir_swap_ops(ir_insn *insn)
{
	ir_ref tmp = insn->op1;
	insn->op1 = insn->op2;
	insn->op2 = tmp;
}

/* Try to turn an instruction matched as IR_LEA_IB (LEA [X+Y]) back into a
 * plain ADD (IR_BINOP_INT) so that one of its operands can be fused as a
 * memory operand instead. Returns 1 when the rule was reverted. */
static bool ir_match_try_revert_lea_to_add(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *insn = &ctx->ir_base[ref];

	/* TODO: This optimization makes sense only if the other operand is killed */
	if (insn->op1 == insn->op2) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, ref)) {
		ctx->rules[ref] = IR_BINOP_INT;
		return 1;
	} else if (ir_match_try_fuse_load(ctx, insn->op1, ref)) {
		/* swap for better load fusion */
		ir_swap_ops(insn);
		ctx->rules[ref] = IR_BINOP_INT;
		return 1;
	}
	return 0;
}

/* If the address computation referenced by addr_ref was matched as one of the
 * LEA rules (IR_LEA_OB .. IR_LEA_SI_B), mark it IR_FUSED|IR_SIMPLE so it is
 * folded into the memory operand of its user(s) instead of being emitted as a
 * separate LEA instruction. */
static void ir_match_fuse_addr(ir_ctx *ctx, ir_ref addr_ref)
{
	if (!IR_IS_CONST_REF(addr_ref)) {
		uint32_t rule = ctx->rules[addr_ref];

		if (!rule) {
			/* match on demand: the address may not have been visited yet */
			ctx->rules[addr_ref] = rule = ir_match_insn(ctx, addr_ref);
		}
		if (rule >= IR_LEA_OB && rule <= IR_LEA_SI_B) {
			ir_use_list *use_list;
			ir_ref j;

			if (rule == IR_LEA_IB && ir_match_try_revert_lea_to_add(ctx, addr_ref)) {
				return;
			}

			use_list = &ctx->use_lists[addr_ref];
			j = use_list->count;
			if (j > 1) {
				/* check if address is used only in LOAD and STORE */
				ir_ref *p = &ctx->use_edges[use_list->refs];

				do {
					ir_insn *insn = &ctx->ir_base[*p];
					/* note: a STORE may use addr_ref as the stored value
					 * (op3) rather than as the address; that use blocks
					 * fusion */
					if (insn->op != IR_LOAD && (insn->op != IR_STORE || insn->op3 == addr_ref)) {
						return;
					}
					p++;
				} while (--j);
			}
			ctx->rules[addr_ref] = IR_FUSED | IR_SIMPLE | rule;
		}
	}
}

/* A naive check if there is a STORE or CALL between this LOAD and the fusion root */
static bool ir_match_has_mem_deps(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	if (ref + 1 != root) { /* adjacent instructions can have nothing in between */
		ir_ref pos = ctx->prev_ref[root];

		/* walk the linear instruction order backwards from root to ref */
		do {
			ir_insn *insn = &ctx->ir_base[pos];

			if (insn->op == IR_STORE) {
				// TODO: check if LOAD and STORE addresses may alias
				return 1;
			} else if (insn->op == IR_CALL) {
				/* a call may write any memory */
				return 1;
			}
			pos = ctx->prev_ref[pos];
		} while (ref != pos);
	}
	return 0;
}

/* Mark a LOAD as fused into its user at root when it is safe to do so:
 * same basic block, no intervening STORE/CALL, and no extra users.
 * The load's (non-constant) address computation is fused as well. */
static void ir_match_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	if (ir_in_same_block(ctx, ref)
	 && ctx->ir_base[ref].op == IR_LOAD) {
		/* count == 2: presumably the control link plus the single data
		 * consumer (root) -- TODO confirm */
		if (ctx->use_lists[ref].count == 2
		 && !ir_match_has_mem_deps(ctx, ref, root)) {
			ir_ref addr_ref = ctx->ir_base[ref].op2;
			ir_insn *addr_insn = &ctx->ir_base[addr_ref];

			if (IR_IS_CONST_REF(addr_ref)) {
				if (ir_may_fuse_addr(ctx, addr_insn)) {
					ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
					return;
				}
			} else {
				ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
				ir_match_fuse_addr(ctx, addr_ref);
				return;
			}
		}
	}
}

/* Like ir_match_fuse_load(), but reports success/failure to the caller; in
 * addition it treats single-use stack-passed PARAMs and VLOADs as fusible
 * memory operands. */
static bool ir_match_try_fuse_load(ir_ctx *ctx, ir_ref ref, ir_ref root)
{
	ir_insn *insn = &ctx->ir_base[ref];

	if (ir_in_same_block(ctx, ref)
	 && insn->op == IR_LOAD) {
		if (ctx->use_lists[ref].count == 2
		 && !ir_match_has_mem_deps(ctx, ref, root)) {
			ir_ref addr_ref = ctx->ir_base[ref].op2;
			ir_insn *addr_insn = &ctx->ir_base[addr_ref];

			if (IR_IS_CONST_REF(addr_ref)) {
				if (ir_may_fuse_addr(ctx, addr_insn)) {
					ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
					return 1;
				}
			} else {
				ctx->rules[ref] = IR_FUSED | IR_SIMPLE | IR_LOAD;
				ir_match_fuse_addr(ctx, addr_ref);
				return 1;
			}
		}
	} else if
	(insn->op == IR_PARAM) {
		/* a stack-passed PARAM that is not assigned an argument register can
		 * be addressed directly in its stack slot */
		if (ctx->use_lists[ref].count == 1
		 && ir_get_param_reg(ctx, ref) == IR_REG_NONE) {
			return 1;
		}
	} else if (ctx->ir_base[ref].op == IR_VLOAD) {
		return 1;
	}
	return 0;
}

/* Operand fusion for commutative integer binops: prefer a fusible immediate
 * or memory operand in op2; swap operands when only op1 is fusible. */
static void ir_match_fuse_load_commutative_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		return;
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		return;
	} else if (ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
	}
}

/* Same idea for commutative FP binops: constants and loads fuse only in the
 * op2 position */
static void ir_match_fuse_load_commutative_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (!IR_IS_CONST_REF(insn->op2)
	 && !ir_match_try_fuse_load(ctx, insn->op2, root)
	 && (IR_IS_CONST_REF(insn->op1) || ir_match_try_fuse_load(ctx, insn->op1, root))) {
		ir_swap_ops(insn);
	}
}

/* Fuse an immediate or memory operand into an integer compare; when the
 * operands are swapped, the ordered condition is mirrored (see `op ^= 3`). */
static void ir_match_fuse_load_cmp_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		ir_match_fuse_load(ctx, insn->op1, root);
	} else if (!ir_match_try_fuse_load(ctx, insn->op2, root)
	 && ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			/* mirror the comparison (LT<->GT, LE<->GE, and unsigned
			 * counterparts) via the opcode encoding */
			insn->op ^= 3;
		}
	}
}

/* Fuse an immediate or memory operand into an integer TEST; TEST is
 * symmetric, so no condition adjustment is needed after a swap */
static void ir_match_fuse_load_test_int(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (IR_IS_CONST_REF(insn->op2)
	 && ir_may_fuse_imm(ctx, &ctx->ir_base[insn->op2])) {
		ir_match_fuse_load(ctx, insn->op1, root);
	} else if (!ir_match_try_fuse_load(ctx, insn->op2, root)
	 && ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
	}
}

/* Operand fusion for FP compares. Ordered LT/LE are rewritten to GT/GE by
 * swapping operands, to avoid the extra parity-flag (unordered) check */
static void ir_match_fuse_load_cmp_fp(ir_ctx *ctx, ir_insn *insn, ir_ref root)
{
	if (insn->op != IR_EQ && insn->op != IR_NE) {
		if (insn->op == IR_LT || insn->op == IR_LE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
		ir_match_fuse_load(ctx, insn->op2, root);
	} else if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		/* pass */
	} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			/* mirror the comparison after the operand swap */
			insn->op ^= 3;
		}
	}
}

/* Variant of ir_match_fuse_load_cmp_fp() for compare-and-branch: `direct`
 * selects which branch polarity is emitted, so the opposite pair of ordered
 * relations needs the operand swap that avoids the P flag check */
static void ir_match_fuse_load_cmp_fp_br(ir_ctx *ctx, ir_insn *insn, ir_ref root, bool direct)
{
	if (direct) {
		if (insn->op == IR_LT || insn->op == IR_LE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
	} else {
		if (insn->op == IR_GT || insn->op == IR_GE) {
			/* swap operands to avoid P flag check */
			ir_swap_ops(insn);
			insn->op ^= 3;
		}
	}
	if (IR_IS_CONST_REF(insn->op2) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op2])) {
		/* pass */
	} else if (ir_match_try_fuse_load(ctx, insn->op2, root)) {
		/* pass */
	} else if ((IR_IS_CONST_REF(insn->op1) && !IR_IS_FP_ZERO(ctx->ir_base[insn->op1])) || ir_match_try_fuse_load(ctx, insn->op1, root)) {
		ir_swap_ops(insn);
		if (insn->op != IR_EQ && insn->op != IR_NE) {
			insn->op ^= 3;
		}
	}
}

/* Core instruction-selection routine: maps one IR instruction to a backend
 * rule, possibly marking operands as fused/skipped along the way */
static uint32_t ir_match_insn(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *op2_insn;
	ir_insn *insn = &ctx->ir_base[ref];
	uint32_t store_rule;
	ir_op load_op;

	switch (insn->op) {
		case IR_EQ:
		case IR_NE:
		case IR_LT:
		case IR_GE:
		case IR_LE:
		case IR_GT:
		case IR_ULT:
		case IR_UGE:
		case IR_ULE:
		case IR_UGT:
			if (IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)) {
				if (IR_IS_CONST_REF(insn->op2)
				 && !IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)
&& ctx->ir_base[insn->op2].val.i64 == 0 1696 && insn->op1 == ref - 1) { /* previous instruction */ 1697 ir_insn *op1_insn = &ctx->ir_base[insn->op1]; 1698 1699 if (op1_insn->op == IR_AND && ctx->use_lists[insn->op1].count == 1) { 1700 /* v = AND(_, _); CMP(v, 0) => SKIP_TEST; TEST */ 1701 ir_match_fuse_load_test_int(ctx, op1_insn, ref); 1702 ctx->rules[insn->op1] = IR_FUSED | IR_TEST_INT; 1703 return IR_TESTCC_INT; 1704 } else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || 1705 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 1706 ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && 1707 (insn->op == IR_EQ || insn->op == IR_NE))) { 1708 /* v = BINOP(_, _); CMP(v, 0) => BINOP; SETCC */ 1709 if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { 1710 ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); 1711 } else { 1712 ir_match_fuse_load(ctx, op1_insn->op2, ref); 1713 } 1714 ctx->rules[insn->op1] = IR_BINOP_INT; 1715 return IR_SETCC_INT; 1716 } 1717 } 1718 ir_match_fuse_load_cmp_int(ctx, insn, ref); 1719 return IR_CMP_INT; 1720 } else { 1721 ir_match_fuse_load_cmp_fp(ctx, insn, ref); 1722 return IR_CMP_FP; 1723 } 1724 break; 1725 case IR_ADD: 1726 case IR_SUB: 1727 if (IR_IS_TYPE_INT(insn->type)) { 1728 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1729 op2_insn = &ctx->ir_base[insn->op2]; 1730 if (IR_IS_CONST_REF(insn->op1)) { 1731 // const 1732 // TODO: add support for sym+offset ??? 
1733 } else if (IR_IS_SYM_CONST(op2_insn->op)) { 1734 if (insn->op == IR_ADD && ir_may_fuse_addr(ctx, op2_insn)) { 1735 goto lea; 1736 } 1737 /* pass */ 1738 } else if (op2_insn->val.i64 == 0) { 1739 return IR_COPY_INT; 1740 } else if ((ir_type_size[insn->type] >= 4 && insn->op == IR_ADD && IR_IS_SIGNED_32BIT(op2_insn->val.i64)) || 1741 (ir_type_size[insn->type] >= 4 && insn->op == IR_SUB && IR_IS_SIGNED_NEG_32BIT(op2_insn->val.i64))) { 1742lea: 1743 if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { 1744 uint32_t rule = ctx->rules[insn->op1]; 1745 1746 if (!rule) { 1747 ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); 1748 } 1749 if (rule == IR_LEA_SI) { 1750 /* z = MUL(Y, 2|4|8) ... ADD(z, imm32) => SKIP ... LEA [Y*2|4|8+im32] */ 1751 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 1752 return IR_LEA_SI_O; 1753 } else if (rule == IR_LEA_SIB) { 1754 /* z = ADD(X, MUL(Y, 2|4|8)) ... ADD(z, imm32) => SKIP ... LEA [X+Y*2|4|8+im32] */ 1755 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SIB; 1756 return IR_LEA_SIB_O; 1757 } else if (rule == IR_LEA_IB) { 1758 /* z = ADD(X, Y) ... ADD(z, imm32) => SKIP ... 
LEA [X+Y+im32] */ 1759 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_IB; 1760 return IR_LEA_IB_O; 1761 } 1762 } 1763 /* ADD(X, imm32) => LEA [X+imm32] */ 1764 return IR_LEA_OB; 1765 } else if (op2_insn->val.i64 == 1 || op2_insn->val.i64 == -1) { 1766 if (insn->op == IR_ADD) { 1767 if (op2_insn->val.i64 == 1) { 1768 /* ADD(_, 1) => INC */ 1769 return IR_INC; 1770 } else { 1771 /* ADD(_, -1) => DEC */ 1772 return IR_DEC; 1773 } 1774 } else { 1775 if (op2_insn->val.i64 == 1) { 1776 /* SUB(_, 1) => DEC */ 1777 return IR_DEC; 1778 } else { 1779 /* SUB(_, -1) => INC */ 1780 return IR_INC; 1781 } 1782 } 1783 } 1784 } else if ((ctx->flags & IR_OPT_CODEGEN) && insn->op == IR_ADD && ir_type_size[insn->type] >= 4) { 1785 if (insn->op1 != insn->op2) { 1786 if (ir_in_same_block(ctx, insn->op1) && ctx->use_lists[insn->op1].count == 1) { 1787 uint32_t rule =ctx->rules[insn->op1]; 1788 if (!rule) { 1789 ctx->rules[insn->op1] = rule = ir_match_insn(ctx, insn->op1); 1790 } 1791 if (rule == IR_LEA_OB) { 1792 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; 1793 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 1794 rule = ctx->rules[insn->op2]; 1795 if (!rule) { 1796 ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); 1797 } 1798 if (rule == IR_LEA_SI) { 1799 /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... ADD(x, y) => SKIP ... SKIP ... LEA */ 1800 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 1801 return IR_LEA_OB_SI; 1802 } 1803 } 1804 /* x = ADD(X, imm32) ... ADD(x, Y) => SKIP ... LEA */ 1805 return IR_LEA_OB_I; 1806 } else if (rule == IR_LEA_SI) { 1807 ctx->rules[insn->op1] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 1808 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 1809 rule = ctx->rules[insn->op2]; 1810 if (!rule) { 1811 ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); 1812 } 1813 if (rule == IR_LEA_OB) { 1814 /* x = ADD(X, imm32) ... y = MUL(Y, 2|4|8) ... 
ADD(y, x) => SKIP ... SKIP ... LEA */ 1815 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; 1816 return IR_LEA_SI_OB; 1817 } 1818 } 1819 /* x = MUL(X, 2|4|8) ... ADD(x, Y) => SKIP ... LEA */ 1820 return IR_LEA_SI_B; 1821 } 1822 } 1823 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 1824 uint32_t rule = ctx->rules[insn->op2]; 1825 if (!rule) { 1826 ctx->rules[insn->op2] = rule = ir_match_insn(ctx, insn->op2); 1827 } 1828 if (rule == IR_LEA_OB) { 1829 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_OB; 1830 /* x = ADD(X, imm32) ... ADD(Y, x) => SKIP ... LEA */ 1831 return IR_LEA_I_OB; 1832 } else if (rule == IR_LEA_SI) { 1833 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_LEA_SI; 1834 /* x = MUL(X, 2|4|8) ... ADD(Y, x) => SKIP ... LEA */ 1835 return IR_LEA_B_SI; 1836 } 1837 } 1838 } 1839 /* ADD(X, Y) => LEA [X + Y] */ 1840 return IR_LEA_IB; 1841 } 1842binop_int: 1843 if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { 1844 ir_match_fuse_load_commutative_int(ctx, insn, ref); 1845 } else { 1846 ir_match_fuse_load(ctx, insn->op2, ref); 1847 } 1848 return IR_BINOP_INT; 1849 } else { 1850binop_fp: 1851 if (ir_op_flags[insn->op] & IR_OP_FLAG_COMMUTATIVE) { 1852 ir_match_fuse_load_commutative_fp(ctx, insn, ref); 1853 } else { 1854 ir_match_fuse_load(ctx, insn->op2, ref); 1855 } 1856 if (ctx->mflags & IR_X86_AVX) { 1857 return IR_BINOP_AVX; 1858 } else { 1859 return IR_BINOP_SSE2; 1860 } 1861 } 1862 break; 1863 case IR_MUL: 1864 if (IR_IS_TYPE_INT(insn->type)) { 1865 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1866 op2_insn = &ctx->ir_base[insn->op2]; 1867 if (IR_IS_SYM_CONST(op2_insn->op)) { 1868 /* pass */ 1869 } else if (IR_IS_CONST_REF(insn->op1)) { 1870 // const 1871 } else if (op2_insn->val.u64 == 0) { 1872 // 0 1873 } else if (op2_insn->val.u64 == 1) { 1874 return IR_COPY_INT; 1875 } else if (ir_type_size[insn->type] >= 4 && 1876 (op2_insn->val.u64 == 2 || op2_insn->val.u64 == 4 || 
op2_insn->val.u64 == 8)) { 1877 /* MUL(X, 2|4|8) => LEA [X*2|4|8] */ 1878 return IR_LEA_SI; 1879 } else if (ir_type_size[insn->type] >= 4 && 1880 (op2_insn->val.u64 == 3 || op2_insn->val.u64 == 5 || op2_insn->val.u64 == 9)) { 1881 /* MUL(X, 3|5|9) => LEA [X+X*2|4|8] */ 1882 return IR_LEA_SIB; 1883 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 1884 /* MUL(X, PWR2) => SHL */ 1885 return IR_MUL_PWR2; 1886 } else if (IR_IS_TYPE_SIGNED(insn->type) 1887 && ir_type_size[insn->type] != 1 1888 && IR_IS_SIGNED_32BIT(op2_insn->val.i64) 1889 && !IR_IS_CONST_REF(insn->op1)) { 1890 /* MUL(_, imm32) => IMUL */ 1891 ir_match_fuse_load(ctx, insn->op1, ref); 1892 return IR_IMUL3; 1893 } 1894 } 1895 /* Prefer IMUL over MUL because it's more flexible and uses less registers ??? */ 1896// if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { 1897 if (ir_type_size[insn->type] != 1) { 1898 goto binop_int; 1899 } 1900 ir_match_fuse_load(ctx, insn->op2, ref); 1901 return IR_MUL_INT; 1902 } else { 1903 goto binop_fp; 1904 } 1905 break; 1906 case IR_ADD_OV: 1907 case IR_SUB_OV: 1908 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 1909 goto binop_int; 1910 case IR_MUL_OV: 1911 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 1912 if (IR_IS_TYPE_SIGNED(insn->type) && ir_type_size[insn->type] != 1) { 1913 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1914 op2_insn = &ctx->ir_base[insn->op2]; 1915 if (!IR_IS_SYM_CONST(op2_insn->op) 1916 && IR_IS_SIGNED_32BIT(op2_insn->val.i64) 1917 && !IR_IS_CONST_REF(insn->op1)) { 1918 /* MUL(_, imm32) => IMUL */ 1919 ir_match_fuse_load(ctx, insn->op1, ref); 1920 return IR_IMUL3; 1921 } 1922 } 1923 goto binop_int; 1924 } 1925 ir_match_fuse_load(ctx, insn->op2, ref); 1926 return IR_MUL_INT; 1927 case IR_DIV: 1928 if (IR_IS_TYPE_INT(insn->type)) { 1929 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1930 op2_insn = &ctx->ir_base[insn->op2]; 1931 if (IR_IS_SYM_CONST(op2_insn->op)) { 1932 /* pass */ 1933 } else if 
(IR_IS_CONST_REF(insn->op1)) { 1934 // const 1935 } else if (op2_insn->val.u64 == 1) { 1936 return IR_COPY_INT; 1937 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 1938 /* DIV(X, PWR2) => SHR */ 1939 if (IR_IS_TYPE_UNSIGNED(insn->type)) { 1940 return IR_DIV_PWR2; 1941 } else { 1942 return IR_SDIV_PWR2; 1943 } 1944 } 1945 } 1946 ir_match_fuse_load(ctx, insn->op2, ref); 1947 return IR_DIV_INT; 1948 } else { 1949 goto binop_fp; 1950 } 1951 break; 1952 case IR_MOD: 1953 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1954 op2_insn = &ctx->ir_base[insn->op2]; 1955 if (IR_IS_SYM_CONST(op2_insn->op)) { 1956 /* pass */ 1957 } else if (IR_IS_CONST_REF(insn->op1)) { 1958 // const 1959 } else if (IR_IS_POWER_OF_TWO(op2_insn->val.u64)) { 1960 /* MOD(X, PWR2) => AND */ 1961 if (IR_IS_TYPE_UNSIGNED(insn->type)) { 1962 return IR_MOD_PWR2; 1963 } else { 1964 return IR_SMOD_PWR2; 1965 } 1966 } 1967 } 1968 ir_match_fuse_load(ctx, insn->op2, ref); 1969 return IR_MOD_INT; 1970 case IR_BSWAP: 1971 case IR_NOT: 1972 if (insn->type == IR_BOOL) { 1973 IR_ASSERT(IR_IS_TYPE_INT(ctx->ir_base[insn->op1].type)); // TODO: IR_BOOL_NOT_FP 1974 return IR_BOOL_NOT_INT; 1975 } else { 1976 IR_ASSERT(IR_IS_TYPE_INT(insn->type)); 1977 return IR_OP_INT; 1978 } 1979 break; 1980 case IR_NEG: 1981 if (IR_IS_TYPE_INT(insn->type)) { 1982 return IR_OP_INT; 1983 } else { 1984 return IR_OP_FP; 1985 } 1986 case IR_ABS: 1987 if (IR_IS_TYPE_INT(insn->type)) { 1988 return IR_ABS_INT; // movl %edi, %eax; negl %eax; cmovs %edi, %eax 1989 } else { 1990 return IR_OP_FP; 1991 } 1992 case IR_OR: 1993 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 1994 op2_insn = &ctx->ir_base[insn->op2]; 1995 if (IR_IS_SYM_CONST(op2_insn->op)) { 1996 /* pass */ 1997 } else if (IR_IS_CONST_REF(insn->op1)) { 1998 // const 1999 } else if (op2_insn->val.i64 == 0) { 2000 return IR_COPY_INT; 2001 } else if (op2_insn->val.i64 == -1) { 2002 // -1 2003 } 2004 } 2005 goto binop_int; 2006 case IR_AND: 
2007 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2008 op2_insn = &ctx->ir_base[insn->op2]; 2009 if (IR_IS_SYM_CONST(op2_insn->op)) { 2010 /* pass */ 2011 } else if (IR_IS_CONST_REF(insn->op1)) { 2012 // const 2013 } else if (op2_insn->val.i64 == 0) { 2014 // 0 2015 } else if (op2_insn->val.i64 == -1) { 2016 return IR_COPY_INT; 2017 } 2018 } 2019 goto binop_int; 2020 case IR_XOR: 2021 if ((ctx->flags & IR_OPT_CODEGEN) && IR_IS_CONST_REF(insn->op2)) { 2022 op2_insn = &ctx->ir_base[insn->op2]; 2023 if (IR_IS_SYM_CONST(op2_insn->op)) { 2024 /* pass */ 2025 } else if (IR_IS_CONST_REF(insn->op1)) { 2026 // const 2027 } 2028 } 2029 goto binop_int; 2030 case IR_SHL: 2031 if (IR_IS_CONST_REF(insn->op2)) { 2032 if (ctx->flags & IR_OPT_CODEGEN) { 2033 op2_insn = &ctx->ir_base[insn->op2]; 2034 if (IR_IS_SYM_CONST(op2_insn->op)) { 2035 /* pass */ 2036 } else if (IR_IS_CONST_REF(insn->op1)) { 2037 // const 2038 } else if (op2_insn->val.u64 == 0) { 2039 return IR_COPY_INT; 2040 } else if (ir_type_size[insn->type] >= 4) { 2041 if (op2_insn->val.u64 == 1) { 2042 // lea [op1*2] 2043 } else if (op2_insn->val.u64 == 2) { 2044 // lea [op1*4] 2045 } else if (op2_insn->val.u64 == 3) { 2046 // lea [op1*8] 2047 } 2048 } 2049 } 2050 return IR_SHIFT_CONST; 2051 } 2052 return IR_SHIFT; 2053 case IR_SHR: 2054 case IR_SAR: 2055 case IR_ROL: 2056 case IR_ROR: 2057 if (IR_IS_CONST_REF(insn->op2)) { 2058 if (ctx->flags & IR_OPT_CODEGEN) { 2059 op2_insn = &ctx->ir_base[insn->op2]; 2060 if (IR_IS_SYM_CONST(op2_insn->op)) { 2061 /* pass */ 2062 } else if (IR_IS_CONST_REF(insn->op1)) { 2063 // const 2064 } else if (op2_insn->val.u64 == 0) { 2065 return IR_COPY_INT; 2066 } 2067 } 2068 return IR_SHIFT_CONST; 2069 } 2070 return IR_SHIFT; 2071 case IR_MIN: 2072 case IR_MAX: 2073 if (IR_IS_TYPE_INT(insn->type)) { 2074 return IR_MIN_MAX_INT; 2075 } else { 2076 goto binop_fp; 2077 } 2078 break; 2079// case IR_COND: 2080 case IR_COPY: 2081 if (IR_IS_TYPE_INT(insn->type)) { 2082 return 
IR_COPY_INT; 2083 } else { 2084 return IR_COPY_FP; 2085 } 2086 break; 2087 case IR_CALL: 2088 ctx->flags2 |= IR_HAS_CALLS; 2089#ifndef IR_REG_FP_RET1 2090 if (IR_IS_TYPE_FP(insn->type)) { 2091 ctx->flags2 |= IR_HAS_FP_RET_SLOT; 2092 } 2093#endif 2094 IR_FALLTHROUGH; 2095 case IR_TAILCALL: 2096 case IR_IJMP: 2097 ir_match_fuse_load(ctx, insn->op2, ref); 2098 return insn->op; 2099 case IR_VAR: 2100 return IR_SKIPPED | IR_VAR; 2101 case IR_PARAM: 2102 return ctx->use_lists[ref].count > 0 ? IR_PARAM : IR_SKIPPED | IR_PARAM; 2103 case IR_ALLOCA: 2104 /* alloca() may be used only in functions */ 2105 if (ctx->flags & IR_FUNCTION) { 2106 ctx->flags |= IR_USE_FRAME_POINTER; 2107 ctx->flags2 |= IR_HAS_ALLOCA; 2108 } 2109 return IR_ALLOCA; 2110 case IR_VSTORE: 2111 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { 2112 store_rule = IR_VSTORE_INT; 2113 load_op = IR_VLOAD; 2114store_int: 2115 if ((ctx->flags & IR_OPT_CODEGEN) 2116 && ir_in_same_block(ctx, insn->op3) 2117 && (ctx->use_lists[insn->op3].count == 1 || 2118 (ctx->use_lists[insn->op3].count == 2 2119 && (ctx->ir_base[insn->op3].op == IR_ADD_OV || 2120 ctx->ir_base[insn->op3].op == IR_SUB_OV)))) { 2121 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 2122 uint32_t rule = ctx->rules[insn->op3]; 2123 2124 if (!rule) { 2125 ctx->rules[insn->op3] = rule = ir_match_insn(ctx, insn->op3); 2126 } 2127 if ((rule == IR_BINOP_INT && op_insn->op != IR_MUL) || rule == IR_LEA_OB || rule == IR_LEA_IB) { 2128 if (insn->op1 == op_insn->op1 2129 && ctx->ir_base[op_insn->op1].op == load_op 2130 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2131 && ctx->use_lists[op_insn->op1].count == 2) { 2132 /* l = LOAD(_, a) ... v = BINOP(l, _) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... 
MEM_BINOP */ 2133 ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; 2134 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2135 if (!IR_IS_CONST_REF(op_insn->op2) 2136 && ctx->rules[op_insn->op2] == (IR_FUSED|IR_SIMPLE|IR_LOAD)) { 2137 ctx->rules[op_insn->op2] = IR_LOAD_INT; 2138 } 2139 return IR_MEM_BINOP_INT; 2140 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2141 && insn->op1 == op_insn->op2 2142 && ctx->ir_base[op_insn->op2].op == load_op 2143 && ctx->ir_base[op_insn->op2].op2 == insn->op2 2144 && ctx->use_lists[op_insn->op2].count == 2) { 2145 /* l = LOAD(_, a) ... v = BINOP(_, l) ... STORE(l, a, v) => SKIP ... SKIP_MEM_BINOP ... MEM_BINOP */ 2146 ir_swap_ops(op_insn); 2147 ctx->rules[insn->op3] = IR_FUSED | IR_BINOP_INT; 2148 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2149 return IR_MEM_BINOP_INT; 2150 } 2151 } else if (rule == IR_INC) { 2152 if (insn->op1 == op_insn->op1 2153 && ctx->ir_base[op_insn->op1].op == load_op 2154 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2155 && ctx->use_lists[op_insn->op1].count == 2) { 2156 /* l = LOAD(_, a) ... v = INC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_INC */ 2157 ctx->rules[insn->op3] = IR_SKIPPED | IR_INC; 2158 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2159 return IR_MEM_INC; 2160 } 2161 } else if (rule == IR_DEC) { 2162 if (insn->op1 == op_insn->op1 2163 && ctx->ir_base[op_insn->op1].op == load_op 2164 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2165 && ctx->use_lists[op_insn->op1].count == 2){ 2166 /* l = LOAD(_, a) ... v = DEC(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DEC */ 2167 ctx->rules[insn->op3] = IR_SKIPPED | IR_DEC; 2168 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2169 return IR_MEM_DEC; 2170 } 2171 } else if (rule == IR_MUL_PWR2) { 2172 if (insn->op1 == op_insn->op1 2173 && ctx->ir_base[op_insn->op1].op == load_op 2174 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2175 && ctx->use_lists[op_insn->op1].count == 2) { 2176 /* l = LOAD(_, a) ... 
v = MUL_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MUL_PWR2 */ 2177 ctx->rules[insn->op3] = IR_SKIPPED | IR_MUL_PWR2; 2178 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2179 return IR_MEM_MUL_PWR2; 2180 } 2181 } else if (rule == IR_DIV_PWR2) { 2182 if (insn->op1 == op_insn->op1 2183 && ctx->ir_base[op_insn->op1].op == load_op 2184 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2185 && ctx->use_lists[op_insn->op1].count == 2) { 2186 /* l = LOAD(_, a) ... v = DIV_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_DIV_PWR2 */ 2187 ctx->rules[insn->op3] = IR_SKIPPED | IR_DIV_PWR2; 2188 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2189 return IR_MEM_DIV_PWR2; 2190 } 2191 } else if (rule == IR_MOD_PWR2) { 2192 if (insn->op1 == op_insn->op1 2193 && ctx->ir_base[op_insn->op1].op == load_op 2194 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2195 && ctx->use_lists[op_insn->op1].count == 2) { 2196 /* l = LOAD(_, a) ... v = MOD_PWR2(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_MOD_PWR2 */ 2197 ctx->rules[insn->op3] = IR_SKIPPED | IR_MOD_PWR2; 2198 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2199 return IR_MEM_MOD_PWR2; 2200 } 2201 } else if (rule == IR_SHIFT) { 2202 if (insn->op1 == op_insn->op1 2203 && ctx->ir_base[op_insn->op1].op == load_op 2204 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2205 && ctx->use_lists[op_insn->op1].count == 2) { 2206 /* l = LOAD(_, a) ... v = SHIFT(l, _) ... STORE(l, a, v) => SKIP ... SKIP_SHIFT ... MEM_SHIFT */ 2207 ctx->rules[insn->op3] = IR_FUSED | IR_SHIFT; 2208 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2209 return IR_MEM_SHIFT; 2210 } 2211 } else if (rule == IR_SHIFT_CONST) { 2212 if (insn->op1 == op_insn->op1 2213 && ctx->ir_base[op_insn->op1].op == load_op 2214 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2215 && ctx->use_lists[op_insn->op1].count == 2) { 2216 /* l = LOAD(_, a) ... v = SHIFT(l, CONST) ... STORE(l, a, v) => SKIP ... SKIP ... 
MEM_SHIFT_CONST */ 2217 ctx->rules[insn->op3] = IR_SKIPPED | IR_SHIFT_CONST; 2218 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2219 return IR_MEM_SHIFT_CONST; 2220 } 2221 } else if (rule == IR_OP_INT && op_insn->op != IR_BSWAP) { 2222 if (insn->op1 == op_insn->op1 2223 && ctx->ir_base[op_insn->op1].op == load_op 2224 && ctx->ir_base[op_insn->op1].op2 == insn->op2 2225 && ctx->use_lists[op_insn->op1].count == 2) { 2226 /* l = LOAD(_, a) ... v = OP(l) ... STORE(l, a, v) => SKIP ... SKIP ... MEM_OP */ 2227 ctx->rules[insn->op3] = IR_SKIPPED | IR_OP_INT; 2228 ctx->rules[op_insn->op1] = IR_SKIPPED | load_op; 2229 return IR_MEM_OP_INT; 2230 } 2231 } 2232 } 2233 return store_rule; 2234 } else { 2235 return IR_VSTORE_FP; 2236 } 2237 break; 2238 case IR_LOAD: 2239 ir_match_fuse_addr(ctx, insn->op2); 2240 if (IR_IS_TYPE_INT(insn->type)) { 2241 return IR_LOAD_INT; 2242 } else { 2243 return IR_LOAD_FP; 2244 } 2245 break; 2246 case IR_STORE: 2247 ir_match_fuse_addr(ctx, insn->op2); 2248 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op3].type)) { 2249 store_rule = IR_STORE_INT; 2250 load_op = IR_LOAD; 2251 goto store_int; 2252 } else { 2253 return IR_STORE_FP; 2254 } 2255 break; 2256 case IR_RLOAD: 2257 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), insn->op2)) { 2258 return IR_SKIPPED | IR_RLOAD; 2259 } 2260 return IR_RLOAD; 2261 case IR_RSTORE: 2262 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2263 if ((ctx->flags & IR_OPT_CODEGEN) 2264 && ir_in_same_block(ctx, insn->op2) 2265 && ctx->use_lists[insn->op2].count == 1 2266 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2267 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 2268 2269 if (op_insn->op == IR_ADD || 2270 op_insn->op == IR_SUB || 2271// op_insn->op == IR_MUL || 2272 op_insn->op == IR_OR || 2273 op_insn->op == IR_AND || 2274 op_insn->op == IR_XOR) { 2275 if (insn->op1 == op_insn->op1 2276 && ctx->ir_base[op_insn->op1].op == IR_RLOAD 2277 && ctx->ir_base[op_insn->op1].op2 == insn->op3 
2278 && ctx->use_lists[op_insn->op1].count == 2) { 2279 /* l = RLOAD(r) ... v = BINOP(l, _) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ 2280 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2281 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; 2282 return IR_REG_BINOP_INT; 2283 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2284 && insn->op1 == op_insn->op2 2285 && ctx->ir_base[op_insn->op2].op == IR_RLOAD 2286 && ctx->ir_base[op_insn->op2].op2 == insn->op3 2287 && ctx->use_lists[op_insn->op2].count == 2) { 2288 /* l = RLOAD(r) ... v = BINOP(x, l) ... RSTORE(l, r, v) => SKIP ... SKIP_REG_BINOP ... REG_BINOP */ 2289 ir_swap_ops(op_insn); 2290 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2291 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_RLOAD; 2292 return IR_REG_BINOP_INT; 2293 } 2294 } 2295 } 2296 } 2297 ir_match_fuse_load(ctx, insn->op2, ref); 2298 return IR_RSTORE; 2299 case IR_START: 2300 case IR_BEGIN: 2301 case IR_IF_TRUE: 2302 case IR_IF_FALSE: 2303 case IR_CASE_VAL: 2304 case IR_CASE_DEFAULT: 2305 case IR_MERGE: 2306 case IR_LOOP_BEGIN: 2307 case IR_UNREACHABLE: 2308 return IR_SKIPPED | insn->op; 2309 case IR_RETURN: 2310 if (!insn->op2) { 2311 return IR_RETURN_VOID; 2312 } else if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2313 return IR_RETURN_INT; 2314 } else { 2315 return IR_RETURN_FP; 2316 } 2317 case IR_IF: 2318 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 2319 op2_insn = &ctx->ir_base[insn->op2]; 2320 if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT) { 2321 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { 2322 if (IR_IS_CONST_REF(op2_insn->op2) 2323 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) 2324 && ctx->ir_base[op2_insn->op2].val.i64 == 0 2325 && op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ 2326 ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; 2327 2328 if (op1_insn->op == IR_AND && ctx->use_lists[op2_insn->op1].count == 1) { 2329 /* v = 
AND(_, _); c = CMP(v, 0) ... IF(c) => SKIP_TEST; SKIP ... TEST_AND_BRANCH */ 2330 ir_match_fuse_load_test_int(ctx, op1_insn, ref); 2331 ctx->rules[op2_insn->op1] = IR_FUSED | IR_TEST_INT; 2332 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_NOP; 2333 return IR_TEST_AND_BRANCH_INT; 2334 } else if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || 2335 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 2336 ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && 2337 (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { 2338 /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... JCC */ 2339 if (ir_op_flags[op1_insn->op] & IR_OP_FLAG_COMMUTATIVE) { 2340 ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); 2341 } else { 2342 ir_match_fuse_load(ctx, op1_insn->op2, ref); 2343 } 2344 ctx->rules[op2_insn->op1] = IR_BINOP_INT; 2345 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 2346 return IR_JCC_INT; 2347 } 2348 } 2349 /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ 2350 ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); 2351 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 2352 return IR_CMP_AND_BRANCH_INT; 2353 } else { 2354 /* c = CMP(_, _) ... IF(c) => SKIP_CMP ... CMP_AND_BRANCH */ 2355 ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, 1); 2356 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; 2357 return IR_CMP_AND_BRANCH_FP; 2358 } 2359 } else if (op2_insn->op == IR_AND) { 2360 /* c = AND(_, _) ... IF(c) => SKIP_TEST ... TEST_AND_BRANCH */ 2361 ir_match_fuse_load_test_int(ctx, op2_insn, ref); 2362 ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; 2363 return IR_TEST_AND_BRANCH_INT; 2364 } else if (op2_insn->op == IR_OVERFLOW) { 2365 /* c = OVERFLOW(_) ... IF(c) => SKIP_OVERFLOW ... 
OVERFLOW_AND_BRANCH */ 2366 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; 2367 return IR_OVERFLOW_AND_BRANCH; 2368 } 2369 } 2370 if (IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2371 if (insn->op2 == ref - 1 /* previous instruction */ 2372 && ir_in_same_block(ctx, insn->op2)) { 2373 op2_insn = &ctx->ir_base[insn->op2]; 2374 if (op2_insn->op == IR_ADD || 2375 op2_insn->op == IR_SUB || 2376// op2_insn->op == IR_MUL || 2377 op2_insn->op == IR_OR || 2378 op2_insn->op == IR_AND || 2379 op2_insn->op == IR_XOR) { 2380 2381 /* v = BINOP(_, _); IF(v) => BINOP; JCC */ 2382 if (ir_op_flags[op2_insn->op] & IR_OP_FLAG_COMMUTATIVE) { 2383 ir_match_fuse_load_commutative_int(ctx, op2_insn, ref); 2384 } else { 2385 ir_match_fuse_load(ctx, op2_insn->op2, ref); 2386 } 2387 ctx->rules[insn->op2] = IR_BINOP_INT; 2388 return IR_JCC_INT; 2389 } 2390 } else if ((ctx->flags & IR_OPT_CODEGEN) 2391 && insn->op1 == ref - 1 /* previous instruction */ 2392 && insn->op2 == ref - 2 /* previous instruction */ 2393 && ir_in_same_block(ctx, insn->op2) 2394 && ctx->use_lists[insn->op2].count == 2 2395 && IR_IS_TYPE_INT(ctx->ir_base[insn->op2].type)) { 2396 ir_insn *store_insn = &ctx->ir_base[insn->op1]; 2397 2398 if (store_insn->op == IR_STORE && store_insn->op3 == insn->op2) { 2399 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 2400 2401 if (op_insn->op == IR_ADD || 2402 op_insn->op == IR_SUB || 2403// op_insn->op == IR_MUL || 2404 op_insn->op == IR_OR || 2405 op_insn->op == IR_AND || 2406 op_insn->op == IR_XOR) { 2407 if (ctx->ir_base[op_insn->op1].op == IR_LOAD 2408 && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { 2409 if (ir_in_same_block(ctx, op_insn->op1) 2410 && ctx->use_lists[op_insn->op1].count == 2 2411 && store_insn->op1 == op_insn->op1) { 2412 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ 2413 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2414 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2415 ir_match_fuse_addr(ctx, store_insn->op2); 2416 
ctx->rules[insn->op1] = IR_MEM_BINOP_INT; 2417 return IR_JCC_INT; 2418 } 2419 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2420 && ctx->ir_base[op_insn->op2].op == IR_LOAD 2421 && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { 2422 if (ir_in_same_block(ctx, op_insn->op2) 2423 && ctx->use_lists[op_insn->op2].count == 2 2424 && store_insn->op1 == op_insn->op2) { 2425 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ 2426 ir_swap_ops(op_insn); 2427 ctx->rules[insn->op2] = IR_FUSED | IR_BINOP_INT; 2428 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2429 ir_match_fuse_addr(ctx, store_insn->op2); 2430 ctx->rules[insn->op1] = IR_MEM_BINOP_INT; 2431 return IR_JCC_INT; 2432 } 2433 } 2434 } 2435 } 2436 } 2437 ir_match_fuse_load(ctx, insn->op2, ref); 2438 return IR_IF_INT; 2439 } else { 2440 IR_ASSERT(0 && "NIY IR_IF_FP"); 2441 break; 2442 } 2443 case IR_GUARD: 2444 case IR_GUARD_NOT: 2445 if (ir_in_same_block(ctx, insn->op2) && ctx->use_lists[insn->op2].count == 1) { 2446 op2_insn = &ctx->ir_base[insn->op2]; 2447 if (op2_insn->op >= IR_EQ && op2_insn->op <= IR_UGT 2448 // TODO: register allocator may clobber operands of CMP before they are used in the GUARD_CMP 2449 && (insn->op2 == ref - 1 || 2450 (insn->op2 == ctx->prev_ref[ref] - 1 2451 && ctx->ir_base[ctx->prev_ref[ref]].op == IR_SNAPSHOT))) { 2452 if (IR_IS_TYPE_INT(ctx->ir_base[op2_insn->op1].type)) { 2453 if (IR_IS_CONST_REF(op2_insn->op2) 2454 && !IR_IS_SYM_CONST(ctx->ir_base[op2_insn->op2].op) 2455 && ctx->ir_base[op2_insn->op2].val.i64 == 0) { 2456 if (op2_insn->op1 == insn->op2 - 1) { /* previous instruction */ 2457 ir_insn *op1_insn = &ctx->ir_base[op2_insn->op1]; 2458 2459 if ((op1_insn->op == IR_OR || op1_insn->op == IR_AND || op1_insn->op == IR_XOR) || 2460 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 2461 ((op1_insn->op == IR_ADD || op1_insn->op == IR_SUB) && 2462 (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { 2463 if (ir_op_flags[op1_insn->op] & 
IR_OP_FLAG_COMMUTATIVE) { 2464 ir_match_fuse_load_commutative_int(ctx, op1_insn, ref); 2465 } else { 2466 ir_match_fuse_load(ctx, op1_insn->op2, ref); 2467 } 2468 /* v = BINOP(_, _); c = CMP(v, 0) ... IF(c) => BINOP; SKIP_CMP ... GUARD_JCC */ 2469 ctx->rules[op2_insn->op1] = IR_BINOP_INT; 2470 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 2471 return IR_GUARD_JCC_INT; 2472 } 2473 } else if ((ctx->flags & IR_OPT_CODEGEN) 2474 && op2_insn->op1 == insn->op2 - 2 /* before previous instruction */ 2475 && ir_in_same_block(ctx, op2_insn->op1) 2476 && ctx->use_lists[op2_insn->op1].count == 2) { 2477 ir_insn *store_insn = &ctx->ir_base[insn->op2 - 1]; 2478 2479 if (store_insn->op == IR_STORE && store_insn->op3 == op2_insn->op1) { 2480 ir_insn *op_insn = &ctx->ir_base[op2_insn->op1]; 2481 2482 if ((op_insn->op == IR_OR || op_insn->op == IR_AND || op_insn->op == IR_XOR) || 2483 /* GT(ADD(_, _), 0) can't be optimized because ADD may overflow */ 2484 ((op_insn->op == IR_ADD || op_insn->op == IR_SUB) && 2485 (op2_insn->op == IR_EQ || op2_insn->op == IR_NE))) { 2486 if (ctx->ir_base[op_insn->op1].op == IR_LOAD 2487 && ctx->ir_base[op_insn->op1].op2 == store_insn->op2) { 2488 if (ir_in_same_block(ctx, op_insn->op1) 2489 && ctx->use_lists[op_insn->op1].count == 2 2490 && store_insn->op1 == op_insn->op1) { 2491 /* v = MEM_BINOP(_, _); IF(v) => MEM_BINOP; GUARD_JCC */ 2492 ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; 2493 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2494 ir_match_fuse_addr(ctx, store_insn->op2); 2495 ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; 2496 ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; 2497 return IR_GUARD_JCC_INT; 2498 } 2499 } else if ((ir_op_flags[op_insn->op] & IR_OP_FLAG_COMMUTATIVE) 2500 && ctx->ir_base[op_insn->op2].op == IR_LOAD 2501 && ctx->ir_base[op_insn->op2].op2 == store_insn->op2) { 2502 if (ir_in_same_block(ctx, op_insn->op2) 2503 && ctx->use_lists[op_insn->op2].count == 2 2504 && store_insn->op1 == op_insn->op2) { 2505 /* v = 
MEM_BINOP(_, _); IF(v) => MEM_BINOP; JCC */ 2506 ir_swap_ops(op_insn); 2507 ctx->rules[op2_insn->op1] = IR_FUSED | IR_BINOP_INT; 2508 ctx->rules[op_insn->op1] = IR_SKIPPED | IR_LOAD; 2509 ir_match_fuse_addr(ctx, store_insn->op2); 2510 ctx->rules[insn->op2 - 1] = IR_MEM_BINOP_INT; 2511 ctx->rules[insn->op2] = IR_SKIPPED | IR_NOP; 2512 return IR_GUARD_JCC_INT; 2513 } 2514 } 2515 } 2516 } 2517 } 2518 } 2519 /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ 2520 ir_match_fuse_load_cmp_int(ctx, op2_insn, ref); 2521 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_INT; 2522 return IR_GUARD_CMP_INT; 2523 } else { 2524 /* c = CMP(_, _) ... GUARD(c) => SKIP_CMP ... GUARD_CMP */ 2525 ir_match_fuse_load_cmp_fp_br(ctx, op2_insn, ref, insn->op == IR_GUARD_NOT); 2526 ctx->rules[insn->op2] = IR_FUSED | IR_CMP_FP; 2527 return IR_GUARD_CMP_FP; 2528 } 2529 } else if (op2_insn->op == IR_AND) { // TODO: OR, XOR. etc 2530 /* c = AND(_, _) ... GUARD(c) => SKIP_TEST ... GUARD_TEST */ 2531 ir_match_fuse_load_test_int(ctx, op2_insn, ref); 2532 ctx->rules[insn->op2] = IR_FUSED | IR_TEST_INT; 2533 return IR_GUARD_TEST_INT; 2534 } else if (op2_insn->op == IR_OVERFLOW) { 2535 /* c = OVERFLOW(_) ... GUARD(c) => SKIP_OVERFLOW ... GUARD_OVERFLOW */ 2536 ctx->rules[insn->op2] = IR_FUSED | IR_SIMPLE | IR_OVERFLOW; 2537 return IR_GUARD_OVERFLOW; 2538 } 2539 } 2540 ir_match_fuse_load(ctx, insn->op2, ref); 2541 return insn->op; 2542 case IR_INT2FP: 2543 if (ir_type_size[ctx->ir_base[insn->op1].type] > (IR_IS_TYPE_SIGNED(ctx->ir_base[insn->op1].type) ? 
2 : 4)) { 2544 ir_match_fuse_load(ctx, insn->op1, ref); 2545 } 2546 return insn->op; 2547 case IR_SEXT: 2548 case IR_ZEXT: 2549 case IR_BITCAST: 2550 case IR_FP2INT: 2551 case IR_FP2FP: 2552 case IR_PROTO: 2553 ir_match_fuse_load(ctx, insn->op1, ref); 2554 return insn->op; 2555 case IR_CTLZ: 2556 case IR_CTTZ: 2557 ir_match_fuse_load(ctx, insn->op1, ref); 2558 return IR_BIT_COUNT; 2559 case IR_CTPOP: 2560 ir_match_fuse_load(ctx, insn->op1, ref); 2561 return (ctx->mflags & IR_X86_BMI1) ? IR_BIT_COUNT : IR_CTPOP; 2562 case IR_VA_START: 2563 ctx->flags2 |= IR_HAS_VA_START; 2564 if ((ctx->ir_base[insn->op2].op == IR_ALLOCA) || (ctx->ir_base[insn->op2].op == IR_VADDR)) { 2565 ir_use_list *use_list = &ctx->use_lists[insn->op2]; 2566 ir_ref *p, n = use_list->count; 2567 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { 2568 ir_insn *use_insn = &ctx->ir_base[*p]; 2569 if (use_insn->op == IR_VA_START || use_insn->op == IR_VA_END) { 2570 } else if (use_insn->op == IR_VA_COPY) { 2571 if (use_insn->op3 == insn->op2) { 2572 ctx->flags2 |= IR_HAS_VA_COPY; 2573 } 2574 } else if (use_insn->op == IR_VA_ARG) { 2575 if (use_insn->op2 == insn->op2) { 2576 if (IR_IS_TYPE_INT(use_insn->type)) { 2577 ctx->flags2 |= IR_HAS_VA_ARG_GP; 2578 } else { 2579 IR_ASSERT(IR_IS_TYPE_FP(use_insn->type)); 2580 ctx->flags2 |= IR_HAS_VA_ARG_FP; 2581 } 2582 } 2583 } else if (*p > ref) { 2584 /* diriect va_list access */ 2585 ctx->flags2 |= IR_HAS_VA_ARG_GP|IR_HAS_VA_ARG_FP; 2586 } 2587 } 2588 } 2589 return IR_VA_START; 2590 case IR_VA_END: 2591 return IR_SKIPPED | IR_NOP; 2592 case IR_VADDR: 2593 if (ctx->use_lists[ref].count > 0) { 2594 ir_use_list *use_list = &ctx->use_lists[ref]; 2595 ir_ref *p, n = use_list->count; 2596 2597 for (p = &ctx->use_edges[use_list->refs]; n > 0; p++, n--) { 2598 if (ctx->ir_base[*p].op != IR_VA_END) { 2599 return IR_VADDR; 2600 } 2601 } 2602 } 2603 return IR_SKIPPED | IR_NOP; 2604 default: 2605 break; 2606 } 2607 2608 return insn->op; 2609} 2610 2611static void 
ir_match_insn2(ir_ctx *ctx, ir_ref ref, uint32_t rule)
{
	/* Second matching pass: an IR_LEA_IB that was not profitable to keep as an
	 * address computation may be reverted back to a plain ADD. */
	if (rule == IR_LEA_IB) {
		ir_match_try_revert_lea_to_add(ctx, ref);
	}
}

/* code generation */

/* Return the stack offset of "ref"'s spill slot and store its base register
 * into *reg.  Specially-spilled intervals are addressed relative to
 * ctx->spill_base; ordinary ones relative to the frame or stack pointer with
 * the offset translated by IR_SPILL_POS_TO_OFFSET. */
static int32_t ir_ref_spill_slot_offset(ir_ctx *ctx, ir_ref ref, ir_reg *reg)
{
	int32_t offset;

	IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
	IR_ASSERT(offset != -1); /* the interval must actually own a spill slot */
	if (ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		*reg = ctx->spill_base;
		return offset;
	}
	*reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_SPILL_POS_TO_OFFSET(offset);
}

/* Build an ir_mem operand addressing the spill slot of virtual register "v". */
static ir_mem ir_vreg_spill_slot(ir_ctx *ctx, ir_ref v)
{
	int32_t offset;
	ir_reg base;

	IR_ASSERT(v > 0 && v <= ctx->vregs_count && ctx->live_intervals[v]);
	offset = ctx->live_intervals[v]->stack_spill_pos;
	IR_ASSERT(offset != -1);
	if (ctx->live_intervals[v]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL) {
		IR_ASSERT(ctx->spill_base != IR_REG_NONE);
		return IR_MEM_BO(ctx->spill_base, offset);
	}
	base = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	offset = IR_SPILL_POS_TO_OFFSET(offset);
	return IR_MEM_BO(base, offset);
}

/* Spill slot of a (non-constant) instruction reference. */
static ir_mem ir_ref_spill_slot(ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	return ir_vreg_spill_slot(ctx, ctx->vregs[ref]);
}

/* Check whether "mem" already addresses the spill slot of "ref". */
static bool ir_is_same_spill_slot(ir_ctx *ctx, ir_ref ref, ir_mem mem)
{
	ir_mem m = ir_ref_spill_slot(ctx, ref);
	return IR_MEM_VAL(m) == IR_MEM_VAL(mem);
}

/* Stack location of an IR_VAR; its frame position is kept in insn->op3. */
static ir_mem ir_var_spill_slot(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *var_insn = &ctx->ir_base[ref];
	ir_reg reg;

	IR_ASSERT(var_insn->op == IR_VAR);
	reg = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	return IR_MEM_BO(reg, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
}

/* Return true when reloading "ref" from its spill slot can be skipped at
 * "use": the matching use position is the last one, or the next use position
 * has op_num == 0 (NOTE(review): presumably a definition rather than a real
 * operand use - confirm against the register allocator). */
static bool ir_may_avoid_spill_load(ir_ctx *ctx, ir_ref ref, ir_ref use)
{
	ir_live_interval *ival;

	IR_ASSERT(ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
	ival = ctx->live_intervals[ctx->vregs[ref]];
	/* scan every split chunk of the interval for the use position of "use" */
	while (ival) {
		ir_use_pos *use_pos = ival->use_pos;
		while (use_pos) {
			if (IR_LIVE_POS_TO_REF(use_pos->pos) == use) {
				return !use_pos->next || use_pos->next->op_num == 0;
			}
			use_pos = use_pos->next;
		}
		ival = ival->next;
	}
	return 0;
}

/* Load the integer constant "val" into "reg", choosing the shortest encoding:
 * xor for zero, a 32-bit zero- or sign-extended mov when the value fits, and
 * a full mov64 otherwise. */
static void ir_emit_load_imm_int(ir_ctx *ctx, ir_type type, ir_reg reg, int64_t val)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (val == 0) {
		| ASM_REG_REG_OP xor, type, reg, reg
	} else if (ir_type_size[type] == 8) {
		IR_ASSERT(sizeof(void*) == 8);
|.if X64
		if (IR_IS_UNSIGNED_32BIT(val)) {
			| mov Rd(reg), (uint32_t)val // zero extended load
		} else if (IR_IS_SIGNED_32BIT(val)) {
			| mov Rq(reg), (int32_t)val // sign extended load
//		} else if (type == IR_ADDR && IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, (intptr_t)val)) {
//			| lea Ra(reg), [&val]
		} else {
			| mov64 Ra(reg), val
		}
|.endif
	} else {
		| ASM_REG_IMM_OP mov, type, reg, (int32_t)val // sign extended load
	}
}

/* Load an integer of the given type from "mem" into "reg". */
static void ir_emit_load_mem_int(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_REG_MEM_OP mov, type, reg, mem
}

/* Load a floating-point constant into an XMM register.  +0.0 is materialized
 * with a (v)xorps/(v)xorpd idiom; anything else is loaded from the constant
 * pool through a per-constant label. */
static void ir_emit_load_imm_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *insn = &ctx->ir_base[src];
	int label;

	if (type == IR_FLOAT && insn->val.u32 == 0) {
		if (ctx->mflags & IR_X86_AVX) {
			| vxorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		} else {
			| xorps xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		}
	} else if (type == IR_DOUBLE && insn->val.u64 == 0) {
		if (ctx->mflags & IR_X86_AVX) {
			| vxorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		} else {
			| xorpd xmm(reg-IR_REG_FP_FIRST), xmm(reg-IR_REG_FP_FIRST)
		}
	} else {
		label = ir_const_label(ctx, src);
		| ASM_FP_REG_TXT_OP movs, type, reg, [=>label]
	}
}

/* Load a floating-point value of the given type from "mem" into "reg". */
static void ir_emit_load_mem_fp(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_REG_MEM_OP movs, type, reg, mem
}

/* Type-dispatching memory load (integer vs floating point). */
static void ir_emit_load_mem(ir_ctx *ctx, ir_type type, ir_reg reg, ir_mem mem)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_load_mem_int(ctx, type, reg, mem);
	} else {
		ir_emit_load_mem_fp(ctx, type, reg, mem);
	}
}

/* Load "src" into "reg": constants are materialized as immediates (symbols and
 * functions through ir_sym_val(), strings via a constant-pool lea), everything
 * else is reloaded from its spill slot. */
static void ir_emit_load(ir_ctx *ctx, ir_type type, ir_reg reg, ir_ref src)
{
	if (IR_IS_CONST_REF(src)) {
		if (IR_IS_TYPE_INT(type)) {
			ir_insn *insn = &ctx->ir_base[src];

			if (insn->op == IR_SYM || insn->op == IR_FUNC) {
				void *addr = ir_sym_val(ctx, insn);
				ir_emit_load_imm_int(ctx, type, reg, (intptr_t)addr);
			} else if (insn->op == IR_STR) {
				ir_backend_data *data = ctx->data;
				dasm_State **Dst = &data->dasm_state;
				int label = ir_const_label(ctx, src);

				| lea Ra(reg), aword [=>label]
			} else {
				ir_emit_load_imm_int(ctx, type, reg, insn->val.i64);
			}
		} else {
			ir_emit_load_imm_fp(ctx, type, reg, src);
		}
	} else {
		ir_emit_load_mem(ctx, type, reg, ir_ref_spill_slot(ctx, src));
	}
}

/* Store the integer register "reg" to "mem". */
static void ir_emit_store_mem_int(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_MEM_REG_OP mov, type, mem, reg
}

/* Store the floating-point register "reg" to "mem". */
static void ir_emit_store_mem_fp(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_MEM_REG_OP movs, type, mem, reg
}

/* Store the (sign-extended) 32-bit immediate "imm" to "mem". */
static void ir_emit_store_mem_imm(ir_ctx *ctx, ir_type type, ir_mem mem, int32_t imm)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_MEM_IMM_OP mov, type, mem, imm
}

/* Store an integer constant "src" to "mem".  Uses an immediate store when the
 * value fits into a signed 32-bit field; otherwise materializes it in
 * "tmp_reg" first.  String constants are stored via their constant-pool label. */
static void ir_emit_store_mem_int_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, bool is_arg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *val_insn = &ctx->ir_base[src];

	IR_ASSERT(IR_IS_CONST_REF(src));
	if (val_insn->op == IR_STR) {
		int label = ir_const_label(ctx, src);

		IR_ASSERT(tmp_reg != IR_REG_NONE);
|.if X64
		| lea Ra(tmp_reg), aword [=>label]
|| ir_emit_store_mem_int(ctx, type, mem, tmp_reg);
|.else
		| ASM_TMEM_TXT_OP mov, aword, mem, =>label
|.endif
	} else {
		int64_t val = val_insn->val.i64;

		if (val_insn->op == IR_FUNC || val_insn->op == IR_SYM) {
			val = (int64_t)(intptr_t)ir_sym_val(ctx, val_insn);
		}

		if (sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(val)) {
			/* widen sub-32-bit argument stores to a full 32-bit word
			 * (NOTE(review): presumably because outgoing argument slots are
			 * written whole - confirm against the calling conventions) */
			if (is_arg && ir_type_size[type] < 4) {
				type = IR_U32;
			}
			ir_emit_store_mem_imm(ctx, type, mem, val);
		} else {
			IR_ASSERT(tmp_reg != IR_REG_NONE);
			ir_emit_load_imm_int(ctx, type, tmp_reg, val);
			ir_emit_store_mem_int(ctx, type, mem, tmp_reg);
		}
	}
}

/* Store a floating-point constant "src" to "mem".  Floats are stored as their
 * 32-bit bit pattern; doubles as a 64-bit immediate through "tmp_reg" (or via
 * "tmp_fp_reg" when no GP scratch register is available / on 32-bit). */
static void ir_emit_store_mem_fp_const(ir_ctx *ctx, ir_type type, ir_mem mem, ir_ref src, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	ir_val *val = &ctx->ir_base[src].val;

	if (type == IR_FLOAT) {
		ir_emit_store_mem_imm(ctx, IR_U32, mem, val->i32);
	} else if (sizeof(void*) == 8 && val->i64 == 0) {
		ir_emit_store_mem_imm(ctx, IR_U64, mem, 0);
	} else if (sizeof(void*) == 8 && tmp_reg != IR_REG_NONE) {
		ir_emit_load_imm_int(ctx, IR_U64, tmp_reg, val->i64);
		ir_emit_store_mem_int(ctx, IR_U64, mem, tmp_reg);
	} else {
		ir_emit_load(ctx, type, tmp_fp_reg, src);
		ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg);
	}
}

/* Type-dispatching memory store (integer vs floating point). */
static void ir_emit_store_mem(ir_ctx *ctx, ir_type type, ir_mem mem, ir_reg reg)
{
	if (IR_IS_TYPE_INT(type)) {
		ir_emit_store_mem_int(ctx, type, mem, reg);
	} else {
		ir_emit_store_mem_fp(ctx, type, mem, reg);
	}
}

/* Spill "reg" into the spill slot of "dst". */
static void ir_emit_store(ir_ctx *ctx, ir_type type, ir_ref dst, ir_reg reg)
{
	IR_ASSERT(dst >= 0);
	ir_emit_store_mem(ctx, type, ir_ref_spill_slot(ctx, dst), reg);
}

/* Register-to-register integer move. */
static void ir_emit_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_REG_REG_OP mov, type, dst, src
}

/* This backend can swap two integer registers without a scratch register. */
#define IR_HAVE_SWAP_INT

/* Exchange two integer registers (xchg). */
static void ir_emit_swap(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_REG_REG_OP xchg, type, dst, src
}

/* Move "src" to "dst", zero- or sign-extending 8/16-bit values to 32 bits
 * according to the signedness of "type". */
static void ir_emit_mov_ext(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (ir_type_size[type] > 2) {
		| ASM_REG_REG_OP mov, type, dst, src
	} else if (ir_type_size[type] == 2) {
		if (IR_IS_TYPE_SIGNED(type)) {
			| movsx Rd(dst), Rw(src)
		} else {
			| movzx Rd(dst), Rw(src)
		}
	} else /* if (ir_type_size[type] == 1) */ {
		if (IR_IS_TYPE_SIGNED(type)) {
			| movsx Rd(dst), Rb(src)
		} else {
			| movzx Rd(dst), Rb(src)
		}
	}
}

/* Register-to-register floating-point move (movaps/movapd). */
static void ir_emit_fp_mov(ir_ctx *ctx, ir_type type, ir_reg dst, ir_reg src)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	| ASM_FP_REG_REG_OP movap, type, dst, src
}

/* Turn a constant address (symbol or plain integer) into an absolute-offset
 * ir_mem operand.  The address must fit into a signed 32-bit displacement on
 * 64-bit targets. */
static ir_mem ir_fuse_addr_const(ir_ctx *ctx, ir_ref ref)
{
	ir_mem mem;
	ir_insn *addr_insn = &ctx->ir_base[ref];

	IR_ASSERT(IR_IS_CONST_REF(ref));
	if (IR_IS_SYM_CONST(addr_insn->op)) {
		void *addr = ir_sym_val(ctx, addr_insn);
		IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT((intptr_t)addr));
		mem = IR_MEM_O((int32_t)(intptr_t)addr);
	} else {
		IR_ASSERT(sizeof(void*) == 4 || IR_IS_SIGNED_32BIT(addr_insn->val.i64));
		mem = IR_MEM_O(addr_insn->val.i32);
	}
	return mem;
}

/* Reconstruct an x86 [base + index*scale + offset] operand from a fused
 * address-computation chain previously matched as one of the IR_LEA_* rules.
 * base_reg_ref/index_reg_ref encode "insn ref * sizeof(ir_ref) + operand
 * number", i.e. a byte index into the per-instruction register array
 * (ctx->regs) and operand array (ctx->ir_base). */
static ir_mem ir_fuse_addr(ir_ctx *ctx, ir_ref root, ir_ref ref)
{
	uint32_t rule = ctx->rules[ref];
	ir_insn *insn = &ctx->ir_base[ref];
	ir_insn *op1_insn, *op2_insn, *offset_insn;
	ir_ref base_reg_ref, index_reg_ref;
	ir_reg base_reg, index_reg;
	int32_t offset, scale;

	IR_ASSERT((rule & IR_RULE_MASK) >= IR_LEA_OB && (rule & IR_RULE_MASK) <= IR_LEA_SI_B);
	switch (rule & IR_RULE_MASK) {
		default:
			IR_ASSERT(0);
		case IR_LEA_OB:
			offset_insn = insn;
			base_reg_ref = ref * sizeof(ir_ref) + 1;
			index_reg_ref = IR_UNUSED;
			scale = 1;
			break;
		case IR_LEA_SI:
			scale = ctx->ir_base[insn->op2].val.i32;
			index_reg_ref = ref * sizeof(ir_ref) + 1;
			base_reg_ref = IR_UNUSED;
			offset_insn = NULL;
			break;
		case IR_LEA_SIB:
			/* base and index are the same operand, so one multiple of it is
			 * carried by the base and the scale is reduced by one */
			base_reg_ref = index_reg_ref = ref * sizeof(ir_ref) + 1;
			scale = ctx->ir_base[insn->op2].val.i32 - 1;
			offset_insn = NULL;
			break;
		case IR_LEA_IB:
			base_reg_ref = ref * sizeof(ir_ref) + 1;
			index_reg_ref = ref * sizeof(ir_ref) + 2;
			offset_insn = NULL;
			scale = 1;
			break;
		case IR_LEA_OB_I:
			base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			index_reg_ref = ref * sizeof(ir_ref) + 2;
			op1_insn = &ctx->ir_base[insn->op1];
			offset_insn = op1_insn;
			scale = 1;
			break;
		case IR_LEA_I_OB:
			base_reg_ref = ref * sizeof(ir_ref) + 1;
			index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			op2_insn = &ctx->ir_base[insn->op2];
			offset_insn = op2_insn;
			scale = 1;
			break;
		case IR_LEA_SI_O:
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32;
			offset_insn = insn;
			base_reg_ref = IR_UNUSED;
			break;
		case IR_LEA_SIB_O:
			base_reg_ref = index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32 - 1;
			offset_insn = insn;
			break;
		case IR_LEA_IB_O:
			base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 2;
			offset_insn = insn;
			scale = 1;
			break;
		case IR_LEA_OB_SI:
			base_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			offset_insn = op1_insn;
			op2_insn = &ctx->ir_base[insn->op2];
			scale = ctx->ir_base[op2_insn->op2].val.i32;
			break;
		case IR_LEA_SI_OB:
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			base_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32;
			op2_insn = &ctx->ir_base[insn->op2];
			offset_insn = op2_insn;
			break;
		case IR_LEA_B_SI:
			base_reg_ref = ref * sizeof(ir_ref) + 1;
			index_reg_ref = insn->op2 * sizeof(ir_ref) + 1;
			op2_insn = &ctx->ir_base[insn->op2];
			scale = ctx->ir_base[op2_insn->op2].val.i32;
			offset_insn = NULL;
			break;
		case IR_LEA_SI_B:
			index_reg_ref = insn->op1 * sizeof(ir_ref) + 1;
			base_reg_ref = ref * sizeof(ir_ref) + 2;
			op1_insn = &ctx->ir_base[insn->op1];
			scale = ctx->ir_base[op1_insn->op2].val.i32;
			offset_insn = NULL;
			break;
	}

	/* resolve the constant displacement (negated for a fused SUB) */
	offset = 0;
	if (offset_insn) {
		ir_insn *addr_insn = &ctx->ir_base[offset_insn->op2];

		if (IR_IS_SYM_CONST(addr_insn->op)) {
			void *addr = ir_sym_val(ctx, addr_insn);
			IR_ASSERT(sizeof(void*) != 8 || IR_IS_SIGNED_32BIT((intptr_t)addr));
			offset = (int64_t)(intptr_t)(addr);
		} else {
			offset = addr_insn->val.i32;
			if (offset_insn->op == IR_SUB) {
				offset = -offset;
			}
		}
	}

	/* fetch the base register, reloading it from the spill slot if needed */
	base_reg = IR_REG_NONE;
	if (base_reg_ref) {
		if (UNEXPECTED(ctx->rules[base_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) {
			base_reg = ir_get_fused_reg(ctx, root, base_reg_ref);
		} else {
			base_reg = ((int8_t*)ctx->regs)[base_reg_ref];
		}
		IR_ASSERT(base_reg != IR_REG_NONE);
		if (IR_REG_SPILLED(base_reg)) {
			base_reg = IR_REG_NUM(base_reg);
			ir_emit_load(ctx, insn->type, base_reg, ((ir_ref*)ctx->ir_base)[base_reg_ref]);
		}
	}

	/* fetch the index register; reuse the base register when both operands
	 * refer to the same value */
	index_reg = IR_REG_NONE;
	if (index_reg_ref) {
		if (base_reg_ref
		 && ((ir_ref*)ctx->ir_base)[index_reg_ref] == ((ir_ref*)ctx->ir_base)[base_reg_ref]) {
			index_reg = base_reg;
		} else {
			if (UNEXPECTED(ctx->rules[index_reg_ref / sizeof(ir_ref)] & IR_FUSED_REG)) {
				index_reg = ir_get_fused_reg(ctx, root, index_reg_ref);
			} else {
				index_reg = ((int8_t*)ctx->regs)[index_reg_ref];
			}
			IR_ASSERT(index_reg != IR_REG_NONE);
			if (IR_REG_SPILLED(index_reg)) {
				index_reg = IR_REG_NUM(index_reg);
				ir_emit_load(ctx, insn->type, index_reg, ((ir_ref*)ctx->ir_base)[index_reg_ref]);
			}
		}
	}

	return IR_MEM(base_reg, offset, index_reg, scale);
}

/* Build the memory operand for a fused LOAD/STORE: either [reg] when an
 * address register was allocated (reloading/materializing it first if
 * necessary), an absolute constant address, or a fused LEA chain. */
static ir_mem ir_fuse_mem(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_insn *mem_insn, ir_reg reg)
{
	if (reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(reg) || IR_IS_CONST_REF(mem_insn->op2)) {
			reg = IR_REG_NUM(reg);
			ir_emit_load(ctx, IR_ADDR, reg, mem_insn->op2);
		}
		return IR_MEM_B(reg);
	} else if (IR_IS_CONST_REF(mem_insn->op2)) {
		return ir_fuse_addr_const(ctx, mem_insn->op2);
	} else {
		return ir_fuse_addr(ctx, root, mem_insn->op2);
	}
}

/* Memory operand of a fused IR_LOAD; the address register (operand 2) may be
 * owned by another fused instruction of "root". */
static ir_mem ir_fuse_load(ir_ctx *ctx, ir_ref root, ir_ref ref)
{
	ir_insn *load_insn = &ctx->ir_base[ref];
	ir_reg reg;

	IR_ASSERT(load_insn->op == IR_LOAD);
	if (UNEXPECTED(ctx->rules[ref] & IR_FUSED_REG)) {
		reg = ir_get_fused_reg(ctx, root, ref * sizeof(ir_ref) + 2);
	} else {
		reg = ctx->regs[ref][2];
	}
	return ir_fuse_mem(ctx, root, ref, load_insn, reg);
}

/* Resolve a constant reference to a 32-bit immediate (symbols through
 * ir_sym_val()); the value must fit into a signed 32-bit field. */
static int32_t ir_fuse_imm(ir_ctx *ctx, ir_ref ref)
{
	ir_insn *val_insn = &ctx->ir_base[ref];

	IR_ASSERT(IR_IS_CONST_REF(ref));
	if (IR_IS_SYM_CONST(val_insn->op)) {
		void *addr = ir_sym_val(ctx, val_insn);
		IR_ASSERT(IR_IS_SIGNED_32BIT((intptr_t)addr));
		return (int32_t)(intptr_t)addr;
	} else {
		IR_ASSERT(IR_IS_SIGNED_32BIT(val_insn->val.i32));
		return val_insn->val.i32;
	}
}

static void
ir_emit_prologue(ir_ctx *ctx) 3152{ 3153 ir_backend_data *data = ctx->data; 3154 dasm_State **Dst = &data->dasm_state; 3155 3156 if (ctx->flags & IR_USE_FRAME_POINTER) { 3157 | push Ra(IR_REG_RBP) 3158 | mov Ra(IR_REG_RBP), Ra(IR_REG_RSP) 3159 } 3160 if (ctx->stack_frame_size + ctx->call_stack_size) { 3161 if (ctx->fixed_stack_red_zone) { 3162 IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); 3163 } else { 3164 | sub Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) 3165 } 3166 } 3167 if (ctx->used_preserved_regs) { 3168 ir_reg fp; 3169 int offset; 3170 uint32_t i; 3171 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 3172 3173 if (ctx->flags & IR_USE_FRAME_POINTER) { 3174 fp = IR_REG_FRAME_POINTER; 3175 offset = 0; 3176 } else { 3177 fp = IR_REG_STACK_POINTER; 3178 offset = ctx->stack_frame_size + ctx->call_stack_size; 3179 } 3180 for (i = 0; i < IR_REG_NUM; i++) { 3181 if (IR_REGSET_IN(used_preserved_regs, i)) { 3182 if (i < IR_REG_FP_FIRST) { 3183 offset -= sizeof(void*); 3184 | mov aword [Ra(fp)+offset], Ra(i) 3185 } else { 3186 offset -= sizeof(void*); 3187 if (ctx->mflags & IR_X86_AVX) { 3188 | vmovsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) 3189 } else { 3190 | movsd qword [Ra(fp)+offset], xmm(i-IR_REG_FP_FIRST) 3191 } 3192 } 3193 } 3194 } 3195 } 3196 if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { 3197#if defined(_WIN64) 3198 ir_reg fp; 3199 int offset; 3200 3201 if (ctx->flags & IR_USE_FRAME_POINTER) { 3202 fp = IR_REG_FRAME_POINTER; 3203 offset = sizeof(void*) * 2; 3204 } else { 3205 fp = IR_REG_STACK_POINTER; 3206 offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*); 3207 } 3208 | mov [Ra(fp)+offset], Ra(IR_REG_INT_ARG1) 3209 | mov [Ra(fp)+offset+8], Ra(IR_REG_INT_ARG2) 3210 | mov [Ra(fp)+offset+16], Ra(IR_REG_INT_ARG3) 3211 | mov [Ra(fp)+offset+24], Ra(IR_REG_INT_ARG4) 3212#elif defined(IR_TARGET_X64) 3213|.if X64 3214 const int8_t 
*int_reg_params = _ir_int_reg_params; 3215 const int8_t *fp_reg_params = _ir_fp_reg_params; 3216 uint32_t i; 3217 ir_reg fp; 3218 int offset; 3219 3220 if (ctx->flags & IR_USE_FRAME_POINTER) { 3221 fp = IR_REG_FRAME_POINTER; 3222 3223 offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); 3224 } else { 3225 fp = IR_REG_STACK_POINTER; 3226 offset = ctx->locals_area_size + ctx->call_stack_size; 3227 } 3228 3229 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 3230 /* skip named args */ 3231 offset += sizeof(void*) * ctx->gp_reg_params; 3232 for (i = ctx->gp_reg_params; i < IR_REG_INT_ARGS; i++) { 3233 | mov qword [Ra(fp)+offset], Rq(int_reg_params[i]) 3234 offset += sizeof(void*); 3235 } 3236 } 3237 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 3238 | test al, al 3239 | je >1 3240 /* skip named args */ 3241 offset += 16 * ctx->fp_reg_params; 3242 for (i = ctx->fp_reg_params; i < IR_REG_FP_ARGS; i++) { 3243 | movaps [Ra(fp)+offset], xmm(fp_reg_params[i]-IR_REG_FP_FIRST) 3244 offset += 16; 3245 } 3246 |1: 3247 } 3248|.endif 3249#endif 3250 } 3251} 3252 3253static void ir_emit_epilogue(ir_ctx *ctx) 3254{ 3255 ir_backend_data *data = ctx->data; 3256 dasm_State **Dst = &data->dasm_state; 3257 3258 if (ctx->used_preserved_regs) { 3259 int offset; 3260 uint32_t i; 3261 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 3262 3263 if (ctx->flags & IR_USE_FRAME_POINTER) { 3264 offset = 0; 3265 } else { 3266 offset = ctx->stack_frame_size + ctx->call_stack_size; 3267 } 3268 for (i = 0; i < IR_REG_NUM; i++) { 3269 if (IR_REGSET_IN(used_preserved_regs, i)) { 3270 if (i < IR_REG_FP_FIRST) { 3271 ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3272 3273 offset -= sizeof(void*); 3274 | mov Ra(i), aword [Ra(fp)+offset] 3275 } else { 3276 ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER; 3277 3278 offset -= sizeof(void*); 3279 if (ctx->mflags & IR_X86_AVX) { 3280 | vmovsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] 3281 } else { 3282 | movsd xmm(i-IR_REG_FP_FIRST), qword [Ra(fp)+offset] 3283 } 3284 } 3285 } 3286 } 3287 } 3288 3289 if (ctx->flags & IR_USE_FRAME_POINTER) { 3290 | mov Ra(IR_REG_RSP), Ra(IR_REG_RBP) 3291 | pop Ra(IR_REG_RBP) 3292 } else if (ctx->stack_frame_size + ctx->call_stack_size) { 3293 if (ctx->fixed_stack_red_zone) { 3294 IR_ASSERT(ctx->stack_frame_size + ctx->call_stack_size <= ctx->fixed_stack_red_zone); 3295 } else { 3296 | add Ra(IR_REG_RSP), (ctx->stack_frame_size + ctx->call_stack_size) 3297 } 3298 } 3299} 3300 3301static void ir_emit_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3302{ 3303 ir_backend_data *data = ctx->data; 3304 dasm_State **Dst = &data->dasm_state; 3305 ir_type type = insn->type; 3306 ir_ref op1 = insn->op1; 3307 ir_ref op2 = insn->op2; 3308 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3309 ir_reg op1_reg = ctx->regs[def][1]; 3310 ir_reg op2_reg = ctx->regs[def][2]; 3311 3312 IR_ASSERT(def_reg != IR_REG_NONE); 3313 3314 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3315 op1_reg = IR_REG_NUM(op1_reg); 3316 ir_emit_load(ctx, type, op1_reg, op1); 3317 } 3318 if (def_reg != op1_reg) { 3319 if (op1_reg != IR_REG_NONE) { 3320 ir_emit_mov(ctx, type, def_reg, op1_reg); 3321 } else { 3322 ir_emit_load(ctx, type, def_reg, op1); 3323 } 3324 if (op1 == op2) { 3325 op2_reg = def_reg; 3326 } 3327 } 3328 3329 if (op2_reg != IR_REG_NONE) { 3330 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 3331 op2_reg = IR_REG_NUM(op2_reg); 3332 if (op1 != op2) { 3333 ir_emit_load(ctx, type, op2_reg, op2); 3334 } 3335 } 3336 switch (insn->op) { 3337 default: 3338 IR_ASSERT(0 && "NIY binary op"); 3339 case IR_ADD: 3340 case IR_ADD_OV: 3341 | ASM_REG_REG_OP add, type, def_reg, op2_reg 3342 break; 3343 case IR_SUB: 3344 case IR_SUB_OV: 3345 | ASM_REG_REG_OP sub, 
type, def_reg, op2_reg 3346 break; 3347 case IR_MUL: 3348 case IR_MUL_OV: 3349 | ASM_REG_REG_MUL imul, type, def_reg, op2_reg 3350 break; 3351 case IR_OR: 3352 | ASM_REG_REG_OP or, type, def_reg, op2_reg 3353 break; 3354 case IR_AND: 3355 | ASM_REG_REG_OP and, type, def_reg, op2_reg 3356 break; 3357 case IR_XOR: 3358 | ASM_REG_REG_OP xor, type, def_reg, op2_reg 3359 break; 3360 } 3361 } else if (IR_IS_CONST_REF(op2)) { 3362 int32_t val = ir_fuse_imm(ctx, op2); 3363 3364 switch (insn->op) { 3365 default: 3366 IR_ASSERT(0 && "NIY binary op"); 3367 case IR_ADD: 3368 case IR_ADD_OV: 3369 | ASM_REG_IMM_OP add, type, def_reg, val 3370 break; 3371 case IR_SUB: 3372 case IR_SUB_OV: 3373 | ASM_REG_IMM_OP sub, type, def_reg, val 3374 break; 3375 case IR_MUL: 3376 case IR_MUL_OV: 3377 | ASM_REG_IMM_MUL imul, type, def_reg, val 3378 break; 3379 case IR_OR: 3380 | ASM_REG_IMM_OP or, type, def_reg, val 3381 break; 3382 case IR_AND: 3383 | ASM_REG_IMM_OP and, type, def_reg, val 3384 break; 3385 case IR_XOR: 3386 | ASM_REG_IMM_OP xor, type, def_reg, val 3387 break; 3388 } 3389 } else { 3390 ir_mem mem; 3391 3392 if (ir_rule(ctx, op2) & IR_FUSED) { 3393 mem = ir_fuse_load(ctx, def, op2); 3394 } else { 3395 mem = ir_ref_spill_slot(ctx, op2); 3396 } 3397 switch (insn->op) { 3398 default: 3399 IR_ASSERT(0 && "NIY binary op"); 3400 case IR_ADD: 3401 case IR_ADD_OV: 3402 | ASM_REG_MEM_OP add, type, def_reg, mem 3403 break; 3404 case IR_SUB: 3405 case IR_SUB_OV: 3406 | ASM_REG_MEM_OP sub, type, def_reg, mem 3407 break; 3408 case IR_MUL: 3409 case IR_MUL_OV: 3410 | ASM_REG_MEM_MUL imul, type, def_reg, mem 3411 break; 3412 case IR_OR: 3413 | ASM_REG_MEM_OP or, type, def_reg, mem 3414 break; 3415 case IR_AND: 3416 | ASM_REG_MEM_OP and, type, def_reg, mem 3417 break; 3418 case IR_XOR: 3419 | ASM_REG_MEM_OP xor, type, def_reg, mem 3420 break; 3421 } 3422 } 3423 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3424 ir_emit_store(ctx, type, def, def_reg); 3425 } 3426} 3427 3428static void 
ir_emit_imul3(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3429{ 3430 ir_backend_data *data = ctx->data; 3431 dasm_State **Dst = &data->dasm_state; 3432 ir_type type = insn->type; 3433 ir_ref op1 = insn->op1; 3434 ir_ref op2 = insn->op2; 3435 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3436 ir_reg op1_reg = ctx->regs[def][1]; 3437 int32_t val = ir_fuse_imm(ctx, op2); 3438 3439 IR_ASSERT(def_reg != IR_REG_NONE); 3440 IR_ASSERT(!IR_IS_CONST_REF(op1)); 3441 3442 if (op1_reg != IR_REG_NONE) { 3443 if (IR_REG_SPILLED(op1_reg)) { 3444 op1_reg = IR_REG_NUM(op1_reg); 3445 ir_emit_load(ctx, type, op1_reg, op1); 3446 } 3447 switch (ir_type_size[type]) { 3448 default: 3449 IR_ASSERT(0); 3450 case 2: 3451 | imul Rw(def_reg), Rw(op1_reg), val 3452 break; 3453 case 4: 3454 | imul Rd(def_reg), Rd(op1_reg), val 3455 break; 3456|.if X64 3457|| case 8: 3458| imul Rq(def_reg), Rq(op1_reg), val 3459|| break; 3460|.endif 3461 } 3462 } else { 3463 ir_mem mem; 3464 3465 if (ir_rule(ctx, op1) & IR_FUSED) { 3466 mem = ir_fuse_load(ctx, def, op1); 3467 } else { 3468 mem = ir_ref_spill_slot(ctx, op1); 3469 } 3470 | ASM_REG_MEM_TXT_MUL imul, type, def_reg, mem, val 3471 } 3472 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3473 ir_emit_store(ctx, type, def, def_reg); 3474 } 3475} 3476 3477static void ir_emit_min_max_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3478{ 3479 ir_backend_data *data = ctx->data; 3480 dasm_State **Dst = &data->dasm_state; 3481 ir_type type = insn->type; 3482 ir_ref op1 = insn->op1; 3483 ir_ref op2 = insn->op2; 3484 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3485 ir_reg op1_reg = ctx->regs[def][1]; 3486 ir_reg op2_reg = ctx->regs[def][2]; 3487 3488 IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE); 3489 3490 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3491 op1_reg = IR_REG_NUM(op1_reg); 3492 ir_emit_load(ctx, type, op1_reg, op1); 3493 } 3494 if (def_reg != op1_reg) { 3495 if (op1_reg != IR_REG_NONE) { 3496 ir_emit_mov(ctx, type, def_reg, op1_reg); 
3497 } else { 3498 ir_emit_load(ctx, type, def_reg, op1); 3499 } 3500 } 3501 3502 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 3503 op2_reg = IR_REG_NUM(op2_reg); 3504 if (op1 != op2) { 3505 ir_emit_load(ctx, type, op2_reg, op2); 3506 } 3507 } 3508 3509 if (op1 == op2) { 3510 return; 3511 } 3512 3513 | ASM_REG_REG_OP cmp, type, def_reg, op2_reg 3514 if (insn->op == IR_MIN) { 3515 if (IR_IS_TYPE_SIGNED(type)) { 3516 | ASM_REG_REG_OP2 cmovg, type, def_reg, op2_reg 3517 } else { 3518 | ASM_REG_REG_OP2 cmova, type, def_reg, op2_reg 3519 } 3520 } else { 3521 IR_ASSERT(insn->op == IR_MAX); 3522 if (IR_IS_TYPE_SIGNED(type)) { 3523 | ASM_REG_REG_OP2 cmovl, type, def_reg, op2_reg 3524 } else { 3525 | ASM_REG_REG_OP2 cmovb, type, def_reg, op2_reg 3526 } 3527 } 3528 3529 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3530 ir_emit_store(ctx, type, def, def_reg); 3531 } 3532} 3533 3534static void ir_emit_overflow(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3535{ 3536 ir_backend_data *data = ctx->data; 3537 dasm_State **Dst = &data->dasm_state; 3538 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3539 ir_type type = ctx->ir_base[insn->op1].type; 3540 3541 IR_ASSERT(def_reg != IR_REG_NONE); 3542 IR_ASSERT(IR_IS_TYPE_INT(type)); 3543 if (IR_IS_TYPE_SIGNED(type)) { 3544 | seto Rb(def_reg) 3545 } else { 3546 | setc Rb(def_reg) 3547 } 3548 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3549 ir_emit_store(ctx, insn->type, def, def_reg); 3550 } 3551} 3552 3553static void ir_emit_overflow_and_branch(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 3554{ 3555 ir_backend_data *data = ctx->data; 3556 dasm_State **Dst = &data->dasm_state; 3557 ir_insn *overflow_insn = &ctx->ir_base[insn->op2]; 3558 ir_type type = ctx->ir_base[overflow_insn->op1].type; 3559 uint32_t true_block, false_block, next_block; 3560 bool reverse = 0; 3561 3562 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 3563 if (true_block == next_block) { 3564 reverse = 1; 3565 true_block = false_block; 
3566 false_block = 0; 3567 } else if (false_block == next_block) { 3568 false_block = 0; 3569 } 3570 3571 if (IR_IS_TYPE_SIGNED(type)) { 3572 if (reverse) { 3573 | jno =>true_block 3574 } else { 3575 | jo =>true_block 3576 } 3577 } else { 3578 if (reverse) { 3579 | jnc =>true_block 3580 } else { 3581 | jc =>true_block 3582 } 3583 } 3584 if (false_block) { 3585 | jmp =>false_block 3586 } 3587} 3588 3589static void ir_emit_mem_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3590{ 3591 ir_backend_data *data = ctx->data; 3592 dasm_State **Dst = &data->dasm_state; 3593 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 3594 ir_type type = op_insn->type; 3595 ir_ref op2 = op_insn->op2; 3596 ir_reg op2_reg = ctx->regs[insn->op3][2]; 3597 ir_mem mem; 3598 3599 if (insn->op == IR_STORE) { 3600 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 3601 } else { 3602 IR_ASSERT(insn->op == IR_VSTORE); 3603 mem = ir_var_spill_slot(ctx, insn->op2); 3604 } 3605 3606 if (op2_reg == IR_REG_NONE) { 3607 int32_t val = ir_fuse_imm(ctx, op2); 3608 3609 switch (op_insn->op) { 3610 default: 3611 IR_ASSERT(0 && "NIY binary op"); 3612 case IR_ADD: 3613 case IR_ADD_OV: 3614 | ASM_MEM_IMM_OP add, type, mem, val 3615 break; 3616 case IR_SUB: 3617 case IR_SUB_OV: 3618 | ASM_MEM_IMM_OP sub, type, mem, val 3619 break; 3620 case IR_OR: 3621 | ASM_MEM_IMM_OP or, type, mem, val 3622 break; 3623 case IR_AND: 3624 | ASM_MEM_IMM_OP and, type, mem, val 3625 break; 3626 case IR_XOR: 3627 | ASM_MEM_IMM_OP xor, type, mem, val 3628 break; 3629 } 3630 } else { 3631 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 3632 op2_reg = IR_REG_NUM(op2_reg); 3633 ir_emit_load(ctx, type, op2_reg, op2); 3634 } 3635 switch (op_insn->op) { 3636 default: 3637 IR_ASSERT(0 && "NIY binary op"); 3638 case IR_ADD: 3639 case IR_ADD_OV: 3640 | ASM_MEM_REG_OP add, type, mem, op2_reg 3641 break; 3642 case IR_SUB: 3643 case IR_SUB_OV: 3644 | ASM_MEM_REG_OP sub, type, mem, op2_reg 3645 break; 3646 case IR_OR: 3647 | 
ASM_MEM_REG_OP or, type, mem, op2_reg 3648 break; 3649 case IR_AND: 3650 | ASM_MEM_REG_OP and, type, mem, op2_reg 3651 break; 3652 case IR_XOR: 3653 | ASM_MEM_REG_OP xor, type, mem, op2_reg 3654 break; 3655 } 3656 } 3657} 3658 3659static void ir_emit_reg_binop_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3660{ 3661 ir_backend_data *data = ctx->data; 3662 dasm_State **Dst = &data->dasm_state; 3663 ir_insn *op_insn = &ctx->ir_base[insn->op2]; 3664 ir_type type = op_insn->type; 3665 ir_ref op2 = op_insn->op2; 3666 ir_reg op2_reg = ctx->regs[insn->op2][2]; 3667 ir_reg reg; 3668 3669 IR_ASSERT(insn->op == IR_RSTORE); 3670 reg = insn->op3; 3671 3672 if (op2_reg == IR_REG_NONE) { 3673 int32_t val = ir_fuse_imm(ctx, op2); 3674 3675 switch (op_insn->op) { 3676 default: 3677 IR_ASSERT(0 && "NIY binary op"); 3678 case IR_ADD: 3679 | ASM_REG_IMM_OP add, type, reg, val 3680 break; 3681 case IR_SUB: 3682 | ASM_REG_IMM_OP sub, type, reg, val 3683 break; 3684 case IR_OR: 3685 | ASM_REG_IMM_OP or, type, reg, val 3686 break; 3687 case IR_AND: 3688 | ASM_REG_IMM_OP and, type, reg, val 3689 break; 3690 case IR_XOR: 3691 | ASM_REG_IMM_OP xor, type, reg, val 3692 break; 3693 } 3694 } else { 3695 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) { 3696 op2_reg = IR_REG_NUM(op2_reg); 3697 ir_emit_load(ctx, type, op2_reg, op2); 3698 } 3699 switch (op_insn->op) { 3700 default: 3701 IR_ASSERT(0 && "NIY binary op"); 3702 case IR_ADD: 3703 | ASM_REG_REG_OP add, type, reg, op2_reg 3704 break; 3705 case IR_SUB: 3706 | ASM_REG_REG_OP sub, type, reg, op2_reg 3707 break; 3708 case IR_OR: 3709 | ASM_REG_REG_OP or, type, reg, op2_reg 3710 break; 3711 case IR_AND: 3712 | ASM_REG_REG_OP and, type, reg, op2_reg 3713 break; 3714 case IR_XOR: 3715 | ASM_REG_REG_OP xor, type, reg, op2_reg 3716 break; 3717 } 3718 } 3719} 3720 3721static void ir_emit_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3722{ 3723 ir_backend_data *data = ctx->data; 3724 dasm_State **Dst = &data->dasm_state; 3725 ir_type 
type = insn->type; 3726 ir_ref op1 = insn->op1; 3727 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3728 ir_reg op1_reg = ctx->regs[def][1]; 3729 3730 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 3731 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 3732 IR_ASSERT(def_reg != IR_REG_NONE); 3733 3734 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3735 op1_reg = IR_REG_NUM(op1_reg); 3736 ir_emit_load(ctx, type, op1_reg, op1); 3737 } 3738 if (def_reg != op1_reg) { 3739 if (op1_reg != IR_REG_NONE) { 3740 ir_emit_mov(ctx, type, def_reg, op1_reg); 3741 } else { 3742 ir_emit_load(ctx, type, def_reg, op1); 3743 } 3744 } 3745 if (insn->op == IR_MUL) { 3746 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 3747 3748 if (shift == 1) { 3749 | ASM_REG_REG_OP add, type, def_reg, def_reg 3750 } else { 3751 | ASM_REG_IMM_OP shl, type, def_reg, shift 3752 } 3753 } else if (insn->op == IR_DIV) { 3754 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 3755 3756 | ASM_REG_IMM_OP shr, type, def_reg, shift 3757 } else { 3758 IR_ASSERT(insn->op == IR_MOD); 3759 uint64_t mask = ctx->ir_base[insn->op2].val.u64 - 1; 3760 3761|.if X64 3762|| if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) { 3763|| ir_reg op2_reg = ctx->regs[def][2]; 3764|| 3765|| ir_emit_load_imm_int(ctx, type, op2_reg, mask); 3766 | ASM_REG_REG_OP and, type, def_reg, op2_reg 3767|| } else { 3768|.endif 3769 | ASM_REG_IMM_OP and, type, def_reg, mask 3770|.if X64 3771|| } 3772|.endif 3773 } 3774 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3775 ir_emit_store(ctx, type, def, def_reg); 3776 } 3777} 3778 3779static void ir_emit_sdiv_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3780{ 3781 ir_backend_data *data = ctx->data; 3782 dasm_State **Dst = &data->dasm_state; 3783 ir_type type = insn->type; 3784 ir_ref op1 = insn->op1; 3785 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3786 ir_reg op1_reg = ctx->regs[def][1]; 3787 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 3788 
int64_t offset = ctx->ir_base[insn->op2].val.u64 - 1; 3789 3790 IR_ASSERT(shift != 0); 3791 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 3792 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 3793 IR_ASSERT(op1_reg != IR_REG_NONE && def_reg != IR_REG_NONE && op1_reg != def_reg); 3794 3795 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 3796 op1_reg = IR_REG_NUM(op1_reg); 3797 ir_emit_load(ctx, type, op1_reg, op1); 3798 } 3799 3800 if (shift == 1) { 3801|.if X64 3802|| if (ir_type_size[type] == 8) { 3803 | mov Rq(def_reg), Rq(op1_reg) 3804 | ASM_REG_IMM_OP shr, type, def_reg, 63 3805 | add Rq(def_reg), Rq(op1_reg) 3806|| } else { 3807|.endif 3808 | mov Rd(def_reg), Rd(op1_reg) 3809 | ASM_REG_IMM_OP shr, type, def_reg, (ir_type_size[type]*8-1) 3810 | add Rd(def_reg), Rd(op1_reg) 3811|.if X64 3812|| } 3813|.endif 3814 } else { 3815|.if X64 3816|| if (ir_type_size[type] == 8) { 3817|| ir_reg op2_reg = ctx->regs[def][2]; 3818|| 3819|| if (op2_reg != IR_REG_NONE) { 3820|| ir_emit_load_imm_int(ctx, type, op2_reg, offset); 3821 | lea Rq(def_reg), [Rq(op1_reg)+Rq(op2_reg)] 3822|| } else { 3823 | lea Rq(def_reg), [Rq(op1_reg)+(int32_t)offset] 3824|| } 3825|| } else { 3826|.endif 3827 | lea Rd(def_reg), [Rd(op1_reg)+(int32_t)offset] 3828|.if X64 3829|| } 3830|.endif 3831 | ASM_REG_REG_OP test, type, op1_reg, op1_reg 3832 | ASM_REG_REG_OP2 cmovns, type, def_reg, op1_reg 3833 } 3834 | ASM_REG_IMM_OP sar, type, def_reg, shift 3835 3836 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3837 ir_emit_store(ctx, type, def, def_reg); 3838 } 3839} 3840 3841static void ir_emit_smod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3842{ 3843 ir_backend_data *data = ctx->data; 3844 dasm_State **Dst = &data->dasm_state; 3845 ir_type type = insn->type; 3846 ir_ref op1 = insn->op1; 3847 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3848 ir_reg op1_reg = ctx->regs[def][1]; 3849 ir_reg tmp_reg = ctx->regs[def][3]; 3850 uint32_t shift = IR_LOG2(ctx->ir_base[insn->op2].val.u64); 3851 uint64_t mask 
= ctx->ir_base[insn->op2].val.u64 - 1; 3852 3853 IR_ASSERT(shift != 0); 3854 IR_ASSERT(IR_IS_CONST_REF(insn->op2)); 3855 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 3856 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE && def_reg != tmp_reg); 3857 3858 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3859 op1_reg = IR_REG_NUM(op1_reg); 3860 ir_emit_load(ctx, type, op1_reg, op1); 3861 } 3862 if (def_reg != op1_reg) { 3863 if (op1_reg != IR_REG_NONE) { 3864 ir_emit_mov(ctx, type, def_reg, op1_reg); 3865 } else { 3866 ir_emit_load(ctx, type, def_reg, op1); 3867 } 3868 } 3869 if (tmp_reg != op1_reg) { 3870 ir_emit_mov(ctx, type, tmp_reg, def_reg); 3871 } 3872 3873 3874 if (shift == 1) { 3875 | ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-1) 3876 } else { 3877 | ASM_REG_IMM_OP sar, type, tmp_reg, (ir_type_size[type]*8-1) 3878 | ASM_REG_IMM_OP shr, type, tmp_reg, (ir_type_size[type]*8-shift) 3879 } 3880 | ASM_REG_REG_OP add, type, def_reg, tmp_reg 3881 3882|.if X64 3883|| if (ir_type_size[type] == 8 && ctx->regs[def][2] != IR_REG_NONE) { 3884|| ir_reg op2_reg = ctx->regs[def][2]; 3885|| 3886|| ir_emit_load_imm_int(ctx, type, op2_reg, mask); 3887 | ASM_REG_REG_OP and, type, def_reg, op2_reg 3888|| } else { 3889|.endif 3890 | ASM_REG_IMM_OP and, type, def_reg, mask 3891|.if X64 3892|| } 3893|.endif 3894 3895 | ASM_REG_REG_OP sub, type, def_reg, tmp_reg 3896 3897 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3898 ir_emit_store(ctx, type, def, def_reg); 3899 } 3900} 3901 3902static void ir_emit_mem_mul_div_mod_pwr2(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3903{ 3904 ir_backend_data *data = ctx->data; 3905 dasm_State **Dst = &data->dasm_state; 3906 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 3907 ir_type type = op_insn->type; 3908 ir_mem mem; 3909 3910 IR_ASSERT(IR_IS_CONST_REF(op_insn->op2)); 3911 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op)); 3912 3913 if (insn->op == IR_STORE) { 3914 mem = ir_fuse_mem(ctx, def, def, insn, 
ctx->regs[def][2]); 3915 } else { 3916 IR_ASSERT(insn->op == IR_VSTORE); 3917 mem = ir_var_spill_slot(ctx, insn->op2); 3918 } 3919 3920 if (op_insn->op == IR_MUL) { 3921 uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); 3922 | ASM_MEM_IMM_OP shl, type, mem, shift 3923 } else if (op_insn->op == IR_DIV) { 3924 uint32_t shift = IR_LOG2(ctx->ir_base[op_insn->op2].val.u64); 3925 | ASM_MEM_IMM_OP shr, type, mem, shift 3926 } else { 3927 IR_ASSERT(op_insn->op == IR_MOD); 3928 uint64_t mask = ctx->ir_base[op_insn->op2].val.u64 - 1; 3929 IR_ASSERT(IR_IS_UNSIGNED_32BIT(mask)); 3930 | ASM_MEM_IMM_OP and, type, mem, mask 3931 } 3932} 3933 3934static void ir_emit_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3935{ 3936 ir_backend_data *data = ctx->data; 3937 dasm_State **Dst = &data->dasm_state; 3938 ir_type type = insn->type; 3939 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 3940 ir_reg op1_reg = ctx->regs[def][1]; 3941 ir_reg op2_reg = ctx->regs[def][2]; 3942 3943 IR_ASSERT(def_reg != IR_REG_NONE && def_reg != IR_REG_RCX); 3944 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 3945 op1_reg = IR_REG_NUM(op1_reg); 3946 ir_emit_load(ctx, type, op1_reg, insn->op1); 3947 } 3948 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 3949 op2_reg = IR_REG_NUM(op2_reg); 3950 ir_emit_load(ctx, type, op2_reg, insn->op2); 3951 } 3952 if (op2_reg != IR_REG_RCX) { 3953 if (op1_reg == IR_REG_RCX) { 3954 ir_emit_mov(ctx, type, def_reg, op1_reg); 3955 op1_reg = def_reg; 3956 } 3957 if (op2_reg != IR_REG_NONE) { 3958 ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); 3959 } else { 3960 ir_emit_load(ctx, type, IR_REG_RCX, insn->op2); 3961 } 3962 } 3963 if (def_reg != op1_reg) { 3964 if (op1_reg != IR_REG_NONE) { 3965 ir_emit_mov(ctx, type, def_reg, op1_reg); 3966 } else { 3967 ir_emit_load(ctx, type, def_reg, insn->op1); 3968 } 3969 } 3970 switch (insn->op) { 3971 default: 3972 IR_ASSERT(0); 3973 case IR_SHL: 3974 | ASM_REG_TXT_OP shl, insn->type, def_reg, cl 3975 break; 3976 
case IR_SHR: 3977 | ASM_REG_TXT_OP shr, insn->type, def_reg, cl 3978 break; 3979 case IR_SAR: 3980 | ASM_REG_TXT_OP sar, insn->type, def_reg, cl 3981 break; 3982 case IR_ROL: 3983 | ASM_REG_TXT_OP rol, insn->type, def_reg, cl 3984 break; 3985 case IR_ROR: 3986 | ASM_REG_TXT_OP ror, insn->type, def_reg, cl 3987 break; 3988 } 3989 if (IR_REG_SPILLED(ctx->regs[def][0])) { 3990 ir_emit_store(ctx, type, def, def_reg); 3991 } 3992} 3993 3994static void ir_emit_mem_shift(ir_ctx *ctx, ir_ref def, ir_insn *insn) 3995{ 3996 ir_backend_data *data = ctx->data; 3997 dasm_State **Dst = &data->dasm_state; 3998 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 3999 ir_type type = op_insn->type; 4000 ir_ref op2 = op_insn->op2; 4001 ir_reg op2_reg = ctx->regs[insn->op3][2]; 4002 ir_mem mem; 4003 4004 if (insn->op == IR_STORE) { 4005 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4006 } else { 4007 IR_ASSERT(insn->op == IR_VSTORE); 4008 mem = ir_var_spill_slot(ctx, insn->op2); 4009 } 4010 4011 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 4012 op2_reg = IR_REG_NUM(op2_reg); 4013 ir_emit_load(ctx, type, op2_reg, op2); 4014 } 4015 if (op2_reg != IR_REG_RCX) { 4016 if (op2_reg != IR_REG_NONE) { 4017 ir_emit_mov(ctx, type, IR_REG_RCX, op2_reg); 4018 } else { 4019 ir_emit_load(ctx, type, IR_REG_RCX, op2); 4020 } 4021 } 4022 switch (op_insn->op) { 4023 default: 4024 IR_ASSERT(0); 4025 case IR_SHL: 4026 | ASM_MEM_TXT_OP shl, type, mem, cl 4027 break; 4028 case IR_SHR: 4029 | ASM_MEM_TXT_OP shr, type, mem, cl 4030 break; 4031 case IR_SAR: 4032 | ASM_MEM_TXT_OP sar, type, mem, cl 4033 break; 4034 case IR_ROL: 4035 | ASM_MEM_TXT_OP rol, type, mem, cl 4036 break; 4037 case IR_ROR: 4038 | ASM_MEM_TXT_OP ror, type, mem, cl 4039 break; 4040 } 4041} 4042 4043static void ir_emit_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4044{ 4045 ir_backend_data *data = ctx->data; 4046 dasm_State **Dst = &data->dasm_state; 4047 int32_t shift; 4048 ir_type type = insn->type; 4049 ir_ref 
op1 = insn->op1; 4050 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4051 ir_reg op1_reg = ctx->regs[def][1]; 4052 4053 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[insn->op2].op)); 4054 IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[insn->op2].val.i64)); 4055 shift = ctx->ir_base[insn->op2].val.i32; 4056 IR_ASSERT(def_reg != IR_REG_NONE); 4057 4058 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4059 op1_reg = IR_REG_NUM(op1_reg); 4060 ir_emit_load(ctx, type, op1_reg, op1); 4061 } 4062 if (def_reg != op1_reg) { 4063 if (op1_reg != IR_REG_NONE) { 4064 ir_emit_mov(ctx, type, def_reg, op1_reg); 4065 } else { 4066 ir_emit_load(ctx, type, def_reg, op1); 4067 } 4068 } 4069 switch (insn->op) { 4070 default: 4071 IR_ASSERT(0); 4072 case IR_SHL: 4073 | ASM_REG_IMM_OP shl, insn->type, def_reg, shift 4074 break; 4075 case IR_SHR: 4076 | ASM_REG_IMM_OP shr, insn->type, def_reg, shift 4077 break; 4078 case IR_SAR: 4079 | ASM_REG_IMM_OP sar, insn->type, def_reg, shift 4080 break; 4081 case IR_ROL: 4082 | ASM_REG_IMM_OP rol, insn->type, def_reg, shift 4083 break; 4084 case IR_ROR: 4085 | ASM_REG_IMM_OP ror, insn->type, def_reg, shift 4086 break; 4087 } 4088 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4089 ir_emit_store(ctx, type, def, def_reg); 4090 } 4091} 4092 4093static void ir_emit_mem_shift_const(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4094{ 4095 ir_backend_data *data = ctx->data; 4096 dasm_State **Dst = &data->dasm_state; 4097 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4098 ir_type type = op_insn->type; 4099 int32_t shift; 4100 ir_mem mem; 4101 4102 IR_ASSERT(IR_IS_CONST_REF(op_insn->op2)); 4103 IR_ASSERT(!IR_IS_SYM_CONST(ctx->ir_base[op_insn->op2].op)); 4104 IR_ASSERT(IR_IS_SIGNED_32BIT(ctx->ir_base[op_insn->op2].val.i64)); 4105 shift = ctx->ir_base[op_insn->op2].val.i32; 4106 if (insn->op == IR_STORE) { 4107 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4108 } else { 4109 IR_ASSERT(insn->op == IR_VSTORE); 4110 mem = ir_var_spill_slot(ctx, insn->op2); 4111 } 
4112 4113 switch (op_insn->op) { 4114 default: 4115 IR_ASSERT(0); 4116 case IR_SHL: 4117 | ASM_MEM_IMM_OP shl, type, mem, shift 4118 break; 4119 case IR_SHR: 4120 | ASM_MEM_IMM_OP shr, type, mem, shift 4121 break; 4122 case IR_SAR: 4123 | ASM_MEM_IMM_OP sar, type, mem, shift 4124 break; 4125 case IR_ROL: 4126 | ASM_MEM_IMM_OP rol, type, mem, shift 4127 break; 4128 case IR_ROR: 4129 | ASM_MEM_IMM_OP ror, type, mem, shift 4130 break; 4131 } 4132} 4133 4134static void ir_emit_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule) 4135{ 4136 ir_backend_data *data = ctx->data; 4137 dasm_State **Dst = &data->dasm_state; 4138 ir_type type = insn->type; 4139 ir_ref op1 = insn->op1; 4140 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4141 ir_reg op1_reg = ctx->regs[def][1]; 4142 4143 IR_ASSERT(def_reg != IR_REG_NONE); 4144 4145 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4146 op1_reg = IR_REG_NUM(op1_reg); 4147 ir_emit_load(ctx, type, op1_reg, op1); 4148 } 4149 if (def_reg != op1_reg) { 4150 if (op1_reg != IR_REG_NONE) { 4151 ir_emit_mov(ctx, type, def_reg, op1_reg); 4152 } else { 4153 ir_emit_load(ctx, type, def_reg, op1); 4154 } 4155 } 4156 if (rule == IR_INC) { 4157 | ASM_REG_OP inc, insn->type, def_reg 4158 } else if (rule == IR_DEC) { 4159 | ASM_REG_OP dec, insn->type, def_reg 4160 } else if (insn->op == IR_NOT) { 4161 | ASM_REG_OP not, insn->type, def_reg 4162 } else if (insn->op == IR_NEG) { 4163 | ASM_REG_OP neg, insn->type, def_reg 4164 } else { 4165 IR_ASSERT(insn->op == IR_BSWAP); 4166 switch (ir_type_size[insn->type]) { 4167 default: 4168 IR_ASSERT(0); 4169 case 4: 4170 | bswap Rd(def_reg) 4171 break; 4172 case 8: 4173 IR_ASSERT(sizeof(void*) == 8); 4174|.if X64 4175 | bswap Rq(def_reg) 4176|.endif 4177 break; 4178 } 4179 } 4180 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4181 ir_emit_store(ctx, type, def, def_reg); 4182 } 4183} 4184 4185static void ir_emit_bit_count(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4186{ 4187 ir_backend_data *data = 
ctx->data; 4188 dasm_State **Dst = &data->dasm_state; 4189 ir_type type = insn->type; 4190 ir_ref op1 = insn->op1; 4191 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4192 ir_reg op1_reg = ctx->regs[def][1]; 4193 4194 IR_ASSERT(def_reg != IR_REG_NONE); 4195 4196 if (op1_reg != IR_REG_NONE) { 4197 if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) { 4198 op1_reg = IR_REG_NUM(op1_reg); 4199 ir_emit_load(ctx, type, op1_reg, op1); 4200 } 4201 switch (ir_type_size[insn->type]) { 4202 default: 4203 IR_ASSERT(0); 4204 case 2: 4205 if (insn->op == IR_CTLZ) { 4206 if (ctx->mflags & IR_X86_BMI1) { 4207 | lzcnt Rw(def_reg), Rw(op1_reg) 4208 } else { 4209 | bsr Rw(def_reg), Rw(op1_reg) 4210 | xor Rw(def_reg), 0xf 4211 } 4212 } else if (insn->op == IR_CTTZ) { 4213 if (ctx->mflags & IR_X86_BMI1) { 4214 | tzcnt Rw(def_reg), Rw(op1_reg) 4215 } else { 4216 | bsf Rw(def_reg), Rw(op1_reg) 4217 } 4218 } else { 4219 IR_ASSERT(insn->op == IR_CTPOP); 4220 | popcnt Rw(def_reg), Rw(op1_reg) 4221 } 4222 break; 4223 case 1: 4224 | movzx Rd(op1_reg), Rb(op1_reg) 4225 if (insn->op == IR_CTLZ) { 4226 if (ctx->mflags & IR_X86_BMI1) { 4227 | lzcnt Rd(def_reg), Rd(op1_reg) 4228 | sub Rd(def_reg), 24 4229 } else { 4230 | bsr Rd(def_reg), Rd(op1_reg) 4231 | xor Rw(def_reg), 0x7 4232 } 4233 break; 4234 } 4235 IR_FALLTHROUGH; 4236 case 4: 4237 if (insn->op == IR_CTLZ) { 4238 if (ctx->mflags & IR_X86_BMI1) { 4239 | lzcnt Rd(def_reg), Rd(op1_reg) 4240 } else { 4241 | bsr Rd(def_reg), Rd(op1_reg) 4242 | xor Rw(def_reg), 0x1f 4243 } 4244 } else if (insn->op == IR_CTTZ) { 4245 if (ctx->mflags & IR_X86_BMI1) { 4246 | tzcnt Rd(def_reg), Rd(op1_reg) 4247 } else { 4248 | bsf Rd(def_reg), Rd(op1_reg) 4249 } 4250 } else { 4251 IR_ASSERT(insn->op == IR_CTPOP); 4252 | popcnt Rd(def_reg), Rd(op1_reg) 4253 } 4254 break; 4255|.if X64 4256 case 8: 4257 if (insn->op == IR_CTLZ) { 4258 if (ctx->mflags & IR_X86_BMI1) { 4259 | lzcnt Rq(def_reg), Rq(op1_reg) 4260 } else { 4261 | bsr Rq(def_reg), Rq(op1_reg) 4262 | xor 
Rw(def_reg), 0x3f 4263 } 4264 } else if (insn->op == IR_CTTZ) { 4265 if (ctx->mflags & IR_X86_BMI1) { 4266 | tzcnt Rq(def_reg), Rq(op1_reg) 4267 } else { 4268 | bsf Rq(def_reg), Rq(op1_reg) 4269 } 4270 } else { 4271 IR_ASSERT(insn->op == IR_CTPOP); 4272 | popcnt Rq(def_reg), Rq(op1_reg) 4273 } 4274 break; 4275|.endif 4276 } 4277 } else { 4278 ir_mem mem; 4279 4280 if (ir_rule(ctx, op1) & IR_FUSED) { 4281 mem = ir_fuse_load(ctx, def, op1); 4282 } else { 4283 mem = ir_ref_spill_slot(ctx, op1); 4284 } 4285 switch (ir_type_size[insn->type]) { 4286 default: 4287 IR_ASSERT(0); 4288 case 2: 4289 if (insn->op == IR_CTLZ) { 4290 if (ctx->mflags & IR_X86_BMI1) { 4291 | ASM_TXT_TMEM_OP lzcnt, Rw(def_reg), word, mem 4292 } else { 4293 | ASM_TXT_TMEM_OP bsr, Rw(def_reg), word, mem 4294 | xor Rw(def_reg), 0xf 4295 } 4296 } else if (insn->op == IR_CTTZ) { 4297 if (ctx->mflags & IR_X86_BMI1) { 4298 | ASM_TXT_TMEM_OP tzcnt, Rw(def_reg), word, mem 4299 } else { 4300 | ASM_TXT_TMEM_OP bsf, Rw(def_reg), word, mem 4301 } 4302 } else { 4303 | ASM_TXT_TMEM_OP popcnt, Rw(def_reg), word, mem 4304 } 4305 break; 4306 case 4: 4307 if (insn->op == IR_CTLZ) { 4308 if (ctx->mflags & IR_X86_BMI1) { 4309 | ASM_TXT_TMEM_OP lzcnt, Rd(def_reg), dword, mem 4310 } else { 4311 | ASM_TXT_TMEM_OP bsr, Rd(def_reg), dword, mem 4312 | xor Rw(def_reg), 0x1f 4313 } 4314 } else if (insn->op == IR_CTTZ) { 4315 if (ctx->mflags & IR_X86_BMI1) { 4316 | ASM_TXT_TMEM_OP tzcnt, Rd(def_reg), dword, mem 4317 } else { 4318 | ASM_TXT_TMEM_OP bsf, Rd(def_reg), dword, mem 4319 } 4320 } else { 4321 | ASM_TXT_TMEM_OP popcnt, Rd(def_reg), dword, mem 4322 } 4323 break; 4324|.if X64 4325 case 8: 4326 if (insn->op == IR_CTLZ) { 4327 if (ctx->mflags & IR_X86_BMI1) { 4328 | ASM_TXT_TMEM_OP lzcnt, Rq(def_reg), qword, mem 4329 } else { 4330 | ASM_TXT_TMEM_OP bsr, Rq(def_reg), qword, mem 4331 | xor Rw(def_reg), 0x3f 4332 } 4333 } else if (insn->op == IR_CTTZ) { 4334 if (ctx->mflags & IR_X86_BMI1) { 4335 | ASM_TXT_TMEM_OP tzcnt, 
Rq(def_reg), qword, mem 4336 } else { 4337 | ASM_TXT_TMEM_OP bsf, Rq(def_reg), qword, mem 4338 } 4339 } else { 4340 | ASM_TXT_TMEM_OP popcnt, Rq(def_reg), qword, mem 4341 } 4342 break; 4343|.endif 4344 } 4345 } 4346 4347 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4348 ir_emit_store(ctx, type, def, def_reg); 4349 } 4350} 4351 4352static void ir_emit_ctpop(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4353{ 4354 ir_backend_data *data = ctx->data; 4355 dasm_State **Dst = &data->dasm_state; 4356 ir_type type = insn->type; 4357 ir_ref op1 = insn->op1; 4358 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4359 ir_reg op1_reg = ctx->regs[def][1]; 4360 ir_reg tmp_reg = ctx->regs[def][2]; 4361|.if X64 4362|| ir_reg const_reg = ctx->regs[def][3]; 4363|.endif 4364 4365 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 4366 if (IR_IS_CONST_REF(op1) || op1_reg == IR_REG_NONE) { 4367 ir_emit_load(ctx, type, def_reg, op1); 4368 if (ir_type_size[insn->type] == 1) { 4369 | movzx Rd(def_reg), Rb(def_reg) 4370 } else if (ir_type_size[insn->type] == 2) { 4371 | movzx Rd(def_reg), Rw(def_reg) 4372 } 4373 } else { 4374 if (IR_REG_SPILLED(op1_reg)) { 4375 op1_reg = IR_REG_NUM(op1_reg); 4376 ir_emit_load(ctx, type, op1_reg, op1); 4377 } 4378 switch (ir_type_size[insn->type]) { 4379 default: 4380 IR_ASSERT(0); 4381 case 1: 4382 | movzx Rd(def_reg), Rb(op1_reg) 4383 break; 4384 case 2: 4385 | movzx Rd(def_reg), Rw(op1_reg) 4386 break; 4387 case 4: 4388 | mov Rd(def_reg), Rd(op1_reg) 4389 break; 4390|.if X64 4391|| case 8: 4392 | mov Rq(def_reg), Rq(op1_reg) 4393|| break; 4394|.endif 4395 } 4396 } 4397 switch (ir_type_size[insn->type]) { 4398 default: 4399 IR_ASSERT(0); 4400 case 1: 4401 | mov Rd(tmp_reg), Rd(def_reg) 4402 | shr Rd(def_reg), 1 4403 | and Rd(def_reg), 0x55 4404 | sub Rd(tmp_reg), Rd(def_reg) 4405 | mov Rd(def_reg), Rd(tmp_reg) 4406 | and Rd(def_reg), 0x33 4407 | shr Rd(tmp_reg), 2 4408 | and Rd(tmp_reg), 0x33 4409 | add Rd(tmp_reg), Rd(def_reg) 4410 | mov Rd(def_reg), 
Rd(tmp_reg) 4411 | shr Rd(def_reg), 4 4412 | add Rd(def_reg), Rd(tmp_reg) 4413 | and Rd(def_reg), 0x0f 4414 break; 4415 case 2: 4416 | mov Rd(tmp_reg), Rd(def_reg) 4417 | shr Rd(def_reg), 1 4418 | and Rd(def_reg), 0x5555 4419 | sub Rd(tmp_reg), Rd(def_reg) 4420 | mov Rd(def_reg), Rd(tmp_reg) 4421 | and Rd(def_reg), 0x3333 4422 | shr Rd(tmp_reg), 2 4423 | and Rd(tmp_reg), 0x3333 4424 | add Rd(tmp_reg), Rd(def_reg) 4425 | mov Rd(def_reg), Rd(tmp_reg) 4426 | shr Rd(def_reg), 4 4427 | add Rd(def_reg), Rd(tmp_reg) 4428 | and Rd(def_reg), 0x0f0f 4429 | mov Rd(tmp_reg), Rd(def_reg) 4430 | shr Rd(tmp_reg), 8 4431 | and Rd(def_reg), 0x0f 4432 | add Rd(def_reg), Rd(tmp_reg) 4433 break; 4434 case 4: 4435 | mov Rd(tmp_reg), Rd(def_reg) 4436 | shr Rd(def_reg), 1 4437 | and Rd(def_reg), 0x55555555 4438 | sub Rd(tmp_reg), Rd(def_reg) 4439 | mov Rd(def_reg), Rd(tmp_reg) 4440 | and Rd(def_reg), 0x33333333 4441 | shr Rd(tmp_reg), 2 4442 | and Rd(tmp_reg), 0x33333333 4443 | add Rd(tmp_reg), Rd(def_reg) 4444 | mov Rd(def_reg), Rd(tmp_reg) 4445 | shr Rd(def_reg), 4 4446 | add Rd(def_reg), Rd(tmp_reg) 4447 | and Rd(def_reg), 0x0f0f0f0f 4448 | imul Rd(def_reg), 0x01010101 4449 | shr Rd(def_reg), 24 4450 break; 4451|.if X64 4452|| case 8: 4453|| IR_ASSERT(const_reg != IR_REG_NONE); 4454 | mov Rq(tmp_reg), Rq(def_reg) 4455 | shr Rq(def_reg), 1 4456 | mov64 Rq(const_reg), 0x5555555555555555 4457 | and Rq(def_reg), Rq(const_reg) 4458 | sub Rq(tmp_reg), Rq(def_reg) 4459 | mov Rq(def_reg), Rq(tmp_reg) 4460 | mov64 Rq(const_reg), 0x3333333333333333 4461 | and Rq(def_reg), Rq(const_reg) 4462 | shr Rq(tmp_reg), 2 4463 | and Rq(tmp_reg), Rq(const_reg) 4464 | add Rq(tmp_reg), Rq(def_reg) 4465 | mov Rq(def_reg), Rq(tmp_reg) 4466 | shr Rq(def_reg), 4 4467 | add Rq(def_reg), Rq(tmp_reg) 4468 | mov64 Rq(const_reg), 0x0f0f0f0f0f0f0f0f 4469 | and Rq(def_reg), Rq(const_reg) 4470 | mov64 Rq(const_reg), 0x0101010101010101 4471 | imul Rq(def_reg), Rq(const_reg) 4472 | shr Rq(def_reg), 56 4473|| break; 
4474|.endif 4475 } 4476 4477 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4478 ir_emit_store(ctx, type, def, def_reg); 4479 } 4480} 4481 4482static void ir_emit_mem_op_int(ir_ctx *ctx, ir_ref def, ir_insn *insn, uint32_t rule) 4483{ 4484 ir_backend_data *data = ctx->data; 4485 dasm_State **Dst = &data->dasm_state; 4486 ir_insn *op_insn = &ctx->ir_base[insn->op3]; 4487 ir_type type = op_insn->type; 4488 ir_mem mem; 4489 4490 if (insn->op == IR_STORE) { 4491 mem = ir_fuse_mem(ctx, def, def, insn, ctx->regs[def][2]); 4492 } else { 4493 IR_ASSERT(insn->op == IR_VSTORE); 4494 mem = ir_var_spill_slot(ctx, insn->op2); 4495 } 4496 4497 if (rule == IR_MEM_INC) { 4498 | ASM_MEM_OP inc, type, mem 4499 } else if (rule == IR_MEM_DEC) { 4500 | ASM_MEM_OP dec, type, mem 4501 } else if (op_insn->op == IR_NOT) { 4502 | ASM_MEM_OP not, type, mem 4503 } else { 4504 IR_ASSERT(op_insn->op == IR_NEG); 4505 | ASM_MEM_OP neg, type, mem 4506 } 4507} 4508 4509static void ir_emit_abs_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4510{ 4511 ir_backend_data *data = ctx->data; 4512 dasm_State **Dst = &data->dasm_state; 4513 ir_type type = insn->type; 4514 ir_ref op1 = insn->op1; 4515 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 4516 ir_reg op1_reg = ctx->regs[def][1]; 4517 4518 IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE); 4519 4520 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 4521 op1_reg = IR_REG_NUM(op1_reg); 4522 ir_emit_load(ctx, type, op1_reg, op1); 4523 } 4524 4525 IR_ASSERT(def_reg != op1_reg); 4526 4527 ir_emit_mov(ctx, insn->type, def_reg, op1_reg); 4528 | ASM_REG_OP neg, insn->type, def_reg 4529 | ASM_REG_REG_OP2, cmovs, type, def_reg, op1_reg 4530 if (IR_REG_SPILLED(ctx->regs[def][0])) { 4531 ir_emit_store(ctx, type, def, def_reg); 4532 } 4533} 4534 4535static void ir_emit_bool_not_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 4536{ 4537 ir_backend_data *data = ctx->data; 4538 dasm_State **Dst = &data->dasm_state; 4539 ir_type type = 
ctx->ir_base[insn->op1].type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}

	if (op1_reg != IR_REG_NONE) {
		/* register operand: TEST sets ZF without an immediate */
		|	ASM_REG_REG_OP test, type, op1_reg, op1_reg
	} else {
		/* operand still in its spill slot: compare memory against 0 */
		ir_mem mem = ir_ref_spill_slot(ctx, op1);

		|	ASM_MEM_IMM_OP cmp, type, mem, 0
	}
	|	sete Rb(def_reg)

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit MUL/MUL_OV/DIV/MOD via the one-operand x86 mul/imul/div/idiv forms,
 * which implicitly use RAX (and RDX): op1 is forced into RAX first; the
 * product/quotient lands in RAX, the remainder in RDX (AH for 8-bit ops). */
static void ir_emit_mul_div_mod(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];
	ir_mem mem;

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	/* move/load the first operand into the implicit RAX operand */
	if (op1_reg != IR_REG_RAX) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_mov(ctx, type, IR_REG_RAX, op1_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_RAX, op1);
		}
	}
	if (op2_reg == IR_REG_NONE && op1 == op2) {
		/* squaring / self-division: op2 is already in RAX */
		op2_reg = IR_REG_RAX;
	} else if (IR_IS_CONST_REF(op2)) {
		/* one-operand mul/div take no immediate, so a constant op2 must be
		 * materialized; for MUL, RDX is free to clobber anyway */
		if (insn->op == IR_MUL || insn->op == IR_MUL_OV) {
			op2_reg = IR_REG_RDX;
		} else {
			IR_ASSERT(op2_reg != IR_REG_NONE);
		}
		ir_emit_load(ctx, type, op2_reg, op2);
	}
	if (insn->op == IR_MUL || insn->op == IR_MUL_OV) {
		if (IR_IS_TYPE_SIGNED(insn->type)) {
			if (op2_reg != IR_REG_NONE) {
				if (IR_REG_SPILLED(op2_reg)) {
					op2_reg = IR_REG_NUM(op2_reg);
					ir_emit_load(ctx, type, op2_reg, op2);
				}
				|	ASM_REG_OP imul, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP imul, type, mem
			}
		} else {
			if (op2_reg != IR_REG_NONE) {
				if (IR_REG_SPILLED(op2_reg)) {
					op2_reg = IR_REG_NUM(op2_reg);
					ir_emit_load(ctx, type, op2_reg, op2);
				}
				|	ASM_REG_OP mul, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP mul, type, mem
			}
		}
	} else {
		if (IR_IS_TYPE_SIGNED(type)) {
			/* sign-extend the dividend into RDX:RAX (AX for 8-bit) before idiv */
			if (ir_type_size[type] == 8) {
				|	cqo
			} else if (ir_type_size[type] == 4) {
				|	cdq
			} else if (ir_type_size[type] == 2) {
				|	cwd
			} else {
				|	movsx ax, al
			}
			if (op2_reg != IR_REG_NONE) {
				if (IR_REG_SPILLED(op2_reg)) {
					op2_reg = IR_REG_NUM(op2_reg);
					ir_emit_load(ctx, type, op2_reg, op2);
				}
				|	ASM_REG_OP idiv, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP idiv, type, mem
			}
		} else {
			/* zero-extend the dividend for unsigned div */
			if (ir_type_size[type] == 1) {
				|	movzx ax, al
			} else {
				|	ASM_REG_REG_OP xor, type, IR_REG_RDX, IR_REG_RDX
			}
			if (op2_reg != IR_REG_NONE) {
				if (IR_REG_SPILLED(op2_reg)) {
					op2_reg = IR_REG_NUM(op2_reg);
					ir_emit_load(ctx, type, op2_reg, op2);
				}
				|	ASM_REG_OP div, type, op2_reg
			} else {
				if (ir_rule(ctx, op2) & IR_FUSED) {
					mem = ir_fuse_load(ctx, def, op2);
				} else {
					mem = ir_ref_spill_slot(ctx, op2);
				}
				|	ASM_MEM_OP div, type, mem
			}
		}
	}

	if (insn->op == IR_MUL || insn->op == IR_MUL_OV || insn->op == IR_DIV) {
		/* product/quotient is in RAX */
		if (def_reg != IR_REG_NONE) {
			if (def_reg != IR_REG_RAX) {
				ir_emit_mov(ctx, type, def_reg, IR_REG_RAX);
			}
			if (IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, type, def, def_reg);
			}
		} else {
			ir_emit_store(ctx, type, def, IR_REG_RAX);
		}
	} else {
		IR_ASSERT(insn->op == IR_MOD);
		if (ir_type_size[type] == 1) {
			/* 8-bit div leaves the remainder in AH */
			if (def_reg != IR_REG_NONE) {
				|	mov al, ah
				if (def_reg != IR_REG_RAX) {
					|	mov Rb(def_reg), al
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, type, def, def_reg);
				}
			} else {
				ir_reg fp;
				int32_t offset = ir_ref_spill_slot_offset(ctx, def, &fp);

//?????
				/* NOTE(review): original "?????" marker — storing AH straight
				 * to the spill slot; presumably flagged for re-checking the
				 * AH-addressability/REX interaction — confirm */
				|	mov byte [Ra(fp)+offset], ah
			}
		} else {
			/* wider remainders are in RDX */
			if (def_reg != IR_REG_NONE) {
				if (def_reg != IR_REG_RDX) {
					ir_emit_mov(ctx, type, def_reg, IR_REG_RDX);
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, type, def, def_reg);
				}
			} else {
				ir_emit_store(ctx, type, def, IR_REG_RDX);
			}
		}
	}
}

/* Switch the assembler to the rodata section, emitting its label once
 * (a label number past all block and constant labels). */
static void ir_rodata(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	|.rodata
	if (!data->rodata_label) {
		int label = data->rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2;
		|=>label:
	}
}

/* Emit FP unary NEG/ABS by XOR-ing/AND-ing the sign bit with a 16-byte
 * rodata mask constant (emitted lazily, once per mask). */
static void ir_emit_op_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type,
def_reg, op1);
		}
	}
	if (insn->op == IR_NEG) {
		if (insn->type == IR_DOUBLE) {
			/* lazily emit the 128-bit sign-bit mask for double (bit 63) */
			if (!data->double_neg_const) {
				data->double_neg_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->double_neg_const:
				|.dword 0, 0x80000000, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vxorpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const]
			} else {
				|	xorpd xmm(def_reg-IR_REG_FP_FIRST), [->double_neg_const]
			}
		} else {
			IR_ASSERT(insn->type == IR_FLOAT);
			/* sign-bit mask for float (bit 31) */
			if (!data->float_neg_const) {
				data->float_neg_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->float_neg_const:
				|.dword 0x80000000, 0, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const]
			} else {
				|	xorps xmm(def_reg-IR_REG_FP_FIRST), [->float_neg_const]
			}
		}
	} else {
		IR_ASSERT(insn->op == IR_ABS);
		if (insn->type == IR_DOUBLE) {
			/* all-bits-but-sign mask: clears bit 63 */
			if (!data->double_abs_const) {
				data->double_abs_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->double_abs_const:
				|.dword 0xffffffff, 0x7fffffff, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vandpd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const]
			} else {
				|	andpd xmm(def_reg-IR_REG_FP_FIRST), [->double_abs_const]
			}
		} else {
			IR_ASSERT(insn->type == IR_FLOAT);
			/* clears bit 31 */
			if (!data->float_abs_const) {
				data->float_abs_const = 1;
				ir_rodata(ctx);
				|.align 16
				|->float_abs_const:
				|.dword 0x7fffffff, 0, 0, 0
				|.code
			}
			if (ctx->mflags & IR_X86_AVX) {
				|	vandps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const]
			} else {
				|	andps xmm(def_reg-IR_REG_FP_FIRST), [->float_abs_const]
			}
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Emit a two-operand SSE2 FP binop (ADD/SUB/MUL/DIV/MIN/MAX): op1 is first
 * copied/loaded into def_reg, then the destructive SSE2 form is applied with
 * op2 taken from a register, a constant label, or memory. */
static void ir_emit_binop_sse2(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (def_reg != op1_reg) {
		if (op1_reg != IR_REG_NONE) {
			ir_emit_fp_mov(ctx, type, def_reg, op1_reg);
		} else {
			ir_emit_load(ctx, type, def_reg, op1);
		}
		if (op1 == op2) {
			/* x op x: the value just placed in def_reg serves as op2 too */
			op2_reg = def_reg;
		}
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_SSE2_REG_REG_OP adds, type, def_reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_SSE2_REG_REG_OP subs, type, def_reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_SSE2_REG_REG_OP muls, type, def_reg, op2_reg
				break;
			case IR_DIV:
				|	ASM_SSE2_REG_REG_OP divs, type, def_reg, op2_reg
				break;
			case IR_MIN:
				|	ASM_SSE2_REG_REG_OP mins, type, def_reg, op2_reg
				break;
			case IR_MAX:
				|	ASM_SSE2_REG_REG_OP maxs, type, def_reg, op2_reg
				break;
		}
	} else if (IR_IS_CONST_REF(op2)) {
		/* constant op2 without a register: reference its constant-pool label */
		int label = ir_const_label(ctx, op2);

		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_SSE2_REG_TXT_OP adds, type, def_reg, [=>label]
				break;
			case IR_SUB:
				|	ASM_SSE2_REG_TXT_OP subs, type, def_reg, [=>label]
				break;
			case IR_MUL:
				|	ASM_SSE2_REG_TXT_OP muls, type, def_reg, [=>label]
				break;
			case IR_DIV:
				|	ASM_SSE2_REG_TXT_OP divs, type, def_reg, [=>label]
				break;
			case IR_MIN:
				|	ASM_SSE2_REG_TXT_OP mins, type, def_reg, [=>label]
				break;
			case IR_MAX:
				|	ASM_SSE2_REG_TXT_OP maxs, type, def_reg, [=>label]
				break;
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_SSE2_REG_MEM_OP adds, type, def_reg, mem
				break;
			case IR_SUB:
				|	ASM_SSE2_REG_MEM_OP subs, type, def_reg, mem
				break;
			case IR_MUL:
				|	ASM_SSE2_REG_MEM_OP muls, type, def_reg, mem
				break;
			case IR_DIV:
				|	ASM_SSE2_REG_MEM_OP divs, type, def_reg, mem
				break;
			case IR_MIN:
				|	ASM_SSE2_REG_MEM_OP mins, type, def_reg, mem
				break;
			case IR_MAX:
				|	ASM_SSE2_REG_MEM_OP maxs, type, def_reg, mem
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* AVX variant of the FP binop emitter: three-operand VEX forms, so op1 stays
 * in its own register and no preliminary copy into def_reg is needed. */
static void ir_emit_binop_avx(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = insn->type;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE);

	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_AVX_REG_REG_REG_OP vadds, type, def_reg, op1_reg, op2_reg
				break;
			case IR_SUB:
				|	ASM_AVX_REG_REG_REG_OP vsubs, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MUL:
				|	ASM_AVX_REG_REG_REG_OP vmuls, type, def_reg, op1_reg, op2_reg
				break;
			case IR_DIV:
				|	ASM_AVX_REG_REG_REG_OP vdivs, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MIN:
				|	ASM_AVX_REG_REG_REG_OP vmins, type, def_reg, op1_reg, op2_reg
				break;
			case IR_MAX:
				|	ASM_AVX_REG_REG_REG_OP vmaxs, type, def_reg, op1_reg, op2_reg
				break;
		}
	} else if (IR_IS_CONST_REF(op2)) {
		int label = ir_const_label(ctx, op2);

		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_AVX_REG_REG_TXT_OP vadds, type, def_reg, op1_reg, [=>label]
				break;
			case IR_SUB:
				|	ASM_AVX_REG_REG_TXT_OP vsubs, type, def_reg, op1_reg, [=>label]
				break;
			case IR_MUL:
				|	ASM_AVX_REG_REG_TXT_OP vmuls, type, def_reg, op1_reg, [=>label]
				break;
			case IR_DIV:
				|	ASM_AVX_REG_REG_TXT_OP vdivs, type, def_reg, op1_reg, [=>label]
				break;
			case IR_MIN:
				|	ASM_AVX_REG_REG_TXT_OP vmins, type, def_reg, op1_reg, [=>label]
				break;
			case IR_MAX:
				|	ASM_AVX_REG_REG_TXT_OP vmaxs, type, def_reg, op1_reg, [=>label]
				break;
		}
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		switch (insn->op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_ADD:
				|	ASM_AVX_REG_REG_MEM_OP vadds, type, def_reg, op1_reg, mem
				break;
			case IR_SUB:
				|	ASM_AVX_REG_REG_MEM_OP vsubs, type, def_reg, op1_reg, mem
				break;
			case IR_MUL:
				|	ASM_AVX_REG_REG_MEM_OP vmuls, type, def_reg, op1_reg, mem
				break;
			case IR_DIV:
				|	ASM_AVX_REG_REG_MEM_OP vdivs, type, def_reg, op1_reg, mem
				break;
			case IR_MIN:
				|	ASM_AVX_REG_REG_MEM_OP vmins, type, def_reg, op1_reg, mem
				break;
			case IR_MAX:
				|	ASM_AVX_REG_REG_MEM_OP vmaxs, type, def_reg, op1_reg, mem
				break;
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Emit the flag-setting part of an integer comparison (CMP/TEST), choosing
 * reg/reg, reg/imm, reg/mem, mem/reg or mem/imm encodings depending on where
 * the operands live.  Comparison against constant 0 uses TEST reg,reg. */
static void ir_emit_cmp_int_common(ir_ctx *ctx, ir_type type, ir_ref root, ir_insn *insn, ir_reg op1_reg, ir_ref op1, ir_reg op2_reg, ir_ref op2)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	if (op1_reg != IR_REG_NONE) {
		if (op2_reg != IR_REG_NONE) {
			|	ASM_REG_REG_OP cmp, type, op1_reg, op2_reg
		} else if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
			/* cmp reg, 0  ==>  shorter test reg, reg */
			|	ASM_REG_REG_OP test, type, op1_reg, op1_reg
		} else if (IR_IS_CONST_REF(op2)) {
			int32_t val = ir_fuse_imm(ctx, op2);
			|	ASM_REG_IMM_OP cmp, type, op1_reg, val
		} else {
			ir_mem mem;

			if (ir_rule(ctx, op2) & IR_FUSED) {
				mem = ir_fuse_load(ctx, root, op2);
			} else {
				mem = ir_ref_spill_slot(ctx, op2);
			}
			|	ASM_REG_MEM_OP cmp, type, op1_reg, mem
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		/* a constant op1 should always have been given a register */
		IR_ASSERT(0);
	} else {
		ir_mem mem;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}
		if (op2_reg != IR_REG_NONE) {
			|	ASM_MEM_REG_OP cmp, type, mem, op2_reg
		} else {
			IR_ASSERT(!IR_IS_CONST_REF(op1));
			int32_t val = ir_fuse_imm(ctx, op2);
			|	ASM_MEM_IMM_OP cmp, type, mem, val
		}
	}
}

/* Materialize a comparison result into a byte register with the SETcc
 * matching the IR comparison opcode (signed and unsigned variants). */
static void _ir_emit_setcc_int(ir_ctx *ctx, uint8_t op, ir_reg def_reg)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	switch (op) {
		default:
			IR_ASSERT(0 && "NIY binary op");
		case IR_EQ:
			|	sete Rb(def_reg)
			break;
		case IR_NE:
			|	setne Rb(def_reg)
			break;
		case IR_LT:
			|	setl Rb(def_reg)
			break;
		case IR_GE:
			|	setge Rb(def_reg)
			break;
		case IR_LE:
			|	setle Rb(def_reg)
			break;
		case IR_GT:
			|	setg Rb(def_reg)
			break;
		case IR_ULT:
			|	setb Rb(def_reg)
			break;
		case IR_UGE:
			|	setae Rb(def_reg)
			break;
		case IR_ULE:
			|	setbe Rb(def_reg)
			break;
		case IR_UGT:
			|	seta Rb(def_reg)
			break;
	}
}

/* Emit an integer comparison producing a boolean value.  Unsigned compares
 * against constant 0 are strength-reduced: ULT is always false, UGE always
 * true, ULE becomes EQ and UGT becomes NE. */
static void ir_emit_cmp_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[insn->op1].type;
	ir_op op = insn->op;
	ir_ref op1 = insn->op1;
	ir_ref op2 = insn->op2;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	ir_reg op2_reg = ctx->regs[def][2];

	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}
	if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
		if (op == IR_ULT) {
			/* always false */
			|	xor Ra(def_reg), Ra(def_reg)
			if (IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, insn->type, def, def_reg);
			}
			return;
		} else if (op == IR_UGE) {
			/* always true */
			|	ASM_REG_IMM_OP mov, insn->type, def_reg, 1
			if
(IR_REG_SPILLED(ctx->regs[def][0])) {
				ir_emit_store(ctx, insn->type, def, def_reg);
			}
			return;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
	}
	ir_emit_cmp_int_common(ctx, type, def, insn, op1_reg, op1, op2_reg, op2);
	_ir_emit_setcc_int(ctx, op, def_reg);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Emit the flag-setting TEST for a fused (x AND y) used only by a comparison.
 * For EQ/NE against common byte/word masks (0xff/0xff00/0xffff/-1) a narrower
 * register self-TEST replaces the immediate form; 0xff00 uses the legacy
 * AH/BH/CH/DH high-byte registers (only encodable for RAX..RDX). */
static void ir_emit_test_int_common(ir_ctx *ctx, ir_ref root, ir_ref ref, ir_op op)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *binop_insn = &ctx->ir_base[ref];
	ir_type type = binop_insn->type;
	ir_ref op1 = binop_insn->op1;
	ir_ref op2 = binop_insn->op2;
	ir_reg op1_reg = ctx->regs[ref][1];
	ir_reg op2_reg = ctx->regs[ref][2];

	IR_ASSERT(binop_insn->op == IR_AND);
	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, type, op1_reg, op1);
		}
		if (op2_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
				op2_reg = IR_REG_NUM(op2_reg);
				if (op1 != op2) {
					ir_emit_load(ctx, type, op2_reg, op2);
				}
			}
			|	ASM_REG_REG_OP test, type, op1_reg, op2_reg
		} else if (IR_IS_CONST_REF(op2)) {
			int32_t val = ir_fuse_imm(ctx, op2);

			/* on 32-bit x86 only RAX..RDX have byte sub-registers */
			if ((op == IR_EQ || op == IR_NE) && val == 0xff && (sizeof(void*) == 8 || op1_reg <= IR_REG_R3)) {
				|	test Rb(op1_reg), Rb(op1_reg)
			} else if ((op == IR_EQ || op == IR_NE) && val == 0xff00 && op1_reg <= IR_REG_R3) {
				if (op1_reg == IR_REG_RAX) {
					|	test ah, ah
				} else if (op1_reg == IR_REG_RBX) {
					|	test bh, bh
				} else if (op1_reg == IR_REG_RCX) {
					|	test ch, ch
				} else if (op1_reg == IR_REG_RDX) {
					|	test dh, dh
				} else {
					IR_ASSERT(0);
				}
			} else if ((op == IR_EQ || op == IR_NE) && val == 0xffff) {
				|	test Rw(op1_reg), Rw(op1_reg)
			} else if ((op == IR_EQ || op == IR_NE) && val == -1) {
				|	test Rd(op1_reg), Rd(op1_reg)
			} else {
				|	ASM_REG_IMM_OP test, type, op1_reg, val
			}
		} else {
			ir_mem mem;

			if (ir_rule(ctx, op2) & IR_FUSED) {
				mem = ir_fuse_load(ctx, root, op2);
			} else {
				mem = ir_ref_spill_slot(ctx, op2);
			}
			|	ASM_REG_MEM_OP test, type, op1_reg, mem
		}
	} else if (IR_IS_CONST_REF(op1)) {
		IR_ASSERT(0);
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, op1);
		} else {
			mem = ir_ref_spill_slot(ctx, op1);
		}
		if (op2_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
				op2_reg = IR_REG_NUM(op2_reg);
				if (op1 != op2) {
					ir_emit_load(ctx, type, op2_reg, op2);
				}
			}
			|	ASM_MEM_REG_OP test, type, mem, op2_reg
		} else {
			IR_ASSERT(!IR_IS_CONST_REF(op1));
			int32_t val = ir_fuse_imm(ctx, op2);
			|	ASM_MEM_IMM_OP test, type, mem, val
		}
	}
}

/* Boolean result of a fused AND+compare: TEST then SETcc. */
static void ir_emit_testcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(def_reg != IR_REG_NONE);
	ir_emit_test_int_common(ctx, def, insn->op1, insn->op);
	_ir_emit_setcc_int(ctx, insn->op, def_reg);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* SETcc from flags that were already set by a previously emitted insn. */
static void ir_emit_setcc_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(def_reg != IR_REG_NONE);
	_ir_emit_setcc_int(ctx, insn->op, def_reg);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Emit the UCOMISS/UCOMISD for an FP comparison and return the (possibly
 * swapped) comparison op.  If only op2 has a register, EQ/NE operands are
 * swapped so that the register side becomes the first UCOMIS operand
 * (legal because EQ/NE are symmetric). */
static ir_op ir_emit_cmp_fp_common(ir_ctx *ctx, ir_ref root, ir_ref cmp_ref, ir_insn *cmp_insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_op op = cmp_insn->op;
	ir_ref op1, op2;
	ir_reg op1_reg, op2_reg;

	op1 = cmp_insn->op1;
	op2 = cmp_insn->op2;
	op1_reg = ctx->regs[cmp_ref][1];
	op2_reg = ctx->regs[cmp_ref][2];

	if (op1_reg == IR_REG_NONE && op2_reg != IR_REG_NONE && (op == IR_EQ || op == IR_NE)) {
		ir_ref tmp;
		ir_reg tmp_reg;

		tmp = op1;
		op1 = op2;
		op2 = tmp;
		tmp_reg = op1_reg;
		op1_reg = op2_reg;
		op2_reg = tmp_reg;
	}


	IR_ASSERT(op1_reg != IR_REG_NONE);
	if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2)) {
			op2_reg = IR_REG_NUM(op2_reg);
			if (op1 != op2) {
				ir_emit_load(ctx, type, op2_reg, op2);
			}
		}
		|	ASM_FP_REG_REG_OP ucomis, type, op1_reg, op2_reg
	} else if (IR_IS_CONST_REF(op2)) {
		int label = ir_const_label(ctx, op2);

		|	ASM_FP_REG_TXT_OP ucomis, type, op1_reg, [=>label]
	} else {
		ir_mem mem;

		if (ir_rule(ctx, op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, root, op2);
		} else {
			mem = ir_ref_spill_slot(ctx, op2);
		}
		|	ASM_FP_REG_MEM_OP ucomis, type, op1_reg, mem
	}
	return op;
}

/* Emit an FP comparison producing a boolean.  UCOMIS sets PF on unordered
 * (NaN) operands, so most cases need a SETcc plus a CMOV keyed on PF/flags
 * to force the correct result for NaN inputs. */
static void ir_emit_cmp_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_op op = ir_emit_cmp_fp_common(ctx, def, def, insn);
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg tmp_reg = ctx->regs[def][3];

	IR_ASSERT(def_reg != IR_REG_NONE);
	switch (op) {
		default:
			IR_ASSERT(0 && "NIY binary op");
		case IR_EQ:
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			|	cmovne Rd(def_reg), Rd(tmp_reg)
			break;
		case
IR_NE:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmovne Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_LT:
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			|	cmovae Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_GE:
			|	setae Rb(def_reg)
			break;
		case IR_LE:
			|	setnp Rb(def_reg)
			|	mov Rd(tmp_reg), 0
			|	cmova Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_GT:
			|	seta Rb(def_reg)
			break;
		case IR_ULT:
			|	setb Rb(def_reg)
			break;
		case IR_UGE:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmovae Rd(def_reg), Rd(tmp_reg)
			break;
		case IR_ULE:
			|	setbe Rb(def_reg)
			break;
		case IR_UGT:
			|	setp Rb(def_reg)
			|	mov Rd(tmp_reg), 1
			|	cmova Rd(def_reg), Rd(tmp_reg)
			break;
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Unconditional jump to the true successor, skipped when it falls through. */
static void ir_emit_jmp_true(ir_ctx *ctx, uint32_t b, ir_ref def)
{
	uint32_t true_block, false_block, next_block;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
	if (true_block != next_block) {
		|	jmp =>true_block
	}
}

/* Unconditional jump to the false successor, skipped when it falls through. */
static void ir_emit_jmp_false(ir_ctx *ctx, uint32_t b, ir_ref def)
{
	uint32_t true_block, false_block, next_block;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
	if (false_block != next_block) {
		|	jmp =>false_block
	}
}

/* Emit the conditional branch for an already-performed comparison.  When the
 * true successor falls through, the condition is reversed so a single Jcc to
 * the false successor suffices; `false_block == 0` means "falls through, no
 * trailing JMP".  FP branches additionally route PF (unordered/NaN) to the
 * branch target required by IEEE semantics. */
static void ir_emit_jcc(ir_ctx *ctx, uint8_t op, uint32_t b, ir_ref def, ir_insn *insn, bool int_cmp)
{
	uint32_t true_block, false_block, next_block;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block);
	if (true_block == next_block) {
		/* swap to avoid unconditional JMP */
		/* NOTE(review): relies on the IR comparison opcodes being laid out so
		 * that XOR-ing with 1 (int, EQ/NE) or 5 (other FP) yields the negated
		 * comparison — confirm against the ir_op enum */
		if (int_cmp || op == IR_EQ || op == IR_NE) {
			op ^= 1; // reverse
		} else {
			op ^= 5; // reverse
		}
		true_block = false_block;
		false_block = 0;
	} else if (false_block == next_block) {
		false_block = 0;
	}

	if (int_cmp) {
		switch (op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_EQ:
				|	je =>true_block
				break;
			case IR_NE:
				|	jne =>true_block
				break;
			case IR_LT:
				|	jl =>true_block
				break;
			case IR_GE:
				|	jge =>true_block
				break;
			case IR_LE:
				|	jle =>true_block
				break;
			case IR_GT:
				|	jg =>true_block
				break;
			case IR_ULT:
				|	jb =>true_block
				break;
			case IR_UGE:
				|	jae =>true_block
				break;
			case IR_ULE:
				|	jbe =>true_block
				break;
			case IR_UGT:
				|	ja =>true_block
				break;
		}
	} else {
		switch (op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_EQ:
				/* unordered (PF) must not be taken as equal */
				if (!false_block) {
					|	jp >1
					|	je =>true_block
					|1:
				} else {
					|	jp =>false_block
					|	je =>true_block
				}
				break;
			case IR_NE:
				|	jne =>true_block
				|	jp =>true_block
				break;
			case IR_LT:
				if (!false_block) {
					|	jp >1
					|	jb =>true_block
					|1:
				} else {
					|	jp =>false_block
					|	jb =>true_block
				}
				break;
			case IR_GE:
				|	jae =>true_block
				break;
			case IR_LE:
				if (!false_block) {
					|	jp >1
					|	jbe =>true_block
					|1:
				} else {
					|	jp =>false_block
					|	jbe =>true_block
				}
				break;
			case IR_GT:
				|	ja =>true_block
				break;
			case IR_ULT:
				|	jb =>true_block
				break;
			case IR_UGE:
				|	jp =>true_block
				|	jae =>true_block
				break;
			case IR_ULE:
				|	jbe =>true_block
				break;
			case IR_UGT:
				|	jp =>true_block
				|	ja =>true_block
				break;
		}
	}
	if (false_block) {
		|	jmp =>false_block
	}
}

/* Emit a fused integer compare-and-branch.  Unsigned compares against 0 are
 * strength-reduced (ULT never / UGE always taken; ULE->EQ, UGT->NE), and the
 * CMP itself is skipped when the immediately preceding block ended with the
 * very same comparison, whose flags are still live. */
static void ir_emit_cmp_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_op op = cmp_insn->op;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_ref op1 = cmp_insn->op1;
	ir_ref op2 = cmp_insn->op2;
	ir_reg op1_reg = ctx->regs[insn->op2][1];
	ir_reg op2_reg = ctx->regs[insn->op2][2];

	if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}
	if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
		if (op == IR_ULT) {
			/* always false */
			ir_emit_jmp_false(ctx, b, def);
			return;
		} else if (op == IR_UGE) {
			/* always true */
			ir_emit_jmp_true(ctx, b, def);
			return;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
	}

	bool same_comparison = 0;
	ir_insn *prev_insn = &ctx->ir_base[insn->op1];
	/* detect a back-to-back repeat of the identical comparison: flags from
	 * the previous block's CMP are still valid, so it can be elided */
	if (prev_insn->op == IR_IF_TRUE || prev_insn->op == IR_IF_FALSE) {
		if (ir_rule(ctx, prev_insn->op1) == IR_CMP_AND_BRANCH_INT) {
			prev_insn = &ctx->ir_base[prev_insn->op1];
			prev_insn = &ctx->ir_base[prev_insn->op2];
			if (prev_insn->op1 == cmp_insn->op1 && prev_insn->op2 == cmp_insn->op2) {
				same_comparison = true;
			}
		}
	}
	if (!same_comparison) {
		ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2);
	}
	ir_emit_jcc(ctx, op, b, def, insn, 1);
}

static void ir_emit_test_and_branch_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
5607{ 5608 ir_ref op2 = insn->op2; 5609 ir_op op = ctx->ir_base[op2].op; 5610 5611 if (op >= IR_EQ && op <= IR_UGT) { 5612 op2 = ctx->ir_base[op2].op1; 5613 } else { 5614 IR_ASSERT(op == IR_AND); 5615 op = IR_NE; 5616 } 5617 5618 ir_emit_test_int_common(ctx, def, op2, op); 5619 ir_emit_jcc(ctx, op, b, def, insn, 1); 5620} 5621 5622static void ir_emit_cmp_and_branch_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 5623{ 5624 ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]); 5625 ir_emit_jcc(ctx, op, b, def, insn, 0); 5626} 5627 5628static void ir_emit_if_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn) 5629{ 5630 ir_type type = ctx->ir_base[insn->op2].type; 5631 ir_reg op2_reg = ctx->regs[def][2]; 5632 ir_backend_data *data = ctx->data; 5633 dasm_State **Dst = &data->dasm_state; 5634 5635 if (op2_reg != IR_REG_NONE) { 5636 if (IR_REG_SPILLED(op2_reg)) { 5637 op2_reg = IR_REG_NUM(op2_reg); 5638 ir_emit_load(ctx, type, op2_reg, insn->op2); 5639 } 5640 | ASM_REG_REG_OP test, type, op2_reg, op2_reg 5641 } else if (IR_IS_CONST_REF(insn->op2)) { 5642 uint32_t true_block, false_block, next_block; 5643 5644 ir_get_true_false_blocks(ctx, b, &true_block, &false_block, &next_block); 5645 if (ir_const_is_true(&ctx->ir_base[insn->op2])) { 5646 if (true_block != next_block) { 5647 | jmp =>true_block 5648 } 5649 } else { 5650 if (false_block != next_block) { 5651 | jmp =>false_block 5652 } 5653 } 5654 return; 5655 } else { 5656 ir_mem mem; 5657 5658 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 5659 mem = ir_fuse_load(ctx, def, insn->op2); 5660 } else { 5661 mem = ir_ref_spill_slot(ctx, insn->op2); 5662 } 5663 | ASM_MEM_IMM_OP cmp, type, mem, 0 5664 } 5665 ir_emit_jcc(ctx, IR_NE, b, def, insn, 1); 5666} 5667 5668static void ir_emit_cond(ir_ctx *ctx, ir_ref def, ir_insn *insn) 5669{ 5670 ir_backend_data *data = ctx->data; 5671 dasm_State **Dst = &data->dasm_state; 5672 ir_type type = insn->type; 5673 ir_ref op1 = insn->op1; 5674 ir_ref op2 = 
insn->op2; 5675 ir_ref op3 = insn->op3; 5676 ir_type op1_type = ctx->ir_base[op1].type; 5677 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 5678 ir_reg op1_reg = ctx->regs[def][1]; 5679 ir_reg op2_reg = ctx->regs[def][2]; 5680 ir_reg op3_reg = ctx->regs[def][3]; 5681 5682 IR_ASSERT(def_reg != IR_REG_NONE); 5683 5684 if (op2_reg != IR_REG_NONE && (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(op2))) { 5685 op2_reg = IR_REG_NUM(op2_reg); 5686 ir_emit_load(ctx, type, op2_reg, op2); 5687 if (op1 == op2) { 5688 op1_reg = op2_reg; 5689 } 5690 if (op3 == op2) { 5691 op3_reg = op2_reg; 5692 } 5693 } 5694 if (op3_reg != IR_REG_NONE && op3 != op2 && (IR_REG_SPILLED(op3_reg) || IR_IS_CONST_REF(op3))) { 5695 op3_reg = IR_REG_NUM(op3_reg); 5696 ir_emit_load(ctx, type, op3_reg, op3); 5697 if (op1 == op2) { 5698 op1_reg = op3_reg; 5699 } 5700 } 5701 if (op1_reg != IR_REG_NONE && op1 != op2 && op1 != op3 && (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(op1))) { 5702 op1_reg = IR_REG_NUM(op1_reg); 5703 ir_emit_load(ctx, op1_type, op1_reg, op1); 5704 } 5705 5706 if (IR_IS_TYPE_INT(op1_type)) { 5707 if (op1_reg != IR_REG_NONE) { 5708 | ASM_REG_REG_OP test, op1_type, op1_reg, op1_reg 5709 } else { 5710 ir_mem mem = ir_ref_spill_slot(ctx, op1); 5711 5712 | ASM_MEM_IMM_OP cmp, op1_type, mem, 0 5713 } 5714 | je >2 5715 } else { 5716 if (!data->double_zero_const) { 5717 data->double_zero_const = 1; 5718 ir_rodata(ctx); 5719 |.align 16 5720 |->double_zero_const: 5721 |.dword 0, 0 5722 |.code 5723 } 5724 | ASM_FP_REG_TXT_OP ucomis, op1_type, op1_reg, [->double_zero_const] 5725 | jp >1 5726 | je >2 5727 |1: 5728 } 5729 5730 if (op2_reg != IR_REG_NONE) { 5731 if (def_reg != op2_reg) { 5732 if (IR_IS_TYPE_INT(type)) { 5733 ir_emit_mov(ctx, type, def_reg, op2_reg); 5734 } else { 5735 ir_emit_fp_mov(ctx, type, def_reg, op2_reg); 5736 } 5737 } 5738 } else if (IR_IS_CONST_REF(op2) || !(ir_rule(ctx, op2) & IR_FUSED)) { 5739 ir_emit_load(ctx, type, def_reg, op2); 5740 } else { 5741 ir_emit_load_mem(ctx, 
type, def_reg, ir_fuse_load(ctx, def, op2));
	}
	| jmp >3
	|2:
	/* "false" path: def = op3 */
	if (op3_reg != IR_REG_NONE) {
		if (def_reg != op3_reg) {
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov(ctx, type, def_reg, op3_reg);
			} else {
				ir_emit_fp_mov(ctx, type, def_reg, op3_reg);
			}
		}
	} else if (IR_IS_CONST_REF(op3) || !(ir_rule(ctx, op3) & IR_FUSED)) {
		ir_emit_load(ctx, type, def_reg, op3);
	} else {
		ir_emit_load_mem(ctx, type, def_reg, ir_fuse_load(ctx, def, op3));
	}
	|3:

	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit function epilogue and "ret" (callee-pops variant for x86 fastcall). */
static void ir_emit_return_void(ir_ctx *ctx)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;

	ir_emit_epilogue(ctx);

#ifdef IR_TARGET_X86
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC) && ctx->param_stack_size) {
		| ret ctx->param_stack_size
		return;
	}
#endif

	| ret
}

/* Emit an integer return: place the value in IR_REG_INT_RET1, then epilogue. */
static void ir_emit_return_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_reg op2_reg = ctx->regs[ref][2];

	if (op2_reg != IR_REG_INT_RET1) {
		ir_type type = ctx->ir_base[insn->op2].type;

		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
			ir_emit_mov(ctx, type, IR_REG_INT_RET1, op2_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_INT_RET1, insn->op2);
		}
	}
	ir_emit_return_void(ctx);
}

/* Emit an FP return: either via the SSE return register, or (32-bit x86
 * without IR_REG_FP_RET1) by loading the value onto the x87 stack. */
static void ir_emit_return_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_reg op2_reg = ctx->regs[ref][2];
	ir_type type = ctx->ir_base[insn->op2].type;

#ifdef IR_REG_FP_RET1
	if (op2_reg != IR_REG_FP_RET1) {
		if (op2_reg != IR_REG_NONE && !IR_REG_SPILLED(op2_reg)) {
			ir_emit_fp_mov(ctx, type, IR_REG_FP_RET1, op2_reg);
		} else {
			ir_emit_load(ctx, type, IR_REG_FP_RET1, insn->op2);
		}
	}
#else
	ir_backend_data *data = ctx->data;
	dasm_State
**Dst = &data->dasm_state;

	if (op2_reg == IR_REG_NONE || IR_REG_SPILLED(op2_reg)) {
		/* value already in memory: fld straight from its spill slot */
		ir_reg fp;
		int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &fp);

		if (type == IR_DOUBLE) {
			| fld qword [Ra(fp)+offset]
		} else {
			IR_ASSERT(type == IR_FLOAT);
			| fld dword [Ra(fp)+offset]
		}
	} else {
		/* value in an SSE register: spill to the reserved ret_slot, then fld */
		int32_t offset = ctx->ret_slot;
		ir_reg fp;

		IR_ASSERT(offset != -1);
		offset = IR_SPILL_POS_TO_OFFSET(offset);
		fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
		ir_emit_store_mem_fp(ctx, type, IR_MEM_BO(fp, offset), op2_reg);
		if (type == IR_DOUBLE) {
			| fld qword [Ra(fp)+offset]
		} else {
			IR_ASSERT(type == IR_FLOAT);
			| fld dword [Ra(fp)+offset]
		}
	}
#endif
	ir_emit_return_void(ctx);
}

/* Emit SEXT: sign-extend a smaller integer into a wider destination. */
static void ir_emit_sext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		/* register source: pick movsx/movsxd by (src size, dst size) */
		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				| movsx Rw(def_reg), Rb(op1_reg)
			} else if (ir_type_size[dst_type] == 4) {
				| movsx Rd(def_reg), Rb(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| movsx Rq(def_reg), Rb(op1_reg)
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if
 (ir_type_size[dst_type] == 4) {
				| movsx Rd(def_reg), Rw(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| movsx Rq(def_reg), Rw(op1_reg)
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			| movsxd Rq(def_reg), Rd(op1_reg)
|.endif
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		/* constants are expected to be folded/loaded earlier */
		IR_ASSERT(0);
	} else {
		/* memory source: fused load or spill slot */
		ir_mem mem;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}

		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				| ASM_TXT_TMEM_OP movsx, Rw(def_reg), byte, mem
			} else if (ir_type_size[dst_type] == 4) {
				| ASM_TXT_TMEM_OP movsx, Rd(def_reg), byte, mem
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP movsx, Rq(def_reg), byte, mem
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				| ASM_TXT_TMEM_OP movsx, Rd(def_reg), word, mem
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP movsx, Rq(def_reg), word, mem
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			| ASM_TXT_TMEM_OP movsxd, Rq(def_reg), dword, mem
|.endif
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

/* Emit ZEXT: zero-extend a smaller integer into a wider destination. */
static void ir_emit_zext(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] > ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);

	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		/* register source: movzx for 8/16-bit, 32-bit mov for 32->64
		 * (a 32-bit mov implicitly zeroes the upper 32 bits on x86-64) */
		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				| movzx Rw(def_reg), Rb(op1_reg)
			} else if (ir_type_size[dst_type] == 4) {
				| movzx Rd(def_reg), Rb(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| movzx Rq(def_reg), Rb(op1_reg)
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				| movzx Rd(def_reg), Rw(op1_reg)
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| movzx Rq(def_reg), Rw(op1_reg)
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			/* Avoid zero extension to the same register. This may be not always safe ???
 */
			if (op1_reg != def_reg) {
				| mov Rd(def_reg), Rd(op1_reg)
			}
|.endif
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		/* constants are expected to be folded/loaded earlier */
		IR_ASSERT(0);
	} else {
		/* memory source: fused load or spill slot */
		ir_mem mem;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}

		if (ir_type_size[src_type] == 1) {
			if (ir_type_size[dst_type] == 2) {
				| ASM_TXT_TMEM_OP movzx, Rw(def_reg), byte, mem
			} else if (ir_type_size[dst_type] == 4) {
				| ASM_TXT_TMEM_OP movzx, Rd(def_reg), byte, mem
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP movzx, Rq(def_reg), byte, mem
|.endif
			}
		} else if (ir_type_size[src_type] == 2) {
			if (ir_type_size[dst_type] == 4) {
				| ASM_TXT_TMEM_OP movzx, Rd(def_reg), word, mem
			} else {
				IR_ASSERT(ir_type_size[dst_type] == 8);
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP movzx, Rq(def_reg), word, mem
|.endif
			}
		} else {
			IR_ASSERT(ir_type_size[src_type] == 4);
			IR_ASSERT(ir_type_size[dst_type] == 8);
|.if X64
			/* 32-bit load zero-extends to 64 bits */
			| ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem
|.endif
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

/* Emit TRUNC: narrow an integer; on x86 this is just a (sub)register move. */
static void ir_emit_trunc(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(ir_type_size[dst_type] < ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, src_type, op1_reg,
 insn->op1);
	}
	if (op1_reg != IR_REG_NONE) {
		if (op1_reg != def_reg) {
			ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
		}
	} else {
		ir_emit_load(ctx, dst_type, def_reg, insn->op1);
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

/* Emit BITCAST: reinterpret a value of equal size, possibly crossing
 * the integer/FP register files. */
static void ir_emit_bitcast(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(ir_type_size[dst_type] == ir_type_size[src_type]);
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (IR_IS_TYPE_INT(src_type) && IR_IS_TYPE_INT(dst_type)) {
		/* int -> int: plain move or load */
		if (!IR_IS_CONST_REF(insn->op1) && (ir_rule(ctx, insn->op1) & IR_FUSED)) {
			ir_emit_load_mem_int(ctx, dst_type, def_reg, ir_fuse_load(ctx, def, insn->op1));
		} else if (op1_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op1_reg)) {
				op1_reg = IR_REG_NUM(op1_reg);
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (op1_reg != def_reg) {
				ir_emit_mov(ctx, dst_type, def_reg, op1_reg);
			}
		} else {
			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
		}
	} else if (IR_IS_TYPE_FP(src_type) && IR_IS_TYPE_FP(dst_type)) {
		/* fp -> fp: plain SSE move or load */
		if (!IR_IS_CONST_REF(insn->op1) && (ir_rule(ctx, insn->op1) & IR_FUSED)) {
			ir_mem mem = ir_fuse_load(ctx, def, insn->op1);
			ir_emit_load_mem_fp(ctx, dst_type, def_reg, mem);
		} else if (op1_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op1_reg)) {
				op1_reg = IR_REG_NUM(op1_reg);
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (op1_reg != def_reg) {
				ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
			}
		} else {
			ir_emit_load(ctx, dst_type, def_reg, insn->op1);
		}
	} else if (IR_IS_TYPE_FP(src_type)) {
		/* fp -> int: movd/movq between register files */
		IR_ASSERT(IR_IS_TYPE_INT(dst_type));
		if (op1_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op1_reg)) {
				op1_reg = IR_REG_NUM(op1_reg);
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (src_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				if (ctx->mflags & IR_X86_AVX) {
					| vmovd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					| movd Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
|.endif
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vmovd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					| movd Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			}
		} else if (IR_IS_CONST_REF(insn->op1)) {
			/* constant FP source: materialize its bit pattern directly */
			ir_insn *_insn = &ctx->ir_base[insn->op1];
			IR_ASSERT(!IR_IS_SYM_CONST(_insn->op));
			if (src_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| mov64 Rq(def_reg), _insn->val.i64
|.endif
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				| mov Rd(def_reg), _insn->val.i32
			}
		} else {
			ir_mem mem;

			if (ir_rule(ctx, insn->op1) & IR_FUSED) {
				mem = ir_fuse_load(ctx, def, insn->op1);
			} else {
				mem = ir_ref_spill_slot(ctx, insn->op1);
			}

			if (src_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				| ASM_TXT_TMEM_OP mov, Rq(def_reg), qword, mem
|.endif
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				| ASM_TXT_TMEM_OP mov, Rd(def_reg), dword, mem
			}
		}
	} else if (IR_IS_TYPE_FP(dst_type)) {
		/* int -> fp: movd/movq into an SSE register */
		IR_ASSERT(IR_IS_TYPE_INT(src_type));
		if (op1_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op1_reg)) {
				op1_reg = IR_REG_NUM(op1_reg);
				ir_emit_load(ctx, src_type, op1_reg, insn->op1);
			}
			if (dst_type == IR_DOUBLE) {
				IR_ASSERT(sizeof(void*) == 8);
|.if X64
				if (ctx->mflags & IR_X86_AVX) {
					| vmovd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				} else {
					| movd
 xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				}
|.endif
			} else {
				IR_ASSERT(dst_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vmovd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				} else {
					| movd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				}
			}
		} else if (IR_IS_CONST_REF(insn->op1)) {
			/* load the constant's bits from the rodata constant pool */
			int label = ir_const_label(ctx, insn->op1);

			| ASM_FP_REG_TXT_OP movs, dst_type, def_reg, [=>label]
		} else {
			ir_mem mem;

			if (ir_rule(ctx, insn->op1) & IR_FUSED) {
				mem = ir_fuse_load(ctx, def, insn->op1);
			} else {
				mem = ir_ref_spill_slot(ctx, insn->op1);
			}

			| ASM_FP_REG_MEM_OP movs, dst_type, def_reg, mem
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

/* Emit INT2FP: convert an integer to float/double via cvtsi2ss/sd.
 * Sub-32-bit sources are first widened; the destination xmm is xor-cleared
 * to break the false dependency cvtsi2* has on its upper lanes. */
static void ir_emit_int2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_INT(src_type));
	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op1_reg != IR_REG_NONE) {
		bool src64 = 0;

		if (IR_REG_SPILLED(op1_reg) || IR_IS_CONST_REF(insn->op1)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (IR_IS_TYPE_SIGNED(src_type)) {
			if (ir_type_size[src_type] < 4) {
|.if X64
|| if (ir_type_size[src_type] == 1) {
				| movsx Rq(op1_reg), Rb(op1_reg)
|| } else {
				| movsx Rq(op1_reg), Rw(op1_reg)
|| }
|| src64 = 1;
|.else
|| if (ir_type_size[src_type] == 1) {
				| movsx Rd(op1_reg), Rb(op1_reg)
|| } else {
				| movsx Rd(op1_reg), Rw(op1_reg)
|| }
|.endif
			} else if (ir_type_size[src_type] > 4) {
				src64 = 1;
			}
		} else {
			/* unsigned source: zero-extend; on x64, converting the widened
			 * 64-bit value keeps uint32 conversions exact */
			if (ir_type_size[src_type] < 8) {
|.if X64
|| if (ir_type_size[src_type] == 1) {
				| movzx Rq(op1_reg), Rb(op1_reg)
|| } else if (ir_type_size[src_type] == 2) {
				| movzx Rq(op1_reg), Rw(op1_reg)
|| }
|| src64 = 1;
|.else
|| if (ir_type_size[src_type] == 1) {
				| movzx Rd(op1_reg), Rb(op1_reg)
|| } else if (ir_type_size[src_type] == 2) {
				| movzx Rd(op1_reg), Rw(op1_reg)
|| }
|.endif
			} else {
				// TODO: uint64_t -> double
				src64 = 1;
			}
		}
		if (!src64) {
			if (dst_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				}
			} else {
				IR_ASSERT(dst_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rd(op1_reg)
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (dst_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| vcvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| cvtsi2sd xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				}
			} else {
				IR_ASSERT(dst_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					|
 vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| vcvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| cvtsi2ss xmm(def_reg-IR_REG_FP_FIRST), Rq(op1_reg)
				}
			}
|.endif
		}
	} else {
		/* memory source: convert directly from the fused load / spill slot */
		ir_mem mem;
		bool src64 = ir_type_size[src_type] == 8;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}

		if (!src64) {
			if (dst_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem
				}
			} else {
				IR_ASSERT(dst_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), dword, mem
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (dst_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TXT_TMEM_OP vcvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					|
 ASM_TXT_TMEM_OP cvtsi2sd, xmm(def_reg-IR_REG_FP_FIRST), qword, mem
				}
			} else {
				IR_ASSERT(dst_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vxorps xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TXT_TMEM_OP vcvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem
				} else {
					| pxor xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST)
					| ASM_TXT_TMEM_OP cvtsi2ss, xmm(def_reg-IR_REG_FP_FIRST), qword, mem
				}
			}
|.endif
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

/* Emit FP2INT: convert float/double to integer via cvtss2si/cvtsd2si.
 * dst64 selects the 64-bit conversion form. */
static void ir_emit_fp2int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];
	bool dst64 = 0;

	IR_ASSERT(IR_IS_TYPE_FP(src_type));
	IR_ASSERT(IR_IS_TYPE_INT(dst_type));
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (IR_IS_TYPE_SIGNED(dst_type) ?
 ir_type_size[dst_type] == 8 : ir_type_size[dst_type] >= 4) {
		// TODO: we might need to perform truncation from 32/64 bit integer
		dst64 = 1;
	}
	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (!dst64) {
			if (src_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					| cvtsd2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					| cvtss2si Rd(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (src_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					| cvtsd2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				} else {
					| cvtss2si Rq(def_reg), xmm(op1_reg-IR_REG_FP_FIRST)
				}
			}
|.endif
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		/* constant FP source: convert straight from the constant pool */
		int label = ir_const_label(ctx, insn->op1);

		if (!dst64) {
			if (src_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtsd2si Rd(def_reg), qword [=>label]
				} else {
					| cvtsd2si Rd(def_reg), qword [=>label]
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtss2si Rd(def_reg), dword [=>label]
				} else {
					| cvtss2si Rd(def_reg), dword [=>label]
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (src_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtsd2si Rq(def_reg), qword
 [=>label]
				} else {
					| cvtsd2si Rq(def_reg), qword [=>label]
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| vcvtss2si Rq(def_reg), dword [=>label]
				} else {
					| cvtss2si Rq(def_reg), dword [=>label]
				}
			}
|.endif
		}
	} else {
		/* memory source: convert directly from the fused load / spill slot */
		ir_mem mem;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}

		if (!dst64) {
			if (src_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| ASM_TXT_TMEM_OP vcvtsd2si, Rd(def_reg), qword, mem
				} else {
					| ASM_TXT_TMEM_OP cvtsd2si, Rd(def_reg), qword, mem
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| ASM_TXT_TMEM_OP vcvtss2si, Rd(def_reg), dword, mem
				} else {
					| ASM_TXT_TMEM_OP cvtss2si, Rd(def_reg), dword, mem
				}
			}
		} else {
			IR_ASSERT(sizeof(void*) == 8);
|.if X64
			if (src_type == IR_DOUBLE) {
				if (ctx->mflags & IR_X86_AVX) {
					| ASM_TXT_TMEM_OP vcvtsd2si, Rq(def_reg), qword, mem
				} else {
					| ASM_TXT_TMEM_OP cvtsd2si, Rq(def_reg), qword, mem
				}
			} else {
				IR_ASSERT(src_type == IR_FLOAT);
				if (ctx->mflags & IR_X86_AVX) {
					| ASM_TXT_TMEM_OP vcvtss2si, Rq(def_reg), dword, mem
				} else {
					| ASM_TXT_TMEM_OP cvtss2si, Rq(def_reg), dword, mem
				}
			}
|.endif
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, dst_type, def, def_reg);
	}
}

/* Emit FP2FP: convert between float and double (cvtss2sd / cvtsd2ss);
 * same-type conversion degenerates to a register move. */
static void ir_emit_fp2fp(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_type dst_type = insn->type;
	ir_type src_type = ctx->ir_base[insn->op1].type;
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_reg op1_reg = ctx->regs[def][1];

	IR_ASSERT(IR_IS_TYPE_FP(src_type));
	IR_ASSERT(IR_IS_TYPE_FP(dst_type));
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op1_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op1_reg)) {
			op1_reg = IR_REG_NUM(op1_reg);
			ir_emit_load(ctx, src_type, op1_reg, insn->op1);
		}
		if (src_type == dst_type) {
			if (op1_reg != def_reg) {
				ir_emit_fp_mov(ctx, dst_type, def_reg, op1_reg);
			}
		} else if (src_type == IR_DOUBLE) {
			if (ctx->mflags & IR_X86_AVX) {
				| vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
			} else {
				| cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
			}
		} else {
			IR_ASSERT(src_type == IR_FLOAT);
			if (ctx->mflags & IR_X86_AVX) {
				| vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
			} else {
				| cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(op1_reg-IR_REG_FP_FIRST)
			}
		}
	} else if (IR_IS_CONST_REF(insn->op1)) {
		/* constant source: convert straight from the constant pool */
		int label = ir_const_label(ctx, insn->op1);

		if (src_type == IR_DOUBLE) {
			if (ctx->mflags & IR_X86_AVX) {
				| vcvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword [=>label]
			} else {
				| cvtsd2ss xmm(def_reg-IR_REG_FP_FIRST), qword [=>label]
			}
		} else {
			IR_ASSERT(src_type == IR_FLOAT);
			if (ctx->mflags & IR_X86_AVX) {
				| vcvtss2sd xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword [=>label]
			} else {
				| cvtss2sd xmm(def_reg-IR_REG_FP_FIRST), dword [=>label]
			}
		}
	} else {
		/* memory source: fused load or spill slot */
		ir_mem mem;

		if (ir_rule(ctx, insn->op1) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op1);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op1);
		}

		if (src_type == IR_DOUBLE) {
			if (ctx->mflags & IR_X86_AVX) {
				| ASM_TXT_TXT_TMEM_OP vcvtsd2ss, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), qword, mem
			} else {
				| ASM_TXT_TMEM_OP cvtsd2ss,
xmm(def_reg-IR_REG_FP_FIRST), qword, mem 6572 } 6573 } else { 6574 IR_ASSERT(src_type == IR_FLOAT); 6575 if (ctx->mflags & IR_X86_AVX) { 6576 | ASM_TXT_TXT_TMEM_OP vcvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), xmm(def_reg-IR_REG_FP_FIRST), dword, mem 6577 } else { 6578 | ASM_TXT_TMEM_OP cvtss2sd, xmm(def_reg-IR_REG_FP_FIRST), dword, mem 6579 } 6580 } 6581 } 6582 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6583 ir_emit_store(ctx, dst_type, def, def_reg); 6584 } 6585} 6586 6587static void ir_emit_copy_int(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6588{ 6589 ir_ref type = insn->type; 6590 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6591 ir_reg op1_reg = ctx->regs[def][1]; 6592 6593 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 6594 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 6595 op1_reg = IR_REG_NUM(op1_reg); 6596 ir_emit_load(ctx, type, op1_reg, insn->op1); 6597 } 6598 if (def_reg == op1_reg) { 6599 /* same reg */ 6600 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 6601 ir_emit_mov(ctx, type, def_reg, op1_reg); 6602 } else if (def_reg != IR_REG_NONE) { 6603 ir_emit_load(ctx, type, def_reg, insn->op1); 6604 } else if (op1_reg != IR_REG_NONE) { 6605 ir_emit_store(ctx, type, def, op1_reg); 6606 } else { 6607 IR_ASSERT(0); 6608 } 6609 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 6610 ir_emit_store(ctx, type, def, def_reg); 6611 } 6612} 6613 6614static void ir_emit_copy_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6615{ 6616 ir_type type = insn->type; 6617 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6618 ir_reg op1_reg = ctx->regs[def][1]; 6619 6620 IR_ASSERT(def_reg != IR_REG_NONE || op1_reg != IR_REG_NONE); 6621 if (op1_reg != IR_REG_NONE && IR_REG_SPILLED(op1_reg)) { 6622 op1_reg = IR_REG_NUM(op1_reg); 6623 ir_emit_load(ctx, type, op1_reg, insn->op1); 6624 } 6625 if (def_reg == op1_reg) { 6626 /* same reg */ 6627 } else if (def_reg != IR_REG_NONE && op1_reg != IR_REG_NONE) { 6628 ir_emit_fp_mov(ctx, type, 
def_reg, op1_reg); 6629 } else if (def_reg != IR_REG_NONE) { 6630 ir_emit_load(ctx, type, def_reg, insn->op1); 6631 } else if (op1_reg != IR_REG_NONE) { 6632 ir_emit_store(ctx, type, def, op1_reg); 6633 } else { 6634 IR_ASSERT(0); 6635 } 6636 if (def_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[def][0])) { 6637 ir_emit_store(ctx, type, def, def_reg); 6638 } 6639} 6640 6641static void ir_emit_vaddr(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6642{ 6643 ir_backend_data *data = ctx->data; 6644 dasm_State **Dst = &data->dasm_state; 6645 ir_ref type = insn->type; 6646 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6647 ir_mem mem; 6648 int32_t offset; 6649 ir_reg fp; 6650 6651 IR_ASSERT(def_reg != IR_REG_NONE); 6652 mem = ir_var_spill_slot(ctx, insn->op1); 6653 fp = IR_MEM_BASE(mem); 6654 offset = IR_MEM_OFFSET(mem); 6655 | lea Ra(def_reg), aword [Ra(fp)+offset] 6656 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6657 ir_emit_store(ctx, type, def, def_reg); 6658 } 6659} 6660 6661static void ir_emit_vload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6662{ 6663 ir_insn *var_insn = &ctx->ir_base[insn->op2]; 6664 ir_ref type = insn->type; 6665 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6666 ir_reg fp; 6667 ir_mem mem; 6668 6669 IR_ASSERT(var_insn->op == IR_VAR); 6670 fp = (ctx->flags & IR_USE_FRAME_POINTER) ? 
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
	/* If the destination register is absent and the spill slot already aliases the
	 * variable's slot, there is nothing to move. */
	if (def_reg == IR_REG_NONE && ir_is_same_mem_var(ctx, def, var_insn->op3)) {
		return; // fake load
	}
	IR_ASSERT(def_reg != IR_REG_NONE);

	ir_emit_load_mem(ctx, type, def_reg, mem);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Store an integer value (insn->op3) into the stack slot of a local variable
 * (insn->op2 must be an IR_VAR). Constants are stored directly; otherwise the
 * value is (re)loaded into op3_reg first if it was spilled. */
static void ir_emit_vstore_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_insn *var_insn = &ctx->ir_base[insn->op2];
	ir_insn *val_insn = &ctx->ir_base[insn->op3];
	ir_ref type = val_insn->type;
	ir_reg op3_reg = ctx->regs[ref][3];
	ir_reg fp;
	ir_mem mem;

	IR_ASSERT(var_insn->op == IR_VAR);
	fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
	/* Value already lives in the same memory location — skip the redundant store. */
	if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg))
	 && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
		return; // fake store
	}
	if (IR_IS_CONST_REF(insn->op3)) {
		ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0);
	} else {
		IR_ASSERT(op3_reg != IR_REG_NONE);
		if (IR_REG_SPILLED(op3_reg)) {
			op3_reg = IR_REG_NUM(op3_reg);
			ir_emit_load(ctx, type, op3_reg, insn->op3);
		}
		ir_emit_store_mem_int(ctx, type, mem, op3_reg);
	}
}

/* Store a floating-point value (insn->op3) into the stack slot of a local variable
 * (insn->op2 must be an IR_VAR). Mirrors ir_emit_vstore_int for FP types. */
static void ir_emit_vstore_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn)
{
	ir_insn *var_insn = &ctx->ir_base[insn->op2];
	ir_ref type = ctx->ir_base[insn->op3].type;
	ir_reg op3_reg = ctx->regs[ref][3];
	ir_reg fp;
	ir_mem mem;

	IR_ASSERT(var_insn->op == IR_VAR);
	fp = (ctx->flags & IR_USE_FRAME_POINTER) ?
IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
	mem = IR_MEM_BO(fp, IR_SPILL_POS_TO_OFFSET(var_insn->op3));
	/* Value already lives in the same memory location — skip the redundant store. */
	if ((op3_reg == IR_REG_NONE || IR_REG_SPILLED(op3_reg))
	 && !IR_IS_CONST_REF(insn->op3) && ir_is_same_mem_var(ctx, insn->op3, var_insn->op3)) {
		return; // fake store
	}
	if (IR_IS_CONST_REF(insn->op3)) {
		ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg);
	} else {
		IR_ASSERT(op3_reg != IR_REG_NONE);
		if (IR_REG_SPILLED(op3_reg)) {
			op3_reg = IR_REG_NUM(op3_reg);
			ir_emit_load(ctx, type, op3_reg, insn->op3);
		}
		ir_emit_store_mem_fp(ctx, type, mem, op3_reg);
	}
}

/* Emit an integer LOAD: read memory addressed by insn->op2 into def_reg.
 * The address may come from a register, a constant, or a fused address expression. */
static void ir_emit_load_int(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_ref type = insn->type;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_mem mem;

	if (ctx->use_lists[def].count == 1) {
		/* dead load */
		return;
	}
	IR_ASSERT(def_reg != IR_REG_NONE);
	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) {
			op2_reg = IR_REG_NUM(op2_reg);
			IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR);
			ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
		}
		mem = IR_MEM_B(op2_reg);
	} else if (IR_IS_CONST_REF(insn->op2)) {
		mem = ir_fuse_addr_const(ctx, insn->op2);
	} else {
		IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED);
		mem = ir_fuse_addr(ctx, def, insn->op2);
		/* Destination spill slot equals the load source: load only if the register
		 * value is still needed, then skip the (no-op) store-back. */
		if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) {
			if (!ir_may_avoid_spill_load(ctx, def, def)) {
				ir_emit_load_mem_int(ctx, type, def_reg, mem);
			}
			/* avoid load to the same location (valid only when register is not reused) */
			return;
		}
	}

	ir_emit_load_mem_int(ctx, type, def_reg, mem);
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

static void
ir_emit_load_fp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6778{ 6779 ir_ref type = insn->type; 6780 ir_reg op2_reg = ctx->regs[def][2]; 6781 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6782 ir_mem mem; 6783 6784 if (ctx->use_lists[def].count == 1) { 6785 /* dead load */ 6786 return; 6787 } 6788 IR_ASSERT(def_reg != IR_REG_NONE); 6789 if (op2_reg != IR_REG_NONE) { 6790 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 6791 op2_reg = IR_REG_NUM(op2_reg); 6792 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 6793 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 6794 } 6795 mem = IR_MEM_B(op2_reg); 6796 } else if (IR_IS_CONST_REF(insn->op2)) { 6797 mem = ir_fuse_addr_const(ctx, insn->op2); 6798 } else { 6799 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 6800 mem = ir_fuse_addr(ctx, def, insn->op2); 6801 if (IR_REG_SPILLED(ctx->regs[def][0]) && ir_is_same_spill_slot(ctx, def, mem)) { 6802 if (!ir_may_avoid_spill_load(ctx, def, def)) { 6803 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 6804 } 6805 /* avoid load to the same location (valid only when register is not reused) */ 6806 return; 6807 } 6808 } 6809 6810 ir_emit_load_mem_fp(ctx, type, def_reg, mem); 6811 if (IR_REG_SPILLED(ctx->regs[def][0])) { 6812 ir_emit_store(ctx, type, def, def_reg); 6813 } 6814} 6815 6816static void ir_emit_store_int(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 6817{ 6818 ir_insn *val_insn = &ctx->ir_base[insn->op3]; 6819 ir_ref type = val_insn->type; 6820 ir_reg op2_reg = ctx->regs[ref][2]; 6821 ir_reg op3_reg = ctx->regs[ref][3]; 6822 ir_mem mem; 6823 6824 if (op2_reg != IR_REG_NONE) { 6825 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 6826 op2_reg = IR_REG_NUM(op2_reg); 6827 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 6828 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 6829 } 6830 mem = IR_MEM_B(op2_reg); 6831 } else if (IR_IS_CONST_REF(insn->op2)) { 6832 mem = ir_fuse_addr_const(ctx, insn->op2); 6833 } else { 6834 IR_ASSERT(ir_rule(ctx, insn->op2) & 
IR_FUSED); 6835 mem = ir_fuse_addr(ctx, ref, insn->op2); 6836 if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 6837 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 6838 op3_reg = IR_REG_NUM(op3_reg); 6839 ir_emit_load(ctx, type, op3_reg, insn->op3); 6840 } 6841 /* avoid store to the same location */ 6842 return; 6843 } 6844 } 6845 6846 if (IR_IS_CONST_REF(insn->op3)) { 6847 ir_emit_store_mem_int_const(ctx, type, mem, insn->op3, op3_reg, 0); 6848 } else { 6849 IR_ASSERT(op3_reg != IR_REG_NONE); 6850 if (IR_REG_SPILLED(op3_reg)) { 6851 op3_reg = IR_REG_NUM(op3_reg); 6852 ir_emit_load(ctx, type, op3_reg, insn->op3); 6853 } 6854 ir_emit_store_mem_int(ctx, type, mem, op3_reg); 6855 } 6856} 6857 6858static void ir_emit_store_fp(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 6859{ 6860 ir_ref type = ctx->ir_base[insn->op3].type; 6861 ir_reg op2_reg = ctx->regs[ref][2]; 6862 ir_reg op3_reg = ctx->regs[ref][3]; 6863 ir_mem mem; 6864 6865 IR_ASSERT(op3_reg != IR_REG_NONE); 6866 if (op2_reg != IR_REG_NONE) { 6867 if (IR_REG_SPILLED(op2_reg) || IR_IS_CONST_REF(insn->op2)) { 6868 op2_reg = IR_REG_NUM(op2_reg); 6869 IR_ASSERT(ctx->ir_base[insn->op2].type == IR_ADDR); 6870 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 6871 } 6872 mem = IR_MEM_B(op2_reg); 6873 } else if (IR_IS_CONST_REF(insn->op2)) { 6874 mem = ir_fuse_addr_const(ctx, insn->op2); 6875 } else { 6876 IR_ASSERT(ir_rule(ctx, insn->op2) & IR_FUSED); 6877 mem = ir_fuse_addr(ctx, ref, insn->op2); 6878 if (!IR_IS_CONST_REF(insn->op3) && IR_REG_SPILLED(op3_reg) && ir_is_same_spill_slot(ctx, insn->op3, mem)) { 6879 if (!ir_may_avoid_spill_load(ctx, insn->op3, ref)) { 6880 op3_reg = IR_REG_NUM(op3_reg); 6881 ir_emit_load(ctx, type, op3_reg, insn->op3); 6882 } 6883 /* avoid store to the same location */ 6884 return; 6885 } 6886 } 6887 6888 if (IR_IS_CONST_REF(insn->op3)) { 6889 ir_emit_store_mem_fp_const(ctx, type, mem, insn->op3, IR_REG_NONE, op3_reg); 6890 } else { 
6891 IR_ASSERT(op3_reg != IR_REG_NONE); 6892 if (IR_REG_SPILLED(op3_reg)) { 6893 op3_reg = IR_REG_NUM(op3_reg); 6894 ir_emit_load(ctx, type, op3_reg, insn->op3); 6895 } 6896 ir_emit_store_mem_fp(ctx, type, mem, op3_reg); 6897 } 6898} 6899 6900static void ir_emit_rload(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6901{ 6902 ir_reg src_reg = insn->op2; 6903 ir_type type = insn->type; 6904 6905 if (IR_REGSET_IN(IR_REGSET_UNION((ir_regset)ctx->fixed_regset, IR_REGSET_FIXED), src_reg)) { 6906 if (ctx->vregs[def] 6907 && ctx->live_intervals[ctx->vregs[def]] 6908 && ctx->live_intervals[ctx->vregs[def]]->stack_spill_pos != -1) { 6909 ir_emit_store(ctx, type, def, src_reg); 6910 } 6911 } else { 6912 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6913 6914 if (def_reg == IR_REG_NONE) { 6915 /* op3 is used as a flag that the value is already stored in memory. 6916 * If op3 is set we don't have to store the value once again (in case of spilling) 6917 */ 6918 if (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3))) { 6919 ir_emit_store(ctx, type, def, src_reg); 6920 } 6921 } else { 6922 if (src_reg != def_reg) { 6923 if (IR_IS_TYPE_INT(type)) { 6924 ir_emit_mov(ctx, type, def_reg, src_reg); 6925 } else { 6926 IR_ASSERT(IR_IS_TYPE_FP(type)); 6927 ir_emit_fp_mov(ctx, type, def_reg, src_reg); 6928 } 6929 } 6930 if (IR_REG_SPILLED(ctx->regs[def][0]) 6931 && (!insn->op3 || !ir_is_same_spill_slot(ctx, def, IR_MEM_BO(ctx->spill_base, insn->op3)))) { 6932 ir_emit_store(ctx, type, def, def_reg); 6933 } 6934 } 6935 } 6936} 6937 6938static void ir_emit_rstore(ir_ctx *ctx, ir_ref ref, ir_insn *insn) 6939{ 6940 ir_ref type = ctx->ir_base[insn->op2].type; 6941 ir_reg op2_reg = ctx->regs[ref][2]; 6942 ir_reg dst_reg = insn->op3; 6943 6944 if (!IR_IS_CONST_REF(insn->op2) && (ir_rule(ctx, insn->op2) & IR_FUSED)) { 6945 ir_emit_load_mem(ctx, type, dst_reg, ir_fuse_load(ctx, ref, insn->op2)); 6946 } else if (op2_reg != IR_REG_NONE) { 6947 if (IR_REG_SPILLED(op2_reg)) 
{ 6948 op2_reg = IR_REG_NUM(op2_reg); 6949 ir_emit_load(ctx, type, op2_reg, insn->op2); 6950 } 6951 if (op2_reg != dst_reg) { 6952 if (IR_IS_TYPE_INT(type)) { 6953 ir_emit_mov(ctx, type, dst_reg, op2_reg); 6954 } else { 6955 IR_ASSERT(IR_IS_TYPE_FP(type)); 6956 ir_emit_fp_mov(ctx, type, dst_reg, op2_reg); 6957 } 6958 } 6959 } else { 6960 ir_emit_load(ctx, type, dst_reg, insn->op2); 6961 } 6962} 6963 6964static void ir_emit_alloca(ir_ctx *ctx, ir_ref def, ir_insn *insn) 6965{ 6966 ir_backend_data *data = ctx->data; 6967 dasm_State **Dst = &data->dasm_state; 6968 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 6969 6970 if (IR_IS_CONST_REF(insn->op2)) { 6971 ir_insn *val = &ctx->ir_base[insn->op2]; 6972 int32_t size = val->val.i32; 6973 6974 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 6975 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 6976 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 >= 0); 6977 IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); 6978 6979 if (ctx->flags2 & IR_HAS_CALLS) { 6980 /* Stack must be 16 byte aligned */ 6981 size = IR_ALIGNED_SIZE(size, 16); 6982 } else { 6983 size = IR_ALIGNED_SIZE(size, 8); 6984 } 6985 | ASM_REG_IMM_OP sub, IR_ADDR, IR_REG_RSP, size 6986 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 6987 ctx->call_stack_size += size; 6988 } 6989 } else { 6990 int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 
16 : 8; 6991 ir_reg op2_reg = ctx->regs[def][2]; 6992 ir_type type = ctx->ir_base[insn->op2].type; 6993 6994 IR_ASSERT(ctx->flags & IR_FUNCTION); 6995 IR_ASSERT(ctx->flags & IR_USE_FRAME_POINTER); 6996 IR_ASSERT(def_reg != IR_REG_NONE); 6997 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 6998 op2_reg = IR_REG_NUM(op2_reg); 6999 ir_emit_load(ctx, type, op2_reg, insn->op2); 7000 } 7001 if (def_reg != op2_reg) { 7002 if (op2_reg != IR_REG_NONE) { 7003 ir_emit_mov(ctx, type, def_reg, op2_reg); 7004 } else { 7005 ir_emit_load(ctx, type, def_reg, insn->op2); 7006 } 7007 } 7008 7009 | ASM_REG_IMM_OP add, IR_ADDR, def_reg, (alignment-1) 7010 | ASM_REG_IMM_OP and, IR_ADDR, def_reg, ~(alignment-1) 7011 | ASM_REG_REG_OP sub, IR_ADDR, IR_REG_RSP, def_reg 7012 } 7013 if (def_reg != IR_REG_NONE) { 7014 | mov Ra(def_reg), Ra(IR_REG_RSP) 7015 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7016 ir_emit_store(ctx, insn->type, def, def_reg); 7017 } 7018 } else { 7019 ir_emit_store(ctx, IR_ADDR, def, IR_REG_STACK_POINTER); 7020 } 7021} 7022 7023static void ir_emit_afree(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7024{ 7025 ir_backend_data *data = ctx->data; 7026 dasm_State **Dst = &data->dasm_state; 7027 7028 if (IR_IS_CONST_REF(insn->op2)) { 7029 ir_insn *val = &ctx->ir_base[insn->op2]; 7030 int32_t size = val->val.i32; 7031 7032 IR_ASSERT(IR_IS_TYPE_INT(val->type)); 7033 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 7034 IR_ASSERT(IR_IS_TYPE_UNSIGNED(val->type) || val->val.i64 > 0); 7035 IR_ASSERT(IR_IS_SIGNED_32BIT(val->val.i64)); 7036 7037 if (ctx->flags2 & IR_HAS_CALLS) { 7038 /* Stack must be 16 byte aligned */ 7039 size = IR_ALIGNED_SIZE(size, 16); 7040 } else { 7041 size = IR_ALIGNED_SIZE(size, 8); 7042 } 7043 | ASM_REG_IMM_OP add, IR_ADDR, IR_REG_RSP, size 7044 if (!(ctx->flags & IR_USE_FRAME_POINTER)) { 7045 ctx->call_stack_size -= size; 7046 } 7047 } else { 7048// int32_t alignment = (ctx->flags2 & IR_HAS_CALLS) ? 
16 : 8; 7049 ir_reg op2_reg = ctx->regs[def][2]; 7050 ir_type type = ctx->ir_base[insn->op2].type; 7051 7052 IR_ASSERT(ctx->flags & IR_FUNCTION); 7053 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7054 op2_reg = IR_REG_NUM(op2_reg); 7055 ir_emit_load(ctx, type, op2_reg, insn->op2); 7056 } 7057 7058 // TODO: alignment ??? 7059 7060 | ASM_REG_REG_OP add, IR_ADDR, IR_REG_RSP, op2_reg 7061 } 7062} 7063 7064static void ir_emit_frame_addr(ir_ctx *ctx, ir_ref def) 7065{ 7066 ir_backend_data *data = ctx->data; 7067 dasm_State **Dst = &data->dasm_state; 7068 ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]); 7069 7070 if (ctx->flags & IR_USE_FRAME_POINTER) { 7071 | mov Ra(def_reg), Ra(IR_REG_RBP) 7072 } else { 7073 | lea Ra(def_reg), [Ra(IR_REG_RSP)+(ctx->stack_frame_size + ctx->call_stack_size)] 7074 } 7075 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7076 ir_emit_store(ctx, IR_ADDR, def, def_reg); 7077 } 7078} 7079 7080static void ir_emit_va_start(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7081{ 7082#if defined(_WIN64) || defined(IR_TARGET_X86) 7083 ir_backend_data *data = ctx->data; 7084 dasm_State **Dst = &data->dasm_state; 7085 ir_reg fp; 7086 int arg_area_offset; 7087 ir_reg op2_reg = ctx->regs[def][2]; 7088 ir_reg tmp_reg = ctx->regs[def][3]; 7089 7090 IR_ASSERT(tmp_reg != IR_REG_NONE); 7091 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7092 op2_reg = IR_REG_NUM(op2_reg); 7093 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7094 } 7095 7096 if (ctx->flags & IR_USE_FRAME_POINTER) { 7097 fp = IR_REG_FRAME_POINTER; 7098 arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; 7099 } else { 7100 fp = IR_REG_STACK_POINTER; 7101 arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; 7102 } 7103 | lea Ra(tmp_reg), aword [Ra(fp)+arg_area_offset] 7104 if (op2_reg != IR_REG_NONE) { 7105 | mov aword [Ra(op2_reg)], Ra(tmp_reg) 7106 } else { 7107 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg); 
7108 7109 | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) 7110 } 7111#elif defined(IR_TARGET_X64) 7112|.if X64 7113 ir_backend_data *data = ctx->data; 7114 dasm_State **Dst = &data->dasm_state; 7115 ir_reg fp; 7116 int reg_save_area_offset; 7117 int overflow_arg_area_offset; 7118 ir_reg op2_reg = ctx->regs[def][2]; 7119 ir_reg tmp_reg = ctx->regs[def][3]; 7120 bool have_reg_save_area = 0; 7121 7122 IR_ASSERT(op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 7123 if (IR_REG_SPILLED(op2_reg)) { 7124 op2_reg = IR_REG_NUM(op2_reg); 7125 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7126 } 7127 7128 if (ctx->flags & IR_USE_FRAME_POINTER) { 7129 fp = IR_REG_FRAME_POINTER; 7130 reg_save_area_offset = -(ctx->stack_frame_size - ctx->stack_frame_alignment - ctx->locals_area_size); 7131 overflow_arg_area_offset = sizeof(void*) * 2 + ctx->param_stack_size; 7132 } else { 7133 fp = IR_REG_STACK_POINTER; 7134 reg_save_area_offset = ctx->locals_area_size + ctx->call_stack_size; 7135 overflow_arg_area_offset = ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*) + ctx->param_stack_size; 7136 } 7137 7138 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 7139 | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] 7140 have_reg_save_area = 1; 7141 /* Set va_list.gp_offset */ 7142 | mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], sizeof(void*) * ctx->gp_reg_params 7143 } else { 7144 reg_save_area_offset -= sizeof(void*) * IR_REG_INT_ARGS; 7145 /* Set va_list.gp_offset */ 7146 | mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], sizeof(void*) * IR_REG_INT_ARGS 7147 } 7148 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 7149 if (!have_reg_save_area) { 7150 | lea Ra(tmp_reg), aword [Ra(fp)+reg_save_area_offset] 7151 have_reg_save_area = 1; 7152 } 7153 /* Set va_list.fp_offset */ 7154 | mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], sizeof(void*) * 
IR_REG_INT_ARGS + 16 * ctx->fp_reg_params 7155 } else { 7156 /* Set va_list.fp_offset */ 7157 | mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS 7158 } 7159 if (have_reg_save_area) { 7160 /* Set va_list.reg_save_area */ 7161 | mov qword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)], Ra(tmp_reg) 7162 } 7163 | lea Ra(tmp_reg), aword [Ra(fp)+overflow_arg_area_offset] 7164 /* Set va_list.overflow_arg_area */ 7165 | mov qword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) 7166|.endif 7167#else 7168 IR_ASSERT(0 && "NIY va_start"); 7169#endif 7170} 7171 7172static void ir_emit_va_copy(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7173{ 7174 IR_ASSERT(0 && "NIY va_copy"); 7175} 7176 7177static void ir_emit_va_arg(ir_ctx *ctx, ir_ref def, ir_insn *insn) 7178{ 7179#if defined(_WIN64) || defined(IR_TARGET_X86) 7180 ir_backend_data *data = ctx->data; 7181 dasm_State **Dst = &data->dasm_state; 7182 ir_type type = insn->type; 7183 ir_reg def_reg = ctx->regs[def][0]; 7184 ir_reg op2_reg = ctx->regs[def][2]; 7185 ir_reg tmp_reg = ctx->regs[def][3]; 7186 7187 IR_ASSERT(def_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 7188 if (op2_reg != IR_REG_NONE && IR_REG_SPILLED(op2_reg)) { 7189 op2_reg = IR_REG_NUM(op2_reg); 7190 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7191 } 7192 | mov Ra(tmp_reg), aword [Ra(op2_reg)] 7193 ir_emit_load_mem(ctx, type, def_reg, IR_MEM_B(tmp_reg)); 7194 | add Ra(tmp_reg), IR_MAX(ir_type_size[type], sizeof(void*)) 7195 if (op2_reg != IR_REG_NONE) { 7196 | mov aword [Ra(op2_reg)], Ra(tmp_reg) 7197 } else { 7198 int32_t offset = ir_ref_spill_slot_offset(ctx, insn->op2, &op2_reg); 7199 7200 | mov aword [Ra(op2_reg)+offset], Ra(tmp_reg) 7201 } 7202 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7203 ir_emit_store(ctx, type, def, def_reg); 7204 } 7205#elif defined(IR_TARGET_X64) 7206|.if X64 7207 ir_backend_data *data = ctx->data; 7208 dasm_State **Dst = &data->dasm_state; 7209 ir_type 
type = insn->type; 7210 ir_reg def_reg = ctx->regs[def][0]; 7211 ir_reg op2_reg = ctx->regs[def][2]; 7212 ir_reg tmp_reg = ctx->regs[def][3]; 7213 7214 IR_ASSERT(def_reg != IR_REG_NONE && op2_reg != IR_REG_NONE && tmp_reg != IR_REG_NONE); 7215 if (IR_REG_SPILLED(op2_reg)) { 7216 op2_reg = IR_REG_NUM(op2_reg); 7217 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 7218 } 7219 if (IR_IS_TYPE_INT(type)) { 7220 | mov Rd(tmp_reg), dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)] 7221 | cmp Rd(tmp_reg), sizeof(void*)*IR_REG_INT_ARGS 7222 | jge >1 7223 | add Rd(tmp_reg), sizeof(void*) 7224 | mov dword [Ra(op2_reg)+offsetof(ir_va_list, gp_offset)], Rd(tmp_reg) 7225 | add Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)] 7226 | jmp >2 7227 |1: 7228 | mov Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)] 7229 | add Ra(tmp_reg), sizeof(void*) 7230 | mov aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) 7231 |2: 7232 | mov Ra(def_reg), aword [Ra(tmp_reg)-sizeof(void*)] 7233 } else { 7234 | mov Rd(tmp_reg), dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)] 7235 | cmp Rd(tmp_reg), sizeof(void*) * IR_REG_INT_ARGS + 16 * IR_REG_FP_ARGS 7236 | jge >1 7237 | add Rd(tmp_reg), 16 7238 | mov dword [Ra(op2_reg)+offsetof(ir_va_list, fp_offset)], Rd(tmp_reg) 7239 | add Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, reg_save_area)] 7240 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, -16)); 7241 | jmp >2 7242 |1: 7243 | mov Ra(tmp_reg), aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)] 7244 ir_emit_load_mem_fp(ctx, type, def_reg, IR_MEM_BO(tmp_reg, 0)); 7245 | add Ra(tmp_reg), 8 7246 | mov aword [Ra(op2_reg)+offsetof(ir_va_list, overflow_arg_area)], Ra(tmp_reg) 7247 |2: 7248 } 7249 if (IR_REG_SPILLED(ctx->regs[def][0])) { 7250 ir_emit_store(ctx, type, def, def_reg); 7251 } 7252|.endif 7253#else 7254 IR_ASSERT(0 && "NIY va_arg"); 7255#endif 7256} 7257 7258static void ir_emit_switch(ir_ctx *ctx, 
uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type;
	ir_block *bb;
	ir_insn *use_insn, *val;
	uint32_t n, *p, use_block;
	int i;
	int label, default_label = 0;
	int count = 0;
	ir_val min, max;
	int64_t offset;
	ir_reg op2_reg = ctx->regs[def][2];
|.if X64
||	ir_reg tmp_reg = ctx->regs[def][3];
|.endif

	/* Scan all CASE_VAL successors to find the [min, max] range of the case
	 * constants and the default target; min/max start at the opposite extremes. */
	type = ctx->ir_base[insn->op2].type;
	if (IR_IS_TYPE_SIGNED(type)) {
		min.u64 = 0x7fffffffffffffff;
		max.u64 = 0x8000000000000000;
	} else {
		min.u64 = 0xffffffffffffffff;
		max.u64 = 0x0;
	}

	bb = &ctx->cfg_blocks[b];
	p = &ctx->cfg_edges[bb->successors];
	for (n = bb->successors_count; n != 0; p++, n--) {
		use_block = *p;
		use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
		if (use_insn->op == IR_CASE_VAL) {
			val = &ctx->ir_base[use_insn->op2];
			IR_ASSERT(!IR_IS_SYM_CONST(val->op));
			if (IR_IS_TYPE_SIGNED(type)) {
				IR_ASSERT(IR_IS_TYPE_SIGNED(val->type));
				min.i64 = IR_MIN(min.i64, val->val.i64);
				max.i64 = IR_MAX(max.i64, val->val.i64);
			} else {
				IR_ASSERT(!IR_IS_TYPE_SIGNED(val->type));
				min.u64 = (int64_t)IR_MIN(min.u64, val->val.u64);
				max.u64 = (int64_t)IR_MAX(max.u64, val->val.u64);
			}
			count++;
		} else {
			IR_ASSERT(use_insn->op == IR_CASE_DEFAULT);
			default_label = ir_skip_empty_target_blocks(ctx, use_block);
		}
	}

	IR_ASSERT(op2_reg != IR_REG_NONE);
|.if X64
||	IR_ASSERT(tmp_reg != IR_REG_NONE || sizeof(void*) != 8);
|.endif
	if (IR_REG_SPILLED(op2_reg)) {
		op2_reg = IR_REG_NUM(op2_reg);
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	} else if (IR_IS_CONST_REF(insn->op2)) {
		ir_emit_load(ctx, type, op2_reg, insn->op2);
	}

	/* Generate a table jmp (dense case range) or a sequence of compare+branch tests */
	if ((max.i64-min.i64) < count * 8) {
		int
*labels = ir_mem_malloc(sizeof(int) * (size_t)(max.i64 - min.i64 + 1)); 7323 7324 for (i = 0; i <= (max.i64 - min.i64); i++) { 7325 labels[i] = default_label; 7326 } 7327 p = &ctx->cfg_edges[bb->successors]; 7328 for (n = bb->successors_count; n != 0; p++, n--) { 7329 use_block = *p; 7330 use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start]; 7331 if (use_insn->op == IR_CASE_VAL) { 7332 val = &ctx->ir_base[use_insn->op2]; 7333 IR_ASSERT(!IR_IS_SYM_CONST(val->op)); 7334 label = ir_skip_empty_target_blocks(ctx, use_block); 7335 labels[val->val.i64 - min.i64] = label; 7336 } 7337 } 7338 7339 if (IR_IS_32BIT(type, max)) { 7340 | ASM_REG_IMM_OP cmp, type, op2_reg, max.i32 7341 } else { 7342 IR_ASSERT(ir_type_size[type] == 8); 7343 IR_ASSERT(sizeof(void*) == 8); 7344|.if X64 7345 | mov64 Rq(tmp_reg), max.i64 7346 | cmp Rq(op2_reg), Rq(tmp_reg) 7347|.endif 7348 } 7349 if (IR_IS_TYPE_SIGNED(type)) { 7350 | jg =>default_label 7351 } else { 7352 | ja =>default_label 7353 } 7354 7355 if (IR_IS_32BIT(type, min)) { 7356 offset = -min.i64 * sizeof(void*); 7357 if (IR_IS_SIGNED_32BIT(offset)) { 7358 | ASM_REG_IMM_OP cmp, type, op2_reg, min.i32 7359 } else { 7360 | ASM_REG_REG_OP sub, type, op2_reg, (int32_t)offset // TODO: reg clobbering 7361 offset = 0; 7362 } 7363 } else { 7364 IR_ASSERT(sizeof(void*) == 8); 7365|.if X64 7366 | mov64 Rq(tmp_reg), min.i64 7367 | ASM_REG_REG_OP sub, type, op2_reg, tmp_reg // TODO: reg clobbering 7368 offset = 0; 7369|.endif 7370 } 7371 if (IR_IS_TYPE_SIGNED(type)) { 7372 | jl =>default_label 7373 } else { 7374 | jb =>default_label 7375 } 7376 if (sizeof(void*) == 8) { 7377|.if X64 7378 switch (ir_type_size[type]) { 7379 default: 7380 IR_ASSERT(0); 7381 case 1: 7382 if (IR_IS_TYPE_SIGNED(type)) { 7383 | movsx Ra(op2_reg), Rb(op2_reg) 7384 } else { 7385 | movzx Ra(op2_reg), Rb(op2_reg) 7386 } 7387 break; 7388 case 2: 7389 if (IR_IS_TYPE_SIGNED(type)) { 7390 | movsx Ra(op2_reg), Rw(op2_reg) 7391 } else { 7392 | movzx Ra(op2_reg), Rw(op2_reg) 
7393 } 7394 break; 7395 case 4: 7396 if (IR_IS_TYPE_SIGNED(type)) { 7397 | movsxd Ra(op2_reg), Rd(op2_reg) 7398 } else { 7399 | mov Rd(op2_reg), Rd(op2_reg) 7400 } 7401 break; 7402 case 8: 7403 break; 7404 } 7405 | lea Ra(tmp_reg), aword [>1] 7406 | jmp aword [Ra(tmp_reg)+Ra(op2_reg)*8+(int32_t)offset] 7407|.endif 7408 } else { 7409|.if not X64 7410 switch (ir_type_size[type]) { 7411 default: 7412 IR_ASSERT(0 && "Unsupported type size"); 7413 case 1: 7414 if (IR_IS_TYPE_SIGNED(type)) { 7415 | movsx Ra(op2_reg), Rb(op2_reg) 7416 } else { 7417 | movzx Ra(op2_reg), Rb(op2_reg) 7418 } 7419 break; 7420 case 2: 7421 if (IR_IS_TYPE_SIGNED(type)) { 7422 | movsx Ra(op2_reg), Rw(op2_reg) 7423 } else { 7424 | movzx Ra(op2_reg), Rw(op2_reg) 7425 } 7426 break; 7427 case 4: 7428 break; 7429 } 7430 |// jmp aword [Ra(op2_reg)*4+(int32_t)offset+>1] 7431 | lea Ra(op2_reg), aword [Ra(op2_reg)*4+(int32_t)offset] // TODO: reg clobbering 7432 | jmp aword [Ra(op2_reg)+>1] 7433|.endif 7434 } 7435 |.jmp_table 7436 if (!data->jmp_table_label) { 7437 data->jmp_table_label = ctx->cfg_blocks_count + ctx->consts_count + 3; 7438 |=>data->jmp_table_label: 7439 } 7440 |.align aword 7441 |1: 7442 for (i = 0; i <= (max.i64 - min.i64); i++) { 7443 int b = labels[i]; 7444 ir_block *bb = &ctx->cfg_blocks[b]; 7445 ir_insn *insn = &ctx->ir_base[bb->end]; 7446 7447 if (insn->op == IR_IJMP && IR_IS_CONST_REF(insn->op2)) { 7448 ir_ref prev = ctx->prev_ref[bb->end]; 7449 if (prev != bb->start && ctx->ir_base[prev].op == IR_SNAPSHOT) { 7450 prev = ctx->prev_ref[prev]; 7451 } 7452 if (prev == bb->start) { 7453 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 7454 7455 | .aword &addr 7456 if (ctx->ir_base[bb->start].op != IR_CASE_DEFAULT) { 7457 bb->flags |= IR_BB_EMPTY; 7458 } 7459 continue; 7460 } 7461 } 7462 | .aword =>b 7463 } 7464 |.code 7465 ir_mem_free(labels); 7466 } else { 7467 p = &ctx->cfg_edges[bb->successors]; 7468 for (n = bb->successors_count; n != 0; p++, n--) { 7469 use_block = 
*p;
			use_insn = &ctx->ir_base[ctx->cfg_blocks[use_block].start];
			if (use_insn->op == IR_CASE_VAL) {
				val = &ctx->ir_base[use_insn->op2];
				IR_ASSERT(!IR_IS_SYM_CONST(val->op));
				label = ir_skip_empty_target_blocks(ctx, use_block);
				if (IR_IS_32BIT(type, val->val)) {
					|	ASM_REG_IMM_OP cmp, type, op2_reg, val->val.i32
				} else {
					IR_ASSERT(sizeof(void*) == 8);
|.if X64
					|	mov64 Ra(tmp_reg), val->val.i64
					|	ASM_REG_REG_OP cmp, type, op2_reg, tmp_reg
|.endif
				}
				|	je =>label
			}
		}
		if (default_label) {
			|	jmp =>default_label
		}
	}
}

/* Compute how many stack bytes are needed to pass the arguments of the given
 * call instruction (arguments start at operand 3). Arguments that fit into the
 * remaining parameter registers consume no stack; each stack argument takes at
 * least a pointer-sized slot. The result includes IR_SHADOW_ARGS. */
static int32_t ir_call_used_stack(ir_ctx *ctx, ir_insn *insn)
{
	int j, n;
	ir_type type;
	int int_param = 0;
	int fp_param = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	int32_t used_stack = 0;

#ifdef IR_HAVE_FASTCALL
	/* 32-bit fastcall uses a reduced set of register parameters. */
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
	}
#endif

	n = insn->inputs_count;
	for (j = 3; j <= n; j++) {
		type = ctx->ir_base[ir_insn_op(insn, j)].type;
		if (IR_IS_TYPE_INT(type)) {
			if (int_param >= int_reg_params_count) {
				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
			}
			int_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			fp_param++;
#endif
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (fp_param >= fp_reg_params_count) {
				used_stack += IR_MAX(sizeof(void*), ir_type_size[type]);
			}
			fp_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			int_param++;
#endif
		}
	}

	/* Reserved "home space" or "shadow store" for register arguments (used in Windows64 ABI) */
	used_stack += IR_SHADOW_ARGS;

	return
used_stack; 7539} 7540 7541static int32_t ir_emit_arguments(ir_ctx *ctx, ir_ref def, ir_insn *insn, ir_reg tmp_reg) 7542{ 7543 ir_backend_data *data = ctx->data; 7544 dasm_State **Dst = &data->dasm_state; 7545 int j, n; 7546 ir_ref arg; 7547 ir_insn *arg_insn; 7548 uint8_t type; 7549 ir_reg src_reg, dst_reg; 7550 int int_param = 0; 7551 int fp_param = 0; 7552 int count = 0; 7553 int int_reg_params_count = IR_REG_INT_ARGS; 7554 int fp_reg_params_count = IR_REG_FP_ARGS; 7555 const int8_t *int_reg_params = _ir_int_reg_params; 7556 const int8_t *fp_reg_params = _ir_fp_reg_params; 7557 int32_t used_stack, stack_offset = IR_SHADOW_ARGS; 7558 ir_copy *copies; 7559 bool do_pass3 = 0; 7560 /* For temporaries we may use any scratch registers except for registers used for parameters */ 7561 ir_reg tmp_fp_reg = IR_REG_FP_LAST; /* Temporary register for FP loads and swap */ 7562 7563 n = insn->inputs_count; 7564 if (n < 3) { 7565 return 0; 7566 } 7567 7568 if (tmp_reg == IR_REG_NONE) { 7569 tmp_reg = IR_REG_RAX; 7570 } 7571 7572#ifdef IR_HAVE_FASTCALL 7573 if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) { 7574 int_reg_params_count = IR_REG_INT_FCARGS; 7575 fp_reg_params_count = IR_REG_FP_FCARGS; 7576 int_reg_params = _ir_int_fc_reg_params; 7577 fp_reg_params = _ir_fp_fc_reg_params; 7578 } 7579#endif 7580 7581 if (insn->op == IR_CALL 7582 && (ctx->flags & IR_PREALLOCATED_STACK) 7583#ifdef IR_HAVE_FASTCALL 7584 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 7585#endif 7586 ) { 7587 // TODO: support for preallocated stack 7588 used_stack = 0; 7589 } else { 7590 used_stack = ir_call_used_stack(ctx, insn); 7591 if (IR_SHADOW_ARGS 7592 && insn->op == IR_TAILCALL 7593 && used_stack == IR_SHADOW_ARGS) { 7594 used_stack = 0; 7595 } 7596 if (ctx->fixed_call_stack_size 7597 && used_stack <= ctx->fixed_call_stack_size 7598#ifdef IR_HAVE_FASTCALL 7599 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 7600#endif 7601 ) { 7602 
			/* fits into the pre-established fixed call frame: nothing to release later */
			used_stack = 0;
		} else {
			/* Stack must be 16 byte aligned */
			int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16);
			ctx->call_stack_size += aligned_stack;
			if (aligned_stack) {
				| sub Ra(IR_REG_RSP), aligned_stack
			}
		}
	}

	/* 1. move all register arguments that should be passed through stack
	 * and collect arguments that should be passed through registers */
	copies = ir_mem_malloc((n - 2) * sizeof(ir_copy));
	for (j = 3; j <= n; j++) {
		arg = ir_insn_op(insn, j);
		src_reg = ir_get_alocated_reg(ctx, def, j);
		arg_insn = &ctx->ir_base[arg];
		type = arg_insn->type;
		if (IR_IS_TYPE_INT(type)) {
			if (int_param < int_reg_params_count) {
				dst_reg = int_reg_params[int_param];
			} else {
				dst_reg = IR_REG_NONE; /* pass argument through stack */
			}
			int_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			fp_param++;
#endif
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (fp_param < fp_reg_params_count) {
				dst_reg = fp_reg_params[fp_param];
			} else {
				dst_reg = IR_REG_NONE; /* pass argument through stack */
			}
			fp_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			int_param++;
#endif
		}
		if (dst_reg != IR_REG_NONE) {
			if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) {
				/* delay CONST->REG and MEM->REG moves to third pass */
				do_pass3 = 1;
			} else {
				if (IR_REG_SPILLED(src_reg)) {
					src_reg = IR_REG_NUM(src_reg);
					ir_emit_load(ctx, type, src_reg, arg);
				}
				if (src_reg != dst_reg) {
					/* delay REG->REG moves to second pass */
					copies[count].type = type;
					copies[count].from = src_reg;
					copies[count].to = dst_reg;
					count++;
				}
			}
		} else {
			/* Pass register arguments to stack (REG->MEM moves) */
			if (!IR_IS_CONST_REF(arg) && src_reg != IR_REG_NONE && !IR_REG_SPILLED(src_reg)) {
				ir_emit_store_mem(ctx, type, IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset), src_reg);
			} else {
				do_pass3 = 1;
			}
			stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
		}
	}

	/* 2. move all arguments that should be passed from one register to another (REG->REG movs) */
	if (count) {
		ir_parallel_copy(ctx, copies, count, tmp_reg, tmp_fp_reg);
	}
	ir_mem_free(copies);

	/* 3. move the remaining memory and immediate values */
	if (do_pass3) {
		stack_offset = IR_SHADOW_ARGS;
		int_param = 0;
		fp_param = 0;
		for (j = 3; j <= n; j++) {
			arg = ir_insn_op(insn, j);
			src_reg = ir_get_alocated_reg(ctx, def, j);
			arg_insn = &ctx->ir_base[arg];
			type = arg_insn->type;
			if (IR_IS_TYPE_INT(type)) {
				if (int_param < int_reg_params_count) {
					dst_reg = int_reg_params[int_param];
				} else {
					dst_reg = IR_REG_NONE; /* argument already passed through stack */
				}
				int_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param++;
#endif
			} else {
				IR_ASSERT(IR_IS_TYPE_FP(type));
				if (fp_param < fp_reg_params_count) {
					dst_reg = fp_reg_params[fp_param];
				} else {
					dst_reg = IR_REG_NONE; /* argument already passed through stack */
				}
				fp_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param++;
#endif
			}
			if (dst_reg != IR_REG_NONE) {
				if (IR_IS_CONST_REF(arg) || src_reg == IR_REG_NONE) {
					if (IR_IS_TYPE_INT(type)) {
						if (IR_IS_CONST_REF(arg)) {
							/* widen sub-word constants: argument slots are at least 32-bit */
							if (type == IR_I8 || type == IR_I16) {
								type = IR_I32;
							} else if (type == IR_U8 || type == IR_U16) {
								type = IR_U32;
							}
							ir_emit_load(ctx, type, dst_reg, arg);
						} else {
							ir_mem mem = ir_ref_spill_slot(ctx, arg);

							if (ir_type_size[type] > 2) {
								ir_emit_load_mem_int(ctx, type, dst_reg, mem);
							} else if (ir_type_size[type] == 2) {
								/* sign-/zero-extend sub-word values loaded from spill slots */
								if (type == IR_I16) {
									| ASM_TXT_TMEM_OP movsx, Rd(dst_reg), word, mem
								} else {
									| ASM_TXT_TMEM_OP movzx, Rd(dst_reg), word, mem
								}
							} else {
								IR_ASSERT(ir_type_size[type] == 1);
								if (type == IR_I8) {
									| ASM_TXT_TMEM_OP movsx, Rd(dst_reg), byte, mem
								} else {
									| ASM_TXT_TMEM_OP movzx, Rd(dst_reg), byte, mem
								}
							}
						}
					} else {
						ir_emit_load(ctx, type, dst_reg, arg);
					}
				}
			} else {
				ir_mem mem = IR_MEM_BO(IR_REG_STACK_POINTER, stack_offset);

				if (IR_IS_TYPE_INT(type)) {
					if (IR_IS_CONST_REF(arg)) {
						ir_emit_store_mem_int_const(ctx, type, mem, arg, tmp_reg, 1);
					} else if (src_reg == IR_REG_NONE) {
						IR_ASSERT(tmp_reg != IR_REG_NONE);
						ir_emit_load(ctx, type, tmp_reg, arg);
						ir_emit_store_mem_int(ctx, type, mem, tmp_reg);
					} else if (IR_REG_SPILLED(src_reg)) {
						src_reg = IR_REG_NUM(src_reg);
						ir_emit_load(ctx, type, src_reg, arg);
						ir_emit_store_mem_int(ctx, type, mem, src_reg);
					}
				} else {
					if (IR_IS_CONST_REF(arg)) {
						ir_emit_store_mem_fp_const(ctx, type, mem, arg, tmp_reg, tmp_fp_reg);
					} else if (src_reg == IR_REG_NONE) {
						IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
						ir_emit_load(ctx, type, tmp_fp_reg, arg);
						/* NOTE(review): stores as IR_DOUBLE even for IR_FLOAT — the slot is
						 * pointer-sized so the extra bytes look harmless; confirm intended */
						ir_emit_store_mem_fp(ctx, IR_DOUBLE, mem, tmp_fp_reg);
					} else if (IR_REG_SPILLED(src_reg)) {
						src_reg = IR_REG_NUM(src_reg);
						ir_emit_load(ctx, type, src_reg, arg);
						ir_emit_store_mem_fp(ctx, type, mem, src_reg);
					}
				}
				stack_offset += IR_MAX(sizeof(void*), ir_type_size[type]);
			}
		}
	}

#ifdef _WIN64
	/* WIN64 calling convention requires duplication of parameters passed in FP register into GP ones */
	if (ir_is_vararg(ctx, insn)) {
		n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
		for (j = 3; j <= n; j++) {
			arg = ir_insn_op(insn, j);
			arg_insn = &ctx->ir_base[arg];
			type = arg_insn->type;
			if (IR_IS_TYPE_FP(type)) {
				src_reg = fp_reg_params[j-3];
				dst_reg = int_reg_params[j-3];
|.if X64
				if (ctx->mflags & IR_X86_AVX) {
					| vmovd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST)
				} else {
					| movd Rq(dst_reg), xmm(src_reg-IR_REG_FP_FIRST)
				}
|.endif
			}
		}
	}
#endif
#ifdef IR_REG_VARARG_FP_REGS
	/* set hidden argument to specify the number of vector registers used */
	if (ir_is_vararg(ctx, insn)) {
		fp_param = IR_MIN(fp_param, fp_reg_params_count);
		| mov Rd(IR_REG_VARARG_FP_REGS), fp_param
	}
#endif

	return used_stack;
}

/* Emit the call itself plus post-call fixups: release the argument stack area
 * and move the return value from the ABI return register into the instruction's
 * allocated register and/or spill slot.  used_stack is the value returned by
 * ir_emit_arguments(). */
static void ir_emit_call_ex(ir_ctx *ctx, ir_ref def, ir_insn *insn, int32_t used_stack)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg;

	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);

		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
			| call aword &addr
		} else {
|.if X64
||			ir_reg tmp_reg = IR_REG_RAX;

#ifdef IR_REG_VARARG_FP_REGS
||			/* RAX carries the vararg FP-register count; use R11 for the target instead */
||			if (ir_is_vararg(ctx, insn)) {
||				tmp_reg = IR_REG_R11;
||			}
#endif
||			if (IR_IS_SIGNED_32BIT(addr)) {
				| mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
||			} else {
				| mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
||			}
			| call Rq(tmp_reg)
|.endif
		}
	} else {
		ir_reg op2_reg = ctx->regs[def][2];

		if (op2_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op2_reg)) {
				op2_reg = IR_REG_NUM(op2_reg);
				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
			}
			| call Ra(op2_reg)
		} else {
			ir_mem mem;

			if (ir_rule(ctx, insn->op2) & IR_FUSED) {
				mem = ir_fuse_load(ctx, def, insn->op2);
			} else {
				mem = ir_ref_spill_slot(ctx, insn->op2);
			}
			/* call through the (fused or spilled) memory operand */
			| ASM_TMEM_OP call, aword, mem
		}
	}

	if (used_stack) {
		int32_t aligned_stack = IR_ALIGNED_SIZE(used_stack, 16);

		ctx->call_stack_size -= aligned_stack;
		if (ir_is_fastcall(ctx, insn)) {
			/* fastcall callee pops its own arguments; release only the alignment padding */
			aligned_stack -= used_stack;
			if (aligned_stack) {
				| add Ra(IR_REG_RSP), aligned_stack
			}
		} else {
			| add Ra(IR_REG_RSP), aligned_stack
		}
	}

	if (insn->type != IR_VOID) {
		if (IR_IS_TYPE_INT(insn->type)) {
			/* move the integer return value out of the ABI return register */
			def_reg = IR_REG_NUM(ctx->regs[def][0]);
			if (def_reg != IR_REG_NONE) {
				if (def_reg != IR_REG_INT_RET1) {
					ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, insn->type, def, def_reg);
				}
			} else if (ctx->use_lists[def].count > 1) {
				ir_emit_store(ctx, insn->type, def, IR_REG_INT_RET1);
			}
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(insn->type));
			def_reg = IR_REG_NUM(ctx->regs[def][0]);
#ifdef IR_REG_FP_RET1
			if (def_reg != IR_REG_NONE) {
				if (def_reg != IR_REG_FP_RET1) {
					ir_emit_fp_mov(ctx, insn->type, def_reg, IR_REG_FP_RET1);
				}
				if (IR_REG_SPILLED(ctx->regs[def][0])) {
					ir_emit_store(ctx, insn->type, def, def_reg);
				}
			} else if (ctx->use_lists[def].count > 1) {
				ir_emit_store(ctx, insn->type, def, IR_REG_FP_RET1);
			}
#else
			/* no SSE return register: the value comes back on the x87 stack (st0);
			 * pop it into the spill slot (or a scratch slot) with fstp */
			if (ctx->use_lists[def].count > 1) {
				int32_t offset;
				ir_reg fp;

				if (def_reg == IR_REG_NONE) {
					offset = ir_ref_spill_slot_offset(ctx, def, &fp);
					if (insn->type == IR_DOUBLE) {
						| fstp qword [Ra(fp)+offset]
					} else {
						IR_ASSERT(insn->type == IR_FLOAT);
						| fstp dword [Ra(fp)+offset]
					}
				} else {
					/* spill st0 into the reserved return slot, then reload into an XMM reg */
					offset = ctx->ret_slot;
					IR_ASSERT(offset != -1);
					offset = IR_SPILL_POS_TO_OFFSET(offset);
					fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;
					if (insn->type == IR_DOUBLE) {
						| fstp qword [Ra(fp)+offset]
					} else {
						IR_ASSERT(insn->type == IR_FLOAT);
						| fstp dword [Ra(fp)+offset]
					}
					ir_emit_load_mem_fp(ctx, insn->type, def_reg, IR_MEM_BO(fp, offset));
					if (IR_REG_SPILLED(ctx->regs[def][0])) {
						ir_emit_store(ctx, insn->type, def, def_reg);
					}
				}
			}
#endif
		}
	}
}

/* Emit a CALL instruction: pass the arguments, then the call and result fixups */
static void ir_emit_call(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);
	ir_emit_call_ex(ctx, def, insn, used_stack);
}

/* Emit a TAILCALL: when no stack arguments are needed, tear down the frame and
 * jump to the target; otherwise fall back to a regular call followed by return */
static void ir_emit_tailcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	int32_t used_stack = ir_emit_arguments(ctx, def, insn, ctx->regs[def][1]);

	if (used_stack != 0) {
		/* cannot tail-jump with outgoing stack arguments: do call + ret */
		ir_emit_call_ex(ctx, def, insn, used_stack);
		ir_emit_return_void(ctx);
		return;
	}

	ir_emit_epilogue(ctx);

	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);

		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
			| jmp aword &addr
		} else {
|.if X64
||			ir_reg tmp_reg = IR_REG_RAX;

#ifdef IR_REG_VARARG_FP_REGS
||			/* RAX carries the vararg FP-register count; use R11 for the target instead */
||			if (ir_is_vararg(ctx, insn)) {
||				tmp_reg = IR_REG_R11;
||			}
#endif
||			if (IR_IS_SIGNED_32BIT(addr)) {
				| mov Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
||			} else {
				| mov64 Rq(tmp_reg), ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
||			}
			| jmp Rq(tmp_reg)
|.endif
		}
	} else {
		ir_reg op2_reg = ctx->regs[def][2];

		if (op2_reg != IR_REG_NONE) {
			if (IR_REG_SPILLED(op2_reg)) {
				op2_reg = IR_REG_NUM(op2_reg);
				ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2);
			}
			| jmp Ra(op2_reg)
		} else {
			ir_mem
mem; 7992 7993 if (ir_rule(ctx, insn->op2) & IR_FUSED) { 7994 mem = ir_fuse_load(ctx, def, insn->op2); 7995 } else { 7996 mem = ir_ref_spill_slot(ctx, insn->op2); 7997 } 7998 | ASM_TMEM_OP jmp, aword, mem 7999 } 8000 } 8001} 8002 8003static void ir_emit_ijmp(ir_ctx *ctx, ir_ref def, ir_insn *insn) 8004{ 8005 ir_backend_data *data = ctx->data; 8006 dasm_State **Dst = &data->dasm_state; 8007 ir_reg op2_reg = ctx->regs[def][2]; 8008 8009 if (IR_IS_CONST_REF(insn->op2)) { 8010 void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op2]); 8011 8012 if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) { 8013 | jmp aword &addr 8014 } else { 8015|.if X64 8016 if (IR_IS_SIGNED_32BIT(addr)) { 8017 | mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit> 8018 } else { 8019 | mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit> 8020 } 8021 | jmp rax 8022|.endif 8023 } 8024 } else if (ir_rule(ctx, insn->op2) & IR_FUSED) { 8025 ir_mem mem = ir_fuse_load(ctx, def, insn->op2); 8026 | ASM_TMEM_OP jmp, aword, mem 8027 } else if (op2_reg != IR_REG_NONE) { 8028 if (IR_REG_SPILLED(op2_reg)) { 8029 op2_reg = IR_REG_NUM(op2_reg); 8030 ir_emit_load(ctx, IR_ADDR, op2_reg, insn->op2); 8031 } 8032 | jmp Ra(op2_reg) 8033 } else { 8034 ir_mem mem = ir_ref_spill_slot(ctx, insn->op2); 8035 8036 | ASM_TMEM_OP jmp, aword, mem 8037 } 8038} 8039 8040static bool ir_emit_guard_jcc(ir_ctx *ctx, uint32_t b, ir_ref def, uint8_t op, void *addr, bool int_cmp) 8041{ 8042 ir_backend_data *data = ctx->data; 8043 dasm_State **Dst = &data->dasm_state; 8044 ir_insn *next_insn = &ctx->ir_base[def + 1]; 8045 8046 if (next_insn->op == IR_END || next_insn->op == IR_LOOP_END) { 8047 ir_block *bb = &ctx->cfg_blocks[b]; 8048 uint32_t target; 8049 8050 if (!(bb->flags & IR_BB_DESSA_MOVES)) { 8051 target = ctx->cfg_edges[bb->successors]; 8052 if (UNEXPECTED(bb->successors_count == 2)) { 8053 if (ctx->cfg_blocks[target].flags & IR_BB_ENTRY) { 8054 target = ctx->cfg_edges[bb->successors + 1]; 8055 
				} else {
					IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY);
				}
			} else {
				IR_ASSERT(bb->successors_count == 1);
			}
			target = ir_skip_empty_target_blocks(ctx, target);
			if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) {
				/* the successor doesn't fall through: emit the inverted branch to it
				 * and an unconditional jump to the deoptimization address */
				if (int_cmp) {
					switch (op) {
						default:
							IR_ASSERT(0 && "NIY binary op");
						case IR_EQ:
							| jne =>target
							break;
						case IR_NE:
							| je =>target
							break;
						case IR_LT:
							| jge =>target
							break;
						case IR_GE:
							| jl =>target
							break;
						case IR_LE:
							| jg =>target
							break;
						case IR_GT:
							| jle =>target
							break;
						case IR_ULT:
							| jae =>target
							break;
						case IR_UGE:
							| jb =>target
							break;
						case IR_ULE:
							| ja =>target
							break;
						case IR_UGT:
							| jbe =>target
							break;
					}
				} else {
					/* FP: PF set means unordered (NaN); route it per IEEE-754 semantics */
					switch (op) {
						default:
							IR_ASSERT(0 && "NIY binary op");
						case IR_EQ:
							| jne =>target
							| jp =>target
							break;
						case IR_NE:
							| jp &addr
							| je =>target
							break;
						case IR_LT:
							| jae =>target
							break;
						case IR_GE:
							| jp &addr
							| jb =>target
							break;
						case IR_LE:
							| ja =>target
							break;
						case IR_GT:
							| jp &addr
							| jbe =>target
							break;
					}
				}
				| jmp &addr
				return 1;
			}
		}
	} else if (next_insn->op == IR_IJMP && IR_IS_CONST_REF(next_insn->op2)) {
		/* guard followed by a constant-address indirect jump: branch straight there */
		void *target_addr = ir_jmp_addr(ctx, next_insn, &ctx->ir_base[next_insn->op2]);

		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, target_addr)) {
			if (int_cmp) {
				switch (op) {
					default:
						IR_ASSERT(0 && "NIY binary op");
					case IR_EQ:
						| jne &target_addr
						break;
					case IR_NE:
						| je &target_addr
						break;
					case IR_LT:
						| jge &target_addr
						break;
					case IR_GE:
						| jl &target_addr
						break;
					case IR_LE:
						| jg &target_addr
						break;
					case IR_GT:
						| jle &target_addr
						break;
					case IR_ULT:
						| jae &target_addr
						break;
					case IR_UGE:
						| jb &target_addr
						break;
					case IR_ULE:
						| ja &target_addr
						break;
					case IR_UGT:
						| jbe &target_addr
						break;
				}
			} else {
				switch (op) {
					default:
						IR_ASSERT(0 && "NIY binary op");
					case IR_EQ:
						| jne &target_addr
						| jp &target_addr
						break;
					case IR_NE:
						| jp &addr
						| je &target_addr
						break;
					case IR_LT:
						| jae &target_addr
						break;
					case IR_GE:
						| jp &addr
						| jb &target_addr
						break;
					case IR_LE:
						| ja &target_addr
						break;
					case IR_GT:
						| jp &addr
						| jbe &target_addr
						break;
				}
			}
			| jmp &addr
			return 1;
		}
	}

	/* default: branch to the deoptimization address on guard failure */
	if (int_cmp) {
		switch (op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_EQ:
				| je &addr
				break;
			case IR_NE:
				| jne &addr
				break;
			case IR_LT:
				| jl &addr
				break;
			case IR_GE:
				| jge &addr
				break;
			case IR_LE:
				| jle &addr
				break;
			case IR_GT:
				| jg &addr
				break;
			case IR_ULT:
				| jb &addr
				break;
			case IR_UGE:
				| jae &addr
				break;
			case IR_ULE:
				| jbe &addr
				break;
			case IR_UGT:
				| ja &addr
				break;
		}
	} else {
		switch (op) {
			default:
				IR_ASSERT(0 && "NIY binary op");
			case IR_EQ:
				| jp >1
				| je &addr
				|1:
				break;
			case IR_NE:
				| jne &addr
				| jp &addr
				break;
			case IR_LT:
				| jp >1
				| jb &addr
				|1:
				break;
			case IR_GE:
				| jae &addr
				break;
			case IR_LE:
				| jp >1
				| jbe &addr
				|1:
				break;
			case IR_GT:
				| ja &addr
				break;
//			case IR_ULT: fprintf(stderr, "\tjb .LL%d\n", true_block); break;
//			case IR_UGE: fprintf(stderr, "\tjae .LL%d\n", true_block); break;
//			case IR_ULE: fprintf(stderr, "\tjbe .LL%d\n", true_block); break;
//			case IR_UGT: fprintf(stderr, "\tja .LL%d\n", true_block); break;
		}
	}
	return 0;
}

/* Emit a GUARD/GUARD_NOT on a plain (non-comparison) integer value: test the
 * value for zero and branch to the deoptimization address (insn->op3) when the
 * guard fails.  Returns what ir_emit_guard_jcc() returns. */
static bool ir_emit_guard(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg op2_reg = ctx->regs[def][2];
	ir_type type = ctx->ir_base[insn->op2].type;
	void *addr;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (IR_IS_CONST_REF(insn->op2)) {
		/* constant condition: either a no-op or an unconditional side exit */
		bool is_true = ir_ref_is_true(ctx, insn->op2);

		if ((insn->op == IR_GUARD && !is_true) || (insn->op == IR_GUARD_NOT && is_true)) {
			addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
			if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
				| jmp aword &addr
			} else {
|.if X64
				if (IR_IS_SIGNED_32BIT(addr)) {
					| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
				} else {
					| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
				}
				| jmp aword [rax]
|.endif
			}
		}
		return 0;
	}

	if (op2_reg != IR_REG_NONE) {
		if (IR_REG_SPILLED(op2_reg)) {
			op2_reg = IR_REG_NUM(op2_reg);
			ir_emit_load(ctx, type, op2_reg, insn->op2);
		}
		| ASM_REG_REG_OP test, type, op2_reg, op2_reg
	} else {
		ir_mem mem;

		if (ir_rule(ctx, insn->op2) & IR_FUSED) {
			mem = ir_fuse_load(ctx, def, insn->op2);
		} else {
			mem = ir_ref_spill_slot(ctx, insn->op2);
		}
		| ASM_MEM_IMM_OP cmp, type, mem, 0
	}

	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
	if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
		ir_op op;

		if (insn->op == IR_GUARD) {
			op = IR_EQ;
		} else {
			op = IR_NE;
		}
		return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
	} else {
|.if X64
		/* far deoptimization address: branch over an indirect jump in cold code */
		if (insn->op == IR_GUARD) {
			| je >1
		} else {
			| jne >1
		}
		|.cold_code
		|1:
		if (IR_IS_SIGNED_32BIT(addr)) {
			| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
		} else {
			| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
		}
		| jmp aword [rax]
		|.code
|.endif
		return 0;
	}
}

/* Emit a GUARD/GUARD_NOT whose condition is a fused integer comparison
 * (insn->op2).  Comparisons against constant zero are strength-reduced
 * (ULT is always false, UGE always true, ULE==EQ, UGT==NE). */
static bool ir_emit_guard_cmp_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_insn *cmp_insn = &ctx->ir_base[insn->op2];
	ir_op op = cmp_insn->op;
	ir_type type = ctx->ir_base[cmp_insn->op1].type;
	ir_ref op1 = cmp_insn->op1;
	ir_ref op2 = cmp_insn->op2;
	ir_reg op1_reg = ctx->regs[insn->op2][1];
	ir_reg op2_reg = ctx->regs[insn->op2][2];
	void *addr;

	if (op1_reg != IR_REG_NONE && (IR_IS_CONST_REF(op1) || IR_REG_SPILLED(op1_reg))) {
		op1_reg = IR_REG_NUM(op1_reg);
		ir_emit_load(ctx, type, op1_reg, op1);
	}
	if (op2_reg != IR_REG_NONE && (IR_IS_CONST_REF(op2) || IR_REG_SPILLED(op2_reg))) {
		op2_reg = IR_REG_NUM(op2_reg);
		if (op1 != op2) {
			ir_emit_load(ctx, type, op2_reg, op2);
		}
	}

	addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
	if (IR_IS_CONST_REF(op2) && !IR_IS_SYM_CONST(ctx->ir_base[op2].op) && ctx->ir_base[op2].val.u64 == 0) {
		if (op == IR_ULT) {
			/* always false */
			if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
				| jmp aword &addr
			} else {
|.if X64
				if (IR_IS_SIGNED_32BIT(addr)) {
					| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
				} else {
					| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
				}
				| jmp aword [rax]
|.endif
			}
			return 0;
		} else if (op == IR_UGE) {
			/* always true */
			return 0;
		} else if (op == IR_ULE) {
			op = IR_EQ;
		} else if (op == IR_UGT) {
			op = IR_NE;
		}
	}
	ir_emit_cmp_int_common(ctx, type, def, cmp_insn, op1_reg, op1, op2_reg, op2);

	if (insn->op == IR_GUARD) {
		op ^= 1; // reverse
	}

	return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
}

/* Emit a GUARD/GUARD_NOT whose condition is a fused FP comparison */
static bool ir_emit_guard_cmp_fp(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_op op = ir_emit_cmp_fp_common(ctx, def, insn->op2, &ctx->ir_base[insn->op2]);
	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

	if (insn->op == IR_GUARD) {
		op ^= 1; // reverse
	}
	return ir_emit_guard_jcc(ctx, b, def, op, addr, 0);
}

/* Emit a GUARD/GUARD_NOT whose condition is a fused integer TEST */
static bool ir_emit_guard_test_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
	ir_op op = (insn->op == IR_GUARD) ? IR_EQ : IR_NE;

	ir_emit_test_int_common(ctx, def, insn->op2, op);
	return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
}

/* Emit a GUARD/GUARD_NOT reusing the condition codes already set by the
 * preceding fused instruction (no compare is emitted here) */
static bool ir_emit_guard_jcc_int(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);
	ir_op op = ctx->ir_base[insn->op2].op;

	if (insn->op == IR_GUARD) {
		op ^= 1; // reverse
	}
	return ir_emit_guard_jcc(ctx, b, def, op, addr, 1);
}

/* Emit a GUARD on the overflow of the preceding arithmetic instruction:
 * signed ops check OF (jo/jno), unsigned ops check CF (jc/jnc) */
static bool ir_emit_guard_overflow(ir_ctx *ctx, uint32_t b, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_type type;
	void *addr = ir_jmp_addr(ctx, insn, &ctx->ir_base[insn->op3]);

	type = ctx->ir_base[ctx->ir_base[insn->op2].op1].type;

	IR_ASSERT(IR_IS_TYPE_INT(type));
	if (IR_IS_TYPE_SIGNED(type)) {
		if (insn->op == IR_GUARD) {
			| jno &addr
		} else {
			| jo &addr
		}
	} else {
		if (insn->op == IR_GUARD) {
			| jnc &addr
		} else {
			| jc &addr
		}
	}
	return 0;
}

/* Emit a fused address computation (LEA), collapsing base+index*1 forms with
 * the destination register into a plain ADD when possible */
static void ir_emit_lea(ir_ctx *ctx, ir_ref def, ir_type type)
{
	ir_backend_data *data =
ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);
	ir_mem mem = ir_fuse_addr(ctx, def, def);

	IR_ASSERT(def_reg != IR_REG_NONE);
	if (ir_type_size[type] == 4) {
		/* def_reg already holds one addend: emit "add" instead of "lea" */
		if (IR_MEM_BASE(mem) == def_reg
		 && IR_MEM_OFFSET(mem) == 0
		 && IR_MEM_SCALE(mem) == 1
		 && IR_MEM_INDEX(mem) != IR_REG_NONE) {
			ir_reg reg = IR_MEM_INDEX(mem);
			| add Rd(def_reg), Rd(reg)
		} else if (IR_MEM_INDEX(mem) == def_reg
		 && IR_MEM_OFFSET(mem) == 0
		 && IR_MEM_SCALE(mem) == 1
		 && IR_MEM_BASE(mem) != IR_REG_NONE) {
			ir_reg reg = IR_MEM_BASE(mem);
			| add Rd(def_reg), Rd(reg)
		} else {
			| ASM_TXT_TMEM_OP lea, Rd(def_reg), dword, mem
		}
	} else {
		if (IR_MEM_BASE(mem) == def_reg
		 && IR_MEM_OFFSET(mem) == 0
		 && IR_MEM_SCALE(mem) == 1
		 && IR_MEM_INDEX(mem) != IR_REG_NONE) {
			ir_reg reg = IR_MEM_INDEX(mem);
			| add Ra(def_reg), Ra(reg)
		} else if (IR_MEM_INDEX(mem) == def_reg
		 && IR_MEM_OFFSET(mem) == 0
		 && IR_MEM_SCALE(mem) == 1
		 && IR_MEM_BASE(mem) != IR_REG_NONE) {
			ir_reg reg = IR_MEM_BASE(mem);
			| add Ra(def_reg), Ra(reg)
		} else {
			| ASM_TXT_TMEM_OP lea, Ra(def_reg), aword, mem
		}
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, type, def, def_reg);
	}
}

/* Emit a thread-local-storage access: walk the platform-specific TLS chain
 * through the fs/gs segment using insn->op2/op3 as offsets */
static void ir_emit_tls(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg reg = IR_REG_NUM(ctx->regs[def][0]);

	if (ctx->use_lists[def].count == 1) {
		/* dead load */
		return;
	}

|.if X64WIN
| gs
| mov Ra(reg), aword [0x58]
| mov Ra(reg), aword [Ra(reg)+insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|.elif WIN
| fs
| mov Ra(reg), aword [0x2c]
| mov Ra(reg), aword [Ra(reg)+insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|.elif X64APPLE
| gs
|| if (insn->op3 == IR_NULL) {
| mov Ra(reg), aword [insn->op2]
|| } else {
| mov Ra(reg), aword [insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|| }
|.elif X64
| fs
|| if (insn->op3 == IR_NULL) {
| mov Ra(reg), aword [insn->op2]
|| } else {
| mov Ra(reg), [0x8]
| mov Ra(reg), aword [Ra(reg)+insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|| }
|.else
| gs
|| if (insn->op3 == IR_NULL) {
| mov Ra(reg), aword [insn->op2]
|| } else {
| mov Ra(reg), [0x4]
| mov Ra(reg), aword [Ra(reg)+insn->op2]
| mov Ra(reg), aword [Ra(reg)+insn->op3]
|| }
| .endif
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, IR_ADDR, def, reg);
	}
}

/* Emit a deoptimization exit stub: save the complete CPU state (all GP and
 * XMM registers) on the stack, call the exit handler (op2) with the exit
 * number and a pointer to the saved state, then restore the stack pointer
 * and publish the handler's return value */
static void ir_emit_exitcall(ir_ctx *ctx, ir_ref def, ir_insn *insn)
{
	ir_backend_data *data = ctx->data;
	dasm_State **Dst = &data->dasm_state;
	ir_reg def_reg = IR_REG_NUM(ctx->regs[def][0]);

	IR_ASSERT(def_reg != IR_REG_NONE);

	|.if X64
	| sub rsp, 16*8+16*8+8 /* CPU regs + SSE regs */
	| mov aword [rsp+0*8], rax
	| mov aword [rsp+1*8], rcx
	| mov aword [rsp+2*8], rdx
	| mov aword [rsp+3*8], rbx
	| mov aword [rsp+5*8], rbp
	| mov aword [rsp+6*8], rsi
	| mov aword [rsp+7*8], rdi
	| mov aword [rsp+8*8], r8
	| mov aword [rsp+9*8], r9
	| mov aword [rsp+10*8], r10
	| mov aword [rsp+11*8], r11
	| mov aword [rsp+12*8], r12
	| mov aword [rsp+13*8], r13
	| mov aword [rsp+14*8], r14
	| mov aword [rsp+15*8], r15
	| movsd qword [rsp+16*8+0*8], xmm0
	| movsd qword [rsp+16*8+1*8], xmm1
	| movsd qword [rsp+16*8+2*8], xmm2
	| movsd qword [rsp+16*8+3*8], xmm3
	| movsd qword [rsp+16*8+4*8], xmm4
	| movsd qword [rsp+16*8+5*8], xmm5
	| movsd qword [rsp+16*8+6*8], xmm6
	| movsd qword [rsp+16*8+7*8], xmm7
	| movsd qword [rsp+16*8+8*8], xmm8
	| movsd qword [rsp+16*8+9*8], xmm9
	| movsd qword [rsp+16*8+10*8], xmm10
	| movsd qword [rsp+16*8+11*8], xmm11
	| movsd qword [rsp+16*8+12*8], xmm12
	| movsd qword [rsp+16*8+13*8], xmm13
	| movsd qword [rsp+16*8+14*8], xmm14
	| movsd qword [rsp+16*8+15*8], xmm15
	|
	| mov Ra(IR_REG_INT_ARG2), rsp
	| lea Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+16]
	| mov aword [rsp+4*8], Ra(IR_REG_INT_ARG1)
	| mov Ra(IR_REG_INT_ARG1), [rsp+16*8+16*8+8]
	|.if X64WIN
	| sub rsp, 32 /* shadow space */
	|.endif
	|.else
	| sub esp, 8*4+8*8+12 /* CPU regs + SSE regs */
	| mov aword [esp+0*4], eax
	| mov aword [esp+1*4], ecx
	| mov aword [esp+2*4], edx
	| mov aword [esp+3*4], ebx
	| mov aword [esp+5*4], ebp
	| mov aword [esp+6*4], esi
	| mov aword [esp+7*4], edi
	| movsd qword [esp+8*4+0*8], xmm0
	| movsd qword [esp+8*4+1*8], xmm1
	| movsd qword [esp+8*4+2*8], xmm2
	| movsd qword [esp+8*4+3*8], xmm3
	| movsd qword [esp+8*4+4*8], xmm4
	| movsd qword [esp+8*4+5*8], xmm5
	| movsd qword [esp+8*4+6*8], xmm6
	| movsd qword [esp+8*4+7*8], xmm7
	|
	| mov Ra(IR_REG_INT_FCARG2), esp
	| lea Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+16]
	| mov aword [esp+4*4], Ra(IR_REG_INT_FCARG1)
	| mov Ra(IR_REG_INT_FCARG1), [esp+8*4+8*8+12]
	|.endif

	if (IR_IS_CONST_REF(insn->op2)) {
		void *addr = ir_call_addr(ctx, insn, &ctx->ir_base[insn->op2]);

		if (sizeof(void*) == 4 || IR_MAY_USE_32BIT_ADDR(ctx->code_buffer, addr)) {
			| call aword &addr
		} else {
|.if X64
			if (IR_IS_SIGNED_32BIT(addr)) {
				| mov rax, ((ptrdiff_t)addr) // 0x48 0xc7 0xc0 <imm-32-bit>
			} else {
				| mov64 rax, ((ptrdiff_t)addr) // 0x48 0xb8 <imm-64-bit>
			}
			| call rax
|.endif
		}
	} else {
		IR_ASSERT(0);
	}

	// restore SP
	|.if X64WIN
	| add rsp, 32+16*8+16*8+16 /* shadow space + CPU regs + SSE regs */
	|.elif X64
	| add rsp, 16*8+16*8+16 /* CPU regs + SSE regs */
	|.else
	| add esp, 8*4+8*8+16 /* CPU regs + SSE regs */
	|.endif

	if (def_reg != IR_REG_INT_RET1) {
		ir_emit_mov(ctx, insn->type, def_reg, IR_REG_INT_RET1);
	}
	if (IR_REG_SPILLED(ctx->regs[def][0])) {
		ir_emit_store(ctx, insn->type, def, def_reg);
	}
}

/* Move one incoming function parameter from its ABI location (register
 * 'from_reg' or a caller-frame slot at 'offset') into its allocated place
 * (register 'to_reg' or the spill slot of ref 'to') */
static void ir_emit_param_move(ir_ctx *ctx, uint8_t type, ir_reg from_reg, ir_reg to_reg, ir_ref to, int32_t offset)
{
	ir_reg fp = (ctx->flags & IR_USE_FRAME_POINTER) ? IR_REG_FRAME_POINTER : IR_REG_STACK_POINTER;

	IR_ASSERT(from_reg != IR_REG_NONE || to_reg != IR_REG_NONE);

	if (IR_IS_TYPE_INT(type)) {
		if (from_reg != IR_REG_NONE) {
			if (to_reg != IR_REG_NONE) {
				ir_emit_mov(ctx, type, to_reg, from_reg);
			} else {
				ir_emit_store(ctx, type, to, from_reg);
			}
		} else {
			ir_emit_load_mem_int(ctx, type, to_reg, IR_MEM_BO(fp, offset));
		}
	} else {
		if (from_reg != IR_REG_NONE) {
			if (to_reg != IR_REG_NONE) {
				ir_emit_fp_mov(ctx, type, to_reg, from_reg);
			} else {
				ir_emit_store(ctx, type, to, from_reg);
			}
		} else {
			ir_emit_load_mem_fp(ctx, type, to_reg, IR_MEM_BO(fp, offset));
		}
	}
}

/* Emit the function prologue's parameter moves: walk the uses of the START
 * node and move every IR_PARAM from its ABI register or caller-stack slot
 * into its allocated register/spill slot */
static void ir_emit_load_params(ir_ctx *ctx)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	ir_insn *insn;
	ir_ref i, n, *p, use;
	int int_param_num = 0;
	int fp_param_num = 0;
	ir_reg src_reg;
	ir_reg dst_reg;
	// TODO: Calling convention specific
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;
	int32_t stack_offset = 0;

#ifdef IR_TARGET_X86
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
fp_reg_params = _ir_fp_fc_reg_params; 8728 } 8729#endif 8730 8731 if (ctx->flags & IR_USE_FRAME_POINTER) { 8732 stack_offset = sizeof(void*) * 2; /* skip old frame pointer and return address */ 8733 } else { 8734 stack_offset = sizeof(void*) + ctx->stack_frame_size + ctx->call_stack_size; /* skip return address */ 8735 } 8736 n = use_list->count; 8737 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 8738 use = *p; 8739 insn = &ctx->ir_base[use]; 8740 if (insn->op == IR_PARAM) { 8741 if (IR_IS_TYPE_INT(insn->type)) { 8742 if (int_param_num < int_reg_params_count) { 8743 src_reg = int_reg_params[int_param_num]; 8744 } else { 8745 src_reg = IR_REG_NONE; 8746 } 8747 int_param_num++; 8748#ifdef _WIN64 8749 /* WIN64 calling convention use common couter for int and fp registers */ 8750 fp_param_num++; 8751#endif 8752 } else { 8753 if (fp_param_num < fp_reg_params_count) { 8754 src_reg = fp_reg_params[fp_param_num]; 8755 } else { 8756 src_reg = IR_REG_NONE; 8757 } 8758 fp_param_num++; 8759#ifdef _WIN64 8760 /* WIN64 calling convention use common couter for int and fp registers */ 8761 int_param_num++; 8762#endif 8763 } 8764 if (ctx->vregs[use]) { 8765 dst_reg = IR_REG_NUM(ctx->regs[use][0]); 8766 IR_ASSERT(src_reg != IR_REG_NONE || dst_reg != IR_REG_NONE || 8767 stack_offset == ctx->live_intervals[ctx->vregs[use]]->stack_spill_pos + 8768 ((ctx->flags & IR_USE_FRAME_POINTER) ? 
-ctx->stack_frame_size : ctx->call_stack_size)); 8769 if (src_reg != dst_reg) { 8770 ir_emit_param_move(ctx, insn->type, src_reg, dst_reg, use, stack_offset); 8771 } 8772 if (dst_reg != IR_REG_NONE && IR_REG_SPILLED(ctx->regs[use][0])) { 8773 ir_emit_store(ctx, insn->type, use, dst_reg); 8774 } 8775 } 8776 if (src_reg == IR_REG_NONE) { 8777 if (sizeof(void*) == 8) { 8778 stack_offset += sizeof(void*); 8779 } else { 8780 stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 8781 } 8782 } 8783 } 8784 } 8785} 8786 8787static ir_reg ir_get_free_reg(ir_type type, ir_regset available) 8788{ 8789 if (IR_IS_TYPE_INT(type)) { 8790 available = IR_REGSET_INTERSECTION(available, IR_REGSET_GP); 8791 } else { 8792 IR_ASSERT(IR_IS_TYPE_FP(type)); 8793 available = IR_REGSET_INTERSECTION(available, IR_REGSET_FP); 8794 } 8795 IR_ASSERT(!IR_REGSET_IS_EMPTY(available)); 8796 return IR_REGSET_FIRST(available); 8797} 8798 8799static int ir_fix_dessa_tmps(ir_ctx *ctx, uint8_t type, ir_ref from, ir_ref to) 8800{ 8801 ir_backend_data *data = ctx->data; 8802 ir_ref ref = ctx->cfg_blocks[data->dessa_from_block].end; 8803 8804 if (to == 0) { 8805 if (IR_IS_TYPE_INT(type)) { 8806 if (ctx->regs[ref][0] == IR_REG_NONE) { 8807 ctx->regs[ref][0] = IR_REG_RAX; 8808 } 8809 } else { 8810 IR_ASSERT(IR_IS_TYPE_FP(type)); 8811 if (ctx->regs[ref][1] == IR_REG_NONE) { 8812 ctx->regs[ref][1] = IR_REG_XMM0; 8813 } 8814 } 8815 } else if (from != 0) { 8816 if (IR_IS_TYPE_INT(type)) { 8817 if (ctx->regs[ref][0] == IR_REG_NONE) { 8818 ctx->regs[ref][0] = IR_REG_RAX; 8819 } 8820 } else { 8821 IR_ASSERT(IR_IS_TYPE_FP(type)); 8822 if (ctx->regs[ref][1] == IR_REG_NONE) { 8823 ctx->regs[ref][1] = IR_REG_XMM0; 8824 } 8825 } 8826 } 8827 return 1; 8828} 8829 8830static void ir_fix_param_spills(ir_ctx *ctx) 8831{ 8832 ir_use_list *use_list = &ctx->use_lists[1]; 8833 ir_insn *insn; 8834 ir_ref i, n, *p, use; 8835 int int_param_num = 0; 8836 int fp_param_num = 0; 8837 ir_reg src_reg; 8838 // TODO: Calling 
convention specific 8839 int int_reg_params_count = IR_REG_INT_ARGS; 8840 int fp_reg_params_count = IR_REG_FP_ARGS; 8841 const int8_t *int_reg_params = _ir_int_reg_params; 8842 const int8_t *fp_reg_params = _ir_fp_reg_params; 8843 int32_t stack_start = 0; 8844 int32_t stack_offset = 0; 8845 8846#ifdef IR_TARGET_X86 8847 if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) { 8848 int_reg_params_count = IR_REG_INT_FCARGS; 8849 fp_reg_params_count = IR_REG_FP_FCARGS; 8850 int_reg_params = _ir_int_fc_reg_params; 8851 fp_reg_params = _ir_fp_fc_reg_params; 8852 } 8853#endif 8854 8855 if (ctx->flags & IR_USE_FRAME_POINTER) { 8856 /* skip old frame pointer and return address */ 8857 stack_start = sizeof(void*) * 2 + (ctx->stack_frame_size - ctx->stack_frame_alignment); 8858 } else { 8859 /* skip return address */ 8860 stack_start = sizeof(void*) + ctx->stack_frame_size; 8861 } 8862 n = use_list->count; 8863 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 8864 use = *p; 8865 insn = &ctx->ir_base[use]; 8866 if (insn->op == IR_PARAM) { 8867 if (IR_IS_TYPE_INT(insn->type)) { 8868 if (int_param_num < int_reg_params_count) { 8869 src_reg = int_reg_params[int_param_num]; 8870 } else { 8871 src_reg = IR_REG_NONE; 8872 } 8873 int_param_num++; 8874#ifdef _WIN64 8875 /* WIN64 calling convention use common couter for int and fp registers */ 8876 fp_param_num++; 8877#endif 8878 } else { 8879 if (fp_param_num < fp_reg_params_count) { 8880 src_reg = fp_reg_params[fp_param_num]; 8881 } else { 8882 src_reg = IR_REG_NONE; 8883 } 8884 fp_param_num++; 8885#ifdef _WIN64 8886 /* WIN64 calling convention use common couter for int and fp registers */ 8887 int_param_num++; 8888#endif 8889 } 8890 if (src_reg == IR_REG_NONE) { 8891 if (ctx->vregs[use]) { 8892 ir_live_interval *ival = ctx->live_intervals[ctx->vregs[use]]; 8893 if ((ival->flags & IR_LIVE_INTERVAL_MEM_PARAM) 8894 && ival->stack_spill_pos == -1 8895 && (ival->next || ival->reg == IR_REG_NONE)) { 8896 
ival->stack_spill_pos = stack_start + stack_offset; 8897 } 8898 } 8899 if (sizeof(void*) == 8) { 8900 stack_offset += sizeof(void*); 8901 } else { 8902 stack_offset += IR_MAX(sizeof(void*), ir_type_size[insn->type]); 8903 } 8904 } 8905 } 8906 } 8907 8908#ifdef _WIN64 8909 /* WIN64 uses shsow area for registers */ 8910 stack_offset += IR_MIN(int_param_num, int_reg_params_count) * sizeof(void*); 8911#endif 8912 ctx->gp_reg_params = IR_MIN(int_param_num, int_reg_params_count); 8913 ctx->fp_reg_params = IR_MIN(fp_param_num, fp_reg_params_count); 8914 ctx->param_stack_size = stack_offset; 8915} 8916 8917static void ir_allocate_unique_spill_slots(ir_ctx *ctx) 8918{ 8919 uint32_t b; 8920 ir_block *bb; 8921 ir_insn *insn; 8922 ir_ref i, n, j, *p; 8923 uint32_t *rule, insn_flags; 8924 ir_backend_data *data = ctx->data; 8925 ir_regset available = 0; 8926 ir_target_constraints constraints; 8927 uint32_t def_flags; 8928 ir_reg reg; 8929 8930#ifndef IR_REG_FP_RET1 8931 if (ctx->flags2 & IR_HAS_FP_RET_SLOT) { 8932 ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); 8933 } else if (ctx->ret_type == IR_FLOAT || ctx->ret_type == IR_DOUBLE) { 8934 ctx->ret_slot = ir_allocate_spill_slot(ctx, ctx->ret_type, &data->ra_data); 8935 } else { 8936 ctx->ret_slot = -1; 8937 } 8938#endif 8939 8940 ctx->regs = ir_mem_malloc(sizeof(ir_regs) * ctx->insns_count); 8941 memset(ctx->regs, IR_REG_NONE, sizeof(ir_regs) * ctx->insns_count); 8942 8943 /* vregs + tmp + fixed + SRATCH + ALL */ 8944 ctx->live_intervals = ir_mem_calloc(ctx->vregs_count + 1 + IR_REG_NUM + 2, sizeof(ir_live_interval*)); 8945 8946 if (!ctx->arena) { 8947 ctx->arena = ir_arena_create(16 * 1024); 8948 } 8949 8950 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { 8951 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 8952 for (i = bb->start, insn = ctx->ir_base + i, rule = ctx->rules + i; i <= bb->end;) { 8953 switch (ctx->rules ? 
*rule : insn->op) { 8954 case IR_START: 8955 case IR_BEGIN: 8956 case IR_END: 8957 case IR_IF_TRUE: 8958 case IR_IF_FALSE: 8959 case IR_CASE_VAL: 8960 case IR_CASE_DEFAULT: 8961 case IR_MERGE: 8962 case IR_LOOP_BEGIN: 8963 case IR_LOOP_END: 8964 break; 8965#ifndef IR_REG_FP_RET1 8966 case IR_CALL: 8967 if (ctx->ret_slot == -1 && (insn->type == IR_FLOAT || insn->type == IR_DOUBLE)) { 8968 ctx->ret_slot = ir_allocate_spill_slot(ctx, IR_DOUBLE, &data->ra_data); 8969 } 8970#endif 8971 IR_FALLTHROUGH; 8972 default: 8973 def_flags = ir_get_target_constraints(ctx, i, &constraints); 8974 if (ctx->rules 8975 && *rule != IR_CMP_AND_BRANCH_INT 8976 && *rule != IR_CMP_AND_BRANCH_FP 8977 && *rule != IR_TEST_AND_BRANCH_INT 8978 && *rule != IR_GUARD_CMP_INT 8979 && *rule != IR_GUARD_CMP_FP) { 8980 available = IR_REGSET_SCRATCH; 8981 } 8982 if (ctx->vregs[i]) { 8983 reg = constraints.def_reg; 8984 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 8985 IR_REGSET_EXCL(available, reg); 8986 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 8987 } else if (def_flags & IR_USE_MUST_BE_IN_REG) { 8988 if (insn->op == IR_VLOAD 8989 && ctx->live_intervals[ctx->vregs[i]] 8990 && ctx->live_intervals[ctx->vregs[i]]->stack_spill_pos != -1) { 8991 /* pass */ 8992 } else if (insn->op != IR_PARAM) { 8993 reg = ir_get_free_reg(insn->type, available); 8994 IR_REGSET_EXCL(available, reg); 8995 ctx->regs[i][0] = reg | IR_REG_SPILL_STORE; 8996 } 8997 } 8998 if (!ctx->live_intervals[ctx->vregs[i]]) { 8999 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 9000 memset(ival, 0, sizeof(ir_live_interval)); 9001 ctx->live_intervals[ctx->vregs[i]] = ival; 9002 ival->type = insn->type; 9003 ival->reg = IR_REG_NONE; 9004 ival->vreg = ctx->vregs[i]; 9005 ival->stack_spill_pos = -1; 9006 if (insn->op == IR_PARAM && reg == IR_REG_NONE) { 9007 ival->flags |= IR_LIVE_INTERVAL_MEM_PARAM; 9008 } else { 9009 ival->stack_spill_pos = ir_allocate_spill_slot(ctx, ival->type, &data->ra_data); 
9010 } 9011 } else if (insn->op == IR_PARAM) { 9012 IR_ASSERT(0 && "unexpected PARAM"); 9013 return; 9014 } 9015 } else if (insn->op == IR_VAR) { 9016 ir_use_list *use_list = &ctx->use_lists[i]; 9017 ir_ref n = use_list->count; 9018 9019 if (n > 0) { 9020 int32_t stack_spill_pos = insn->op3 = ir_allocate_spill_slot(ctx, insn->type, &data->ra_data); 9021 ir_ref i, *p, use; 9022 ir_insn *use_insn; 9023 9024 for (i = 0, p = &ctx->use_edges[use_list->refs]; i < n; i++, p++) { 9025 use = *p; 9026 use_insn = &ctx->ir_base[use]; 9027 if (use_insn->op == IR_VLOAD) { 9028 if (ctx->vregs[use] 9029 && !ctx->live_intervals[ctx->vregs[use]]) { 9030 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 9031 memset(ival, 0, sizeof(ir_live_interval)); 9032 ctx->live_intervals[ctx->vregs[use]] = ival; 9033 ival->type = insn->type; 9034 ival->reg = IR_REG_NONE; 9035 ival->vreg = ctx->vregs[use]; 9036 ival->stack_spill_pos = stack_spill_pos; 9037 } 9038 } else if (use_insn->op == IR_VSTORE) { 9039 if (!IR_IS_CONST_REF(use_insn->op3) 9040 && ctx->vregs[use_insn->op3] 9041 && !ctx->live_intervals[ctx->vregs[use_insn->op3]]) { 9042 ir_live_interval *ival = ir_arena_alloc(&ctx->arena, sizeof(ir_live_interval)); 9043 memset(ival, 0, sizeof(ir_live_interval)); 9044 ctx->live_intervals[ctx->vregs[use_insn->op3]] = ival; 9045 ival->type = insn->type; 9046 ival->reg = IR_REG_NONE; 9047 ival->vreg = ctx->vregs[use_insn->op3]; 9048 ival->stack_spill_pos = stack_spill_pos; 9049 } 9050 } 9051 } 9052 } 9053 } 9054 9055 insn_flags = ir_op_flags[insn->op]; 9056 n = constraints.tmps_count; 9057 if (n) { 9058 do { 9059 n--; 9060 if (constraints.tmp_regs[n].type) { 9061 ir_reg reg = ir_get_free_reg(constraints.tmp_regs[n].type, available); 9062 IR_REGSET_EXCL(available, reg); 9063 ctx->regs[i][constraints.tmp_regs[n].num] = reg; 9064 } else if (constraints.tmp_regs[n].reg == IR_REG_SCRATCH) { 9065 available = IR_REGSET_DIFFERENCE(available, IR_REGSET_SCRATCH); 9066 } else { 
9067 IR_REGSET_EXCL(available, constraints.tmp_regs[n].reg); 9068 } 9069 } while (n); 9070 } 9071 n = insn->inputs_count; 9072 for (j = 1, p = insn->ops + 1; j <= n; j++, p++) { 9073 ir_ref input = *p; 9074 if (IR_OPND_KIND(insn_flags, j) == IR_OPND_DATA && input > 0 && ctx->vregs[input]) { 9075 if ((def_flags & IR_DEF_REUSES_OP1_REG) && j == 1) { 9076 ir_reg reg = IR_REG_NUM(ctx->regs[i][0]); 9077 ctx->regs[i][1] = reg | IR_REG_SPILL_LOAD; 9078 } else { 9079 uint8_t use_flags = IR_USE_FLAGS(def_flags, j); 9080 ir_reg reg = (j < constraints.hints_count) ? constraints.hints[j] : IR_REG_NONE; 9081 9082 if (reg != IR_REG_NONE && IR_REGSET_IN(available, reg)) { 9083 IR_REGSET_EXCL(available, reg); 9084 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 9085 } else if (j > 1 && input == insn->op1 && ctx->regs[i][1] != IR_REG_NONE) { 9086 ctx->regs[i][j] = ctx->regs[i][1]; 9087 } else if (use_flags & IR_USE_MUST_BE_IN_REG) { 9088 reg = ir_get_free_reg(ctx->ir_base[input].type, available); 9089 IR_REGSET_EXCL(available, reg); 9090 ctx->regs[i][j] = reg | IR_REG_SPILL_LOAD; 9091 } 9092 } 9093 } 9094 } 9095 break; 9096 } 9097 n = ir_insn_len(insn); 9098 i += n; 9099 insn += n; 9100 rule += n; 9101 } 9102 if (bb->flags & IR_BB_DESSA_MOVES) { 9103 data->dessa_from_block = b; 9104 ir_gen_dessa_moves(ctx, b, ir_fix_dessa_tmps); 9105 } 9106 } 9107 9108 ctx->used_preserved_regs = ctx->fixed_save_regset; 9109 ctx->flags |= IR_NO_STACK_COMBINE; 9110 ir_fix_stack_frame(ctx); 9111} 9112 9113static void ir_preallocate_call_stack(ir_ctx *ctx) 9114{ 9115 int call_stack_size, peak_call_stack_size = 0; 9116 ir_ref i, n; 9117 ir_insn *insn; 9118 9119 for (i = 1, insn = ctx->ir_base + 1; i < ctx->insns_count;) { 9120 if (insn->op == IR_CALL) { 9121 call_stack_size = ir_call_used_stack(ctx, insn); 9122 if (call_stack_size > peak_call_stack_size 9123#ifdef IR_HAVE_FASTCALL 9124 && !ir_is_fastcall(ctx, insn) /* fast call functions restore stack pointer */ 9125#endif 9126 ) { 9127 peak_call_stack_size 
= call_stack_size; 9128 } 9129 } 9130 n = ir_insn_len(insn); 9131 i += n; 9132 insn += n; 9133 } 9134 if (peak_call_stack_size) { 9135 ctx->call_stack_size = peak_call_stack_size; 9136 ctx->flags |= IR_PREALLOCATED_STACK; 9137 } 9138} 9139 9140void ir_fix_stack_frame(ir_ctx *ctx) 9141{ 9142 uint32_t additional_size = 0; 9143 9144 ctx->locals_area_size = ctx->stack_frame_size; 9145 9146 if (ctx->used_preserved_regs) { 9147 ir_regset used_preserved_regs = (ir_regset)ctx->used_preserved_regs; 9148 ir_reg reg; 9149 (void) reg; 9150 9151 IR_REGSET_FOREACH(used_preserved_regs, reg) { 9152 additional_size += sizeof(void*); 9153 } IR_REGSET_FOREACH_END(); 9154 } 9155 9156#if defined(IR_TARGET_X64) && !defined(_WIN64) 9157 if ((ctx->flags & IR_VARARG_FUNC) && (ctx->flags2 & IR_HAS_VA_START)) { 9158 if ((ctx->flags2 & (IR_HAS_VA_ARG_GP|IR_HAS_VA_COPY)) && ctx->gp_reg_params < IR_REG_INT_ARGS) { 9159 additional_size += sizeof(void*) * IR_REG_INT_ARGS; 9160 } 9161 if ((ctx->flags2 & (IR_HAS_VA_ARG_FP|IR_HAS_VA_COPY)) && ctx->fp_reg_params < IR_REG_FP_ARGS) { 9162 additional_size += 16 * IR_REG_FP_ARGS; 9163 } 9164 } 9165#endif 9166 9167 ctx->stack_frame_size = IR_ALIGNED_SIZE(ctx->stack_frame_size, sizeof(void*)); 9168 ctx->stack_frame_size += additional_size; 9169 ctx->stack_frame_alignment = 0; 9170 ctx->call_stack_size = 0; 9171 9172 if (ctx->flags2 & IR_HAS_CALLS) { 9173 /* Stack must be 16 byte aligned */ 9174 if (!(ctx->flags & IR_FUNCTION)) { 9175 while (IR_ALIGNED_SIZE(ctx->stack_frame_size, 16) != ctx->stack_frame_size) { 9176 ctx->stack_frame_size += sizeof(void*); 9177 ctx->stack_frame_alignment += sizeof(void*); 9178 } 9179 } else if (ctx->flags & IR_USE_FRAME_POINTER) { 9180 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + sizeof(void*) * 2, 16) != ctx->stack_frame_size + sizeof(void*) * 2) { 9181 ctx->stack_frame_size += sizeof(void*); 9182 ctx->stack_frame_alignment += sizeof(void*); 9183 } 9184 } else { 9185 if (!(ctx->flags & IR_NO_STACK_COMBINE)) { 9186 
ir_preallocate_call_stack(ctx); 9187 } 9188 while (IR_ALIGNED_SIZE(ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*), 16) != 9189 ctx->stack_frame_size + ctx->call_stack_size + sizeof(void*)) { 9190 ctx->stack_frame_size += sizeof(void*); 9191 ctx->stack_frame_alignment += sizeof(void*); 9192 } 9193 } 9194 } 9195 9196 ir_fix_param_spills(ctx); 9197} 9198 9199static void* dasm_labels[ir_lb_MAX]; 9200 9201void *ir_emit_code(ir_ctx *ctx, size_t *size_ptr) 9202{ 9203 uint32_t b, n, target; 9204 ir_block *bb; 9205 ir_ref i; 9206 ir_insn *insn; 9207 uint32_t *rule; 9208 ir_backend_data data; 9209 dasm_State **Dst; 9210 int ret; 9211 void *entry; 9212 size_t size; 9213 9214 data.ra_data.unused_slot_4 = 0; 9215 data.ra_data.unused_slot_2 = 0; 9216 data.ra_data.unused_slot_1 = 0; 9217 data.ra_data.handled = NULL; 9218 data.rodata_label = 0; 9219 data.jmp_table_label = 0; 9220 data.double_neg_const = 0; 9221 data.float_neg_const = 0; 9222 data.double_abs_const = 0; 9223 data.float_abs_const = 0; 9224 data.double_zero_const = 0; 9225 ctx->data = &data; 9226 9227 if (!ctx->live_intervals) { 9228 ctx->stack_frame_size = 0; 9229 ctx->stack_frame_alignment = 0; 9230 ctx->call_stack_size = 0; 9231 ctx->used_preserved_regs = 0; 9232 ir_allocate_unique_spill_slots(ctx); 9233 } 9234 9235 if (ctx->fixed_stack_frame_size != -1) { 9236 if (ctx->fixed_stack_red_zone) { 9237 IR_ASSERT(ctx->fixed_stack_red_zone == ctx->fixed_stack_frame_size + ctx->fixed_call_stack_size); 9238 } 9239 if (ctx->stack_frame_size > ctx->fixed_stack_frame_size) { 9240 // TODO: report error to caller 9241#ifdef IR_DEBUG_MESSAGES 9242 fprintf(stderr, "IR Compilation Aborted: ctx->stack_frame_size > ctx->fixed_stack_frame_size at %s:%d\n", 9243 __FILE__, __LINE__); 9244#endif 9245 ctx->data = NULL; 9246 ctx->status = IR_ERROR_FIXED_STACK_FRAME_OVERFLOW; 9247 return NULL; 9248 } 9249 ctx->stack_frame_size = ctx->fixed_stack_frame_size; 9250 ctx->call_stack_size = ctx->fixed_call_stack_size; 9251 
ctx->stack_frame_alignment = 0; 9252 } 9253 9254 Dst = &data.dasm_state; 9255 data.dasm_state = NULL; 9256 dasm_init(&data.dasm_state, DASM_MAXSECTION); 9257 dasm_setupglobal(&data.dasm_state, dasm_labels, ir_lb_MAX); 9258 dasm_setup(&data.dasm_state, dasm_actions); 9259 /* labels for each block + for each constant + rodata label + jmp_table label + for each entry */ 9260 dasm_growpc(&data.dasm_state, ctx->cfg_blocks_count + 1 + ctx->consts_count + 1 + 1 + 1 + ctx->entries_count); 9261 data.emit_constants = ir_bitset_malloc(ctx->consts_count); 9262 9263 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_START_BR_TARGET)) { 9264 |.if X64 9265 | endbr64 9266 |.else 9267 | endbr32 9268 |.endif 9269 } 9270 9271 if (!(ctx->flags & IR_SKIP_PROLOGUE)) { 9272 ir_emit_prologue(ctx); 9273 } 9274 if (ctx->flags & IR_FUNCTION) { 9275 ir_emit_load_params(ctx); 9276 } 9277 9278 for (b = 1, bb = ctx->cfg_blocks + b; b <= ctx->cfg_blocks_count; b++, bb++) { 9279 IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE)); 9280 if ((bb->flags & (IR_BB_START|IR_BB_ENTRY|IR_BB_EMPTY)) == IR_BB_EMPTY) { 9281 continue; 9282 } 9283 |=>b: 9284 9285 i = bb->start; 9286 insn = ctx->ir_base + i; 9287 if (bb->flags & IR_BB_ENTRY) { 9288 uint32_t label = ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3; 9289 9290 |=>label: 9291 if ((ctx->flags & IR_GEN_ENDBR) && (ctx->flags & IR_ENTRY_BR_TARGET)) { 9292 |.if X64 9293 | endbr64 9294 |.else 9295 | endbr32 9296 |.endif 9297 } 9298 ir_emit_prologue(ctx); 9299 ctx->entries[insn->op3] = i; 9300 } 9301 9302 /* skip first instruction */ 9303 n = ir_insn_len(insn); 9304 i += n; 9305 insn += n; 9306 rule = ctx->rules + i; 9307 9308 while (i <= bb->end) { 9309 if (!((*rule) & (IR_FUSED|IR_SKIPPED))) 9310 switch (*rule) { 9311 case IR_VAR: 9312 case IR_PARAM: 9313 case IR_PI: 9314 case IR_PHI: 9315 case IR_SNAPSHOT: 9316 case IR_VA_END: 9317 break; 9318 case IR_LEA_OB: 9319 case IR_LEA_SI: 9320 case IR_LEA_SIB: 9321 case IR_LEA_IB: 9322 case IR_LEA_OB_I: 9323 
case IR_LEA_I_OB: 9324 case IR_LEA_SI_O: 9325 case IR_LEA_SIB_O: 9326 case IR_LEA_IB_O: 9327 case IR_LEA_OB_SI: 9328 case IR_LEA_SI_OB: 9329 case IR_LEA_B_SI: 9330 case IR_LEA_SI_B: 9331 ir_emit_lea(ctx, i, insn->type); 9332 break; 9333 case IR_MUL_PWR2: 9334 case IR_DIV_PWR2: 9335 case IR_MOD_PWR2: 9336 ir_emit_mul_div_mod_pwr2(ctx, i, insn); 9337 break; 9338 case IR_SDIV_PWR2: 9339 ir_emit_sdiv_pwr2(ctx, i, insn); 9340 break; 9341 case IR_SMOD_PWR2: 9342 ir_emit_smod_pwr2(ctx, i, insn); 9343 break; 9344 case IR_SHIFT: 9345 ir_emit_shift(ctx, i, insn); 9346 break; 9347 case IR_SHIFT_CONST: 9348 ir_emit_shift_const(ctx, i, insn); 9349 break; 9350 case IR_BIT_COUNT: 9351 ir_emit_bit_count(ctx, i, insn); 9352 break; 9353 case IR_CTPOP: 9354 ir_emit_ctpop(ctx, i, insn); 9355 break; 9356 case IR_INC: 9357 case IR_DEC: 9358 case IR_OP_INT: 9359 ir_emit_op_int(ctx, i, insn, *rule); 9360 break; 9361 case IR_ABS_INT: 9362 ir_emit_abs_int(ctx, i, insn); 9363 break; 9364 case IR_BOOL_NOT_INT: 9365 ir_emit_bool_not_int(ctx, i, insn); 9366 break; 9367 case IR_OP_FP: 9368 ir_emit_op_fp(ctx, i, insn); 9369 break; 9370 case IR_IMUL3: 9371 ir_emit_imul3(ctx, i, insn); 9372 break; 9373 case IR_BINOP_INT: 9374 ir_emit_binop_int(ctx, i, insn); 9375 break; 9376 case IR_BINOP_SSE2: 9377 ir_emit_binop_sse2(ctx, i, insn); 9378 break; 9379 case IR_BINOP_AVX: 9380 ir_emit_binop_avx(ctx, i, insn); 9381 break; 9382 case IR_MUL_INT: 9383 case IR_DIV_INT: 9384 case IR_MOD_INT: 9385 ir_emit_mul_div_mod(ctx, i, insn); 9386 break; 9387 case IR_CMP_INT: 9388 ir_emit_cmp_int(ctx, i, insn); 9389 break; 9390 case IR_TESTCC_INT: 9391 ir_emit_testcc_int(ctx, i, insn); 9392 break; 9393 case IR_SETCC_INT: 9394 ir_emit_setcc_int(ctx, i, insn); 9395 break; 9396 case IR_CMP_FP: 9397 ir_emit_cmp_fp(ctx, i, insn); 9398 break; 9399 case IR_SEXT: 9400 ir_emit_sext(ctx, i, insn); 9401 break; 9402 case IR_ZEXT: 9403 ir_emit_zext(ctx, i, insn); 9404 break; 9405 case IR_TRUNC: 9406 ir_emit_trunc(ctx, i, insn); 9407 
break; 9408 case IR_BITCAST: 9409 case IR_PROTO: 9410 ir_emit_bitcast(ctx, i, insn); 9411 break; 9412 case IR_INT2FP: 9413 ir_emit_int2fp(ctx, i, insn); 9414 break; 9415 case IR_FP2INT: 9416 ir_emit_fp2int(ctx, i, insn); 9417 break; 9418 case IR_FP2FP: 9419 ir_emit_fp2fp(ctx, i, insn); 9420 break; 9421 case IR_COPY_INT: 9422 ir_emit_copy_int(ctx, i, insn); 9423 break; 9424 case IR_COPY_FP: 9425 ir_emit_copy_fp(ctx, i, insn); 9426 break; 9427 case IR_CMP_AND_BRANCH_INT: 9428 ir_emit_cmp_and_branch_int(ctx, b, i, insn); 9429 break; 9430 case IR_CMP_AND_BRANCH_FP: 9431 ir_emit_cmp_and_branch_fp(ctx, b, i, insn); 9432 break; 9433 case IR_TEST_AND_BRANCH_INT: 9434 ir_emit_test_and_branch_int(ctx, b, i, insn); 9435 break; 9436 case IR_JCC_INT: 9437 { 9438 ir_op op = ctx->ir_base[insn->op2].op; 9439 9440 if (op == IR_ADD || 9441 op == IR_SUB || 9442// op == IR_MUL || 9443 op == IR_OR || 9444 op == IR_AND || 9445 op == IR_XOR) { 9446 op = IR_NE; 9447 } else { 9448 IR_ASSERT(op >= IR_EQ && op <= IR_UGT); 9449 } 9450 ir_emit_jcc(ctx, op, b, i, insn, 1); 9451 } 9452 break; 9453 case IR_GUARD_CMP_INT: 9454 if (ir_emit_guard_cmp_int(ctx, b, i, insn)) { 9455 goto next_block; 9456 } 9457 break; 9458 case IR_GUARD_CMP_FP: 9459 if (ir_emit_guard_cmp_fp(ctx, b, i, insn)) { 9460 goto next_block; 9461 } 9462 break; 9463 case IR_GUARD_TEST_INT: 9464 if (ir_emit_guard_test_int(ctx, b, i, insn)) { 9465 goto next_block; 9466 } 9467 break; 9468 case IR_GUARD_JCC_INT: 9469 if (ir_emit_guard_jcc_int(ctx, b, i, insn)) { 9470 goto next_block; 9471 } 9472 break; 9473 case IR_IF_INT: 9474 ir_emit_if_int(ctx, b, i, insn); 9475 break; 9476 case IR_COND: 9477 ir_emit_cond(ctx, i, insn); 9478 break; 9479 case IR_SWITCH: 9480 ir_emit_switch(ctx, b, i, insn); 9481 break; 9482 case IR_MIN_MAX_INT: 9483 ir_emit_min_max_int(ctx, i, insn); 9484 break; 9485 case IR_OVERFLOW: 9486 ir_emit_overflow(ctx, i, insn); 9487 break; 9488 case IR_OVERFLOW_AND_BRANCH: 9489 ir_emit_overflow_and_branch(ctx, b, i, insn); 
9490 break; 9491 case IR_END: 9492 case IR_LOOP_END: 9493 if (bb->flags & IR_BB_OSR_ENTRY_LOADS) { 9494 ir_emit_osr_entry_loads(ctx, b, bb); 9495 } 9496 if (bb->flags & IR_BB_DESSA_MOVES) { 9497 ir_emit_dessa_moves(ctx, b, bb); 9498 } 9499 do { 9500 ir_ref succ = ctx->cfg_edges[bb->successors]; 9501 9502 if (UNEXPECTED(bb->successors_count == 2)) { 9503 if (ctx->cfg_blocks[succ].flags & IR_BB_ENTRY) { 9504 succ = ctx->cfg_edges[bb->successors + 1]; 9505 } else { 9506 IR_ASSERT(ctx->cfg_blocks[ctx->cfg_edges[bb->successors + 1]].flags & IR_BB_ENTRY); 9507 } 9508 } else { 9509 IR_ASSERT(bb->successors_count == 1); 9510 } 9511 target = ir_skip_empty_target_blocks(ctx, succ); 9512 if (b == ctx->cfg_blocks_count || target != ir_skip_empty_next_blocks(ctx, b + 1)) { 9513 | jmp =>target 9514 } 9515 } while (0); 9516 break; 9517 case IR_RETURN_VOID: 9518 ir_emit_return_void(ctx); 9519 break; 9520 case IR_RETURN_INT: 9521 ir_emit_return_int(ctx, i, insn); 9522 break; 9523 case IR_RETURN_FP: 9524 ir_emit_return_fp(ctx, i, insn); 9525 break; 9526 case IR_CALL: 9527 ir_emit_call(ctx, i, insn); 9528 break; 9529 case IR_TAILCALL: 9530 ir_emit_tailcall(ctx, i, insn); 9531 break; 9532 case IR_IJMP: 9533 ir_emit_ijmp(ctx, i, insn); 9534 break; 9535 case IR_MEM_OP_INT: 9536 case IR_MEM_INC: 9537 case IR_MEM_DEC: 9538 ir_emit_mem_op_int(ctx, i, insn, *rule); 9539 break; 9540 case IR_MEM_BINOP_INT: 9541 ir_emit_mem_binop_int(ctx, i, insn); 9542 break; 9543 case IR_MEM_MUL_PWR2: 9544 case IR_MEM_DIV_PWR2: 9545 case IR_MEM_MOD_PWR2: 9546 ir_emit_mem_mul_div_mod_pwr2(ctx, i, insn); 9547 break; 9548 case IR_MEM_SHIFT: 9549 ir_emit_mem_shift(ctx, i, insn); 9550 break; 9551 case IR_MEM_SHIFT_CONST: 9552 ir_emit_mem_shift_const(ctx, i, insn); 9553 break; 9554 case IR_REG_BINOP_INT: 9555 ir_emit_reg_binop_int(ctx, i, insn); 9556 break; 9557 case IR_VADDR: 9558 ir_emit_vaddr(ctx, i, insn); 9559 break; 9560 case IR_VLOAD: 9561 ir_emit_vload(ctx, i, insn); 9562 break; 9563 case IR_VSTORE_INT: 
9564 ir_emit_vstore_int(ctx, i, insn); 9565 break; 9566 case IR_VSTORE_FP: 9567 ir_emit_vstore_fp(ctx, i, insn); 9568 break; 9569 case IR_RLOAD: 9570 ir_emit_rload(ctx, i, insn); 9571 break; 9572 case IR_RSTORE: 9573 ir_emit_rstore(ctx, i, insn); 9574 break; 9575 case IR_LOAD_INT: 9576 ir_emit_load_int(ctx, i, insn); 9577 break; 9578 case IR_LOAD_FP: 9579 ir_emit_load_fp(ctx, i, insn); 9580 break; 9581 case IR_STORE_INT: 9582 ir_emit_store_int(ctx, i, insn); 9583 break; 9584 case IR_STORE_FP: 9585 ir_emit_store_fp(ctx, i, insn); 9586 break; 9587 case IR_ALLOCA: 9588 ir_emit_alloca(ctx, i, insn); 9589 break; 9590 case IR_VA_START: 9591 ir_emit_va_start(ctx, i, insn); 9592 break; 9593 case IR_VA_COPY: 9594 ir_emit_va_copy(ctx, i, insn); 9595 break; 9596 case IR_VA_ARG: 9597 ir_emit_va_arg(ctx, i, insn); 9598 break; 9599 case IR_AFREE: 9600 ir_emit_afree(ctx, i, insn); 9601 break; 9602 case IR_FRAME_ADDR: 9603 ir_emit_frame_addr(ctx, i); 9604 break; 9605 case IR_EXITCALL: 9606 ir_emit_exitcall(ctx, i, insn); 9607 break; 9608 case IR_GUARD: 9609 case IR_GUARD_NOT: 9610 if (ir_emit_guard(ctx, b, i, insn)) { 9611 goto next_block; 9612 } 9613 break; 9614 case IR_GUARD_OVERFLOW: 9615 if (ir_emit_guard_overflow(ctx, b, i, insn)) { 9616 goto next_block; 9617 } 9618 break; 9619 case IR_TLS: 9620 ir_emit_tls(ctx, i, insn); 9621 break; 9622 case IR_TRAP: 9623 | int3 9624 break; 9625 default: 9626 IR_ASSERT(0 && "NIY rule/instruction"); 9627 ir_mem_free(data.emit_constants); 9628 dasm_free(&data.dasm_state); 9629 ctx->data = NULL; 9630 ctx->status = IR_ERROR_UNSUPPORTED_CODE_RULE; 9631 return NULL; 9632 } 9633 n = ir_insn_len(insn); 9634 i += n; 9635 insn += n; 9636 rule += n; 9637 } 9638next_block:; 9639 } 9640 9641 if (data.rodata_label) { 9642 |.rodata 9643 } 9644 IR_BITSET_FOREACH(data.emit_constants, ir_bitset_len(ctx->consts_count), i) { 9645 insn = &ctx->ir_base[-i]; 9646 if (IR_IS_TYPE_FP(insn->type)) { 9647 int label = ctx->cfg_blocks_count + i; 9648 9649 if 
(!data.rodata_label) { 9650 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 9651 9652 |.rodata 9653 |=>data.rodata_label: 9654 } 9655 if (insn->type == IR_DOUBLE) { 9656 |.align 8 9657 |=>label: 9658 |.dword insn->val.u32, insn->val.u32_hi 9659 } else { 9660 IR_ASSERT(insn->type == IR_FLOAT); 9661 |.align 4 9662 |=>label: 9663 |.dword insn->val.u32 9664 } 9665 } else if (insn->op == IR_STR) { 9666 int label = ctx->cfg_blocks_count + i; 9667 const char *str = ir_get_str(ctx, insn->val.str); 9668 int i = 0; 9669 9670 if (!data.rodata_label) { 9671 data.rodata_label = ctx->cfg_blocks_count + ctx->consts_count + 2; 9672 9673 |.rodata 9674 |=>data.rodata_label: 9675 } 9676 |.align 8 9677 |=>label: 9678 while (str[i]) { 9679 char c = str[i]; 9680 9681 |.byte c 9682 i++; 9683 } 9684 |.byte 0 9685 9686 } else { 9687 IR_ASSERT(0); 9688 } 9689 } IR_BITSET_FOREACH_END(); 9690 if (data.rodata_label) { 9691 |.code 9692 } 9693 ir_mem_free(data.emit_constants); 9694 9695 if (ctx->status) { 9696 dasm_free(&data.dasm_state); 9697 ctx->data = NULL; 9698 return NULL; 9699 } 9700 9701 ret = dasm_link(&data.dasm_state, size_ptr); 9702 if (ret != DASM_S_OK) { 9703 IR_ASSERT(0); 9704 dasm_free(&data.dasm_state); 9705 ctx->data = NULL; 9706 ctx->status = IR_ERROR_LINK; 9707 return NULL; 9708 } 9709 size = *size_ptr; 9710 9711 if (ctx->code_buffer) { 9712 entry = ctx->code_buffer->pos; 9713 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 9714 if (size > (size_t)((char*)ctx->code_buffer->end - (char*)entry)) { 9715 ctx->data = NULL; 9716 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 9717 return NULL; 9718 } 9719 ctx->code_buffer->pos = (char*)entry + size; 9720 } else { 9721 entry = ir_mem_mmap(size); 9722 if (!entry) { 9723 dasm_free(&data.dasm_state); 9724 ctx->data = NULL; 9725 ctx->status = IR_ERROR_CODE_MEM_OVERFLOW; 9726 return NULL; 9727 } 9728 ir_mem_unprotect(entry, size); 9729 } 9730 9731 ret = dasm_encode(&data.dasm_state, entry); 9732 if (ret != DASM_S_OK) 
{ 9733 IR_ASSERT(0); 9734 dasm_free(&data.dasm_state); 9735 if (ctx->code_buffer) { 9736 if (ctx->code_buffer->pos == (char*)entry + size) { 9737 /* rollback */ 9738 ctx->code_buffer->pos = (char*)entry - size; 9739 } 9740 } else { 9741 ir_mem_unmap(entry, size); 9742 } 9743 ctx->data = NULL; 9744 ctx->status = IR_ERROR_ENCODE; 9745 return NULL; 9746 } 9747 9748 if (data.jmp_table_label) { 9749 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.jmp_table_label); 9750 ctx->jmp_table_offset = offset; 9751 } else { 9752 ctx->jmp_table_offset = 0; 9753 } 9754 if (data.rodata_label) { 9755 uint32_t offset = dasm_getpclabel(&data.dasm_state, data.rodata_label); 9756 ctx->rodata_offset = offset; 9757 } else { 9758 ctx->rodata_offset = 0; 9759 } 9760 9761 if (ctx->entries_count) { 9762 /* For all entries */ 9763 i = ctx->entries_count; 9764 do { 9765 ir_insn *insn = &ctx->ir_base[ctx->entries[--i]]; 9766 uint32_t offset = dasm_getpclabel(&data.dasm_state, ctx->cfg_blocks_count + ctx->consts_count + 4 + insn->op3); 9767 insn->op3 = offset; 9768 } while (i != 0); 9769 } 9770 9771 dasm_free(&data.dasm_state); 9772 9773 ir_mem_flush(entry, size); 9774 9775#if defined(__GNUC__) 9776 if ((ctx->flags & IR_GEN_CACHE_DEMOTE) && (ctx->mflags & IR_X86_CLDEMOTE)) { 9777 uintptr_t start = (uintptr_t)entry; 9778 uintptr_t p = (uintptr_t)start & ~0x3F; 9779 9780 do { 9781 /* _cldemote(p); */ 9782 asm volatile(".byte 0x0f, 0x1c, 0x06" :: "S" (p)); 9783 p += 64; 9784 } while (p < start + size); 9785 } 9786#endif 9787 9788 if (!ctx->code_buffer) { 9789 ir_mem_protect(entry, size); 9790 } 9791 9792 ctx->data = NULL; 9793 return entry; 9794} 9795 9796const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr) 9797{ 9798 void *entry; 9799 size_t size; 9800 uint32_t i; 9801 dasm_State **Dst, *dasm_state; 9802 int ret; 9803 9804 IR_ASSERT(code_buffer); 9805 IR_ASSERT(sizeof(void*) == 4 || 
IR_MAY_USE_32BIT_ADDR(code_buffer, exit_addr)); 9806 9807 Dst = &dasm_state; 9808 dasm_state = NULL; 9809 dasm_init(&dasm_state, DASM_MAXSECTION); 9810 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 9811 dasm_setup(&dasm_state, dasm_actions); 9812 9813 for (i = 0; i < exit_points_per_group - 1; i++) { 9814 | push byte i 9815 | .byte 0xeb, (4*(exit_points_per_group-i)-6) // jmp >1 9816 } 9817 | push byte i 9818 |// 1: 9819 | add aword [r4], first_exit_point 9820 | jmp aword &exit_addr 9821 9822 ret = dasm_link(&dasm_state, &size); 9823 if (ret != DASM_S_OK) { 9824 IR_ASSERT(0); 9825 dasm_free(&dasm_state); 9826 return NULL; 9827 } 9828 9829 entry = code_buffer->pos; 9830 entry = (void*)IR_ALIGNED_SIZE(((size_t)(entry)), 16); 9831 if (size > (size_t)((char*)code_buffer->end - (char*)entry)) { 9832 return NULL; 9833 } 9834 code_buffer->pos = (char*)entry + size; 9835 9836 ret = dasm_encode(&dasm_state, entry); 9837 if (ret != DASM_S_OK) { 9838 IR_ASSERT(0); 9839 dasm_free(&dasm_state); 9840 if (code_buffer->pos == (char*)entry + size) { 9841 /* rollback */ 9842 code_buffer->pos = (char*)entry - size; 9843 } 9844 return NULL; 9845 } 9846 9847 dasm_free(&dasm_state); 9848 9849 ir_mem_flush(entry, size); 9850 9851 *size_ptr = size; 9852 return entry; 9853} 9854 9855bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr) 9856{ 9857 return sizeof(void*) == 8 && !IR_MAY_USE_32BIT_ADDR(code_buffer, addr); 9858} 9859 9860void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr) 9861{ 9862 void *entry; 9863 size_t size; 9864 dasm_State **Dst, *dasm_state; 9865 int ret; 9866 9867 Dst = &dasm_state; 9868 dasm_state = NULL; 9869 dasm_init(&dasm_state, DASM_MAXSECTION); 9870 dasm_setupglobal(&dasm_state, dasm_labels, ir_lb_MAX); 9871 dasm_setup(&dasm_state, dasm_actions); 9872 9873 |.code 9874 |.if X64 9875 | jmp aword [>1] 9876 |1: 9877 | .aword &addr 9878 |.else 9879 | jmp &addr 9880 |.endif 9881 9882 ret = dasm_link(&dasm_state, &size); 9883 if 
(ret != DASM_S_OK) { 9884 IR_ASSERT(0); 9885 dasm_free(&dasm_state); 9886 return NULL; 9887 } 9888 9889 if (size > (size_t)((char*)code_buffer->end - (char*)code_buffer->pos)) { 9890 dasm_free(&dasm_state); 9891 return NULL; 9892 } 9893 9894 entry = code_buffer->pos; 9895 ret = dasm_encode(&dasm_state, entry); 9896 if (ret != DASM_S_OK) { 9897 dasm_free(&dasm_state); 9898 return NULL; 9899 } 9900 9901 *size_ptr = size; 9902 code_buffer->pos = (char*)code_buffer->pos + size; 9903 9904 dasm_free(&dasm_state); 9905 ir_mem_flush(entry, size); 9906 9907 return entry; 9908} 9909 9910void ir_fix_thunk(void *thunk_entry, void *addr) 9911{ 9912 unsigned char *code = thunk_entry; 9913 void **addr_ptr; 9914 9915 if (sizeof(void*) == 8) { 9916 int32_t *offset_ptr; 9917 9918 IR_ASSERT(code[0] == 0xff && code[1] == 0x25); 9919 offset_ptr = (int32_t*)(code + 2); 9920 addr_ptr = (void**)(code + 6 + *offset_ptr); 9921 *addr_ptr = addr; 9922 } else { 9923 IR_ASSERT(code[0] == 0xe9); 9924 addr_ptr = (void**)(code + 1); 9925 *addr_ptr = (void*)((unsigned char*)addr - (code + 5)); 9926 } 9927} 9928