------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

-- Snapshot of the x64 flag from the enclosing environment; a false/nil value
-- selects the x86 variant of every table below.
local x64 = x64

-- Module information:
local _info = {
  arch = x64 and "x64" or "x86",
  description = "DynASM x86/x64 module",
  version = "1.5.0",
  vernum = 10500,
  release = "2021-05-02",
  author = "Mike Pall",
  license = "MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }

-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
local concat, sort, remove = table.concat, table.sort, table.remove
local bit = bit or require("bit")
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift

-- Inherited tables and callbacks.
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG", "SPACE",
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL", "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG", "REL_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (link):
  "IMM_LG", "IMM_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG", "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (dynamically generated below).
local map_action = {}
-- First action number. Everything below does not need to be escaped.
local actfirst = 256-#action_names

-- Action list buffer and string (only used to remove dupes).
local actlist = {}
local actstr = ""

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

-- VREG kind encodings, pre-shifted by 5 bits.
local map_vreg = {
  ["modrm.rm.m"] = 0x00,
  ["modrm.rm.r"] = 0x20,
  ["opcode"] = 0x20,
  ["sib.base"] = 0x20,
  ["sib.index"] = 0x40,
  ["modrm.reg"] = 0x80,
  ["vex.v"] = 0xa0,
  ["imm.hi"] = 0xc0,
}

-- Current number of VREG actions contributing to REX/VEX shrinkage.
local vreg_shrink_count = 0

------------------------------------------------------------------------------

-- Compute action numbers for action names: the names occupy the top of the
-- byte range, numbered consecutively from actfirst in list order.
for idx = 1, #action_names do
  map_action[action_names[idx]] = actfirst + idx - 1
end

-- Dump action names and numbers (hex and decimal) to `out`, in list order.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local aname = action_names[idx]
    local code = map_action[aname]
    out:write(format(" %-10s %02X %d\n", aname, code, code))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
-- out:  output stream object providing a write method.
-- name: C identifier used for the generated array.
-- An empty action list is emitted as a one-element array holding 255.
local function writeactions(out, name)
  local count = #actlist
  local last = actlist[count] or 255
  -- Detach the last byte: it is written separately, without a trailing comma.
  actlist[count] = nil
  out:write("static const unsigned char ", name, "[",
            count == 0 and 1 or count, "] = {\n")
  local s = " "
  for _,b in ipairs(actlist) do
    s = s..b..","
    if #s >= 75 then
      assert(out:write(s, "\n"))
      s = " "
    end
  end
  out:write(s, last, "\n};\n\n") -- Add last byte back.
  -- Re-attach the detached byte so the shared action list is left unchanged
  -- and a repeated call emits the same array again.
  if count > 0 then actlist[count] = last end
end

------------------------------------------------------------------------------

-- Add byte to action list.
-- n must be an integral number in the range 0..255.
local function wputxb(n)
  assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
-- action: name of an action listed in action_names.
-- a:      optional argument appended to the pending dasm_put() argument list.
-- num:    number of buffer positions consumed (defaults to 1 when `a` or
--         `num` is given; nothing is consumed when both are absent).
local function waction(action, a, num)
  wputxb(assert(map_action[action], "bad action name `"..action.."'"))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Optionally add a VREG action.
-- kind:  key into map_vreg selecting the encoding of the register slot.
-- vreg:  argument expression for the variable register; nothing is emitted
--        when it is nil/false.
-- psz:   value added to the kind byte (defaults to 0).
-- sk:    kinds encoded below this value count towards the shrink counter.
-- defer: when truthy, the accumulated shrink count is kept for a later VREG
--        instead of being folded into this action byte.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Report the offending kind (not the register expression) on a bad lookup.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end

-- Add call to embedded DynASM C code.
-- Emits a line of the form "dasm_<func>(Dst, <args>);" through wline().
local function wcall(func, args)
  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
end

-- Delete duplicate action list chunks. A tad slow, but so what.
-- offset: start of the pending chunk inside actlist. If the same byte string
-- already occurs in actstr, the pending chunk is dropped and the dasm_put()
-- offset (actargs[1]) is redirected to the earlier occurrence; otherwise the
-- chunk is appended to actstr.
local function dedupechunk(offset)
  local al, as = actlist, actstr
  local chunk = char(unpack(al, offset+1, #al))
  local orig = find(as, chunk, 1, true)
  if orig then
    actargs[1] = orig-1 -- Replace with original offset.
    for i=offset+1,#al do al[i] = nil end -- Kill dupe.
  else
    actstr = as..chunk
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- term: truthy when the pending chunk already ends in a terminal action, so
-- no STOP needs to be appended.
local function wflush(term)
  local offset = actargs[1]
  if #actlist == offset then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  dedupechunk(offset)
  wcall("put", actargs) -- Add call to dasm_put().
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped byte.
-- Bytes at or above actfirst collide with action numbers and get an ESC
-- action in front of them.
local function wputb(n)
  if n >= actfirst then waction("ESC") end -- Need to escape byte.
  wputxb(n)
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbers are handed out from 10 up to 246; a lookup of an unknown name
-- validates it, assigns the next number and caches it in the table.
local next_global = 10
local map_global = setmetatable({}, { __index = function(t, name)
  if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end
  local n = next_global
  if n > 246 then werror("too many global labels") end
  next_global = n + 1
  t[name] = n
  return n
end})

-- Dump global labels.
-- Writes the names in order of their assigned numbers; `lvl` is unused here.
local function dumpglobals(out, lvl)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("Global labels:\n")
  for i=10,next_global-1 do
    out:write(format(" %s\n", t[i]))
  end
  out:write("\n")
end

-- Write global label enum.
-- Each enumerator is `prefix` plus the label name with any "@..." suffix
-- removed; a final `prefix`_MAX enumerator closes the list.
local function writeglobals(out, prefix)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("enum {\n")
  for i=10,next_global-1 do
    out:write(" ", prefix, gsub(t[i], "@.*", ""), ",\n")
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
-- Emits a NULL-terminated C array `name` holding the global label names in
-- order of their assigned numbers.
local function writeglobalnames(out, name)
  local labels = {}
  for label, num in pairs(map_global) do labels[num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 10, next_global-1 do
    out:write(" \"", labels[num], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Numbers are negative and count downwards from -1.
local next_extern = -1
local map_extern = setmetatable({}, { __index = function(tab, extname)
  -- No restrictions on the name for now.
  local num = next_extern
  if num < -256 then werror("too many extern labels") end
  next_extern = num - 1
  tab[extname] = num
  return num
end})

-- Dump extern labels in order of assignment; `lvl` is unused here.
local function dumpexterns(out, lvl)
  local labels = {}
  for label, num in pairs(map_extern) do labels[-num] = label end
  out:write("Extern labels:\n")
  for idx = 1, -next_extern-1 do
    out:write(format(" %s\n", labels[idx]))
  end
  out:write("\n")
end

-- Write extern label names as a NULL-terminated C array `name`.
local function writeexternnames(out, name)
  local labels = {}
  for label, num in pairs(map_extern) do labels[-num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  for idx = 1, -next_extern-1 do
    out:write(" \"", labels[idx], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = {} -- Ext. register name -> int. name.
local map_reg_rev = {} -- Int. register name -> ext. name.
local map_reg_num = {} -- Int. register name -> register number.
local map_reg_opsize = {} -- Int. register name -> operand size.
local map_reg_valid_base = {} -- Int. register name -> valid base register?
local map_reg_valid_index = {} -- Int. register name -> valid index register?
local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex.
local reg_list = {} -- Canonical list of int. register names.

local map_type = {} -- Type name -> { ctype, reg }
local ctypenum = 0 -- Type number (for _PTx macros).

local addrsize = x64 and "q" or "d" -- Size for address operands.

-- Helper functions to fill register maps.
-- sz:    operand size code shared by every register of the class.
-- cl:    external name of the register class (stands for a variable register).
-- names: optional list of explicit external names for registers 0..#names-1.
local function mkrmap(sz, cl, names)
  -- Registers of address size or dword size may serve as base and index.
  local addressable = (sz == addrsize or sz == "d")

  -- Enter one internal register name into the canonical list and the
  -- per-register maps.
  local function define(iname, extname, num)
    reg_list[#reg_list+1] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = num
    map_reg_opsize[iname] = sz
    if addressable then
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  -- The class itself: register number -1 marks a variable register.
  local cname = format("@%s", sz)
  define(cname, cl, -1)
  map_archdef[cl] = cname

  -- Explicitly named registers.
  if names then
    for idx = 1, #names do
      local extname = names[idx]
      local iname = format("@%s%x", sz, idx-1)
      define(iname, extname, idx-1)
      map_archdef[extname] = iname
      if sz == "b" and idx > 4 then map_reg_needrex[iname] = false end
    end
  end

  -- Numbered names; only entries not already defined above are added to the
  -- canonical list, but the external alias is always registered.
  local last = (x64 and sz ~= "f") and 15 or 7
  for i = 0, last do
    local needrex = sz == "b" and i > 3
    local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end
    local extname
    if sz == "o" or sz == "y" then
      extname = format("%s%d", cl, i)
    elseif sz == "f" then
      extname = format("st%d", i)
    else
      extname = format("r%d%s", i, sz == addrsize and "" or sz)
    end
    map_archdef[extname] = iname
    if not map_reg_rev[iname] then
      define(iname, extname, i)
    end
  end

  -- An empty entry separates register classes in the canonical list.
  reg_list[#reg_list+1] = ""
end

-- Integer registers (qword, dword, word and byte sized).
if x64 then
  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
end
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})

-- The stack pointer is not accepted as an index register.
map_reg_valid_index[map_archdef.esp] = false
if x64 then
  map_reg_valid_index[map_archdef.rsp] = false
  -- A variable byte register is flagged as needing rex in x64 mode.
  map_reg_needrex[map_archdef.Rb] = true
end
-- "Ra" is the address-sized variable register class.
map_archdef.Ra = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- AVX registers (yword sized, but oword, qword and dword accessible).
mkrmap("y", "ymm")

-- Operand size prefixes to codes.
local map_opsize = {
  byte = "b",
  word = "w",
  dword = "d",
  qword = "q",
  oword = "o",
  yword = "y",
  tword = "t",
  aword = addrsize,
}

-- Operand size code to number.
local map_opsizenum = {
  b = 1,
  w = 2,
  d = 4,
  q = 8,
  o = 16,
  y = 32,
  t = 10,
}

-- Operand size code to name.
local map_opsizename = {
  b = "byte",
  w = "word",
  d = "dword",
  q = "qword",
  o = "oword",
  y = "yword",
  t = "tword",
  f = "fpword",
}

-- Valid index register scale factors.
local map_xsc = {
  ["1"] = 0,
  ["2"] = 1,
  ["4"] = 2,
  ["8"] = 3,
}

-- Condition codes (primary names first, then their aliases).
local map_cc = {
  o = 0, no = 1, b = 2, nb = 3,
  e = 4, ne = 5, be = 6, nbe = 7,
  s = 8, ns = 9, p = 10, np = 11,
  l = 12, nl = 13, le = 14, nle = 15,
  c = 2, nae = 2, nc = 3, ae = 3,
  z = 4, nz = 5, na = 6, a = 7,
  pe = 10, po = 11, nge = 12, ge = 13,
  ng = 14, g = 15,
}


-- Reverse defines for registers.
-- Replaces every internal "@..." register name in `s` by its external name.
-- Returns the results of gsub(): the new string and the substitution count.
function _M.revdef(s)
  return gsub(s, "@%w+", map_reg_rev)
end

-- Dump register names and numbers
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for idx = 1, #reg_list do
    local reg = reg_list[idx]
    if reg ~= "" then
      local num = map_reg_num[reg]
      -- Negative numbers denote variable registers.
      local shown = num < 0 and "(variable)" or num
      local sizename = map_opsizename[map_reg_opsize[reg]]
      out:write(format(" %-5s %-8s %s\n", map_reg_rev[reg], sizename, shown))
    else
      -- Empty entries separate the register classes.
      out:write("\n")
    end
  end
end

------------------------------------------------------------------------------

-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
-- aprefix: "IMM_" or "REL_".
-- imm:     label number (negative: extern label) or a pc label expression.
-- num:     number of buffer positions passed on to waction().
local function wputlabel(aprefix, imm, num)
  if type(imm) ~= "number" then
    -- Non-numeric label: pc label expression passed as an argument.
    waction(aprefix.."PC", imm, num)
    return
  end
  if imm < 0 then
    -- Extern label: a type byte (0 for IMM_, 1 otherwise) precedes the index.
    waction("EXTERN")
    wputxb(aprefix == "IMM_" and 0 or 1)
    imm = -imm-1
  else
    waction(aprefix.."LG", nil, num);
  end
  wputxb(imm)
end

-- Put signed byte or arg.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  -- Negative values are stored as their unsigned byte equivalent.
  if n < 0 then n = n + 256 end
  wputb(n)
end

-- Put unsigned byte or arg.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end

-- Put unsigned word or arg.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  -- Low byte first.
  wputb(band(n, 255))
  wputb(shr(n, 8))
end

-- Put signed or unsigned dword or arg.
-- n may be a number (emitted as four bytes, low byte first), a table whose
-- first element is a label (emitted as an IMM_ label action) or any other
-- value (passed as the argument of an IMM_D action).
local function wputdarg(n)
  local kind = type(n)
  if kind == "table" then
    wputlabel("IMM_", n[1], 1)
  elseif kind == "number" then
    for shift = 0, 16, 8 do
      wputb(band(shr(n, shift), 255))
    end
    wputb(shr(n, 24))
  else
    waction("IMM_D", n)
  end
end

-- Put signed or unsigned qword or arg.
-- Non-numeric values are split into two IMM_D actions carrying the low and
-- high 32 bits of the C expression.
local function wputqarg(n)
  if type(n) ~= "number" then
    waction("IMM_D", format("(unsigned int)(%s)", n))
    waction("IMM_D", format("(unsigned int)((unsigned long long)(%s)>>32)", n))
    return
  end
  -- This is only used for numbers from -2^31..2^32-1.
  for shift = 0, 16, 8 do
    wputb(band(shr(n, shift), 255))
  end
  wputb(shr(n, 24))
  -- The upper four bytes replicate the sign.
  local sign = n < 0 and 255 or 0
  for _ = 1, 4 do
    wputb(sign)
  end
end

-- Put operand-size dependent number or arg (defaults to dword).
-- sz: nil, "d" or "q" (dword), "w" (word), "b" (unsigned byte) or
--     "s" (signed byte); anything else is reported as an error.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif not sz or sz == "d" or sz == "q" then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end

-- Put multi-byte opcode with operand-size dependent modifications.
-- Emits all prefix and opcode bytes of one instruction through wputb().
--   sz:     operand size code; "w" emits byte 102 (0x66) first, "b" decrements
--           the final opcode byte, "y" sets the +4 bit in the last VEX byte.
--   op:     numeric opcode; may hold several bytes (even more than 32 bits).
--   rex:    REX bit mask; its low 4 bits are added to 64 to form the prefix.
--   vex:    nil, or a table with fields m, p, l and v (v is an operand table
--           with reg/vreg fields) selecting VEX encoding.
--   vregr, vregxb: truthy when variable registers take part in the encoding
--           (presumably the reg field and the rm/index/base fields -- confirm
--           against the callers).
-- Returns psz and sk, which callers hand on to wvreg().
-- NOTE(review): psz looks like the distance from the ModRM byte back to the
-- prefix that a VREG action may have to patch, and sk like the VREG kind
-- threshold used for prefix shrinking -- confirm against the C encoder.
local function wputop(sz, op, rex, vex, vregr, vregxb)
  local psz, sk = 0, nil
  if vex then
    local tail
    -- The short form (0xc5 plus one byte) is only considered for m == 1 and
    -- when none of the rex bits 8, 2 and 1 are set.
    if vex.m == 1 and band(rex, 11) == 0 then
      if x64 and vregxb then
        -- Falls through to the long form below, since tail stays unset.
        sk = map_vreg["modrm.reg"]
      else
        wputb(0xc5)
        tail = shl(bxor(band(rex, 4), 4), 5)
        psz = 3
      end
    end
    if not tail then
      -- Long form: 0xc4, then the inverted low three rex bits plus m.
      wputb(0xc4)
      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
      tail = shl(band(rex, 8), 4)
      psz = 4
    end
    local reg, vreg = 0, nil
    if vex.v then
      reg = vex.v.reg
      if not reg then werror("bad vex operand") end
      -- A negative register number denotes a variable register.
      if reg < 0 then reg = 0; vreg = vex.v.vreg end
    end
    if sz == "y" or vex.l then tail = tail + 4 end
    -- The register number is stored inverted, shifted left by 3.
    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
    wvreg("vex.v", vreg)
    rex = 0 -- All rex bits have been folded into the VEX prefix.
    if op >= 256 then werror("bad vex opcode") end
  else
    if rex ~= 0 then
      if not x64 then werror("bad operand size") end
    elseif (vregr or vregxb) and x64 then
      -- Force a REX prefix byte (64 + 0) for variable registers.
      rex = 0x10
      sk = map_vreg["vex.v"]
    end
  end
  local r
  if sz == "w" then wputb(102) end
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      -- For 0F 3A xx and 0F 38 xx opcodes the REX prefix is emitted before
      -- the 0F byte.
      local opc3 = band(op, 0xffff00)
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
        wputb(64 + band(rex, 15)); rex = 0; psz = 2
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
  end
  if op >= 256 then
    local b = shr(op, 8)
    -- Likewise, REX goes in front of a 0F (15) opcode byte.
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    wputb(b); op = band(op, 255); psz = psz + 1
  end
  -- REX not emitted yet: it directly precedes the final opcode byte.
  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
  if sz == "b" then op = op - 1 end
  wputb(op)
  return psz, sk
end

-- Put ModRM or SIB formatted byte.
-- Packs m (2 bits), the low 3 bits of s and the low 3 bits of rm into one
-- byte. The vs and vrm parameters are not used by this function.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
end

-- Put ModRM/SIB plus optional displacement.
--   t:      operand table (fields mode, reg, vreg, xreg, vxreg, xsc, disp).
--   imark:  immediate marker character; "I" requests a MARK action.
--   s:      value for the middle 3-bit field (negative values are treated
--           as 0, with vsreg presumably supplying the register at runtime).
--   vsreg:  variable register expression for the middle field, or nil.
--   psz, sk: values returned by wputop(), passed on to wvreg().
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  -- Negative register numbers denote variable registers: encode 0 and let a
  -- VREG action supply the number.
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.r", vreg, psz+1, sk)
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
      wputmodrm(t.xsc, xreg, 5)
      wvreg("sib.index", vxreg, psz+2, sk)
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
        wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
        wputmodrm(0, 4, 5)
      else
        -- A label displacement (table) in x64 mode is rip-relative.
        riprel = x64
        wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
      end
    end
    if riprel then -- Emit rip-relative displacement.
      if match("UWSiI", imark) then
        werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      if disp[2] == "iPJ" then
        waction("REL_A", disp[1])
      else
        wputlabel("REL_", disp[1], 2)
      end
    else
      wputdarg(disp)
    end
    return
  end

  -- m is the mod field: 0 = no displacement, 1 = byte, 2 = dword.
  -- It stays nil when the size is only known at runtime (DISP action).
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    wvreg("sib.index", vxreg, psz+2, sk, vreg)
    wvreg("sib.base", vreg, psz+2, sk)
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.m", vreg, psz+1, sk)
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end

------------------------------------------------------------------------------

-- Return human-readable operand mode string.
-- The result is `op`, a space, and for every operand the first character of
-- its mode followed by its operand size ("?" when the size is unknown),
-- joined by commas.
local function opmodestr(op, args)
  local m = {}
  for i=1,#args do
    local a = args[i]
    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
  end
  return op.." "..concat(m, ",")
end

-- Convert number to valid integer or nil.
-- expr:    source text of the candidate number.
-- isqword: when truthy, values outside the signed 32 bit range yield nil
--          instead of an error.
-- Returns nothing if expr is not a number at all.
local function toint(expr, isqword)
  local n = tonumber(expr)
  if not n then return end
  if n % 1 ~= 0 then
    werror("not an integer number `"..expr.."'")
  elseif isqword then
    if n < -2147483648 or n > 2147483647 then
      n = nil -- Handle it as an expression to avoid precision loss.
    end
  elseif n < -2147483648 or n > 4294967295 then
    werror("bad integer number `"..expr.."'")
  end
  return n
end

-- Parse immediate expression.
-- Returns an operand mode string and the associated value: a C expression,
-- a label number or a pc label expression.
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    local target = sub(expr, 2)
    return "iPJ", format("(ptrdiff_t)(%s)", target)
  end

  local prefix = sub(expr, 1, 2)
  if prefix == "=>" then
    -- =>expr (pc label reference)
    return "iJ", sub(expr, 3)
  elseif prefix == "->" then
    -- ->name (global label reference)
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then -- Fwd: 247-255, Bkwd: 1-9.
    local bias = 0
    if dir == ">" then bias = 246 end
    return "iJ", lnum + bias
  end

  -- extern name (extern label reference)
  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end

-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns a number, a { label, mode } table for label references, or the
-- expression text for anything that has to be evaluated by the C compiler.
local function dispexpr(expr)
  if expr == "" then return 0 end
  local num = toint(expr)
  if num then return num end

  local sign, body = match(expr, "^([+-])%s*(.+)$")
  if not sign then
    werror("bad displacement expression `"..expr.."'")
  elseif sign == "+" then
    expr = body
  end

  -- +-opsize*num is folded to a number at assembly time.
  local szname, count = match(body, "^(%w+)%s*%*%s*(.+)$")
  local ops, imm = map_opsize[szname], toint(count)
  if ops and imm then
    if sign == "-" then imm = -imm end
    return imm*map_opsizenum[ops]
  end

  local mode, iexpr = immexpr(body)
  if mode == "iJ" or mode == "iPJ" then
    if sign == "-" then werror("cannot invert label reference") end
    return { iexpr, mode }
  end
  return expr -- Need to return original signed expression.
end

-- Parse register or type expression.
-- Accepts an internal register name, a type name, or "type:@reg" to override
-- the register of a type. Returns the register name, its number and, for
-- types, the type table. Returns nothing when expr is nil.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if not tp then
    -- Plain register (or unknown name, in which case the number is nil).
    return expr, map_reg_num[expr]
  end
  local reg = ovreg or tp.reg
  local rnum = map_reg_num[reg]
  if not rnum then
    werror("type `"..(tname or expr).."' needs a register override")
  end
  if not map_reg_valid_base[reg] then
    werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
  end
  return reg, rnum, tp
end

-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
--   param:   operand text as written in the assembler source.
--   isqword: passed on to toint() for plain immediates, so that values
--            outside the signed 32 bit range are kept as expressions.
-- Besides the fields named above, the result may carry vreg/vxreg (variable
-- register expressions for reg/xreg) and needrex.
-- The body is wrapped in `repeat ... until true` so that `break` can be used
-- to leave as soon as one operand form has been recognized.
local function parseoperand(param, isqword)
  local t = {}

  -- Strip an optional operand size prefix (e.g. "dword").
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  -- br holds the text between the brackets of a memory operand, or nil.
  local br = match(expr, "^%[%s*(.-)%s*%]$")
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
        -- Pure offsets get the extra "O" mode character in x86 mode only.
        t.mode = x64 and "xm" or "xmO"
        break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
        -- [expr]
        t.mode = x64 and "xm" or "xmO"
        t.disp = dispexpr("+"..br)
        break
      end

      -- Register number -1: variable register, followed by "(expr)".
      if t.reg == -1 then
        t.vreg, tailr = match(tailr, "^(%b())(.*)$")
        if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        if not map_reg_valid_index[reg] then
          werror("bad index register `"..map_reg_rev[reg].."'")
        end
        -- The register parsed so far is the index; there is no base.
        t.xsc = map_xsc[xsc]
        t.xreg = t.reg
        t.vxreg = t.vreg
        t.reg = nil
        t.vreg = nil
        t.disp = dispexpr(tailsc)
        break
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^%+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
        -- [reg+-expr]
        t.disp = dispexpr(tailr)
        break
      end
      if not map_reg_valid_index[xreg] then
        werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
        t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
        if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        t.xsc = map_xsc[xsc]
        tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr, isqword)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
        -- "opsize*num": the size prefix acts as a multiplier, not an override.
        imm = toint(sub(expr, 2))
        if imm then
          imm = imm * map_opsizenum[t.opsize]
          t.opsize = nil
        end
      end
      if imm then
        if t.opsize then werror("bad operand size override") end
        local m = "i"
        if imm == 1 then m = m.."1" end
        -- Unsigned values in the top 128 of the 32 bit range are treated as
        -- small negative numbers.
        if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
        if imm >= -128 and imm <= 127 then m = m.."S" end
        t.imm = imm
        t.mode = m
        break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
        if t.reg == -1 then
          t.vreg, tailr = match(tailr, "^(%b())(.*)$")
          if not t.vreg then werror("bad variable register expression") end
        end
        -- reg
        if tailr == "" then
          if t.opsize then werror("bad operand size override") end
          t.opsize = map_reg_opsize[reg]
          if t.opsize == "f" then
            -- FP register: register 0 gets the extra "F" mode character.
            t.mode = t.reg == 0 and "fF" or "f"
          else
            if reg == "@w4" or (x64 and reg == "@d4") then
              wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
            end
            -- Register 0 gets "R", the byte register "@b1" gets "C".
            t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
          end
          t.needrex = map_reg_needrex[reg]
          break
        end

        -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
        if not tp then werror("bad operand `"..param.."'") end
        t.mode = "xm"
        t.disp = format(tp.ctypefmt, tailr)
      else
        t.mode, t.imm = immexpr(expr)
        -- Modes ending in "J" are jump targets and are address sized.
        if sub(t.mode, -1) == "J" then
          if t.opsize and t.opsize ~= addrsize then
            werror("bad operand size override")
          end
          t.opsize = addrsize
        end
      end
    end
  until true
  return t
end

------------------------------------------------------------------------------
-- x86 Template String Description
-- ===============================
--
-- Each template string is a list of [match:]pattern pairs,
-- separated by "|". The first match wins. No match means a
-- bad or unsupported combination of operand modes or sizes.
--
-- The match part and the ":" is omitted if the operation has
-- no operands. Otherwise the first N characters are matched
-- against the mode strings of each of the N operands.
--
-- The mode string for each operand type is (see parseoperand()):
--   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
--   FP register:      "f",  +"F" for st0
--   Index operand:    "xm", +"O" for [disp] (pure offset)
--   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
--                     +"I" for arg, +"P" for pointer
--   Any:              +"J" for valid jump targets
--
-- So a match character "m" (mixed) matches both an integer register
-- and an index operand (to be encoded with the ModRM/SIB scheme).
-- But "r" matches only a register and "x" only an index operand
-- (e.g. for FP memory access operations).
--
-- The operand size match string starts right after the mode match
-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
-- The effective data size of the operation is matched against this list.
--
-- If only the regular "b", "w", "d", "q", "t" operand sizes are
-- present, then all operands must be the same size. Unspecified sizes
-- are ignored, but at least one operand must have a size or the pattern
-- won't match (use the "byte", "word", "dword", "qword", "tword"
-- operand size overrides. E.g.: mov dword [eax], 1).
--
-- If the list has a "1" or "2" prefix, the operand size is taken
-- from the respective operand and any other operand sizes are ignored.
-- If the list contains only ".", all operand sizes are ignored.
-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
-- are compared to the match.
--
-- E.g. "rrdw" matches for either two dword registers or two word
-- registers. "Fx2dq" matches an st0 operand plus an index operand
-- pointing to a dword (float) or qword (double).
--
-- Every character after the ":" is part of the pattern string:
--   Hex chars are accumulated to form the opcode (left to right).
--   "n"       disables the standard opcode mods
--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
--   "X"       Force REX.W.
--   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
--   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
--             The spare 3 bits are either filled with the last hex digit or
--             the result from a previous "r"/"R". The opcode is restored.
--   "u"       Use VEX encoding, vvvv unused.
--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
--             removed from the list used by future characters).
--   "w"       Use VEX encoding, vvvv from 3rd operand.
--   "L"       Force VEX.L
--
-- All of the following characters force a flush of the opcode:
--   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
--   "s"       stores a 4 bit immediate from the last register operand,
--             followed by 4 zero bits.
--   "S"       stores a signed 8 bit immediate from the last operand.
--   "U"       stores an unsigned 8 bit immediate from the last operand.
--   "W"       stores an unsigned 16 bit immediate from the last operand.
--   "i"       stores an operand sized immediate from the last operand.
--   "I"       ditto, but generates an action code to optionally modify
--             the opcode (+2) for a signed 8 bit immediate.
--   "J"       generates one of the REL action codes from the last operand.
--
------------------------------------------------------------------------------

-- Template strings for x86 instructions. Ordered by first opcode byte.
-- Unimplemented opcodes (deliberate omissions) are marked with *.
--
-- map_op maps "<mnemonic>_<number of operands>" to a template string in the
-- format described by the "x86 Template String Description" comment.
-- Entries written as `x64 and ...` / `not x64 and ...` evaluate to false
-- when the condition does not hold for the selected architecture.
-- An empty pattern after the ":" (e.g. "rx/oq:") is used throughout for
-- alternatives that share the encoding of the preceding alternative.
local map_op = {
  -- 00-05: add...
  -- 06: *push es
  -- 07: *pop es
  -- 08-0D: or...
  -- 0E: *push cs
  -- 0F: two byte opcode prefix
  -- 10-15: adc...
  -- 16: *push ss
  -- 17: *pop ss
  -- 18-1D: sbb...
  -- 1E: *push ds
  -- 1F: *pop ds
  -- 20-25: and...
  es_0 = "26",
  -- 27: *daa
  -- 28-2D: sub...
  cs_0 = "2E",
  -- 2F: *das
  -- 30-35: xor...
  ss_0 = "36",
  -- 37: *aaa
  -- 38-3D: cmp...
  ds_0 = "3E",
  -- 3F: *aas
  inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
            "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  -- 60: *pusha, *pushad, *pushaw
  -- 61: *popa, *popad, *popaw
  -- 62: *bound rdw,x
  -- 63: x86: *arpl mw,rw
  movsxd_2 = x64 and "rm/qd:63rM",
  fs_0 = "64",
  gs_0 = "65",
  o16_0 = "66",
  a16_0 = not x64 and "67" or nil,
  a32_0 = x64 and "67",
  -- 68: push idw
  -- 69: imul rdw,mdw,idw
  -- 6A: push ib
  -- 6B: imul rdw,mdw,S
  -- 6C: *insb
  -- 6D: *insd, *insw
  -- 6E: *outsb
  -- 6F: *outsd, *outsw
  -- 70-7F: jcc lb
  -- 80: add... mb,i
  -- 81: add... mdw,i
  -- 82: *undefined
  -- 83: add... mdw,S
  test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  -- 86: xchg rb,mb
  -- 87: xchg rdw,mdw
  -- 88: mov mb,r
  -- 89: mov mdw,r
  -- 8A: mov r,mb
  -- 8B: mov r,mdw
  -- 8C: *mov mdw,seg
  lea_2 = "rx1dq:8DrM",
  -- 8E: *mov seg,mdw
  -- 8F: pop mdw
  nop_0 = "90",
  xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  cbw_0 = "6698",
  cwde_0 = "98",
  cdqe_0 = "4898",
  cwd_0 = "6699",
  cdq_0 = "99",
  cqo_0 = "4899",
  -- 9A: *call iw:idw
  wait_0 = "9B",
  fwait_0 = "9B",
  pushf_0 = "9C",
  pushfd_0 = not x64 and "9C",
  pushfq_0 = x64 and "9C",
  popf_0 = "9D",
  popfd_0 = not x64 and "9D",
  popfq_0 = x64 and "9D",
  sahf_0 = "9E",
  lahf_0 = "9F",
  mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  movsb_0 = "A4",
  movsw_0 = "66A5",
  movsd_0 = "A5",
  cmpsb_0 = "A6",
  cmpsw_0 = "66A7",
  cmpsd_0 = "A7",
  -- A8: test Rb,i
  -- A9: test Rdw,i
  stosb_0 = "AA",
  stosw_0 = "66AB",
  stosd_0 = "AB",
  lodsb_0 = "AC",
  lodsw_0 = "66AD",
  lodsd_0 = "AD",
  scasb_0 = "AE",
  scasw_0 = "66AF",
  scasd_0 = "AF",
  -- B0-B7: mov rb,i
  -- B8-BF: mov rdw,i
  -- C0: rol... mb,i
  -- C1: rol... mdw,i
  ret_1 = "i.:nC2W",
  ret_0 = "C3",
  -- C4: *les rdw,mq
  -- C5: *lds rdw,mq
  -- C6: mov mb,i
  -- C7: mov mdw,i
  -- C8: *enter iw,ib
  leave_0 = "C9",
  -- CA: *retf iw
  -- CB: *retf
  int3_0 = "CC",
  int_1 = "i.:nCDU",
  into_0 = "CE",
  -- CF: *iret
  -- D0: rol... mb,1
  -- D1: rol... mdw,1
  -- D2: rol... mb,cl
  -- D3: rol... mdw,cl
  -- D4: *aam ib
  -- D5: *aad ib
  -- D6: *salc
  -- D7: *xlat
  -- D8-DF: floating point ops
  -- E0: *loopne
  -- E1: *loope
  -- E2: *loop
  -- E3: *jcxz, *jecxz
  -- E4: *in Rb,ib
  -- E5: *in Rdw,ib
  -- E6: *out ib,Rb
  -- E7: *out ib,Rdw
  call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  -- EA: *jmp iw:idw
  -- EB: jmp ib
  -- EC: *in Rb,dx
  -- ED: *in Rdw,dx
  -- EE: *out dx,Rb
  -- EF: *out dx,Rdw
  lock_0 = "F0",
  int1_0 = "F1",
  repne_0 = "F2",
  repnz_0 = "F2",
  rep_0 = "F3",
  repe_0 = "F3",
  repz_0 = "F3",
  endbr32_0 = "F30F1EFB",
  endbr64_0 = "F30F1EFA",
  -- F4: *hlt
  cmc_0 = "F5",
  -- F6: test... mb,i; div... mb
  -- F7: test... mdw,i; div... mdw
  clc_0 = "F8",
  stc_0 = "F9",
  -- FA: *cli
  cld_0 = "FC",
  std_0 = "FD",
  -- FE: inc... mb
  -- FF: inc... mdw

  -- misc ops
  not_1 = "m:F72m",
  neg_1 = "m:F73m",
  mul_1 = "m:F74m",
  imul_1 = "m:F75m",
  div_1 = "m:F76m",
  idiv_1 = "m:F77m",

  imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",

  movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",

  bswap_1 = "rqd:0FC8r",
  bsf_2 = "rmqdw:0FBCrM",
  bsr_2 = "rmqdw:0FBDrM",
  bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
  btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
  btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
  bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",

  shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
  shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",

  rdtsc_0 = "0F31", -- P1+
  rdpmc_0 = "0F33", -- P6+
  cpuid_0 = "0FA2", -- P1+

  -- floating point ops
  fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
  fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",

  fpop_0 = "DDD8", -- Alias for fstp st0.

  fist_1 = "xw:nDF2m|xd:DB2m",
  fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
  fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",

  fxch_0 = "D9C9",
  fxch_1 = "ff:D9C8r",
  fxch_2 = "fFf:D9C8r|Fff:D9C8R",

  fucom_1 = "ff:DDE0r",
  fucom_2 = "Fff:DDE0R",
  fucomp_1 = "ff:DDE8r",
  fucomp_2 = "Fff:DDE8R",
  fucomi_1 = "ff:DBE8r", -- P6+
  fucomi_2 = "Fff:DBE8R", -- P6+
  fucomip_1 = "ff:DFE8r", -- P6+
  fucomip_2 = "Fff:DFE8R", -- P6+
  fcomi_1 = "ff:DBF0r", -- P6+
  fcomi_2 = "Fff:DBF0R", -- P6+
  fcomip_1 = "ff:DFF0r", -- P6+
  fcomip_2 = "Fff:DFF0R", -- P6+
  fucompp_0 = "DAE9",
  fcompp_0 = "DED9",

  fldenv_1 = "x.:D94m",
  fnstenv_1 = "x.:D96m",
  fstenv_1 = "x.:9BD96m",
  fldcw_1 = "xw:nD95m",
  fstcw_1 = "xw:n9BD97m",
  fnstcw_1 = "xw:nD97m",
  fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
  fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
  fclex_0 = "9BDBE2",
  fnclex_0 = "DBE2",

  fnop_0 = "D9D0",
  -- D9D1-D9DF: unassigned

  fchs_0 = "D9E0",
  fabs_0 = "D9E1",
  -- D9E2: unassigned
  -- D9E3: unassigned
  ftst_0 = "D9E4",
  fxam_0 = "D9E5",
  -- D9E6: unassigned
  -- D9E7: unassigned
  fld1_0 = "D9E8",
  fldl2t_0 = "D9E9",
  fldl2e_0 = "D9EA",
  fldpi_0 = "D9EB",
  fldlg2_0 = "D9EC",
  fldln2_0 = "D9ED",
  fldz_0 = "D9EE",
  -- D9EF: unassigned

  f2xm1_0 = "D9F0",
  fyl2x_0 = "D9F1",
  fptan_0 = "D9F2",
  fpatan_0 = "D9F3",
  fxtract_0 = "D9F4",
  fprem1_0 = "D9F5",
  fdecstp_0 = "D9F6",
  fincstp_0 = "D9F7",
  fprem_0 = "D9F8",
  fyl2xp1_0 = "D9F9",
  fsqrt_0 = "D9FA",
  fsincos_0 = "D9FB",
  frndint_0 = "D9FC",
  fscale_0 = "D9FD",
  fsin_0 = "D9FE",
  fcos_0 = "D9FF",

  -- SSE, SSE2
  andnpd_2 = "rmo:660F55rM",
  andnps_2 = "rmo:0F55rM",
  andpd_2 = "rmo:660F54rM",
  andps_2 = "rmo:0F54rM",
  clflush_1 = "x.:0FAE7m",
  cmppd_3 = "rmio:660FC2rMU",
  cmpps_3 = "rmio:0FC2rMU",
  cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
  cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
  comisd_2 = "rro:660F2FrM|rx/oq:",
  comiss_2 = "rro:0F2FrM|rx/od:",
  cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
  cvtdq2ps_2 = "rmo:0F5BrM",
  cvtpd2dq_2 = "rmo:F20FE6rM",
  cvtpd2ps_2 = "rmo:660F5ArM",
  cvtpi2pd_2 = "rx/oq:660F2ArM",
  cvtpi2ps_2 = "rx/oq:0F2ArM",
  cvtps2dq_2 = "rmo:660F5BrM",
  cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
  cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
  cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
  cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
  cvttpd2dq_2 = "rmo:660FE6rM",
  cvttps2dq_2 = "rmo:F30F5BrM",
  cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  fxsave_1 = "x.:0FAE0m",
  fxrstor_1 = "x.:0FAE1m",
  ldmxcsr_1 = "xd:0FAE2m",
  lfence_0 = "0FAEE8",
  maskmovdqu_2 = "rro:660FF7rM",
  mfence_0 = "0FAEF0",
  movapd_2 = "rmo:660F28rM|mro:660F29Rm",
  movaps_2 = "rmo:0F28rM|mro:0F29Rm",
  movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
  movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
  movhlps_2 = "rro:0F12rM",
  movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
  movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
  movlhps_2 = "rro:0F16rM",
  movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
  movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
  movmskpd_2 = "rr/do:660F50rM",
  movmskps_2 = "rr/do:0F50rM",
  movntdq_2 = "xro:660FE7Rm",
  movnti_2 = "xrqd:0FC3Rm",
  movntpd_2 = "xro:660F2BRm",
  movntps_2 = "xro:0F2BRm",
  movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  movupd_2 = "rmo:660F10rM|mro:660F11Rm",
  movups_2 = "rmo:0F10rM|mro:0F11Rm",
  orpd_2 = "rmo:660F56rM",
  orps_2 = "rmo:0F56rM",
  pause_0 = "F390",
  pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
  pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
  pmovmskb_2 = "rr/do:660FD7rM",
  prefetchnta_1 = "xb:n0F180m",
  prefetcht0_1 = "xb:n0F181m",
  prefetcht1_1 = "xb:n0F182m",
  prefetcht2_1 = "xb:n0F183m",
  pshufd_3 = "rmio:660F70rMU",
  pshufhw_3 = "rmio:F30F70rMU",
  pshuflw_3 = "rmio:F20F70rMU",
  pslld_2 = "rmo:660FF2rM|rio:660F726mU",
  pslldq_2 = "rio:660F737mU",
  psllq_2 = "rmo:660FF3rM|rio:660F736mU",
  psllw_2 = "rmo:660FF1rM|rio:660F716mU",
  psrad_2 = "rmo:660FE2rM|rio:660F724mU",
  psraw_2 = "rmo:660FE1rM|rio:660F714mU",
  psrld_2 = "rmo:660FD2rM|rio:660F722mU",
  psrldq_2 = "rio:660F733mU",
  psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
  psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
  rcpps_2 = "rmo:0F53rM",
  rcpss_2 = "rro:F30F53rM|rx/od:",
  rsqrtps_2 = "rmo:0F52rM",
  -- The "rmo" alternative is kept first so previously accepted operand forms
  -- still match; "rx/od" additionally accepts a dword memory operand, as the
  -- other scalar single-precision ops (e.g. rcpss) do.
  rsqrtss_2 = "rmo:F30F52rM|rx/od:",
  sfence_0 = "0FAEF8",
  shufpd_3 = "rmio:660FC6rMU",
  shufps_3 = "rmio:0FC6rMU",
  stmxcsr_1 = "xd:0FAE3m",
  ucomisd_2 = "rro:660F2ErM|rx/oq:",
  ucomiss_2 = "rro:0F2ErM|rx/od:",
  unpckhpd_2 = "rmo:660F15rM",
  unpckhps_2 = "rmo:0F15rM",
  unpcklpd_2 = "rmo:660F14rM",
  unpcklps_2 = "rmo:0F14rM",
  xorpd_2 = "rmo:660F57rM",
  xorps_2 = "rmo:0F57rM",

  -- SSE3 ops
  fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
  addsubpd_2 = "rmo:660FD0rM",
  addsubps_2 = "rmo:F20FD0rM",
  haddpd_2 = "rmo:660F7CrM",
  haddps_2 = "rmo:F20F7CrM",
  hsubpd_2 = "rmo:660F7DrM",
  hsubps_2 = "rmo:F20F7DrM",
  lddqu_2 = "rxo:F20FF0rM",
  movddup_2 = "rmo:F20F12rM",
  movshdup_2 = "rmo:F30F16rM",
  movsldup_2 = "rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 = "rmo:660F381CrM",
  pabsd_2 = "rmo:660F381ErM",
  pabsw_2 = "rmo:660F381DrM",
  palignr_3 = "rmio:660F3A0FrMU",
  phaddd_2 = "rmo:660F3802rM",
  phaddsw_2 = "rmo:660F3803rM",
  phaddw_2 = "rmo:660F3801rM",
  phsubd_2 = "rmo:660F3806rM",
  phsubsw_2 = "rmo:660F3807rM",
  phsubw_2 = "rmo:660F3805rM",
  pmaddubsw_2 = "rmo:660F3804rM",
  pmulhrsw_2 = "rmo:660F380BrM",
  pshufb_2 = "rmo:660F3800rM",
  psignb_2 = "rmo:660F3808rM",
  psignd_2 = "rmo:660F380ArM",
  psignw_2 = "rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 = "rmio:660F3A0DrMU",
  blendps_3 = "rmio:660F3A0CrMU",
  blendvpd_3 = "rmRo:660F3815rM",
  blendvps_3 = "rmRo:660F3814rM",
  dppd_3 = "rmio:660F3A41rMU",
  dpps_3 = "rmio:660F3A40rMU",
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  -- INSERTPS is 66 0F 3A 21 (0F 3A 41 is DPPD, see dppd_3); this matches
  -- the opcode byte used by vinsertps_4.
  insertps_3 = "rrio:660F3A21rMU|rxi/od:",
  movntdqa_2 = "rxo:660F382ArM",
  mpsadbw_3 = "rmio:660F3A42rMU",
  packusdw_2 = "rmo:660F382BrM",
  pblendvb_3 = "rmRo:660F3810rM",
  pblendw_3 = "rmio:660F3A0ErMU",
  pcmpeqq_2 = "rmo:660F3829rM",
  pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 = "mri/do:660F3A16RmU",
  pextrq_3 = "mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 = "rmi/od:660F3A22rMU",
  pinsrq_3 = "rmi/oq:660F3A22rXMU",
  pmaxsb_2 = "rmo:660F383CrM",
  pmaxsd_2 = "rmo:660F383DrM",
  pmaxud_2 = "rmo:660F383FrM",
  pmaxuw_2 = "rmo:660F383ErM",
  pminsb_2 = "rmo:660F3838rM",
  pminsd_2 = "rmo:660F3839rM",
  pminud_2 = "rmo:660F383BrM",
  pminuw_2 = "rmo:660F383ArM",
  pmovsxbd_2 = "rro:660F3821rM|rx/od:",
  pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
  pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
  pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
  pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
  pmovsxwq_2 = "rro:660F3824rM|rx/od:",
  pmovzxbd_2 = "rro:660F3831rM|rx/od:",
  pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
  pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
  pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
  pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
  pmovzxwq_2 = "rro:660F3834rM|rx/od:",
  pmuldq_2 = "rmo:660F3828rM",
  pmulld_2 = "rmo:660F3840rM",
  ptest_2 = "rmo:660F3817rM",
  roundpd_3 = "rmio:660F3A09rMU",
  roundps_3 = "rmio:660F3A08rMU",
  roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
  roundss_3 = "rrio:660F3A0ArMU|rxi/od:",

  -- SSE4.2 ops
  crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  pcmpestri_3 = "rmio:660F3A61rMU",
  pcmpestrm_3 = "rmio:660F3A60rMU",
  pcmpgtq_2 = "rmo:660F3837rM",
  pcmpistri_3 = "rmio:660F3A63rMU",
  pcmpistrm_3 = "rmio:660F3A62rMU",
  popcnt_2 = "rmqdw:F30FB8rM",

  -- SSE4a
  extrq_2 = "rro:660F79rM",
  extrq_3 = "riio:660F780mUU",
  insertq_2 = "rro:F20F79rM",
  insertq_4 = "rriio:F20F78rMUU",
  lzcnt_2 = "rmqdw:F30FBDrM",
  movntsd_2 = "xr/qo:nF20F2BRm",
  movntss_2 = "xr/do:F30F2BRm",
  -- popcnt is also in SSE4.2

  -- AES-NI
  aesdec_2 = "rmo:660F38DErM",
  aesdeclast_2 = "rmo:660F38DFrM",
  aesenc_2 = "rmo:660F38DCrM",
  aesenclast_2 = "rmo:660F38DDrM",
  aesimc_2 = "rmo:660F38DBrM",
  aeskeygenassist_3 = "rmio:660F3ADFrMU",
  pclmulqdq_3 = "rmio:660F3A44rMU",

  -- AVX FP ops
  vaddsubpd_3 = "rrmoy:660FVD0rM",
  vaddsubps_3 = "rrmoy:F20FVD0rM",
  vandpd_3 = "rrmoy:660FV54rM",
  vandps_3 = "rrmoy:0FV54rM",
  vandnpd_3 = "rrmoy:660FV55rM",
  vandnps_3 = "rrmoy:0FV55rM",
  vblendpd_4 = "rrmioy:660F3AV0DrMU",
  vblendps_4 = "rrmioy:660F3AV0CrMU",
  vblendvpd_4 = "rrmroy:660F3AV4BrMs",
  vblendvps_4 = "rrmroy:660F3AV4ArMs",
  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
  vcmppd_4 = "rrmioy:660FVC2rMU",
  vcmpps_4 = "rrmioy:0FVC2rMU",
  vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
  vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
  vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
  vcomiss_2 = "rro:0Fu2FrM|rx/od:",
  vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
  vcvtdq2ps_2 = "rmoy:0Fu5BrM",
  vcvtpd2dq_2 = "rmoy:F20FuE6rM",
  vcvtpd2ps_2 = "rmoy:660Fu5ArM",
  vcvtps2dq_2 = "rmoy:660Fu5BrM",
  vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
  vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
  vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
  vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
  vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
  vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
  vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
  vdppd_4 = "rrmio:660F3AV41rMU",
  vdpps_4 = "rrmioy:660F3AV40rMU",
  vextractf128_3 = "mri/oy:660F3AuL19RmU",
  vextractps_3 = "mri/do:660F3Au17RmU",
  vhaddpd_3 = "rrmoy:660FV7CrM",
  vhaddps_3 = "rrmoy:F20FV7CrM",
  vhsubpd_3 = "rrmoy:660FV7DrM",
  vhsubps_3 = "rrmoy:F20FV7DrM",
  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
  vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
  vldmxcsr_1 = "xd:0FuAE2m",
  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
  vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
  vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
  vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
  vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
  vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
  vmovhlps_3 = "rrro:0FV12rM",
  vmovhpd_2 = "xr/qo:660Fu17Rm",
  vmovhpd_3 = "rrx/ooq:660FV16rM",
  vmovhps_2 = "xr/qo:0Fu17Rm",
  vmovhps_3 = "rrx/ooq:0FV16rM",
  vmovlhps_3 = "rrro:0FV16rM",
  vmovlpd_2 = "xr/qo:660Fu13Rm",
  vmovlpd_3 = "rrx/ooq:660FV12rM",
  vmovlps_2 = "xr/qo:0Fu13Rm",
  vmovlps_3 = "rrx/ooq:0FV12rM",
  vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
  vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
  vmovntpd_2 = "xroy:660Fu2BRm",
  vmovntps_2 = "xroy:0Fu2BRm",
  vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
  vmovsd_3 = "rrro:F20FV10rM",
  vmovshdup_2 = "rmoy:F30Fu16rM",
  vmovsldup_2 = "rmoy:F30Fu12rM",
  vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
  vmovss_3 = "rrro:F30FV10rM",
  vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
  vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
  vorpd_3 = "rrmoy:660FV56rM",
  vorps_3 = "rrmoy:0FV56rM",
  vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
  vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
  vperm2f128_4 = "rrmiy:660F3AV06rMU",
  vptestpd_2 = "rmoy:660F38u0FrM",
  vptestps_2 = "rmoy:660F38u0ErM",
  vrcpps_2 = "rmoy:0Fu53rM",
  vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
  vrsqrtps_2 = "rmoy:0Fu52rM",
  vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
  vroundpd_3 = "rmioy:660F3Au09rMU",
  vroundps_3 = "rmioy:660F3Au08rMU",
  vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
  vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
  vshufpd_4 = "rrmioy:660FVC6rMU",
  vshufps_4 = "rrmioy:0FVC6rMU",
  vsqrtps_2 = "rmoy:0Fu51rM",
  vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
  vsqrtpd_2 = "rmoy:660Fu51rM",
  vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
  vstmxcsr_1 = "xd:0FuAE3m",
  vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
  vucomiss_2 = "rro:0Fu2ErM|rx/od:",
  vunpckhpd_3 = "rrmoy:660FV15rM",
  vunpckhps_3 = "rrmoy:0FV15rM",
  vunpcklpd_3 = "rrmoy:660FV14rM",
  vunpcklps_3 = "rrmoy:0FV14rM",
  vxorpd_3 = "rrmoy:660FV57rM",
  vxorps_3 = "rrmoy:0FV57rM",
  vzeroall_0 = "0FuL77",
  vzeroupper_0 = "0Fu77",

  -- AVX2 FP ops
  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
  -- *vgather* (!vsib)
  vpermpd_3 = "rmiy:660F3AuX01rMU",
  vpermps_3 = "rrmy:660F38V16rM",

  -- AVX, AVX2 integer ops
  -- In general, xmm requires AVX, ymm requires AVX2.
  vaesdec_3 = "rrmo:660F38VDErM",
  vaesdeclast_3 = "rrmo:660F38VDFrM",
  vaesenc_3 = "rrmo:660F38VDCrM",
  vaesenclast_3 = "rrmo:660F38VDDrM",
  vaesimc_2 = "rmo:660F38uDBrM",
  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
  vlddqu_2 = "rxoy:F20FuF0rM",
  vmaskmovdqu_2 = "rro:660FuF7rM",
  vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
  vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
  vmovntdq_2 = "xroy:660FuE7Rm",
  vmovntdqa_2 = "rxoy:660F38u2ArM",
  vmpsadbw_4 = "rrmioy:660F3AV42rMU",
  vpabsb_2 = "rmoy:660F38u1CrM",
  vpabsd_2 = "rmoy:660F38u1ErM",
  vpabsw_2 = "rmoy:660F38u1DrM",
  vpackusdw_3 = "rrmoy:660F38V2BrM",
  vpalignr_4 = "rrmioy:660F3AV0FrMU",
  vpblendvb_4 = "rrmroy:660F3AV4CrMs",
  vpblendw_4 = "rrmioy:660F3AV0ErMU",
  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
  vpcmpeqq_3 = "rrmoy:660F38V29rM",
  vpcmpestri_3 = "rmio:660F3Au61rMU",
  vpcmpestrm_3 = "rmio:660F3Au60rMU",
  vpcmpgtq_3 = "rrmoy:660F38V37rM",
  vpcmpistri_3 = "rmio:660F3Au63rMU",
  vpcmpistrm_3 = "rmio:660F3Au62rMU",
  vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
  vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
  vpextrd_3 = "mri/do:660F3Au16RmU",
  vpextrq_3 = "mri/qo:660F3Au16RmU",
  vphaddw_3 = "rrmoy:660F38V01rM",
  vphaddd_3 = "rrmoy:660F38V02rM",
  vphaddsw_3 = "rrmoy:660F38V03rM",
  vphminposuw_2 = "rmo:660F38u41rM",
  vphsubw_3 = "rrmoy:660F38V05rM",
  vphsubd_3 = "rrmoy:660F38V06rM",
  vphsubsw_3 = "rrmoy:660F38V07rM",
  vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
  vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
  vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
  vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
  vpmaddubsw_3 = "rrmoy:660F38V04rM",
  vpmaxsb_3 = "rrmoy:660F38V3CrM",
  vpmaxsd_3 = "rrmoy:660F38V3DrM",
  vpmaxuw_3 = "rrmoy:660F38V3ErM",
  vpmaxud_3 = "rrmoy:660F38V3FrM",
  vpminsb_3 = "rrmoy:660F38V38rM",
  vpminsd_3 = "rrmoy:660F38V39rM",
  vpminuw_3 = "rrmoy:660F38V3ArM",
  vpminud_3 = "rrmoy:660F38V3BrM",
  vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
  vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
  vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
  vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
  vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
  vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
  vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
  vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
  vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
  vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
  vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
  vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
  vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
  vpmuldq_3 = "rrmoy:660F38V28rM",
  vpmulhrsw_3 = "rrmoy:660F38V0BrM",
  vpmulld_3 = "rrmoy:660F38V40rM",
  vpshufb_3 = "rrmoy:660F38V00rM",
  vpshufd_3 = "rmioy:660Fu70rMU",
  vpshufhw_3 = "rmioy:F30Fu70rMU",
  vpshuflw_3 = "rmioy:F20Fu70rMU",
  vpsignb_3 = "rrmoy:660F38V08rM",
  vpsignw_3 = "rrmoy:660F38V09rM",
  vpsignd_3 = "rrmoy:660F38V0ArM",
  vpslldq_3 = "rrioy:660Fv737mU",
  vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
  vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
  vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
  vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
  vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
  vpsrldq_3 = "rrioy:660Fv733mU",
  vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
  vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
  vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
  vptest_2 = "rmoy:660F38u17rM",

  -- AVX2 integer ops
  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
  vextracti128_3 = "mri/oy:660F3AuL39RmU",
  vpblendd_4 = "rrmioy:660F3AV02rMU",
  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
  vpermd_3 = "rrmy:660F38V36rM",
  vpermq_3 = "rmiy:660F3AuX00rMU",
  -- *vpgather* (!vsib)
  vperm2i128_4 = "rrmiy:660F3AV46rMU",
  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
  vpsllvd_3 = "rrmoy:660F38V47rM",
  vpsllvq_3 = "rrmoy:660F38VX47rM",
  vpsravd_3 = "rrmoy:660F38V46rM",
  vpsrlvd_3 = "rrmoy:660F38V45rM",
  vpsrlvq_3 = "rrmoy:660F38VX45rM",

  -- Intel ADX
  adcx_2 = "rmqd:660F38F6rM",
  adox_2 = "rmqd:F30F38F6rM",

  -- BMI1
  andn_3 = "rrmqd:0F38VF2rM",
  bextr_3 = "rmrqd:0F38wF7rM",
  blsi_2 = "rmqd:0F38vF33m",
  blsmsk_2 = "rmqd:0F38vF32m",
  blsr_2 = "rmqd:0F38vF31m",
  tzcnt_2 = "rmqdw:F30FBCrM",

  -- BMI2
  bzhi_3 = "rmrqd:0F38wF5rM",
  mulx_3 = "rrmqd:F20F38VF6rM",
  pdep_3 = "rrmqd:F20F38VF5rM",
  pext_3 = "rrmqd:F30F38VF5rM",
  rorx_3 = "rmSqd:F20F3AuF0rMS",
  sarx_3 = "rmrqd:F30F38wF7rM",
  shrx_3 = "rmrqd:F20F38wF7rM",
  shlx_3 = "rmrqd:660F38wF7rM",

  -- FMA3
  vfmaddsub132pd_3 = "rrmoy:660F38VX96rM",
  vfmaddsub132ps_3 = "rrmoy:660F38V96rM",
  vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM",
  vfmaddsub213ps_3 = "rrmoy:660F38VA6rM",
  vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM",
  vfmaddsub231ps_3 = "rrmoy:660F38VB6rM",

  vfmsubadd132pd_3 = "rrmoy:660F38VX97rM",
  vfmsubadd132ps_3 = "rrmoy:660F38V97rM",
  vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM",
  vfmsubadd213ps_3 = "rrmoy:660F38VA7rM",
  vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM",
  vfmsubadd231ps_3 = "rrmoy:660F38VB7rM",

  vfmadd132pd_3 = "rrmoy:660F38VX98rM",
  vfmadd132ps_3 = "rrmoy:660F38V98rM",
  vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:",
  vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:",
  vfmadd213pd_3 = "rrmoy:660F38VXA8rM",
  vfmadd213ps_3 = "rrmoy:660F38VA8rM",
  vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:",
  vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:",
  vfmadd231pd_3 = "rrmoy:660F38VXB8rM",
  vfmadd231ps_3 = "rrmoy:660F38VB8rM",
  vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:",
  vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:",

  vfmsub132pd_3 = "rrmoy:660F38VX9ArM",
  vfmsub132ps_3 = "rrmoy:660F38V9ArM",
  vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:",
  vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:",
  vfmsub213pd_3 = "rrmoy:660F38VXAArM",
  vfmsub213ps_3 = "rrmoy:660F38VAArM",
  vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:",
  vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:",
  vfmsub231pd_3 = "rrmoy:660F38VXBArM",
  vfmsub231ps_3 = "rrmoy:660F38VBArM",
  vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:",
  vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:",

  vfnmadd132pd_3 = "rrmoy:660F38VX9CrM",
  vfnmadd132ps_3 = "rrmoy:660F38V9CrM",
  vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:",
  vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:",
  vfnmadd213pd_3 = "rrmoy:660F38VXACrM",
  vfnmadd213ps_3 = "rrmoy:660F38VACrM",
  vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:",
  vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:",
  vfnmadd231pd_3 = "rrmoy:660F38VXBCrM",
  vfnmadd231ps_3 = "rrmoy:660F38VBCrM",
  vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:",
  vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:",

  vfnmsub132pd_3 = "rrmoy:660F38VX9ErM",
  vfnmsub132ps_3 = "rrmoy:660F38V9ErM",
  vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:",
  vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:",
  vfnmsub213pd_3 = "rrmoy:660F38VXAErM",
  vfnmsub213ps_3 = "rrmoy:660F38VAErM",
  vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:",
  vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:",
  vfnmsub231pd_3 = "rrmoy:660F38VXBErM",
  vfnmsub231ps_3 = "rrmoy:660F38VBErM",
  vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:",
  vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:",
}

------------------------------------------------------------------------------

-- Arithmetic ops.
-- Generates the two-operand templates for the eight ALU ops. n is the op
-- index: it is used as the spare ModRM digit of the 81/83 immediate forms,
-- and n*8 is the base of the per-op register/memory and accumulator opcodes.
for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
                     ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
  local n8 = shl(n, 3)
  map_op[name.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
    1+n8, 3+n8, n, n, 5+n8, n)
end

-- Shift ops.
-- n is the spare ModRM digit of the D1/D3/C1 forms; sal shares shl's digit.
for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
                     shl = 4, shr = 5, sar = 7, sal = 4 } do
  map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
end

-- Conditional ops.
-- One jcc/setcc/cmovcc template per condition code name in map_cc.
for cc,n in pairs(map_cc) do
  map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
  map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
  map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
end

-- FP arithmetic ops.
for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
                     sub = 4, subr = 5, div = 6, divr = 7 } do
  -- nc: second opcode byte of the register form (C0 + n*8).
  local nc = 0xc0 + shl(n, 3)
  -- nr: same byte, but with the sub/subr and div/divr rows swapped (+8/-8
  -- for n >= 4); used by the DC/DE forms below.
  local nr = nc + (n < 4 and 0 or (n % 2 == 0 and 8 or -8))
  local fn = "f"..name
  map_op[fn.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", nc, n, n)
  if n == 2 or n == 3 then
    -- com/comp: no st(i),st0 form and no popping variants are generated.
    map_op[fn.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", nc, n, n)
  else
    map_op[fn.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM", nc, nr, n, n)
    map_op[fn.."p_1"] = format("ff:DE%02Xr", nr)
    map_op[fn.."p_2"] = format("fFf:DE%02Xr", nr)
  end
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end

-- FP conditional moves.
for cond,idx in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  -- The low two bits of idx select the row within the opcode block
  -- (steps of 8); bit 2 adds 0x100, turning the DAxx opcode into DBxx.
  local row = shl(band(idx, 3), 3)
  local page = shl(band(idx, 4), 6)
  local opc = 0xdac0 + row + page
  local stem = "fcmov"..cond
  map_op[stem.."_1"] = format("ff:%04Xr", opc) -- P6+
  map_op[stem.."_2"] = format("Fff:%04XR", opc) -- P6+
end

-- SSE / AVX FP arithmetic ops.
-- lo is the low nibble of the 0F 5x opcode shared by the ps/ss/pd/sd forms.
for stem,lo in pairs{ sqrt = 1, add = 8, mul = 9,
                      sub = 12, min = 13, div = 14, max = 15 } do
  -- lo is always in 1..15 here, so "%X" yields exactly one hex digit.
  local hex = format("%X", lo)
  local tail = hex.."rM"
  map_op[stem.."ps_2"] = "rmo:0F5"..tail
  map_op[stem.."ss_2"] = "rro:F30F5"..tail.."|rx/od:"
  map_op[stem.."pd_2"] = "rmo:660F5"..tail
  map_op[stem.."sd_2"] = "rro:F20F5"..tail.."|rx/oq:"
  -- No three-operand VEX forms are generated for sqrt; the vsqrt* ops are
  -- listed explicitly in map_op as two-operand templates.
  if lo ~= 1 then
    local vstem = "v"..stem
    map_op[vstem.."ps_3"] = "rrmoy:0FV5"..tail
    map_op[vstem.."ss_3"] = "rrro:F30FV5"..tail.."|rrx/ood:"
    map_op[vstem.."pd_3"] = "rrmoy:660FV5"..tail
    map_op[vstem.."sd_3"] = "rrro:F20FV5"..tail.."|rrx/ooq:"
  end
end

-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
for name,n in pairs{
  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
} do
  -- Each entry registers two templates built from the same opcode byte:
  -- a two-operand "<name>_2" and a three-operand "v<name>_3".
  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
end

------------------------------------------------------------------------------

-- Pattern characters that switch dopattern() to VEX encoding.
-- A numeric value is the index of the operand that is removed from the
-- argument list and stored as vex.v; false means no operand is removed.
local map_vexarg = { u = false, v = 1, V = 2, w = 3 }

-- Process pattern string.
--
-- Walks the pattern one character at a time (a "|" terminator is appended)
-- and drives the emitter helpers (wputop, wputmrmsib, wvreg, waction, ...).
--
-- Parameters:
--   pat     Pattern string (the part of a template after the ':').
--   args    Array of parsed operands. Note: the VEX branch may remove an
--           element from this table via remove().
--   sz      Operand size passed to the opcode/immediate writers.
--   op      Opcode name; only used in error messages here.
--   needrex If true, 16 is added to the rex value handed to wputop().
--
-- Local state: 'opcode' accumulates hex digits and is set to nil once it has
-- been flushed; 'addin' is the operand whose register number was merged into
-- the opcode by 'r'/'R'; 'vex' is nil until a VEX marker has been seen.
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin, vex
  local opcode = 0
  local szov = sz
  local narg = 1
  local rex = 0

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 6 positions.
  if secpos+6 > maxsecpos then wflush() end

  -- Process each character.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then -- Hex digit.
      -- Convert the ASCII code to its value: '0'-'9' map directly,
      -- lowercase letters ('a' is 49 after the subtraction) need -39 and
      -- uppercase letters ('A' is 17) need -7 to land on 10..15.
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then -- Force REX.W.
      rex = 8
    elseif c == "L" then -- Force VEX.L.
      -- NOTE(review): indexes 'vex', which is only assigned in the VEX
      -- branch below; presumably 'L' always follows a VEX marker in patterns.
      vex.l = true
    elseif c == "r" then -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
      -- 's' is the spare field: either the register merged by 'r'/'R' or the
      -- last hex digit of the pattern, which is stripped from the opcode.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - band(s, 7) -- Undo regno opcode merge.
      else
        s = band(opcode, 15) -- Undo last digit.
        opcode = shr(opcode, 4)
      end
      -- 'm' addresses the 1st operand, 'M' the 2nd.
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      -- Accumulate the rex value: 8 for 64 bit operand size (unless already
      -- set), 1/2 for an operand register/index register number above 7,
      -- 4 for a spare above 7, 16 when the caller requested needrex.
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
      opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
      addin = nil
    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
      -- Peel the escape bytes off the low end of the accumulated opcode:
      -- a trailing 38 or 3A selects m = 2 or 3 and must be preceded by 0F;
      -- a bare 0F leaves m = 1.
      local b = band(opcode, 255); opcode = shr(opcode, 8)
      local m = 1
      if b == 0x38 then m = 2
      elseif b == 0x3a then m = 3 end
      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
      if b ~= 0x0f then
        werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
          "' in pattern `"..pat.."' for `"..op.."'")
      end
      -- Pull the operand selected by the marker out of the argument list
      -- (this shifts the remaining operands down).
      local v = map_vexarg[c]
      if v then v = remove(args, v) end
      -- A 66/F3/F2 byte in front of the escape is folded into p = 1/2/3.
      b = band(opcode, 255)
      local p = 0
      if b == 0x66 then p = 1
      elseif b == 0xf3 then p = 2
      elseif b == 0xf2 then p = 3 end
      if p ~= 0 then opcode = shr(opcode, 8) end
      -- Anything still left in front is emitted as a plain opcode first.
      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
      vex = { m = m, p = p, v = v }
    else
      if opcode then -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          -- Register number -1: the merge above added (-1 % 8) == 7, so
          -- subtract it again and emit a VREG action for the opcode instead.
          local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
          wvreg("opcode", addin.vreg, psz, sk)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex, vex)
        end
        opcode = nil
      end
      -- The appended "|" only serves to flush a pending opcode.
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        -- Same as 'o', but takes the displacement of the 2nd operand.
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        -- Label operands (mode "iJ") are only accepted for 'J' on x64 and
        -- for 'i', 'I' or 'J' otherwise.
        if mode == "iJ" and not match(x64 and "J" or "iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        elseif c == "s" then
          -- Register number placed in the high nibble of an immediate byte;
          -- a negative number emits a zero byte plus an "imm.hi" VREG action.
          local reg = a.reg
          if reg < 0 then
            wputb(0)
            wvreg("imm.hi", a.vreg)
          else
            wputb(shl(reg, 4))
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end

------------------------------------------------------------------------------

-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
-- Returns "" when nparams is 0; otherwise a table with one entry per
-- '|'-separated template alternative, built from the short names of its
-- first nparams mode characters. Alternatives whose text contains '#'
-- (a suppressed mode) are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    local s = map_modename[sub(tm, 1, 1)]
    -- Only the first result of gsub() (the string) takes part in the concat.
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end

-- Match operand modes against mode match part of template.
local function matchtm(tm, args)
  -- The i-th character of tm is used as a pattern against the mode string of
  -- the i-th operand. Returns true if all operands match, nothing otherwise.
  for i=1,#args do
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end

-- Handle opcodes defined with template strings.
-- With params == nil this returns the help text/table for the template.
-- Otherwise the operands are parsed, the first template alternative whose
-- modes and sizes fit is handed to dopattern(), and an error is raised via
-- werror() if no alternative fits.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    args[i] = parseoperand(p)
    local nsz = args[i].opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    -- All operands that specify needrex (non-nil) must agree on its value.
    local nrex = args[i].needrex
    if nrex ~= nil then
      if needrex == nil then
        needrex = nrex
      elseif needrex ~= nrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    -- An empty pattern reuses the pattern of the previous alternative.
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then -- Exactly match leading operand sizes.
        -- Compare from the last size character backwards; character i of
        -- szm belongs to operand i-1. Reaching i == 1 (the '/') means every
        -- listed size matched.
        for i = #szm,1,-1 do
          if i == 1 then
            dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
            return
          elseif args[i-1].opsize ~= sub(szm, i, i) then
            break
          end
        end
      else -- Match common operand size.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        -- A "1"/"2" prefix takes the size from that operand alone. Note that
        -- szmix is cleared here and stays cleared for later alternatives.
        if prefix == "1" then szp = args[1].opsize; szmix = nil
        elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
        -- "." accepts any size; otherwise the size must occur in szm. A
        -- missing size is replaced by "#" for the lookup.
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
          return
        end
      end
      -- The modes matched, but the sizes did not.
      gotmatch = true
    end
  end

  -- Pick the most specific error message for the failure.
  local msg = "bad operand mode"
  if gotmatch then
    if szmix then
      msg = "mixed operand size"
    else
      msg = sz and "bad operand size" or "missing operand size"
    end
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end

------------------------------------------------------------------------------

-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  -- Handles the three forms listed in the help table below. A bracketed
  -- first operand selects opcode 0xa3, a bracketed second operand selects
  -- 0xa1, anything else is treated as "reg, imm" with opcode 0xb8 plus the
  -- low three bits of the register number. The 64 bit value is emitted as
  -- two IMM_D actions (low half, then the value shifted right by 32).
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, op64, sz, rex, vreg
    -- NOTE(review): this re-declares op64 and shadows the local above.
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      -- Form "[disp], reg".
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        -- Form "reg, [disp]".
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        -- Form "reg, imm": requires a 64 bit register operand.
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if a.reg == -1 then
          -- Register number -1: keep the base opcode and emit a VREG action.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        rex = a.reg > 7 and 9 or 8
      end
    end
    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
-- The element size is taken from the second character of the opcode name;
-- "l" is mapped to "d" and "a" to the value of addrsize. Every operand must
-- be an immediate (mode starting with "i") whose explicit size, if any,
-- equals the element size.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "l" then sz = "d" elseif sz == "a" then sz = addrsize end
  for _,p in ipairs(params) do
    local a = parseoperand(p, sz == "q")
    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
      werror("bad mode or size in `"..p.."'")
    end
    if a.mode == "iJ" then
      wputlabel("IMM_", a.imm, 1)
    elseif sz == "q" then
      wputqarg(a.imm)
    else
      wputszarg(sz, a.imm)
    end
    -- Flush after an operand if the next one might not fit any more.
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- All data pseudo-opcodes share the same handler.
map_op[".byte_*"] = op_data
map_op[".sbyte_*"] = op_data
map_op[".word_*"] = op_data
map_op[".dword_*"] = op_data
map_op[".qword_*"] = op_data
map_op[".aword_*"] = op_data
map_op[".long_*"] = op_data
map_op[".quad_*"] = op_data
map_op[".addr_*"] = op_data

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
-- The actual output is deferred: a closure is queued with wline().
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
-- The optional second parameter assigns an address to the label and must
-- parse to an operand of mode "iPJ".
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local a = parseoperand(params[1])
  local mode, imm = a.mode, a.imm
  if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif mode == "iJ" then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if addr then
    local a = parseoperand(addr)
    if a.mode == "iPJ" then
      waction("SETLABEL", a.imm)
    else
      werror("bad label assignment")
    end
  end
end
-- The one-parameter form uses the same handler (params[2] is then nil).
map_op[".label_1"] = map_op[".label_2"]

------------------------------------------------------------------------------

-- Alignment pseudo-opcode.
-- The parameter is either a number or a name that resolves through
-- map_opsize and map_opsizenum.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    -- Halve up to 8 times; only an exact power of two ever reaches 1.
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

-- Spacing pseudo-opcode.
-- The first parameter is passed on unchanged as the SPACE action argument.
-- The optional filler must be a number in the range 0 ... 255 and
-- defaults to 0.
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local fill = params[2]
  if fill then
    fill = tonumber(fill)
    if not fill or fill < 0 or fill > 255 then werror("bad filler") end
  end
  wputxb(fill or 0)
end
-- The one-parameter form uses the same handler (the filler is then 0).
map_op[".space_1"] = map_op[".space_2"]

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Registers the type in map_type, adds a "#name" define that expands to
-- sizeof(ctype) and emits a Dt<num> helper macro into the output.
-- nparams is only used to select the help text.
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  -- The optional register must be listed in map_reg_valid_base.
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  -- The counter is only advanced after the define has been queued.
  ctypenum = num
end
-- The two-parameter form uses the same handler (reg is then nil).
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
local function dumptypes(out, lvl)
  -- Gather the names of all defined types and list them in sorted order.
  -- (lvl is accepted for symmetry with the other dump functions; unused.)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for i=1,#names do
    local name = names[i]
    local tp = map_type[name]
    -- Show the reverse-mapped base register name, or nothing if there is
    -- no register or no reverse mapping for it.
    local reg = tp.reg and map_reg_rev[tp.reg]
    if not reg then reg = "" end
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, reg))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
-- Emits a SECTION action followed by the section number and then flushes.
function _M.section(num)
  waction("SECTION")
  wputxb(num)
  wflush(true) -- SECTION is a terminal action.
end

------------------------------------------------------------------------------

-- Dump architecture description.
-- Writes a version banner, then the register and action listings.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
                        _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end

-- Dump all user defined elements.
-- Order of the listing: types, globals, externs.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Pass callbacks from/to the DynASM core.
-- Stores the four callbacks handed in by the core and returns the flush
-- function of this module.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end

-- Setup the arch-specific module.
-- Remembers the architecture and the options for later use.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end

-- Merge the core maps and the arch-specific maps.
-- Lookups missing in map_op fall back to map_coreop; lookups missing in
-- map_def fall back to map_archdef. Returns map_op and map_def.
function _M.mergemaps(map_coreop, map_def)
  local ops = setmetatable(map_op, { __index = map_coreop })
  local defs = setmetatable(map_def, { __index = map_archdef })
  return ops, defs
end

return _M

------------------------------------------------------------------------------
