------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

-- Capture the global `x64` flag in a local. A truthy value selects the x64
-- flavour of this module, anything else the x86 flavour.
-- NOTE(review): the code that sets the global is not visible in this file.
local x64 = x64

-- Module information:
local _info = {
  arch = x64 and "x64" or "x86",
  description = "DynASM x86/x64 module",
  version = "1.4.0",
  vernum = 10400,
  release = "2015-10-18",
  author = "Mike Pall",
  license = "MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }

-- Cache library functions.
-- `unpack` falls back to table.unpack when the global unpack is absent.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
local concat, sort, remove = table.concat, table.sort, table.remove
-- Bit operations come from a pre-existing global `bit` or the "bit" module.
local bit = bit or require("bit")
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift

-- Inherited tables and callbacks.
-- Declared here, assigned elsewhere in the file (not visible in this part).
local g_opt, g_arch
local wline, werror, wfatal, wwarn

-- Action name list.
-- CHECK: Keep this in sync with the C code!
-- The position of a name in this list determines its action number
-- (actfirst + index - 1, see the loop that fills map_action), so entries must
-- not be reordered independently of the C side.
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG", "SPACE",
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL", "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG", "REL_PC",
  -- action arg (1 byte) or ptrdiff_t arg, 1 buffer pos (link):
  "IMM_LG", "IMM_LG64", "IMM_PC", "IMM_PC64",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG", "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (dynamically generated below).
local map_action = {}
-- First action number. Everything below does not need to be escaped.
-- The action numbers occupy the top of the byte range, ending at 255.
local actfirst = 256-#action_names

-- Action list buffer and string (only used to remove dupes).
-- actlist holds one number (0..255) per emitted byte; actstr is the same
-- data as a string so duplicate chunks can be found with a plain search.
local actlist = {}
local actstr = ""

-- Argument list for next dasm_put(). Start with offset 0 into action list.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1

-- VREG kind encodings, pre-shifted by 5 bits.
-- Note that "modrm.rm.r", "opcode" and "sib.base" share the encoding 0x20.
local map_vreg = {
  ["modrm.rm.m"] = 0x00,
  ["modrm.rm.r"] = 0x20,
  ["opcode"] = 0x20,
  ["sib.base"] = 0x20,
  ["sib.index"] = 0x40,
  ["modrm.reg"] = 0x80,
  ["vex.v"] = 0xa0,
  ["imm.hi"] = 0xc0,
}

-- Current number of VREG actions contributing to REX/VEX shrinkage.
-- Incremented by wvreg() for deferred VREG actions and folded into the next
-- non-deferred VREG action byte.
local vreg_shrink_count = 0

------------------------------------------------------------------------------

-- Compute action numbers for action names.
-- Names are numbered consecutively, starting at actfirst.
for n,name in ipairs(action_names) do
  local num = actfirst + n - 1
  map_action[name] = num
end

-- Dump action names and numbers.
-- out: file-like object with a write method.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for n,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
-- out:  file-like object with a write method.
-- name: C identifier used for the array.
-- An empty action list is written as a one-element array holding 255.
-- The action list itself is left untouched, so the function may be called
-- more than once and always emits the complete list.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn] or 255 -- Placeholder byte for an empty list.
  assert(out:write("static const unsigned char ", name, "[",
		   nn == 0 and 1 or nn, "] = {\n"))
  local s = " "
  -- All bytes but the last one are followed by a comma.
  for i=1,nn-1 do
    s = s..actlist[i]..","
    if #s >= 75 then -- Wrap long C lines.
      assert(out:write(s, "\n"))
      s = " "
    end
  end
  assert(out:write(s, last, "\n};\n\n")) -- The last byte gets no comma.
end

------------------------------------------------------------------------------

-- Add byte to action list.
-- n must be an integral number in the range 0..255.
local function wputxb(n)
  assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
-- action: action name (a key of map_action).
-- a:      optional argument, appended to the dasm_put() argument list.
-- num:    optional number of buffer positions to advance by. Defaults to 1
--         if either a or num is given; otherwise the position is unchanged.
local function waction(action, a, num)
  wputxb(assert(map_action[action], "bad action name `"..action.."'"))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Optionally add a VREG action.
-- kind:  VREG kind name (a key of map_vreg).
-- vreg:  variable register expression; nothing is emitted if it is nil/false.
-- psz:   optional number added to the emitted VREG byte (defaults to 0).
-- sk:    optional threshold; kinds encoded below it count towards shrinkage.
-- defer: if truthy, the accumulated shrink count is carried over to a later
--        VREG action instead of being encoded (and reset) by this one.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Report the offending kind (not the register expression) on failure.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end

-- Add call to embedded DynASM C code.
-- Emits a line of the form: dasm_<func>(Dst, <args joined by ", ">);
local function wcall(func, args)
  wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
end

-- Delete duplicate action list chunks. A tad slow, but so what.
-- offset: start of the current chunk in actlist (0-based).
-- If an identical byte sequence already exists in actstr, the chunk is
-- removed from actlist and actargs[1] is pointed at the earlier copy.
-- Otherwise the chunk is appended to actstr.
local function dedupechunk(offset)
  local al, as = actlist, actstr
  local chunk = char(unpack(al, offset+1, #al))
  local orig = find(as, chunk, 1, true) -- Plain (non-pattern) search.
  if orig then
    actargs[1] = orig-1 -- Replace with original offset.
    for i=offset+1,#al do al[i] = nil end -- Kill dupe.
  else
    actstr = as..chunk
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- term: truthy if the action list already ends with a terminal action, in
--       which case no STOP action is appended.
local function wflush(term)
  local offset = actargs[1]
  if #actlist == offset then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  dedupechunk(offset)
  wcall("put", actargs) -- Add call to dasm_put().
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped byte.
-- Bytes in the action number range (>= actfirst) are preceded by ESC.
local function wputb(n)
  if n >= actfirst then waction("ESC") end -- Need to escape byte.
  wputxb(n)
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
-- Next free global label number. Numbering starts at 10.
local next_global = 10
local map_global = setmetatable({}, {
  -- Called on the first lookup of a label: validate the name, hand out the
  -- next free number and cache it in the table.
  __index = function(labels, label)
    if match(label, "^[%a_][%w_@]*$") == nil then
      werror("bad global label")
    end
    local num = next_global
    if num > 246 then werror("too many global labels") end
    next_global = num + 1
    labels[label] = num
    return num
  end,
})

-- Dump global labels.
-- Labels are listed in order of their assigned numbers.
local function dumpglobals(out, lvl)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  out:write("Global labels:\n")
  local num = 10
  while num < next_global do
    out:write(format(" %s\n", by_num[num]))
    num = num + 1
  end
  out:write("\n")
end

-- Write global label enum.
-- Everything from the first "@" on is stripped from the label name.
local function writeglobals(out, prefix)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  out:write("enum {\n")
  local num = 10
  while num < next_global do
    local ident = gsub(by_num[num], "@.*", "") -- Only the string is kept.
    out:write(" ", prefix, ident, ",\n")
    num = num + 1
  end
  out:write(" ", prefix, "_MAX\n};\n")
end

-- Write global label names.
-- The C string array is terminated by a null pointer.
local function writeglobalnames(out, name)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  local num = 10
  while num < next_global do
    out:write(" \"", by_num[num], "\",\n")
    num = num + 1
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Extern numbers are negative and count downwards from -1.
local next_extern = -1
local map_extern = setmetatable({}, {
  __index = function(externs, extname)
    -- No restrictions on the name for now.
    local num = next_extern
    if num < -256 then werror("too many extern labels") end
    next_extern = num - 1
    externs[extname] = num
    return num
  end,
})

-- Dump extern labels.
-- Externs are listed in order of assignment (numbers -1, -2, ...).
local function dumpexterns(out, lvl)
  local by_idx = {}
  for extname, num in pairs(map_extern) do by_idx[-num] = extname end
  out:write("Extern labels:\n")
  local count = -next_extern - 1
  for idx = 1, count do
    out:write(format(" %s\n", by_idx[idx]))
  end
  out:write("\n")
end

-- Write extern label names.
-- The C string array is terminated by a null pointer.
local function writeexternnames(out, name)
  local by_idx = {}
  for extname, num in pairs(map_extern) do by_idx[-num] = extname end
  out:write("static const char *const ", name, "[] = {\n")
  local count = -next_extern - 1
  for idx = 1, count do
    out:write(" \"", by_idx[idx], "\",\n")
  end
  out:write(" (const char *)0\n};\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = {}		-- Ext. register name -> int. name.
local map_reg_rev = {}		-- Int. register name -> ext. name.
local map_reg_num = {}		-- Int. register name -> register number.
local map_reg_opsize = {}	-- Int. register name -> operand size.
local map_reg_valid_base = {}	-- Int. register name -> valid base register?
local map_reg_valid_index = {}	-- Int. register name -> valid index register?
local map_reg_needrex = {}	-- Int. register name -> need rex vs. no rex.
local reg_list = {}		-- Canonical list of int. register names.

local map_type = {}		-- Type name -> { ctype, reg }
local ctypenum = 0		-- Type number (for _PTx macros).

-- Size for address operands: qword on x64, dword otherwise.
local addrsize = x64 and "q" or "d"

-- Helper functions to fill register maps.
-- Fill the register maps for one register class.
-- sz:    operand size code of the class (e.g. "d", "b", "f", "o").
-- cl:    external name of the class itself (variable register, number -1).
-- names: optional list of traditional register names for numbers 0..#names-1.
-- Numbered names (rN.., stN, <cl>N) are added as aliases; a numbered register
-- only gets its own entry if no traditional name has claimed it before.
-- An empty string is appended to reg_list as a separator after each class.
local function mkrmap(sz, cl, names)
  -- Registers of address size or dword size may serve as base/index.
  local addressable = (sz == addrsize or sz == "d")

  -- Enter one internal register name into the list and the lookup maps.
  local function add_reg(iname, extname, num)
    reg_list[#reg_list+1] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = num
    map_reg_opsize[iname] = sz
    if addressable then
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  -- The class itself.
  local cname = format("@%s", sz)
  map_archdef[cl] = cname
  add_reg(cname, cl, -1)

  -- Traditional names.
  if names then
    for n, extname in ipairs(names) do
      local iname = format("@%s%x", sz, n-1)
      map_archdef[extname] = iname
      add_reg(iname, extname, n-1)
      -- Byte registers 4..7 given by name are flagged as "no rex".
      if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
    end
  end

  -- Numbered names.
  local last = (x64 and sz ~= "f") and 15 or 7
  for i = 0, last do
    local needrex = sz == "b" and i > 3
    local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end
    local extname
    if sz == "f" then
      extname = format("st%d", i)
    elseif sz == "o" or sz == "y" then
      extname = format("%s%d", cl, i)
    else
      extname = format("r%d%s", i, sz == addrsize and "" or sz)
    end
    map_archdef[extname] = iname
    if not map_reg_rev[iname] then add_reg(iname, extname, i) end
  end

  reg_list[#reg_list+1] = ""
end

-- Integer registers (qword, dword, word and byte sized).
-- The qword class is only registered for x64.
if x64 then
  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
end
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
-- The stack pointer is never a valid index register.
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
-- On x64 the variable byte register class is flagged as needing rex.
if x64 then map_reg_needrex[map_archdef.Rb] = true end
-- "Ra" is an alias for the variable register class of address size.
map_archdef["Ra"] = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- AVX registers (yword sized, but oword, qword and dword accessible).
mkrmap("y", "ymm")

-- Operand size prefixes to codes.
-- "aword" maps to the address size of the selected architecture.
local map_opsize = {
  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
  tword = "t", aword = addrsize,
}

-- Operand size code to number.
-- The number is the operand size in bytes.
local map_opsizenum = {
  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
}

-- Operand size code to name.
local map_opsizename = {
  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
  t = "tword", f = "fpword",
}

-- Valid index register scale factors.
-- Maps the scale factor (as a string) to its 2 bit encoding.
local map_xsc = {
  ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
}

-- Condition codes.
-- The first two rows are the base names for codes 0..15; the remaining rows
-- are aliases for the same codes.
local map_cc = {
  o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
  s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
  c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
  pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
}


-- Reverse defines for registers.
-- Every "@..." token in s is looked up in map_reg_rev and replaced by the
-- external register name, if there is one. Both results of gsub (the new
-- string and the match count) are passed through to the caller.
function _M.revdef(s)
  return gsub(s, "@%w+", map_reg_rev)
end

-- Dump register names and numbers
-- Empty entries in reg_list are rendered as blank separator lines.
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for idx = 1, #reg_list do
    local reg = reg_list[idx]
    if reg ~= "" then
      local extname = map_reg_rev[reg]
      local num = map_reg_num[reg]
      local szname = map_opsizename[map_reg_opsize[reg]]
      local shown = num
      if num < 0 then shown = "(variable)" end
      out:write(format(" %-5s %-8s %s\n", extname, szname, shown))
    else
      out:write("\n")
    end
  end
end

------------------------------------------------------------------------------

-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
-- aprefix: "IMM_" or "REL_".
-- imm:     label number (negative for externs), or an expression for PC
--          labels.
-- num:     number of buffer positions to advance by.
local function wputlabel(aprefix, imm, num)
  if type(imm) ~= "number" then
    -- PC label: the expression becomes a dasm_put() argument.
    waction(aprefix.."PC", imm, num)
    return
  end
  if imm >= 0 then
    -- Local or global label.
    waction(aprefix.."LG", nil, num)
  else
    -- Extern: a type byte (0 for IMM_, 1 otherwise), then the extern index.
    waction("EXTERN")
    wputxb(aprefix == "IMM_" and 0 or 1)
    imm = -imm-1
  end
  wputxb(imm)
end

-- Put action for 64 bit label arg (IMM_LG64, IMM_PC64).
-- aprefix is accepted for symmetry with wputlabel(), but not used: only the
-- IMM_ actions are ever emitted here.
local function wputlabel64(aprefix, imm, num)
  if type(imm) ~= "number" then
    waction("IMM_PC64", imm, num)
    return
  end
  waction("IMM_LG64", nil, num)
  wputxb(imm)
end

-- Put signed byte or arg.
-- Numbers are range checked and emitted in two's complement form;
-- anything else is deferred to an IMM_S action.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  if n < 0 then n = n + 256 end
  wputb(n)
end

-- Put unsigned byte or arg.
-- Numbers are range checked; anything else is deferred to an IMM_B action.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end

-- Put unsigned word or arg.
-- Numbers must fit into 16 bits and are emitted in little-endian order;
-- anything else is deferred to an IMM_W action.
local function wputwarg(n)
  if type(n) == "number" then
    if shr(n, 16) ~= 0 then
      werror("unsigned immediate word out of range")
    end
    wputb(band(n, 255)); wputb(shr(n, 8));
  else waction("IMM_W", n) end
end

-- Put signed or unsigned qword or arg.
-- n is one of:
--   number: emitted as 8 little-endian bytes. Negative numbers are sign
--           extended, non-negative numbers are zero extended.
--   table:  a label reference { label }, emitted via wputlabel64().
--   other:  a C expression, emitted as two IMM_D actions (low/high dword).
local function wputqarg(n)
  local tn = type(n)
  if tn == "number" then
    -- The bit library works on 32 bit values and only uses the lower 5 bits
    -- of a shift count, so the number is split arithmetically into two
    -- halves first. Lua's % is a floored modulo: lo is always in
    -- 0..2^32-1 and hi carries the sign.
    local lo = n % 4294967296
    local hi = (n - lo) / 4294967296
    wputb(band(lo, 255))
    wputb(band(shr(lo, 8), 255))
    wputb(band(shr(lo, 16), 255))
    wputb(shr(lo, 24))
    wputb(band(hi, 255))
    wputb(band(shr(hi, 8), 255))
    wputb(band(shr(hi, 16), 255))
    wputb(shr(hi, 24))
  elseif tn == "table" then
    wputlabel64("IMM_", n[1], 1)
  else
    waction("IMM_D", format("(unsigned int)(%s)", n))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", n))
  end
end

-- Put signed or unsigned dword or arg.
-- n is one of:
--   number: emitted as 4 little-endian bytes.
--   table:  a label reference { label }, emitted via wputlabel().
--   other:  a C expression, emitted as an IMM_D action.
local function wputdarg(n)
  local tn = type(n)
  if tn == "number" then
    wputb(band(n, 255))
    wputb(band(shr(n, 8), 255))
    wputb(band(shr(n, 16), 255))
    wputb(shr(n, 24))
  elseif tn == "table" then
    wputlabel("IMM_", n[1], 1)
  else
    waction("IMM_D", n)
  end
end

-- Put operand-size dependent number or arg (defaults to dword).
-- sz: nil, "d" or "q" -> dword, "w" -> word, "b" -> unsigned byte,
--     "s" -> signed byte. Any other size is an error.
local function wputszarg(sz, n)
  if not sz or sz == "d" or sz == "q" then wputdarg(n)
  elseif sz == "w" then wputwarg(n)
  elseif sz == "b" then wputbarg(n)
  elseif sz == "s" then wputsbarg(n)
  else werror("bad operand size") end
end

-- Put multi-byte opcode with operand-size dependent modifications.
-- sz:     operand size code. "w" adds the o16 prefix, "b" decrements the last
--         opcode byte, "y" sets the length bit of a VEX prefix.
-- op:     opcode as a number; multiple opcode bytes are packed into it with
--         the first byte in the most significant position.
-- rex:    rex bits (0 for none). Must be 0 unless x64.
-- vex:    nil, or a table describing a VEX prefix. The fields m, p, l and v
--         are read here; v (if set) is an operand table with reg/vreg fields.
-- vregr, vregxb: truthy if variable registers are involved.
--         NOTE(review): the exact meaning is defined by the caller, which is
--         not visible here.
-- Returns psz, sk. Both are only used as arguments to later wvreg() calls
-- in the visible code (see wputmrmsib).
local function wputop(sz, op, rex, vex, vregr, vregxb)
  local psz, sk = 0, nil
  if vex then
    local tail
    -- The short (0xc5) prefix form is only used if vex.m is 1 and none of the
    -- rex bits 1, 2 and 8 are set, and never for variable registers on x64.
    if vex.m == 1 and band(rex, 11) == 0 then
      if x64 and vregxb then
        sk = map_vreg["modrm.reg"]
      else
        wputb(0xc5)
        tail = shl(bxor(band(rex, 4), 4), 5) -- Inverted rex bit 4 -> bit 7.
        psz = 3
      end
    end
    -- Otherwise emit the long (0xc4) prefix form.
    if not tail then
      wputb(0xc4)
      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m) -- Inverted low 3 rex bits.
      tail = shl(band(rex, 8), 4) -- Rex bit 8 -> bit 7.
      psz = 4
    end
    local reg, vreg = 0, nil
    if vex.v then
      reg = vex.v.reg
      if not reg then werror("bad vex operand") end
      -- Negative register number: variable register, patched in by VREG.
      if reg < 0 then reg = 0; vreg = vex.v.vreg end
    end
    if sz == "y" or vex.l then tail = tail + 4 end
    -- The register number is stored inverted in bits 3..6.
    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
    wvreg("vex.v", vreg)
    rex = 0 -- The rex bits are part of the VEX prefix now.
    if op >= 256 then werror("bad vex opcode") end
  else
    if rex ~= 0 then
      if not x64 then werror("bad operand size") end
    elseif (vregr or vregxb) and x64 then
      -- Force a rex prefix for variable registers: bit 0x10 makes rex
      -- non-zero, but is masked out (band(rex, 15)) when the byte is emitted.
      rex = 0x10
      sk = map_vreg["vex.v"]
    end
  end
  local r
  if sz == "w" then wputb(102) end -- 102 = 0x66, the o16 prefix.
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      -- For 0F 3A xx and 0F 38 xx opcodes the rex prefix goes before the
      -- 0F byte.
      local opc3 = band(op, 0xffff00)
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
        wputb(64 + band(rex, 15)); rex = 0; psz = 2
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
  end
  if op >= 256 then
    local b = shr(op, 8)
    -- Same for two-byte 0F xx opcodes: rex prefix before the 0F byte.
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    wputb(b); op = band(op, 255); psz = psz + 1
  end
  -- Rex prefix not emitted yet: it goes right before the last opcode byte.
  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
  if sz == "b" then op = op - 1 end
  wputb(op)
  return psz, sk
end

-- Put ModRM or SIB formatted byte.
-- m:  2 bit field for bits 6..7 (mode resp. scale).
-- s:  field for bits 3..5 (spare/reg resp. index), only the low 3 bits are
--     used.
-- rm: field for bits 0..2 (r/m resp. base), only the low 3 bits are used.
-- vs, vrm: not used.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
end

-- Put ModRM/SIB plus optional displacement.
-- t:       operand table (see parseoperand). The fields mode, reg, vreg,
--          xreg, vxreg, xsc and disp are read here.
-- imark:   mark character from the template; "I" requests a MARK action.
-- s:       value for the spare/reg field; negative values are treated as 0.
-- vsreg:   variable register expression for the spare/reg field, or nil.
-- psz, sk: passed on to wvreg(); these are the results of wputop().
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  -- Negative register numbers denote variable registers: encode 0 and let a
  -- VREG action fill in the real number.
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    -- The first VREG is deferred if another one follows for this byte.
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.r", vreg, psz+1, sk)
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
      wputmodrm(t.xsc, xreg, 5)
      wvreg("sib.index", vxreg, psz+2, sk)
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
        wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
        wputmodrm(0, 4, 5)
      else
        -- On x64 a label displacement (table) is encoded rip-relative.
        riprel = x64
        wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
      end
    end
    if riprel then -- Emit rip-relative displacement.
      -- NOTE(review): imark is used as a Lua pattern here, not as plain text.
      if match("UWSiI", imark) then
        werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  -- Select the displacement mode: 0 = none, 1 = signed byte, 2 = dword.
  -- m stays nil if the size can only be determined at runtime (DISP action).
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    wvreg("sib.index", vxreg, psz+2, sk, vreg)
    wvreg("sib.base", vreg, psz+2, sk)
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.m", vreg, psz+1, sk)
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end

------------------------------------------------------------------------------

-- Return human-readable operand mode string.
-- op:   opcode name.
-- args: list of operand tables. Each operand is rendered as the first
--       character of its mode plus its operand size code ("?" if unknown).
local function opmodestr(op, args)
  local m = {}
  for i=1,#args do
    local a = args[i]
    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
  end
  return op.." "..concat(m, ",")
end

-- Convert number to valid integer or nil.
-- Returns nil (no value) if expr is not convertible to a number. Numbers
-- with a fractional part or outside of -2^31..2^32-1 are reported as errors.
local function toint(expr)
  local n = tonumber(expr)
  if n then
    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
      werror("bad integer number `"..expr.."'")
    end
    return n
  end
end

-- Parse immediate expression.
-- Returns a mode string and a value:
--   "iPJ", C expression  for &expr (pointer)
--   "iJ",  expression    for =>expr (pc label reference)
--   "iJ",  label number  for ->name, <N / >N and extern references
--   "iI",  expression    for everything else (plain immediate)
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
  end

  local lead = sub(expr, 1, 2)
  if lead == "=>" then
    -- =>expr (pc label reference)
    return "iJ", sub(expr, 3)
  elseif lead == "->" then
    -- ->name (global label reference)
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, digit = match(expr, "^([<>])([1-9])$")
  if dir then -- Fwd: 247-255, Bkwd: 1-9.
    local lnum = tonumber(digit)
    if dir == ">" then lnum = lnum + 246 end
    return "iJ", lnum
  end

  -- extern name (extern label reference)
  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end

-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns a number for constant displacements, a table { label } for label
-- references and a string (C expression) for everything else.
local function dispexpr(expr)
  if expr == "" then return 0 end
  local num = toint(expr)
  if num then return num end

  local sign, body = match(expr, "^([+-])%s*(.+)$")
  if not sign then
    werror("bad displacement expression `"..expr.."'")
  elseif sign == "+" then
    expr = body
  end

  -- +-opsize*num
  local szname, count = match(body, "^(%w+)%s*%*%s*(.+)$")
  local szcode, imm = map_opsize[szname], toint(count)
  if szcode and imm then
    if sign == "-" then imm = -imm end
    return imm*map_opsizenum[szcode]
  end

  -- +label
  local mode, target = immexpr(body)
  if mode == "iJ" then
    if sign == "-" then werror("cannot invert label reference") end
    return { target }
  end

  -- The sign of a negative expression must be kept (a "+" was dropped above).
  return expr
end

-- Parse register or type expression.
-- expr is either an internal register name or a type name, optionally with a
-- register override ("type:@reg").
-- Returns reg, regnum, tp for a known type (tp is its map_type entry).
-- Otherwise returns expr and its register number, which is nil if expr is
-- not a register. Returns nothing if expr is nil.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    local rnum = map_reg_num[reg]
    if not rnum then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    if not map_reg_valid_base[reg] then
      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
    end
    return reg, rnum, tp
  end
  return expr, map_reg_num[expr]
end

-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-- Further fields set here: vreg/vxreg (variable register expressions for
-- reg/xreg) and needrex (for plain register operands).
-- The single-pass "repeat ... until true" loop only exists so that `break`
-- can be used to leave the parser early.
local function parseoperand(param)
  local t = {}

  -- Strip an operand size override (e.g. "dword") from the front.
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  -- br holds the contents of [...] for memory operands, nil otherwise.
  local br = match(expr, "^%[%s*(.-)%s*%]$")
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
        t.mode = x64 and "xm" or "xmO"
        break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
        -- [expr]
        t.mode = x64 and "xm" or "xmO"
        t.disp = dispexpr("+"..br)
        break
      end

      -- Register number -1: variable register, followed by (expr).
      if t.reg == -1 then
        t.vreg, tailr = match(tailr, "^(%b())(.*)$")
        if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        if not map_reg_valid_index[reg] then
          werror("bad index register `"..map_reg_rev[reg].."'")
        end
        -- The register parsed so far is the index, there is no base.
        t.xsc = map_xsc[xsc]
        t.xreg = t.reg
        t.vxreg = t.vreg
        t.reg = nil
        t.vreg = nil
        t.disp = dispexpr(tailsc)
        break
      end
      if not map_reg_valid_base[reg] then
        werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
        -- [reg+-expr]
        t.disp = dispexpr(tailr)
        break
      end
      if not map_reg_valid_index[xreg] then
        werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
        t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
        if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
        t.xsc = map_xsc[xsc]
        tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
        imm = toint(sub(expr, 2))
        if imm then
          imm = imm * map_opsizenum[t.opsize]
          t.opsize = nil
        end
      end
      if imm then
        if t.opsize then werror("bad operand size override") end
        local m = "i"
        if imm == 1 then m = m.."1" end
        -- Map unsigned 32 bit values 0xffffff80..0xffffffff to -128..-1.
        if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
        if imm >= -128 and imm <= 127 then m = m.."S" end
        t.imm = imm
        t.mode = m
        break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
        if t.reg == -1 then
          t.vreg, tailr = match(tailr, "^(%b())(.*)$")
          if not t.vreg then werror("bad variable register expression") end
        end
        -- reg
        if tailr == "" then
          if t.opsize then werror("bad operand size override") end
          t.opsize = map_reg_opsize[reg]
          if t.opsize == "f" then
            t.mode = t.reg == 0 and "fF" or "f"
          else
            if reg == "@w4" or (x64 and reg == "@d4") then
              wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
            end
            -- "R" marks register number 0, "C" marks cl ("@b1").
            t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
          end
          t.needrex = map_reg_needrex[reg]
          break
        end

        -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
        if not tp then werror("bad operand `"..param.."'") end
        t.mode = "xm"
        t.disp = format(tp.ctypefmt, tailr)
      else
        t.mode, t.imm = immexpr(expr)
        -- Jump targets are always of address size.
        if sub(t.mode, -1) == "J" then
          if t.opsize and t.opsize ~= addrsize then
            werror("bad operand size override")
          end
          t.opsize = addrsize
        end
      end
    end
  until true
  return t
end

------------------------------------------------------------------------------
-- x86 Template String Description
-- ===============================
--
-- Each template string is
-- a list of [match:]pattern pairs,
-- separated by "|". The first match wins. No match means a
-- bad or unsupported combination of operand modes or sizes.
--
-- The match part and the ":" is omitted if the operation has
-- no operands. Otherwise the first N characters are matched
-- against the mode strings of each of the N operands.
--
-- The mode string for each operand type is (see parseoperand()):
--   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
--   FP register:      "f",  +"F" for st0
--   Index operand:    "xm", +"O" for [disp] (pure offset)
--   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
--                     +"I" for arg, +"P" for pointer
--   Any:              +"J" for valid jump targets
--
-- So a match character "m" (mixed) matches both an integer register
-- and an index operand (to be encoded with the ModRM/SIB scheme).
-- But "r" matches only a register and "x" only an index operand
-- (e.g. for FP memory access operations).
--
-- The operand size match string starts right after the mode match
-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
-- The effective data size of the operation is matched against this list.
--
-- If only the regular "b", "w", "d", "q", "t" operand sizes are
-- present, then all operands must be the same size. Unspecified sizes
-- are ignored, but at least one operand must have a size or the pattern
-- won't match (use the "byte", "word", "dword", "qword", "tword"
-- operand size overrides. E.g.: mov dword [eax], 1).
--
-- If the list has a "1" or "2" prefix, the operand size is taken
-- from the respective operand and any other operand sizes are ignored.
-- If the list contains only ".", all operand sizes are ignored.
-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
-- are compared to the match.
--
-- E.g. "rrdw" matches for either two dword registers or two word
-- registers. "Fx2dq" matches an st0 operand plus an index operand
-- pointing to a dword (float) or qword (double).
--
-- Every character after the ":" is part of the pattern string:
--   Hex chars are accumulated to form the opcode (left to right).
--   "n"       disables the standard opcode mods
--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
--   "X"       Force REX.W.
--   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
--   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
--             The spare 3 bits are either filled with the last hex digit or
--             the result from a previous "r"/"R". The opcode is restored.
--   "u"       Use VEX encoding, vvvv unused.
--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
--             removed from the list used by future characters).
--   "L"       Force VEX.L
--
-- All of the following characters force a flush of the opcode:
--   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
--   "s"       stores a 4 bit immediate from the last register operand,
--             followed by 4 zero bits.
--   "S"       stores a signed 8 bit immediate from the last operand.
--   "U"       stores an unsigned 8 bit immediate from the last operand.
--   "W"       stores an unsigned 16 bit immediate from the last operand.
--   "i"       stores an operand sized immediate from the last operand.
--   "I"       ditto, but generates an action code to optionally modify
--             the opcode (+2) for a signed 8 bit immediate.
--   "J"       generates one of the REL action codes from the last operand.
--
------------------------------------------------------------------------------

-- Template strings for x86 instructions. Ordered by first opcode byte.
-- Unimplemented opcodes (deliberate omissions) are marked with *.
--
-- Keys have the form <mnemonic>_<operand count>; values are template strings
-- in the syntax described above. Entries written as `x64 and "..."` or
-- `not x64 and "..."` evaluate to false on the other architecture.
local map_op = {
  -- 00-05: add...
  -- 06: *push es
  -- 07: *pop es
  -- 08-0D: or...
  -- 0E: *push cs
  -- 0F: two byte opcode prefix
  -- 10-15: adc...
  -- 16: *push ss
  -- 17: *pop ss
  -- 18-1D: sbb...
  -- 1E: *push ds
  -- 1F: *pop ds
  -- 20-25: and...
  es_0 = "26",
  -- 27: *daa
  -- 28-2D: sub...
  cs_0 = "2E",
  -- 2F: *das
  -- 30-35: xor...
  ss_0 = "36",
  -- 37: *aaa
  -- 38-3D: cmp...
  ds_0 = "3E",
  -- 3F: *aas
  inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
            "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  -- 60: *pusha, *pushad, *pushaw
  -- 61: *popa, *popad, *popaw
  -- 62: *bound rdw,x
  -- 63: x86: *arpl mw,rw
  movsxd_2 = x64 and "rm/qd:63rM",
  fs_0 = "64",
  gs_0 = "65",
  o16_0 = "66",
  a16_0 = not x64 and "67" or nil,
  a32_0 = x64 and "67",
  -- 68: push idw
  -- 69: imul rdw,mdw,idw
  -- 6A: push ib
  -- 6B: imul rdw,mdw,S
  -- 6C: *insb
  -- 6D: *insd, *insw
  -- 6E: *outsb
  -- 6F: *outsd, *outsw
  -- 70-7F: jcc lb
  -- 80: add... mb,i
  -- 81: add... mdw,i
  -- 82: *undefined
  -- 83: add... mdw,S
  test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  -- 86: xchg rb,mb
  -- 87: xchg rdw,mdw
  -- 88: mov mb,r
  -- 89: mov mdw,r
  -- 8A: mov r,mb
  -- 8B: mov r,mdw
  -- 8C: *mov mdw,seg
  lea_2 = "rx1dq:8DrM",
  -- 8E: *mov seg,mdw
  -- 8F: pop mdw
  nop_0 = "90",
  xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  cbw_0 = "6698",
  cwde_0 = "98",
  cdqe_0 = "4898",
  cwd_0 = "6699",
  cdq_0 = "99",
  cqo_0 = "4899",
  -- 9A: *call iw:idw
  wait_0 = "9B",
  fwait_0 = "9B",
  pushf_0 = "9C",
  pushfd_0 = not x64 and "9C",
  pushfq_0 = x64 and "9C",
  popf_0 = "9D",
  popfd_0 = not x64 and "9D",
  popfq_0 = x64 and "9D",
  sahf_0 = "9E",
  lahf_0 = "9F",
  mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  movsb_0 = "A4",
  movsw_0 = "66A5",
  movsd_0 = "A5",
  cmpsb_0 = "A6",
  cmpsw_0 = "66A7",
  cmpsd_0 = "A7",
  -- A8: test Rb,i
  -- A9: test Rdw,i
  stosb_0 = "AA",
  stosw_0 = "66AB",
  stosd_0 = "AB",
  lodsb_0 = "AC",
  lodsw_0 = "66AD",
  lodsd_0 = "AD",
  scasb_0 = "AE",
  scasw_0 = "66AF",
  scasd_0 = "AF",
  -- B0-B7: mov rb,i
  -- B8-BF: mov rdw,i
  -- C0: rol... mb,i
  -- C1: rol... mdw,i
  ret_1 = "i.:nC2W",
  ret_0 = "C3",
  -- C4: *les rdw,mq
  -- C5: *lds rdw,mq
  -- C6: mov mb,i
  -- C7: mov mdw,i
  -- C8: *enter iw,ib
  leave_0 = "C9",
  -- CA: *retf iw
  -- CB: *retf
  int3_0 = "CC",
  int_1 = "i.:nCDU",
  into_0 = "CE",
  -- CF: *iret
  -- D0: rol... mb,1
  -- D1: rol... mdw,1
  -- D2: rol... mb,cl
  -- D3: rol... mdw,cl
  -- D4: *aam ib
  -- D5: *aad ib
  -- D6: *salc
  -- D7: *xlat
  -- D8-DF: floating point ops
  -- E0: *loopne
  -- E1: *loope
  -- E2: *loop
  -- E3: *jcxz, *jecxz
  -- E4: *in Rb,ib
  -- E5: *in Rdw,ib
  -- E6: *out ib,Rb
  -- E7: *out ib,Rdw
  call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  -- EA: *jmp iw:idw
  -- EB: jmp ib
  -- EC: *in Rb,dx
  -- ED: *in Rdw,dx
  -- EE: *out dx,Rb
  -- EF: *out dx,Rdw
  lock_0 = "F0",
  int1_0 = "F1",
  repne_0 = "F2",
  repnz_0 = "F2",
  rep_0 = "F3",
  repe_0 = "F3",
  repz_0 = "F3",
  -- F4: *hlt
  cmc_0 = "F5",
  -- F6: test... mb,i; div... mb
  -- F7: test... mdw,i; div... mdw
  clc_0 = "F8",
  stc_0 = "F9",
  -- FA: *cli
  cld_0 = "FC",
  std_0 = "FD",
  -- FE: inc... mb
  -- FF: inc... mdw

  -- misc ops
  not_1 = "m:F72m",
  neg_1 = "m:F73m",
  mul_1 = "m:F74m",
  imul_1 = "m:F75m",
  div_1 = "m:F76m",
  idiv_1 = "m:F77m",

  imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",

  movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",

  bswap_1 = "rqd:0FC8r",
  bsf_2 = "rmqdw:0FBCrM",
  bsr_2 = "rmqdw:0FBDrM",
  bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU",
  btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU",
  btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU",
  bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU",

  shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
  shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",

  rdtsc_0 = "0F31", -- P1+
  rdpmc_0 = "0F33", -- P6+
  cpuid_0 = "0FA2", -- P1+

  -- floating point ops
  fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m",
  fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",

  fpop_0 = "DDD8", -- Alias for fstp st0.

  fist_1 = "xw:nDF2m|xd:DB2m",
  fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m",
  fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m",

  fxch_0 = "D9C9",
  fxch_1 = "ff:D9C8r",
  fxch_2 = "fFf:D9C8r|Fff:D9C8R",

  fucom_1 = "ff:DDE0r",
  fucom_2 = "Fff:DDE0R",
  fucomp_1 = "ff:DDE8r",
  fucomp_2 = "Fff:DDE8R",
  fucomi_1 = "ff:DBE8r", -- P6+
  fucomi_2 = "Fff:DBE8R", -- P6+
  fucomip_1 = "ff:DFE8r", -- P6+
  fucomip_2 = "Fff:DFE8R", -- P6+
  fcomi_1 = "ff:DBF0r", -- P6+
  fcomi_2 = "Fff:DBF0R", -- P6+
  fcomip_1 = "ff:DFF0r", -- P6+
  fcomip_2 = "Fff:DFF0R", -- P6+
  fucompp_0 = "DAE9",
  fcompp_0 = "DED9",

  fldenv_1 = "x.:D94m",
  fnstenv_1 = "x.:D96m",
  fstenv_1 = "x.:9BD96m",
  fldcw_1 = "xw:nD95m",
  fstcw_1 = "xw:n9BD97m",
  fnstcw_1 = "xw:nD97m",
  fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m",
  fnstsw_1 = "Rw:nDFE0|xw:nDD7m",
  fclex_0 = "9BDBE2",
  fnclex_0 = "DBE2",

  fnop_0 = "D9D0",
  -- D9D1-D9DF: unassigned

  fchs_0 = "D9E0",
  fabs_0 = "D9E1",
  -- D9E2: unassigned
  -- D9E3: unassigned
  ftst_0 = "D9E4",
  fxam_0 = "D9E5",
  -- D9E6: unassigned
  -- D9E7: unassigned
  fld1_0 = "D9E8",
  fldl2t_0 = "D9E9",
  fldl2e_0 = "D9EA",
  fldpi_0 = "D9EB",
  fldlg2_0 = "D9EC",
  fldln2_0 = "D9ED",
  fldz_0 = "D9EE",
  -- D9EF: unassigned

  f2xm1_0 = "D9F0",
  fyl2x_0 = "D9F1",
  fptan_0 = "D9F2",
  fpatan_0 = "D9F3",
  fxtract_0 = "D9F4",
  fprem1_0 = "D9F5",
  fdecstp_0 = "D9F6",
  fincstp_0 = "D9F7",
  fprem_0 = "D9F8",
  fyl2xp1_0 = "D9F9",
  fsqrt_0 = "D9FA",
  fsincos_0 = "D9FB",
  frndint_0 = "D9FC",
  fscale_0 = "D9FD",
  fsin_0 = "D9FE",
  fcos_0 = "D9FF",

  -- SSE, SSE2
  andnpd_2 = "rmo:660F55rM",
  andnps_2 = "rmo:0F55rM",
  andpd_2 = "rmo:660F54rM",
  andps_2 = "rmo:0F54rM",
  clflush_1 = "x.:0FAE7m",
  cmppd_3 = "rmio:660FC2rMU",
  cmpps_3 = "rmio:0FC2rMU",
  cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:",
  cmpss_3 = "rrio:F30FC2rMU|rxi/od:",
  comisd_2 = "rro:660F2FrM|rx/oq:",
  comiss_2 = "rro:0F2FrM|rx/od:",
  cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:",
  cvtdq2ps_2 = "rmo:0F5BrM",
  cvtpd2dq_2 = "rmo:F20FE6rM",
  cvtpd2ps_2 = "rmo:660F5ArM",
  cvtpi2pd_2 = "rx/oq:660F2ArM",
  cvtpi2ps_2 = "rx/oq:0F2ArM",
  cvtps2dq_2 = "rmo:660F5BrM",
  cvtps2pd_2 = "rro:0F5ArM|rx/oq:",
  cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:",
  cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  cvtss2sd_2 = "rro:F30F5ArM|rx/od:",
  cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
  cvttpd2dq_2 = "rmo:660FE6rM",
  cvttps2dq_2 = "rmo:F30F5BrM",
  cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  fxsave_1 = "x.:0FAE0m",
  fxrstor_1 = "x.:0FAE1m",
  ldmxcsr_1 = "xd:0FAE2m",
  lfence_0 = "0FAEE8",
  maskmovdqu_2 = "rro:660FF7rM",
  mfence_0 = "0FAEF0",
  movapd_2 = "rmo:660F28rM|mro:660F29Rm",
  movaps_2 = "rmo:0F28rM|mro:0F29Rm",
  movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  movdqa_2 = "rmo:660F6FrM|mro:660F7FRm",
  movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm",
  movhlps_2 = "rro:0F12rM",
  movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm",
  movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm",
  movlhps_2 = "rro:0F16rM",
  movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm",
  movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm",
  movmskpd_2 = "rr/do:660F50rM",
  movmskps_2 = "rr/do:0F50rM",
  movntdq_2 = "xro:660FE7Rm",
  movnti_2 = "xrqd:0FC3Rm",
  movntpd_2 = "xro:660F2BRm",
  movntps_2 = "xro:0F2BRm",
  movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  movupd_2 = "rmo:660F10rM|mro:660F11Rm",
  movups_2 = "rmo:0F10rM|mro:0F11Rm",
  orpd_2 = "rmo:660F56rM",
  orps_2 = "rmo:0F56rM",
  pause_0 = "F390",
  pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
  pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:",
  pmovmskb_2 = "rr/do:660FD7rM",
  prefetchnta_1 = "xb:n0F180m",
  prefetcht0_1 = "xb:n0F181m",
  prefetcht1_1 = "xb:n0F182m",
  prefetcht2_1 = "xb:n0F183m",
  pshufd_3 = "rmio:660F70rMU",
  pshufhw_3 = "rmio:F30F70rMU",
  pshuflw_3 = "rmio:F20F70rMU",
  pslld_2 = "rmo:660FF2rM|rio:660F726mU",
  pslldq_2 = "rio:660F737mU",
  psllq_2 = "rmo:660FF3rM|rio:660F736mU",
  psllw_2 = "rmo:660FF1rM|rio:660F716mU",
  psrad_2 = "rmo:660FE2rM|rio:660F724mU",
  psraw_2 = "rmo:660FE1rM|rio:660F714mU",
  psrld_2 = "rmo:660FD2rM|rio:660F722mU",
  psrldq_2 = "rio:660F733mU",
  psrlq_2 = "rmo:660FD3rM|rio:660F732mU",
  psrlw_2 = "rmo:660FD1rM|rio:660F712mU",
  rcpps_2 = "rmo:0F53rM",
  rcpss_2 = "rro:F30F53rM|rx/od:",
  rsqrtps_2 = "rmo:0F52rM",
  rsqrtss_2 = "rmo:F30F52rM",
  sfence_0 = "0FAEF8",
  shufpd_3 = "rmio:660FC6rMU",
  shufps_3 = "rmio:0FC6rMU",
  stmxcsr_1 = "xd:0FAE3m",
  ucomisd_2 = "rro:660F2ErM|rx/oq:",
  ucomiss_2 = "rro:0F2ErM|rx/od:",
  unpckhpd_2 = "rmo:660F15rM",
  unpckhps_2 = "rmo:0F15rM",
  unpcklpd_2 = "rmo:660F14rM",
  unpcklps_2 = "rmo:0F14rM",
  xorpd_2 = "rmo:660F57rM",
  xorps_2 = "rmo:0F57rM",

  -- SSE3 ops
  fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m",
  addsubpd_2 = "rmo:660FD0rM",
  addsubps_2 = "rmo:F20FD0rM",
  haddpd_2 = "rmo:660F7CrM",
  haddps_2 = "rmo:F20F7CrM",
  hsubpd_2 = "rmo:660F7DrM",
  hsubps_2 = "rmo:F20F7DrM",
  lddqu_2 = "rxo:F20FF0rM",
  movddup_2 = "rmo:F20F12rM",
  movshdup_2 = "rmo:F30F16rM",
  movsldup_2 = "rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 = "rmo:660F381CrM",
  pabsd_2 = "rmo:660F381ErM",
  pabsw_2 = "rmo:660F381DrM",
  palignr_3 = "rmio:660F3A0FrMU",
  phaddd_2 = "rmo:660F3802rM",
  phaddsw_2 = "rmo:660F3803rM",
  phaddw_2 = "rmo:660F3801rM",
  phsubd_2 = "rmo:660F3806rM",
  phsubsw_2 = "rmo:660F3807rM",
  phsubw_2 = "rmo:660F3805rM",
  pmaddubsw_2 = "rmo:660F3804rM",
  pmulhrsw_2 = "rmo:660F380BrM",
  pshufb_2 = "rmo:660F3800rM",
  psignb_2 = "rmo:660F3808rM",
  psignd_2 = "rmo:660F380ArM",
  psignw_2 = "rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 = "rmio:660F3A0DrMU",
  blendps_3 = "rmio:660F3A0CrMU",
  blendvpd_3 = "rmRo:660F3815rM",
  blendvps_3 = "rmRo:660F3814rM",
  dppd_3 = "rmio:660F3A41rMU",
  dpps_3 = "rmio:660F3A40rMU",
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  insertps_3 = "rrio:660F3A21rMU|rxi/od:",
  movntdqa_2 = "rxo:660F382ArM",
  mpsadbw_3 = "rmio:660F3A42rMU",
  packusdw_2 = "rmo:660F382BrM",
  pblendvb_3 = "rmRo:660F3810rM",
  pblendw_3 = "rmio:660F3A0ErMU",
  pcmpeqq_2 = "rmo:660F3829rM",
  pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 = "mri/do:660F3A16RmU",
  pextrq_3 = "mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 = "rmi/od:660F3A22rMU",
  pinsrq_3 = "rmi/oq:660F3A22rXMU",
  pmaxsb_2 = "rmo:660F383CrM",
  pmaxsd_2 = "rmo:660F383DrM",
  pmaxud_2 = "rmo:660F383FrM",
  pmaxuw_2 = "rmo:660F383ErM",
  pminsb_2 = "rmo:660F3838rM",
  pminsd_2 = "rmo:660F3839rM",
  pminud_2 = "rmo:660F383BrM",
  pminuw_2 = "rmo:660F383ArM",
  pmovsxbd_2 = "rro:660F3821rM|rx/od:",
  pmovsxbq_2 = "rro:660F3822rM|rx/ow:",
  pmovsxbw_2 = "rro:660F3820rM|rx/oq:",
  pmovsxdq_2 = "rro:660F3825rM|rx/oq:",
  pmovsxwd_2 = "rro:660F3823rM|rx/oq:",
  pmovsxwq_2 = "rro:660F3824rM|rx/od:",
  pmovzxbd_2 = "rro:660F3831rM|rx/od:",
  pmovzxbq_2 = "rro:660F3832rM|rx/ow:",
  pmovzxbw_2 = "rro:660F3830rM|rx/oq:",
  pmovzxdq_2 = "rro:660F3835rM|rx/oq:",
  pmovzxwd_2 = "rro:660F3833rM|rx/oq:",
  pmovzxwq_2 = "rro:660F3834rM|rx/od:",
  pmuldq_2 = "rmo:660F3828rM",
  pmulld_2 = "rmo:660F3840rM",
  ptest_2 = "rmo:660F3817rM",
  roundpd_3 = "rmio:660F3A09rMU",
  roundps_3 = "rmio:660F3A08rMU",
  roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:",
  roundss_3 = "rrio:660F3A0ArMU|rxi/od:",

  -- SSE4.2 ops
  crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  pcmpestri_3 = "rmio:660F3A61rMU",
  pcmpestrm_3 = "rmio:660F3A60rMU",
  pcmpgtq_2 = "rmo:660F3837rM",
  pcmpistri_3 = "rmio:660F3A63rMU",
  pcmpistrm_3 = "rmio:660F3A62rMU",
  popcnt_2 = "rmqdw:F30FB8rM",

  -- SSE4a
  extrq_2 = "rro:660F79rM",
  extrq_3 = "riio:660F780mUU",
  insertq_2 = "rro:F20F79rM",
  insertq_4 = "rriio:F20F78rMUU",
  lzcnt_2 = "rmqdw:F30FBDrM",
  movntsd_2 = "xr/qo:nF20F2BRm",
  movntss_2 = "xr/do:F30F2BRm",
  -- popcnt is also in SSE4.2

  -- AES-NI
  aesdec_2 = "rmo:660F38DErM",
  aesdeclast_2 = "rmo:660F38DFrM",
  aesenc_2 = "rmo:660F38DCrM",
  aesenclast_2 = "rmo:660F38DDrM",
  aesimc_2 = "rmo:660F38DBrM",
  aeskeygenassist_3 = "rmio:660F3ADFrMU",
  pclmulqdq_3 = "rmio:660F3A44rMU",

  -- AVX FP ops
  vaddsubpd_3 = "rrmoy:660FVD0rM",
  vaddsubps_3 = "rrmoy:F20FVD0rM",
  vandpd_3 = "rrmoy:660FV54rM",
  vandps_3 = "rrmoy:0FV54rM",
  vandnpd_3 = "rrmoy:660FV55rM",
  vandnps_3 = "rrmoy:0FV55rM",
  vblendpd_4 = "rrmioy:660F3AV0DrMU",
  vblendps_4 = "rrmioy:660F3AV0CrMU",
  vblendvpd_4 = "rrmroy:660F3AV4BrMs",
  vblendvps_4 = "rrmroy:660F3AV4ArMs",
  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
  vcmppd_4 = "rrmioy:660FVC2rMU",
  vcmpps_4 = "rrmioy:0FVC2rMU",
  vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:",
  vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:",
  vcomisd_2 = "rro:660Fu2FrM|rx/oq:",
  vcomiss_2 = "rro:0Fu2FrM|rx/od:",
  vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:",
  vcvtdq2ps_2 = "rmoy:0Fu5BrM",
  vcvtpd2dq_2 = "rmoy:F20FuE6rM",
  vcvtpd2ps_2 = "rmoy:660Fu5ArM",
  vcvtps2dq_2 = "rmoy:660Fu5BrM",
  vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:",
  vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
  vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:",
  vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
  vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
  vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:",
  vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
  vdppd_4 = "rrmio:660F3AV41rMU",
  vdpps_4 = "rrmioy:660F3AV40rMU",
  vextractf128_3 = "mri/oy:660F3AuL19RmU",
  vextractps_3 = "mri/do:660F3Au17RmU",
  vhaddpd_3 = "rrmoy:660FV7CrM",
  vhaddps_3 = "rrmoy:F20FV7CrM",
  vhsubpd_3 = "rrmoy:660FV7DrM",
  vhsubps_3 = "rrmoy:F20FV7DrM",
  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
  vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:",
  vldmxcsr_1 = "xd:0FuAE2m",
  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
  vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm",
  vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm",
  vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
  vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
  vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:",
  vmovhlps_3 = "rrro:0FV12rM",
  vmovhpd_2 = "xr/qo:660Fu17Rm",
  vmovhpd_3 = "rrx/ooq:660FV16rM",
  vmovhps_2 = "xr/qo:0Fu17Rm",
  vmovhps_3 = "rrx/ooq:0FV16rM",
  vmovlhps_3 = "rrro:0FV16rM",
  vmovlpd_2 = "xr/qo:660Fu13Rm",
  vmovlpd_3 = "rrx/ooq:660FV12rM",
  vmovlps_2 = "xr/qo:0Fu13Rm",
  vmovlps_3 = "rrx/ooq:0FV12rM",
  vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM",
  vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM",
  vmovntpd_2 = "xroy:660Fu2BRm",
  vmovntps_2 = "xroy:0Fu2BRm",
  vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
  vmovsd_3 = "rrro:F20FV10rM",
  vmovshdup_2 = "rmoy:F30Fu16rM",
  vmovsldup_2 = "rmoy:F30Fu12rM",
  vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
  vmovss_3 = "rrro:F30FV10rM",
  vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm",
  vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm",
  vorpd_3 = "rrmoy:660FV56rM",
  vorps_3 = "rrmoy:0FV56rM",
  vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
  vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
  vperm2f128_4 = "rrmiy:660F3AV06rMU",
  vptestpd_2 = "rmoy:660F38u0FrM",
  vptestps_2 = "rmoy:660F38u0ErM",
  vrcpps_2 = "rmoy:0Fu53rM",
  vrcpss_3 = "rrro:F30FV53rM|rrx/ood:",
  vrsqrtps_2 = "rmoy:0Fu52rM",
  vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:",
  -- reg, r/m, imm form: no operand is available for vvvv, hence "u".
  vroundpd_3 = "rmioy:660F3Au09rMU",
  vroundps_3 = "rmioy:660F3Au08rMU",
  vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:",
  vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:",
  vshufpd_4 = "rrmioy:660FVC6rMU",
  vshufps_4 = "rrmioy:0FVC6rMU",
  vsqrtps_2 = "rmoy:0Fu51rM",
  vsqrtss_2 = "rro:F30Fu51rM|rx/od:",
  vsqrtpd_2 = "rmoy:660Fu51rM",
  vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:",
  vstmxcsr_1 = "xd:0FuAE3m",
  vucomisd_2 = "rro:660Fu2ErM|rx/oq:",
  vucomiss_2 = "rro:0Fu2ErM|rx/od:",
  vunpckhpd_3 = "rrmoy:660FV15rM",
  vunpckhps_3 = "rrmoy:0FV15rM",
  vunpcklpd_3 = "rrmoy:660FV14rM",
  vunpcklps_3 = "rrmoy:0FV14rM",
  vxorpd_3 = "rrmoy:660FV57rM",
  vxorps_3 = "rrmoy:0FV57rM",
  vzeroall_0 = "0FuL77",
  vzeroupper_0 = "0Fu77",

  -- AVX2 FP ops
  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
  -- *vgather* (!vsib)
  vpermpd_3 = "rmiy:660F3AuX01rMU",
  vpermps_3 = "rrmy:660F38V16rM",

  -- AVX, AVX2 integer ops
  -- In general, xmm requires AVX, ymm requires AVX2.
  vaesdec_3 = "rrmo:660F38VDErM",
  vaesdeclast_3 = "rrmo:660F38VDFrM",
  vaesenc_3 = "rrmo:660F38VDCrM",
  vaesenclast_3 = "rrmo:660F38VDDrM",
  vaesimc_2 = "rmo:660F38uDBrM",
  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
  vlddqu_2 = "rxoy:F20FuF0rM",
  vmaskmovdqu_2 = "rro:660FuF7rM",
  vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm",
  vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
  vmovntdq_2 = "xroy:660FuE7Rm",
  vmovntdqa_2 = "rxoy:660F38u2ArM",
  vmpsadbw_4 = "rrmioy:660F3AV42rMU",
  vpabsb_2 = "rmoy:660F38u1CrM",
  vpabsd_2 = "rmoy:660F38u1ErM",
  vpabsw_2 = "rmoy:660F38u1DrM",
  vpackusdw_3 = "rrmoy:660F38V2BrM",
  vpalignr_4 = "rrmioy:660F3AV0FrMU",
  vpblendvb_4 = "rrmroy:660F3AV4CrMs",
  vpblendw_4 = "rrmioy:660F3AV0ErMU",
  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
  vpcmpeqq_3 = "rrmoy:660F38V29rM",
  vpcmpestri_3 = "rmio:660F3Au61rMU",
  vpcmpestrm_3 = "rmio:660F3Au60rMU",
  vpcmpgtq_3 = "rrmoy:660F38V37rM",
  vpcmpistri_3 = "rmio:660F3Au63rMU",
  vpcmpistrm_3 = "rmio:660F3Au62rMU",
  vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
  vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
  vpextrd_3 = "mri/do:660F3Au16RmU",
  vpextrq_3 = "mri/qo:660F3Au16RmU",
  vphaddw_3 = "rrmoy:660F38V01rM",
  vphaddd_3 = "rrmoy:660F38V02rM",
  vphaddsw_3 = "rrmoy:660F38V03rM",
  vphminposuw_2 = "rmo:660F38u41rM",
  vphsubw_3 = "rrmoy:660F38V05rM",
  vphsubd_3 = "rrmoy:660F38V06rM",
  vphsubsw_3 = "rrmoy:660F38V07rM",
  vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:",
  vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:",
  vpinsrd_4 = "rrmi/ood:660F3AV22rMU",
  vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU",
  vpmaddubsw_3 = "rrmoy:660F38V04rM",
  vpmaxsb_3 = "rrmoy:660F38V3CrM",
  vpmaxsd_3 = "rrmoy:660F38V3DrM",
  vpmaxuw_3 = "rrmoy:660F38V3ErM",
  vpmaxud_3 = "rrmoy:660F38V3FrM",
  vpminsb_3 = "rrmoy:660F38V38rM",
  vpminsd_3 = "rrmoy:660F38V39rM",
  vpminuw_3 = "rrmoy:660F38V3ArM",
  vpminud_3 = "rrmoy:660F38V3BrM",
  vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM",
  vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:",
  vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:",
  vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:",
  vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:",
  vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:",
  vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:",
  vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:",
  vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:",
  vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:",
  vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:",
  vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:",
  vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:",
  vpmuldq_3 = "rrmoy:660F38V28rM",
  vpmulhrsw_3 = "rrmoy:660F38V0BrM",
  vpmulld_3 = "rrmoy:660F38V40rM",
  vpshufb_3 = "rrmoy:660F38V00rM",
  vpshufd_3 = "rmioy:660Fu70rMU",
  vpshufhw_3 = "rmioy:F30Fu70rMU",
  vpshuflw_3 = "rmioy:F20Fu70rMU",
  vpsignb_3 = "rrmoy:660F38V08rM",
  vpsignw_3 = "rrmoy:660F38V09rM",
  vpsignd_3 = "rrmoy:660F38V0ArM",
  vpslldq_3 = "rrioy:660Fv737mU",
  vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU",
  vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU",
  vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU",
  vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU",
  vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU",
  vpsrldq_3 = "rrioy:660Fv733mU",
  vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU",
  vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU",
  vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU",
  vptest_2 = "rmoy:660F38u17rM",

  -- AVX2 integer ops
  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
  vextracti128_3 = "mri/oy:660F3AuL39RmU",
  vpblendd_4 = "rrmioy:660F3AV02rMU",
  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
  vpermd_3 = "rrmy:660F38V36rM",
  vpermq_3 = "rmiy:660F3AuX00rMU",
  -- *vpgather* (!vsib)
  vperm2i128_4 = "rrmiy:660F3AV46rMU",
  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
  vpsllvd_3 = "rrmoy:660F38V47rM",
  vpsllvq_3 = "rrmoy:660F38VX47rM",
  vpsravd_3 = "rrmoy:660F38V46rM",
  vpsrlvd_3 = "rrmoy:660F38V45rM",
  vpsrlvq_3 = "rrmoy:660F38VX45rM",

  -- Intel ADX
  adcx_2 = "rmqd:660F38F6rM",
  adox_2 = "rmqd:F30F38F6rM",
}

------------------------------------------------------------------------------

-- Arithmetic ops.
-- n is the operation index: it selects the opcode row (n*8) for the
-- register/accumulator forms and the /n spare field for the 81/83 forms.
for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3,
                     ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do
  local n8 = shl(n, 3)
  map_op[name.."_2"] = format(
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi",
    1+n8, 3+n8, n, n, 5+n8, n)
end

-- Shift ops.
-- n becomes the /n spare field; sal shares its encoding with shl.
for name,n in pairs{ rol = 0, ror = 1, rcl = 2, rcr = 3,
                     shl = 4, shr = 5, sar = 7, sal = 4 } do
  map_op[name.."_2"] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", n, n, n)
end

-- Conditional ops.
-- One jcc/setcc/cmovcc entry per condition code in map_cc.
for cc,n in pairs(map_cc) do
  map_op["j"..cc.."_1"] = format("J.:n0F8%XJ", n) -- short: 7%X
  map_op["set"..cc.."_1"] = format("mb:n0F9%X2m", n)
  map_op["cmov"..cc.."_2"] = format("rmqdw:0F4%XrM", n) -- P6+
end

-- FP arithmetic ops.
for name,n in pairs{ add = 0, mul = 1, com = 2, comp = 3,
                     sub = 4, subr = 5, div = 6, divr = 7 } do
  local mnemonic = "f"..name
  -- Second opcode byte of the register form: C0 + 8*n.
  local regbyte = 0xc0 + shl(n, 3)
  map_op[mnemonic.."_1"] =
    format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", regbyte, n, n)
  if n == 2 or n == 3 then
    -- fcom/fcomp: only the st0-first register form plus the memory forms.
    map_op[mnemonic.."_2"] =
      format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", regbyte, n, n)
  else
    -- For n >= 4 (sub/subr/div/divr) the st(i)-destination forms use the
    -- byte of the neighbouring op: +8 for even n, -8 for odd n.
    local revbyte = regbyte
    if n >= 4 then
      if n % 2 == 0 then
        revbyte = regbyte + 8
      else
        revbyte = regbyte - 8
      end
    end
    map_op[mnemonic.."_2"] =
      format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM",
             regbyte, revbyte, n, n)
    map_op[mnemonic.."p_1"] = format("ff:DE%02Xr", revbyte)
    map_op[mnemonic.."p_2"] = format("fFf:DE%02Xr", revbyte)
  end
  -- Integer memory operand variant (fiadd, fimul, ...).
  map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", n, n)
end

-- FP conditional moves.
for cc,n in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  -- Low two bits of n are shifted left by 3, bit 2 is shifted left by 6.
  local lowbits = shl(band(n, 3), 3)
  local highbit = shl(band(n, 4), 6)
  local hexop = format("%04X", 0xdac0 + lowbits + highbit)
  map_op["fcmov"..cc.."_1"] = "ff:"..hexop.."r" -- P6+
  map_op["fcmov"..cc.."_2"] = "Fff:"..hexop.."R" -- P6+
end

-- SSE / AVX FP arithmetic ops.
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
                     sub = 12, min = 13, div = 14, max = 15 } do
  -- n is the low nibble of the 0F 5x opcode byte.
  local nib = format("%X", n)
  map_op[name.."ps_2"] = "rmo:0F5"..nib.."rM"
  map_op[name.."ss_2"] = "rro:F30F5"..nib.."rM|rx/od:"
  map_op[name.."pd_2"] = "rmo:660F5"..nib.."rM"
  map_op[name.."sd_2"] = "rro:F20F5"..nib.."rM|rx/oq:"
  -- The VEX forms of sqrt (n == 1) are listed explicitly in map_op.
  if n ~= 1 then
    local vname = "v"..name
    map_op[vname.."ps_3"] = "rrmoy:0FV5"..nib.."rM"
    map_op[vname.."ss_3"] = "rrro:F30FV5"..nib.."rM|rrx/ood:"
    map_op[vname.."pd_3"] = "rrmoy:660FV5"..nib.."rM"
    map_op[vname.."sd_3"] = "rrro:F20FV5"..nib.."rM|rrx/ooq:"
  end
end

-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- Each entry maps a mnemonic to the final opcode byte following 66 0F.
for name,n in pairs{
  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
} do
  -- Two-operand form, size match "o".
  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
  -- Three-operand VEX form ("V": vvvv from the 2nd operand), size "o" or "y".
  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
end

------------------------------------------------------------------------------

-- Pattern characters that select VEX encoding. The value is the index of the
-- operand that supplies vvvv (and is removed from the operand list), or
-- false if vvvv is unused.
local map_vexarg = { u = false, v = 1, V = 2 }

-- Process pattern string.
--   pat:     pattern part of a template (everything after the ":").
--   args:    list of parsed operands; modified in place by "v"/"V".
--   sz:      effective operand size character (or nil).
--   op:      opcode name, only used in error messages.
--   needrex: if true, 16 is added to the rex flags passed to wputop().
-- The pattern is walked one character at a time; hex digits accumulate in
-- `opcode` until another character forces it to be emitted.
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin, vex
  local opcode = 0
  local szov = sz    -- Size used for opcode mods; cleared by "n".
  local narg = 1     -- Index of the next operand to use as an immediate.
  local rex = 0      -- REX flag accumulator, interpreted by wputop().

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 6 positions.
  if secpos+6 > maxsecpos then wflush() end

  -- Process each character.
  -- A trailing "|" is appended so the opcode is always flushed at the end.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then -- Hex digit.
      -- Convert the ASCII code: "0"-"9" -> 0-9, "A"-"F" (17-22 after the
      -- first subtraction) -> 10-15, "a"-"f" (49-54) -> 10-15.
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then -- Force REX.W.
      rex = 8
    elseif c == "L" then -- Force VEX.L.
      -- Requires a preceding "u"/"v"/"V" in the pattern (vex must be set).
      vex.l = true
    elseif c == "r" then -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
      -- s is the value for the spare field: either the register merged by
      -- a previous "r"/"R" or the last hex digit of the pattern.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - band(s, 7) -- Undo regno opcode merge.
      else
        s = band(opcode, 15) -- Undo last digit.
        opcode = shr(opcode, 4)
      end
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      -- Accumulate REX flags: 8 for "q" size, 1/2/4 for an extended
      -- (> 7) rm register, index register and spare register respectively.
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
      opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
      addin = nil
    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
      -- The hex digits seen so far hold [prefix] 0F [38|3A]. Peel them off
      -- from the low end: m is the opcode map (1: 0F, 2: 0F38, 3: 0F3A).
      local b = band(opcode, 255); opcode = shr(opcode, 8)
      local m = 1
      if b == 0x38 then m = 2
      elseif b == 0x3a then m = 3 end
      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
      if b ~= 0x0f then
        werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
          "' in pattern `"..pat.."' for `"..op.."'")
      end
      -- For "v"/"V" take the vvvv operand out of args, so later pattern
      -- characters index the remaining operands.
      local v = map_vexarg[c]
      if v then v = remove(args, v) end
      -- p encodes an optional 66/F3/F2 prefix byte as 1/2/3.
      b = band(opcode, 255)
      local p = 0
      if b == 0x66 then p = 1
      elseif b == 0xf3 then p = 2
      elseif b == 0xf2 then p = 3 end
      if p ~= 0 then opcode = shr(opcode, 8) end
      -- Emit any bytes that preceded the prefix as a plain opcode.
      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
      vex = { m = m, p = p, v = v }
    else
      if opcode then -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          -- reg == -1: the register number was merged as -1 % 8 == 7 above;
          -- undo that and emit a VREG action for the opcode instead.
          local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
          wvreg("opcode", addin.vreg, psz, sk)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex, vex)
        end
        opcode = nil
      end
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        -- Labels (mode "iJ") are only accepted by "i", "I" and "J".
        if mode == "iJ" and not match("iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        elseif c == "s" then
          -- Register number goes into the upper 4 bits of the byte.
          local reg = a.reg
          if reg < 0 then
            wputb(0)
            wvreg("imm.hi", a.vreg)
          else
            wputb(shl(reg, 4))
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end

------------------------------------------------------------------------------

-- Mapping of operand modes to short names. Suppress output with '#'.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
--   template: full template string ("|"-separated match:pattern pairs).
--   nparams:  number of operands of the opcode.
-- Returns "" for zero-operand opcodes, otherwise a list with one
-- "mode, mode, ..." string per template alternative. Alternatives that
-- contain a suppressed ('#') mode name are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    -- The first nparams characters of each alternative are the mode match.
    local s = map_modename[sub(tm, 1, 1)]
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end

-- Match operand modes against mode match part of template.
-- Returns true when every parsed operand's mode string matches the
-- corresponding mode character of the template entry, nil otherwise.
local function matchtm(tm, args)
  for i=1,#args do
    -- The i-th template character is used as a Lua pattern on the mode string.
    if not match(args[i].mode, sub(tm, i, i)) then return end
  end
  return true
end

-- Handle opcodes defined with template strings.
-- Called without params it returns the help text for the template.
-- Otherwise it parses all operands, picks the first template entry whose
-- modes and sizes match and hands its pattern to dopattern(). If no entry
-- fits, an error describing the kind of mismatch is raised via werror().
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- Zero-operand opcodes have no match part.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Determine common operand size (coerce undefined size) or flag as mixed.
  -- Also collect the needrex flag; operands that disagree on it are an error.
  local sz, szmix, needrex
  for i,p in ipairs(params) do
    args[i] = parseoperand(p)
    local nsz = args[i].opsize
    if nsz then
      if sz and sz ~= nsz then szmix = true else sz = nsz end
    end
    local nrex = args[i].needrex
    if nrex ~= nil then
      if needrex == nil then
        needrex = nrex
      elseif needrex ~= nrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Try all match:pattern pairs (separated by '|').
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- Split off size match (starts after mode match) and pattern string.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    -- An empty pattern means: reuse the pattern of the previous entry.
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then -- Exactly match leading operand sizes.
        -- Walk the size characters backwards; character i belongs to
        -- operand i-1. Reaching i == 1 means all of them matched.
        for i = #szm,1,-1 do
          if i == 1 then
            dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
            return
          elseif args[i-1].opsize ~= sub(szm, i, i) then
            break
          end
        end
      else -- Match common operand size.
        local szp = sz
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        -- A "1" or "2" prefix selects the size of that operand and clears
        -- the mixed-size flag (the flag stays cleared for later entries).
        if prefix == "1" then szp = args[1].opsize; szmix = nil
        elseif prefix == "2" then szp = args[2].opsize; szmix = nil end
        -- A "." prefix accepts any size; "#" stands in for an unknown size.
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
          return
        end
      end
      -- Modes matched but sizes did not: remember this for the error text.
      gotmatch = true
    end
  end

  local msg = "bad operand mode"
  if gotmatch then
    if szmix then
      msg = "mixed operand size"
    else
      msg = sz and "bad operand size" or "missing operand size"
    end
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end

------------------------------------------------------------------------------

-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  -- Accepts "reg, imm", "reg, [disp]" or "[disp], reg". The 64 bit
  -- expression is emitted as two IMM_D actions (low half, then high half).
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, sz, rex, vreg
    -- A bracketed first operand selects the "[disp], reg" form.
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        -- "reg, [disp]" form.
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        -- "reg, imm" form: needs a 64 bit register as destination.
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if a.reg == -1 then
          -- Variable register: the register number is patched in via VREG.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        rex = a.reg > 7 and 9 or 8
      end
    end
    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end

------------------------------------------------------------------------------

-- Pseudo-opcodes for data storage.
-- The element size is taken from the second character of the opcode name
-- (e.g. "b" for ".byte"); "a" is replaced by the current address size.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _,p in ipairs(params) do
    local a = parseoperand(p)
    -- Only immediates are allowed; an explicit size must agree with sz.
    if sub(a.mode, 1, 1) ~= "i" or (a.opsize and a.opsize ~= sz) then
      werror("bad mode or size in `"..p.."'")
    end
    if a.mode == "iJ" then
      -- Label operand.
      if sz == 'q' then
        wputlabel64("IMM_", a.imm, 1)
      else
        wputlabel("IMM_", a.imm, 1)
      end
    else
      -- Plain immediate operand.
      if sz == 'q' then
        wputqarg(a.imm)
      else
        wputszarg(sz, a.imm)
      end
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end

map_op[".byte_*"] = op_data
map_op[".sbyte_*"] = op_data
map_op[".word_*"] = op_data
map_op[".dword_*"] = op_data
map_op[".aword_*"] = op_data

------------------------------------------------------------------------------

-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  -- Output is deferred: the closure runs when the line is written out.
  wline(function(out) writeactions(out, name) end)
end

-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local prefix = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobals(out, prefix) end)
end

-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeglobalnames(out, name) end)
end

-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local name = params[1] -- No syntax check. You get to keep the pieces.
  wline(function(out) writeexternnames(out, name) end)
end

------------------------------------------------------------------------------

-- Label pseudo-opcode (converted from trailing colon form).
-- An optional second parameter assigns an address to the label.
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local a = parseoperand(params[1])
  local mode, imm = a.mode, a.imm
  if type(imm) == "number" and (mode == "iJ" or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif mode == "iJ" then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addr = params[2]
  if addr then
    local target = parseoperand(addr)
    if target.mode == "iPJ" then
      waction("SETLABEL", target.imm)
    else
      werror("bad label assignment")
    end
  end
end
map_op[".label_1"] = map_op[".label_2"]

------------------------------------------------------------------------------

-- Alignment pseudo-opcode.
-- The argument is either a number or an operand size name.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local align = tonumber(params[1]) or map_opsizenum[map_opsize[params[1]]]
  if align then
    local x = align
    -- Must be a power of 2 in the range (2 ... 256).
    for i=1,8 do
      x = x / 2
      if x == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

-- Spacing pseudo-opcode.
-- The optional filler must be an integer in the range 0 ... 255 (default 0).
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  -- Validate the filler before anything is emitted, so a bad filler never
  -- leaves a SPACE action without its fill byte in the action list.
  local fill = params[2]
  if fill then
    fill = tonumber(fill)
    -- The filler ends up as a single action byte: reject fractions, too.
    if not fill or fill < 0 or fill > 255 or fill % 1 ~= 0 then
      werror("bad filler")
    end
  end
  waction("SPACE", params[1])
  wputxb(fill or 0)
end
map_op[".space_1"] = map_op[".space_2"]

------------------------------------------------------------------------------

-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Registers the type name, optionally bound to a base register, and emits a
-- DtN() helper macro that turns a field access into an offset.
map_op[".type_3"] = function(params, nparams)
  if not params then
    return nparams == 2 and "name, ctype" or "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  local tp = map_type[name]
  if tp then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define.
  local num = ctypenum + 1
  map_type[name] = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]

-- Dump type definitions.
-- Writes one row per user-defined type (name, C type, base register name),
-- ordered by type name. The lvl argument is accepted but not used here.
local function dumptypes(out, lvl)
  -- Collect the type names first, since pairs() has no defined order.
  local names = {}
  for name in pairs(map_type) do
    names[#names+1] = name
  end
  sort(names)

  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local tp = map_type[name]
    -- Types without a base register (or with an unknown one) show nothing.
    local regname = ""
    if tp.reg then
      regname = map_reg_rev[tp.reg] or ""
    end
    out:write(format(" %-20s %-20s %s\n", name, tp.ctype, regname))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Switch to section number num.
function _M.section(num)
  waction("SECTION")
  wputxb(num)
  -- SECTION is a terminal action, so the pending actions are flushed.
  wflush(true)
end

------------------------------------------------------------------------------

-- Print the architecture description: banner, registers and actions.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end

-- Print everything the user defined: types, globals and externs.
function _M.dumpdef(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Receive the output/error callbacks from the DynASM core and hand back
-- the flush function of this module.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end

-- Remember the architecture name and the option table.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end

-- Chain the maps: opcodes fall back to the core opcodes, and the core
-- defines fall back to the arch-specific defines.
function _M.mergemaps(map_coreop, map_def)
  setmetatable(map_op, { __index = map_coreop })
  setmetatable(map_def, { __index = map_archdef })
  return map_op, map_def
end

return _M

------------------------------------------------------------------------------
