xref: /PHP-8.0/ext/opcache/jit/dynasm/dasm_x86.lua (revision ddba2a70)
1------------------------------------------------------------------------------
2-- DynASM x86/x64 module.
3--
4-- Copyright (C) 2005-2016 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
8local x64 = x64
9
-- Module information, exported to the DynASM driver through _M._info.
local _info = {
  arch = x64 and "x64" or "x86",
  description = "DynASM x86/x64 module",
  version = "1.4.0",
  vernum = 10400,
  release = "2015-10-18",
  author = "Mike Pall",
  license = "MIT",
}

-- Table of exported glue functions for the arch-specific module.
local _M = {}
_M._info = _info
23
24-- Cache library functions.
25local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
26local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
27local _s = string
28local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
29local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
30local concat, sort, remove = table.concat, table.sort, table.remove
31local bit = bit or require("bit")
32local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift
33
34-- Inherited tables and callbacks.
35local g_opt, g_arch
36local wline, werror, wfatal, wwarn
37
-- Ordered list of action names, grouped by the arguments each action takes.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP",
  "IMM_S",
  "IMM_B",
  "IMM_W",
  "IMM_D",
  "IMM_WB",
  "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG",
  "SPACE",
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL",
  "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG",
  "REL_PC",
  -- action arg (1 byte) or ptrdiff_t arg, 1 buffer pos (link):
  "IMM_LG",
  "IMM_LG64",
  "IMM_PC",
  "IMM_PC64",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG",
  "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP",
}
66
-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
-- Kept low on purpose, to avoid excessively long C lines.
local maxsecpos = 25

-- First action number. Byte values below it do not need to be escaped.
local actfirst = 256 - #action_names
-- Action name -> action number (filled in dynamically further below).
local map_action = {}

-- Action list buffer, plus its string form (only used to remove dupes).
local actlist, actstr = {}, ""

-- Argument list for the next dasm_put(). Slot 1 is the offset into the
-- action list and starts out as 0.
local actargs = { 0 }

-- Current number of section buffer positions for dasm_put().
local secpos = 1
85
-- VREG kind encodings. Each value is a small kind code pre-shifted left by
-- 5 bits (i.e. multiplied by 32).
local map_vreg = {
  ["modrm.rm.m"] = 0 * 32,
  ["modrm.rm.r"] = 1 * 32,
  ["opcode"]     = 1 * 32,
  ["sib.base"]   = 1 * 32,
  ["sib.index"]  = 2 * 32,
  ["modrm.reg"]  = 4 * 32,
  ["vex.v"]      = 5 * 32,
  ["imm.hi"]     = 6 * 32,
}

-- Running count of VREG actions contributing to REX/VEX shrinkage.
local vreg_shrink_count = 0
100
101------------------------------------------------------------------------------
102
-- Assign consecutive action numbers, starting at actfirst, in list order.
for idx = 1, #action_names do
  map_action[action_names[idx]] = actfirst + (idx - 1)
end
108
-- Dump action names together with their numbers (hex and decimal).
-- out: object with a :write() method (e.g. a file handle).
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local aname = action_names[idx]
    local code = map_action[aname]
    out:write(format("  %-10s %02X  %d\n", aname, code, code))
  end
  out:write("\n")
end
118
-- Write the action list buffer as one big static C array called `name`.
-- Note: the last byte is removed from actlist in the process; it is emitted
-- separately so the array initializer ends without a trailing comma. An
-- empty action list is written as a single byte 255.
local function writeactions(out, name)
  local total = #actlist
  local final = actlist[total] or 255
  actlist[total] = nil -- Drop the last byte, it is written at the very end.
  out:write("static const unsigned char ", name, "[",
	    total == 0 and 1 or total, "] = {\n")
  local line = "  "
  for idx = 1, total - 1 do
    line = line..actlist[idx]..","
    if #line >= 75 then -- Wrap the C source lines.
      assert(out:write(line, "\n"))
      line = "  "
    end
  end
  out:write(line, final, "\n};\n\n")
end
136
137------------------------------------------------------------------------------
138
-- Append a raw (unescaped) byte to the action list.
-- Raises an error unless n is an integer in the range 0..255.
local function wputxb(n)
  local is_byte = n >= 0 and n <= 255 and n % 1 == 0
  assert(is_byte, "byte out of range")
  actlist[#actlist+1] = n
end
144
-- Append an action byte to the action list.
-- action: action name (must be a key of map_action).
-- a:      optional argument, queued for the next dasm_put() call.
-- num:    optional number of buffer positions to advance (default 1 when
--         an argument is given; no advance if neither a nor num is given).
local function waction(action, a, num)
  local code = map_action[action]
  assert(code, "bad action name `"..action.."'")
  wputxb(code)
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
151
-- Optionally add a VREG action (no-op if vreg is nil/false).
-- kind:  VREG kind name, must be a key of map_vreg.
-- vreg:  argument expression for the variable register.
-- psz:   optional value added to the emitted kind byte (default 0).
-- sk:    optional threshold; kinds whose encoding is below it increment
--        the pending shrink count.
-- defer: if true, keep accumulating the shrink count instead of folding
--        it into this action's kind byte.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Report the offending kind in the message (not the register expression).
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    -- Fold the accumulated shrink count into the kind byte and reset it.
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end
166
-- Emit a line of C code calling the embedded DynASM function dasm_<func>
-- with Dst followed by the given list of argument strings.
local function wcall(func, args)
  local arglist = concat(args, ", ")
  local stmt = format("dasm_%s(Dst, %s);", func, arglist)
  wline(stmt, true)
end
171
-- Remove a duplicate action list chunk. A tad slow, but so what.
-- The chunk is everything in actlist after `offset`. If the same byte
-- sequence already occurs in actstr, the new bytes are dropped and the
-- action list offset for the next dasm_put() points at the earlier copy.
-- Otherwise the chunk is appended to actstr.
local function dedupechunk(offset)
  local first, last = offset + 1, #actlist
  local chunk = char(unpack(actlist, first, last))
  local pos = find(actstr, chunk, 1, true) -- Plain search, no patterns.
  if not pos then
    actstr = actstr..chunk
    return
  end
  actargs[1] = pos - 1 -- Reuse the original offset.
  for idx = first, last do actlist[idx] = nil end -- Kill the dupe.
end
184
-- Flush the action list (intervening C code or buffer pos overflow).
-- term: if true, the list was already ended by a terminal action and no
--       STOP action is appended.
local function wflush(term)
  local offset = actargs[1]
  if #actlist ~= offset then -- Otherwise there is nothing to flush.
    if not term then waction("STOP") end -- Terminate the action list.
    dedupechunk(offset)
    wcall("put", actargs) -- Emit the call to dasm_put().
    -- The action list offset is the 1st arg to the next dasm_put() ...
    actargs = { #actlist }
    -- ... and it occupies a buffer position, too.
    secpos = 1
  end
end
195
-- Put a byte into the action list, preceded by an ESC action if its value
-- collides with the action number range (>= actfirst).
local function wputb(n)
  local needs_escape = n >= actfirst
  if needs_escape then waction("ESC") end
  wputxb(n)
end
201
202------------------------------------------------------------------------------
203
-- Global label name -> global label number.
-- Numbers are handed out automatically on first use, starting at 10;
-- requesting a number above 246 is reported as an error.
local next_global = 10
local map_global = setmetatable({}, {
  __index = function(tbl, label)
    if not match(label, "^[%a_][%w_@]*$") then werror("bad global label") end
    local num = next_global
    if num > 246 then werror("too many global labels") end
    next_global = num + 1
    tbl[label] = num -- Cache, so later lookups bypass this handler.
    return num
  end,
})
214
-- Dump the names of all global labels in order of their label number.
-- The lvl parameter is accepted but not used.
local function dumpglobals(out, lvl)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  out:write("Global labels:\n")
  for num = 10, next_global - 1 do
    out:write(format("  %s\n", by_num[num]))
  end
  out:write("\n")
end
225
-- Write the global labels as a C enum. Every enumerator is `prefix`
-- followed by the label name with anything from the first "@" on removed.
-- The enum is closed by a `prefix`_MAX entry.
local function writeglobals(out, prefix)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  out:write("enum {\n")
  for num = 10, next_global - 1 do
    local cname = gsub(by_num[num], "@.*", "") -- Keep 1st result only.
    out:write("  ", prefix, cname, ",\n")
  end
  out:write("  ", prefix, "_MAX\n};\n")
end
236
-- Write the global label names as a NULL-terminated C string array
-- called `name`, ordered by label number.
local function writeglobalnames(out, name)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  for num = 10, next_global - 1 do
    out:write("  \"", by_num[num], "\",\n")
  end
  out:write("  (const char *)0\n};\n")
end
247
248------------------------------------------------------------------------------
249
-- Extern label name -> extern label number.
-- Numbers are negative and handed out automatically on first use, starting
-- at -1 and counting down; going below -256 is reported as an error.
local next_extern = -1
local map_extern = setmetatable({}, {
  __index = function(tbl, label)
    -- No restrictions on the name for now.
    local num = next_extern
    if num < -256 then werror("too many extern labels") end
    next_extern = num - 1
    tbl[label] = num -- Cache, so later lookups bypass this handler.
    return num
  end,
})
260
-- Dump the names of all extern labels in order of assignment.
-- The lvl parameter is accepted but not used.
local function dumpexterns(out, lvl)
  local by_idx = {}
  for label, num in pairs(map_extern) do by_idx[-num] = label end
  out:write("Extern labels:\n")
  for idx = 1, -next_extern - 1 do
    out:write(format("  %s\n", by_idx[idx]))
  end
  out:write("\n")
end
271
-- Write the extern label names as a NULL-terminated C string array
-- called `name`, in order of assignment.
local function writeexternnames(out, name)
  local by_idx = {}
  for label, num in pairs(map_extern) do by_idx[-num] = label end
  out:write("static const char *const ", name, "[] = {\n")
  for idx = 1, -next_extern - 1 do
    out:write("  \"", by_idx[idx], "\",\n")
  end
  out:write("  (const char *)0\n};\n")
end
282
283------------------------------------------------------------------------------
284
-- Arch-specific maps.

-- Ext. register name -> int. name.
local map_archdef = {}
-- Int. register name -> ext. name.
local map_reg_rev = {}
-- Int. register name -> register number.
local map_reg_num = {}
-- Int. register name -> operand size.
local map_reg_opsize = {}
-- Int. register name -> valid base register?
local map_reg_valid_base = {}
-- Int. register name -> valid index register?
local map_reg_valid_index = {}
-- Int. register name -> need rex vs. no rex.
local map_reg_needrex = {}
-- Canonical list of int. register names.
local reg_list = {}

-- Type name -> { ctype, reg }
local map_type = {}
-- Type number (for _PTx macros).
local ctypenum = 0

-- Size for address operands.
local addrsize = x64 and "q" or "d"
299
-- Helper function to fill the register maps for one register class.
-- sz:    operand size code of the class ("q", "d", "w", "b", "f", "o", "y").
-- cl:    external name of the class (e.g. "Rd"); for "o"/"y" sized
--        classes it also serves as the prefix of the numbered names.
-- names: optional list of traditional register names for numbers 0..n-1.
-- Registers are recorded in reg_list in creation order; an empty string
-- is appended at the end as a group separator.
local function mkrmap(sz, cl, names)
  -- Only address-sized and dword registers may act as base or index.
  local addressable = sz == addrsize or sz == "d"

  -- Record one internal register name in the reverse/number/size maps.
  local function record(iname, extname, num)
    reg_list[#reg_list+1] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = num
    map_reg_opsize[iname] = sz
    if addressable then
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  -- The class itself stands for a variable register (number -1).
  local classname = format("@%s", sz)
  map_archdef[cl] = classname
  record(classname, cl, -1)

  -- Traditional register names.
  if names then
    for idx = 1, #names do
      local extname, num = names[idx], idx - 1
      local iname = format("@%s%x", sz, num)
      map_archdef[extname] = iname
      record(iname, extname, num)
      -- 5th and later byte register names are marked with needrex = false.
      if sz == "b" and idx > 4 then map_reg_needrex[iname] = false end
    end
  end

  -- Numbered register names (r0..r15, st0..st7, xmm0.., ymm0..).
  local top = (x64 and sz ~= "f") and 15 or 7
  for num = 0, top do
    local needrex = sz == "b" and num > 3
    local iname = format("@%s%x%s", sz, num, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end
    local extname
    if sz == "f" then
      extname = format("st%d", num)
    elseif sz == "o" or sz == "y" then
      extname = format("%s%d", cl, num)
    else
      extname = format("r%d%s", num, sz == addrsize and "" or sz)
    end
    map_archdef[extname] = iname
    -- Keep the traditional name as the canonical one, if there is any.
    if not map_reg_rev[iname] then record(iname, extname, num) end
  end

  reg_list[#reg_list+1] = ""
end
349
-- Integer registers (qword, dword, word and byte sized).
if x64 then
  mkrmap("q", "Rq", {
    "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
  })
end
mkrmap("d", "Rd", {
  "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi",
})
mkrmap("w", "Rw", {
  "ax", "cx", "dx", "bx", "sp", "bp", "si", "di",
})
mkrmap("b", "Rb", {
  "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh",
})
-- The stack pointer is never a valid index register.
map_reg_valid_index[map_archdef.esp] = false
if x64 then
  map_reg_valid_index[map_archdef.rsp] = false
  map_reg_needrex[map_archdef.Rb] = true
end
-- "Ra" is an alias for the address-sized variable register class.
map_archdef.Ra = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- AVX registers (yword sized, but oword, qword and dword accessible).
mkrmap("y", "ymm")
370
-- Operand size prefix (as written in the assembler source) -> size code.
local map_opsize = {
  byte = "b",
  word = "w",
  dword = "d",
  qword = "q",
  oword = "o",
  yword = "y",
  tword = "t",
  aword = addrsize,
}

-- Operand size code -> size in bytes.
local map_opsizenum = {
  b = 1,
  w = 2,
  d = 4,
  q = 8,
  o = 16,
  y = 32,
  t = 10,
}

-- Operand size code -> human-readable name.
local map_opsizename = {
  b = "byte",
  w = "word",
  d = "dword",
  q = "qword",
  o = "oword",
  y = "yword",
  t = "tword",
  f = "fpword",
}
387
-- Valid index register scale factors (as strings) -> scale field value.
local map_xsc = {
  ["1"] = 0,
  ["2"] = 1,
  ["4"] = 2,
  ["8"] = 3,
}

-- Condition code names -> condition number. Aliases share a line.
local map_cc = {
  o = 0,
  no = 1,
  b = 2, c = 2, nae = 2,
  nb = 3, nc = 3, ae = 3,
  e = 4, z = 4,
  ne = 5, nz = 5,
  be = 6, na = 6,
  nbe = 7, a = 7,
  s = 8,
  ns = 9,
  p = 10, pe = 10,
  np = 11, po = 11,
  l = 12, nge = 12,
  nl = 13, ge = 13,
  le = 14, ng = 14,
  nle = 15, g = 15,
}
400
401
-- Reverse defines for registers: replace every "@name" token in s that is
-- a known internal register name by its external name.
-- Returns the resulting string and the number of matched tokens.
function _M.revdef(s)
  local text, nmatches = gsub(s, "@%w+", map_reg_rev)
  return text, nmatches
end
406
-- Dump register names, operand sizes and internal numbers.
-- Empty entries in reg_list separate register groups and are printed as
-- blank lines.
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for idx = 1, #reg_list do
    local reg = reg_list[idx]
    if reg ~= "" then
      local extname = map_reg_rev[reg]
      local sizename = map_opsizename[map_reg_opsize[reg]]
      local num = map_reg_num[reg]
      out:write(format("  %-5s %-8s %s\n", extname, sizename,
		       num < 0 and "(variable)" or num))
    else
      out:write("\n")
    end
  end
end
422
423------------------------------------------------------------------------------
424
-- Put action for a label argument.
-- aprefix: "IMM_" or "REL_", selects the action family.
-- imm:     label; a non-negative number emits <aprefix>LG, a negative
--          number an EXTERN action, anything else becomes the argument
--          of a <aprefix>PC action.
-- num:     number of buffer positions, passed on to waction().
local function wputlabel(aprefix, imm, num)
  if type(imm) ~= "number" then
    waction(aprefix.."PC", imm, num)
    return
  end
  if imm >= 0 then
    waction(aprefix.."LG", nil, num)
  else
    waction("EXTERN")
    wputxb(aprefix == "IMM_" and 0 or 1)
    imm = -imm-1 -- Map extern numbers -1, -2, ... to 0, 1, ...
  end
  wputxb(imm)
end
440
-- Put action for a 64 bit label argument (IMM_LG64 or IMM_PC64).
-- aprefix: accepted for symmetry with wputlabel(), but not used.
-- imm:     label; a number emits IMM_LG64 plus the number as a byte,
--          anything else becomes the argument of an IMM_PC64 action.
-- num:     number of buffer positions, passed on to waction().
local function wputlabel64(aprefix, imm, num)
  if type(imm) ~= "number" then
    waction("IMM_PC64", imm, num)
    return
  end
  waction("IMM_LG64", nil, num)
  wputxb(imm)
end
450
-- Put a signed byte: numbers are range-checked and emitted directly,
-- anything else becomes the argument of an IMM_S action.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  if n < 0 then n = n + 256 end -- Two's complement representation.
  wputb(n)
end
461
-- Put an unsigned byte: numbers are range-checked and emitted directly,
-- anything else becomes the argument of an IMM_B action.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end
471
-- Put an unsigned word: numbers are range-checked and emitted low byte
-- first, anything else becomes the argument of an IMM_W action.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  local lo, hi = band(n, 255), shr(n, 8)
  wputb(lo)
  wputb(hi)
end
481
-- Put signed or unsigned qword or arg.
-- n may be:
--   a number: emitted as 8 literal bytes, low byte first. Only numbers in
--             the range -2^31..2^32-1 are supported (the range toint()
--             accepts). The upper dword is the sign extension for negative
--             numbers and zero otherwise.
--   a table:  { label }, emitted via wputlabel64().
--   other:    a C expression, split into two IMM_D actions (low, high).
local function wputqarg(n)
  local tn = type(n)
  if tn == "number" then
    -- Low dword.
    wputb(band(n, 255))
    wputb(band(shr(n, 8), 255))
    wputb(band(shr(n, 16), 255))
    wputb(shr(n, 24))
    -- High dword. The bit library works on 32 bit values and only uses the
    -- lower 5 bits of a shift count, so shifting by 32..56 would just
    -- repeat the low dword. Emit the extension bytes explicitly instead.
    local ext = n < 0 and 255 or 0
    wputb(ext); wputb(ext); wputb(ext); wputb(ext)
  elseif tn == "table" then
    wputlabel64("IMM_", n[1], 1)
  else
    waction("IMM_D", format("(unsigned int)(%s)", n))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", n))
  end
end
501
-- Put signed or unsigned dword or arg.
-- n may be:
--   a number: emitted as 4 literal bytes, low byte first.
--   a table:  { label }, emitted via wputlabel().
--   other:    becomes the argument of an IMM_D action.
local function wputdarg(n)
  local kind = type(n)
  if kind == "table" then
    wputlabel("IMM_", n[1], 1)
  elseif kind ~= "number" then
    waction("IMM_D", n)
  else
    wputb(band(n, 255))
    wputb(band(shr(n, 8), 255))
    wputb(band(shr(n, 16), 255))
    wputb(shr(n, 24))
  end
end
516
-- Put an operand-size dependent number or arg.
-- sz: "w" (word), "b" (unsigned byte), "s" (signed byte); a missing size,
--     "d" and "q" are all emitted as a dword. Other sizes are an error.
local function wputszarg(sz, n)
  if sz == "w" then
    wputwarg(n)
  elseif sz == "b" then
    wputbarg(n)
  elseif sz == "s" then
    wputsbarg(n)
  elseif not sz or sz == "d" or sz == "q" then
    wputdarg(n)
  else
    werror("bad operand size")
  end
end
525
-- Put multi-byte opcode with operand-size dependent modifications.
-- sz:     operand size code; "w" adds the o16 prefix byte, "b" decrements
--         the final opcode byte, "y" sets the VEX.L bit.
-- op:     opcode as a number, up to five bytes, most significant first.
-- rex:    REX bits; the low 4 bits end up in the REX prefix byte or are
--         folded (inverted) into the VEX prefix.
-- vex:    nil or a table with fields m, p, optional l and optional v
--         (an operand table with reg/vreg) describing the VEX prefix.
-- vregr, vregxb: truthy if variable registers are involved
--         (NOTE(review): presumably for the ModRM reg field and for the
--         rm/index/base fields, respectively -- confirm against callers).
-- Returns psz and sk, which are later handed on to wvreg().
local function wputop(sz, op, rex, vex, vregr, vregxb)
  local psz, sk = 0, nil
  if vex then
    local tail
    -- The short prefix form (0xc5 + one byte) is only picked if vex.m is 1
    -- and none of the rex bits 8, 2, 1 are set.
    if vex.m == 1 and band(rex, 11) == 0 then
      if x64 and vregxb then
	-- Stay with the long form (tail remains unset), but note the
	-- threshold for counting VREG actions in wvreg().
	sk = map_vreg["modrm.reg"]
      else
	wputb(0xc5)
      tail = shl(bxor(band(rex, 4), 4), 5)
      psz = 3
      end
    end
    if not tail then
      -- Long prefix form: 0xc4, then inverted rex bits 4, 2, 1 plus vex.m.
      wputb(0xc4)
      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
      tail = shl(band(rex, 8), 4)
      psz = 4
    end
    local reg, vreg = 0, nil
    if vex.v then
      reg = vex.v.reg
      if not reg then werror("bad vex operand") end
      -- A negative register number denotes a variable register.
      if reg < 0 then reg = 0; vreg = vex.v.vreg end
    end
    if sz == "y" or vex.l then tail = tail + 4 end
    -- Last prefix byte: the register number is stored inverted.
    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
    wvreg("vex.v", vreg)
    rex = 0 -- Already part of the VEX prefix, don't emit a REX prefix.
    if op >= 256 then werror("bad vex opcode") end
  else
    if rex ~= 0 then
      if not x64 then werror("bad operand size") end
    elseif (vregr or vregxb) and x64 then
      -- Variable registers on x64: use a rex value with no low bits set,
      -- so a prefix byte of 64 is still emitted below.
      rex = 0x10
      sk = map_vreg["vex.v"]
    end
  end
  local r
  if sz == "w" then wputb(102) end -- 102 = 0x66, the o16 prefix.
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      local opc3 = band(op, 0xffff00)
      -- For 0f 3a xx and 0f 38 xx opcodes the REX prefix goes right before
      -- the 0f byte.
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
	wputb(64 + band(rex, 15)); rex = 0; psz = 2
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
  end
  if op >= 256 then
    local b = shr(op, 8)
    -- Same for two-byte 0f xx opcodes: REX prefix before the 0f byte.
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    wputb(b); op = band(op, 255); psz = psz + 1
  end
  -- Otherwise a pending REX prefix goes right before the last opcode byte.
  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
  if sz == "b" then op = op - 1 end
  wputb(op)
  return psz, sk
end
589
-- Put a ModRM or SIB formatted byte: 2 bits from m, then 3 bits each from
-- s and rm. Only the low 3 bits of s and rm are encoded here.
-- The vs and vrm parameters are accepted but not used.
local function wputmodrm(m, s, rm, vs, vrm)
  local operands_ok = m < 4 and s < 16 and rm < 16
  assert(operands_ok, "bad modrm operands")
  local top = shl(m, 6)
  local middle = shl(band(s, 7), 3)
  local bottom = band(rm, 7)
  wputb(top + middle + bottom)
end
595
-- Put ModRM/SIB plus optional displacement.
-- t:      operand table as built by parseoperand(); the fields mode, reg,
--         vreg, xreg, vxreg, xsc and disp are read here.
-- imark:  immediate marker character; "I" requests a MARK action after the
--         ModRM byte. Any of "UWSiI" is rejected for rip-relative operands.
-- s:      value for the spare (reg) field of ModRM; negative is taken as 0.
-- vsreg:  variable register expression for the spare field, or nil.
-- psz, sk: values returned by wputop(), handed on to wvreg().
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  -- Negative register numbers denote variable registers: encode a zero
  -- and let a VREG action supply the real number.
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    -- The last argument defers the shrink count if another VREG follows.
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.r", vreg, psz+1, sk)
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
      wputmodrm(t.xsc, xreg, 5)
      wvreg("sib.index", vxreg, psz+2, sk)
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
	wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
	wvreg("modrm.reg", vsreg, psz+1, sk)
	if imark == "I" then waction("MARK") end
	wputmodrm(0, 4, 5)
      else
	-- On x64 a table (label) displacement is encoded rip-relative.
	riprel = x64
	wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
	wvreg("modrm.reg", vsreg, psz+1, sk)
	if imark == "I" then waction("MARK") end
      end
    end
    if riprel then -- Emit rip-relative displacement.
      if match("UWSiI", imark) then
	werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  -- m is the value for the top 2 bits of ModRM: 0 = no displacement,
  -- 1 = byte displacement, 2 = dword displacement. It stays nil if the
  -- displacement is emitted via a DISP action below.
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    wvreg("sib.index", vxreg, psz+2, sk, vreg)
    wvreg("sib.base", vreg, psz+2, sk)
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.m", vreg, psz+1, sk)
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end
682
683------------------------------------------------------------------------------
684
-- Return a human-readable operand mode string: the operation name, a
-- space, then for each operand the first character of its mode followed by
-- its operand size ("?" if unknown), separated by commas.
local function opmodestr(op, args)
  local parts = {}
  for idx = 1, #args do
    local operand = args[idx]
    local size = operand.opsize or "?"
    parts[idx] = sub(operand.mode, 1, 1)..size
  end
  return op.." "..concat(parts, ",")
end
694
-- Convert an expression to a valid integer.
-- Returns nothing if expr is not a number at all. Numbers with a
-- fractional part or outside of -2^31 .. 2^32-1 are reported via werror().
local function toint(expr)
  local num = tonumber(expr)
  if not num then return end
  if num % 1 ~= 0 or num < -2147483648 or num > 4294967295 then
    werror("bad integer number `"..expr.."'")
  end
  return num
end
705
-- Parse an immediate expression. Returns an operand mode string and the
-- associated value:
--   &expr         -> "iPJ", C expression cast to ptrdiff_t
--   =>expr        -> "iJ",  pc label expression
--   ->name        -> "iJ",  global label number
--   <N or >N      -> "iJ",  local label number (N is a digit 1-9)
--   extern name   -> "iJ",  extern label number
--   anything else -> "iI",  the expression itself
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    return "iPJ", format("(ptrdiff_t)(%s)", sub(expr,2))
  end

  local lead = sub(expr, 1, 2)
  if lead == "=>" then
    -- =>expr (pc label reference)
    return "iJ", sub(expr, 3)
  elseif lead == "->" then
    -- ->name (global label reference)
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, digit = match(expr, "^([<>])([1-9])$")
  if dir == ">" then
    return "iJ", digit + 246 -- Fwd: 247-255.
  elseif dir == "<" then
    return "iJ", digit + 0 -- Bkwd: 1-9.
  end

  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end
737
-- Parse a displacement expression: +-num, +-expr, +-opsize*num
-- Returns one of:
--   a number       for an empty string, an integer or opsize*num,
--   a table        { label } for a label reference,
--   a string       the (signed) expression otherwise; a leading "+" and
--                  following whitespace are stripped, a "-" is kept.
local function dispexpr(expr)
  if expr == "" then return 0 end
  local value = toint(expr)
  if value then return value end

  local sign, rest = match(expr, "^([+-])%s*(.+)$")
  if not sign then
    werror("bad displacement expression `"..expr.."'")
  elseif sign == "+" then
    expr = rest
  end
  local negative = sign == "-"

  -- +-opsize*num
  local sizename, factor = match(rest, "^(%w+)%s*%*%s*(.+)$")
  local ops = map_opsize[sizename]
  local imm = toint(factor)
  if ops and imm then
    if negative then imm = -imm end
    return imm*map_opsizenum[ops]
  end

  -- +-label
  local mode, iexpr = immexpr(rest)
  if mode == "iJ" then
    if negative then werror("cannot invert label reference") end
    return { iexpr }
  end

  return expr -- Need to return original signed expression.
end
761
-- Parse a register or type expression.
-- For a known type name (optionally written as "type:@reg" to override the
-- register) this returns the register, its number and the type table.
-- For anything else it returns the expression itself plus its register
-- number, which is nil if the expression is not a register.
-- Returns nothing if expr is nil.
local function rtexpr(expr)
  if not expr then return end
  local tname, override = match(expr, "^([%w_]+):(@[%w_]+)$")
  local typename = tname or expr
  local tp = map_type[typename]
  if not tp then
    return expr, map_reg_num[expr]
  end
  local reg = override or tp.reg
  local rnum = map_reg_num[reg]
  if not rnum then
    werror("type `"..typename.."' needs a register override")
  end
  if not map_reg_valid_base[reg] then
    werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
  end
  return reg, rnum, tp
end
780
-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-- param is the operand text. The fields of the returned table:
--   mode    operand mode string, matched against the instruction templates:
--           "xm"/"xmO" for memory operands, "i" plus "1"/"S" flags for
--           numeric immediates, "rm"/"rmR"/"rmC" for integer registers,
--           "f"/"fF" for FP registers, or a mode returned by immexpr().
--   opsize  operand size code from a size override or from the register.
--   reg, xreg    base and index register numbers (-1 = variable register).
--   vreg, vxreg  parenthesized expressions for variable registers.
--   xsc     index scale field value (see map_xsc).
--   disp    displacement as returned by toint(), dispexpr() or built from
--           the ctypefmt of a type.
--   imm     immediate value (number) or expression.
--   needrex value from map_reg_needrex for a plain register operand.
local function parseoperand(param)
  local t = {}

  -- Split off an optional operand size override (e.g. "dword [eax]").
  local expr = param
  local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
  if opsize then
    t.opsize = map_opsize[opsize]
    if t.opsize then expr = tailops end
  end

  -- br holds the text between the brackets for a memory operand.
  local br = match(expr, "^%[%s*(.-)%s*%]$")
  -- The loop body runs exactly once; it only exists so that `break` can be
  -- used to leave early as soon as the operand is fully parsed.
  repeat
    if br then
      t.mode = "xm"

      -- [disp]
      t.disp = toint(br)
      if t.disp then
	t.mode = x64 and "xm" or "xmO"
	break
      end

      -- [reg...]
      local tp
      local reg, tailr = match(br, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if not t.reg then
	-- [expr]
	t.mode = x64 and "xm" or "xmO"
	t.disp = dispexpr("+"..br)
	break
      end

      -- Variable register: must be followed by a parenthesized expression.
      if t.reg == -1 then
	t.vreg, tailr = match(tailr, "^(%b())(.*)$")
	if not t.vreg then werror("bad variable register expression") end
      end

      -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
      local xsc, tailsc = match(tailr, "^%*%s*([1248])%s*(.*)$")
      if xsc then
	if not map_reg_valid_index[reg] then
	  werror("bad index register `"..map_reg_rev[reg].."'")
	end
	-- The register parsed so far is the index, there is no base.
	t.xsc = map_xsc[xsc]
	t.xreg = t.reg
	t.vxreg = t.vreg
	t.reg = nil
	t.vreg = nil
	t.disp = dispexpr(tailsc)
	break
      end
      if not map_reg_valid_base[reg] then
	werror("bad base register `"..map_reg_rev[reg].."'")
      end

      -- [reg] or [reg+-disp]
      t.disp = toint(tailr) or (tailr == "" and 0)
      if t.disp then break end

      -- [reg+xreg...]
      local xreg, tailx = match(tailr, "^+%s*([@%w_:]+)%s*(.*)$")
      xreg, t.xreg, tp = rtexpr(xreg)
      if not t.xreg then
	-- [reg+-expr]
	t.disp = dispexpr(tailr)
	break
      end
      if not map_reg_valid_index[xreg] then
	werror("bad index register `"..map_reg_rev[xreg].."'")
      end

      if t.xreg == -1 then
	t.vxreg, tailx = match(tailx, "^(%b())(.*)$")
	if not t.vxreg then werror("bad variable register expression") end
      end

      -- [reg+xreg*xsc...]
      local xsc, tailsc = match(tailx, "^%*%s*([1248])%s*(.*)$")
      if xsc then
	t.xsc = map_xsc[xsc]
	tailx = tailsc
      end

      -- [...] or [...+-disp] or [...+-expr]
      t.disp = dispexpr(tailx)
    else
      -- imm or opsize*imm
      local imm = toint(expr)
      if not imm and sub(expr, 1, 1) == "*" and t.opsize then
	imm = toint(sub(expr, 2))
	if imm then
	  imm = imm * map_opsizenum[t.opsize]
	  t.opsize = nil
	end
      end
      if imm then
	if t.opsize then werror("bad operand size override") end
	local m = "i"
	if imm == 1 then m = m.."1" end
	-- Treat 0xffffff80..0xffffffff as the negative numbers -128..-1.
	if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
	if imm >= -128 and imm <= 127 then m = m.."S" end
	t.imm = imm
	t.mode = m
	break
      end

      local tp
      local reg, tailr = match(expr, "^([@%w_:]+)%s*(.*)$")
      reg, t.reg, tp = rtexpr(reg)
      if t.reg then
	if t.reg == -1 then
	  t.vreg, tailr = match(tailr, "^(%b())(.*)$")
	  if not t.vreg then werror("bad variable register expression") end
	end
	-- reg
	if tailr == "" then
	  if t.opsize then werror("bad operand size override") end
	  t.opsize = map_reg_opsize[reg]
	  if t.opsize == "f" then
	    t.mode = t.reg == 0 and "fF" or "f"
	  else
	    -- Warn about the word sized (and on x64 the dword sized) register
	    -- number 4 and suggest the full-size stack pointer instead.
	    if reg == "@w4" or (x64 and reg == "@d4") then
	      wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
	    end
	    t.mode = t.reg == 0 and "rmR" or (reg == "@b1" and "rmC" or "rm")
	  end
	  t.needrex = map_reg_needrex[reg]
	  break
	end

	-- type[idx], type[idx].field, type->field -> [reg+offset_expr]
	if not tp then werror("bad operand `"..param.."'") end
	t.mode = "xm"
	t.disp = format(tp.ctypefmt, tailr)
      else
	t.mode, t.imm = immexpr(expr)
	-- Jump targets are always address sized.
	if sub(t.mode, -1) == "J" then
	  if t.opsize and t.opsize ~= addrsize then
	    werror("bad operand size override")
	  end
	  t.opsize = addrsize
	end
      end
    end
  until true
  return t
end
930
931------------------------------------------------------------------------------
932-- x86 Template String Description
933-- ===============================
934--
935-- Each template string is a list of [match:]pattern pairs,
936-- separated by "|". The first match wins. No match means a
937-- bad or unsupported combination of operand modes or sizes.
938--
939-- The match part and the ":" is omitted if the operation has
940-- no operands. Otherwise the first N characters are matched
941-- against the mode strings of each of the N operands.
942--
943-- The mode string for each operand type is (see parseoperand()):
944--   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
945--   FP register:      "f",  +"F" for st0
946--   Index operand:    "xm", +"O" for [disp] (pure offset)
947--   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
948--                     +"I" for arg, +"P" for pointer
949--   Any:              +"J" for valid jump targets
950--
951-- So a match character "m" (mixed) matches both an integer register
952-- and an index operand (to be encoded with the ModRM/SIB scheme).
953-- But "r" matches only a register and "x" only an index operand
954-- (e.g. for FP memory access operations).
955--
956-- The operand size match string starts right after the mode match
957-- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty.
958-- The effective data size of the operation is matched against this list.
959--
960-- If only the regular "b", "w", "d", "q", "t" operand sizes are
961-- present, then all operands must be the same size. Unspecified sizes
962-- are ignored, but at least one operand must have a size or the pattern
963-- won't match (use the "byte", "word", "dword", "qword", "tword"
964-- operand size overrides. E.g.: mov dword [eax], 1).
965--
966-- If the list has a "1" or "2" prefix, the operand size is taken
967-- from the respective operand and any other operand sizes are ignored.
968-- If the list contains only ".", all operand sizes are ignored.
969-- If the list has a "/" prefix, the concatenated (mixed) operand sizes
970-- are compared to the match.
971--
972-- E.g. "rrdw" matches for either two dword registers or two word
973-- registers. "Fx2dq" matches an st0 operand plus an index operand
974-- pointing to a dword (float) or qword (double).
975--
976-- Every character after the ":" is part of the pattern string:
977--   Hex chars are accumulated to form the opcode (left to right).
978--   "n"       disables the standard opcode mods
979--             (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q")
980--   "X"       Force REX.W.
981--   "r"/"R"   adds the reg. number from the 1st/2nd operand to the opcode.
982--   "m"/"M"   generates ModRM/SIB from the 1st/2nd operand.
983--             The spare 3 bits are either filled with the last hex digit or
984--             the result from a previous "r"/"R". The opcode is restored.
985--   "u"       Use VEX encoding, vvvv unused.
986--   "v"/"V"   Use VEX encoding, vvvv from 1st/2nd operand (the operand is
987--             removed from the list used by future characters).
988--   "L"       Force VEX.L
989--
990-- All of the following characters force a flush of the opcode:
991--   "o"/"O"   stores a pure 32 bit disp (offset) from the 1st/2nd operand.
992--   "s"       stores a 4 bit immediate from the last register operand,
993--             followed by 4 zero bits.
994--   "S"       stores a signed 8 bit immediate from the last operand.
995--   "U"       stores an unsigned 8 bit immediate from the last operand.
996--   "W"       stores an unsigned 16 bit immediate from the last operand.
997--   "i"       stores an operand sized immediate from the last operand.
--   "I"       ditto, but generates an action code to optionally modify
999--             the opcode (+2) for a signed 8 bit immediate.
1000--   "J"       generates one of the REL action codes from the last operand.
1001--
1002------------------------------------------------------------------------------
1003
-- Template strings for x86 instructions. Ordered by first opcode byte.
-- Unimplemented opcodes (deliberate omissions) are marked with *.
--
-- Keys have the form "<mnemonic>_<operand count>", values are template
-- strings in the format described in the comment block above. Entries
-- written as `x64 and "..."` / `not x64 and "..."` evaluate to false (or
-- nil) when the respective mode is not selected.
local map_op = {
  -- 00-05: add...
  -- 06: *push es
  -- 07: *pop es
  -- 08-0D: or...
  -- 0E: *push cs
  -- 0F: two byte opcode prefix
  -- 10-15: adc...
  -- 16: *push ss
  -- 17: *pop ss
  -- 18-1D: sbb...
  -- 1E: *push ds
  -- 1F: *pop ds
  -- 20-25: and...
  es_0 =	"26",
  -- 27: *daa
  -- 28-2D: sub...
  cs_0 =	"2E",
  -- 2F: *das
  -- 30-35: xor...
  ss_0 =	"36",
  -- 37: *aaa
  -- 38-3D: cmp...
  ds_0 =	"3E",
  -- 3F: *aas
  inc_1 =	x64 and "m:FF0m" or "rdw:40r|m:FF0m",
  dec_1 =	x64 and "m:FF1m" or "rdw:48r|m:FF1m",
  push_1 =	(x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or
			 "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i",
  pop_1 =	x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m",
  -- 60: *pusha, *pushad, *pushaw
  -- 61: *popa, *popad, *popaw
  -- 62: *bound rdw,x
  -- 63: x86: *arpl mw,rw
  movsxd_2 =	x64 and "rm/qd:63rM",
  fs_0 =	"64",
  gs_0 =	"65",
  o16_0 =	"66",
  a16_0 =	not x64 and "67" or nil,
  a32_0 =	x64 and "67",
  -- 68: push idw
  -- 69: imul rdw,mdw,idw
  -- 6A: push ib
  -- 6B: imul rdw,mdw,S
  -- 6C: *insb
  -- 6D: *insd, *insw
  -- 6E: *outsb
  -- 6F: *outsd, *outsw
  -- 70-7F: jcc lb
  -- 80: add... mb,i
  -- 81: add... mdw,i
  -- 82: *undefined
  -- 83: add... mdw,S
  test_2 =	"mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi",
  -- 86: xchg rb,mb
  -- 87: xchg rdw,mdw
  -- 88: mov mb,r
  -- 89: mov mdw,r
  -- 8A: mov r,mb
  -- 8B: mov r,mdw
  -- 8C: *mov mdw,seg
  lea_2 =	"rx1dq:8DrM",
  -- 8E: *mov seg,mdw
  -- 8F: pop mdw
  nop_0 =	"90",
  xchg_2 =	"Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm",
  cbw_0 =	"6698",
  cwde_0 =	"98",
  cdqe_0 =	"4898",
  cwd_0 =	"6699",
  cdq_0 =	"99",
  cqo_0 =	"4899",
  -- 9A: *call iw:idw
  wait_0 =	"9B",
  fwait_0 =	"9B",
  pushf_0 =	"9C",
  pushfd_0 =	not x64 and "9C",
  pushfq_0 =	x64 and "9C",
  popf_0 =	"9D",
  popfd_0 =	not x64 and "9D",
  popfq_0 =	x64 and "9D",
  sahf_0 =	"9E",
  lahf_0 =	"9F",
  mov_2 =	"OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi",
  movsb_0 =	"A4",
  movsw_0 =	"66A5",
  movsd_0 =	"A5",
  cmpsb_0 =	"A6",
  cmpsw_0 =	"66A7",
  cmpsd_0 =	"A7",
  -- A8: test Rb,i
  -- A9: test Rdw,i
  stosb_0 =	"AA",
  stosw_0 =	"66AB",
  stosd_0 =	"AB",
  lodsb_0 =	"AC",
  lodsw_0 =	"66AD",
  lodsd_0 =	"AD",
  scasb_0 =	"AE",
  scasw_0 =	"66AF",
  scasd_0 =	"AF",
  -- B0-B7: mov rb,i
  -- B8-BF: mov rdw,i
  -- C0: rol... mb,i
  -- C1: rol... mdw,i
  ret_1 =	"i.:nC2W",
  ret_0 =	"C3",
  -- C4: *les rdw,mq
  -- C5: *lds rdw,mq
  -- C6: mov mb,i
  -- C7: mov mdw,i
  -- C8: *enter iw,ib
  leave_0 =	"C9",
  -- CA: *retf iw
  -- CB: *retf
  int3_0 =	"CC",
  int_1 =	"i.:nCDU",
  into_0 =	"CE",
  -- CF: *iret
  -- D0: rol... mb,1
  -- D1: rol... mdw,1
  -- D2: rol... mb,cl
  -- D3: rol... mdw,cl
  -- D4: *aam ib
  -- D5: *aad ib
  -- D6: *salc
  -- D7: *xlat
  -- D8-DF: floating point ops
  -- E0: *loopne
  -- E1: *loope
  -- E2: *loop
  -- E3: *jcxz, *jecxz
  -- E4: *in Rb,ib
  -- E5: *in Rdw,ib
  -- E6: *out ib,Rb
  -- E7: *out ib,Rdw
  call_1 =	x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J",
  jmp_1 =	x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB
  -- EA: *jmp iw:idw
  -- EB: jmp ib
  -- EC: *in Rb,dx
  -- ED: *in Rdw,dx
  -- EE: *out dx,Rb
  -- EF: *out dx,Rdw
  lock_0 =	"F0",
  int1_0 =	"F1",
  repne_0 =	"F2",
  repnz_0 =	"F2",
  rep_0 =	"F3",
  repe_0 =	"F3",
  repz_0 =	"F3",
  -- F4: *hlt
  cmc_0 =	"F5",
  -- F6: test... mb,i; div... mb
  -- F7: test... mdw,i; div... mdw
  clc_0 =	"F8",
  stc_0 =	"F9",
  -- FA: *cli
  cld_0 =	"FC",
  std_0 =	"FD",
  -- FE: inc... mb
  -- FF: inc... mdw

  -- misc ops
  not_1 =	"m:F72m",
  neg_1 =	"m:F73m",
  mul_1 =	"m:F74m",
  imul_1 =	"m:F75m",
  div_1 =	"m:F76m",
  idiv_1 =	"m:F77m",

  imul_2 =	"rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi",
  imul_3 =	"rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi",

  movzx_2 =	"rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:",
  movsx_2 =	"rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:",

  bswap_1 =	"rqd:0FC8r",
  bsf_2 =	"rmqdw:0FBCrM",
  bsr_2 =	"rmqdw:0FBDrM",
  bt_2 =	"mrqdw:0FA3Rm|miqdw:0FBA4mU",
  btc_2 =	"mrqdw:0FBBRm|miqdw:0FBA7mU",
  btr_2 =	"mrqdw:0FB3Rm|miqdw:0FBA6mU",
  bts_2 =	"mrqdw:0FABRm|miqdw:0FBA5mU",

  shld_3 =	"mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:",
  shrd_3 =	"mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:",

  rdtsc_0 =	"0F31", -- P1+
  rdpmc_0 =	"0F33", -- P6+
  cpuid_0 =	"0FA2", -- P1+

  -- floating point ops
  fst_1 =	"ff:DDD0r|xd:D92m|xq:nDD2m",
  fstp_1 =	"ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m",
  fld_1 =	"ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m",

  fpop_0 =	"DDD8", -- Alias for fstp st0.

  fist_1 =	"xw:nDF2m|xd:DB2m",
  fistp_1 =	"xw:nDF3m|xd:DB3m|xq:nDF7m",
  fild_1 =	"xw:nDF0m|xd:DB0m|xq:nDF5m",

  fxch_0 =	"D9C9",
  fxch_1 =	"ff:D9C8r",
  fxch_2 =	"fFf:D9C8r|Fff:D9C8R",

  fucom_1 =	"ff:DDE0r",
  fucom_2 =	"Fff:DDE0R",
  fucomp_1 =	"ff:DDE8r",
  fucomp_2 =	"Fff:DDE8R",
  fucomi_1 =	"ff:DBE8r", -- P6+
  fucomi_2 =	"Fff:DBE8R", -- P6+
  fucomip_1 =	"ff:DFE8r", -- P6+
  fucomip_2 =	"Fff:DFE8R", -- P6+
  fcomi_1 =	"ff:DBF0r", -- P6+
  fcomi_2 =	"Fff:DBF0R", -- P6+
  fcomip_1 =	"ff:DFF0r", -- P6+
  fcomip_2 =	"Fff:DFF0R", -- P6+
  fucompp_0 =	"DAE9",
  fcompp_0 =	"DED9",

  fldenv_1 =	"x.:D94m",
  fnstenv_1 =	"x.:D96m",
  fstenv_1 =	"x.:9BD96m",
  fldcw_1 =	"xw:nD95m",
  fstcw_1 =	"xw:n9BD97m",
  fnstcw_1 =	"xw:nD97m",
  fstsw_1 =	"Rw:n9BDFE0|xw:n9BDD7m",
  fnstsw_1 =	"Rw:nDFE0|xw:nDD7m",
  fclex_0 =	"9BDBE2",
  fnclex_0 =	"DBE2",

  fnop_0 =	"D9D0",
  -- D9D1-D9DF: unassigned

  fchs_0 =	"D9E0",
  fabs_0 =	"D9E1",
  -- D9E2: unassigned
  -- D9E3: unassigned
  ftst_0 =	"D9E4",
  fxam_0 =	"D9E5",
  -- D9E6: unassigned
  -- D9E7: unassigned
  fld1_0 =	"D9E8",
  fldl2t_0 =	"D9E9",
  fldl2e_0 =	"D9EA",
  fldpi_0 =	"D9EB",
  fldlg2_0 =	"D9EC",
  fldln2_0 =	"D9ED",
  fldz_0 =	"D9EE",
  -- D9EF: unassigned

  f2xm1_0 =	"D9F0",
  fyl2x_0 =	"D9F1",
  fptan_0 =	"D9F2",
  fpatan_0 =	"D9F3",
  fxtract_0 =	"D9F4",
  fprem1_0 =	"D9F5",
  fdecstp_0 =	"D9F6",
  fincstp_0 =	"D9F7",
  fprem_0 =	"D9F8",
  fyl2xp1_0 =	"D9F9",
  fsqrt_0 =	"D9FA",
  fsincos_0 =	"D9FB",
  frndint_0 =	"D9FC",
  fscale_0 =	"D9FD",
  fsin_0 =	"D9FE",
  fcos_0 =	"D9FF",

  -- SSE, SSE2
  andnpd_2 =	"rmo:660F55rM",
  andnps_2 =	"rmo:0F55rM",
  andpd_2 =	"rmo:660F54rM",
  andps_2 =	"rmo:0F54rM",
  clflush_1 =	"x.:0FAE7m",
  cmppd_3 =	"rmio:660FC2rMU",
  cmpps_3 =	"rmio:0FC2rMU",
  cmpsd_3 =	"rrio:F20FC2rMU|rxi/oq:",
  cmpss_3 =	"rrio:F30FC2rMU|rxi/od:",
  comisd_2 =	"rro:660F2FrM|rx/oq:",
  comiss_2 =	"rro:0F2FrM|rx/od:",
  cvtdq2pd_2 =	"rro:F30FE6rM|rx/oq:",
  cvtdq2ps_2 =	"rmo:0F5BrM",
  cvtpd2dq_2 =	"rmo:F20FE6rM",
  cvtpd2ps_2 =	"rmo:660F5ArM",
  cvtpi2pd_2 =	"rx/oq:660F2ArM",
  cvtpi2ps_2 =	"rx/oq:0F2ArM",
  cvtps2dq_2 =	"rmo:660F5BrM",
  cvtps2pd_2 =	"rro:0F5ArM|rx/oq:",
  cvtsd2si_2 =	"rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:",
  cvtsd2ss_2 =	"rro:F20F5ArM|rx/oq:",
  cvtsi2sd_2 =	"rm/od:F20F2ArM|rm/oq:F20F2ArXM",
  cvtsi2ss_2 =	"rm/od:F30F2ArM|rm/oq:F30F2ArXM",
  cvtss2sd_2 =	"rro:F30F5ArM|rx/od:",
  cvtss2si_2 =	"rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:",
  cvttpd2dq_2 =	"rmo:660FE6rM",
  cvttps2dq_2 =	"rmo:F30F5BrM",
  cvttsd2si_2 =	"rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:",
  cvttss2si_2 =	"rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:",
  fxsave_1 =	"x.:0FAE0m",
  fxrstor_1 =	"x.:0FAE1m",
  ldmxcsr_1 =	"xd:0FAE2m",
  lfence_0 =	"0FAEE8",
  maskmovdqu_2 = "rro:660FF7rM",
  mfence_0 =	"0FAEF0",
  movapd_2 =	"rmo:660F28rM|mro:660F29Rm",
  movaps_2 =	"rmo:0F28rM|mro:0F29Rm",
  movd_2 =	"rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:",
  movdqa_2 =	"rmo:660F6FrM|mro:660F7FRm",
  movdqu_2 =	"rmo:F30F6FrM|mro:F30F7FRm",
  movhlps_2 =	"rro:0F12rM",
  movhpd_2 =	"rx/oq:660F16rM|xr/qo:n660F17Rm",
  movhps_2 =	"rx/oq:0F16rM|xr/qo:n0F17Rm",
  movlhps_2 =	"rro:0F16rM",
  movlpd_2 =	"rx/oq:660F12rM|xr/qo:n660F13Rm",
  movlps_2 =	"rx/oq:0F12rM|xr/qo:n0F13Rm",
  movmskpd_2 =	"rr/do:660F50rM",
  movmskps_2 =	"rr/do:0F50rM",
  movntdq_2 =	"xro:660FE7Rm",
  movnti_2 =	"xrqd:0FC3Rm",
  movntpd_2 =	"xro:660F2BRm",
  movntps_2 =	"xro:0F2BRm",
  movq_2 =	"rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm",
  movsd_2 =	"rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm",
  movss_2 =	"rro:F30F10rM|rx/od:|xr/do:F30F11Rm",
  movupd_2 =	"rmo:660F10rM|mro:660F11Rm",
  movups_2 =	"rmo:0F10rM|mro:0F11Rm",
  orpd_2 =	"rmo:660F56rM",
  orps_2 =	"rmo:0F56rM",
  pause_0 =	"F390",
  pextrw_3 =	"rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only.
  pinsrw_3 =	"rri/od:660FC4rMU|rxi/ow:",
  pmovmskb_2 =	"rr/do:660FD7rM",
  prefetchnta_1 = "xb:n0F180m",
  prefetcht0_1 = "xb:n0F181m",
  prefetcht1_1 = "xb:n0F182m",
  prefetcht2_1 = "xb:n0F183m",
  pshufd_3 =	"rmio:660F70rMU",
  pshufhw_3 =	"rmio:F30F70rMU",
  pshuflw_3 =	"rmio:F20F70rMU",
  pslld_2 =	"rmo:660FF2rM|rio:660F726mU",
  pslldq_2 =	"rio:660F737mU",
  psllq_2 =	"rmo:660FF3rM|rio:660F736mU",
  psllw_2 =	"rmo:660FF1rM|rio:660F716mU",
  psrad_2 =	"rmo:660FE2rM|rio:660F724mU",
  psraw_2 =	"rmo:660FE1rM|rio:660F714mU",
  psrld_2 =	"rmo:660FD2rM|rio:660F722mU",
  psrldq_2 =	"rio:660F733mU",
  psrlq_2 =	"rmo:660FD3rM|rio:660F732mU",
  psrlw_2 =	"rmo:660FD1rM|rio:660F712mU",
  rcpps_2 =	"rmo:0F53rM",
  rcpss_2 =	"rro:F30F53rM|rx/od:",
  rsqrtps_2 =	"rmo:0F52rM",
  rsqrtss_2 =	"rmo:F30F52rM",
  sfence_0 =	"0FAEF8",
  shufpd_3 =	"rmio:660FC6rMU",
  shufps_3 =	"rmio:0FC6rMU",
  stmxcsr_1 =	"xd:0FAE3m",
  ucomisd_2 =	"rro:660F2ErM|rx/oq:",
  ucomiss_2 =	"rro:0F2ErM|rx/od:",
  unpckhpd_2 =	"rmo:660F15rM",
  unpckhps_2 =	"rmo:0F15rM",
  unpcklpd_2 =	"rmo:660F14rM",
  unpcklps_2 =	"rmo:0F14rM",
  xorpd_2 =	"rmo:660F57rM",
  xorps_2 =	"rmo:0F57rM",

  -- SSE3 ops
  fisttp_1 =	"xw:nDF1m|xd:DB1m|xq:nDD1m",
  addsubpd_2 =	"rmo:660FD0rM",
  addsubps_2 =	"rmo:F20FD0rM",
  haddpd_2 =	"rmo:660F7CrM",
  haddps_2 =	"rmo:F20F7CrM",
  hsubpd_2 =	"rmo:660F7DrM",
  hsubps_2 =	"rmo:F20F7DrM",
  lddqu_2 =	"rxo:F20FF0rM",
  movddup_2 =	"rmo:F20F12rM",
  movshdup_2 =	"rmo:F30F16rM",
  movsldup_2 =	"rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 =	"rmo:660F381CrM",
  pabsd_2 =	"rmo:660F381ErM",
  pabsw_2 =	"rmo:660F381DrM",
  palignr_3 =	"rmio:660F3A0FrMU",
  phaddd_2 =	"rmo:660F3802rM",
  phaddsw_2 =	"rmo:660F3803rM",
  phaddw_2 =	"rmo:660F3801rM",
  phsubd_2 =	"rmo:660F3806rM",
  phsubsw_2 =	"rmo:660F3807rM",
  phsubw_2 =	"rmo:660F3805rM",
  pmaddubsw_2 =	"rmo:660F3804rM",
  pmulhrsw_2 =	"rmo:660F380BrM",
  pshufb_2 =	"rmo:660F3800rM",
  psignb_2 =	"rmo:660F3808rM",
  psignd_2 =	"rmo:660F380ArM",
  psignw_2 =	"rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 =	"rmio:660F3A0DrMU",
  blendps_3 =	"rmio:660F3A0CrMU",
  blendvpd_3 =	"rmRo:660F3815rM",
  blendvps_3 =	"rmRo:660F3814rM",
  dppd_3 =	"rmio:660F3A41rMU",
  dpps_3 =	"rmio:660F3A40rMU",
  extractps_3 =	"mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  -- insertps is 66 0F 3A 21 /r ib (matches vinsertps_4 below);
  -- 0F 3A 41 is dppd.
  insertps_3 =	"rrio:660F3A21rMU|rxi/od:",
  movntdqa_2 =	"rxo:660F382ArM",
  mpsadbw_3 =	"rmio:660F3A42rMU",
  packusdw_2 =	"rmo:660F382BrM",
  pblendvb_3 =	"rmRo:660F3810rM",
  pblendw_3 =	"rmio:660F3A0ErMU",
  pcmpeqq_2 =	"rmo:660F3829rM",
  pextrb_3 =	"rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 =	"mri/do:660F3A16RmU",
  pextrq_3 =	"mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 =	"rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 =	"rmi/od:660F3A22rMU",
  pinsrq_3 =	"rmi/oq:660F3A22rXMU",
  pmaxsb_2 =	"rmo:660F383CrM",
  pmaxsd_2 =	"rmo:660F383DrM",
  pmaxud_2 =	"rmo:660F383FrM",
  pmaxuw_2 =	"rmo:660F383ErM",
  pminsb_2 =	"rmo:660F3838rM",
  pminsd_2 =	"rmo:660F3839rM",
  pminud_2 =	"rmo:660F383BrM",
  pminuw_2 =	"rmo:660F383ArM",
  pmovsxbd_2 =	"rro:660F3821rM|rx/od:",
  pmovsxbq_2 =	"rro:660F3822rM|rx/ow:",
  pmovsxbw_2 =	"rro:660F3820rM|rx/oq:",
  pmovsxdq_2 =	"rro:660F3825rM|rx/oq:",
  pmovsxwd_2 =	"rro:660F3823rM|rx/oq:",
  pmovsxwq_2 =	"rro:660F3824rM|rx/od:",
  pmovzxbd_2 =	"rro:660F3831rM|rx/od:",
  pmovzxbq_2 =	"rro:660F3832rM|rx/ow:",
  pmovzxbw_2 =	"rro:660F3830rM|rx/oq:",
  pmovzxdq_2 =	"rro:660F3835rM|rx/oq:",
  pmovzxwd_2 =	"rro:660F3833rM|rx/oq:",
  pmovzxwq_2 =	"rro:660F3834rM|rx/od:",
  pmuldq_2 =	"rmo:660F3828rM",
  pmulld_2 =	"rmo:660F3840rM",
  ptest_2 =	"rmo:660F3817rM",
  roundpd_3 =	"rmio:660F3A09rMU",
  roundps_3 =	"rmio:660F3A08rMU",
  roundsd_3 =	"rrio:660F3A0BrMU|rxi/oq:",
  roundss_3 =	"rrio:660F3A0ArMU|rxi/od:",

  -- SSE4.2 ops
  crc32_2 =	"rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:",
  pcmpestri_3 =	"rmio:660F3A61rMU",
  pcmpestrm_3 =	"rmio:660F3A60rMU",
  pcmpgtq_2 =	"rmo:660F3837rM",
  pcmpistri_3 =	"rmio:660F3A63rMU",
  pcmpistrm_3 =	"rmio:660F3A62rMU",
  popcnt_2 =	"rmqdw:F30FB8rM",

  -- SSE4a
  extrq_2 =	"rro:660F79rM",
  extrq_3 =	"riio:660F780mUU",
  insertq_2 =	"rro:F20F79rM",
  insertq_4 =	"rriio:F20F78rMUU",
  lzcnt_2 =	"rmqdw:F30FBDrM",
  movntsd_2 =	"xr/qo:nF20F2BRm",
  movntss_2 =	"xr/do:F30F2BRm",
  -- popcnt is also in SSE4.2

  -- AES-NI
  aesdec_2 =	"rmo:660F38DErM",
  aesdeclast_2 = "rmo:660F38DFrM",
  aesenc_2 =	"rmo:660F38DCrM",
  aesenclast_2 = "rmo:660F38DDrM",
  aesimc_2 =	"rmo:660F38DBrM",
  aeskeygenassist_3 = "rmio:660F3ADFrMU",
  pclmulqdq_3 =	"rmio:660F3A44rMU",

  -- AVX FP ops
  vaddsubpd_3 =	"rrmoy:660FVD0rM",
  vaddsubps_3 =	"rrmoy:F20FVD0rM",
  vandpd_3 =	"rrmoy:660FV54rM",
  vandps_3 =	"rrmoy:0FV54rM",
  vandnpd_3 =	"rrmoy:660FV55rM",
  vandnps_3 =	"rrmoy:0FV55rM",
  vblendpd_4 =	"rrmioy:660F3AV0DrMU",
  vblendps_4 =	"rrmioy:660F3AV0CrMU",
  vblendvpd_4 =	"rrmroy:660F3AV4BrMs",
  vblendvps_4 =	"rrmroy:660F3AV4ArMs",
  vbroadcastf128_2 = "rx/yo:660F38u1ArM",
  vcmppd_4 =	"rrmioy:660FVC2rMU",
  vcmpps_4 =	"rrmioy:0FVC2rMU",
  vcmpsd_4 =	"rrrio:F20FVC2rMU|rrxi/ooq:",
  vcmpss_4 =	"rrrio:F30FVC2rMU|rrxi/ood:",
  vcomisd_2 =	"rro:660Fu2FrM|rx/oq:",
  vcomiss_2 =	"rro:0Fu2FrM|rx/od:",
  vcvtdq2pd_2 =	"rro:F30FuE6rM|rx/oq:|rm/yo:",
  vcvtdq2ps_2 =	"rmoy:0Fu5BrM",
  vcvtpd2dq_2 =	"rmoy:F20FuE6rM",
  vcvtpd2ps_2 =	"rmoy:660Fu5ArM",
  vcvtps2dq_2 =	"rmoy:660Fu5BrM",
  vcvtps2pd_2 =	"rro:0Fu5ArM|rx/oq:|rm/yo:",
  vcvtsd2si_2 =	"rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:",
  vcvtsd2ss_3 =	"rrro:F20FV5ArM|rrx/ooq:",
  vcvtsi2sd_3 =	"rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM",
  vcvtsi2ss_3 =	"rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM",
  vcvtss2sd_3 =	"rrro:F30FV5ArM|rrx/ood:",
  vcvtss2si_2 =	"rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:",
  vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM",
  vcvttps2dq_2 = "rmoy:F30Fu5BrM",
  vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:",
  vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:",
  vdppd_4 =	"rrmio:660F3AV41rMU",
  vdpps_4 =	"rrmioy:660F3AV40rMU",
  vextractf128_3 = "mri/oy:660F3AuL19RmU",
  vextractps_3 = "mri/do:660F3Au17RmU",
  vhaddpd_3 =	"rrmoy:660FV7CrM",
  vhaddps_3 =	"rrmoy:F20FV7CrM",
  vhsubpd_3 =	"rrmoy:660FV7DrM",
  vhsubps_3 =	"rrmoy:F20FV7DrM",
  vinsertf128_4 = "rrmi/yyo:660F3AV18rMU",
  vinsertps_4 =	"rrrio:660F3AV21rMU|rrxi/ood:",
  vldmxcsr_1 =	"xd:0FuAE2m",
  vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm",
  vmaskmovpd_3 = "rrxoy:660F38V2DrM|xrroy:660F38V2FRm",
  vmovapd_2 =	"rmoy:660Fu28rM|mroy:660Fu29Rm",
  vmovaps_2 =	"rmoy:0Fu28rM|mroy:0Fu29Rm",
  vmovd_2 =	"rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:",
  vmovq_2 =	"rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm",
  vmovddup_2 =	"rmy:F20Fu12rM|rro:|rx/oq:",
  vmovhlps_3 =	"rrro:0FV12rM",
  vmovhpd_2 =	"xr/qo:660Fu17Rm",
  vmovhpd_3 =	"rrx/ooq:660FV16rM",
  vmovhps_2 =	"xr/qo:0Fu17Rm",
  vmovhps_3 =	"rrx/ooq:0FV16rM",
  vmovlhps_3 =	"rrro:0FV16rM",
  vmovlpd_2 =	"xr/qo:660Fu13Rm",
  vmovlpd_3 =	"rrx/ooq:660FV12rM",
  vmovlps_2 =	"xr/qo:0Fu13Rm",
  vmovlps_3 =	"rrx/ooq:0FV12rM",
  vmovmskpd_2 =	"rr/do:660Fu50rM|rr/dy:660FuL50rM",
  vmovmskps_2 =	"rr/do:0Fu50rM|rr/dy:0FuL50rM",
  vmovntpd_2 =	"xroy:660Fu2BRm",
  vmovntps_2 =	"xroy:0Fu2BRm",
  vmovsd_2 =	"rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm",
  vmovsd_3 =	"rrro:F20FV10rM",
  vmovshdup_2 =	"rmoy:F30Fu16rM",
  vmovsldup_2 =	"rmoy:F30Fu12rM",
  vmovss_2 =	"rx/od:F30Fu10rM|xr/do:F30Fu11Rm",
  vmovss_3 =	"rrro:F30FV10rM",
  vmovupd_2 =	"rmoy:660Fu10rM|mroy:660Fu11Rm",
  vmovups_2 =	"rmoy:0Fu10rM|mroy:0Fu11Rm",
  vorpd_3 =	"rrmoy:660FV56rM",
  vorps_3 =	"rrmoy:0FV56rM",
  vpermilpd_3 =	"rrmoy:660F38V0DrM|rmioy:660F3Au05rMU",
  vpermilps_3 =	"rrmoy:660F38V0CrM|rmioy:660F3Au04rMU",
  vperm2f128_4 = "rrmiy:660F3AV06rMU",
  vptestpd_2 =	"rmoy:660F38u0FrM",
  vptestps_2 =	"rmoy:660F38u0ErM",
  vrcpps_2 =	"rmoy:0Fu53rM",
  vrcpss_3 =	"rrro:F30FV53rM|rrx/ood:",
  vrsqrtps_2 =	"rmoy:0Fu52rM",
  vrsqrtss_3 =	"rrro:F30FV52rM|rrx/ood:",
  -- The reg, r/m, imm8 forms leave VEX.vvvv unused, so they need "u".
  -- "V" would strip the 2nd operand from the list and make "M" encode
  -- the immediate as the ModRM operand.
  vroundpd_3 =	"rmioy:660F3Au09rMU",
  vroundps_3 =	"rmioy:660F3Au08rMU",
  vroundsd_4 =	"rrrio:660F3AV0BrMU|rrxi/ooq:",
  vroundss_4 =	"rrrio:660F3AV0ArMU|rrxi/ood:",
  vshufpd_4 =	"rrmioy:660FVC6rMU",
  vshufps_4 =	"rrmioy:0FVC6rMU",
  vsqrtps_2 =	"rmoy:0Fu51rM",
  vsqrtss_2 =	"rro:F30Fu51rM|rx/od:",
  vsqrtpd_2 =	"rmoy:660Fu51rM",
  vsqrtsd_2 =	"rro:F20Fu51rM|rx/oq:",
  vstmxcsr_1 =	"xd:0FuAE3m",
  vucomisd_2 =	"rro:660Fu2ErM|rx/oq:",
  vucomiss_2 =	"rro:0Fu2ErM|rx/od:",
  vunpckhpd_3 =	"rrmoy:660FV15rM",
  vunpckhps_3 =	"rrmoy:0FV15rM",
  vunpcklpd_3 =	"rrmoy:660FV14rM",
  vunpcklps_3 =	"rrmoy:0FV14rM",
  vxorpd_3 =	"rrmoy:660FV57rM",
  vxorps_3 =	"rrmoy:0FV57rM",
  vzeroall_0 =	"0FuL77",
  vzeroupper_0 = "0Fu77",

  -- AVX2 FP ops
  vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:",
  vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:",
  -- *vgather* (!vsib)
  vpermpd_3 =	"rmiy:660F3AuX01rMU",
  vpermps_3 =	"rrmy:660F38V16rM",

  -- AVX, AVX2 integer ops
  -- In general, xmm requires AVX, ymm requires AVX2.
  vaesdec_3 =	"rrmo:660F38VDErM",
  vaesdeclast_3 = "rrmo:660F38VDFrM",
  vaesenc_3 =	"rrmo:660F38VDCrM",
  vaesenclast_3 = "rrmo:660F38VDDrM",
  vaesimc_2 =	"rmo:660F38uDBrM",
  vaeskeygenassist_3 = "rmio:660F3AuDFrMU",
  vlddqu_2 =	"rxoy:F20FuF0rM",
  vmaskmovdqu_2 = "rro:660FuF7rM",
  vmovdqa_2 =	"rmoy:660Fu6FrM|mroy:660Fu7FRm",
  vmovdqu_2 =	"rmoy:F30Fu6FrM|mroy:F30Fu7FRm",
  vmovntdq_2 =	"xroy:660FuE7Rm",
  vmovntdqa_2 =	"rxoy:660F38u2ArM",
  vmpsadbw_4 =	"rrmioy:660F3AV42rMU",
  vpabsb_2 =	"rmoy:660F38u1CrM",
  vpabsd_2 =	"rmoy:660F38u1ErM",
  vpabsw_2 =	"rmoy:660F38u1DrM",
  vpackusdw_3 =	"rrmoy:660F38V2BrM",
  vpalignr_4 =	"rrmioy:660F3AV0FrMU",
  vpblendvb_4 =	"rrmroy:660F3AV4CrMs",
  vpblendw_4 =	"rrmioy:660F3AV0ErMU",
  vpclmulqdq_4 = "rrmio:660F3AV44rMU",
  vpcmpeqq_3 =	"rrmoy:660F38V29rM",
  vpcmpestri_3 = "rmio:660F3Au61rMU",
  vpcmpestrm_3 = "rmio:660F3Au60rMU",
  vpcmpgtq_3 =	"rrmoy:660F38V37rM",
  vpcmpistri_3 = "rmio:660F3Au63rMU",
  vpcmpistrm_3 = "rmio:660F3Au62rMU",
  vpextrb_3 =	"rri/do:660F3Au14nRmU|rri/qo:|xri/bo:",
  vpextrw_3 =	"rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU",
  vpextrd_3 =	"mri/do:660F3Au16RmU",
  vpextrq_3 =	"mri/qo:660F3Au16RmU",
  vphaddw_3 =	"rrmoy:660F38V01rM",
  vphaddd_3 =	"rrmoy:660F38V02rM",
  vphaddsw_3 =	"rrmoy:660F38V03rM",
  vphminposuw_2 = "rmo:660F38u41rM",
  vphsubw_3 =	"rrmoy:660F38V05rM",
  vphsubd_3 =	"rrmoy:660F38V06rM",
  vphsubsw_3 =	"rrmoy:660F38V07rM",
  vpinsrb_4 =	"rrri/ood:660F3AV20rMU|rrxi/oob:",
  vpinsrw_4 =	"rrri/ood:660FVC4rMU|rrxi/oow:",
  vpinsrd_4 =	"rrmi/ood:660F3AV22rMU",
  vpinsrq_4 =	"rrmi/ooq:660F3AVX22rMU",
  vpmaddubsw_3 = "rrmoy:660F38V04rM",
  vpmaxsb_3 =	"rrmoy:660F38V3CrM",
  vpmaxsd_3 =	"rrmoy:660F38V3DrM",
  vpmaxuw_3 =	"rrmoy:660F38V3ErM",
  vpmaxud_3 =	"rrmoy:660F38V3FrM",
  vpminsb_3 =	"rrmoy:660F38V38rM",
  vpminsd_3 =	"rrmoy:660F38V39rM",
  vpminuw_3 =	"rrmoy:660F38V3ArM",
  vpminud_3 =	"rrmoy:660F38V3BrM",
  vpmovmskb_2 =	"rr/do:660FuD7rM|rr/dy:660FuLD7rM",
  vpmovsxbw_2 =	"rroy:660F38u20rM|rx/oq:|rx/yo:",
  vpmovsxbd_2 =	"rroy:660F38u21rM|rx/od:|rx/yq:",
  vpmovsxbq_2 =	"rroy:660F38u22rM|rx/ow:|rx/yd:",
  vpmovsxwd_2 =	"rroy:660F38u23rM|rx/oq:|rx/yo:",
  vpmovsxwq_2 =	"rroy:660F38u24rM|rx/od:|rx/yq:",
  vpmovsxdq_2 =	"rroy:660F38u25rM|rx/oq:|rx/yo:",
  vpmovzxbw_2 =	"rroy:660F38u30rM|rx/oq:|rx/yo:",
  vpmovzxbd_2 =	"rroy:660F38u31rM|rx/od:|rx/yq:",
  vpmovzxbq_2 =	"rroy:660F38u32rM|rx/ow:|rx/yd:",
  vpmovzxwd_2 =	"rroy:660F38u33rM|rx/oq:|rx/yo:",
  vpmovzxwq_2 =	"rroy:660F38u34rM|rx/od:|rx/yq:",
  vpmovzxdq_2 =	"rroy:660F38u35rM|rx/oq:|rx/yo:",
  vpmuldq_3 =	"rrmoy:660F38V28rM",
  vpmulhrsw_3 =	"rrmoy:660F38V0BrM",
  vpmulld_3 =	"rrmoy:660F38V40rM",
  vpshufb_3 =	"rrmoy:660F38V00rM",
  vpshufd_3 =	"rmioy:660Fu70rMU",
  vpshufhw_3 =	"rmioy:F30Fu70rMU",
  vpshuflw_3 =	"rmioy:F20Fu70rMU",
  vpsignb_3 =	"rrmoy:660F38V08rM",
  vpsignw_3 =	"rrmoy:660F38V09rM",
  vpsignd_3 =	"rrmoy:660F38V0ArM",
  vpslldq_3 =	"rrioy:660Fv737mU",
  vpsllw_3 =	"rrmoy:660FVF1rM|rrioy:660Fv716mU",
  vpslld_3 =	"rrmoy:660FVF2rM|rrioy:660Fv726mU",
  vpsllq_3 =	"rrmoy:660FVF3rM|rrioy:660Fv736mU",
  vpsraw_3 =	"rrmoy:660FVE1rM|rrioy:660Fv714mU",
  vpsrad_3 =	"rrmoy:660FVE2rM|rrioy:660Fv724mU",
  vpsrldq_3 =	"rrioy:660Fv733mU",
  vpsrlw_3 =	"rrmoy:660FVD1rM|rrioy:660Fv712mU",
  vpsrld_3 =	"rrmoy:660FVD2rM|rrioy:660Fv722mU",
  vpsrlq_3 =	"rrmoy:660FVD3rM|rrioy:660Fv732mU",
  vptest_2 =	"rmoy:660F38u17rM",

  -- AVX2 integer ops
  vbroadcasti128_2 = "rx/yo:660F38u5ArM",
  vinserti128_4 = "rrmi/yyo:660F3AV38rMU",
  vextracti128_3 = "mri/oy:660F3AuL39RmU",
  vpblendd_4 =	"rrmioy:660F3AV02rMU",
  vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:",
  vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:",
  vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:",
  vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:",
  vpermd_3 =	"rrmy:660F38V36rM",
  vpermq_3 =	"rmiy:660F3AuX00rMU",
  -- *vpgather* (!vsib)
  vperm2i128_4 = "rrmiy:660F3AV46rMU",
  vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm",
  vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm",
  vpsllvd_3 =	"rrmoy:660F38V47rM",
  vpsllvq_3 =	"rrmoy:660F38VX47rM",
  vpsravd_3 =	"rrmoy:660F38V46rM",
  vpsrlvd_3 =	"rrmoy:660F38V45rM",
  vpsrlvq_3 =	"rrmoy:660F38VX45rM",

  -- Intel ADX
  adcx_2 =	"rmqd:660F38F6rM",
  adox_2 =	"rmqd:F30F38F6rM",
}
1711
1712------------------------------------------------------------------------------
1713
-- Arithmetic ops (add, or, adc, sbb, and, sub, xor, cmp).
-- The operation index is shifted left by 3 to form the base of the
-- register/accumulator opcodes and is used unshifted as the last hex
-- digit of the 81/83 immediate forms.
do
  local arith_index = {
    add = 0, ["or"] = 1, adc = 2, sbb = 3,
    ["and"] = 4, sub = 5, xor = 6, cmp = 7,
  }
  local arith_template =
    "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi"
  for mnemonic, idx in pairs(arith_index) do
    local base = shl(idx, 3)
    map_op[mnemonic.."_2"] =
      format(arith_template, base + 1, base + 3, idx, idx, base + 5, idx)
  end
end
1722
-- Shift and rotate ops.
-- The value is the hex digit appended to the D1/D3/C1 opcodes;
-- "sal" shares digit 4 with "shl". Digit 6 is not used.
do
  local shift_digit = {
    rol = 0, ror = 1, rcl = 2, rcr = 3,
    shl = 4, sal = 4, shr = 5, sar = 7,
  }
  for mnemonic, digit in pairs(shift_digit) do
    local key = mnemonic.."_2"
    map_op[key] = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU", digit, digit, digit)
  end
end
1728
-- Conditional ops: register jcc, setcc and cmovcc for every condition
-- code found in map_cc.
for cond, code in pairs(map_cc) do
  local jcc_key = "j"..cond.."_1"
  local setcc_key = "set"..cond.."_1"
  local cmovcc_key = "cmov"..cond.."_2"
  map_op[jcc_key] = format("J.:n0F8%XJ", code) -- short: 7%X
  map_op[setcc_key] = format("mb:n0F9%X2m", code)
  map_op[cmovcc_key] = format("rmqdw:0F4%XrM", code) -- P6+
end
1735
-- FP arithmetic ops (fadd, fmul, fcom, fcomp, fsub, fsubr, fdiv, fdivr).
-- For every op this registers the one-operand form and the integer-memory
-- "fi" form. The compare ops (indices 2 and 3) only get a plain
-- two-operand form; all other ops additionally get the form with st0 as
-- the second operand and the popping "p" variants.
do
  local fp_index = {
    add = 0, mul = 1, com = 2, comp = 3,
    sub = 4, subr = 5, div = 6, divr = 7,
  }
  for name, idx in pairs(fp_index) do
    local direct = 0xc0 + shl(idx, 3)
    -- Opcode byte used by the DC/DE forms: identical for indices 0-3,
    -- swapped pairwise (+8 for even, -8 for odd) for indices 4-7.
    local swapped = direct
    if idx >= 4 then
      if idx % 2 == 0 then
        swapped = direct + 8
      else
        swapped = direct - 8
      end
    end
    local mnemonic = "f"..name
    local is_compare = (idx == 2 or idx == 3)

    map_op[mnemonic.."_1"] =
      format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm", direct, idx, idx)
    if is_compare then
      map_op[mnemonic.."_2"] =
        format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM", direct, idx, idx)
    else
      map_op[mnemonic.."_2"] =
        format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM",
               direct, swapped, idx, idx)
      map_op[mnemonic.."p_1"] = format("ff:DE%02Xr", swapped)
      map_op[mnemonic.."p_2"] = format("fFf:DE%02Xr", swapped)
    end
    map_op["fi"..name.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", idx, idx)
  end
end
1752
-- FP conditional moves.
-- The low two bits of the condition index go into bits 3-4 of the
-- opcode; bit 2 of the index, shifted left by 6, adds 0x100 and so
-- switches the first opcode byte from DA to DB.
do
  local fcmov_index = {
    b = 0, e = 1, be = 2, u = 3,
    nb = 4, ne = 5, nbe = 6, nu = 7,
  }
  for cond, idx in pairs(fcmov_index) do
    local low_bits = shl(band(idx, 3), 3)
    local high_bit = shl(band(idx, 4), 6)
    local opcode = 0xdac0 + low_bits + high_bit
    local mnemonic = "fcmov"..cond
    map_op[mnemonic.."_1"] = format("ff:%04Xr", opcode) -- P6+
    map_op[mnemonic.."_2"] = format("Fff:%04XR", opcode) -- P6+
  end
end
1759
-- SSE / AVX FP arithmetic ops.
-- The value is the low hex digit of the 0F 5x opcode. Every op gets the
-- four two-operand SSE forms (ps, ss, pd, sd); every op except sqrt
-- (digit 1) also gets the four three-operand "v"-prefixed forms.
do
  local fp_digit = {
    sqrt = 1, add = 8, mul = 9,
    sub = 12, min = 13, div = 14, max = 15,
  }
  local sse_forms = {
    ps = "rmo:0F5%XrM",
    ss = "rro:F30F5%XrM|rx/od:",
    pd = "rmo:660F5%XrM",
    sd = "rro:F20F5%XrM|rx/oq:",
  }
  local avx_forms = {
    ps = "rrmoy:0FV5%XrM",
    ss = "rrro:F30FV5%XrM|rrx/ood:",
    pd = "rrmoy:660FV5%XrM",
    sd = "rrro:F20FV5%XrM|rrx/ooq:",
  }
  for name, digit in pairs(fp_digit) do
    for suffix, template in pairs(sse_forms) do
      map_op[name..suffix.."_2"] = format(template, digit)
    end
    if digit ~= 1 then
      for suffix, template in pairs(avx_forms) do
        map_op["v"..name..suffix.."_3"] = format(template, digit)
      end
    end
  end
end
1774
-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- Each mnemonic maps to its opcode byte after the 66 0F prefix. Both the
-- two-operand form and the three-operand "v"-prefixed form are registered.
do
  local int_opcode = {
    -- add / subtract
    paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
    paddsb = 0xEC, paddsw = 0xED, paddusb = 0xDC, paddusw = 0xDD,
    psubb = 0xF8, psubw = 0xF9, psubd = 0xFA, psubq = 0xFB,
    psubsb = 0xE8, psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
    -- pack / unpack
    packssdw = 0x6B, packsswb = 0x63, packuswb = 0x67,
    punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
    punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
    punpckldq = 0x62, punpcklqdq = 0x6C,
    -- logic
    pand = 0xDB, pandn = 0xDF, por = 0xEB, pxor = 0xEF,
    -- compare
    pcmpeqb = 0x74, pcmpeqw = 0x75, pcmpeqd = 0x76,
    pcmpgtb = 0x64, pcmpgtw = 0x65, pcmpgtd = 0x66,
    -- average / min / max
    pavgb = 0xE0, pavgw = 0xE3,
    pmaxsw = 0xEE, pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
    -- multiply / sum
    pmaddwd = 0xF5, pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
    pmuludq = 0xF4, psadbw = 0xF6,
  }
  for mnemonic, opbyte in pairs(int_opcode) do
    local sse_key = mnemonic.."_2"
    local avx_key = "v"..mnemonic.."_3"
    map_op[sse_key] = format("rmo:660F%02XrM", opbyte)
    map_op[avx_key] = format("rrmoy:660FV%02XrM", opbyte)
  end
end
1796
1797------------------------------------------------------------------------------
1798
-- Pattern characters that select VEX encoding. A numeric value is the
-- index of the operand that is removed from the operand list and stored
-- as the vvvv field; false means no operand is taken for vvvv.
local map_vexarg = {
  u = false,
  v = 1,
  V = 2,
}
1800
1801-- Process pattern string.
1802local function dopattern(pat, args, sz, op, needrex)
1803  local digit, addin, vex
1804  local opcode = 0
1805  local szov = sz
1806  local narg = 1
1807  local rex = 0
1808
1809  -- Limit number of section buffer positions used by a single dasm_put().
1810  -- A single opcode needs a maximum of 6 positions.
1811  if secpos+6 > maxsecpos then wflush() end
1812
1813  -- Process each character.
1814  for c in gmatch(pat.."|", ".") do
1815    if match(c, "%x") then	-- Hex digit.
1816      digit = byte(c) - 48
1817      if digit > 48 then digit = digit - 39
1818      elseif digit > 16 then digit = digit - 7 end
1819      opcode = opcode*16 + digit
1820      addin = nil
1821    elseif c == "n" then	-- Disable operand size mods for opcode.
1822      szov = nil
1823    elseif c == "X" then	-- Force REX.W.
1824      rex = 8
1825    elseif c == "L" then	-- Force VEX.L.
1826      vex.l = true
1827    elseif c == "r" then	-- Merge 1st operand regno. into opcode.
1828      addin = args[1]; opcode = opcode + (addin.reg % 8)
1829      if narg < 2 then narg = 2 end
1830    elseif c == "R" then	-- Merge 2nd operand regno. into opcode.
1831      addin = args[2]; opcode = opcode + (addin.reg % 8)
1832      narg = 3
1833    elseif c == "m" or c == "M" then	-- Encode ModRM/SIB.
1834      local s
1835      if addin then
1836	s = addin.reg
1837	opcode = opcode - band(s, 7)	-- Undo regno opcode merge.
1838      else
1839	s = band(opcode, 15)	-- Undo last digit.
1840	opcode = shr(opcode, 4)
1841      end
1842      local nn = c == "m" and 1 or 2
1843      local t = args[nn]
1844      if narg <= nn then narg = nn + 1 end
1845      if szov == "q" and rex == 0 then rex = rex + 8 end
1846      if t.reg and t.reg > 7 then rex = rex + 1 end
1847      if t.xreg and t.xreg > 7 then rex = rex + 2 end
1848      if s > 7 then rex = rex + 4 end
1849      if needrex then rex = rex + 16 end
1850      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
1851      opcode = nil
1852      local imark = sub(pat, -1) -- Force a mark (ugly).
1853      -- Put ModRM/SIB with regno/last digit as spare.
1854      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
1855      addin = nil
1856    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
1857      local b = band(opcode, 255); opcode = shr(opcode, 8)
1858      local m = 1
1859      if b == 0x38 then m = 2
1860      elseif b == 0x3a then m = 3 end
1861      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
1862      if b ~= 0x0f then
1863	werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
1864	  "' in pattern `"..pat.."' for `"..op.."'")
1865      end
1866      local v = map_vexarg[c]
1867      if v then v = remove(args, v) end
1868      b = band(opcode, 255)
1869      local p = 0
1870      if b == 0x66 then p = 1
1871      elseif b == 0xf3 then p = 2
1872      elseif b == 0xf2 then p = 3 end
1873      if p ~= 0 then opcode = shr(opcode, 8) end
1874      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
1875      vex = { m = m, p = p, v = v }
1876    else
1877      if opcode then -- Flush opcode.
1878	if szov == "q" and rex == 0 then rex = rex + 8 end
1879	if needrex then rex = rex + 16 end
1880	if addin and addin.reg == -1 then
1881	  local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
1882	  wvreg("opcode", addin.vreg, psz, sk)
1883	else
1884	  if addin and addin.reg > 7 then rex = rex + 1 end
1885	  wputop(szov, opcode, rex, vex)
1886	end
1887	opcode = nil
1888      end
1889      if c == "|" then break end
1890      if c == "o" then -- Offset (pure 32 bit displacement).
1891	wputdarg(args[1].disp); if narg < 2 then narg = 2 end
1892      elseif c == "O" then
1893	wputdarg(args[2].disp); narg = 3
1894      else
1895	-- Anything else is an immediate operand.
1896	local a = args[narg]
1897	narg = narg + 1
1898	local mode, imm = a.mode, a.imm
1899	if mode == "iJ" and not match("iIJ", c) then
1900	  werror("bad operand size for label")
1901	end
1902	if c == "S" then
1903	  wputsbarg(imm)
1904	elseif c == "U" then
1905	  wputbarg(imm)
1906	elseif c == "W" then
1907	  wputwarg(imm)
1908	elseif c == "i" or c == "I" then
1909	  if mode == "iJ" then
1910	    wputlabel("IMM_", imm, 1)
1911	  elseif mode == "iI" and c == "I" then
1912	    waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
1913	  else
1914	    wputszarg(sz, imm)
1915	  end
1916	elseif c == "J" then
1917	  if mode == "iPJ" then
1918	    waction("REL_A", imm) -- !x64 (secpos)
1919	  else
1920	    wputlabel("REL_", imm, 2)
1921	  end
1922	elseif c == "s" then
1923	  local reg = a.reg
1924	  if reg < 0 then
1925	    wputb(0)
1926	    wvreg("imm.hi", a.vreg)
1927	  else
1928	    wputb(shl(reg, 4))
1929	  end
1930	else
1931	  werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
1932	end
1933      end
1934    end
1935  end
1936end
1937
1938------------------------------------------------------------------------------
1939
-- Short display names for the operand mode characters of a template.
-- A name of '#' suppresses the help line of the whole alternative.
local map_modename = {
  r = "reg",
  R = "eax",
  C = "cl",
  x = "mem",
  m = "mrm",
  i = "imm",
  f = "stx",
  F = "st0",
  J = "lbl",
  ["1"] = "1",
  I = "#",
  S = "#",
  O = "#",
}

-- Describe the operand modes accepted by a template.
-- Returns "" for opcodes without operands, otherwise a list holding one
-- "mode, mode, ..." string per alternative that has no suppressed mode.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local help = {}
  for alt in gmatch(template, "[^%|]+") do
    local first = map_modename[sub(alt, 1, 1)]
    -- Names for the 2nd up to the nparams-th mode character.
    local rest = ""
    for modechar in gmatch(sub(alt, 2, nparams), ".") do
      rest = rest..", "..map_modename[modechar]
    end
    local desc = first..rest
    if not find(desc, "#", 1, true) then help[#help+1] = desc end
  end
  return help
end
1960
-- Check every operand against the mode match part of a template.
-- The i-th character of `tm' is used as a pattern on the mode string of
-- the i-th operand. Returns true if all operands match, nothing otherwise.
local function matchtm(tm, args)
  local i, n = 1, #args
  while i <= n do
    local modechar = sub(tm, i, i)
    if match(args[i].mode, modechar) == nil then return end
    i = i + 1
  end
  return true
end
1968
-- Handler for all opcodes which are defined by a template string.
-- Without params the operand mode help for the template is returned.
-- Otherwise the operands are parsed, the first alternative with matching
-- modes and sizes is encoded with dopattern(), and an error is raised
-- when no alternative fits.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- A template for a zero-operand opcode consists of the pattern only.
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Parse the operands. Derive the common operand size (operands without
  -- a size do not take part) or flag the sizes as mixed.
  local sz, szmix, needrex
  for idx, param in ipairs(params) do
    local arg = parseoperand(param)
    args[idx] = arg
    local argsz = arg.opsize
    if argsz then
      if sz == nil or sz == argsz then sz = argsz else szmix = true end
    end
    local argrex = arg.needrex
    if argrex ~= nil then
      if needrex == nil then
        needrex = argrex
      elseif needrex ~= argrex then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Walk the '|'-separated match:pattern alternatives in order.
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- The size match starts right after the mode match and ends at ':'.
    -- An empty pattern string reuses the pattern of the previous entry.
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    if pat == "" then pat = lastpat else lastpat = pat end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then
        -- Every size letter after the '/' must equal the size of the
        -- corresponding leading operand (checked from last to first).
        local exact = true
        for i = #szm, 2, -1 do
          if args[i-1].opsize ~= sub(szm, i, i) then
            exact = false
            break
          end
        end
        if exact then
          dopattern(pat, args, sz, params.op, needrex) -- Process pattern.
          return
        end
      else
        -- Match against the common operand size.
        if szm == "" then szm = x64 and "qdwb" or "dwb" end -- Default sizes.
        local szp = sz
        -- A "1" or "2" prefix picks the size of that operand instead and
        -- drops the mixed size flag.
        local which = (prefix == "1" and 1) or (prefix == "2" and 2)
        if which then
          szp = args[which].opsize
          szmix = nil
        end
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex) -- Process pattern.
          return
        end
      end
      gotmatch = true
    end
  end

  -- Nothing fit: pick the most specific message.
  local msg
  if not gotmatch then
    msg = "bad operand mode"
  elseif szmix then
    msg = "mixed operand size"
  elseif sz then
    msg = "bad operand size"
  else
    msg = "missing operand size"
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end
2040
2041------------------------------------------------------------------------------
2042
-- x64-specific opcode for 64 bit immediates and displacements.
if x64 then
  -- mov64 accepts three operand forms (see the help list below):
  --   [disp], reg   opcode 0xa3, register operand must have mode "rmR"
  --   reg, [disp]   opcode 0xa1, register operand must have mode "rmR"
  --   reg, imm      opcode 0xb8 + regno, needs a register of size "q"
  -- The 64 bit expression is emitted as two IMM_D actions: the low 32 bits
  -- first, then the expression shifted right by 32.
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end
    local opcode, sz, rex, vreg
    -- A bracketed operand is taken verbatim as the 64 bit displacement.
    local op64 = match(params[1], "^%[%s*(.-)%s*%]$")
    if op64 then
      local a = parseoperand(params[2])
      if a.mode ~= "rmR" then werror("bad operand mode") end
      sz = a.opsize
      rex = sz == "q" and 8 or 0
      opcode = 0xa3
    else
      op64 = match(params[2], "^%[%s*(.-)%s*%]$")
      local a = parseoperand(params[1])
      if op64 then
        if a.mode ~= "rmR" then werror("bad operand mode") end
        sz = a.opsize
        rex = sz == "q" and 8 or 0
        opcode = 0xa1
      else
        if sub(a.mode, 1, 1) ~= "r" or a.opsize ~= "q" then
          werror("bad operand mode")
        end
        -- The 2nd operand is used verbatim as the 64 bit immediate.
        op64 = params[2]
        if a.reg == -1 then
          -- Variable register: the regno is supplied through wvreg() below.
          vreg = a.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(a.reg, 7)
        end
        -- Always 8 here; one more for a register number above 7.
        rex = a.reg > 7 and 9 or 8
      end
    end
    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)
    waction("IMM_D", format("(unsigned int)(%s)", op64))
    waction("IMM_D", format("(unsigned int)((%s)>>32)", op64))
  end
end
2084
2085------------------------------------------------------------------------------
2086
-- Pseudo-opcodes for data storage.
-- The 2nd character of the opcode name selects the element size; "a"
-- stands for the current address size. Every operand must be an immediate
-- whose explicit size (if any) agrees with the element size.
local function op_data(params)
  if not params then return "imm..." end
  local size = sub(params.op, 2, 2)
  if size == "a" then size = addrsize end
  local wide = size == 'q'
  for _, operand in ipairs(params) do
    local arg = parseoperand(operand)
    if sub(arg.mode, 1, 1) ~= "i" or (arg.opsize and arg.opsize ~= size) then
      werror("bad mode or size in `"..operand.."'")
    end
    local is_label = arg.mode == "iJ"
    if wide then
      -- 'q' sized elements use the dedicated 64 bit emitters.
      if is_label then
        wputlabel64("IMM_", arg.imm, 1)
      else
        wputqarg(arg.imm)
      end
    elseif is_label then
      wputlabel("IMM_", arg.imm, 1)
    else
      wputszarg(size, arg.imm)
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end

-- All data pseudo-opcodes share the same handler.
for _, opname in ipairs{
  ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*",
} do
  map_op[opname] = op_data
end
2119
2120------------------------------------------------------------------------------
2121
-- Pseudo-opcode marking where the action list is to be emitted.
-- The write is deferred: a callback is queued with wline().
map_op[".actionlist_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  return "cvar"
end
2128
-- Pseudo-opcode marking where the global enum is to be emitted.
-- The write is deferred: a callback is queued with wline().
map_op[".globals_1"] = function(params)
  if params then
    local enumprefix = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobals(out, enumprefix) end)
    return
  end
  return "prefix"
end
2135
-- Pseudo-opcode marking where the global names are to be emitted.
-- The write is deferred: a callback is queued with wline().
map_op[".globalnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeglobalnames(out, cvar) end)
    return
  end
  return "cvar"
end
2142
-- Pseudo-opcode marking where the extern names are to be emitted.
-- The write is deferred: a callback is queued with wline().
map_op[".externnames_1"] = function(params)
  if params then
    local cvar = params[1] -- No syntax check. You get to keep the pieces.
    wline(function(out) writeexternnames(out, cvar) end)
    return
  end
  return "cvar"
end
2149
2150------------------------------------------------------------------------------
2151
-- Label pseudo-opcode (the trailing colon form is converted to this).
-- The 1st operand names the label, the optional 2nd operand assigns an
-- address to it.
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr  [, addr]" end
  if secpos+2 > maxsecpos then wflush() end
  local label = parseoperand(params[1])
  local mode, imm = label.mode, label.imm
  local is_lbl = mode == "iJ"
  if type(imm) == "number" and (is_lbl or (imm >= 1 and imm <= 9)) then
    -- Numeric label value: local label (1: ... 9:) or global label
    -- (->global:). The value is stored in the action byte.
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif is_lbl then
    -- Non-numeric label value: PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end
  -- SETLABEL must immediately follow LABEL_LG/LABEL_PC.
  local addrspec = params[2]
  if addrspec then
    local addr = parseoperand(addrspec)
    if addr.mode ~= "iPJ" then
      werror("bad label assignment")
    else
      waction("SETLABEL", addr.imm)
    end
  end
end
map_op[".label_1"] = map_op[".label_2"]
2180
2181------------------------------------------------------------------------------
2182
-- Alignment pseudo-opcode.
-- Takes either a number or an operand size name as its only operand.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local spec = params[1]
  local align = tonumber(spec) or map_opsizenum[map_opsize[spec]]
  if align then
    -- Halve up to 8 times: only the powers of 2 from 2 up to 256 reach 1.
    local x, steps = align, 0
    repeat
      x = x / 2
      steps = steps + 1
      if x == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1) -- Action byte is 2**n-1.
        return
      end
    until steps == 8
  end
  werror("bad alignment")
end
2202
-- Spacing pseudo-opcode.
-- The 1st operand is handed to the SPACE action as is, the optional 2nd
-- operand is the filler byte (0 ... 255, default 0).
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local fillbyte = 0
  local fillarg = params[2]
  if fillarg then
    local n = tonumber(fillarg)
    if not n or n < 0 or n > 255 then werror("bad filler") end
    fillbyte = n or 0
  end
  wputxb(fillbyte)
end
map_op[".space_1"] = map_op[".space_2"]
2216
2217------------------------------------------------------------------------------
2218
-- Pseudo-opcode for (primitive) type definitions (map to C types).
-- Operands: type name, C type and an optional base register.
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]

  -- Validate the operands.
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end

  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"

  -- Register the new type under the next free type number and emit the
  -- shortcut define for it.
  local num = ctypenum + 1
  local typedef = {
    ctype = ctype,
    ctypefmt = format("Dt%X(%%s)", num),
    reg = reg,
  }
  map_type[name] = typedef
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]
2248
-- Write a table of all defined types, sorted by name, to `out'.
-- Each row shows the type name, its C type and the base register name
-- (empty if there is none).
local function dumptypes(out, lvl)
  local names = {}
  for name in pairs(map_type) do names[#names+1] = name end
  sort(names)
  out:write("Type definitions:\n")
  for i = 1, #names do
    local name = names[i]
    local tp = map_type[name]
    local regname = ""
    if tp.reg then regname = map_reg_rev[tp.reg] or "" end
    out:write(format("  %-20s %-20s %s\n", name, tp.ctype, regname))
  end
  out:write("\n")
end
2262
2263------------------------------------------------------------------------------
2264
-- Switch to section number `num'.
-- Emits the SECTION action with the section number as its action byte.
_M.section = function(num)
  waction("SECTION")
  wputxb(num)
  -- SECTION is a terminal action.
  wflush(true)
end
2271
2272------------------------------------------------------------------------------
2273
-- Dump the architecture description: a version banner followed by the
-- register and action dumps.
function _M.dumparch(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end
2281
-- Dump everything the user has defined: types first, then globals,
-- then externs.
_M.dumpdef = function(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
2288
2289------------------------------------------------------------------------------
2290
-- Exchange callbacks with the DynASM core.
-- Stores the line/error/fatal/warning writers handed in by the core and
-- gives back this module's flush function.
function _M.passcb(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end
2296
-- Set up the arch-specific module: remember the architecture and the
-- options passed in.
function _M.setup(arch, opt)
  g_arch = arch
  g_opt = opt
end
2301
-- Merge the core maps and the arch-specific maps.
-- Lookups missing in map_op fall back to map_coreop, lookups missing in
-- map_def fall back to map_archdef. Returns both front tables.
function _M.mergemaps(map_coreop, map_def)
  local op_fallback = { __index = map_coreop }
  local def_fallback = { __index = map_archdef }
  setmetatable(map_op, op_fallback)
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end
2308
2309return _M
2310
2311------------------------------------------------------------------------------
2312
2313