xref: /php-src/ext/opcache/jit/ir/dynasm/dasm_arm64.lua (revision 2ab1c3d5)
1------------------------------------------------------------------------------
2-- DynASM ARM64 module.
3--
4-- Copyright (C) 2005-2023 Mike Pall. All rights reserved.
5-- See dynasm.lua for full copyright notice.
6------------------------------------------------------------------------------
7
-- Module information:
local _info = {
  arch        = "arm",
  description = "DynASM ARM64 module",
  version     = "1.5.0",
  vernum      = 10500,
  release     = "2021-05-02",
  author      = "Mike Pall",
  license     = "MIT",
}

-- Table of glue functions exported by this arch-specific module.
-- It starts out carrying only the module information.
local _M = {}
_M._info = _info
21
22-- Cache library functions.
23local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
24local assert, setmetatable, rawget = assert, setmetatable, rawget
25local _s = string
26local format, byte, char = _s.format, _s.byte, _s.char
27local match, gmatch, gsub = _s.match, _s.gmatch, _s.gsub
28local concat, sort, insert = table.concat, table.sort, table.insert
29local bit = bit or require("bit")
30local band, shl, shr, sar = bit.band, bit.lshift, bit.rshift, bit.arshift
31local ror, tohex, tobit = bit.ror, bit.tohex, bit.tobit
32
-- Inherited tables. They are only declared here; nothing is assigned yet.
local g_opt
local g_arch
-- Inherited callbacks, likewise only declared at this point.
local wline
local werror
local wfatal
local wwarn
36
-- Action name list. The position of a name fixes its action number.
-- CHECK: Keep this in sync with the C code!
local action_names = {
  "STOP", "SECTION", "ESC", "REL_EXT",
  "ALIGN", "REL_LG", "LABEL_LG",
  "REL_PC", "LABEL_PC", "REL_A",
  "IMM", "IMM6", "IMM12", "IMM13W", "IMM13X", "IMML", "IMMV",
  "VREG",
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (zero-based, in list order).
local map_action = {}
for idx = 1, #action_names do
  map_action[action_names[idx]] = idx - 1
end
56
-- Buffer collecting the words of the action list.
local actlist = {}

-- Argument list for the next dasm_put(). Its first entry is the offset into
-- the action list, which starts out as 0.
local actargs = {}
actargs[1] = 0

-- Number of section buffer positions used so far for the next dasm_put().
local secpos = 1
65
66------------------------------------------------------------------------------
67
-- Dump action names and numbers.
-- One line per action is written to `out`: the name, then its number in
-- hexadecimal and in decimal.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for idx = 1, #action_names do
    local name = action_names[idx]
    local code = map_action[name]
    out:write(format("  %-10s %02X  %d\n", name, code, code))
  end
  out:write("\n")
end
77
-- Write action list buffer as a huge static C array.
-- `out` is the output stream and `name` the C identifier of the array.
-- An empty action list is written as an array holding a single STOP action.
local function writeactions(out, name)
  local nn = #actlist
  local last = actlist[nn]
  -- The action list is 1-based. For an empty list the final entry has to be
  -- substituted here; storing the placeholder at index 0 would leave the
  -- entry read by the last write below as nil.
  if nn == 0 then nn = 1; last = map_action.STOP end
  out:write("static const unsigned int ", name, "[", nn, "] = {\n")
  for i = 1,nn-1 do
    assert(out:write("0x", tohex(actlist[i]), ",\n"))
  end
  -- The last element is written without a trailing comma.
  assert(out:write("0x", tohex(last), "\n};\n\n"))
end
88
89------------------------------------------------------------------------------
90
-- Add word to action list.
-- The word must be a non-negative integer that fits into 32 bits.
local function wputxw(n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  actlist[#actlist+1] = n
end
96
-- Add action to list with optional arg. Advance buffer pos, too.
--   action: action name (must be a key of map_action)
--   val:    value stored in the low 16 bits of the action word (default 0)
--   a:      optional C expression appended to the dasm_put() arguments
--   num:    number of buffer positions to account for (default 1 when `a`
--           is given)
local function waction(action, val, a, num)
  local code = map_action[action]
  assert(code, "bad action name `"..action.."'")
  wputxw(code * 0x10000 + (val or 0))
  if a then
    actargs[#actargs+1] = a
    secpos = secpos + (num or 1)
  elseif num then
    secpos = secpos + num
  end
end
104
-- Flush action list (intervening C code or buffer pos overflow).
-- Unless `term` is set, a STOP action is appended before the dasm_put() line
-- is written.
local function wflush(term)
  -- The list length still equals the recorded start offset: nothing to flush.
  if actargs[1] == #actlist then return end
  if not term then
    waction("STOP") -- Terminate action list.
  end
  local arglist = concat(actargs, ", ")
  wline(format("dasm_put(Dst, %s);", arglist), true)
  -- The next dasm_put() starts at the current end of the action list. That
  -- offset is its first argument and occupies a buffer position, too.
  actargs = { #actlist }
  secpos = 1
end
113
-- Put escaped word.
-- Words up to 0x000fffff are preceded by an ESC action, larger words are
-- stored as they are.
local function wputw(n)
  local needs_escape = n <= 0x000fffff
  if needs_escape then
    waction("ESC")
  end
  wputxw(n)
end
119
-- Reserve position for word.
-- An empty string is appended to the action list as a placeholder and the
-- index of that slot is returned.
local function wpos()
  local slot = #actlist + 1
  actlist[slot] = ""
  return slot
end
126
-- Store word to reserved position.
-- Words above 0x000fffff go directly into the slot. Smaller words are
-- inserted right after the slot, and the slot itself receives an ESC action.
local function wputpos(pos, n)
  local in_range = n >= 0 and n <= 0xffffffff and n % 1 == 0
  assert(in_range, "word out of range")
  if n > 0x000fffff then
    actlist[pos] = n
    return
  end
  insert(actlist, pos+1, n)
  actlist[pos] = map_action.ESC * 0x10000
end
136
137------------------------------------------------------------------------------
138
-- Global label name -> global label number. With auto assignment on 1st use.
-- Numbers are handed out sequentially from 20 up to 2047, and a name must
-- start with a letter or underscore followed by letters, digits or
-- underscores.
local next_global = 20
local map_global = {}
setmetatable(map_global, {
  __index = function(tab, label)
    if not match(label, "^[%a_][%w_]*$") then werror("bad global label") end
    local num = next_global
    if num > 2047 then werror("too many global labels") end
    next_global = num + 1
    tab[label] = num -- Cache it, so later lookups bypass this handler.
    return num
  end,
})
149
-- Dump global labels.
-- The names are written to `out` in order of their label numbers.
-- `lvl` is not used by this function.
local function dumpglobals(out, lvl)
  local by_num = {}
  for label, num in pairs(map_global) do
    by_num[num] = label
  end
  out:write("Global labels:\n")
  local last = next_global - 1
  for num = 20, last do
    out:write(format("  %s\n", by_num[num]))
  end
  out:write("\n")
end
160
-- Write global label enum.
-- Every label becomes an enumerator named `prefix`..label, in order of the
-- label numbers, followed by a closing `prefix`.."_MAX" entry.
local function writeglobals(out, prefix)
  local by_num = {}
  for label, num in pairs(map_global) do
    by_num[num] = label
  end
  out:write("enum {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write("  ", prefix, by_num[num], ",\n")
  end
  out:write("  ", prefix, "_MAX\n};\n")
end
171
-- Write global label names.
-- Emits a C array called `name` with the label names as string literals in
-- order of their label numbers, terminated by a null pointer.
local function writeglobalnames(out, name)
  local by_num = {}
  for label, num in pairs(map_global) do
    by_num[num] = label
  end
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_global - 1
  for num = 20, last do
    out:write("  \"", by_num[num], "\",\n")
  end
  out:write("  (const char *)0\n};\n")
end
182
183------------------------------------------------------------------------------
184
-- Extern label name -> extern label number. With auto assignment on 1st use.
-- Numbers are handed out sequentially from 0 up to 2047. map_extern_ holds
-- the reverse mapping (number -> name).
local next_extern = 0
local map_extern_ = {}
local map_extern = {}
setmetatable(map_extern, {
  __index = function(tab, label)
    -- No restrictions on the name for now.
    local num = next_extern
    if num > 2047 then werror("too many extern labels") end
    next_extern = num + 1
    tab[label] = num
    map_extern_[num] = label
    return num
  end,
})
197
-- Dump extern labels.
-- The names are written to `out` in order of their label numbers.
-- `lvl` is not used by this function.
local function dumpexterns(out, lvl)
  out:write("Extern labels:\n")
  local last = next_extern - 1
  for num = 0, last do
    local label = map_extern_[num]
    out:write(format("  %s\n", label))
  end
  out:write("\n")
end
206
-- Write extern label names.
-- Emits a C array called `name` with the extern names as string literals in
-- order of their label numbers, terminated by a null pointer.
local function writeexternnames(out, name)
  out:write("static const char *const ", name, "[] = {\n")
  local last = next_extern - 1
  for num = 0, last do
    local label = map_extern_[num]
    out:write("  \"", label, "\",\n")
  end
  out:write("  (const char *)0\n};\n")
end
215
216------------------------------------------------------------------------------
217
-- Arch-specific maps.

-- Ext. register name -> int. name.
local map_archdef = { xzr = "@x31", wzr = "@w31", lr = "x30", }

-- Int. register name -> ext. name. Built as the inverse of map_archdef.
local map_reg_rev = {}
for ext_name, int_name in pairs(map_archdef) do
  map_reg_rev[int_name] = ext_name
end

-- Type name -> { ctype, reg }
local map_type = {}
-- Type number (for Dt... macros).
local ctypenum = 0
228
-- Reverse defines for registers.
-- Returns the external name for an internal register name, or the argument
-- itself when there is no mapping for it.
function _M.revdef(s)
  local ext_name = map_reg_rev[s]
  if ext_name then
    return ext_name
  end
  return s
end
233
-- Shift name -> shift type number.
local map_shift = {}
for num, name in ipairs({ "lsl", "lsr", "asr" }) do
  map_shift[name] = num - 1
end

-- Extend name -> extend type number.
local map_extend = {}
for num, name in ipairs({
  "uxtb", "uxth", "uxtw", "uxtx",
  "sxtb", "sxth", "sxtw", "sxtx",
}) do
  map_extend[name] = num - 1
end

-- Condition name -> condition code.
local map_cond = {}
for num, name in ipairs({
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al",
}) do
  map_cond[name] = num - 1
end
-- Alternative spellings that share the codes of cs and cc.
map_cond.hs = 2
map_cond.lo = 3
246
247------------------------------------------------------------------------------
248
-- Register class letter (one of x, w, q, d, s, h, b) of the registers parsed
-- so far, or a false value while no register has been seen yet.
local parse_reg_type

-- Parse a register operand.
--   expr:    register name, type name, "type:register" override, or a
--            variable register of the form R<class>(expr)
--   shift:   bit position of the register field in the opcode
--   no_vreg: if set, no VREG action is emitted for a variable register
-- Returns the register number shifted into place plus the matching map_type
-- entry (if any). For a variable register the result is 0.
-- The register class is recorded in parse_reg_type. A class that differs
-- from the one already recorded is reported as a size mismatch.
local function parse_reg(expr, shift, no_vreg)
  if not expr then werror("expected register name") end
  local tname, ovreg = match(expr, "^([%w_]+):(@?%l%d+)$")
  if not tname then
    tname, ovreg = match(expr, "^([%w_]+):(R[xwqdshb]%b())$")
  end
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    if not reg then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    expr = reg
  end
  local ok31, rt, r = match(expr, "^(@?)([xwqdshb])([123]?[0-9])$")
  if r then
    r = tonumber(r)
    -- Register numbers are limited to 0..31. Number 31 is only accepted
    -- with the internal "@" prefix or for classes other than w and x.
    -- The inner parentheses matter: without them every non-w/x register
    -- would pass, including the numbers 32..39 admitted by the pattern.
    if r <= 30 or (r == 31 and (ok31 ~= "" or (rt ~= "w" and rt ~= "x"))) then
      if not parse_reg_type then
        parse_reg_type = rt
      elseif parse_reg_type ~= rt then
        werror("register size mismatch")
      end
      return shl(r, shift), tp
    end
  end
  local vrt, vreg = match(expr, "^R([xwqdshb])(%b())$")
  if vreg then
    if not parse_reg_type then
      parse_reg_type = vrt
    elseif parse_reg_type ~= vrt then
      werror("register size mismatch")
    end
    if not no_vreg then waction("VREG", shift, vreg) end
    return 0
  end
  werror("bad register name `"..expr.."'")
end
289
-- Parse the base register of an address operand into bits 5 and up.
-- "sp" yields 0x3e0, i.e. register number 31 at bit position 5. Any other
-- base has to be an x register. The recorded register class is reset
-- afterwards.
-- Returns the shifted register number and the type info from parse_reg().
local function parse_reg_base(expr)
  if expr ~= "sp" then
    local regbits, tinfo = parse_reg(expr, 5)
    if parse_reg_type ~= "x" then werror("bad register type") end
    parse_reg_type = false
    return regbits, tinfo
  end
  return 0x3e0
end
297
-- Environment used for the chunks compiled by loadenv().
local parse_ctx = {}

-- Compile the source string `s` so that it runs with parse_ctx as its
-- environment. Which variant is used depends on whether setfenv exists.
local loadenv
if setfenv then
  loadenv = function(s)
    local code = loadstring(s, "")
    if code then setfenv(code, parse_ctx) end
    return code
  end
else
  loadenv = function(s)
    return load(s, "", nil, parse_ctx)
  end
end
307
-- Try to parse simple arithmetic, too, since some basic ops are aliases.
-- Returns the number for a numeric literal or for an expression that
-- evaluates to a number, and nil otherwise.
local function parse_number(n)
  local direct = tonumber(n)
  if direct then return direct end
  local chunk = loadenv("return "..n)
  if not chunk then return nil end
  local ok, value = pcall(chunk)
  if not ok or type(value) ~= "number" then return nil end
  return value
end
319
-- Parse a "#expr" immediate for a bit field of the opcode.
--   bits:   width of the field
--   shift:  bit position of the field
--   scale:  number of low bits that must be zero and are dropped
--   signed: whether negative values are allowed
-- A constant is range-checked and returned shifted into place. Anything
-- else is deferred to an IMM action and 0 is returned.
local function parse_imm(imm, bits, shift, scale, signed)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local n = parse_number(expr)
  if not n then
    waction("IMM", (signed and 32768 or 0)+scale*1024+bits*32+shift, expr)
    return 0
  end
  local m = sar(n, scale)
  -- Only accept values that are an exact multiple of the scale.
  if shl(m, scale) == n then
    if signed then
      local top = sar(m, bits-1)
      if top == 0 then
        return shl(m, shift)
      elseif top == -1 then
        -- Negative value: wrap it into the field width.
        return shl(m + shl(1, bits), shift)
      end
    elseif sar(m, bits) == 0 then
      return shl(m, shift)
    end
  end
  werror("out of range immediate `"..expr.."'")
end
341
-- Parse a "#expr" immediate for the field at bit 10 used by the "I" template
-- character. A constant either fits into 12 bits or has only bits 12..23
-- set. The latter is stored shifted down with 0x00400000 added.
-- Non-constant expressions are deferred to an IMM12 action.
local function parse_imm12(imm)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local n = parse_number(expr)
  if not n then
    waction("IMM12", 0, expr)
    return 0
  end
  if shr(n, 12) == 0 then
    return shl(n, 10)
  end
  if band(n, 0xff000fff) == 0 then
    return shr(n, 2) + 0x00400000
  end
  werror("out of range immediate `"..expr.."'")
end
358
-- Parse a "#expr" immediate for the "i" template character.
-- An integer constant in the range 0..0xffffffff is analysed as a bit pattern
-- and encoded into fields at bit positions 16 and 10 of the opcode
-- (presumably the A64 logical-immediate encoding -- confirm against the
-- architecture manual). Patterns that cannot be encoded raise an error.
-- Any other expression is deferred to an IMM13X action (for x registers,
-- passing the low and high 32 bits as two arguments) or an IMM13W action.
local function parse_imm13(imm)
  imm = match(imm, "^#(.*)$")
  if not imm then werror("expected immediate operand") end
  local n = parse_number(imm)
  local r64 = parse_reg_type == "x"
  if n and n % 1 == 0 and n >= 0 and n <= 0xffffffff then
    -- Normalize to a pattern with bit 0 clear and remember the inversion.
    local inv = false
    if band(n, 1) == 1 then n = bit.bnot(n); inv = true end
    -- Build a string of the 32 bits, least significant bit first.
    local t = {}
    for i=1,32 do t[i] = band(n, 1); n = shr(n, 1) end
    local b = table.concat(t)
    -- Extend to 64 characters: constant fill for x registers, otherwise the
    -- 32 bit pattern is repeated.
    b = b..(r64 and (inv and "1" or "0"):rep(32) or b)
    -- Split off the leading run of zeros, the run of ones and what follows.
    local p0, p1, p0a, p1a = b:match("^(0+)(1+)(0*)(1*)")
    if p0 then
      -- Width of the repeating element: the full register width if there is
      -- no second run of ones, else the length of one ones+zeros period.
      local w = p1a == "" and (r64 and 64 or 32) or #p1+#p0a
      -- The width must be a power of two and the element must tile all 64
      -- characters.
      if band(w, w-1) == 0 and b == b:sub(1, w):rep(64/w) then
	local s = band(-2*w, 0x3f) - 1
	if w == 64 then s = s + 0x1000 end
	if inv then
	  return shl(w-#p1-#p0, 16) + shl(s+w-#p1, 10)
	else
	  return shl(w-#p0, 16) + shl(s+#p1, 10)
	end
      end
    end
    werror("out of range immediate `"..imm.."'")
  elseif r64 then
    waction("IMM13X", 0, format("(unsigned int)(%s)", imm))
    actargs[#actargs+1] = format("(unsigned int)((unsigned long long)(%s)>>32)", imm)
    return 0
  else
    waction("IMM13W", 0, imm)
    return 0
  end
end
394
-- Parse a "#expr" immediate in the range 0..63 for the "T" template
-- character. The low five bits go to bit position 19 and 0x80000000 is
-- added for values of 32 and above.
-- Non-constant expressions are deferred to an IMM6 action.
local function parse_imm6(imm)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local n = parse_number(expr)
  if not n then
    waction("IMM6", 0, expr)
    return 0
  end
  if n >= 0 and n <= 63 then
    local low5 = shl(band(n, 0x1f), 19)
    local high = n >= 32 and 0x80000000 or 0
    return low5 + high
  end
  werror("out of range immediate `"..expr.."'")
end
409
-- Parse the offset of a load/store address. `imm` comes without the leading
-- "#" and `scale` is the log2 of the access size.
-- The scaled form is preferred and the unscaled form is the fallback.
-- Non-constant expressions are deferred to an IMML action.
local function parse_imm_load(imm, scale)
  local n = parse_number(imm)
  if not n then
    waction("IMML", scale, imm)
    return 0
  end
  local m = sar(n, scale)
  if shl(m, scale) == n and m >= 0 and m < 0x1000 then
    return shl(m, 10) + 0x01000000 -- Scaled, unsigned 12 bit offset.
  end
  if n >= -256 and n < 256 then
    return shl(band(n, 511), 12) -- Unscaled, signed 9 bit offset.
  end
  werror("out of range immediate `"..imm.."'")
end
425
-- Parse a "#expr" floating point immediate for the "F" template character.
-- Only constants are supported. The value must have a mantissa that is
-- exactly representable in four bits and an exponent that fits into three
-- bits. The result combines the sign (0x00100000), the exponent at bit 17
-- and the mantissa at bit 13.
local function parse_fpimm(imm)
  local expr = match(imm, "^#(.*)$")
  if not expr then werror("expected immediate operand") end
  local n = parse_number(expr)
  if not n then
    werror("NYI fpimm action")
    return
  end
  local m, e = math.frexp(n)
  local sign, e2 = 0, band(e-2, 7)
  if m < 0 then
    m = -m
    sign = 0x00100000
  end
  m = m*32-16
  local mant_ok = m % 1 == 0 and m >= 0 and m <= 15
  -- The exponent must survive the round trip through three signed bits.
  if mant_ok and sar(shl(e2, 29), 29)+2 == e then
    return sign + shl(e2, 17) + shl(m, 13)
  end
  werror("out of range immediate `"..expr.."'")
end
443
-- Parse a shift operand of the form "<lsl|lsr|asr> #amount".
-- The shift type goes to bit position 22 and the 6 bit amount to bit
-- position 10.
local function parse_shift(expr)
  local name, amount = match(expr, "^(%S+)%s*(.*)$")
  local kind = map_shift[name]
  if not kind then werror("expected shift operand") end
  local immbits = parse_imm(amount, 6, 10, 0, false)
  return immbits + shl(kind, 22)
end
450
-- Parse an "lsl #n" operand where n is a multiple of 16.
-- With x registers the values 0, 16, 32 and 48 pass the mask check,
-- otherwise only 0 and 16. The amount is returned shifted left by 17.
local function parse_lslx16(expr)
  local n = tonumber((match(expr, "^lsl%s*#(%d+)$")))
  if not n then werror("expected shift operand") end
  local badmask = parse_reg_type == "x" and 0xffffffcf or 0xffffffef
  if band(n, badmask) ~= 0 then
    werror("bad shift amount")
  end
  return shl(n, 17)
end
460
-- Parse an extend operand of the form "<extend> [#amount]".
-- "lsl" is accepted too and is treated like the extend numbered 3 for x
-- registers and 2 otherwise. The extend type goes to bit position 13 and the
-- optional 3 bit amount to bit position 10.
local function parse_extend(expr)
  local name, amount = match(expr, "^(%S+)%s*(.*)$")
  local kind
  if name == "lsl" then
    kind = parse_reg_type == "x" and 3 or 2
  else
    kind = map_extend[name]
  end
  if not kind then werror("expected extend operand") end
  local immbits = 0
  if amount ~= "" then
    immbits = parse_imm(amount, 3, 10, 0, false)
  end
  return immbits + shl(kind, 13)
end
471
-- Parse a condition name and return its code at bit position 12.
-- `inv` is xor'ed into the code, so passing 1 flips the lowest bit of the
-- condition code.
local function parse_cond(expr, inv)
  local code = map_cond[expr]
  if not code then werror("expected condition operand") end
  local encoded = bit.bxor(code, inv)
  return shl(encoded, 12)
end
477
-- Parse the address operand of a single-register load/store and merge it
-- into the opcode.
--   params:  operand list of the instruction
--   nparams: operand count (not used here)
--   n:       index of the address operand in params
--   op:      opcode assembled so far. Its top two bits give the scale.
-- Accepted operand forms:
--   type_reg tail            typed register plus field expression (IMML action)
--   [base], #imm             separate trailing immediate (adds 0x400)
--   [base, #imm]!            writeback form (adds 0xc00)
--   [base]                   plain base register
--   [base, #imm]             scaled or unscaled offset, see parse_imm_load()
--   [base, Rm]               register index
--   [base, Rm, ext [#n]]     register index with extend/shift
-- Returns the updated opcode.
local function parse_load(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local scale = shr(op, 30)
  local pn, p2 = params[n], params[n+1]
  -- Report a missing address operand as a regular assembler error instead
  -- of failing inside the pattern match below.
  if not pn then werror("expected address operand") end
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    -- Not a bracketed address: try "type_reg tail".
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMML", scale, format(tp.ctypefmt, tailr))
          return op + base
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    -- [base], #imm
    if wb == "!" then werror("bad use of '!'") end
    op = op + parse_reg_base(p1) + parse_imm(p2, 9, 12, 0, true) + 0x400
  elseif wb == "!" then
    -- [base, #imm]!
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    if not p1a then werror("bad use of '!'") end
    op = op + parse_reg_base(p1a) + parse_imm(p2a, 9, 12, 0, true) + 0xc00
  else
    local p1a, p2a = match(p1, "^([^,%s]*)%s*(.*)$")
    op = op + parse_reg_base(p1a)
    if p2a ~= "" then
      local imm = match(p2a, "^,%s*#(.*)$")
      if imm then
        op = op + parse_imm_load(imm, scale)
      else
        -- Register index, optionally followed by an extend/shift and amount.
        local p2b, p3b, p3s = match(p2a, "^,%s*([^,%s]*)%s*,?%s*(%S*)%s*(.*)$")
        op = op + parse_reg(p2b, 16) + 0x00200800
        if parse_reg_type ~= "x" and parse_reg_type ~= "w" then
          werror("bad index register type")
        end
        if p3b == "" then
          if parse_reg_type ~= "x" then werror("bad index register type") end
          op = op + 0x6000
        else
          -- The amount is either absent, zero, or equal to the scale.
          if p3s == "" or p3s == "#0" then
          elseif p3s == "#"..scale then
            op = op + 0x1000
          else
            werror("bad scale")
          end
          if parse_reg_type == "x" then
            if p3b == "lsl" and p3s ~= "" then op = op + 0x6000
            elseif p3b == "sxtx" then op = op + 0xe000
            else
              werror("bad extend/shift specifier")
            end
          else
            if p3b == "uxtw" then op = op + 0x4000
            elseif p3b == "sxtw" then op = op + 0xc000
            else
              werror("bad extend/shift specifier")
            end
          end
        end
      end
    else
      -- [base]: wb cannot be "!" here, that case is handled above.
      op = op + 0x01000000
    end
  end
  return op
end
548
-- Parse the address operand of a register-pair load/store and merge it into
-- the opcode.
--   params:  operand list of the instruction
--   nparams: operand count (not used here)
--   n:       index of the address operand in params
--   op:      opcode assembled so far. The scale is derived from its bits.
-- Accepted operand forms:
--   type_reg tail     typed register plus field expression (IMM action)
--   [base], #imm      separate trailing immediate (adds 0x00800000)
--   [base, #imm]!     writeback form (adds 0x01800000)
--   [base, #imm]      offset form (adds 0x01000000)
--   [base]            same as an offset of #0
-- Returns the updated opcode.
local function parse_load_pair(params, nparams, n, op)
  if params[n+2] then werror("too many operands") end
  local pn, p2 = params[n], params[n+1]
  -- Report a missing address operand as a regular assembler error instead
  -- of failing inside the pattern match below.
  if not pn then werror("expected address operand") end
  local scale = 2 + shr(op, 31 - band(shr(op, 26), 1))
  local p1, wb = match(pn, "^%[%s*(.-)%s*%](!?)$")
  if not p1 then
    -- Not a bracketed address: try "type_reg tail".
    if not p2 then
      local reg, tailr = match(pn, "^([%w_:]+)%s*(.*)$")
      if reg and tailr ~= "" then
        local base, tp = parse_reg_base(reg)
        if tp then
          waction("IMM", 32768+7*32+15+scale*1024, format(tp.ctypefmt, tailr))
          return op + base + 0x01000000
        end
      end
    end
    werror("expected address operand")
  end
  if p2 then
    if wb == "!" then werror("bad use of '!'") end
    op = op + 0x00800000
  else
    local p1a, p2a = match(p1, "^([^,%s]*)%s*,%s*(.*)$")
    -- Without an explicit offset inside the brackets, use #0.
    if p1a then p1, p2 = p1a, p2a else p2 = "#0" end
    op = op + (wb == "!" and 0x01800000 or 0x01000000)
  end
  return op + parse_reg_base(p1) + parse_imm(p2, 7, 15, scale, true)
end
577
-- Parse a label operand.
--   label: label text
--   def:   true for a label definition, false for a label reference
-- Returns the mode ("PC", "LG", "EXT" or "A"), a number and, for some modes,
-- a C expression. Nothing is returned for an unrecognized label.
local function parse_label(label, def)
  local head = label:sub(1, 2)
  if head == "=>" then
    -- =>label (pc label reference)
    return "PC", 0, label:sub(3)
  elseif head == "->" then
    -- ->name (global label reference)
    return "LG", map_global[label:sub(3)]
  end
  if def then
    -- [1-9] (local label definition)
    if match(label, "^[1-9]$") then
      return "LG", 10+tonumber(label)
    end
    return
  end
  -- [<>][1-9] (local label reference)
  local dir, lnum = match(label, "^([<>])([1-9])$")
  if dir then -- Fwd: 1-9, Bkwd: 11-19.
    return "LG", lnum + (dir == ">" and 0 or 10)
  end
  -- extern label (extern label reference)
  local extname = match(label, "^extern%s+(%S+)$")
  if extname then
    return "EXT", map_extern[extname]
  end
  -- &expr (pointer)
  if label:sub(1, 1) == "&" then
    return "A", 0, format("(ptrdiff_t)(%s)", label:sub(2))
  end
end
610
-- Classify the opcode of an instruction with a label operand.
-- Returns the value that is added to the relocation action for the
-- instruction group. Opcodes matching none of the groups trigger an
-- assertion failure.
local function branch_type(op)
  if band(op, 0x7c000000) == 0x14000000 then
    return 0 -- B, BL
  end
  if shr(op, 24) == 0x54 or band(op, 0x7e000000) == 0x34000000 or
     band(op, 0x3b000000) == 0x18000000 then
    return 0x800 -- B.cond, CBZ, CBNZ, LDR* literal
  end
  if band(op, 0x7e000000) == 0x36000000 then
    return 0x1000 -- TBZ, TBNZ
  end
  local group = band(op, 0x9f000000)
  if group == 0x10000000 then
    return 0x2000 -- ADR
  end
  -- band() with a single argument normalizes the constant to the same
  -- number range as the result of the masking above.
  if group == band(0x90000000) then
    return 0x3000 -- ADRP
  end
  assert(false, "unknown branch type")
end
623
624------------------------------------------------------------------------------
625
-- Forward declarations: both are assigned further down.
local map_op, op_template

-- Create an opcode handler for an alias of the opcode `opname`.
-- Called without params the handler returns a description string naming the
-- target opcode. Otherwise `f` first rewrites the params in place and the
-- template of the target opcode is then processed with them.
local function op_alias(opname, f)
  local function handler(params, nparams)
    if params then
      f(params, nparams)
      op_template(params, map_op[opname], nparams)
      return
    end
    return "-> "..opname:sub(1, -3)
  end
  return handler
end
635
-- Rewrite the params of a bitfield extract alias: the fourth operand becomes
-- the expression (third)+(fourth)-1, with the leading "#" of both dropped.
local function alias_bfx(p)
  local lsb = p[3]:sub(2)
  local width = p[4]:sub(2)
  p[4] = "#("..lsb..")+("..width..")-1"
end
639
-- Rewrite the params of a bitfield insert alias.
-- The register size (32 for w registers, 64 otherwise) is taken from the
-- first operand. The third operand becomes (size-(third))%size and the
-- fourth becomes (fourth)-1.
local function alias_bfiz(p)
  parse_reg(p[1], 0, true)
  local regbits = parse_reg_type == "w" and 32 or 64
  p[3] = "#("..regbits.."-("..p[3]:sub(2).."))%"..regbits
  p[4] = "#("..p[4]:sub(2)..")-1"
end
650
-- Handler for a left shift by an immediate, expressed as an alias of ubfm.
-- With the register size taken from the first operand (32 for w registers,
-- 64 otherwise) and shift amount sh, the third operand becomes
-- (size-(sh))%size and a fourth operand size-1-(sh) is added.
local alias_lslimm = op_alias("ubfm_4", function(p)
  parse_reg(p[1], 0, true)
  local amount = p[3]:sub(2)
  local width = parse_reg_type == "w" and 32 or 64
  p[3] = "#("..width.."-("..amount.."))%"..width
  p[4] = "#"..(width-1).."-("..amount..")"
end)
662
-- Template strings for ARM instructions.
--
-- A template consists of alternatives separated by "|". op_template() tries
-- them in order and takes the first one that parses. Each alternative starts
-- with the opcode as eight hex digits, followed by one character per parsing
-- step as handled by parse_template():
--   D N M A   register operand placed at bit 0, 5, 16 or 10
--   m         the previous operand again, as a register at bit 16
--   p         the next operand may be "sp" (rewritten to "@x31")
--   g         registers must be w or x; x adds 0x80000000
--   f         registers must be s or d; d adds 0x00400000
--   x w d s q registers parsed so far must be of this class
--   L P       load/store address, load/store pair address
--   B         label operand
--   I i       immediates handled by parse_imm12() and parse_imm13()
--   W T       16 bit immediate at bit 5, immediate handled by parse_imm6()
--   1 2 5 V   immediates: 6 bits at bit 16, 6 bits at bit 10, 5 bits at
--             bit 16, 4 bits at bit 0
--   F Z       floating point immediate, literal zero ("#0" or "#0.0")
--   S X R     shift operand, extend operand, "lsl #n" in steps of 16
--   C c       condition, condition with its lowest bit flipped
-- The table keys end in "_<n>" or "_*". Presumably this is the operand count
-- used for the lookup, which happens outside this part of the file.
-- Entries may also be functions taking (params, nparams).
map_op = {
  -- Basic data processing instructions.
  add_3  = "0b000000DNMg|11000000pDpNIg|8b206000pDpNMx",
  add_4  = "0b000000DNMSg|0b200000DNMXg|8b200000pDpNMXx|8b200000pDpNxMwX",
  adds_3 = "2b000000DNMg|31000000DpNIg|ab206000DpNMx",
  adds_4 = "2b000000DNMSg|2b200000DNMXg|ab200000DpNMXx|ab200000DpNxMwX",
  cmn_2  = "2b00001fNMg|3100001fpNIg|ab20601fpNMx",
  cmn_3  = "2b00001fNMSg|2b20001fNMXg|ab20001fpNMXx|ab20001fpNxMwX",

  sub_3  = "4b000000DNMg|51000000pDpNIg|cb206000pDpNMx",
  sub_4  = "4b000000DNMSg|4b200000DNMXg|cb200000pDpNMXx|cb200000pDpNxMwX",
  subs_3 = "6b000000DNMg|71000000DpNIg|eb206000DpNMx",
  subs_4 = "6b000000DNMSg|6b200000DNMXg|eb200000DpNMXx|eb200000DpNxMwX",
  cmp_2  = "6b00001fNMg|7100001fpNIg|eb20601fpNMx",
  cmp_3  = "6b00001fNMSg|6b20001fNMXg|eb20001fpNMXx|eb20001fpNxMwX",

  neg_2  = "4b0003e0DMg",
  neg_3  = "4b0003e0DMSg",
  negs_2 = "6b0003e0DMg",
  negs_3 = "6b0003e0DMSg",

  adc_3  = "1a000000DNMg",
  adcs_3 = "3a000000DNMg",
  sbc_3  = "5a000000DNMg",
  sbcs_3 = "7a000000DNMg",
  ngc_2  = "5a0003e0DMg",
  ngcs_2 = "7a0003e0DMg",

  and_3  = "0a000000DNMg|12000000pDNig",
  and_4  = "0a000000DNMSg",
  orr_3  = "2a000000DNMg|32000000pDNig",
  orr_4  = "2a000000DNMSg",
  eor_3  = "4a000000DNMg|52000000pDNig",
  eor_4  = "4a000000DNMSg",
  ands_3 = "6a000000DNMg|72000000DNig",
  ands_4 = "6a000000DNMSg",
  tst_2  = "6a00001fNMg|7200001fNig",
  tst_3  = "6a00001fNMSg",

  bic_3  = "0a200000DNMg",
  bic_4  = "0a200000DNMSg",
  orn_3  = "2a200000DNMg",
  orn_4  = "2a200000DNMSg",
  eon_3  = "4a200000DNMg",
  eon_4  = "4a200000DNMSg",
  bics_3 = "6a200000DNMg",
  bics_4 = "6a200000DNMSg",

  movn_2 = "12800000DWg",
  movn_3 = "12800000DWRg",
  movz_2 = "52800000DWg",
  movz_3 = "52800000DWRg",
  movk_2 = "72800000DWg",
  movk_3 = "72800000DWRg",

  -- TODO: this doesn't cover all valid immediates for mov reg, #imm.
  mov_2  = "2a0003e0DMg|52800000DW|320003e0pDig|11000000pDpNg",
  mov_3  = "2a0003e0DMSg",
  mvn_2  = "2a2003e0DMg",
  mvn_3  = "2a2003e0DMSg",

  adr_2  = "10000000DBx",
  adrp_2 = "90000000DBx",

  csel_4  = "1a800000DNMCg",
  csinc_4 = "1a800400DNMCg",
  csinv_4 = "5a800000DNMCg",
  csneg_4 = "5a800400DNMCg",
  cset_2  = "1a9f07e0Dcg",
  csetm_2 = "5a9f03e0Dcg",
  cinc_3  = "1a800400DNmcg",
  cinv_3  = "5a800000DNmcg",
  cneg_3  = "5a800400DNmcg",

  ccmn_4 = "3a400000NMVCg|3a400800N5VCg",
  ccmp_4 = "7a400000NMVCg|7a400800N5VCg",

  madd_4 = "1b000000DNMAg",
  msub_4 = "1b008000DNMAg",
  mul_3  = "1b007c00DNMg",
  mneg_3 = "1b00fc00DNMg",

  smaddl_4 = "9b200000DxNMwAx",
  smsubl_4 = "9b208000DxNMwAx",
  smull_3  = "9b207c00DxNMw",
  smnegl_3 = "9b20fc00DxNMw",
  smulh_3  = "9b407c00DNMx",
  umaddl_4 = "9ba00000DxNMwAx",
  umsubl_4 = "9ba08000DxNMwAx",
  umull_3  = "9ba07c00DxNMw",
  umnegl_3 = "9ba0fc00DxNMw",
  umulh_3  = "9bc07c00DNMx",

  udiv_3 = "1ac00800DNMg",
  sdiv_3 = "1ac00c00DNMg",

  -- Bit operations.
  sbfm_4 = "13000000DN12w|93400000DN12x",
  bfm_4  = "33000000DN12w|b3400000DN12x",
  ubfm_4 = "53000000DN12w|d3400000DN12x",
  extr_4 = "13800000DNM2w|93c00000DNM2x",

  sxtb_2 = "13001c00DNw|93401c00DNx",
  sxth_2 = "13003c00DNw|93403c00DNx",
  sxtw_2 = "93407c00DxNw",
  uxtb_2 = "53001c00DNw",
  uxth_2 = "53003c00DNw",

  sbfx_4  = op_alias("sbfm_4", alias_bfx),
  bfxil_4 = op_alias("bfm_4", alias_bfx),
  ubfx_4  = op_alias("ubfm_4", alias_bfx),
  sbfiz_4 = op_alias("sbfm_4", alias_bfiz),
  bfi_4   = op_alias("bfm_4", alias_bfiz),
  ubfiz_4 = op_alias("ubfm_4", alias_bfiz),

  -- A third operand starting with "#" (byte 35) selects the immediate form.
  lsl_3  = function(params, nparams)
    if params and params[3]:byte() == 35 then
      return alias_lslimm(params, nparams)
    else
      return op_template(params, "1ac02000DNMg", nparams)
    end
  end,
  lsr_3  = "1ac02400DNMg|53007c00DN1w|d340fc00DN1x",
  asr_3  = "1ac02800DNMg|13007c00DN1w|9340fc00DN1x",
  ror_3  = "1ac02c00DNMg|13800000DNm2w|93c00000DNm2x",

  clz_2   = "5ac01000DNg",
  cls_2   = "5ac01400DNg",
  rbit_2  = "5ac00000DNg",
  rev_2   = "5ac00800DNw|dac00c00DNx",
  rev16_2 = "5ac00400DNg",
  rev32_2 = "dac00800DNx",

  -- Loads and stores.
  ["strb_*"]  = "38000000DwL",
  ["ldrb_*"]  = "38400000DwL",
  ["ldrsb_*"] = "38c00000DwL|38800000DxL",
  ["strh_*"]  = "78000000DwL",
  ["ldrh_*"]  = "78400000DwL",
  ["ldrsh_*"] = "78c00000DwL|78800000DxL",
  ["str_*"]   = "b8000000DwL|f8000000DxL|bc000000DsL|fc000000DdL",
  ["ldr_*"]   = "18000000DwB|58000000DxB|1c000000DsB|5c000000DdB|b8400000DwL|f8400000DxL|bc400000DsL|fc400000DdL",
  ["ldrsw_*"] = "98000000DxB|b8800000DxL",
  -- NOTE: ldur etc. are handled by ldr et al.

  ["stp_*"]   = "28000000DAwP|a8000000DAxP|2c000000DAsP|6c000000DAdP|ac000000DAqP",
  ["ldp_*"]   = "28400000DAwP|a8400000DAxP|2c400000DAsP|6c400000DAdP|ac400000DAqP",
  ["ldpsw_*"] = "68400000DAxP",

  -- Branches.
  b_1    = "14000000B",
  bl_1   = "94000000B",
  blr_1  = "d63f0000Nx",
  br_1   = "d61f0000Nx",
  ret_0  = "d65f03c0",
  ret_1  = "d65f0000Nx",
  -- b.cond is added below.
  cbz_2  = "34000000DBg",
  cbnz_2 = "35000000DBg",
  tbz_3  = "36000000DTBw|36000000DTBx",
  tbnz_3 = "37000000DTBw|37000000DTBx",

  -- ARM64e: Pointer authentication codes (PAC).
  blraaz_1  = "d63f081fNx",
  braa_2    = "d71f0800NDx",
  braaz_1   = "d61f081fNx",
  pacibsp_0 = "d503237f",
  retab_0   = "d65f0fff",

  -- Miscellaneous instructions.
  -- TODO: hlt, hvc, smc, svc, eret, dcps[123], drps, mrs, msr
  -- TODO: sys, sysl, ic, dc, at, tlbi
  -- TODO: hint, yield, wfe, wfi, sev, sevl
  -- TODO: clrex, dsb, dmb, isb
  nop_0  = "d503201f",
  brk_0  = "d4200000",
  brk_1  = "d4200000W",

  -- Floating point instructions.
  fmov_2  = "1e204000DNf|1e260000DwNs|1e270000DsNw|9e660000DxNd|9e670000DdNx|1e201000DFf",
  fabs_2  = "1e20c000DNf",
  fneg_2  = "1e214000DNf",
  fsqrt_2 = "1e21c000DNf",

  fcvt_2  = "1e22c000DdNs|1e624000DsNd",

  -- TODO: half-precision and fixed-point conversions.
  fcvtas_2 = "1e240000DwNs|9e240000DxNs|1e640000DwNd|9e640000DxNd",
  fcvtau_2 = "1e250000DwNs|9e250000DxNs|1e650000DwNd|9e650000DxNd",
  fcvtms_2 = "1e300000DwNs|9e300000DxNs|1e700000DwNd|9e700000DxNd",
  fcvtmu_2 = "1e310000DwNs|9e310000DxNs|1e710000DwNd|9e710000DxNd",
  fcvtns_2 = "1e200000DwNs|9e200000DxNs|1e600000DwNd|9e600000DxNd",
  fcvtnu_2 = "1e210000DwNs|9e210000DxNs|1e610000DwNd|9e610000DxNd",
  fcvtps_2 = "1e280000DwNs|9e280000DxNs|1e680000DwNd|9e680000DxNd",
  fcvtpu_2 = "1e290000DwNs|9e290000DxNs|1e690000DwNd|9e690000DxNd",
  fcvtzs_2 = "1e380000DwNs|9e380000DxNs|1e780000DwNd|9e780000DxNd",
  fcvtzu_2 = "1e390000DwNs|9e390000DxNs|1e790000DwNd|9e790000DxNd",

  scvtf_2  = "1e220000DsNw|9e220000DsNx|1e620000DdNw|9e620000DdNx",
  ucvtf_2  = "1e230000DsNw|9e230000DsNx|1e630000DdNw|9e630000DdNx",

  frintn_2 = "1e244000DNf",
  frintp_2 = "1e24c000DNf",
  frintm_2 = "1e254000DNf",
  frintz_2 = "1e25c000DNf",
  frinta_2 = "1e264000DNf",
  frintx_2 = "1e274000DNf",
  frinti_2 = "1e27c000DNf",

  fadd_3   = "1e202800DNMf",
  fsub_3   = "1e203800DNMf",
  fmul_3   = "1e200800DNMf",
  fnmul_3  = "1e208800DNMf",
  fdiv_3   = "1e201800DNMf",

  fmadd_4  = "1f000000DNMAf",
  fmsub_4  = "1f008000DNMAf",
  fnmadd_4 = "1f200000DNMAf",
  fnmsub_4 = "1f208000DNMAf",

  fmax_3   = "1e204800DNMf",
  fmaxnm_3 = "1e206800DNMf",
  fmin_3   = "1e205800DNMf",
  fminnm_3 = "1e207800DNMf",

  fcmp_2   = "1e202000NMf|1e202008NZf",
  fcmpe_2  = "1e202010NMf|1e202018NZf",

  fccmp_4  = "1e200400NMVCf",
  fccmpe_4 = "1e200410NMVCf",

  fcsel_4  = "1e200c00DNMCf",

  -- TODO: crc32*, aes*, sha*, pmull
  -- TODO: SIMD instructions.
}
900
-- Add one conditional branch template per condition name: "b<cond>_1" with
-- the condition code added to the opcode 0x54000000.
for cc_name, cc_code in pairs(map_cond) do
  local opname = "b"..cc_name.."_1"
  map_op[opname] = tohex(0x54000000 + cc_code).."B"
end
904
905------------------------------------------------------------------------------
906
-- Handle opcodes defined with template strings.
--   params:   operand list of the instruction
--   template: a single template alternative (eight hex digits of opcode
--             followed by one character per parsing step)
--   nparams:  operand count (only passed on to the address parsers)
--   pos:      reserved action list position that receives the opcode
-- Any error raised by the parsing helpers propagates to the caller. On
-- success the completed opcode is stored at `pos`.
local function parse_template(params, template, nparams, pos)
  local op = tonumber(template:sub(1, 8), 16)
  local n = 1 -- Index of the next operand to consume.

  parse_reg_type = false

  -- Process each character.
  for p in gmatch(template:sub(9), ".") do
    local q = params[n]
    if p == "D" then
      op = op + parse_reg(q, 0); n = n + 1
    elseif p == "N" then
      op = op + parse_reg(q, 5); n = n + 1
    elseif p == "M" then
      op = op + parse_reg(q, 16); n = n + 1
    elseif p == "A" then
      op = op + parse_reg(q, 10); n = n + 1
    elseif p == "m" then
      -- Reuse the previous operand without consuming another one.
      op = op + parse_reg(params[n-1], 16)

    elseif p == "p" then
      if q == "sp" then params[n] = "@x31" end
    elseif p == "g" then
      if parse_reg_type == "x" then
        op = op + 0x80000000
      elseif parse_reg_type ~= "w" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif p == "f" then
      if parse_reg_type == "d" then
        op = op + 0x00400000
      elseif parse_reg_type ~= "s" then
        werror("bad register type")
      end
      parse_reg_type = false
    elseif p == "x" or p == "w" or p == "d" or p == "s" or p == "q" then
      if parse_reg_type ~= p then
        werror("register size mismatch")
      end
      parse_reg_type = false

    elseif p == "L" then
      op = parse_load(params, nparams, n, op)
    elseif p == "P" then
      op = parse_load_pair(params, nparams, n, op)

    elseif p == "B" then
      local mode, v, s = parse_label(q, false); n = n + 1
      if not mode then werror("bad label `"..q.."'") end
      local m = branch_type(op)
      if mode == "A" then
        -- Pointer target: pass the low and the high 32 bits separately.
        waction("REL_"..mode, v+m, format("(unsigned int)(%s)", s))
        actargs[#actargs+1] = format("(unsigned int)((%s)>>32)", s)
      else
        waction("REL_"..mode, v+m, s, 1)
      end

    elseif p == "I" then
      op = op + parse_imm12(q); n = n + 1
    elseif p == "i" then
      op = op + parse_imm13(q); n = n + 1
    elseif p == "W" then
      op = op + parse_imm(q, 16, 5, 0, false); n = n + 1
    elseif p == "T" then
      op = op + parse_imm6(q); n = n + 1
    elseif p == "1" then
      op = op + parse_imm(q, 6, 16, 0, false); n = n + 1
    elseif p == "2" then
      op = op + parse_imm(q, 6, 10, 0, false); n = n + 1
    elseif p == "5" then
      op = op + parse_imm(q, 5, 16, 0, false); n = n + 1
    elseif p == "V" then
      op = op + parse_imm(q, 4, 0, 0, false); n = n + 1
    elseif p == "F" then
      op = op + parse_fpimm(q); n = n + 1
    elseif p == "Z" then
      if q ~= "#0" and q ~= "#0.0" then werror("expected zero immediate") end
      n = n + 1

    elseif p == "S" then
      op = op + parse_shift(q); n = n + 1
    elseif p == "X" then
      op = op + parse_extend(q); n = n + 1
    elseif p == "R" then
      op = op + parse_lslx16(q); n = n + 1
    elseif p == "C" then
      op = op + parse_cond(q, 0); n = n + 1
    elseif p == "c" then
      op = op + parse_cond(q, 1); n = n + 1

    else
      -- Unknown template character: a defect in the template table.
      assert(false)
    end
  end
  wputpos(pos, op)
end
1006
function op_template(params, template, nparams)
  -- Without operands this is a help/listing request: hand back the template
  -- with every run of 8 hex digits (the opcode words) stripped out.
  -- Note that gsub's replacement count is returned as a second value.
  if not params then return template:gsub("%x%x%x%x%x%x%x%x", "") end

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 4 positions.
  if maxsecpos < secpos + 4 then wflush() end
  local pos = wpos()
  -- Remember the buffer state so a failed alternative can be undone.
  local saved_actions, saved_args, saved_secpos = #actlist, #actargs, secpos

  -- Try each '|'-separated alternative in order; the first one that
  -- parses without raising an error wins.
  local last_err
  for alt in gmatch(template, "[^|]+") do
    local ok, msg = pcall(parse_template, params, alt, nparams, pos)
    if ok then return end
    last_err = msg
    -- Roll back whatever the failed attempt appended (at most 4 entries
    -- in each of the two lists).
    secpos = saved_secpos
    for i = 1, 4 do
      actlist[saved_actions + i] = nil
      actargs[saved_args + i] = nil
    end
  end
  -- No alternative matched: re-raise the error of the last attempt,
  -- without adding position information.
  error(last_err, 0)
end
1032
-- Register the template handler in the opcode map under ".template__".
-- NOTE(review): presumably the DynASM core looks this key up to expand
-- string-valued opcode templates -- confirm against dynasm.lua.
map_op[".template__"] = op_template
1034
1035------------------------------------------------------------------------------
1036
1037-- Pseudo-opcode to mark the position where the action list is to be emitted.
map_op[".actionlist_1"] = function(params)
  if params then
    -- The operand is used verbatim as the name; no syntax check is done.
    local cvar = params[1]
    -- Queue a callback instead of text; it writes the action list to the
    -- output handle it is eventually called with.
    wline(function(out) writeactions(out, cvar) end)
    return
  end
  -- Called without operands: describe the expected parameter.
  return "cvar"
end
1043
1044-- Pseudo-opcode to mark the position where the global enum is to be emitted.
map_op[".globals_1"] = function(params)
  if params then
    -- The operand is used verbatim as the prefix; no syntax check is done.
    local enum_prefix = params[1]
    -- Queue a callback instead of text; it writes the globals to the
    -- output handle it is eventually called with.
    wline(function(out) writeglobals(out, enum_prefix) end)
    return
  end
  -- Called without operands: describe the expected parameter.
  return "prefix"
end
1050
1051-- Pseudo-opcode to mark the position where the global names are to be emitted.
map_op[".globalnames_1"] = function(params)
  if params then
    -- The operand is used verbatim as the name; no syntax check is done.
    local cvar = params[1]
    -- Queue a callback instead of text; it writes the global names to the
    -- output handle it is eventually called with.
    wline(function(out) writeglobalnames(out, cvar) end)
    return
  end
  -- Called without operands: describe the expected parameter.
  return "cvar"
end
1057
1058-- Pseudo-opcode to mark the position where the extern names are to be emitted.
map_op[".externnames_1"] = function(params)
  if params then
    -- The operand is used verbatim as the name; no syntax check is done.
    local cvar = params[1]
    -- Queue a callback instead of text; it writes the extern names to the
    -- output handle it is eventually called with.
    wline(function(out) writeexternnames(out, cvar) end)
    return
  end
  -- Called without operands: describe the expected parameter.
  return "cvar"
end
1064
1065------------------------------------------------------------------------------
1066
1067-- Label pseudo-opcode (converted from trailing colon form).
map_op[".label_1"] = function(params)
  -- Called without operands: describe the accepted label forms.
  if not params then return "[1-9] | ->global | =>pcexpr" end
  -- Flush first if one more section buffer position would exceed the limit.
  if maxsecpos < secpos + 1 then wflush() end
  -- Parse the operand as a label definition (second argument is true).
  local kind, num, expr = parse_label(params[1], true)
  -- An unparsable label or an "EXT" label cannot be defined here.
  local definable = kind and kind ~= "EXT"
  if not definable then werror("bad label definition") end
  waction("LABEL_"..kind, num, expr, 1)
end
1075
1076------------------------------------------------------------------------------
1077
1078-- Pseudo-opcodes for data storage.
local function op_data(params)
  -- Called without operands: describe the expected parameters.
  if not params then return "imm..." end
  -- Only ".long" emits 4 bytes per operand; any other opcode name handled
  -- here emits 8 bytes.
  local width = (params.op == ".long") and 4 or 8
  for _, arg in ipairs(params) do
    local value = parse_number(arg)
    if value then
      -- Accept the constant if it is representable in 32 bits, either as
      -- a signed value or as its unsigned counterpart.
      local lo = tobit(value)
      local fits = (lo == value) or (lo < 0 and lo + 2^32 == value)
      if fits then
        -- Low word, always written as an unsigned 32 bit quantity.
        if lo < 0 then
          wputw(lo + 2^32)
        else
          wputw(lo)
        end
        if width == 8 then
          -- High word: all ones for a negative constant, zero otherwise.
          if value < 0 then
            wputw(0xffffffff)
          else
            wputw(0)
          end
        end
      elseif width == 4 then
        werror("bad immediate `"..arg.."'")
      else
        -- Too wide for the constant path of an 8 byte item: fall through
        -- and treat the operand as an expression below.
        value = nil
      end
    end
    if not value then
      local mode, v, s = parse_label(arg, false)
      if width == 4 then
        if mode then werror("label does not fit into .long") end
        waction("IMMV", 0, arg)
      elseif mode and mode ~= "A" then
        -- Any label kind other than "A" is emitted as a REL_* action.
        waction("REL_"..mode, v+0x8000, s, 1)
      else
        -- For an "A" label use the expression returned by parse_label,
        -- otherwise the operand text itself.
        local expr = arg
        if mode == "A" then expr = s end
        -- Emit the value as two 32 bit halves: low word, then high word.
        waction("IMMV", 0, format("(unsigned int)(%s)", expr))
        waction("IMMV", 0, format("(unsigned int)((unsigned long long)(%s)>>32)", expr))
      end
    end
    -- Flush if fewer than two section buffer positions remain.
    if maxsecpos < secpos + 2 then wflush() end
  end
end
-- All three data pseudo-opcodes share the same handler.
map_op[".long_*"] = op_data
map_op[".quad_*"] = op_data
map_op[".addr_*"] = op_data
1116
1117-- Alignment pseudo-opcode.
map_op[".align_1"] = function(params)
  -- Called without operands: describe the expected parameter.
  if not params then return "numpow2" end
  -- Flush first if one more section buffer position would exceed the limit.
  if maxsecpos < secpos + 1 then wflush() end
  local align = tonumber(params[1])
  if align then
    -- Must be a power of 2 in the range (2 ... 256): compare against
    -- 2, 4, 8, ..., 256 in turn.
    local pow2 = 1
    for _ = 1, 8 do
      pow2 = pow2 * 2
      if align == pow2 then
        waction("ALIGN", align-1, nil, 1) -- Action byte is 2**n-1.
        return
      end
    end
  end
  -- Not a number, or not one of the accepted powers of two.
  werror("bad alignment")
end
1135
1136------------------------------------------------------------------------------
1137
1138-- Pseudo-opcode for (primitive) type definitions (map to C types).
map_op[".type_3"] = function(params, nparams)
  -- Called without operands: describe the parameters for the 2- or
  -- 3-operand form.
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end
  local name, ctype, reg = params[1], params[2], params[3]
  -- The type name must look like an identifier.
  local valid_name = match(name, "^[%a_][%w_]*$")
  if not valid_name then
    werror("bad type name `"..name.."'")
  end
  -- Each type name may be defined only once.
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  -- Add #type to defines. A bit unclean to put it in map_archdef.
  map_archdef["#"..name] = "sizeof("..ctype..")"
  -- Add new type and emit shortcut define. The running type counter is
  -- only advanced once the define has been written.
  local index = ctypenum + 1
  local entry = { ctype = ctype, reg = reg }
  entry.ctypefmt = format("Dt%X(%%s)", index)
  map_type[name] = entry
  wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", index, ctype))
  ctypenum = index
end
-- The 2-operand form (no register) shares the same handler.
map_op[".type_2"] = map_op[".type_3"]
1164
1165-- Dump type definitions.
local function dumptypes(out, lvl)
  -- Collect the type names and sort them so the listing order is stable.
  local names = {}
  for key in pairs(map_type) do names[#names+1] = key end
  sort(names)
  out:write("Type definitions:\n")
  -- One row per type: name, C type and (if any) the associated register.
  for i = 1, #names do
    local name = names[i]
    local info = map_type[name]
    out:write(format("  %-20s %-20s %s\n", name, info.ctype, info.reg or ""))
  end
  out:write("\n")
end
1178
1179------------------------------------------------------------------------------
1180
1181-- Set the current section.
_M.section = function(num)
  -- Emit a SECTION action carrying the section number.
  waction("SECTION", num)
  -- SECTION is a terminal action, so flush with the terminal flag set.
  wflush(true)
end
1186
1187------------------------------------------------------------------------------
1188
1189-- Dump architecture description.
_M.dumparch = function(out)
  -- Banner built from the module information table.
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  -- Followed by the action dump.
  dumpactions(out)
end
1195
1196-- Dump all user defined elements.
_M.dumpdef = function(out, lvl)
  -- Dump order: types, then globals, then externs. Every dumper gets the
  -- same output handle and level.
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end
1202
1203------------------------------------------------------------------------------
1204
1205-- Pass callbacks from/to the DynASM core.
_M.passcb = function(wl, we, wf, ww)
  -- Store the four callbacks received from the core in the file-level
  -- variables used throughout this module.
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  -- Hand this module's flush function back to the caller.
  return wflush
end
1210
1211-- Setup the arch-specific module.
_M.setup = function(arch, opt)
  -- Remember the architecture and options for later use by this module.
  g_arch = arch
  g_opt = opt
end
1215
1216-- Merge the core maps and the arch-specific maps.
_M.mergemaps = function(map_coreop, map_def)
  -- Lookups that miss in map_op fall back to the core opcode map.
  local op_fallback = { __index = map_coreop }
  -- Lookups that miss in the core's define map fall back to map_archdef.
  local def_fallback = { __index = map_archdef }
  setmetatable(map_op, op_fallback)
  setmetatable(map_def, def_fallback)
  return map_op, map_def
end

-- The module table is the result of loading this file.
return _M
1224
1225------------------------------------------------------------------------------
1226
1227