xref: /PHP-8.0/ext/opcache/jit/libudis86/decode.c (revision dabc28d1)
1 /* udis86 - libudis86/decode.c
2  *
3  * Copyright (c) 2002-2009 Vivek Thampi
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification,
7  * are permitted provided that the following conditions are met:
8  *
9  *     * Redistributions of source code must retain the above copyright notice,
10  *       this list of conditions and the following disclaimer.
11  *     * Redistributions in binary form must reproduce the above copyright notice,
12  *       this list of conditions and the following disclaimer in the documentation
13  *       and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17  * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
18  * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
19  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
20  * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
21  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
22  * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24  * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 #include "udint.h"
27 #include "types.h"
28 #include "extern.h"
29 #include "decode.h"
30 
31 #ifndef __UD_STANDALONE__
32 # include <string.h>
33 #endif /* __UD_STANDALONE__ */
34 
35 /* The max number of prefixes to an instruction */
36 #define MAX_PREFIXES    15
37 
38 /* rex prefix bits */
39 #define REX_W(r)        ( ( 0xF & ( r ) )  >> 3 )
40 #define REX_R(r)        ( ( 0x7 & ( r ) )  >> 2 )
41 #define REX_X(r)        ( ( 0x3 & ( r ) )  >> 1 )
42 #define REX_B(r)        ( ( 0x1 & ( r ) )  >> 0 )
43 #define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
44                           ( P_REXR(n) << 2 ) | \
45                           ( P_REXX(n) << 1 ) | \
46                           ( P_REXB(n) << 0 ) )
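/*
 * A rex prefix byte has the form 0100WRXB; the macros above read the
 * W, R, X and B bits from the low nibble, so they work on the raw
 * prefix byte as well as on the masked value kept in ud._rex.
 * e.g. rex = 0x4c gives W = 1, R = 1, X = 0, B = 0.
 */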
47 
48 /* scale-index-base bits */
49 #define SIB_S(b)        ( ( b ) >> 6 )
50 #define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
51 #define SIB_B(b)        ( ( b ) & 7 )
52 
53 /* modrm bits */
54 #define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
55 #define MODRM_NNN(b)    ( ( ( b ) >> 3 ) & 7 )
56 #define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
57 #define MODRM_RM(b)     ( ( b ) & 7 )
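/*
 * Both bytes pack three fields:
 *   modrm: mod[7:6] reg/nnn[5:3] rm[2:0]
 *   sib:   scale[7:6] index[5:3] base[2:0]
 * e.g. modrm = 0xd9 (11 011 001b) decodes to mod = 3, reg = 3, rm = 1.
 */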
58 
59 static int decode_ext(struct ud *u, uint16_t ptr);
60 static int decode_opcode(struct ud *u);
61 
62 enum reg_class { /* register classes */
63   REGCLASS_GPR,
64   REGCLASS_MMX,
65   REGCLASS_CR,
66   REGCLASS_DB,
67   REGCLASS_SEG,
68   REGCLASS_XMM
69 };
70 
71 /*
72  * inp_start
73  *    Should be called before each decode operation.
74  */
75 static void
76 inp_start(struct ud *u)
77 {
78   u->inp_ctr = 0;
79 }
80 
81 static uint8_t
82 inp_peek(struct ud *u)
83 {
84   if (u->inp_end == 0) {
85     if (u->inp_buf != NULL) {
86       if (u->inp_buf_index < u->inp_buf_size) {
87         return u->inp_buf[u->inp_buf_index];
88       }
89     } else if (u->inp_peek != UD_EOI) {
90       return u->inp_peek;
91     } else {
92       int c;
93       if ((c = u->inp_hook(u)) != UD_EOI) {
94         u->inp_peek = c;
95         return u->inp_peek;
96       }
97     }
98   }
99   u->inp_end = 1;
100   UDERR(u, "byte expected, eoi received\n");
101   return 0;
102 }
103 
104 static uint8_t
105 inp_next(struct ud *u)
106 {
107   if (u->inp_end == 0) {
108     if (u->inp_buf != NULL) {
109       if (u->inp_buf_index < u->inp_buf_size) {
110         u->inp_ctr++;
111         return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
112       }
113     } else {
114       int c = u->inp_peek;
115       if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) {
116         u->inp_peek = UD_EOI;
117         u->inp_curr = c;
118         u->inp_sess[u->inp_ctr++] = u->inp_curr;
119         return u->inp_curr;
120       }
121     }
122   }
123   u->inp_end = 1;
124   UDERR(u, "byte expected, eoi received\n");
125   return 0;
126 }
127 
128 static uint8_t
129 inp_curr(struct ud *u)
130 {
131   return u->inp_curr;
132 }
133 
134 
135 /*
136  * inp_uint8
137  * inp_uint16
138  * inp_uint32
139  * inp_uint64
140  *    Load little-endian values from input
141  */
142 static uint8_t
143 inp_uint8(struct ud* u)
144 {
145   return inp_next(u);
146 }
147 
148 static uint16_t
149 inp_uint16(struct ud* u)
150 {
151   uint16_t r, ret;
152 
153   ret = inp_next(u);
154   r = inp_next(u);
155   return ret | (r << 8);
156 }
157 
158 static uint32_t
159 inp_uint32(struct ud* u)
160 {
161   uint32_t r, ret;
162 
163   ret = inp_next(u);
164   r = inp_next(u);
165   ret = ret | (r << 8);
166   r = inp_next(u);
167   ret = ret | (r << 16);
168   r = inp_next(u);
169   return ret | (r << 24);
170 }
171 
172 static uint64_t
173 inp_uint64(struct ud* u)
174 {
175   uint64_t r, ret;
176 
177   ret = inp_next(u);
178   r = inp_next(u);
179   ret = ret | (r << 8);
180   r = inp_next(u);
181   ret = ret | (r << 16);
182   r = inp_next(u);
183   ret = ret | (r << 24);
184   r = inp_next(u);
185   ret = ret | (r << 32);
186   r = inp_next(u);
187   ret = ret | (r << 40);
188   r = inp_next(u);
189   ret = ret | (r << 48);
190   r = inp_next(u);
191   return ret | (r << 56);
192 }
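/*
 * Example: the byte sequence 78 56 34 12 read with inp_uint32() yields
 * 0x12345678 (least significant byte first).
 */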
193 
194 
195 static UD_INLINE int
196 eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
197 {
198   if (dis_mode == 64) {
199     return rex_w ? 64 : (pfx_opr ? 16 : 32);
200   } else if (dis_mode == 32) {
201     return pfx_opr ? 16 : 32;
202   } else {
203     UD_ASSERT(dis_mode == 16);
204     return pfx_opr ? 32 : 16;
205   }
206 }
207 
208 
209 static UD_INLINE int
210 eff_adr_mode(int dis_mode, int pfx_adr)
211 {
212   if (dis_mode == 64) {
213     return pfx_adr ? 32 : 64;
214   } else if (dis_mode == 32) {
215     return pfx_adr ? 16 : 32;
216   } else {
217     UD_ASSERT(dis_mode == 16);
218     return pfx_adr ? 32 : 16;
219   }
220 }
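/*
 * e.g. in 64-bit mode: rex.w selects a 64-bit operand size, otherwise a
 * 66 prefix selects 16 bits and the default is 32; a 67 prefix shrinks
 * the address size from 64 to 32 bits.
 */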
221 
222 
223 /*
224  * decode_prefixes
225  *
226  *  Extracts instruction prefixes.
227  */
228 static int
229 decode_prefixes(struct ud *u)
230 {
231   int done = 0;
232   uint8_t curr = 0, last = 0;
233   UD_RETURN_ON_ERROR(u);
234 
235   do {
236     last = curr;
237     curr = inp_next(u);
238     UD_RETURN_ON_ERROR(u);
239     if (u->inp_ctr == MAX_INSN_LENGTH) {
240       UD_RETURN_WITH_ERROR(u, "max instruction length");
241     }
242 
243     switch (curr)
244     {
245     case 0x2E:
246       u->pfx_seg = UD_R_CS;
247       break;
248     case 0x36:
249       u->pfx_seg = UD_R_SS;
250       break;
251     case 0x3E:
252       u->pfx_seg = UD_R_DS;
253       break;
254     case 0x26:
255       u->pfx_seg = UD_R_ES;
256       break;
257     case 0x64:
258       u->pfx_seg = UD_R_FS;
259       break;
260     case 0x65:
261       u->pfx_seg = UD_R_GS;
262       break;
263     case 0x67: /* address-size override prefix */
264       u->pfx_adr = 0x67;
265       break;
266     case 0xF0:
267       u->pfx_lock = 0xF0;
268       break;
269     case 0x66:
270       u->pfx_opr = 0x66;
271       break;
272     case 0xF2:
273       u->pfx_str = 0xf2;
274       break;
275     case 0xF3:
276       u->pfx_str = 0xf3;
277       break;
278     default:
279       /* consume if rex */
280       done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
281       break;
282     }
283   } while (!done);
284   /* in 64-bit mode, a rex prefix must be the last prefix */
285   if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
286     u->pfx_rex = last;
287   }
288   return 0;
289 }
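/*
 * Example (64-bit mode): for the byte sequence f3 48 a5 (rep movsq) the
 * loop records pfx_str = 0xf3, consumes the rex-shaped byte 0x48 and keeps
 * scanning, then stops on 0xa5; since the last prefix byte was rex-shaped,
 * pfx_rex is set to 0x48 and 0xa5 is left as the current byte for
 * decode_opcode().
 */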
290 
291 
292 /*
293  * vex_l, vex_w
294  *  Return the vex.L and vex.W bits
295  */
296 static UD_INLINE uint8_t
297 vex_l(const struct ud *u)
298 {
299   UD_ASSERT(u->vex_op != 0);
300   return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1;
301 }
302 
303 static UD_INLINE uint8_t
304 vex_w(const struct ud *u)
305 {
306   UD_ASSERT(u->vex_op != 0);
307   return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0;
308 }
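/*
 * VEX payload layout assumed here and in decode_vex()/resolve_mode():
 *   0xc4 (3-byte form): b1 = ~R ~X ~B m-mmmm,  b2 = W vvvv L pp
 *   0xc5 (2-byte form): b1 = ~R vvvv L pp
 * L is bit 2 of the last payload byte; W exists only in the 3-byte form.
 */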
309 
310 
311 static UD_INLINE uint8_t
312 modrm(struct ud * u)
313 {
314     if ( !u->have_modrm ) {
315         u->modrm = inp_next( u );
316         u->modrm_offset = (uint8_t) (u->inp_ctr - 1);
317         u->have_modrm = 1;
318     }
319     return u->modrm;
320 }
321 
322 
323 static unsigned int
324 resolve_operand_size(const struct ud* u, ud_operand_size_t osize)
325 {
326   switch (osize) {
327   case SZ_V:
328     return u->opr_mode;
329   case SZ_Z:
330     return u->opr_mode == 16 ? 16 : 32;
331   case SZ_Y:
332     return u->opr_mode == 16 ? 32 : u->opr_mode;
333   case SZ_RDQ:
334     return u->dis_mode == 64 ? 64 : 32;
335   case SZ_X:
336     UD_ASSERT(u->vex_op != 0);
337     return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ?  SZ_QQ : SZ_DQ;
338   default:
339     return osize;
340   }
341 }
342 
343 
344 static int resolve_mnemonic( struct ud* u )
345 {
346   /* resolve 3dnow weirdness. */
347   if ( u->mnemonic == UD_I3dnow ) {
348     u->mnemonic = ud_itab[ u->le->table[ inp_curr( u )  ] ].mnemonic;
349   }
350   /* SWAPGS is only valid in 64-bit mode */
351   if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
352     UDERR(u, "swapgs invalid outside 64-bit mode\n");
353     return -1;
354   }
355 
356   if (u->mnemonic == UD_Ixchg) {
357     if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX  &&
358          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
359         (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
360          u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
361       u->operand[0].type = UD_NONE;
362       u->operand[1].type = UD_NONE;
363       u->mnemonic = UD_Inop;
364     }
365   }
366 
367   if (u->mnemonic == UD_Inop && u->pfx_repe) {
368     u->pfx_repe = 0;
369     u->mnemonic = UD_Ipause;
370   }
371   return 0;
372 }
373 
374 
375 /* -----------------------------------------------------------------------------
376  * decode_a()- Decodes operands of the type seg:offset
377  * -----------------------------------------------------------------------------
378  */
379 static void
380 decode_a(struct ud* u, struct ud_operand *op)
381 {
382   if (u->opr_mode == 16) {
383     /* seg16:off16 */
384     op->type = UD_OP_PTR;
385     op->size = 32;
386     op->lval.ptr.off = inp_uint16(u);
387     op->lval.ptr.seg = inp_uint16(u);
388   } else {
389     /* seg16:off32 */
390     op->type = UD_OP_PTR;
391     op->size = 48;
392     op->lval.ptr.off = inp_uint32(u);
393     op->lval.ptr.seg = inp_uint16(u);
394   }
395 }
396 
397 /* -----------------------------------------------------------------------------
398  * decode_gpr() - Returns decoded General Purpose Register
399  * -----------------------------------------------------------------------------
400  */
401 static enum ud_type
402 decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
403 {
404   switch (s) {
405     case 64:
406         return UD_R_RAX + rm;
407     case 32:
408         return UD_R_EAX + rm;
409     case 16:
410         return UD_R_AX  + rm;
411     case  8:
412         if (u->dis_mode == 64 && u->pfx_rex) {
413             if (rm >= 4)
414                 return UD_R_SPL + (rm-4);
415             return UD_R_AL + rm;
416         } else return UD_R_AL + rm;
417     case 0:
418         /* invalid size in case of a decode error */
419         UD_ASSERT(u->error);
420         return UD_NONE;
421     default:
422         UD_ASSERT(!"invalid operand size");
423         return UD_NONE;
424   }
425 }
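/*
 * Note: when any rex prefix is present, 8-bit encodings 4..7 select
 * spl/bpl/sil/dil instead of ah/ch/dh/bh, which is what the rm >= 4
 * branch above implements.
 */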
426 
427 static void
428 decode_reg(struct ud *u,
429            struct ud_operand *opr,
430            int type,
431            int num,
432            int size)
433 {
434   int reg;
435   size = resolve_operand_size(u, size);
436   switch (type) {
437     case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
438     case REGCLASS_MMX : reg = UD_R_MM0  + (num & 7); break;
439     case REGCLASS_XMM :
440       reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0);
441       break;
442     case REGCLASS_CR : reg = UD_R_CR0  + num; break;
443     case REGCLASS_DB : reg = UD_R_DR0  + num; break;
444     case REGCLASS_SEG : {
445       /*
446        * Only 6 segment registers, anything else is an error.
447        */
448       if ((num & 7) > 5) {
449         UDERR(u, "invalid segment register value\n");
450         return;
451       } else {
452         reg = UD_R_ES + (num & 7);
453       }
454       break;
455     }
456     default:
457       UD_ASSERT(!"invalid register type");
458       return;
459   }
460   opr->type = UD_OP_REG;
461   opr->base = reg;
462   opr->size = size;
463 }
464 
465 
466 /*
467  * decode_imm
468  *
469  *    Decode Immediate values.
470  */
471 static void
472 decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
473 {
474   op->size = resolve_operand_size(u, size);
475   op->type = UD_OP_IMM;
476 
477   switch (op->size) {
478   case  8: op->lval.sbyte = inp_uint8(u);   break;
479   case 16: op->lval.uword = inp_uint16(u);  break;
480   case 32: op->lval.udword = inp_uint32(u); break;
481   case 64: op->lval.uqword = inp_uint64(u); break;
482   default: return;
483   }
484 }
485 
486 
487 /*
488  * decode_mem_disp
489  *
490  *    Decode mem address displacement.
491  */
492 static void
493 decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
494 {
495   switch (size) {
496   case 8:
497     op->offset = 8;
498     op->lval.ubyte  = inp_uint8(u);
499     break;
500   case 16:
501     op->offset = 16;
502     op->lval.uword  = inp_uint16(u);
503     break;
504   case 32:
505     op->offset = 32;
506     op->lval.udword = inp_uint32(u);
507     break;
508   case 64:
509     op->offset = 64;
510     op->lval.uqword = inp_uint64(u);
511     break;
512   default:
513       return;
514   }
515 }
516 
517 
518 /*
519  * decode_modrm_reg
520  *
521  *    Decodes reg field of mod/rm byte
522  *
523  */
524 static UD_INLINE void
525 decode_modrm_reg(struct ud         *u,
526                  struct ud_operand *operand,
527                  unsigned int       type,
528                  unsigned int       size)
529 {
530   uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u));
531   decode_reg(u, operand, type, reg, size);
532 }
533 
534 
535 /*
536  * decode_modrm_rm
537  *
538  *    Decodes rm field of mod/rm byte
539  *
540  */
541 static void
542 decode_modrm_rm(struct ud         *u,
543                 struct ud_operand *op,
544                 unsigned char      type,    /* register type */
545                 unsigned int       size)    /* operand size */
546 
547 {
548   size_t offset = 0;
549   unsigned char mod, rm;
550 
551   /* get mod, r/m and reg fields */
552   mod = MODRM_MOD(modrm(u));
553   rm  = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u));
554 
555   /*
556    * If mod is 11b, then the modrm.rm specifies a register.
557    *
558    */
559   if (mod == 3) {
560     decode_reg(u, op, type, rm, size);
561     return;
562   }
563 
564   /*
565    * !11b => Memory Address
566    */
567   op->type = UD_OP_MEM;
568   op->size = resolve_operand_size(u, size);
569 
570   if (u->adr_mode == 64) {
571     op->base = UD_R_RAX + rm;
572     if (mod == 1) {
573       offset = 8;
574     } else if (mod == 2) {
575       offset = 32;
576     } else if (mod == 0 && (rm & 7) == 5) {
577       op->base = UD_R_RIP;
578       offset = 32;
579     } else {
580       offset = 0;
581     }
582     /*
583      * Scale-Index-Base (SIB)
584      */
585     if ((rm & 7) == 4) {
586       inp_next(u);
587 
588       op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3));
589       op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3));
590       /* special conditions for base reference */
591       if (op->index == UD_R_RSP) {
592         op->index = UD_NONE;
593         op->scale = UD_NONE;
594       } else {
595         op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
596       }
597 
598       if (op->base == UD_R_RBP || op->base == UD_R_R13) {
599         if (mod == 0) {
600           op->base = UD_NONE;
601         }
602         if (mod == 1) {
603           offset = 8;
604         } else {
605           offset = 32;
606         }
607       }
608     } else {
609         op->scale = UD_NONE;
610         op->index = UD_NONE;
611     }
612   } else if (u->adr_mode == 32) {
613     op->base = UD_R_EAX + rm;
614     if (mod == 1) {
615       offset = 8;
616     } else if (mod == 2) {
617       offset = 32;
618     } else if (mod == 0 && rm == 5) {
619       op->base = UD_NONE;
620       offset = 32;
621     } else {
622       offset = 0;
623     }
624 
625     /* Scale-Index-Base (SIB) */
626     if ((rm & 7) == 4) {
627       inp_next(u);
628 
629       op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
630       op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
631       op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));
632 
633       if (op->index == UD_R_ESP) {
634         op->index = UD_NONE;
635         op->scale = UD_NONE;
636       }
637 
638       /* special condition for base reference */
639       if (op->base == UD_R_EBP) {
640         if (mod == 0) {
641           op->base = UD_NONE;
642         }
643         if (mod == 1) {
644           offset = 8;
645         } else {
646           offset = 32;
647         }
648       }
649     } else {
650       op->scale = UD_NONE;
651       op->index = UD_NONE;
652     }
653   } else {
654     const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
655                                      UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
656     const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
657                                      UD_NONE, UD_NONE, UD_NONE, UD_NONE };
658     op->base  = bases[rm & 7];
659     op->index = indices[rm & 7];
660     op->scale = UD_NONE;
661     if (mod == 0 && rm == 6) {
662       offset = 16;
663       op->base = UD_NONE;
664     } else if (mod == 1) {
665       offset = 8;
666     } else if (mod == 2) {
667       offset = 16;
668     }
669   }
670 
671   if (offset) {
672     decode_mem_disp(u, offset, op);
673   } else {
674     op->offset = 0;
675   }
676 }
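/*
 * Worked example (32-bit address mode, no rex): modrm = 0x44, sib = 0x88,
 * disp = 0x10. mod = 1 and rm = 4, so a SIB byte and an 8-bit displacement
 * follow; sib = 0x88 gives scale = 4, index = ecx, base = eax, i.e. the
 * operand is [eax+ecx*4+0x10].
 */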
677 
678 
679 /*
680  * decode_moffset
681  *    Decode offset-only memory operand
682  */
683 static void
684 decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
685 {
686   opr->type  = UD_OP_MEM;
687   opr->base  = UD_NONE;
688   opr->index = UD_NONE;
689   opr->scale = UD_NONE;
690   opr->size  = resolve_operand_size(u, size);
691   decode_mem_disp(u, u->adr_mode, opr);
692 }
693 
694 
695 static void
696 decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size)
697 {
698   uint8_t vvvv;
699   UD_ASSERT(u->vex_op != 0);
700   vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf;
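  /* vex.vvvv is encoded in one's complement, hence the (0xf & ~vvvv) below */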
701   decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size);
702 }
703 
704 
705 /*
706  * decode_vex_immreg
707  *    Decode source operand encoded in immediate byte [7:4]
708  */
709 static int
710 decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size)
711 {
712   uint8_t imm  = inp_next(u);
713   uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7;
714   UD_RETURN_ON_ERROR(u);
715   UD_ASSERT(u->vex_op != 0);
716   decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size);
717   return 0;
718 }
719 
720 
721 /*
722  * decode_operand
723  *
724  *      Decodes a single operand.
725  *      Returns the type of the operand (UD_NONE if none)
726  */
727 static int
728 decode_operand(struct ud           *u,
729                struct ud_operand   *operand,
730                enum ud_operand_code type,
731                unsigned int         size)
732 {
733   operand->type = UD_NONE;
734   operand->_oprcode = type;
735 
736   switch (type) {
737     case OP_A :
738       decode_a(u, operand);
739       break;
740     case OP_MR:
741       decode_modrm_rm(u, operand, REGCLASS_GPR,
742                       MODRM_MOD(modrm(u)) == 3 ?
743                         Mx_reg_size(size) : Mx_mem_size(size));
744       break;
745     case OP_F:
746       u->br_far  = 1;
747       /* intended fall through */
748     case OP_M:
749       if (MODRM_MOD(modrm(u)) == 3) {
750         UDERR(u, "expected modrm.mod != 3\n");
751       }
752       /* intended fall through */
753     case OP_E:
754       decode_modrm_rm(u, operand, REGCLASS_GPR, size);
755       break;
756     case OP_G:
757       decode_modrm_reg(u, operand, REGCLASS_GPR, size);
758       break;
759     case OP_sI:
760     case OP_I:
761       decode_imm(u, size, operand);
762       break;
763     case OP_I1:
764       operand->type = UD_OP_CONST;
765       operand->lval.udword = 1;
766       break;
767     case OP_N:
768       if (MODRM_MOD(modrm(u)) != 3) {
769         UDERR(u, "expected modrm.mod == 3\n");
770       }
771       /* intended fall through */
772     case OP_Q:
773       decode_modrm_rm(u, operand, REGCLASS_MMX, size);
774       break;
775     case OP_P:
776       decode_modrm_reg(u, operand, REGCLASS_MMX, size);
777       break;
778     case OP_U:
779       if (MODRM_MOD(modrm(u)) != 3) {
780         UDERR(u, "expected modrm.mod == 3\n");
781       }
782       /* intended fall through */
783     case OP_W:
784       decode_modrm_rm(u, operand, REGCLASS_XMM, size);
785       break;
786     case OP_V:
787       decode_modrm_reg(u, operand, REGCLASS_XMM, size);
788       break;
789     case OP_H:
790       decode_vex_vvvv(u, operand, size);
791       break;
792     case OP_MU:
793       decode_modrm_rm(u, operand, REGCLASS_XMM,
794                       MODRM_MOD(modrm(u)) == 3 ?
795                         Mx_reg_size(size) : Mx_mem_size(size));
796       break;
797     case OP_S:
798       decode_modrm_reg(u, operand, REGCLASS_SEG, size);
799       break;
800     case OP_O:
801       decode_moffset(u, size, operand);
802       break;
803     case OP_R0:
804     case OP_R1:
805     case OP_R2:
806     case OP_R3:
807     case OP_R4:
808     case OP_R5:
809     case OP_R6:
810     case OP_R7:
811       decode_reg(u, operand, REGCLASS_GPR,
812                  (REX_B(u->_rex) << 3) | (type - OP_R0), size);
813       break;
814     case OP_AL:
815     case OP_AX:
816     case OP_eAX:
817     case OP_rAX:
818       decode_reg(u, operand, REGCLASS_GPR, 0, size);
819       break;
820     case OP_CL:
821     case OP_CX:
822     case OP_eCX:
823       decode_reg(u, operand, REGCLASS_GPR, 1, size);
824       break;
825     case OP_DL:
826     case OP_DX:
827     case OP_eDX:
828       decode_reg(u, operand, REGCLASS_GPR, 2, size);
829       break;
830     case OP_ES:
831     case OP_CS:
832     case OP_DS:
833     case OP_SS:
834     case OP_FS:
835     case OP_GS:
836       /* in 64-bit mode, only fs and gs are allowed */
837       if (u->dis_mode == 64) {
838         if (type != OP_FS && type != OP_GS) {
839           UDERR(u, "invalid segment register in 64bits\n");
840         }
841       }
842       operand->type = UD_OP_REG;
843       operand->base = (type - OP_ES) + UD_R_ES;
844       operand->size = 16;
845       break;
846     case OP_J :
847       decode_imm(u, size, operand);
848       operand->type = UD_OP_JIMM;
849       break ;
850     case OP_R :
851       if (MODRM_MOD(modrm(u)) != 3) {
852         UDERR(u, "expected modrm.mod == 3\n");
853       }
854       decode_modrm_rm(u, operand, REGCLASS_GPR, size);
855       break;
856     case OP_C:
857       decode_modrm_reg(u, operand, REGCLASS_CR, size);
858       break;
859     case OP_D:
860       decode_modrm_reg(u, operand, REGCLASS_DB, size);
861       break;
862     case OP_I3 :
863       operand->type = UD_OP_CONST;
864       operand->lval.sbyte = 3;
865       break;
866     case OP_ST0:
867     case OP_ST1:
868     case OP_ST2:
869     case OP_ST3:
870     case OP_ST4:
871     case OP_ST5:
872     case OP_ST6:
873     case OP_ST7:
874       operand->type = UD_OP_REG;
875       operand->base = (type - OP_ST0) + UD_R_ST0;
876       operand->size = 80;
877       break;
878     case OP_L:
879       decode_vex_immreg(u, operand, size);
880       break;
881     default :
882       operand->type = UD_NONE;
883       break;
884   }
885   return operand->type;
886 }
887 
888 
889 /*
890  * decode_operands
891  *
892  *    Disassemble up to 3 operands of the current instruction being
893  *    disassembled. By the end of the function, the operand fields
894  *    of the ud structure will have been filled.
895  */
896 static int
897 decode_operands(struct ud* u)
898 {
899   decode_operand(u, &u->operand[0],
900                     u->itab_entry->operand1.type,
901                     u->itab_entry->operand1.size);
902   if (u->operand[0].type != UD_NONE) {
903       decode_operand(u, &u->operand[1],
904                         u->itab_entry->operand2.type,
905                         u->itab_entry->operand2.size);
906   }
907   if (u->operand[1].type != UD_NONE) {
908       decode_operand(u, &u->operand[2],
909                         u->itab_entry->operand3.type,
910                         u->itab_entry->operand3.size);
911   }
912   if (u->operand[2].type != UD_NONE) {
913       decode_operand(u, &u->operand[3],
914                         u->itab_entry->operand4.type,
915                         u->itab_entry->operand4.size);
916   }
917   return 0;
918 }
919 
920 /* -----------------------------------------------------------------------------
921  * clear_insn() - clear instruction structure
922  * -----------------------------------------------------------------------------
923  */
924 static void
925 clear_insn(register struct ud* u)
926 {
927   u->error     = 0;
928   u->pfx_seg   = 0;
929   u->pfx_opr   = 0;
930   u->pfx_adr   = 0;
931   u->pfx_lock  = 0;
932   u->pfx_repne = 0;
933   u->pfx_rep   = 0;
934   u->pfx_repe  = 0;
935   u->pfx_rex   = 0;
936   u->pfx_str   = 0;
937   u->mnemonic  = UD_Inone;
938   u->itab_entry = NULL;
939   u->have_modrm = 0;
940   u->br_far    = 0;
941   u->vex_op    = 0;
942   u->_rex      = 0;
943   u->operand[0].type = UD_NONE;
944   u->operand[1].type = UD_NONE;
945   u->operand[2].type = UD_NONE;
946   u->operand[3].type = UD_NONE;
947 }
948 
949 
950 static UD_INLINE int
951 resolve_pfx_str(struct ud* u)
952 {
953   if (u->pfx_str == 0xf3) {
954     if (P_STR(u->itab_entry->prefix)) {
955         u->pfx_rep  = 0xf3;
956     } else {
957         u->pfx_repe = 0xf3;
958     }
959   } else if (u->pfx_str == 0xf2) {
960     u->pfx_repne = 0xf2;
961   }
962   return 0;
963 }
964 
965 
966 static int
967 resolve_mode( struct ud* u )
968 {
969   int default64;
970   /* if in error state, bail out */
971   if ( u->error ) return -1;
972 
973   /* propagate prefix effects */
974   if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */
975 
976     /* Check that the instruction is valid in 64-bit mode */
977     if ( P_INV64( u->itab_entry->prefix ) ) {
978       UDERR(u, "instruction invalid in 64bits\n");
979       return -1;
980     }
981 
982     /* compute effective rex based on,
983      *  - vex prefix (if any)
984      *  - rex prefix (if any, and not vex)
985      *  - allowed prefixes specified by the opcode map
986      */
987     if (u->vex_op == 0xc4) {
988         /* vex has rex.rxb in 1's complement */
989         u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ |
990                    ((u->vex_b2  >> 4) & 0x8) /* rex.w000 */);
991     } else if (u->vex_op == 0xc5) {
992         /* vex has rex.r in 1's complement */
993         u->_rex = (~(u->vex_b1 >> 5)) & 4;
994     } else {
995         UD_ASSERT(u->vex_op == 0);
996         u->_rex = u->pfx_rex;
997     }
998     u->_rex &= REX_PFX_MASK(u->itab_entry->prefix);
999 
1000     /* whether this instruction has a default operand size of
1001      * 64bit, also hardcoded into the opcode map.
1002      */
1003     default64 = P_DEF64( u->itab_entry->prefix );
1004     /* calculate effective operand size */
1005     if (REX_W(u->_rex)) {
1006         u->opr_mode = 64;
1007     } else if ( u->pfx_opr ) {
1008         u->opr_mode = 16;
1009     } else {
1010         /* unless the default opr size of instruction is 64,
1011          * the effective operand size in the absence of rex.w
1012          * prefix is 32.
1013          */
1014         u->opr_mode = default64 ? 64 : 32;
1015     }
1016 
1017     /* calculate effective address size */
1018     u->adr_mode = (u->pfx_adr) ? 32 : 64;
1019   } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
1020     u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
1021     u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
1022   } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
1023     u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
1024     u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
1025   }
1026 
1027   return 0;
1028 }
1029 
1030 
1031 static UD_INLINE int
1032 decode_insn(struct ud *u, uint16_t ptr)
1033 {
1034   UD_ASSERT((ptr & 0x8000) == 0);
1035   u->itab_entry = &ud_itab[ ptr ];
1036   u->mnemonic = u->itab_entry->mnemonic;
1037   return (resolve_pfx_str(u)  == 0 &&
1038           resolve_mode(u)     == 0 &&
1039           decode_operands(u)  == 0 &&
1040           resolve_mnemonic(u) == 0) ? 0 : -1;
1041 }
1042 
1043 
1044 /*
1045  * decode_3dnow()
1046  *
1047  *    Decoding 3dnow is a little tricky because of its strange opcode
1048  *    structure. The final opcode disambiguation depends on the last
1049  *    byte that comes after the operands have been decoded. Fortunately,
1050  *    all 3dnow instructions have the same set of operand types. So we
1051  *    go ahead and decode the instruction by picking an arbitrarily chosen
1052  *    valid entry in the table, decode the operands, and read the final
1053  *    byte to resolve the mnemonic.
1054  */
1055 static UD_INLINE int
1056 decode_3dnow(struct ud* u)
1057 {
1058   uint16_t ptr;
1059   UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
1060   UD_ASSERT(u->le->table[0xc] != 0);
1061   decode_insn(u, u->le->table[0xc]);
1062   inp_next(u);
1063   if (u->error) {
1064     return -1;
1065   }
1066   ptr = u->le->table[inp_curr(u)];
1067   UD_ASSERT((ptr & 0x8000) == 0);
1068   u->mnemonic = ud_itab[ptr].mnemonic;
1069   return 0;
1070 }
1071 
1072 
1073 static int
1074 decode_ssepfx(struct ud *u)
1075 {
1076   uint8_t idx;
1077   uint8_t pfx;
1078 
1079   /*
1080    * String prefixes (f2, f3) take precedence over operand
1081    * size prefix (66).
1082    */
1083   pfx = u->pfx_str;
1084   if (pfx == 0) {
1085     pfx = u->pfx_opr;
1086   }
1087   idx = ((pfx & 0xf) + 1) / 2;
1088   if (u->le->table[idx] == 0) {
1089     idx = 0;
1090   }
1091   if (idx && u->le->table[idx] != 0) {
1092     /*
1093      * "Consume" the prefix as a part of the opcode, so it is no
1094      * longer exported as an instruction prefix.
1095      */
1096     u->pfx_str = 0;
1097     if (pfx == 0x66) {
1098         /*
1099          * consume "66" only if it was used for decoding, leaving
1100          * it to be used as an operands size override for some
1101          * simd instructions.
1102          */
1103         u->pfx_opr = 0;
1104     }
1105   }
1106   return decode_ext(u, u->le->table[idx]);
1107 }
1108 
1109 
1110 static int
1111 decode_vex(struct ud *u)
1112 {
1113   uint8_t index;
1114   if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) {
1115     index = 0;
1116   } else {
1117     u->vex_op = inp_curr(u);
1118     u->vex_b1 = inp_next(u);
1119     if (u->vex_op == 0xc4) {
1120       uint8_t pp, m;
1121       /* 3-byte vex */
1122       u->vex_b2 = inp_next(u);
1123       UD_RETURN_ON_ERROR(u);
1124       m  = u->vex_b1 & 0x1f;
1125       if (m == 0 || m > 3) {
1126         UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value");
1127       }
1128       pp = u->vex_b2 & 0x3;
1129       index = (pp << 2) | m;
1130     } else {
1131       /* 2-byte vex */
1132       UD_ASSERT(u->vex_op == 0xc5);
1133       index = 0x1 | ((u->vex_b1 & 0x3) << 2);
1134     }
1135   }
1136   return decode_ext(u, u->le->table[index]);
1137 }
1138 
1139 
1140 /*
1141  * decode_ext()
1142  *
1143  *    Decode opcode extensions (if any)
1144  */
1145 static int
1146 decode_ext(struct ud *u, uint16_t ptr)
1147 {
1148   uint8_t idx = 0;
1149   if ((ptr & 0x8000) == 0) {
1150     return decode_insn(u, ptr);
1151   }
1152   u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
1153   if (u->le->type == UD_TAB__OPC_3DNOW) {
1154     return decode_3dnow(u);
1155   }
1156 
1157   switch (u->le->type) {
1158     case UD_TAB__OPC_MOD:
1159       /* !11 = 0, 11 = 1 */
1160       idx = (MODRM_MOD(modrm(u)) + 1) / 4;
1161       break;
1162       /* disassembly mode/operand size/address size based tables.
1163        * 16 = 0, 32 = 1, 64 = 2
1164        */
1165     case UD_TAB__OPC_MODE:
1166       idx = u->dis_mode != 64 ? 0 : 1;
1167       break;
1168     case UD_TAB__OPC_OSIZE:
1169       idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
1170       break;
1171     case UD_TAB__OPC_ASIZE:
1172       idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
1173       break;
1174     case UD_TAB__OPC_X87:
1175       idx = modrm(u) - 0xC0;
1176       break;
1177     case UD_TAB__OPC_VENDOR:
1178       if (u->vendor == UD_VENDOR_ANY) {
1179         /* choose a valid entry */
1180         idx = (u->le->table[idx] != 0) ? 0 : 1;
1181       } else if (u->vendor == UD_VENDOR_AMD) {
1182         idx = 0;
1183       } else {
1184         idx = 1;
1185       }
1186       break;
1187     case UD_TAB__OPC_RM:
1188       idx = MODRM_RM(modrm(u));
1189       break;
1190     case UD_TAB__OPC_REG:
1191       idx = MODRM_REG(modrm(u));
1192       break;
1193     case UD_TAB__OPC_SSE:
1194       return decode_ssepfx(u);
1195     case UD_TAB__OPC_VEX:
1196       return decode_vex(u);
1197     case UD_TAB__OPC_VEX_W:
1198       idx = vex_w(u);
1199       break;
1200     case UD_TAB__OPC_VEX_L:
1201       idx = vex_l(u);
1202       break;
1203     case UD_TAB__OPC_TABLE:
1204       inp_next(u);
1205       return decode_opcode(u);
1206     default:
1207       UD_ASSERT(!"not reached");
1208       break;
1209   }
1210 
1211   return decode_ext(u, u->le->table[idx]);
1212 }
1213 
1214 
1215 static int
1216 decode_opcode(struct ud *u)
1217 {
1218   uint16_t ptr;
1219   UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
1220   UD_RETURN_ON_ERROR(u);
1221   ptr = u->le->table[inp_curr(u)];
1222   return decode_ext(u, ptr);
1223 }
1224 
1225 
1226 /* =============================================================================
1227  * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
1228  * =============================================================================
1229  */
1230 unsigned int
1231 ud_decode(struct ud *u)
1232 {
1233   inp_start(u);
1234   clear_insn(u);
1235   u->le = &ud_lookup_table_list[0];
1236   u->error = decode_prefixes(u) == -1 ||
1237              decode_opcode(u)   == -1 ||
1238              u->error;
1239   /* Handle decode error. */
1240   if (u->error) {
1241     /* clear out the decode data. */
1242     clear_insn(u);
1243     /* mark the sequence of bytes as invalid. */
1244     u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
1245     u->mnemonic = u->itab_entry->mnemonic;
1246   }
1247 
1248   /* maybe this stray segment override byte
1249    * should be spewed out?
1250    */
1251   if ( !P_SEG( u->itab_entry->prefix ) &&
1252        u->operand[0].type != UD_OP_MEM &&
1253        u->operand[1].type != UD_OP_MEM )
1254     u->pfx_seg = 0;
1255 
1256   u->insn_offset = u->pc; /* set offset of instruction */
1257   u->asm_buf_fill = 0;   /* set translation buffer index to 0 */
1258   u->pc += u->inp_ctr;    /* move program counter by bytes decoded */
1259 
1260   /* return number of bytes disassembled. */
1261   return u->inp_ctr;
1262 }
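
/*
 * Typical driver loop (illustrative sketch only; the entry points used
 * here are the public libudis86 API declared in extern.h, ud_disassemble()
 * calls ud_decode() internally, and `code`/`code_len` stand for the
 * caller's byte buffer):
 *
 *   struct ud u;
 *   ud_init(&u);
 *   ud_set_mode(&u, 64);
 *   ud_set_syntax(&u, UD_SYN_INTEL);
 *   ud_set_input_buffer(&u, code, code_len);
 *   while (ud_disassemble(&u)) {
 *     printf("%016llx  %s\n",
 *            (unsigned long long) ud_insn_off(&u), ud_insn_asm(&u));
 *   }
 */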
1263 
1264 /*
1265 vim: set ts=2 sw=2 expandtab
1266 */
1267