/* udis86 - libudis86/decode.c
 *
 * Copyright (c) 2002-2009 Vivek Thampi
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification,
 * are permitted provided that the following conditions are met:
 *
 *     * Redistributions of source code must retain the above copyright notice,
 *       this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright notice,
 *       this list of conditions and the following disclaimer in the documentation
 *       and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
 * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
 * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
 * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
#include "udint.h"
#include "types.h"
#include "extern.h"
#include "decode.h"

#ifndef __UD_STANDALONE__
# include <string.h>
#endif /* __UD_STANDALONE__ */

/* The max number of prefixes to an instruction */
#define MAX_PREFIXES    15

/* rex prefix bits */
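/*
 * A rex byte has the form 0100WRXB (0x40-0x4f). The macros below operate
 * on its low nibble (or on an equivalent wrxb value synthesized from a
 * vex prefix, see resolve_mode()): W selects 64-bit operand size, R
 * extends modrm.reg, X extends sib.index, and B extends modrm.rm /
 * sib.base / the opcode register field. REX_PFX_MASK builds the wrxb
 * bits an opcode is allowed to use, from its opcode-map prefix flags.
 */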
#define REX_W(r)        ( ( 0xF & ( r ) ) >> 3 )
#define REX_R(r)        ( ( 0x7 & ( r ) ) >> 2 )
#define REX_X(r)        ( ( 0x3 & ( r ) ) >> 1 )
#define REX_B(r)        ( ( 0x1 & ( r ) ) >> 0 )
#define REX_PFX_MASK(n) ( ( P_REXW(n) << 3 ) | \
                          ( P_REXR(n) << 2 ) | \
                          ( P_REXX(n) << 1 ) | \
                          ( P_REXB(n) << 0 ) )

/* scale-index-base (SIB) bits */
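/*
 * An SIB byte is laid out as scale[7:6] index[5:3] base[2:0]; the
 * effective scale factor is 1 << scale.
 */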
#define SIB_S(b)        ( ( b ) >> 6 )
#define SIB_I(b)        ( ( ( b ) >> 3 ) & 7 )
#define SIB_B(b)        ( ( b ) & 7 )

/* modrm bits */
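/*
 * A modrm byte is laid out as mod[7:6] reg[5:3] rm[2:0]; reg and nnn
 * name the same field (nnn is the opcode-extension reading used by
 * group opcodes).
 */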
#define MODRM_REG(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_NNN(b)    ( ( ( b ) >> 3 ) & 7 )
#define MODRM_MOD(b)    ( ( ( b ) >> 6 ) & 3 )
#define MODRM_RM(b)     ( ( b ) & 7 )

static int decode_ext(struct ud *u, uint16_t ptr);
static int decode_opcode(struct ud *u);

enum reg_class { /* register classes */
  REGCLASS_GPR,
  REGCLASS_MMX,
  REGCLASS_CR,
  REGCLASS_DB,
  REGCLASS_SEG,
  REGCLASS_XMM
};

/*
 * inp_start
 *    Should be called before each decode operation.
 */
static void
inp_start(struct ud *u)
{
  u->inp_ctr = 0;
}

static uint8_t
inp_peek(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        return u->inp_buf[u->inp_buf_index];
      }
    } else if (u->inp_peek != UD_EOI) {
      return u->inp_peek;
    } else {
      int c;
      if ((c = u->inp_hook(u)) != UD_EOI) {
        u->inp_peek = c;
        return u->inp_peek;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}

static uint8_t
inp_next(struct ud *u)
{
  if (u->inp_end == 0) {
    if (u->inp_buf != NULL) {
      if (u->inp_buf_index < u->inp_buf_size) {
        u->inp_ctr++;
        return (u->inp_curr = u->inp_buf[u->inp_buf_index++]);
      }
    } else {
      int c = u->inp_peek;
      if (c != UD_EOI || (c = u->inp_hook(u)) != UD_EOI) {
        u->inp_peek = UD_EOI;
        u->inp_curr = c;
        u->inp_sess[u->inp_ctr++] = u->inp_curr;
        return u->inp_curr;
      }
    }
  }
  u->inp_end = 1;
  UDERR(u, "byte expected, eoi received\n");
  return 0;
}

static uint8_t
inp_curr(struct ud *u)
{
  return u->inp_curr;
}


/*
 * inp_uint8
 * inp_uint16
 * inp_uint32
 * inp_uint64
 *    Load little-endian values from input
 */
static uint8_t
inp_uint8(struct ud* u)
{
  return inp_next(u);
}

static uint16_t
inp_uint16(struct ud* u)
{
  uint16_t r, ret;

  ret = inp_next(u);
  r = inp_next(u);
  return ret | (r << 8);
}

static uint32_t
inp_uint32(struct ud* u)
{
  uint32_t r, ret;

  ret = inp_next(u);
  r = inp_next(u);
  ret = ret | (r << 8);
  r = inp_next(u);
  ret = ret | (r << 16);
  r = inp_next(u);
  return ret | (r << 24);
}

static uint64_t
inp_uint64(struct ud* u)
{
  uint64_t r, ret;

  ret = inp_next(u);
  r = inp_next(u);
  ret = ret | (r << 8);
  r = inp_next(u);
  ret = ret | (r << 16);
  r = inp_next(u);
  ret = ret | (r << 24);
  r = inp_next(u);
  ret = ret | (r << 32);
  r = inp_next(u);
  ret = ret | (r << 40);
  r = inp_next(u);
  ret = ret | (r << 48);
  r = inp_next(u);
  return ret | (r << 56);
}


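/*
 * Effective size resolution implemented by eff_opr_mode()/eff_adr_mode()
 * below (a summary of the code, not additional behavior):
 *
 *   operand size: 64-bit mode: rex.w ? 64 : (0x66 ? 16 : 32)
 *                 32-bit mode: 0x66 ? 16 : 32
 *                 16-bit mode: 0x66 ? 32 : 16
 *   address size: 64-bit mode: 0x67 ? 32 : 64
 *                 32-bit mode: 0x67 ? 16 : 32
 *                 16-bit mode: 0x67 ? 32 : 16
 */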
static UD_INLINE int
eff_opr_mode(int dis_mode, int rex_w, int pfx_opr)
{
  if (dis_mode == 64) {
    return rex_w ? 64 : (pfx_opr ? 16 : 32);
  } else if (dis_mode == 32) {
    return pfx_opr ? 16 : 32;
  } else {
    UD_ASSERT(dis_mode == 16);
    return pfx_opr ? 32 : 16;
  }
}


static UD_INLINE int
eff_adr_mode(int dis_mode, int pfx_adr)
{
  if (dis_mode == 64) {
    return pfx_adr ? 32 : 64;
  } else if (dis_mode == 32) {
    return pfx_adr ? 16 : 32;
  } else {
    UD_ASSERT(dis_mode == 16);
    return pfx_adr ? 32 : 16;
  }
}


/*
 * decode_prefixes
 *
 *    Extracts instruction prefixes.
 */
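/*
 * Legacy prefixes (segment overrides, 0x66, 0x67, lock, rep/repne) may
 * appear in any order and are simply recorded. Any other byte ends the
 * scan; in 64-bit mode a rex byte (0x40-0x4f) keeps the scan going but
 * must immediately precede the opcode, which is why only the last
 * prefix byte seen is checked for rex after the loop.
 */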
static int
decode_prefixes(struct ud *u)
{
  int done = 0;
  uint8_t curr = 0, last = 0;
  UD_RETURN_ON_ERROR(u);

  do {
    last = curr;
    curr = inp_next(u);
    UD_RETURN_ON_ERROR(u);
    if (u->inp_ctr == MAX_INSN_LENGTH) {
      UD_RETURN_WITH_ERROR(u, "max instruction length");
    }

    switch (curr)
    {
    case 0x2E:
      u->pfx_seg = UD_R_CS;
      break;
    case 0x36:
      u->pfx_seg = UD_R_SS;
      break;
    case 0x3E:
      u->pfx_seg = UD_R_DS;
      break;
    case 0x26:
      u->pfx_seg = UD_R_ES;
      break;
    case 0x64:
      u->pfx_seg = UD_R_FS;
      break;
    case 0x65:
      u->pfx_seg = UD_R_GS;
      break;
    case 0x67: /* address-size override prefix */
      u->pfx_adr = 0x67;
      break;
    case 0xF0:
      u->pfx_lock = 0xF0;
      break;
    case 0x66:
      u->pfx_opr = 0x66;
      break;
    case 0xF2:
      u->pfx_str = 0xf2;
      break;
    case 0xF3:
      u->pfx_str = 0xf3;
      break;
    default:
      /* consume if rex */
      done = (u->dis_mode == 64 && (curr & 0xF0) == 0x40) ? 0 : 1;
      break;
    }
  } while (!done);
  /* the rex prefix, in 64-bit mode, must be the last prefix */
  if (u->dis_mode == 64 && (last & 0xF0) == 0x40) {
    u->pfx_rex = last;
  }
  return 0;
}


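/*
 * vex payload bytes, as stored by decode_vex():
 *   3-byte form (0xc4): vex_b1 = R X B m-mmmm (RXB inverted),
 *                       vex_b2 = W vvvv L pp
 *   2-byte form (0xc5): vex_b1 = R vvvv L pp  (R inverted, W implied 0)
 * so vex.L is always bit 2 of the last payload byte, and vex.W exists
 * only in the 3-byte form.
 */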
/*
 * vex_l, vex_w
 *    Return the vex.L and vex.W bits
 */
static UD_INLINE uint8_t
vex_l(const struct ud *u)
{
  UD_ASSERT(u->vex_op != 0);
  return ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 2) & 1;
}

static UD_INLINE uint8_t
vex_w(const struct ud *u)
{
  UD_ASSERT(u->vex_op != 0);
  return u->vex_op == 0xc4 ? ((u->vex_b2 >> 7) & 1) : 0;
}


static UD_INLINE uint8_t
modrm(struct ud * u)
{
  if ( !u->have_modrm ) {
    u->modrm = inp_next( u );
    u->modrm_offset = (uint8_t) (u->inp_ctr - 1);
    u->have_modrm = 1;
  }
  return u->modrm;
}


static unsigned int
resolve_operand_size(const struct ud* u, ud_operand_size_t osize)
{
  switch (osize) {
  case SZ_V:
    return u->opr_mode;
  case SZ_Z:
    return u->opr_mode == 16 ? 16 : 32;
  case SZ_Y:
    return u->opr_mode == 16 ? 32 : u->opr_mode;
  case SZ_RDQ:
    return u->dis_mode == 64 ? 64 : 32;
  case SZ_X:
    UD_ASSERT(u->vex_op != 0);
    return (P_VEXL(u->itab_entry->prefix) && vex_l(u)) ? SZ_QQ : SZ_DQ;
  default:
    return osize;
  }
}


static int resolve_mnemonic( struct ud* u )
{
  /* resolve 3dnow weirdness. */
  if ( u->mnemonic == UD_I3dnow ) {
    u->mnemonic = ud_itab[ u->le->table[ inp_curr( u ) ] ].mnemonic;
  }
  /* SWAPGS is only valid in 64-bit mode */
  if ( u->mnemonic == UD_Iswapgs && u->dis_mode != 64 ) {
    UDERR(u, "swapgs invalid outside 64-bit mode\n");
    return -1;
  }

  if (u->mnemonic == UD_Ixchg) {
    if ((u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_AX &&
         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_AX) ||
        (u->operand[0].type == UD_OP_REG && u->operand[0].base == UD_R_EAX &&
         u->operand[1].type == UD_OP_REG && u->operand[1].base == UD_R_EAX)) {
      u->operand[0].type = UD_NONE;
      u->operand[1].type = UD_NONE;
      u->mnemonic = UD_Inop;
    }
  }

  if (u->mnemonic == UD_Inop && u->pfx_repe) {
    u->pfx_repe = 0;
    u->mnemonic = UD_Ipause;
  }
  return 0;
}


/* -----------------------------------------------------------------------------
 * decode_a() - Decodes operands of the type seg:offset
 * -----------------------------------------------------------------------------
 */
static void
decode_a(struct ud* u, struct ud_operand *op)
{
  if (u->opr_mode == 16) {
    /* seg16:off16 */
    op->type = UD_OP_PTR;
    op->size = 32;
    op->lval.ptr.off = inp_uint16(u);
    op->lval.ptr.seg = inp_uint16(u);
  } else {
    /* seg16:off32 */
    op->type = UD_OP_PTR;
    op->size = 48;
    op->lval.ptr.off = inp_uint32(u);
    op->lval.ptr.seg = inp_uint16(u);
  }
}

/* -----------------------------------------------------------------------------
 * decode_gpr() - Returns decoded General Purpose Register
 * -----------------------------------------------------------------------------
 */
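/*
 * Note on the 8-bit case: with any rex prefix present, encodings 4-7
 * select spl/bpl/sil/dil; without rex they select the legacy high-byte
 * registers ah/ch/dh/bh (which follow al/cl/dl/bl in the UD_R_AL run).
 */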
static enum ud_type
decode_gpr(register struct ud* u, unsigned int s, unsigned char rm)
{
  switch (s) {
  case 64:
    return UD_R_RAX + rm;
  case 32:
    return UD_R_EAX + rm;
  case 16:
    return UD_R_AX + rm;
  case 8:
    if (u->dis_mode == 64 && u->pfx_rex) {
      if (rm >= 4)
        return UD_R_SPL + (rm-4);
      return UD_R_AL + rm;
    } else return UD_R_AL + rm;
  case 0:
    /* invalid size in case of a decode error */
    UD_ASSERT(u->error);
    return UD_NONE;
  default:
    UD_ASSERT(!"invalid operand size");
    return UD_NONE;
  }
}

static void
decode_reg(struct ud *u,
           struct ud_operand *opr,
           int type,
           int num,
           int size)
{
  int reg;
  size = resolve_operand_size(u, size);
  switch (type) {
    case REGCLASS_GPR : reg = decode_gpr(u, size, num); break;
    case REGCLASS_MMX : reg = UD_R_MM0 + (num & 7); break;
    case REGCLASS_XMM :
      reg = num + (size == SZ_QQ ? UD_R_YMM0 : UD_R_XMM0);
      break;
    case REGCLASS_CR : reg = UD_R_CR0 + num; break;
    case REGCLASS_DB : reg = UD_R_DR0 + num; break;
    case REGCLASS_SEG : {
      /*
       * Only 6 segment registers, anything else is an error.
       */
      if ((num & 7) > 5) {
        UDERR(u, "invalid segment register value\n");
        return;
      } else {
        reg = UD_R_ES + (num & 7);
      }
      break;
    }
    default:
      UD_ASSERT(!"invalid register type");
      return;
  }
  opr->type = UD_OP_REG;
  opr->base = reg;
  opr->size = size;
}


/*
 * decode_imm
 *
 *    Decode Immediate values.
 */
static void
decode_imm(struct ud* u, unsigned int size, struct ud_operand *op)
{
  op->size = resolve_operand_size(u, size);
  op->type = UD_OP_IMM;

  switch (op->size) {
  case  8: op->lval.sbyte  = inp_uint8(u);  break;
  case 16: op->lval.uword  = inp_uint16(u); break;
  case 32: op->lval.udword = inp_uint32(u); break;
  case 64: op->lval.uqword = inp_uint64(u); break;
  default: return;
  }
}


/*
 * decode_mem_disp
 *
 *    Decode mem address displacement.
 */
static void
decode_mem_disp(struct ud* u, unsigned int size, struct ud_operand *op)
{
  switch (size) {
  case 8:
    op->offset = 8;
    op->lval.ubyte = inp_uint8(u);
    break;
  case 16:
    op->offset = 16;
    op->lval.uword = inp_uint16(u);
    break;
  case 32:
    op->offset = 32;
    op->lval.udword = inp_uint32(u);
    break;
  case 64:
    op->offset = 64;
    op->lval.uqword = inp_uint64(u);
    break;
  default:
    return;
  }
}


/*
 * decode_modrm_reg
 *
 *    Decodes reg field of mod/rm byte
 *
 */
static UD_INLINE void
decode_modrm_reg(struct ud *u,
                 struct ud_operand *operand,
                 unsigned int type,
                 unsigned int size)
{
  uint8_t reg = (REX_R(u->_rex) << 3) | MODRM_REG(modrm(u));
  decode_reg(u, operand, type, reg, size);
}


/*
 * decode_modrm_rm
 *
 *    Decodes rm field of mod/rm byte
 *
 */
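/*
 * Memory-operand forms handled below (mod != 3):
 *   mod=00: no displacement, except rm=101 which is disp32 (rip-relative
 *           in 64-bit addressing) and rm=110 which is disp16 in 16-bit
 *           addressing;
 *   mod=01: disp8;  mod=10: disp32 (disp16 in 16-bit addressing);
 *   rm=100 (32/64-bit addressing): an SIB byte follows; an index of
 *   100 (esp/rsp) means "no index", and a base of ebp/rbp/r13 with
 *   mod=00 means "no base, displacement only".
 * The scale is computed as (1 << sib.scale) & ~1 so that a factor of 1
 * is reported as 0 (no scale).
 */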
static void
decode_modrm_rm(struct ud *u,
                struct ud_operand *op,
                unsigned char type,    /* register type */
                unsigned int size)     /* operand size */

{
  size_t offset = 0;
  unsigned char mod, rm;

  /* get mod, r/m and reg fields */
  mod = MODRM_MOD(modrm(u));
  rm  = (REX_B(u->_rex) << 3) | MODRM_RM(modrm(u));

  /*
   * If mod is 11b, then the modrm.rm specifies a register.
   *
   */
  if (mod == 3) {
    decode_reg(u, op, type, rm, size);
    return;
  }

  /*
   * !11b => Memory Address
   */
  op->type = UD_OP_MEM;
  op->size = resolve_operand_size(u, size);

  if (u->adr_mode == 64) {
    op->base = UD_R_RAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && (rm & 7) == 5) {
      op->base = UD_R_RIP;
      offset = 32;
    } else {
      offset = 0;
    }
    /*
     * Scale-Index-Base (SIB)
     */
    if ((rm & 7) == 4) {
      inp_next(u);

      op->base  = UD_R_RAX + (SIB_B(inp_curr(u)) | (REX_B(u->_rex) << 3));
      op->index = UD_R_RAX + (SIB_I(inp_curr(u)) | (REX_X(u->_rex) << 3));
      /* an index of rsp (100b) means no index, and no scale */
      if (op->index == UD_R_RSP) {
        op->index = UD_NONE;
        op->scale = UD_NONE;
      } else {
        op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      }

      if (op->base == UD_R_RBP || op->base == UD_R_R13) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else if (u->adr_mode == 32) {
    op->base = UD_R_EAX + rm;
    if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 32;
    } else if (mod == 0 && rm == 5) {
      op->base = UD_NONE;
      offset = 32;
    } else {
      offset = 0;
    }

    /* Scale-Index-Base (SIB) */
    if ((rm & 7) == 4) {
      inp_next(u);

      op->scale = (1 << SIB_S(inp_curr(u))) & ~1;
      op->index = UD_R_EAX + (SIB_I(inp_curr(u)) | (REX_X(u->pfx_rex) << 3));
      op->base  = UD_R_EAX + (SIB_B(inp_curr(u)) | (REX_B(u->pfx_rex) << 3));

      if (op->index == UD_R_ESP) {
        op->index = UD_NONE;
        op->scale = UD_NONE;
      }

      /* special condition for base reference */
      if (op->base == UD_R_EBP) {
        if (mod == 0) {
          op->base = UD_NONE;
        }
        if (mod == 1) {
          offset = 8;
        } else {
          offset = 32;
        }
      }
    } else {
      op->scale = UD_NONE;
      op->index = UD_NONE;
    }
  } else {
    const unsigned int bases[]   = { UD_R_BX, UD_R_BX, UD_R_BP, UD_R_BP,
                                     UD_R_SI, UD_R_DI, UD_R_BP, UD_R_BX };
    const unsigned int indices[] = { UD_R_SI, UD_R_DI, UD_R_SI, UD_R_DI,
                                     UD_NONE, UD_NONE, UD_NONE, UD_NONE };
    op->base  = bases[rm & 7];
    op->index = indices[rm & 7];
    op->scale = UD_NONE;
    if (mod == 0 && rm == 6) {
      offset = 16;
      op->base = UD_NONE;
    } else if (mod == 1) {
      offset = 8;
    } else if (mod == 2) {
      offset = 16;
    }
  }

  if (offset) {
    decode_mem_disp(u, offset, op);
  } else {
    op->offset = 0;
  }
}


/*
 * decode_moffset
 *    Decode offset-only memory operand
 */
static void
decode_moffset(struct ud *u, unsigned int size, struct ud_operand *opr)
{
  opr->type  = UD_OP_MEM;
  opr->base  = UD_NONE;
  opr->index = UD_NONE;
  opr->scale = UD_NONE;
  opr->size  = resolve_operand_size(u, size);
  decode_mem_disp(u, u->adr_mode, opr);
}


static void
decode_vex_vvvv(struct ud *u, struct ud_operand *opr, unsigned size)
{
  uint8_t vvvv;
  UD_ASSERT(u->vex_op != 0);
  vvvv = ((u->vex_op == 0xc4 ? u->vex_b2 : u->vex_b1) >> 3) & 0xf;
  decode_reg(u, opr, REGCLASS_XMM, (0xf & ~vvvv), size);
}


/*
 * decode_vex_immreg
 *    Decode source operand encoded in immediate byte [7:4]
 */
static int
decode_vex_immreg(struct ud *u, struct ud_operand *opr, unsigned size)
{
  uint8_t imm  = inp_next(u);
  uint8_t mask = u->dis_mode == 64 ? 0xf : 0x7;
  UD_RETURN_ON_ERROR(u);
  UD_ASSERT(u->vex_op != 0);
  decode_reg(u, opr, REGCLASS_XMM, mask & (imm >> 4), size);
  return 0;
}


/*
 * decode_operand
 *
 *      Decodes a single operand.
 *      Returns the type of the operand (UD_NONE if none)
 */
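/*
 * Operand-code legend (mirrors the opcode-map conventions used by the
 * table generator): E/G = modrm.rm / modrm.reg GPR, M = memory-only rm,
 * R = register-only rm, I/sI = (sign-extended) immediate, J = relative
 * branch offset, A = direct seg:offset, O = moffset, S = segment
 * register in modrm.reg, C/D = control/debug register, P/Q/N = MMX reg,
 * reg-or-mem, reg-only, V/W/U = XMM/YMM reg, reg-or-mem, reg-only,
 * H = vex.vvvv register, L = register encoded in imm[7:4], and MR/MU
 * pick different sizes for the register and memory forms.
 */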
static int
decode_operand(struct ud *u,
               struct ud_operand *operand,
               enum ud_operand_code type,
               unsigned int size)
{
  operand->type = UD_NONE;
  operand->_oprcode = type;

  switch (type) {
    case OP_A :
      decode_a(u, operand);
      break;
    case OP_MR:
      decode_modrm_rm(u, operand, REGCLASS_GPR,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_F:
      u->br_far = 1;
      /* intended fall through */
    case OP_M:
      if (MODRM_MOD(modrm(u)) == 3) {
        UDERR(u, "expected modrm.mod != 3\n");
      }
      /* intended fall through */
    case OP_E:
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_G:
      decode_modrm_reg(u, operand, REGCLASS_GPR, size);
      break;
    case OP_sI:
    case OP_I:
      decode_imm(u, size, operand);
      break;
    case OP_I1:
      operand->type = UD_OP_CONST;
      operand->lval.udword = 1;
      break;
    case OP_N:
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      /* intended fall through */
    case OP_Q:
      decode_modrm_rm(u, operand, REGCLASS_MMX, size);
      break;
    case OP_P:
      decode_modrm_reg(u, operand, REGCLASS_MMX, size);
      break;
    case OP_U:
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      /* intended fall through */
    case OP_W:
      decode_modrm_rm(u, operand, REGCLASS_XMM, size);
      break;
    case OP_V:
      decode_modrm_reg(u, operand, REGCLASS_XMM, size);
      break;
    case OP_H:
      decode_vex_vvvv(u, operand, size);
      break;
    case OP_MU:
      decode_modrm_rm(u, operand, REGCLASS_XMM,
                      MODRM_MOD(modrm(u)) == 3 ?
                        Mx_reg_size(size) : Mx_mem_size(size));
      break;
    case OP_S:
      decode_modrm_reg(u, operand, REGCLASS_SEG, size);
      break;
    case OP_O:
      decode_moffset(u, size, operand);
      break;
    case OP_R0:
    case OP_R1:
    case OP_R2:
    case OP_R3:
    case OP_R4:
    case OP_R5:
    case OP_R6:
    case OP_R7:
      decode_reg(u, operand, REGCLASS_GPR,
                 (REX_B(u->_rex) << 3) | (type - OP_R0), size);
      break;
    case OP_AL:
    case OP_AX:
    case OP_eAX:
    case OP_rAX:
      decode_reg(u, operand, REGCLASS_GPR, 0, size);
      break;
    case OP_CL:
    case OP_CX:
    case OP_eCX:
      decode_reg(u, operand, REGCLASS_GPR, 1, size);
      break;
    case OP_DL:
    case OP_DX:
    case OP_eDX:
      decode_reg(u, operand, REGCLASS_GPR, 2, size);
      break;
    case OP_ES:
    case OP_CS:
    case OP_DS:
    case OP_SS:
    case OP_FS:
    case OP_GS:
      /* in 64bits mode, only fs and gs are allowed */
      if (u->dis_mode == 64) {
        if (type != OP_FS && type != OP_GS) {
          UDERR(u, "invalid segment register in 64bits\n");
        }
      }
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ES) + UD_R_ES;
      operand->size = 16;
      break;
    case OP_J :
      decode_imm(u, size, operand);
      operand->type = UD_OP_JIMM;
      break ;
    case OP_R :
      if (MODRM_MOD(modrm(u)) != 3) {
        UDERR(u, "expected modrm.mod == 3\n");
      }
      decode_modrm_rm(u, operand, REGCLASS_GPR, size);
      break;
    case OP_C:
      decode_modrm_reg(u, operand, REGCLASS_CR, size);
      break;
    case OP_D:
      decode_modrm_reg(u, operand, REGCLASS_DB, size);
      break;
    case OP_I3 :
      operand->type = UD_OP_CONST;
      operand->lval.sbyte = 3;
      break;
    case OP_ST0:
    case OP_ST1:
    case OP_ST2:
    case OP_ST3:
    case OP_ST4:
    case OP_ST5:
    case OP_ST6:
    case OP_ST7:
      operand->type = UD_OP_REG;
      operand->base = (type - OP_ST0) + UD_R_ST0;
      operand->size = 80;
      break;
    case OP_L:
      decode_vex_immreg(u, operand, size);
      break;
    default :
      operand->type = UD_NONE;
      break;
  }
  return operand->type;
}


/*
 * decode_operands
 *
 *    Disassemble up to 3 operands of the current instruction being
 *    disassembled. By the end of the function, the operand fields
 *    of the ud structure will have been filled.
 */
static int
decode_operands(struct ud* u)
{
  decode_operand(u, &u->operand[0],
                 u->itab_entry->operand1.type,
                 u->itab_entry->operand1.size);
  if (u->operand[0].type != UD_NONE) {
    decode_operand(u, &u->operand[1],
                   u->itab_entry->operand2.type,
                   u->itab_entry->operand2.size);
  }
  if (u->operand[1].type != UD_NONE) {
    decode_operand(u, &u->operand[2],
                   u->itab_entry->operand3.type,
                   u->itab_entry->operand3.size);
  }
  if (u->operand[2].type != UD_NONE) {
    decode_operand(u, &u->operand[3],
                   u->itab_entry->operand4.type,
                   u->itab_entry->operand4.size);
  }
  return 0;
}

/* -----------------------------------------------------------------------------
 * clear_insn() - clear instruction structure
 * -----------------------------------------------------------------------------
 */
static void
clear_insn(register struct ud* u)
{
  u->error = 0;
  u->pfx_seg = 0;
  u->pfx_opr = 0;
  u->pfx_adr = 0;
  u->pfx_lock = 0;
  u->pfx_repne = 0;
  u->pfx_rep = 0;
  u->pfx_repe = 0;
  u->pfx_rex = 0;
  u->pfx_str = 0;
  u->mnemonic = UD_Inone;
  u->itab_entry = NULL;
  u->have_modrm = 0;
  u->br_far = 0;
  u->vex_op = 0;
  u->_rex = 0;
  u->operand[0].type = UD_NONE;
  u->operand[1].type = UD_NONE;
  u->operand[2].type = UD_NONE;
  u->operand[3].type = UD_NONE;
}


static UD_INLINE int
resolve_pfx_str(struct ud* u)
{
  if (u->pfx_str == 0xf3) {
    if (P_STR(u->itab_entry->prefix)) {
      u->pfx_rep = 0xf3;
    } else {
      u->pfx_repe = 0xf3;
    }
  } else if (u->pfx_str == 0xf2) {
    u->pfx_repne = 0xf2;
  }
  return 0;
}


static int
resolve_mode( struct ud* u )
{
  int default64;
  /* if in error state, bail out */
  if ( u->error ) return -1;

  /* propagate prefix effects */
  if ( u->dis_mode == 64 ) {  /* set 64bit-mode flags */

    /* check validity of the instruction in 64-bit mode */
    if ( P_INV64( u->itab_entry->prefix ) ) {
      UDERR(u, "instruction invalid in 64bits\n");
      return -1;
    }

    /* compute effective rex based on,
     *  - vex prefix (if any)
     *  - rex prefix (if any, and not vex)
     *  - allowed prefixes specified by the opcode map
     */
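    /*
     * For the 3-byte vex form, vex_b1 carries ~R ~X ~B in bits 7:5 and
     * vex_b2 carries W in bit 7, so the shifts below reassemble a 4-bit
     * wrxb value; the 2-byte form only encodes ~R (bit 7 of vex_b1),
     * yielding at most rex.r.
     */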
    if (u->vex_op == 0xc4) {
      /* vex has rex.rxb in 1's complement */
      u->_rex = ((~(u->vex_b1 >> 5) & 0x7) /* rex.0rxb */ |
                 ((u->vex_b2 >> 4) & 0x8)  /* rex.w000 */);
    } else if (u->vex_op == 0xc5) {
      /* vex has rex.r in 1's complement */
      u->_rex = (~(u->vex_b1 >> 5)) & 4;
    } else {
      UD_ASSERT(u->vex_op == 0);
      u->_rex = u->pfx_rex;
    }
    u->_rex &= REX_PFX_MASK(u->itab_entry->prefix);

    /* whether this instruction has a default operand size of
     * 64bit, also hardcoded into the opcode map.
     */
    default64 = P_DEF64( u->itab_entry->prefix );
    /* calculate effective operand size */
    if (REX_W(u->_rex)) {
      u->opr_mode = 64;
    } else if ( u->pfx_opr ) {
      u->opr_mode = 16;
    } else {
      /* unless the default opr size of instruction is 64,
       * the effective operand size in the absence of rex.w
       * prefix is 32.
       */
      u->opr_mode = default64 ? 64 : 32;
    }

    /* calculate effective address size */
    u->adr_mode = (u->pfx_adr) ? 32 : 64;
  } else if ( u->dis_mode == 32 ) { /* set 32bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 16 : 32;
    u->adr_mode = ( u->pfx_adr ) ? 16 : 32;
  } else if ( u->dis_mode == 16 ) { /* set 16bit-mode flags */
    u->opr_mode = ( u->pfx_opr ) ? 32 : 16;
    u->adr_mode = ( u->pfx_adr ) ? 32 : 16;
  }

  return 0;
}


static UD_INLINE int
decode_insn(struct ud *u, uint16_t ptr)
{
  UD_ASSERT((ptr & 0x8000) == 0);
  u->itab_entry = &ud_itab[ ptr ];
  u->mnemonic = u->itab_entry->mnemonic;
  return (resolve_pfx_str(u)  == 0 &&
          resolve_mode(u)     == 0 &&
          decode_operands(u)  == 0 &&
          resolve_mnemonic(u) == 0) ? 0 : -1;
}


/*
 * decode_3dnow()
 *
 * Decoding 3dnow is a little tricky because of its strange opcode
 * structure. The final opcode disambiguation depends on the last
 * byte that comes after the operands have been decoded. Fortunately,
 * all 3dnow instructions have the same set of operand types. So we
 * go ahead and decode the instruction by picking an arbitrarily chosen
 * valid entry in the table, decode the operands, and read the final
 * byte to resolve the mnemonic.
 */
static UD_INLINE int
decode_3dnow(struct ud* u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_3DNOW);
  UD_ASSERT(u->le->table[0xc] != 0);
  decode_insn(u, u->le->table[0xc]);
  inp_next(u);
  if (u->error) {
    return -1;
  }
  ptr = u->le->table[inp_curr(u)];
  UD_ASSERT((ptr & 0x8000) == 0);
  u->mnemonic = ud_itab[ptr].mnemonic;
  return 0;
}


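/*
 * For SSE opcodes, a mandatory prefix selects the table entry:
 * index 0 = no prefix, 1 = 0xf2, 2 = 0xf3, 3 = 0x66 (this is what
 * ((pfx & 0xf) + 1) / 2 computes below); if the prefixed slot is empty,
 * the decoder falls back to the unprefixed entry and leaves the prefix
 * to be reported normally.
 */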
static int
decode_ssepfx(struct ud *u)
{
  uint8_t idx;
  uint8_t pfx;

  /*
   * String prefixes (f2, f3) take precedence over operand
   * size prefix (66).
   */
  pfx = u->pfx_str;
  if (pfx == 0) {
    pfx = u->pfx_opr;
  }
  idx = ((pfx & 0xf) + 1) / 2;
  if (u->le->table[idx] == 0) {
    idx = 0;
  }
  if (idx && u->le->table[idx] != 0) {
    /*
     * "Consume" the prefix as a part of the opcode, so it is no
     * longer exported as an instruction prefix.
     */
    u->pfx_str = 0;
    if (pfx == 0x66) {
      /*
       * consume "66" only if it was used for decoding, leaving
       * it to be used as an operands size override for some
       * simd instructions.
       */
      u->pfx_opr = 0;
    }
  }
  return decode_ext(u, u->le->table[idx]);
}


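/*
 * A vex prefix is either 0xc5 followed by one payload byte (2-byte
 * form) or 0xc4 followed by two (3-byte form). The lookup table is
 * indexed by the implied mandatory prefix pp (0=none, 1=66, 2=f3, 3=f2)
 * and, for the 3-byte form, the escape map m-mmmm (1=0f, 2=0f38,
 * 3=0f3a); the 2-byte form always implies the 0f map. Outside 64-bit
 * mode, c4/c5 with modrm.mod != 11 encode lds/les rather than a vex
 * prefix, hence the peek below.
 */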
static int
decode_vex(struct ud *u)
{
  uint8_t index;
  if (u->dis_mode != 64 && MODRM_MOD(inp_peek(u)) != 0x3) {
    index = 0;
  } else {
    u->vex_op = inp_curr(u);
    u->vex_b1 = inp_next(u);
    if (u->vex_op == 0xc4) {
      uint8_t pp, m;
      /* 3-byte vex */
      u->vex_b2 = inp_next(u);
      UD_RETURN_ON_ERROR(u);
      m  = u->vex_b1 & 0x1f;
      if (m == 0 || m > 3) {
        UD_RETURN_WITH_ERROR(u, "reserved vex.m-mmmm value");
      }
      pp = u->vex_b2 & 0x3;
      index = (pp << 2) | m;
    } else {
      /* 2-byte vex */
      UD_ASSERT(u->vex_op == 0xc5);
      index = 0x1 | ((u->vex_b1 & 0x3) << 2);
    }
  }
  return decode_ext(u, u->le->table[index]);
}


/*
 * decode_ext()
 *
 *    Decode opcode extensions (if any)
 */
static int
decode_ext(struct ud *u, uint16_t ptr)
{
  uint8_t idx = 0;
  if ((ptr & 0x8000) == 0) {
    return decode_insn(u, ptr);
  }
  u->le = &ud_lookup_table_list[(~0x8000 & ptr)];
  if (u->le->type == UD_TAB__OPC_3DNOW) {
    return decode_3dnow(u);
  }

  switch (u->le->type) {
    case UD_TAB__OPC_MOD:
      /* !11 = 0, 11 = 1 */
      idx = (MODRM_MOD(modrm(u)) + 1) / 4;
      break;
    /* disassembly mode/operand size/address size based tables.
     * 16 = 0, 32 = 1, 64 = 2
     */
    case UD_TAB__OPC_MODE:
      idx = u->dis_mode != 64 ? 0 : 1;
      break;
    case UD_TAB__OPC_OSIZE:
      idx = eff_opr_mode(u->dis_mode, REX_W(u->pfx_rex), u->pfx_opr) / 32;
      break;
    case UD_TAB__OPC_ASIZE:
      idx = eff_adr_mode(u->dis_mode, u->pfx_adr) / 32;
      break;
    case UD_TAB__OPC_X87:
      idx = modrm(u) - 0xC0;
      break;
    case UD_TAB__OPC_VENDOR:
      if (u->vendor == UD_VENDOR_ANY) {
        /* choose a valid entry */
        idx = (u->le->table[idx] != 0) ? 0 : 1;
      } else if (u->vendor == UD_VENDOR_AMD) {
        idx = 0;
      } else {
        idx = 1;
      }
      break;
    case UD_TAB__OPC_RM:
      idx = MODRM_RM(modrm(u));
      break;
    case UD_TAB__OPC_REG:
      idx = MODRM_REG(modrm(u));
      break;
    case UD_TAB__OPC_SSE:
      return decode_ssepfx(u);
    case UD_TAB__OPC_VEX:
      return decode_vex(u);
    case UD_TAB__OPC_VEX_W:
      idx = vex_w(u);
      break;
    case UD_TAB__OPC_VEX_L:
      idx = vex_l(u);
      break;
    case UD_TAB__OPC_TABLE:
      inp_next(u);
      return decode_opcode(u);
    default:
      UD_ASSERT(!"not reached");
      break;
  }

  return decode_ext(u, u->le->table[idx]);
}


static int
decode_opcode(struct ud *u)
{
  uint16_t ptr;
  UD_ASSERT(u->le->type == UD_TAB__OPC_TABLE);
  UD_RETURN_ON_ERROR(u);
  ptr = u->le->table[inp_curr(u)];
  return decode_ext(u, ptr);
}


/* =============================================================================
 * ud_decode() - Instruction decoder. Returns the number of bytes decoded.
 * =============================================================================
 */
unsigned int
ud_decode(struct ud *u)
{
  inp_start(u);
  clear_insn(u);
  u->le = &ud_lookup_table_list[0];
  u->error = decode_prefixes(u) == -1 ||
             decode_opcode(u)   == -1 ||
             u->error;
  /* Handle decode error. */
  if (u->error) {
    /* clear out the decode data. */
    clear_insn(u);
    /* mark the sequence of bytes as invalid. */
    u->itab_entry = &ud_itab[0]; /* entry 0 is invalid */
    u->mnemonic = u->itab_entry->mnemonic;
  }

  /* maybe this stray segment override byte
   * should be spewed out?
   */
  if ( !P_SEG( u->itab_entry->prefix ) &&
       u->operand[0].type != UD_OP_MEM &&
       u->operand[1].type != UD_OP_MEM )
    u->pfx_seg = 0;

  u->insn_offset = u->pc;  /* set offset of instruction */
  u->asm_buf_fill = 0;     /* set translation buffer index to 0 */
  u->pc += u->inp_ctr;     /* move program counter by bytes decoded */

  /* return number of bytes disassembled. */
  return u->inp_ctr;
}

/*
vim: set ts=2 sw=2 expandtab
*/