1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (Disassembler based on libcapstone)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8 #ifndef _GNU_SOURCE
9 # define _GNU_SOURCE
10 #endif
11
12 #ifndef _WIN32
13 # include <dlfcn.h>
14 # include <unistd.h>
15 # include <fcntl.h>
16 # include <limits.h>
17 # if defined(__FreeBSD__) || defined(__DragonFly__)
18 # include <sys/sysctl.h>
19 # endif
20 #endif
21
22 #include "ir.h"
23 #include "ir_private.h"
24
25 #ifndef _WIN32
26 # include "ir_elf.h"
27 #endif
28
29 #include <capstone/capstone.h>
30 #define HAVE_CAPSTONE_ITER
31
/*
 * Node of the address-ordered symbol tree used to translate raw addresses
 * back into symbol names.
 *
 * Each node is allocated with extra room after the structure so that the
 * NUL-terminated symbol name can be stored inline in `name`.
 */
typedef struct _ir_sym_node {
	uint64_t             addr;     /* first address covered by the symbol */
	uint64_t             end;      /* last address covered (addr + size - 1) */
	struct _ir_sym_node *parent;   /* parent node, NULL for the tree root */
	struct _ir_sym_node *child[2]; /* [0] - lower addresses, [1] - higher addresses */
	unsigned char        info;     /* balancing colour: 1 for a freshly inserted node, 0 for the root */
	char                 name[];   /* symbol name (flexible array member, stored inline) */
} ir_sym_node;
40
/* Root of the symbol tree; NULL while no symbols are registered. */
static ir_sym_node *_symbols = NULL;
42
/*
 * Left rotation around `p`: the right child of `p` takes the place of `p`
 * in the tree and `p` becomes its left child.  The former left subtree of
 * that child is re-attached as the right subtree of `p`.
 *
 * `p` must have a right child.
 */
static void ir_syms_rotateleft(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[1];
	ir_sym_node *inner = pivot->child[0];
	ir_sym_node *above = p->parent;

	/* hand the pivot's inner subtree over to p */
	p->child[1] = inner;
	if (inner != NULL) {
		inner->parent = p;
	}

	/* hook the pivot where p used to hang */
	pivot->parent = above;
	if (above == NULL) {
		_symbols = pivot;
	} else {
		above->child[(above->child[0] == p) ? 0 : 1] = pivot;
	}

	/* finally put p below the pivot */
	pivot->child[0] = p;
	p->parent = pivot;
}
61
/*
 * Right rotation around `p`: the left child of `p` takes the place of `p`
 * in the tree and `p` becomes its right child.  The former right subtree of
 * that child is re-attached as the left subtree of `p`.
 *
 * `p` must have a left child.
 */
static void ir_syms_rotateright(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[0];
	ir_sym_node *inner = pivot->child[1];
	ir_sym_node *above = p->parent;

	/* hand the pivot's inner subtree over to p */
	p->child[0] = inner;
	if (inner != NULL) {
		inner->parent = p;
	}

	/* hook the pivot where p used to hang */
	pivot->parent = above;
	if (above == NULL) {
		_symbols = pivot;
	} else {
		above->child[(above->child[1] == p) ? 1 : 0] = pivot;
	}

	/* finally put p below the pivot */
	pivot->child[1] = p;
	p->parent = pivot;
}
80
ir_disasm_add_symbol(const char * name,uint64_t addr,uint64_t size)81 void ir_disasm_add_symbol(const char *name,
82 uint64_t addr,
83 uint64_t size)
84 {
85 ir_sym_node *sym;
86 size_t len = strlen(name);
87
88 sym = ir_mem_pmalloc(sizeof(ir_sym_node) + len + 1);
89 if (!sym) {
90 return;
91 }
92 sym->addr = addr;
93 sym->end = (addr + size - 1);
94 memcpy((char*)&sym->name, name, len + 1);
95 sym->parent = sym->child[0] = sym->child[1] = NULL;
96 sym->info = 1;
97 if (_symbols) {
98 ir_sym_node *node = _symbols;
99
100 /* insert it into rbtree */
101 do {
102 if (sym->addr > node->addr) {
103 IR_ASSERT(sym->addr > (node->end));
104 if (node->child[1]) {
105 node = node->child[1];
106 } else {
107 node->child[1] = sym;
108 sym->parent = node;
109 break;
110 }
111 } else if (sym->addr < node->addr) {
112 if (node->child[0]) {
113 node = node->child[0];
114 } else {
115 node->child[0] = sym;
116 sym->parent = node;
117 break;
118 }
119 } else {
120 IR_ASSERT(sym->addr == node->addr);
121 if (strcmp(name, node->name) == 0 && sym->end < node->end) {
122 /* reduce size of the existing symbol */
123 node->end = sym->end;
124 }
125 ir_mem_pfree(sym);
126 return;
127 }
128 } while (1);
129
130 /* fix rbtree after inserting */
131 while (sym && sym != _symbols && sym->parent->info == 1) {
132 if (sym->parent == sym->parent->parent->child[0]) {
133 node = sym->parent->parent->child[1];
134 if (node && node->info == 1) {
135 sym->parent->info = 0;
136 node->info = 0;
137 sym->parent->parent->info = 1;
138 sym = sym->parent->parent;
139 } else {
140 if (sym == sym->parent->child[1]) {
141 sym = sym->parent;
142 ir_syms_rotateleft(sym);
143 }
144 sym->parent->info = 0;
145 sym->parent->parent->info = 1;
146 ir_syms_rotateright(sym->parent->parent);
147 }
148 } else {
149 node = sym->parent->parent->child[0];
150 if (node && node->info == 1) {
151 sym->parent->info = 0;
152 node->info = 0;
153 sym->parent->parent->info = 1;
154 sym = sym->parent->parent;
155 } else {
156 if (sym == sym->parent->child[0]) {
157 sym = sym->parent;
158 ir_syms_rotateright(sym);
159 }
160 sym->parent->info = 0;
161 sym->parent->parent->info = 1;
162 ir_syms_rotateleft(sym->parent->parent);
163 }
164 }
165 }
166 } else {
167 _symbols = sym;
168 }
169 _symbols->info = 0;
170 }
171
/*
 * Recursively release the subtree rooted at `n` (children first, then the
 * node itself).  A NULL subtree is accepted and ignored.
 */
static void ir_disasm_destroy_symbols(ir_sym_node *n)
{
	int side;

	if (n == NULL) {
		return;
	}
	for (side = 0; side < 2; side++) {
		ir_disasm_destroy_symbols(n->child[side]);
	}
	ir_mem_pfree(n);
}
184
ir_disasm_find_symbol(uint64_t addr,int64_t * offset)185 const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset)
186 {
187 ir_sym_node *node = _symbols;
188 while (node) {
189 if (addr < node->addr) {
190 node = node->child[0];
191 } else if (addr > node->end) {
192 node = node->child[1];
193 } else {
194 *offset = addr - node->addr;
195 return node->name;
196 }
197 }
198 return NULL;
199 }
200
/*
 * Return the immediate operand of a branch-like instruction, or 0 when the
 * instruction is not one of the recognised kinds or has no immediate operand.
 *
 * x86/x64: instructions in the capstone JUMP group.
 * AArch64: instructions in the JUMP group plus BL and ADR.
 * Other targets: always 0.
 */
static uint64_t ir_disasm_branch_target(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		const cs_x86 *x86 = &insn->detail->x86;
		unsigned int idx;

		for (idx = 0; idx < x86->op_count; idx++) {
			const cs_x86_op *op = &x86->operands[idx];

			if (op->type == X86_OP_IMM) {
				return op->imm;
			}
		}
	}
#elif defined(IR_TARGET_AARCH64)
	if (cs_insn_group(cs, insn, ARM64_GRP_JUMP)
	 || insn->id == ARM64_INS_BL
	 || insn->id == ARM64_INS_ADR) {
		const cs_arm64 *arm64 = &insn->detail->arm64;
		unsigned int idx;

		for (idx = 0; idx < arm64->op_count; idx++) {
			const cs_arm64_op *op = &arm64->operands[idx];

			if (op->type == ARM64_OP_IMM) {
				return op->imm;
			}
		}
	}
#endif

	return 0;
}
226
/*
 * Return the absolute address referenced by an instruction operand, or 0 when
 * none of the recognised patterns matches.  The caller decides whether the
 * address actually falls into the read-only data area.
 *
 * x86:     memory operand with displacement only (no base/index/segment),
 *          memory operand with a displacement of a JUMP-group instruction,
 *          or a pointer-sized immediate in the first operand of a
 *          two-operand MOV; the value is truncated to 32 bits.
 * x64:     RIP-relative memory operand without index; the displacement is
 *          added to the address of the following instruction.
 * AArch64: immediate operand of ADR and of the listed load/store instructions.
 */
static uint64_t ir_disasm_rodata_reference(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86)
	const cs_x86 *x86 = &insn->detail->x86;
	const cs_x86_op *op;
	unsigned int idx;

	/* plain absolute memory operand */
	for (idx = 0; idx < x86->op_count; idx++) {
		op = &x86->operands[idx];
		if (op->type == X86_OP_MEM
		 && op->mem.base == X86_REG_INVALID
		 && op->mem.segment == X86_REG_INVALID
		 && op->mem.index == X86_REG_INVALID
		 && op->mem.scale == 1) {
			return (uint32_t)op->mem.disp;
		}
	}

	/* indirect jump through a memory operand with a displacement */
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		for (idx = 0; idx < x86->op_count; idx++) {
			op = &x86->operands[idx];
			if (op->type == X86_OP_MEM && op->mem.disp) {
				return (uint32_t)op->mem.disp;
			}
		}
	}

	/* MOV whose first operand is a pointer-sized immediate */
	if (insn->id == X86_INS_MOV && x86->op_count == 2) {
		op = &x86->operands[0];
		if (op->type == X86_OP_IMM && op->size == sizeof(void*)) {
			return (uint32_t)op->imm;
		}
	}
#elif defined(IR_TARGET_X64)
	const cs_x86 *x86 = &insn->detail->x86;
	unsigned int idx;

	for (idx = 0; idx < x86->op_count; idx++) {
		const cs_x86_op *op = &x86->operands[idx];

		if (op->type == X86_OP_MEM
		 && op->mem.base == X86_REG_RIP
		 && op->mem.segment == X86_REG_INVALID
		 // TODO: support for index and scale
		 && op->mem.index == X86_REG_INVALID
		 && op->mem.scale == 1) {
			/* RIP-relative: displacement is relative to the next instruction */
			return op->mem.disp + insn->address + insn->size;
		}
	}
#elif defined(IR_TARGET_AARCH64)
	const cs_arm64 *arm64 = &insn->detail->arm64;
	unsigned int idx;

	switch (insn->id) {
		case ARM64_INS_ADR:
		case ARM64_INS_LDRB:
		case ARM64_INS_LDR:
		case ARM64_INS_LDRH:
		case ARM64_INS_LDRSB:
		case ARM64_INS_LDRSH:
		case ARM64_INS_LDRSW:
		case ARM64_INS_STRB:
		case ARM64_INS_STR:
		case ARM64_INS_STRH:
			for (idx = 0; idx < arm64->op_count; idx++) {
				if (arm64->operands[idx].type == ARM64_OP_IMM) {
					return arm64->operands[idx].imm;
				}
			}
			break;
		default:
			break;
	}
#endif

	return 0;
}
291
ir_disasm_resolver(uint64_t addr,int64_t * offset)292 static const char* ir_disasm_resolver(uint64_t addr,
293 int64_t *offset)
294 {
295 #ifndef _WIN32
296 const char *name;
297 void *a = (void*)(uintptr_t)(addr);
298 Dl_info info;
299
300 name = ir_disasm_find_symbol(addr, offset);
301 if (name) {
302 return name;
303 }
304
305 if (dladdr(a, &info)
306 && info.dli_sname != NULL
307 && info.dli_saddr == a) {
308 *offset = 0;
309 return info.dli_sname;
310 }
311 #else
312 const char *name;
313 name = ir_disasm_find_symbol(addr, offset);
314 if (name) {
315 return name;
316 }
317 #endif
318
319 return NULL;
320 }
321
/*
 * Disassemble `size` bytes of generated code starting at `start` and print
 * an assembler-like listing to `f`.
 *
 * name     - optional label printed as "name:" before the listing (may be NULL)
 * start    - first byte of the code buffer
 * size     - total size of the buffer, including trailing read-only data and
 *            jump tables when `ctx` describes them
 * asm_addr - when true, every instruction is prefixed with its address
 * ctx      - optional IR context (may be NULL); supplies entry points,
 *            `rodata_offset` and `jmp_table_offset`
 * f        - output stream
 *
 * The work is done in two passes over the code.  The first pass collects
 * label positions (entry points, jump-table targets, branch targets inside
 * the code and referenced read-only data).  The second pass prints the
 * instructions, substituting ".L<n>"/".ENTRY_<n>" labels and known symbol
 * names for raw addresses.  Read-only data and jump tables, when present,
 * are dumped after the code under ".rodata".
 *
 * Returns 1 on success and 0 when capstone cannot be initialised or its
 * instruction buffer cannot be allocated.
 */
int ir_disasm(const char    *name,
              const void    *start,
              size_t         size,
              bool           asm_addr,
              ir_ctx        *ctx,
              FILE          *f)
{
	size_t orig_size = size;
	const void *orig_end = (void *)((char *)start + size);
	const void *end;
	ir_hashtab labels;
	int32_t l, n;
	uint64_t addr;
	csh cs;
	cs_insn *insn;
# ifdef HAVE_CAPSTONE_ITER
	const uint8_t *cs_code;
	size_t cs_size;
	uint64_t cs_addr;
# else
	size_t count, i;
# endif
	const char *sym;
	int64_t offset = 0;
	char *p, *q, *r;
	uint32_t rodata_offset = 0;
	uint32_t jmp_table_offset = 0;
	ir_hashtab_bucket *b;
	int32_t entry;
	cs_err ret;

# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
#  ifdef IR_TARGET_X64
	ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  else
	ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  endif
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
#  if DISASM_INTEL_SYNTAX
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
#  else
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
#  endif
# elif defined(IR_TARGET_AARCH64)
	ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
# endif

	if (name) {
		fprintf(f, "%s:\n", name);
	}

	/*
	 * `labels` maps a byte offset from `start` to a label number.
	 * Non-negative values are printed as ".ENTRY_<n>", negative values as
	 * ".L<n>"; labels discovered below are added as -1 and renumbered later.
	 */
	ir_hashtab_init(&labels, 32);

	if (ctx) {
		if (ctx->entries_count) {
			int e = ctx->entries_count;
			do {
				ir_insn *entry_insn = &ctx->ir_base[ctx->entries[--e]];
				ir_hashtab_add(&labels, entry_insn->op3, entry_insn->op2);
			} while (e != 0);
		}

		/* code ends where the read-only data or the jump tables begin */
		rodata_offset = ctx->rodata_offset;
		if (rodata_offset) {
			if (size > rodata_offset) {
				size = rodata_offset;
			}
		}
		jmp_table_offset = ctx->jmp_table_offset;
		if (jmp_table_offset) {
			uint32_t tbl_size;
			uintptr_t *slot;

			IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff);
			tbl_size = (uint32_t)(orig_size - jmp_table_offset);
			if (size > jmp_table_offset) {
				size = jmp_table_offset;
			}
			/* skip padding bytes until the table size is a multiple of the pointer size */
			while (tbl_size > 0 && IR_ALIGNED_SIZE(tbl_size, sizeof(void*)) != tbl_size) {
				jmp_table_offset++;
				tbl_size--;
			}
			IR_ASSERT(tbl_size > 0 && tbl_size % sizeof(void*) == 0 && jmp_table_offset % sizeof(void*) == 0);
			/* every jump-table slot pointing back into this buffer needs a label */
			slot = (uintptr_t*)((char*)start + jmp_table_offset);
			while (tbl_size > 0) {
				if (*slot) {
					if ((uintptr_t)*slot >= (uintptr_t)start && (uintptr_t)*slot < (uintptr_t)orig_end) {
						ir_hashtab_add(&labels, (uint32_t)((uintptr_t)*slot - (uintptr_t)start), -1);
					}
				}
				slot++;
				tbl_size -= sizeof(void*);
			}
		}
	}
	end = (void *)((char *)start + size);

	/* Pass 1: collect branch targets and read-only data references */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	insn = cs_malloc(cs);
	if (!insn) {
		/* cs_disasm_iter() cannot work without its instruction buffer */
		ir_hashtab_free(&labels);
		cs_close(&cs);
		return 0;
	}
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		if ((addr = ir_disasm_branch_target(cs, insn))
# else
	count = cs_disasm(cs, start, (uint8_t*)end - (uint8_t*)start, (uintptr_t)start, 0, &insn);
	for (i = 0; i < count; i++) {
		if ((addr = ir_disasm_branch_target(cs, &(insn[i])))
# endif
		 && (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)end)) {
			ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
# ifdef HAVE_CAPSTONE_ITER
		} else if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		} else if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
			}
		}
	}

	ir_hashtab_key_sort(&labels);

	/* renumber labels */
	l = 0;
	n = labels.count;
	b = labels.data;
	while (n > 0) {
		if (b->val < 0) {
			b->val = --l;
		}
		b++;
		n--;
	}

	/* Pass 2: print the instructions */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start));
# else
	for (i = 0; i < count; i++) {
		/* look up the label of the current instruction, not of the first one */
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn[i].address - (uintptr_t)start));
# endif
		if (entry != (ir_ref)IR_INVALID_VAL) {
			if (entry >= 0) {
				fprintf(f, ".ENTRY_%d:\n", entry);
			} else {
				fprintf(f, ".L%d:\n", -entry);
			}
		}

# ifdef HAVE_CAPSTONE_ITER
		if (asm_addr) {
			fprintf(f, "    %" PRIx64 ":", insn->address);
		}
		p = insn->op_str;
#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5)
		/* Fix capstone MOVD/MOVQ disassemble mismatch */
		if (insn->id == X86_INS_MOVQ && strcmp(insn->mnemonic, "movd") == 0) {
			insn->mnemonic[3] = 'q';
		}
#endif
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn->mnemonic);
			continue;
		} else {
			fprintf(f, "\t%s ", insn->mnemonic);
		}
# else
		if (asm_addr) {
			fprintf(f, "    %" PRIx64 ":", insn[i].address);
		}
		p = insn[i].op_str;
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn[i].mnemonic);
			continue;
		} else {
			fprintf(f, "\t%s ", insn[i].mnemonic);
		}
# endif
		/* Try to replace the target addresses with a symbols */
#if defined(IR_TARGET_X64)
# ifdef HAVE_CAPSTONE_ITER
		if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					r = q = strstr(p, "(%rip)");
					if (r && r > p) {
						/* step back over the hexadecimal displacement in front of "(%rip)" */
						r--;
						while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
							r--;
						}
						if (r > p && *r == 'x' && *(r - 1) == '0') {
							r -= 2;
						}
						if (r > p) {
							fwrite(p, 1, r - p, f);
						}
						if (entry >= 0) {
							fprintf(f, ".ENTRY_%d%s\n", entry, q);
						} else {
							fprintf(f, ".L%d%s\n", -entry, q);
						}
						continue;
					}
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
				r = q = strstr(p, "(%rip)");
				if (r && r > p) {
					/* step back over the hexadecimal displacement in front of "(%rip)" */
					r--;
					while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
						r--;
					}
					if (r > p && *r == 'x' && *(r - 1) == '0') {
						r -= 2;
					}
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					fputs(sym, f);
					if (offset != 0) {
						if (offset > 0) {
							fprintf(f, "+0x%" PRIx64, (uint64_t)offset);
						} else {
							fprintf(f, "-0x%" PRIx64, (uint64_t)0 - (uint64_t)offset);
						}
					}
					fprintf(f, "%s\n", q);
					continue;
				}
			}
		}
#endif
		/* scan the operand string for hexadecimal constants */
#if defined(IR_TARGET_AARCH64)
		while ((q = strstr(p, "#0x")) != NULL) {
			r = q + 3;
#else
		while ((q = strstr(p, "0x")) != NULL) {
			r = q + 2;
#endif
			addr = 0;
			while (1) {
				if (*r >= '0' && *r <= '9') {
					addr = addr * 16 + (*r - '0');
				} else if (*r >= 'A' && *r <= 'F') {
					addr = addr * 16 + (*r - 'A' + 10);
				} else if (*r >= 'a' && *r <= 'f') {
					addr = addr * 16 + (*r - 'a' + 10);
				} else {
					break;
				}
				r++;
			}
			if (p != q && *(q-1) == '-') {
				q--;
				addr = (uint32_t)(-(int64_t)addr);
			}
			if (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)orig_end) {
				/* address inside this buffer: print a label if there is one */
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					fwrite(p, 1, q - p, f);
					if (entry >= 0) {
						fprintf(f, ".ENTRY_%d", entry);
					} else {
						fprintf(f, ".L%d", -entry);
					}
				} else if (r > p) {
					fwrite(p, 1, r - p, f);
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
				/* keep "$imm" immediates that only fall inside a symbol as plain numbers */
				if (offset && p != q && *(q-1) == '$') {
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					p = r;
					continue;
				}
#endif
				if (q > p) {
					fwrite(p, 1, q - p, f);
				}
				fputs(sym, f);
				if (offset != 0) {
					if (offset > 0) {
						fprintf(f, "+0x%" PRIx64, (uint64_t)offset);
					} else {
						fprintf(f, "-0x%" PRIx64, (uint64_t)0 - (uint64_t)offset);
					}
				}
			} else if (r > p) {
				fwrite(p, 1, r - p, f);
			}
			p = r;
		}
		fprintf(f, "%s\n", p);
	}
# ifdef HAVE_CAPSTONE_ITER
	cs_free(insn, 1);
# else
	cs_free(insn, count);
# endif

	if (rodata_offset || jmp_table_offset) {
		fprintf(f, ".rodata\n");
	}
	if (rodata_offset) {
		/* dump read-only data as bytes, at most 16 per line, breaking at labels */
		const unsigned char *byte = (unsigned char*)start + rodata_offset;
		uint32_t remaining = jmp_table_offset ?
			(uint32_t)(jmp_table_offset - rodata_offset) :
			(uint32_t)(orig_size - rodata_offset);
		uint32_t col;

		while (remaining > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)byte - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			fprintf(f, "\t.db 0x%02x", (int)*byte);
			byte++;
			remaining--;
			col = 15;
			while (remaining > 0 && col > 0) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)byte - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					break;
				}
				fprintf(f, ", 0x%02x", (int)*byte);
				byte++;
				remaining--;
				col--;
			}
			fprintf(f, "\n");
		}
	}
	if (jmp_table_offset) {
		/* dump the jump table one pointer-sized slot per line */
		uintptr_t *slot = (uintptr_t*)((char*)start + jmp_table_offset);
		uint32_t remaining = (uint32_t)(orig_size - jmp_table_offset);

		fprintf(f, ".align %d\n", (int)sizeof(void*));

		while (remaining > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)slot - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			if (*slot) {
				if ((uintptr_t)*slot >= (uintptr_t)start && (uintptr_t)*slot < (uintptr_t)orig_end) {
					/* target inside this buffer: it was given a label before pass 1 */
					entry = ir_hashtab_find(&labels, (uint32_t)(*slot - (uintptr_t)start));
					IR_ASSERT(entry != (ir_ref)IR_INVALID_VAL);
					if (entry >= 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .ENTRY_%d\n", entry);
						} else {
							fprintf(f, "\t.dword .ENTRY_%d\n", entry);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .L%d\n", -entry);
						} else {
							fprintf(f, "\t.dword .L%d\n", -entry);
						}
					}
				} else {
					int64_t sym_offset;
					const char *sym_name = ir_disasm_find_symbol(*slot, &sym_offset);

					if (sym_name && sym_offset == 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword %s\n", sym_name);
						} else {
							fprintf(f, "\t.dword %s\n", sym_name);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword 0x%0llx\n", (unsigned long long)*slot);
						} else {
							fprintf(f, "\t.dword 0x%0x\n", (unsigned int)*slot);
						}
					}
				}
			} else {
				if (sizeof(void*) == 8) {
					fprintf(f, "\t.qword 0\n");
				} else {
					fprintf(f, "\t.dword 0\n");
				}
			}
			slot++;
			remaining -= sizeof(void*);
		}
	}

	fprintf(f, "\n");

	ir_hashtab_free(&labels);

	cs_close(&cs);

	return 1;
}
753
754 #ifndef _WIN32
755 static void* ir_elf_read_sect(int fd, ir_elf_sectheader *sect)
756 {
757 void *s = ir_mem_malloc(sect->size);
758
759 if (lseek(fd, sect->ofs, SEEK_SET) < 0) {
760 ir_mem_free(s);
761 return NULL;
762 }
763 if (read(fd, s, sect->size) != (ssize_t)sect->size) {
764 ir_mem_free(s);
765 return NULL;
766 }
767
768 return s;
769 }
770
771 static void ir_elf_load_symbols(void)
772 {
773 ir_elf_header hdr;
774 ir_elf_sectheader sect;
775 int i;
776 #if defined(__linux__)
777 int fd = open("/proc/self/exe", O_RDONLY);
778 #elif defined(__NetBSD__)
779 int fd = open("/proc/curproc/exe", O_RDONLY);
780 #elif defined(__FreeBSD__) || defined(__DragonFly__)
781 char path[PATH_MAX];
782 size_t pathlen = sizeof(path);
783 int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
784 if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
785 return;
786 }
787 int fd = open(path, O_RDONLY);
788 #elif defined(__sun)
789 int fd = open("/proc/self/path/a.out", O_RDONLY);
790 #elif defined(__HAIKU__)
791 char path[PATH_MAX];
792 if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
793 NULL, path, sizeof(path)) != B_OK) {
794 return;
795 }
796
797 int fd = open(path, O_RDONLY);
798 #else
799 // To complete eventually for other ELF platforms.
800 // Otherwise APPLE is Mach-O
801 int fd = -1;
802 #endif
803
804 if (fd >= 0) {
805 if (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)
806 && hdr.emagic[0] == '\177'
807 && hdr.emagic[1] == 'E'
808 && hdr.emagic[2] == 'L'
809 && hdr.emagic[3] == 'F'
810 && lseek(fd, hdr.shofs, SEEK_SET) >= 0) {
811 for (i = 0; i < hdr.shnum; i++) {
812 if (read(fd, §, sizeof(sect)) == sizeof(sect)
813 && sect.type == ELFSECT_TYPE_SYMTAB) {
814 uint32_t n, count = sect.size / sizeof(ir_elf_symbol);
815 ir_elf_symbol *syms = ir_elf_read_sect(fd, §);
816 char *str_tbl;
817
818 if (syms) {
819 if (lseek(fd, hdr.shofs + sect.link * sizeof(sect), SEEK_SET) >= 0
820 && read(fd, §, sizeof(sect)) == sizeof(sect)
821 && (str_tbl = (char*)ir_elf_read_sect(fd, §)) != NULL) {
822 for (n = 0; n < count; n++) {
823 if (syms[n].name
824 && (ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_FUNC
825 /*|| ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_DATA*/)
826 && (ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_LOCAL
827 /*|| ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_GLOBAL*/)) {
828 ir_disasm_add_symbol(str_tbl + syms[n].name, syms[n].value, syms[n].size);
829 }
830 }
831 ir_mem_free(str_tbl);
832 }
833 ir_mem_free(syms);
834 }
835 if (lseek(fd, hdr.shofs + (i + 1) * sizeof(sect), SEEK_SET) < 0) {
836 break;
837 }
838 }
839 }
840 }
841 close(fd);
842 }
843 }
844 #endif
845
/*
 * Prepare the disassembler.  On non-Windows builds this loads the symbols of
 * the running executable; on Windows there is nothing to do.
 * Always returns 1.
 */
int ir_disasm_init(void)
{
#if !defined(_WIN32)
	ir_elf_load_symbols();
#endif

	return 1;
}
853
854 void ir_disasm_free(void)
855 {
856 if (_symbols) {
857 ir_disasm_destroy_symbols(_symbols);
858 _symbols = NULL;
859 }
860 }
861