xref: /php-src/ext/opcache/jit/ir/ir_disasm.c (revision 7b25cac3)
1 /*
2  * IR - Lightweight JIT Compilation Framework
3  * (Disassembler based on libcapstone)
4  * Copyright (C) 2022 Zend by Perforce.
5  * Authors: Dmitry Stogov <dmitry@php.net>
6  */
7 
8 #ifndef _GNU_SOURCE
9 # define _GNU_SOURCE
10 #endif
11 
12 #ifndef _WIN32
13 # include <dlfcn.h>
14 # include <unistd.h>
15 # include <fcntl.h>
16 # include <limits.h>
17 # if defined(__FreeBSD__) || defined(__DragonFly__)
18 #  include <sys/sysctl.h>
19 # endif
20 #endif
21 
22 #include "ir.h"
23 #include "ir_private.h"
24 
25 #ifndef _WIN32
26 # include "ir_elf.h"
27 #endif
28 
29 #include <capstone/capstone.h>
30 #define HAVE_CAPSTONE_ITER
31 
32 #ifndef IR_DISASM_INTEL_SYNTAX
33 # define IR_DISASM_INTEL_SYNTAX 0
34 #endif
35 
/*
 * Node of the address-ordered symbol tree used to map code addresses back
 * to names.
 *
 * addr/end  - first and last address covered by the symbol (inclusive).
 * parent    - parent node, NULL for the root.
 * child     - child[0] holds lower addresses, child[1] higher addresses.
 * info      - balancing colour: new nodes are inserted with 1 and the root
 *             is always forced to 0 (presumably 1 = red, 0 = black).
 * name      - NUL-terminated symbol name stored inline after the node; the
 *             node is allocated as sizeof(ir_sym_node) + strlen(name) + 1.
 *             Declared as a C99 flexible array member so that accessing the
 *             whole name is well defined (the old "[1]" struct hack is not).
 */
typedef struct _ir_sym_node {
	uint64_t             addr;
	uint64_t             end;
	struct _ir_sym_node *parent;
	struct _ir_sym_node *child[2];
	unsigned char        info;
	char                 name[];
} ir_sym_node;

/* Root of the symbol tree; NULL while no symbol is registered. */
static ir_sym_node *_symbols = NULL;
46 
/*
 * Left rotation around `p`: the right child of `p` takes the place of `p`
 * in the tree and `p` becomes its left child. The former left subtree of
 * that child is re-attached as the right subtree of `p`.
 * `p` must have a right child. Updates `_symbols` when `p` was the root.
 */
static void ir_syms_rotateleft(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[1];
	ir_sym_node *inner = pivot->child[0];
	ir_sym_node *upper = p->parent;

	/* hand the pivot's left subtree over to p */
	p->child[1] = inner;
	if (inner != NULL) {
		inner->parent = p;
	}

	/* hook the pivot where p used to hang */
	pivot->parent = upper;
	if (upper == NULL) {
		_symbols = pivot;
	} else {
		upper->child[(upper->child[0] == p) ? 0 : 1] = pivot;
	}

	/* finally put p below the pivot */
	pivot->child[0] = p;
	p->parent = pivot;
}
65 
/*
 * Right rotation around `p`: the left child of `p` takes the place of `p`
 * in the tree and `p` becomes its right child. The former right subtree of
 * that child is re-attached as the left subtree of `p`.
 * `p` must have a left child. Updates `_symbols` when `p` was the root.
 */
static void ir_syms_rotateright(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[0];
	ir_sym_node *inner = pivot->child[1];
	ir_sym_node *upper = p->parent;

	/* hand the pivot's right subtree over to p */
	p->child[0] = inner;
	if (inner != NULL) {
		inner->parent = p;
	}

	/* hook the pivot where p used to hang */
	pivot->parent = upper;
	if (upper == NULL) {
		_symbols = pivot;
	} else {
		upper->child[(upper->child[1] == p) ? 1 : 0] = pivot;
	}

	/* finally put p below the pivot */
	pivot->child[1] = p;
	p->parent = pivot;
}
84 
ir_disasm_add_symbol(const char * name,uint64_t addr,uint64_t size)85 void ir_disasm_add_symbol(const char *name,
86                           uint64_t    addr,
87                           uint64_t    size)
88 {
89 	ir_sym_node *sym;
90 	size_t len = strlen(name);
91 
92 	sym = ir_mem_pmalloc(sizeof(ir_sym_node) + len + 1);
93 	if (!sym) {
94 		return;
95 	}
96 	sym->addr = addr;
97 	sym->end  = (addr + size - 1);
98 	memcpy((char*)&sym->name, name, len + 1);
99 	sym->parent = sym->child[0] = sym->child[1] = NULL;
100 	sym->info = 1;
101 	if (_symbols) {
102 		ir_sym_node *node = _symbols;
103 
104 		/* insert it into rbtree */
105 		do {
106 			if (sym->addr > node->addr) {
107 				IR_ASSERT(sym->addr > (node->end));
108 				if (node->child[1]) {
109 					node = node->child[1];
110 				} else {
111 					node->child[1] = sym;
112 					sym->parent = node;
113 					break;
114 				}
115 			} else if (sym->addr < node->addr) {
116 				if (node->child[0]) {
117 					node = node->child[0];
118 				} else {
119 					node->child[0] = sym;
120 					sym->parent = node;
121 					break;
122 				}
123 			} else {
124 				IR_ASSERT(sym->addr == node->addr);
125 				if (strcmp(name, node->name) == 0 && sym->end < node->end) {
126 					/* reduce size of the existing symbol */
127 					node->end = sym->end;
128 				}
129 				ir_mem_pfree(sym);
130 				return;
131 			}
132 		} while (1);
133 
134 		/* fix rbtree after inserting */
135 		while (sym && sym != _symbols && sym->parent->info == 1) {
136 			if (sym->parent == sym->parent->parent->child[0]) {
137 				node = sym->parent->parent->child[1];
138 				if (node && node->info == 1) {
139 					sym->parent->info = 0;
140 					node->info = 0;
141 					sym->parent->parent->info = 1;
142 					sym = sym->parent->parent;
143 				} else {
144 					if (sym == sym->parent->child[1]) {
145 						sym = sym->parent;
146 						ir_syms_rotateleft(sym);
147 					}
148 					sym->parent->info = 0;
149 					sym->parent->parent->info = 1;
150 					ir_syms_rotateright(sym->parent->parent);
151 				}
152 			} else {
153 				node = sym->parent->parent->child[0];
154 				if (node && node->info == 1) {
155 					sym->parent->info = 0;
156 					node->info = 0;
157 					sym->parent->parent->info = 1;
158 					sym = sym->parent->parent;
159 				} else {
160 					if (sym == sym->parent->child[0]) {
161 						sym = sym->parent;
162 						ir_syms_rotateright(sym);
163 					}
164 					sym->parent->info = 0;
165 					sym->parent->parent->info = 1;
166 					ir_syms_rotateleft(sym->parent->parent);
167 				}
168 			}
169 		}
170 	} else {
171 		_symbols = sym;
172 	}
173 	_symbols->info = 0;
174 }
175 
/*
 * Recursively release the subtree rooted at `n` (children first, then the
 * node itself) with ir_mem_pfree(). A NULL subtree is a no-op.
 */
static void ir_disasm_destroy_symbols(ir_sym_node *n)
{
	if (n == NULL) {
		return;
	}
	ir_disasm_destroy_symbols(n->child[0]);
	ir_disasm_destroy_symbols(n->child[1]);
	ir_mem_pfree(n);
}
188 
ir_disasm_find_symbol(uint64_t addr,int64_t * offset)189 const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset)
190 {
191 	ir_sym_node *node = _symbols;
192 	while (node) {
193 		if (addr < node->addr) {
194 			node = node->child[0];
195 		} else if (addr > node->end) {
196 			node = node->child[1];
197 		} else {
198 			*offset = addr - node->addr;
199 			return node->name;
200 		}
201 	}
202 	return NULL;
203 }
204 
/*
 * Return the immediate target of a branch-like instruction, or 0 when the
 * instruction is not one or carries no immediate operand.
 *
 * x86/x64: instructions of the X86_GRP_JUMP group.
 * AArch64: instructions of the ARM64_GRP_JUMP group plus BL and ADR.
 * Requires capstone detail mode (insn->detail is dereferenced).
 */
static uint64_t ir_disasm_branch_target(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
	unsigned int idx = 0;

	if (!cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		return 0;
	}
	while (idx < insn->detail->x86.op_count) {
		if (insn->detail->x86.operands[idx].type == X86_OP_IMM) {
			return insn->detail->x86.operands[idx].imm;
		}
		idx++;
	}
	return 0;
#elif defined(IR_TARGET_AARCH64)
	unsigned int idx = 0;

	if (!(cs_insn_group(cs, insn, ARM64_GRP_JUMP)
	   || insn->id == ARM64_INS_BL
	   || insn->id == ARM64_INS_ADR)) {
		return 0;
	}
	while (idx < insn->detail->arm64.op_count) {
		if (insn->detail->arm64.operands[idx].type == ARM64_OP_IMM) {
			return insn->detail->arm64.operands[idx].imm;
		}
		idx++;
	}
	return 0;
#else
	return 0;
#endif
}
230 
/*
 * Return the absolute address an instruction refers to through a
 * data-style operand, or 0 when none is recognised.
 *
 * x86 (32-bit): a memory operand made of a bare displacement, any
 *     non-zero displacement of a jump-group instruction, or the immediate
 *     of a two-operand MOV whose first operand is pointer sized
 *     (all truncated to 32 bits).
 * x64: a RIP-relative memory operand without segment/index; the result is
 *     displacement + address of the next instruction.
 * AArch64: the immediate operand of ADR and of the listed load/store
 *     instructions.
 * Requires capstone detail mode (insn->detail is dereferenced).
 */
static uint64_t ir_disasm_rodata_reference(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86)
	unsigned int k;

	/* absolute memory operand: displacement only */
	for (k = 0; k < insn->detail->x86.op_count; k++) {
		if (insn->detail->x86.operands[k].type != X86_OP_MEM) {
			continue;
		}
		if (insn->detail->x86.operands[k].mem.base != X86_REG_INVALID
		 || insn->detail->x86.operands[k].mem.segment != X86_REG_INVALID
		 || insn->detail->x86.operands[k].mem.index != X86_REG_INVALID
		 || insn->detail->x86.operands[k].mem.scale != 1) {
			continue;
		}
		return (uint32_t)insn->detail->x86.operands[k].mem.disp;
	}

	/* indirect jump through memory with a non-zero displacement */
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		for (k = 0; k < insn->detail->x86.op_count; k++) {
			if (insn->detail->x86.operands[k].type == X86_OP_MEM
			 && insn->detail->x86.operands[k].mem.disp) {
				return (uint32_t)insn->detail->x86.operands[k].mem.disp;
			}
		}
	}

	/* MOV with a pointer-sized immediate as first operand */
	if (insn->id == X86_INS_MOV
	 && insn->detail->x86.op_count == 2
	 && insn->detail->x86.operands[0].type == X86_OP_IMM
	 && insn->detail->x86.operands[0].size == sizeof(void*)) {
		return (uint32_t)insn->detail->x86.operands[0].imm;
	}

	return 0;
#elif defined(IR_TARGET_X64)
	unsigned int k;

	for (k = 0; k < insn->detail->x86.op_count; k++) {
		if (insn->detail->x86.operands[k].type != X86_OP_MEM
		 || insn->detail->x86.operands[k].mem.base != X86_REG_RIP
		 || insn->detail->x86.operands[k].mem.segment != X86_REG_INVALID) {
			continue;
		}
		// TODO: support for index and scale
		if (insn->detail->x86.operands[k].mem.index != X86_REG_INVALID
		 || insn->detail->x86.operands[k].mem.scale != 1) {
			continue;
		}
		/* RIP-relative: relative to the end of this instruction */
		return insn->detail->x86.operands[k].mem.disp + insn->address + insn->size;
	}

	return 0;
#elif defined(IR_TARGET_AARCH64)
	unsigned int k;

	switch (insn->id) {
		case ARM64_INS_ADR:
		case ARM64_INS_LDRB:
		case ARM64_INS_LDR:
		case ARM64_INS_LDRH:
		case ARM64_INS_LDRSB:
		case ARM64_INS_LDRSH:
		case ARM64_INS_LDRSW:
		case ARM64_INS_STRB:
		case ARM64_INS_STR:
		case ARM64_INS_STRH:
			for (k = 0; k < insn->detail->arm64.op_count; k++) {
				if (insn->detail->arm64.operands[k].type == ARM64_OP_IMM) {
					return insn->detail->arm64.operands[k].imm;
				}
			}
			break;
		default:
			break;
	}

	return 0;
#else
	return 0;
#endif
}
295 
ir_disasm_resolver(uint64_t addr,int64_t * offset)296 static const char* ir_disasm_resolver(uint64_t   addr,
297                                       int64_t   *offset)
298 {
299 #ifndef _WIN32
300 	const char *name;
301 	void *a = (void*)(uintptr_t)(addr);
302 	Dl_info info;
303 
304 	name = ir_disasm_find_symbol(addr, offset);
305 	if (name) {
306 		return name;
307 	}
308 
309 	if (dladdr(a, &info)
310 	 && info.dli_sname != NULL
311 	 && info.dli_saddr == a) {
312 		*offset = 0;
313 		return info.dli_sname;
314 	}
315 #else
316 	const char *name;
317 	name = ir_disasm_find_symbol(addr, offset);
318 	if (name) {
319 		return name;
320 	}
321 #endif
322 
323 	return NULL;
324 }
325 
/*
 * Disassemble the machine code in [start, start + size) with capstone and
 * print an assembler-like listing to `f`.
 *
 * name      - optional; when non-NULL it is printed as a "name:" header.
 * start     - first byte of the generated code.
 * size      - size in bytes of the whole blob (code plus trailing data).
 * asm_addr  - when true every instruction is prefixed with its address.
 * ctx       - optional; when non-NULL its entries are added as labels and
 *             its rodata_offset / jmp_table_offset (when non-zero) mark
 *             where instructions stop and the data dump begins.
 * f         - output stream.
 *
 * The code is decoded twice: the first pass only collects label offsets
 * (branch targets inside the code and references into the trailing data),
 * the second pass prints the instructions, replacing addresses by
 * ".L<n>" / ".ENTRY_<n>" labels or resolved symbol names where possible.
 * Afterwards the read-only data is dumped as ".db" bytes and the jump table
 * as ".qword"/".dword" entries.
 *
 * Returns 1 on success and 0 when capstone could not be opened or the
 * instruction buffer could not be allocated.
 */
int ir_disasm(const char    *name,
              const void    *start,
              size_t         size,
              bool           asm_addr,
              ir_ctx        *ctx,
              FILE          *f)
{
	size_t orig_size = size;
	const void *orig_end = (void *)((char *)start + size);
	const void *end;
	ir_hashtab labels;
	int32_t l, n;
	uint64_t addr;
	csh cs;
	cs_insn *insn = NULL; /* stays NULL if the decoder produces nothing */
# ifdef HAVE_CAPSTONE_ITER
	const uint8_t *cs_code;
	size_t cs_size;
	uint64_t cs_addr;
# else
	size_t count, i;
# endif
	const char *sym;
	int64_t offset = 0;
	char *p, *q, *r;
	uint32_t rodata_offset = 0;
	uint32_t jmp_table_offset = 0;
	ir_hashtab_bucket *b;
	int32_t entry;
	cs_err ret;

# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
#  ifdef IR_TARGET_X64
	ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  else
	ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  endif
	/* operand details are needed by the branch/rodata helpers */
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
#  if IR_DISASM_INTEL_SYNTAX
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
#  else
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
#  endif
# elif defined(IR_TARGET_AARCH64)
	ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
# endif

	if (name) {
		fprintf(f, "%s:\n", name);
	}

	/* labels: key = byte offset from `start`, value = label number */
	ir_hashtab_init(&labels, 32);

	if (ctx) {
		if (ctx->entries_count) {
			int i = ctx->entries_count;
			do {
				/* entry labels keep the non-negative value stored in op2 */
				ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
				ir_hashtab_add(&labels, insn->op3, insn->op2);
			} while (i != 0);
		}

		/* instructions end where the read-only data starts */
		rodata_offset = ctx->rodata_offset;
		if (rodata_offset) {
			if (size > rodata_offset) {
				size = rodata_offset;
			}
		}
		jmp_table_offset = ctx->jmp_table_offset;
		if (jmp_table_offset) {
			uint32_t n;
			uintptr_t *p;

			IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff);
			n = (uint32_t)(orig_size - jmp_table_offset);
			if (size > jmp_table_offset) {
				size = jmp_table_offset;
			}
			/* skip leading padding so that the table is pointer aligned */
			while (n > 0 && IR_ALIGNED_SIZE(n, sizeof(void*)) != n) {
				jmp_table_offset++;
				n--;
			}
			IR_ASSERT(n > 0 && n % sizeof(void*) == 0 && jmp_table_offset % sizeof(void*) == 0);
			p = (uintptr_t*)((char*)start + jmp_table_offset);
			/* every table slot pointing into this blob needs a label */
			while (n > 0) {
				if (*p) {
					if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) {
						ir_hashtab_add(&labels, (uint32_t)((uintptr_t)*p - (uintptr_t)start), -1);
					}
				}
				p++;
				n -= sizeof(void*);
			}
		}
	}
	end = (void *)((char *)start + size);

	/* Pass 1: collect the offsets that need a label */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	insn = cs_malloc(cs);
	if (!insn) {
		/* out of memory: release what was acquired so far */
		fprintf(stderr, "cs_malloc() failed\n");
		ir_hashtab_free(&labels);
		cs_close(&cs);
		return 0;
	}
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		if ((addr = ir_disasm_branch_target(cs, insn))
# else
	count = cs_disasm(cs, start, (uint8_t*)end - (uint8_t*)start, (uintptr_t)start, 0, &insn);
	for (i = 0; i < count; i++) {
		if ((addr = ir_disasm_branch_target(cs, &(insn[i])))
# endif
		 && (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)end)) {
			ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
# ifdef HAVE_CAPSTONE_ITER
		} else if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		} else if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			/* only references into the trailing data area get a label */
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
			}
		}
	}

	ir_hashtab_key_sort(&labels);

	/* renumber labels: anonymous ones become -1, -2, ... in offset order */
	l = 0;
	n = labels.count;
	b = labels.data;
	while (n > 0) {
		if (b->val < 0) {
			b->val = --l;
		}
		b++;
		n--;
	}

	/* Pass 2: print the listing */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start));
# else
	for (i = 0; i < count; i++) {
		/* look up the label of the current instruction, not of the first */
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn[i].address - (uintptr_t)start));
# endif
		if (entry != (ir_ref)IR_INVALID_VAL) {
			if (entry >= 0) {
				fprintf(f, ".ENTRY_%d:\n", entry);
			} else {
				fprintf(f, ".L%d:\n", -entry);
			}
		}

# ifdef HAVE_CAPSTONE_ITER
		if (asm_addr) {
			fprintf(f, "    %" PRIx64 ":", insn->address);
		}
		p = insn->op_str;
#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5)
		/* Fix capstone MOVD/MOVQ disassemble mismatch */
		if (insn->id == X86_INS_MOVQ && strcmp(insn->mnemonic, "movd") == 0) {
			insn->mnemonic[3] = 'q';
		}
#endif
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn->mnemonic);
			continue;
		} else  {
			fprintf(f, "\t%s ", insn->mnemonic);
		}
# else
		if (asm_addr) {
			fprintf(f, "    %" PRIx64 ":", insn[i].address);
		}
		p = insn[i].op_str;
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn[i].mnemonic);
			continue;
		} else {
			fprintf(f, "\t%s ", insn[i].mnemonic);
		}
# endif
		/* Try to replace the target addresses with a symbols */
#if defined(IR_TARGET_X64)
# ifdef HAVE_CAPSTONE_ITER
		if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					/* replace the "[0x]disp" in front of "(%rip)" by the label */
					r = q = strstr(p, "(%rip)");
					if (r && r > p) {
						r--;
						while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
							r--;
						}
						if (r > p && *r == 'x' && *(r - 1) == '0') {
							r -= 2;
						}
						if (r > p) {
							fwrite(p, 1, r - p, f);
						}
						if (entry >= 0) {
							fprintf(f, ".ENTRY_%d%s\n", entry, q);
						} else {
							fprintf(f, ".L%d%s\n", -entry, q);
						}
						continue;
					}
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
				/* same replacement, but with a resolved symbol name */
				r = q = strstr(p, "(%rip)");
				if (r && r > p) {
					r--;
					while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
						r--;
					}
					if (r > p && *r == 'x' && *(r - 1) == '0') {
						r -= 2;
					}
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					fputs(sym, f);
					if (offset != 0) {
						if (offset > 0) {
							fprintf(f, "+0x%" PRIx64, offset);
						} else {
							fprintf(f, "-0x%" PRIx64, -offset);
						}
					}
					fprintf(f, "%s\n", q);
					continue;
				}
			}
		}
#endif
		/*
		 * Generic replacement: scan the operand string for hexadecimal
		 * literals; p = start of the not yet printed text, q = start of
		 * the literal, r = first character after it.
		 */
#if defined(IR_TARGET_AARCH64)
		while ((q = strstr(p, "#0x")) != NULL) {
				r = q + 3;
#else
		while ((q = strstr(p, "0x")) != NULL) {
				r = q + 2;
#endif
			addr = 0;
			while (1) {
				if (*r >= '0' && *r <= '9') {
					addr = addr * 16 + (*r - '0');
				} else if (*r >= 'A' && *r <= 'F') {
					addr = addr * 16 + (*r - 'A' + 10);
				} else if (*r >= 'a' && *r <= 'f') {
					addr = addr * 16 + (*r - 'a' + 10);
				} else {
					break;
				}
				r++;
			}
			if (p != q && *(q-1) == '-') {
				/* negative literal: include the sign in the replaced span */
				q--;
				addr = (uint32_t)(-(int64_t)addr);
			}
			if (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)orig_end) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					fwrite(p, 1, q - p, f);
					if (entry >= 0) {
						fprintf(f, ".ENTRY_%d", entry);
					} else {
						fprintf(f, ".L%d", -entry);
					}
				} else if (r > p) {
					fwrite(p, 1, r - p, f);
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
				/* keep "$imm" literals that only hit the middle of a symbol */
				if (offset && p != q && *(q-1) == '$') {
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					p = r;
					continue;
				}
#endif
				if (q > p) {
					fwrite(p, 1, q - p, f);
				}
				fputs(sym, f);
				if (offset != 0) {
					if (offset > 0) {
						fprintf(f, "+0x%" PRIx64, offset);
					} else {
						fprintf(f, "-0x%" PRIx64, -offset);
					}
				}
			} else if (r > p) {
				fwrite(p, 1, r - p, f);
			}
			p = r;
		}
		fprintf(f, "%s\n", p);
	}
# ifdef HAVE_CAPSTONE_ITER
	cs_free(insn, 1);
# else
	cs_free(insn, count);
# endif

	if (rodata_offset || jmp_table_offset) {
		fprintf(f, ".rodata\n");
	}
	if (rodata_offset) {
		const unsigned char *p = (unsigned char*)start + rodata_offset;
		uint32_t n = jmp_table_offset ?
			(uint32_t)(jmp_table_offset - rodata_offset) :
			(uint32_t)(orig_size - rodata_offset);
		uint32_t j;

		/* dump up to 16 bytes per line, breaking lines at labels */
		while (n > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			fprintf(f, "\t.db 0x%02x", (int)*p);
			p++;
			n--;
			j = 15;
			while (n > 0 && j > 0) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					break;
				}
				fprintf(f, ", 0x%02x", (int)*p);
				p++;
				n--;
				j--;
			}
			fprintf(f, "\n");
		}
	}
	if (jmp_table_offset) {
		/* byte offset first, then reinterpret as a table of pointers */
		uintptr_t *p = (uintptr_t*)((char*)start + jmp_table_offset);
		uint32_t n = (uint32_t)(orig_size - jmp_table_offset);

		fprintf(f, ".align %d\n", (int)sizeof(void*));

		while (n > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			if (*p) {
				if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) {
					/* slot points into this blob: print it as a label */
					entry = ir_hashtab_find(&labels, (uint32_t)(*p - (uintptr_t)start));
					IR_ASSERT(entry != (ir_ref)IR_INVALID_VAL);
					if (entry >= 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .ENTRY_%d\n", entry);
						} else {
							fprintf(f, "\t.dword .ENTRY_%d\n", entry);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .L%d\n", -entry);
						} else {
							fprintf(f, "\t.dword .L%d\n", -entry);
						}
					}
				} else {
					int64_t offset;
					const char *name = ir_disasm_find_symbol(*p, &offset);

					if (name && offset == 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword %s\n", name);
						} else {
							fprintf(f, "\t.dword %s\n", name);
						}
					} else {
						/* %llx / %x take unsigned arguments */
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword 0x%0llx\n", (unsigned long long)*p);
						} else {
							fprintf(f, "\t.dword 0x%0x\n", (unsigned int)*p);
						}
					}
				}
			} else {
				if (sizeof(void*) == 8) {
					fprintf(f, "\t.qword 0\n");
				} else {
					fprintf(f, "\t.dword 0\n");
				}
			}
			p++;
			n -= sizeof(void*);
		}
	}

	fprintf(f, "\n");

	ir_hashtab_free(&labels);

	cs_close(&cs);

	return 1;
}
757 
758 #ifndef _WIN32
759 static void* ir_elf_read_sect(int fd, ir_elf_sectheader *sect)
760 {
761 	void *s = ir_mem_malloc(sect->size);
762 
763 	if (lseek(fd, sect->ofs, SEEK_SET) < 0) {
764 		ir_mem_free(s);
765 		return NULL;
766 	}
767 	if (read(fd, s, sect->size) != (ssize_t)sect->size) {
768 		ir_mem_free(s);
769 		return NULL;
770 	}
771 
772 	return s;
773 }
774 
775 static void ir_elf_load_symbols(void)
776 {
777 	ir_elf_header hdr;
778 	ir_elf_sectheader sect;
779 	int i;
780 #if defined(__linux__)
781 	int fd = open("/proc/self/exe", O_RDONLY);
782 #elif defined(__NetBSD__)
783 	int fd = open("/proc/curproc/exe", O_RDONLY);
784 #elif defined(__FreeBSD__) || defined(__DragonFly__)
785 	char path[PATH_MAX];
786 	size_t pathlen = sizeof(path);
787 	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
788 	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
789 		return;
790 	}
791 	int fd = open(path, O_RDONLY);
792 #elif defined(__sun)
793 	int fd = open("/proc/self/path/a.out", O_RDONLY);
794 #elif defined(__HAIKU__)
795 	char path[PATH_MAX];
796 	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
797 		NULL, path, sizeof(path)) != B_OK) {
798 		return;
799 	}
800 
801 	int fd = open(path, O_RDONLY);
802 #else
803 	// To complete eventually for other ELF platforms.
804 	// Otherwise APPLE is Mach-O
805 	int fd = -1;
806 #endif
807 
808 	if (fd >= 0) {
809 		if (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)
810 		 && hdr.emagic[0] == '\177'
811 		 && hdr.emagic[1] == 'E'
812 		 && hdr.emagic[2] == 'L'
813 		 && hdr.emagic[3] == 'F'
814 		 && lseek(fd, hdr.shofs, SEEK_SET) >= 0) {
815 			for (i = 0; i < hdr.shnum; i++) {
816 				if (read(fd, &sect, sizeof(sect)) == sizeof(sect)
817 				 && sect.type == ELFSECT_TYPE_SYMTAB) {
818 					uint32_t n, count = sect.size / sizeof(ir_elf_symbol);
819 					ir_elf_symbol *syms = ir_elf_read_sect(fd, &sect);
820 					char *str_tbl;
821 
822 					if (syms) {
823 						if (lseek(fd, hdr.shofs + sect.link * sizeof(sect), SEEK_SET) >= 0
824 						 && read(fd, &sect, sizeof(sect)) == sizeof(sect)
825 						 && (str_tbl = (char*)ir_elf_read_sect(fd, &sect)) != NULL) {
826 							for (n = 0; n < count; n++) {
827 								if (syms[n].name
828 								 && (ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_FUNC
829 								  /*|| ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_DATA*/)
830 								 && (ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_LOCAL
831 								  /*|| ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_GLOBAL*/)) {
832 									ir_disasm_add_symbol(str_tbl + syms[n].name, syms[n].value, syms[n].size);
833 								}
834 							}
835 							ir_mem_free(str_tbl);
836 						}
837 						ir_mem_free(syms);
838 					}
839 					if (lseek(fd, hdr.shofs + (i + 1) * sizeof(sect), SEEK_SET) < 0) {
840 						break;
841 					}
842 				}
843 			}
844 		}
845 		close(fd);
846 	}
847 }
848 #endif
849 
/*
 * Prepare the disassembler. On non-Windows builds this loads the symbols
 * of the running executable. Always returns 1.
 */
int ir_disasm_init(void)
{
#if !defined(_WIN32)
	/* populate the symbol tree from the executable's ELF symbol table */
	ir_elf_load_symbols();
#endif

	return 1;
}
857 
858 void ir_disasm_free(void)
859 {
860 	if (_symbols) {
861 		ir_disasm_destroy_symbols(_symbols);
862 		_symbols = NULL;
863 	}
864 }
865