xref: /php-src/ext/opcache/jit/ir/ir_disasm.c (revision 50573904)
1 /*
2  * IR - Lightweight JIT Compilation Framework
3  * (Disassembler based on libcapstone)
4  * Copyright (C) 2022 Zend by Perforce.
5  * Authors: Dmitry Stogov <dmitry@php.net>
6  */
7 
8 #ifndef _GNU_SOURCE
9 # define _GNU_SOURCE
10 #endif
11 
12 #ifndef _WIN32
13 # include <dlfcn.h>
14 # include <unistd.h>
15 # include <fcntl.h>
16 # include <limits.h>
17 # if defined(__FreeBSD__) || defined(__DragonFly__)
18 #  include <sys/sysctl.h>
19 # endif
20 #endif
21 
22 #include "ir.h"
23 #include "ir_private.h"
24 
25 #ifndef _WIN32
26 # include "ir_elf.h"
27 #endif
28 
29 #include <capstone/capstone.h>
30 #define HAVE_CAPSTONE_ITER
31 
/*
 * Node of the symbol table, kept as a red-black tree ordered by address.
 * Each node describes the inclusive address range [addr, end] of one symbol.
 */
typedef struct _ir_sym_node {
	uint64_t             addr;     /* first address covered by the symbol */
	uint64_t             end;      /* last address covered by the symbol (inclusive) */
	struct _ir_sym_node *parent;   /* parent node, NULL for the root */
	struct _ir_sym_node *child[2]; /* [0] - lower addresses, [1] - higher addresses */
	unsigned char        info;     /* node colour: 1 - red, 0 - black */
	char                 name[];   /* NUL-terminated symbol name (flexible array member) */
} ir_sym_node;

/* Root of the symbol tree; NULL while no symbol is registered. */
static ir_sym_node *_symbols = NULL;
42 
/*
 * Left rotation around `p`: the right child of `p` takes the place of `p`
 * in the tree and `p` becomes its left child.  `p` must have a right child.
 */
static void ir_syms_rotateleft(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[1];
	ir_sym_node *moved = pivot->child[0];
	ir_sym_node *up = p->parent;

	/* the inner subtree of the pivot is re-attached under p */
	p->child[1] = moved;
	if (moved != NULL) {
		moved->parent = p;
	}

	/* the pivot replaces p below p's former parent (or becomes the root) */
	pivot->parent = up;
	if (up == NULL) {
		_symbols = pivot;
	} else {
		up->child[(up->child[0] == p) ? 0 : 1] = pivot;
	}

	pivot->child[0] = p;
	p->parent = pivot;
}
61 
/*
 * Right rotation around `p`: the left child of `p` takes the place of `p`
 * in the tree and `p` becomes its right child.  `p` must have a left child.
 */
static void ir_syms_rotateright(ir_sym_node *p)
{
	ir_sym_node *pivot = p->child[0];
	ir_sym_node *moved = pivot->child[1];
	ir_sym_node *up = p->parent;

	/* the inner subtree of the pivot is re-attached under p */
	p->child[0] = moved;
	if (moved != NULL) {
		moved->parent = p;
	}

	/* the pivot replaces p below p's former parent (or becomes the root) */
	pivot->parent = up;
	if (up == NULL) {
		_symbols = pivot;
	} else {
		up->child[(up->child[1] == p) ? 1 : 0] = pivot;
	}

	pivot->child[1] = p;
	p->parent = pivot;
}
80 
ir_disasm_add_symbol(const char * name,uint64_t addr,uint64_t size)81 void ir_disasm_add_symbol(const char *name,
82                           uint64_t    addr,
83                           uint64_t    size)
84 {
85 	ir_sym_node *sym;
86 	size_t len = strlen(name);
87 
88 	sym = ir_mem_pmalloc(sizeof(ir_sym_node) + len + 1);
89 	if (!sym) {
90 		return;
91 	}
92 	sym->addr = addr;
93 	sym->end  = (addr + size - 1);
94 	memcpy((char*)&sym->name, name, len + 1);
95 	sym->parent = sym->child[0] = sym->child[1] = NULL;
96 	sym->info = 1;
97 	if (_symbols) {
98 		ir_sym_node *node = _symbols;
99 
100 		/* insert it into rbtree */
101 		do {
102 			if (sym->addr > node->addr) {
103 				IR_ASSERT(sym->addr > (node->end));
104 				if (node->child[1]) {
105 					node = node->child[1];
106 				} else {
107 					node->child[1] = sym;
108 					sym->parent = node;
109 					break;
110 				}
111 			} else if (sym->addr < node->addr) {
112 				if (node->child[0]) {
113 					node = node->child[0];
114 				} else {
115 					node->child[0] = sym;
116 					sym->parent = node;
117 					break;
118 				}
119 			} else {
120 				IR_ASSERT(sym->addr == node->addr);
121 				if (strcmp(name, node->name) == 0 && sym->end < node->end) {
122 					/* reduce size of the existing symbol */
123 					node->end = sym->end;
124 				}
125 				ir_mem_pfree(sym);
126 				return;
127 			}
128 		} while (1);
129 
130 		/* fix rbtree after inserting */
131 		while (sym && sym != _symbols && sym->parent->info == 1) {
132 			if (sym->parent == sym->parent->parent->child[0]) {
133 				node = sym->parent->parent->child[1];
134 				if (node && node->info == 1) {
135 					sym->parent->info = 0;
136 					node->info = 0;
137 					sym->parent->parent->info = 1;
138 					sym = sym->parent->parent;
139 				} else {
140 					if (sym == sym->parent->child[1]) {
141 						sym = sym->parent;
142 						ir_syms_rotateleft(sym);
143 					}
144 					sym->parent->info = 0;
145 					sym->parent->parent->info = 1;
146 					ir_syms_rotateright(sym->parent->parent);
147 				}
148 			} else {
149 				node = sym->parent->parent->child[0];
150 				if (node && node->info == 1) {
151 					sym->parent->info = 0;
152 					node->info = 0;
153 					sym->parent->parent->info = 1;
154 					sym = sym->parent->parent;
155 				} else {
156 					if (sym == sym->parent->child[0]) {
157 						sym = sym->parent;
158 						ir_syms_rotateright(sym);
159 					}
160 					sym->parent->info = 0;
161 					sym->parent->parent->info = 1;
162 					ir_syms_rotateleft(sym->parent->parent);
163 				}
164 			}
165 		}
166 	} else {
167 		_symbols = sym;
168 	}
169 	_symbols->info = 0;
170 }
171 
/* Recursively release the subtree rooted at `n` (children first); NULL is accepted. */
static void ir_disasm_destroy_symbols(ir_sym_node *n)
{
	if (n == NULL) {
		return;
	}
	ir_disasm_destroy_symbols(n->child[0]);
	ir_disasm_destroy_symbols(n->child[1]);
	ir_mem_pfree(n);
}
184 
ir_disasm_find_symbol(uint64_t addr,int64_t * offset)185 const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset)
186 {
187 	ir_sym_node *node = _symbols;
188 	while (node) {
189 		if (addr < node->addr) {
190 			node = node->child[0];
191 		} else if (addr > node->end) {
192 			node = node->child[1];
193 		} else {
194 			*offset = addr - node->addr;
195 			return node->name;
196 		}
197 	}
198 	return NULL;
199 }
200 
/*
 * Return the first immediate operand of a branch-like instruction (jumps on
 * x86; jumps, BL and ADR on AArch64), or 0 when the instruction is not of
 * that kind or has no immediate operand.
 */
static uint64_t ir_disasm_branch_target(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		const cs_x86 *x86 = &insn->detail->x86;
		unsigned int idx;

		for (idx = 0; idx < x86->op_count; idx++) {
			if (x86->operands[idx].type == X86_OP_IMM) {
				return x86->operands[idx].imm;
			}
		}
	}
#elif defined(IR_TARGET_AARCH64)
	if (cs_insn_group(cs, insn, ARM64_GRP_JUMP)
	 || insn->id == ARM64_INS_BL
	 || insn->id == ARM64_INS_ADR) {
		const cs_arm64 *arm64 = &insn->detail->arm64;
		unsigned int idx;

		for (idx = 0; idx < arm64->op_count; idx++) {
			if (arm64->operands[idx].type == ARM64_OP_IMM) {
				return arm64->operands[idx].imm;
			}
		}
	}
#endif

	return 0;
}
226 
/*
 * Return the absolute address referenced by a memory/immediate operand of
 * `insn`, or 0 when the instruction carries no such reference.
 *
 *  - x86 (32-bit): an absolute memory operand (no base/index/segment), the
 *    displacement of a memory operand of a jump, or the pointer-sized
 *    immediate of a MOV.
 *  - x86-64: a RIP-relative memory operand, resolved against the address of
 *    the following instruction.
 *  - AArch64: the first immediate operand of ADR and of the listed
 *    load/store instructions.
 */
static uint64_t ir_disasm_rodata_reference(csh cs, const cs_insn *insn)
{
#if defined(IR_TARGET_X86)
	const cs_x86 *x86 = &insn->detail->x86;
	const cs_x86_op *op;
	unsigned int idx;

	/* absolute addressing: plain displacement without any register */
	for (idx = 0; idx < x86->op_count; idx++) {
		op = &x86->operands[idx];
		if (op->type == X86_OP_MEM
		 && op->mem.base == X86_REG_INVALID
		 && op->mem.segment == X86_REG_INVALID
		 && op->mem.index == X86_REG_INVALID
		 && op->mem.scale == 1) {
			return (uint32_t)op->mem.disp;
		}
	}

	/* indirect jumps: any memory operand with a non-zero displacement */
	if (cs_insn_group(cs, insn, X86_GRP_JUMP)) {
		for (idx = 0; idx < x86->op_count; idx++) {
			op = &x86->operands[idx];
			if (op->type == X86_OP_MEM && op->mem.disp) {
				return (uint32_t)op->mem.disp;
			}
		}
	}

	/* MOV with a pointer-sized immediate as operand 0 */
	if (insn->id == X86_INS_MOV
	 && x86->op_count == 2
	 && x86->operands[0].type == X86_OP_IMM
	 && x86->operands[0].size == sizeof(void*)) {
		return (uint32_t)x86->operands[0].imm;
	}
#elif defined(IR_TARGET_X64)
	const cs_x86 *x86 = &insn->detail->x86;
	unsigned int idx;

	for (idx = 0; idx < x86->op_count; idx++) {
		const cs_x86_op *op = &x86->operands[idx];

		// TODO: support for index and scale
		if (op->type == X86_OP_MEM
		 && op->mem.base == X86_REG_RIP
		 && op->mem.segment == X86_REG_INVALID
		 && op->mem.index == X86_REG_INVALID
		 && op->mem.scale == 1) {
			/* RIP-relative: displacement is relative to the next instruction */
			return op->mem.disp + insn->address + insn->size;
		}
	}
#elif defined(IR_TARGET_AARCH64)
	switch (insn->id) {
		case ARM64_INS_ADR:
		case ARM64_INS_LDRB:
		case ARM64_INS_LDR:
		case ARM64_INS_LDRH:
		case ARM64_INS_LDRSB:
		case ARM64_INS_LDRSH:
		case ARM64_INS_LDRSW:
		case ARM64_INS_STRB:
		case ARM64_INS_STR:
		case ARM64_INS_STRH: {
			const cs_arm64 *arm64 = &insn->detail->arm64;
			unsigned int idx;

			for (idx = 0; idx < arm64->op_count; idx++) {
				if (arm64->operands[idx].type == ARM64_OP_IMM) {
					return arm64->operands[idx].imm;
				}
			}
			break;
		}
		default:
			break;
	}
#endif

	return 0;
}
291 
ir_disasm_resolver(uint64_t addr,int64_t * offset)292 static const char* ir_disasm_resolver(uint64_t   addr,
293                                       int64_t   *offset)
294 {
295 #ifndef _WIN32
296 	const char *name;
297 	void *a = (void*)(uintptr_t)(addr);
298 	Dl_info info;
299 
300 	name = ir_disasm_find_symbol(addr, offset);
301 	if (name) {
302 		return name;
303 	}
304 
305 	if (dladdr(a, &info)
306 	 && info.dli_sname != NULL
307 	 && info.dli_saddr == a) {
308 		*offset = 0;
309 		return info.dli_sname;
310 	}
311 #else
312 	const char *name;
313 	name = ir_disasm_find_symbol(addr, offset);
314 	if (name) {
315 		return name;
316 	}
317 #endif
318 
319 	return NULL;
320 }
321 
/*
 * Disassemble a block of generated machine code and print an assembler
 * listing to `f`.
 *
 * name     - optional; when non-NULL it is printed first as "name:".
 * start    - first byte of the block.
 * size     - total size of the block in bytes (code plus trailing data).
 * asm_addr - when true every instruction is prefixed with its address.
 * ctx      - optional IR context.  When given, its entry list provides the
 *            ".ENTRY_<n>" labels and its rodata_offset / jmp_table_offset
 *            mark where the read-only data and the jump table begin inside
 *            the block; only the bytes before them are disassembled and the
 *            rest is dumped as ".db" bytes and pointer-sized table entries.
 * f        - output stream.
 *
 * The block is walked twice: the first pass collects branch targets and
 * data references as labels, the second pass prints the instructions and
 * replaces addresses in the operand text by labels (".L<n>", ".ENTRY_<n>")
 * or by symbol names where they can be resolved.
 *
 * Returns 1 on success and 0 when the capstone handle or the instruction
 * buffer cannot be created.
 */
int ir_disasm(const char    *name,
              const void    *start,
              size_t         size,
              bool           asm_addr,
              ir_ctx        *ctx,
              FILE          *f)
{
	size_t orig_size = size;
	const void *orig_end = (void *)((char *)start + size);
	const void *end;
	ir_hashtab labels;
	int32_t l, n;
	uint64_t addr;
	csh cs;
	cs_insn *insn = NULL;
# ifdef HAVE_CAPSTONE_ITER
	const uint8_t *cs_code;
	size_t cs_size;
	uint64_t cs_addr;
# else
	size_t count, i;
# endif
	const char *sym;
	int64_t offset = 0;
	char *p, *q, *r;
	uint32_t rodata_offset = 0;
	uint32_t jmp_table_offset = 0;
	ir_hashtab_bucket *b;
	int32_t entry;
	cs_err ret;

# if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
#  ifdef IR_TARGET_X64
	ret = cs_open(CS_ARCH_X86, CS_MODE_64, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_64, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  else
	ret = cs_open(CS_ARCH_X86, CS_MODE_32, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_X86, CS_MODE_32, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
#  endif
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
#  if DISASM_INTEL_SYNTAX
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL);
#  else
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
#  endif
# elif defined(IR_TARGET_AARCH64)
	ret = cs_open(CS_ARCH_ARM64, CS_MODE_ARM, &cs);
	if (ret != CS_ERR_OK) {
		fprintf(stderr, "cs_open(CS_ARCH_ARM64, CS_MODE_ARM, ...) failed; [%d] %s\n", ret, cs_strerror(ret));
		return 0;
	}
	cs_option(cs, CS_OPT_DETAIL, CS_OPT_ON);
	cs_option(cs, CS_OPT_SYNTAX, CS_OPT_SYNTAX_ATT);
# endif

	if (name) {
		fprintf(f, "%s:\n", name);
	}

	ir_hashtab_init(&labels, 32);

	if (ctx) {
		/* entry points become ".ENTRY_<n>" labels */
		if (ctx->entries_count) {
			int i = ctx->entries_count;
			do {
				ir_insn *insn = &ctx->ir_base[ctx->entries[--i]];
				ir_hashtab_add(&labels, insn->op3, insn->op2);
			} while (i != 0);
		}

		/* code ends where the read-only data starts */
		rodata_offset = ctx->rodata_offset;
		if (rodata_offset) {
			if (size > rodata_offset) {
				size = rodata_offset;
			}
		}
		jmp_table_offset = ctx->jmp_table_offset;
		if (jmp_table_offset) {
			uint32_t n;
			uintptr_t *p;

			IR_ASSERT(orig_size - jmp_table_offset <= 0xffffffff);
			n = (uint32_t)(orig_size - jmp_table_offset);
			if (size > jmp_table_offset) {
				size = jmp_table_offset;
			}
			/* skip the padding in front of the pointer-aligned table */
			while (n > 0 && IR_ALIGNED_SIZE(n, sizeof(void*)) != n) {
				jmp_table_offset++;
				n--;
			}
			IR_ASSERT(n > 0 && n % sizeof(void*) == 0 && jmp_table_offset % sizeof(void*) == 0);
			/* every table slot pointing into the block is a label */
			p = (uintptr_t*)((char*)start + jmp_table_offset);
			while (n > 0) {
				if (*p) {
					if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) {
						ir_hashtab_add(&labels, (uint32_t)((uintptr_t)*p - (uintptr_t)start), -1);
					}
				}
				p++;
				n -= sizeof(void*);
			}
		}
	}
	end = (void *)((char *)start + size);

	/* first pass: collect branch targets and data references */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	insn = cs_malloc(cs);
	if (!insn) {
		/* out of memory: release everything acquired so far */
		ir_hashtab_free(&labels);
		cs_close(&cs);
		return 0;
	}
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		if ((addr = ir_disasm_branch_target(cs, insn))
# else
	count = cs_disasm(cs, start, (uint8_t*)end - (uint8_t*)start, (uintptr_t)start, 0, &insn);
	for (i = 0; i < count; i++) {
		if ((addr = ir_disasm_branch_target(cs, &(insn[i])))
# endif
		 && (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)end)) {
			ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
# ifdef HAVE_CAPSTONE_ITER
		} else if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		} else if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				ir_hashtab_add(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start), -1);
			}
		}
	}

	ir_hashtab_key_sort(&labels);

	/* renumber labels */
	l = 0;
	n = labels.count;
	b = labels.data;
	while (n > 0) {
		if (b->val < 0) {
			b->val = --l;
		}
		b++;
		n--;
	}

	/* second pass: print the listing */
# ifdef HAVE_CAPSTONE_ITER
	cs_code = start;
	cs_size = (uint8_t*)end - (uint8_t*)start;
	cs_addr = (uint64_t)(uintptr_t)cs_code;
	while (cs_disasm_iter(cs, &cs_code, &cs_size, &cs_addr, insn)) {
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn->address - (uintptr_t)start));
# else
	for (i = 0; i < count; i++) {
		entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)insn[i].address - (uintptr_t)start));
# endif
		if (entry != (ir_ref)IR_INVALID_VAL) {
			if (entry >= 0) {
				fprintf(f, ".ENTRY_%d:\n", entry);
			} else {
				fprintf(f, ".L%d:\n", -entry);
			}
		}

# ifdef HAVE_CAPSTONE_ITER
		if (asm_addr) {
			fprintf(f, "    %" PRIx64 ":", insn->address);
		}
		p = insn->op_str;
#if defined(IR_TARGET_X64) && (CS_API_MAJOR < 5)
		/* Fix capstone MOVD/MOVQ disassemble mismatch */
		if (insn->id == X86_INS_MOVQ && strcmp(insn->mnemonic, "movd") == 0) {
			insn->mnemonic[3] = 'q';
		}
#endif
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn->mnemonic);
			continue;
		} else  {
			fprintf(f, "\t%s ", insn->mnemonic);
		}
# else
		if (asm_addr) {
			fprintf(f, "    %" PRIx64 ":", insn[i].address);
		}
		p = insn[i].op_str;
		if (strlen(p) == 0) {
			fprintf(f, "\t%s\n", insn[i].mnemonic);
			continue;
		} else {
			fprintf(f, "\t%s ", insn[i].mnemonic);
		}
# endif
		/* Try to replace the target addresses with a symbols */
#if defined(IR_TARGET_X64)
# ifdef HAVE_CAPSTONE_ITER
		if ((addr = ir_disasm_rodata_reference(cs, insn))) {
# else
		if ((addr = ir_disasm_rodata_reference(cs, &(insn[i])))) {
# endif
			if (addr >= (uint64_t)(uintptr_t)end && addr < (uint64_t)(uintptr_t)orig_end) {
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					/* replace the displacement in front of "(%rip)" by the label */
					r = q = strstr(p, "(%rip)");
					if (r && r > p) {
						r--;
						while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
							r--;
						}
						if (r > p && *r == 'x' && *(r - 1) == '0') {
							r -= 2;
						}
						if (r > p) {
							fwrite(p, 1, r - p, f);
						}
						if (entry >= 0) {
							fprintf(f, ".ENTRY_%d%s\n", entry, q);
						} else {
							fprintf(f, ".L%d%s\n", -entry, q);
						}
						continue;
					}
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
				/* replace the displacement in front of "(%rip)" by the symbol */
				r = q = strstr(p, "(%rip)");
				if (r && r > p) {
					r--;
					while (r > p && ((*r >= '0' && *r <= '9') || (*r >= 'a' && *r <= 'f') || (*r >= 'A' && *r <= 'F'))) {
						r--;
					}
					if (r > p && *r == 'x' && *(r - 1) == '0') {
						r -= 2;
					}
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					fputs(sym, f);
					if (offset != 0) {
						if (offset > 0) {
							fprintf(f, "+0x%" PRIx64, (uint64_t)offset);
						} else {
							fprintf(f, "-0x%" PRIx64, (uint64_t)(-offset));
						}
					}
					fprintf(f, "%s\n", q);
					continue;
				}
			}
		}
#endif
		/* scan the operand text for hexadecimal constants */
#if defined(IR_TARGET_AARCH64)
		while ((q = strstr(p, "#0x")) != NULL) {
				r = q + 3;
#else
		while ((q = strstr(p, "0x")) != NULL) {
				r = q + 2;
#endif
			/* parse the hexadecimal number; r ends right behind it */
			addr = 0;
			while (1) {
				if (*r >= '0' && *r <= '9') {
					addr = addr * 16 + (*r - '0');
				} else if (*r >= 'A' && *r <= 'F') {
					addr = addr * 16 + (*r - 'A' + 10);
				} else if (*r >= 'a' && *r <= 'f') {
					addr = addr * 16 + (*r - 'a' + 10);
				} else {
					break;
				}
				r++;
			}
			if (p != q && *(q-1) == '-') {
				q--;
				addr = (uint32_t)(-(int64_t)addr);
			}
			if (addr >= (uint64_t)(uintptr_t)start && addr < (uint64_t)(uintptr_t)orig_end) {
				/* address inside the block: use a local label if there is one */
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)addr - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					fwrite(p, 1, q - p, f);
					if (entry >= 0) {
						fprintf(f, ".ENTRY_%d", entry);
					} else {
						fprintf(f, ".L%d", -entry);
					}
				} else if (r > p) {
					fwrite(p, 1, r - p, f);
				}
			} else if ((sym = ir_disasm_resolver(addr, &offset))) {
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
				/* keep "$imm" constants that only fall inside a symbol */
				if (offset && p != q && *(q-1) == '$') {
					if (r > p) {
						fwrite(p, 1, r - p, f);
					}
					p = r;
					continue;
				}
#endif
				if (q > p) {
					fwrite(p, 1, q - p, f);
				}
				fputs(sym, f);
				if (offset != 0) {
					if (offset > 0) {
						fprintf(f, "+0x%" PRIx64, (uint64_t)offset);
					} else {
						fprintf(f, "-0x%" PRIx64, (uint64_t)(-offset));
					}
				}
			} else if (r > p) {
				fwrite(p, 1, r - p, f);
			}
			p = r;
		}
		fprintf(f, "%s\n", p);
	}
# ifdef HAVE_CAPSTONE_ITER
	cs_free(insn, 1);
# else
	cs_free(insn, count);
# endif

	if (rodata_offset || jmp_table_offset) {
		fprintf(f, ".rodata\n");
	}
	if (rodata_offset) {
		/* dump the read-only data, at most 16 bytes per line */
		const unsigned char *p = (unsigned char*)start + rodata_offset;
		uint32_t n = jmp_table_offset ?
			(uint32_t)(jmp_table_offset - rodata_offset) :
			(uint32_t)(orig_size - rodata_offset);
		uint32_t j;

		while (n > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			fprintf(f, "\t.db 0x%02x", (int)*p);
			p++;
			n--;
			j = 15;
			while (n > 0 && j > 0) {
				/* a label starts a new line */
				entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
				if (entry != (ir_ref)IR_INVALID_VAL) {
					break;
				}
				fprintf(f, ", 0x%02x", (int)*p);
				p++;
				n--;
				j--;
			}
			fprintf(f, "\n");
		}
	}
	if (jmp_table_offset) {
		/* dump the jump table, one pointer-sized slot per line */
		uintptr_t *p = (uintptr_t*)((char*)start + jmp_table_offset);
		uint32_t n = (uint32_t)(orig_size - jmp_table_offset);

		fprintf(f, ".align %d\n", (int)sizeof(void*));

		while (n > 0) {
			entry = ir_hashtab_find(&labels, (uint32_t)((uintptr_t)p - (uintptr_t)start));
			if (entry != (ir_ref)IR_INVALID_VAL) {
				if (entry >= 0) {
					fprintf(f, ".ENTRY_%d:\n", entry);
				} else {
					fprintf(f, ".L%d:\n", -entry);
				}
			}
			if (*p) {
				if ((uintptr_t)*p >= (uintptr_t)start && (uintptr_t)*p < (uintptr_t)orig_end) {
					entry = ir_hashtab_find(&labels, (uint32_t)(*p - (uintptr_t)start));
					IR_ASSERT(entry != (ir_ref)IR_INVALID_VAL);
					if (entry >= 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .ENTRY_%d\n", entry);
						} else {
							fprintf(f, "\t.dword .ENTRY_%d\n", entry);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword .L%d\n", -entry);
						} else {
							fprintf(f, "\t.dword .L%d\n", -entry);
						}
					}
				} else {
					int64_t offset;
					const char *name = ir_disasm_find_symbol(*p, &offset);

					if (name && offset == 0) {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword %s\n", name);
						} else {
							fprintf(f, "\t.dword %s\n", name);
						}
					} else {
						if (sizeof(void*) == 8) {
							fprintf(f, "\t.qword 0x%0llx\n", (unsigned long long)*p);
						} else {
							fprintf(f, "\t.dword 0x%0x\n", (unsigned int)*p);
						}
					}
				}
			} else {
				if (sizeof(void*) == 8) {
					fprintf(f, "\t.qword 0\n");
				} else {
					fprintf(f, "\t.dword 0\n");
				}
			}
			p++;
			n -= sizeof(void*);
		}
	}

	fprintf(f, "\n");

	ir_hashtab_free(&labels);

	cs_close(&cs);

	return 1;
}
753 
754 #ifndef _WIN32
755 static void* ir_elf_read_sect(int fd, ir_elf_sectheader *sect)
756 {
757 	void *s = ir_mem_malloc(sect->size);
758 
759 	if (lseek(fd, sect->ofs, SEEK_SET) < 0) {
760 		ir_mem_free(s);
761 		return NULL;
762 	}
763 	if (read(fd, s, sect->size) != (ssize_t)sect->size) {
764 		ir_mem_free(s);
765 		return NULL;
766 	}
767 
768 	return s;
769 }
770 
771 static void ir_elf_load_symbols(void)
772 {
773 	ir_elf_header hdr;
774 	ir_elf_sectheader sect;
775 	int i;
776 #if defined(__linux__)
777 	int fd = open("/proc/self/exe", O_RDONLY);
778 #elif defined(__NetBSD__)
779 	int fd = open("/proc/curproc/exe", O_RDONLY);
780 #elif defined(__FreeBSD__) || defined(__DragonFly__)
781 	char path[PATH_MAX];
782 	size_t pathlen = sizeof(path);
783 	int mib[4] = {CTL_KERN, KERN_PROC, KERN_PROC_PATHNAME, -1};
784 	if (sysctl(mib, 4, path, &pathlen, NULL, 0) == -1) {
785 		return;
786 	}
787 	int fd = open(path, O_RDONLY);
788 #elif defined(__sun)
789 	int fd = open("/proc/self/path/a.out", O_RDONLY);
790 #elif defined(__HAIKU__)
791 	char path[PATH_MAX];
792 	if (find_path(B_APP_IMAGE_SYMBOL, B_FIND_PATH_IMAGE_PATH,
793 		NULL, path, sizeof(path)) != B_OK) {
794 		return;
795 	}
796 
797 	int fd = open(path, O_RDONLY);
798 #else
799 	// To complete eventually for other ELF platforms.
800 	// Otherwise APPLE is Mach-O
801 	int fd = -1;
802 #endif
803 
804 	if (fd >= 0) {
805 		if (read(fd, &hdr, sizeof(hdr)) == sizeof(hdr)
806 		 && hdr.emagic[0] == '\177'
807 		 && hdr.emagic[1] == 'E'
808 		 && hdr.emagic[2] == 'L'
809 		 && hdr.emagic[3] == 'F'
810 		 && lseek(fd, hdr.shofs, SEEK_SET) >= 0) {
811 			for (i = 0; i < hdr.shnum; i++) {
812 				if (read(fd, &sect, sizeof(sect)) == sizeof(sect)
813 				 && sect.type == ELFSECT_TYPE_SYMTAB) {
814 					uint32_t n, count = sect.size / sizeof(ir_elf_symbol);
815 					ir_elf_symbol *syms = ir_elf_read_sect(fd, &sect);
816 					char *str_tbl;
817 
818 					if (syms) {
819 						if (lseek(fd, hdr.shofs + sect.link * sizeof(sect), SEEK_SET) >= 0
820 						 && read(fd, &sect, sizeof(sect)) == sizeof(sect)
821 						 && (str_tbl = (char*)ir_elf_read_sect(fd, &sect)) != NULL) {
822 							for (n = 0; n < count; n++) {
823 								if (syms[n].name
824 								 && (ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_FUNC
825 								  /*|| ELFSYM_TYPE(syms[n].info) == ELFSYM_TYPE_DATA*/)
826 								 && (ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_LOCAL
827 								  /*|| ELFSYM_BIND(syms[n].info) == ELFSYM_BIND_GLOBAL*/)) {
828 									ir_disasm_add_symbol(str_tbl + syms[n].name, syms[n].value, syms[n].size);
829 								}
830 							}
831 							ir_mem_free(str_tbl);
832 						}
833 						ir_mem_free(syms);
834 					}
835 					if (lseek(fd, hdr.shofs + (i + 1) * sizeof(sect), SEEK_SET) < 0) {
836 						break;
837 					}
838 				}
839 			}
840 		}
841 		close(fd);
842 	}
843 }
844 #endif
845 
/*
 * Prepare the disassembler.  On non-Windows systems this loads the symbols
 * of the running executable into the symbol table.  Always returns 1.
 */
int ir_disasm_init(void)
{
#if !defined(_WIN32)
	ir_elf_load_symbols();
#endif

	return 1;
}
853 
854 void ir_disasm_free(void)
855 {
856 	if (_symbols) {
857 		ir_disasm_destroy_symbols(_symbols);
858 		_symbols = NULL;
859 	}
860 }
861