1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (Public API)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8 #ifndef IR_H
9 #define IR_H
10
11 #ifdef __cplusplus
12 extern "C" {
13 #endif
14
15 #include <inttypes.h>
16 #include <stdint.h>
17 #include <stdbool.h>
18 #include <stdio.h>
19 #include <stdlib.h>
20
21 #define IR_VERSION "0.0.1"
22
23 #ifdef _WIN32
24 /* TODO Handle ARM, too. */
25 # if defined(_M_X64)
26 # define __SIZEOF_SIZE_T__ 8
27 # elif defined(_M_IX86)
28 # define __SIZEOF_SIZE_T__ 4
29 # endif
30 /* Only supported is little endian for any arch on Windows,
31 so just fake the same for all. */
32 # define __ORDER_LITTLE_ENDIAN__ 1
33 # define __BYTE_ORDER__ __ORDER_LITTLE_ENDIAN__
34 # ifndef __has_builtin
35 # define __has_builtin(arg) (0)
36 # endif
37 #endif
38
39 #if defined(IR_TARGET_X86)
40 # define IR_TARGET "x86"
41 #elif defined(IR_TARGET_X64)
42 # ifdef _WIN64
43 # define IR_TARGET "Windows-x86_64" /* 64-bit Windows use different ABI and calling convention */
44 # else
45 # define IR_TARGET "x86_64"
46 # endif
47 #elif defined(IR_TARGET_AARCH64)
48 # define IR_TARGET "aarch64"
49 #else
50 # error "Unknown IR target"
51 #endif
52
53 #if defined(__SIZEOF_SIZE_T__)
54 # if __SIZEOF_SIZE_T__ == 8
55 # define IR_64 1
56 # elif __SIZEOF_SIZE_T__ != 4
57 # error "Unknown addr size"
58 # endif
59 #else
60 # error "Unknown addr size"
61 #endif
62
63 #if defined(__BYTE_ORDER__)
64 # if defined(__ORDER_LITTLE_ENDIAN__)
65 # if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
66 # define IR_STRUCT_LOHI(lo, hi) struct {lo; hi;}
67 # endif
68 # endif
69 # if defined(__ORDER_BIG_ENDIAN__)
70 # if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
71 # define IR_STRUCT_LOHI(lo, hi) struct {hi; lo;}
72 # endif
73 # endif
74 #endif
75 #ifndef IR_STRUCT_LOHI
76 # error "Unknown byte order"
77 #endif
78
79 #ifdef __has_attribute
80 # if __has_attribute(always_inline)
81 # define IR_ALWAYS_INLINE static inline __attribute__((always_inline))
82 # endif
83 # if __has_attribute(noinline)
84 # define IR_NEVER_INLINE __attribute__((noinline))
85 # endif
86 #else
87 # define __has_attribute(x) 0
88 #endif
89
90 #ifndef IR_ALWAYS_INLINE
91 # define IR_ALWAYS_INLINE static inline
92 #endif
93 #ifndef IR_NEVER_INLINE
94 # define IR_NEVER_INLINE
95 #endif
96
97 #ifdef IR_PHP
98 # include "ir_php.h"
99 #endif
100
101 /* IR Type flags (low 4 bits are used for type size) */
102 #define IR_TYPE_SIGNED (1<<4)
103 #define IR_TYPE_UNSIGNED (1<<5)
104 #define IR_TYPE_FP (1<<6)
105 #define IR_TYPE_SPECIAL (1<<7)
106 #define IR_TYPE_BOOL (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED)
107 #define IR_TYPE_ADDR (IR_TYPE_SPECIAL|IR_TYPE_UNSIGNED)
108 #define IR_TYPE_CHAR (IR_TYPE_SPECIAL|IR_TYPE_SIGNED)
109
110 /* List of IR types */
111 #define IR_TYPES(_) \
112 _(BOOL, bool, b, IR_TYPE_BOOL) \
113 _(U8, uint8_t, u8, IR_TYPE_UNSIGNED) \
114 _(U16, uint16_t, u16, IR_TYPE_UNSIGNED) \
115 _(U32, uint32_t, u32, IR_TYPE_UNSIGNED) \
116 _(U64, uint64_t, u64, IR_TYPE_UNSIGNED) \
117 _(ADDR, uintptr_t, addr, IR_TYPE_ADDR) \
118 _(CHAR, char, c, IR_TYPE_CHAR) \
119 _(I8, int8_t, i8, IR_TYPE_SIGNED) \
120 _(I16, int16_t, i16, IR_TYPE_SIGNED) \
121 _(I32, int32_t, i32, IR_TYPE_SIGNED) \
122 _(I64, int64_t, i64, IR_TYPE_SIGNED) \
123 _(DOUBLE, double, d, IR_TYPE_FP) \
124 _(FLOAT, float, f, IR_TYPE_FP) \
125
126 #define IR_IS_TYPE_UNSIGNED(t) ((t) < IR_CHAR)
127 #define IR_IS_TYPE_SIGNED(t) ((t) >= IR_CHAR && (t) < IR_DOUBLE)
128 #define IR_IS_TYPE_INT(t) ((t) < IR_DOUBLE)
129 #define IR_IS_TYPE_FP(t) ((t) >= IR_DOUBLE)
130
131 #define IR_TYPE_ENUM(name, type, field, flags) IR_ ## name,
132
133 typedef enum _ir_type {
134 IR_VOID,
135 IR_TYPES(IR_TYPE_ENUM)
136 IR_LAST_TYPE
137 } ir_type;
138
139 #ifdef IR_64
140 # define IR_SIZE_T IR_U64
141 # define IR_SSIZE_T IR_I64
142 # define IR_UINTPTR_T IR_U64
143 # define IR_INTPTR_T IR_I64
144 #else
145 # define IR_SIZE_T IR_U32
146 # define IR_SSIZE_T IR_I32
147 # define IR_UINTPTR_T IR_U32
148 # define IR_INTPTR_T IR_I32
149 #endif
150
151 /* List of IR opcodes
152 * ==================
153 *
154 * Each instruction is described by a type (opcode, flags, op1_type, op2_type, op3_type)
155 *
156 * flags
157 * -----
158 * v - void
159 * d - data IR_OP_FLAG_DATA
160 * r - ref IR_OP_FLAG_DATA alias
161 * p - pinned IR_OP_FLAG_DATA + IR_OP_FLAG_PINNED
162 * c - control IR_OP_FLAG_CONTROL
163 * S - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_START
164 * E - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END
165 * T - control IR_OP_FLAG_CONTROL + IR_OP_FLAG_BB_END + IR_OP_FLAG_TERMINATOR
166 * l - load IR_OP_FLAG_MEM + IR_OP_FLAG_MEM_LOAD
167 * s - store IR_OP_FLAG_MEM + IR_OP_FLAG_STORE
168 * x - call IR_OP_FLAG_MEM + IR_OP_FLAG_CALL
169 * a - alloc IR_OP_FLAG_MEM + IR_OP_FLAG_ALLOC
170 * 0-3 - number of input edges
171 * N - number of arguments is defined in the insn->inputs_count (MERGE, PHI, CALL)
172 * X1-X3 - number of extra data ops
173 * C - commutative operation ("d2C" => IR_OP_FLAG_DATA + IR_OP_FLAG_COMMUTATIVE)
174 *
175 * operand types
176 * -------------
177 * ___ - unused
178 * def - reference to a definition op (data-flow use-def dependency edge)
179 * ref - memory reference (data-flow use-def dependency edge)
180 * var - variable reference (data-flow use-def dependency edge)
181 * arg - argument reference CALL/TAILCALL/CARG->CARG
182 * src - reference to a previous control region (IF, IF_TRUE, IF_FALSE, MERGE, LOOP_BEGIN, LOOP_END, RETURN)
183 * reg - data-control dependency on region (PHI, VAR, PARAM)
184 * ret - reference to a previous RETURN instruction (RETURN)
185 * str - string: variable/argument name (VAR, PARAM, CALL, TAILCALL)
186 * num - number: argument number (PARAM)
187 * prb - branch probability 1-99 (0 - unspecified): (IF_TRUE, IF_FALSE, CASE_VAL, CASE_DEFAULT)
188 * opt - optional number
189 * pro - function prototype
190 *
191 * The order of IR opcodes is carefully selected for efficient folding.
192 * - foldable instruction go first
193 * - NOP is never used (code 0 is used as ANY pattern)
194 * - CONST is the most often used instruction (encode with 1 bit)
195 * - equality inversion: EQ <-> NE => op =^ 1
196 * - comparison inversion: [U]LT <-> [U]GT, [U]LE <-> [U]GE => op =^ 3
197 */
198
199 #define IR_OPS(_) \
200 /* special op (must be the first !!!) */ \
201 _(NOP, v, ___, ___, ___) /* empty instruction */ \
202 \
203 /* constants reference */ \
204 _(C_BOOL, r0, ___, ___, ___) /* constant */ \
205 _(C_U8, r0, ___, ___, ___) /* constant */ \
206 _(C_U16, r0, ___, ___, ___) /* constant */ \
207 _(C_U32, r0, ___, ___, ___) /* constant */ \
208 _(C_U64, r0, ___, ___, ___) /* constant */ \
209 _(C_ADDR, r0, ___, ___, ___) /* constant */ \
210 _(C_CHAR, r0, ___, ___, ___) /* constant */ \
211 _(C_I8, r0, ___, ___, ___) /* constant */ \
212 _(C_I16, r0, ___, ___, ___) /* constant */ \
213 _(C_I32, r0, ___, ___, ___) /* constant */ \
214 _(C_I64, r0, ___, ___, ___) /* constant */ \
215 _(C_DOUBLE, r0, ___, ___, ___) /* constant */ \
216 _(C_FLOAT, r0, ___, ___, ___) /* constant */ \
217 \
218 /* equality ops */ \
219 _(EQ, d2C, def, def, ___) /* equal */ \
220 _(NE, d2C, def, def, ___) /* not equal */ \
221 \
222 /* comparison ops (order matters, LT must be a modulo of 4 !!!) */ \
223 _(LT, d2, def, def, ___) /* less */ \
224 _(GE, d2, def, def, ___) /* greater or equal */ \
225 _(LE, d2, def, def, ___) /* less or equal */ \
226 _(GT, d2, def, def, ___) /* greater */ \
227 _(ULT, d2, def, def, ___) /* unsigned less */ \
228 _(UGE, d2, def, def, ___) /* unsigned greater or equal */ \
229 _(ULE, d2, def, def, ___) /* unsigned less or equal */ \
230 _(UGT, d2, def, def, ___) /* unsigned greater */ \
231 \
232 /* arithmetic ops */ \
233 _(ADD, d2C, def, def, ___) /* addition */ \
234 _(SUB, d2, def, def, ___) /* subtraction (must be ADD+1) */ \
235 _(MUL, d2C, def, def, ___) /* multiplication */ \
236 _(DIV, d2, def, def, ___) /* division */ \
237 _(MOD, d2, def, def, ___) /* modulo */ \
238 _(NEG, d1, def, ___, ___) /* change sign */ \
239 _(ABS, d1, def, ___, ___) /* absolute value */ \
240 /* (LDEXP, MIN, MAX, FPMATH) */ \
241 \
242 /* type conversion ops */ \
243 _(SEXT, d1, def, ___, ___) /* sign extension */ \
244 _(ZEXT, d1, def, ___, ___) /* zero extension */ \
245 _(TRUNC, d1, def, ___, ___) /* truncates to int type */ \
246 _(BITCAST, d1, def, ___, ___) /* binary representation */ \
247 _(INT2FP, d1, def, ___, ___) /* int to float conversion */ \
248 _(FP2INT, d1, def, ___, ___) /* float to int conversion */ \
249 _(FP2FP, d1, def, ___, ___) /* float to float conversion */ \
250 _(PROTO, d1X1, def, pro, ___) /* apply function prototype */ \
251 \
252 /* overflow-check */ \
253 _(ADD_OV, d2C, def, def, ___) /* addition */ \
254 _(SUB_OV, d2, def, def, ___) /* subtraction */ \
255 _(MUL_OV, d2C, def, def, ___) /* multiplication */ \
256 _(OVERFLOW, d1, def, ___, ___) /* overflow check add/sub/mul */ \
257 \
258 /* bitwise and shift ops */ \
259 _(NOT, d1, def, ___, ___) /* bitwise NOT */ \
260 _(OR, d2C, def, def, ___) /* bitwise OR */ \
261 _(AND, d2C, def, def, ___) /* bitwise AND */ \
262 _(XOR, d2C, def, def, ___) /* bitwise XOR */ \
263 _(SHL, d2, def, def, ___) /* logic shift left */ \
264 _(SHR, d2, def, def, ___) /* logic shift right */ \
265 _(SAR, d2, def, def, ___) /* arithmetic shift right */ \
266 _(ROL, d2, def, def, ___) /* rotate left */ \
267 _(ROR, d2, def, def, ___) /* rotate right */ \
268 _(BSWAP, d1, def, ___, ___) /* byte swap */ \
269 _(CTPOP, d1, def, ___, ___) /* count population */ \
270 _(CTLZ, d1, def, ___, ___) /* count leading zeros */ \
271 _(CTTZ, d1, def, ___, ___) /* count trailing zeros */ \
272 \
273 /* branch-less conditional ops */ \
274 _(MIN, d2C, def, def, ___) /* min(op1, op2) */ \
275 _(MAX, d2C, def, def, ___) /* max(op1, op2) */ \
276 _(COND, d3, def, def, def) /* op1 ? op2 : op3 */ \
277 \
278 /* data-flow and miscellaneous ops */ \
279 _(PHI, pN, reg, def, def) /* SSA Phi function */ \
280 _(COPY, d1X1, def, opt, ___) /* COPY (last foldable op) */ \
281 _(PI, p2, reg, def, ___) /* e-SSA Pi constraint ??? */ \
282 _(FRAME_ADDR, d0, ___, ___, ___) /* function frame address */ \
283 /* (USE, RENAME) */ \
284 \
285 /* data ops */ \
286 _(PARAM, p1X2, reg, str, num) /* incoming parameter proj. */ \
287 _(VAR, p1X1, reg, str, ___) /* local variable */ \
288 _(FUNC_ADDR, r0, ___, ___, ___) /* constant func ref */ \
289 _(FUNC, r0, ___, ___, ___) /* constant func ref */ \
290 _(SYM, r0, ___, ___, ___) /* constant symbol ref */ \
291 _(STR, r0, ___, ___, ___) /* constant str ref */ \
292 \
293 /* call ops */ \
294 _(CALL, xN, src, def, def) /* CALL(src, func, args...) */ \
295 _(TAILCALL, xN, src, def, def) /* CALL+RETURN */ \
296 \
297 /* memory reference and load/store ops */ \
298 _(ALLOCA, a2, src, def, ___) /* alloca(def) */ \
299 _(AFREE, a2, src, def, ___) /* revert alloca(def) */ \
300 _(VADDR, d1, var, ___, ___) /* load address of local var */ \
301 _(VLOAD, l2, src, var, ___) /* load value of local var */ \
302 _(VSTORE, s3, src, var, def) /* store value to local var */ \
303 _(RLOAD, l1X2, src, num, opt) /* load value from register */ \
304 _(RSTORE, s2X1, src, def, num) /* store value into register */ \
305 _(LOAD, l2, src, ref, ___) /* load from memory */ \
306 _(STORE, s3, src, ref, def) /* store to memory */ \
307 _(TLS, l1X2, src, num, num) /* thread local variable */ \
308 _(TRAP, x1, src, ___, ___) /* DebugBreak */ \
309 /* memory reference ops (A, H, U, S, TMP, STR, NEW, X, V) ??? */ \
310 \
311 /* va_args */ \
312 _(VA_START, x2, src, def, ___) /* va_start(va_list) */ \
313 _(VA_END, x2, src, def, ___) /* va_end(va_list) */ \
314 _(VA_COPY, x3, src, def, def) /* va_copy(dst, stc) */ \
315 _(VA_ARG, x2, src, def, ___) /* va_arg(va_list) */ \
316 \
317 /* guards */ \
318 _(GUARD, c3, src, def, def) /* IF without second successor */ \
319 _(GUARD_NOT , c3, src, def, def) /* IF without second successor */ \
320 \
321 /* deoptimization */ \
322 _(SNAPSHOT, xN, src, def, def) /* SNAPSHOT(src, args...) */ \
323 \
324 /* control-flow nodes */ \
325 _(START, S0X1, ret, ___, ___) /* function start */ \
326 _(ENTRY, S1X1, src, num, ___) /* entry with a fake src edge */ \
327 _(BEGIN, S1, src, ___, ___) /* block start */ \
328 _(IF_TRUE, S1X1, src, prb, ___) /* IF TRUE proj. */ \
329 _(IF_FALSE, S1X1, src, prb, ___) /* IF FALSE proj. */ \
330 _(CASE_VAL, S2X1, src, def, prb) /* switch proj. */ \
331 _(CASE_DEFAULT, S1X1, src, prb, ___) /* switch proj. */ \
332 _(MERGE, SN, src, src, src) /* control merge */ \
333 _(LOOP_BEGIN, SN, src, src, src) /* loop start */ \
334 _(END, E1, src, ___, ___) /* block end */ \
335 _(LOOP_END, E1, src, ___, ___) /* loop end */ \
336 _(IF, E2, src, def, ___) /* conditional control split */ \
337 _(SWITCH, E2, src, def, ___) /* multi-way control split */ \
338 _(RETURN, T2X1, src, def, ret) /* function return */ \
339 _(IJMP, T2X1, src, def, ret) /* computed goto */ \
340 _(UNREACHABLE, T1X2, src, ___, ret) /* unreachable (tailcall, etc) */ \
341 \
342 /* deoptimization helper */ \
343 _(EXITCALL, x2, src, def, ___) /* save CPU regs and call op2 */ \
344
345
346 #define IR_OP_ENUM(name, flags, op1, op2, op3) IR_ ## name,
347
348 typedef enum _ir_op {
349 IR_OPS(IR_OP_ENUM)
350 #ifdef IR_PHP
351 IR_PHP_OPS(IR_OP_ENUM)
352 #endif
353 IR_LAST_OP
354 } ir_op;
355
356 /* IR Opcode and Type Union */
357 #define IR_OPT_OP_MASK 0x00ff
358 #define IR_OPT_TYPE_MASK 0xff00
359 #define IR_OPT_TYPE_SHIFT 8
360 #define IR_OPT_INPUTS_SHIFT 16
361
362 #define IR_OPT(op, type) ((uint16_t)(op) | ((uint16_t)(type) << IR_OPT_TYPE_SHIFT))
363 #define IR_OPTX(op, type, n) ((uint32_t)(op) | ((uint32_t)(type) << IR_OPT_TYPE_SHIFT) | ((uint32_t)(n) << IR_OPT_INPUTS_SHIFT))
364 #define IR_OPT_TYPE(opt) (((opt) & IR_OPT_TYPE_MASK) >> IR_OPT_TYPE_SHIFT)
365
366 /* IR References */
367 typedef int32_t ir_ref;
368
369 #define IR_IS_CONST_REF(ref) ((ref) < 0)
370
371 /* IR Constant Value */
372 #define IR_UNUSED 0
373 #define IR_NULL (-1)
374 #define IR_FALSE (-2)
375 #define IR_TRUE (-3)
376 #define IR_LAST_FOLDABLE_OP IR_COPY
377
378 #define IR_CONSTS_LIMIT_MIN (-(IR_TRUE - 1))
379 #define IR_INSNS_LIMIT_MIN (IR_UNUSED + 1)
380
381 #ifndef IR_64
382 # define ADDR_MEMBER uintptr_t addr;
383 #else
384 # define ADDR_MEMBER
385 #endif
386 typedef union _ir_val {
387 double d;
388 uint64_t u64;
389 int64_t i64;
390 #ifdef IR_64
391 uintptr_t addr;
392 #endif
393 IR_STRUCT_LOHI(
394 union {
395 uint32_t u32;
396 int32_t i32;
397 float f;
398 ADDR_MEMBER
399 ir_ref name;
400 ir_ref str;
401 IR_STRUCT_LOHI(
402 union {
403 uint16_t u16;
404 int16_t i16;
405 IR_STRUCT_LOHI(
406 union {
407 uint8_t u8;
408 int8_t i8;
409 bool b;
410 char c;
411 },
412 uint8_t u8_hi
413 );
414 },
415 uint16_t u16_hi
416 );
417 },
418 uint32_t u32_hi
419 );
420 } ir_val;
421 #undef ADDR_MEMBER
422
423 /* IR Instruction */
424 typedef struct _ir_insn {
425 IR_STRUCT_LOHI(
426 union {
427 IR_STRUCT_LOHI(
428 union {
429 IR_STRUCT_LOHI(
430 uint8_t op,
431 uint8_t type
432 );
433 uint16_t opt;
434 },
435 union {
436 uint16_t inputs_count; /* number of input control edges for MERGE, PHI, CALL, TAILCALL */
437 uint16_t prev_insn_offset; /* 16-bit backward offset from current instruction for CSE */
438 uint16_t proto;
439 }
440 );
441 uint32_t optx;
442 ir_ref ops[1];
443 },
444 union {
445 ir_ref op1;
446 ir_ref prev_const;
447 }
448 );
449 union {
450 IR_STRUCT_LOHI(
451 ir_ref op2,
452 ir_ref op3
453 );
454 ir_val val;
455 };
456 } ir_insn;
457
458 /* IR Hash Tables API (private) */
459 typedef struct _ir_hashtab ir_hashtab;
460
461 /* IR String Tables API (implementation in ir_strtab.c) */
462 typedef struct _ir_strtab {
463 void *data;
464 uint32_t mask;
465 uint32_t size;
466 uint32_t count;
467 uint32_t pos;
468 char *buf;
469 uint32_t buf_size;
470 uint32_t buf_top;
471 } ir_strtab;
472
473 #define ir_strtab_count(strtab) (strtab)->count
474
475 typedef void (*ir_strtab_apply_t)(const char *str, uint32_t len, ir_ref val);
476
477 void ir_strtab_init(ir_strtab *strtab, uint32_t count, uint32_t buf_size);
478 ir_ref ir_strtab_lookup(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
479 ir_ref ir_strtab_find(const ir_strtab *strtab, const char *str, uint32_t len);
480 ir_ref ir_strtab_update(ir_strtab *strtab, const char *str, uint32_t len, ir_ref val);
481 const char *ir_strtab_str(const ir_strtab *strtab, ir_ref idx);
482 const char *ir_strtab_strl(const ir_strtab *strtab, ir_ref idx, size_t *len);
483 void ir_strtab_apply(const ir_strtab *strtab, ir_strtab_apply_t func);
484 void ir_strtab_free(ir_strtab *strtab);
485
486 /* IR Context Flags */
487 #define IR_FUNCTION (1<<0) /* Generate a function. */
488 #define IR_FASTCALL_FUNC (1<<1) /* Generate a function with fastcall calling convention, x86 32-bit only. */
489 #define IR_VARARG_FUNC (1<<2)
490 #define IR_BUILTIN_FUNC (1<<3)
491 #define IR_STATIC (1<<4)
492 #define IR_EXTERN (1<<5)
493 #define IR_CONST (1<<6)
494
495 #define IR_SKIP_PROLOGUE (1<<8) /* Don't generate function prologue. */
496 #define IR_USE_FRAME_POINTER (1<<9)
497 #define IR_PREALLOCATED_STACK (1<<10)
498 #define IR_NO_STACK_COMBINE (1<<11)
499 #define IR_START_BR_TARGET (1<<12)
500 #define IR_ENTRY_BR_TARGET (1<<13)
501 #define IR_GEN_ENDBR (1<<14)
502 #define IR_MERGE_EMPTY_ENTRIES (1<<15)
503
504 #define IR_OPT_FOLDING (1<<16)
505 #define IR_OPT_CFG (1<<17) /* merge BBs, by remove END->BEGIN nodes during CFG construction */
506 #define IR_OPT_CODEGEN (1<<18)
507 #define IR_GEN_NATIVE (1<<19)
508 #define IR_GEN_CODE (1<<20) /* C or LLVM */
509
510 #define IR_GEN_CACHE_DEMOTE (1<<21) /* Demote the generated code from closest CPU caches */
511
512 /* debug related */
513 #ifdef IR_DEBUG
514 # define IR_DEBUG_SCCP (1<<27)
515 # define IR_DEBUG_GCM (1<<28)
516 # define IR_DEBUG_SCHEDULE (1<<29)
517 # define IR_DEBUG_RA (1<<30)
518 #endif
519
520 typedef struct _ir_ctx ir_ctx;
521 typedef struct _ir_use_list ir_use_list;
522 typedef struct _ir_block ir_block;
523 typedef struct _ir_arena ir_arena;
524 typedef struct _ir_live_interval ir_live_interval;
525 typedef struct _ir_live_range ir_live_range;
526 typedef struct _ir_loader ir_loader;
527 typedef int8_t ir_regs[4];
528
529 typedef void (*ir_snapshot_create_t)(ir_ctx *ctx, ir_ref addr);
530
531 #if defined(IR_TARGET_AARCH64)
532 typedef const void *(*ir_get_exit_addr_t)(uint32_t exit_num);
533 typedef const void *(*ir_get_veneer_t)(ir_ctx *ctx, const void *addr);
534 typedef bool (*ir_set_veneer_t)(ir_ctx *ctx, const void *addr, const void *veneer);
535 #endif
536
537 typedef struct _ir_code_buffer {
538 void *start;
539 void *end;
540 void *pos;
541 } ir_code_buffer;
542
543 struct _ir_ctx {
544 ir_insn *ir_base; /* two directional array - instructions grow down, constants grow up */
545 ir_ref insns_count; /* number of instructions stored in instructions buffer */
546 ir_ref insns_limit; /* size of allocated instructions buffer (it's extended when overflow) */
547 ir_ref consts_count; /* number of constants stored in constants buffer */
548 ir_ref consts_limit; /* size of allocated constants buffer (it's extended when overflow) */
549 uint32_t flags; /* IR context flags (see IR_* defines above) */
550 uint32_t flags2; /* IR context private flags (see IR_* defines in ir_private.h) */
551 ir_type ret_type; /* Function return type */
552 uint32_t mflags; /* CPU specific flags (see IR_X86_... macros below) */
553 int32_t status; /* non-zero error code (see IR_ERROR_... macros), app may use negative codes */
554 ir_ref fold_cse_limit; /* CSE finds identical insns backward from "insn_count" to "fold_cse_limit" */
555 ir_insn fold_insn; /* temporary storage for folding engine */
556 ir_hashtab *binding;
557 ir_use_list *use_lists; /* def->use lists for each instruction */
558 ir_ref *use_edges; /* the actual uses: use = ctx->use_edges[ctx->use_lists[def].refs + n] */
559 ir_ref use_edges_count; /* number of elements in use_edges[] array */
560 uint32_t cfg_blocks_count; /* number of elements in cfg_blocks[] array */
561 uint32_t cfg_edges_count; /* number of elements in cfg_edges[] array */
562 ir_block *cfg_blocks; /* list of basic blocks (starts from 1) */
563 uint32_t *cfg_edges; /* the actual basic blocks predecessors and successors edges */
564 uint32_t *cfg_map; /* map of instructions to basic block number */
565 uint32_t *rules; /* array of target specific code-generation rules (for each instruction) */
566 uint32_t *vregs;
567 ir_ref vregs_count;
568 int32_t spill_base; /* base register for special spill area (e.g. PHP VM frame pointer) */
569 uint64_t fixed_regset; /* fixed registers, excluded for regular register allocation */
570 int32_t fixed_stack_red_zone; /* reusable stack allocated by caller (default 0) */
571 int32_t fixed_stack_frame_size; /* fixed stack allocated by generated code for spills and registers save/restore */
572 int32_t fixed_call_stack_size; /* fixed preallocated stack for parameter passing (default 0) */
573 uint64_t fixed_save_regset; /* registers that always saved/restored in prologue/epilogue */
574 uint32_t locals_area_size;
575 uint32_t gp_reg_params;
576 uint32_t fp_reg_params;
577 int32_t param_stack_size;
578 ir_live_interval **live_intervals;
579 ir_arena *arena;
580 ir_live_range *unused_ranges;
581 ir_regs *regs;
582 ir_strtab *fused_regs;
583 ir_ref *prev_ref;
584 union {
585 void *data;
586 ir_ref control; /* used by IR construction API (see ir_builder.h) */
587 ir_ref bb_start; /* used by target CPU instruction matcher */
588 ir_ref vars; /* list of VARs (used by register allocator) */
589 };
590 ir_snapshot_create_t snapshot_create;
591 int32_t stack_frame_alignment;
592 int32_t stack_frame_size; /* spill stack frame size (used by register allocator and code generator) */
593 int32_t call_stack_size; /* stack for parameter passing (used by register allocator and code generator) */
594 uint64_t used_preserved_regs;
595 #ifdef IR_TARGET_X86
596 int32_t ret_slot;
597 #endif
598 uint32_t rodata_offset;
599 uint32_t jmp_table_offset;
600 uint32_t entries_count;
601 uint32_t *entries; /* array of ENTRY blocks */
602 void *osr_entry_loads;
603 ir_code_buffer *code_buffer;
604 #if defined(IR_TARGET_AARCH64)
605 int32_t deoptimization_exits;
606 const void *deoptimization_exits_base;
607 ir_get_exit_addr_t get_exit_addr;
608 ir_get_veneer_t get_veneer;
609 ir_set_veneer_t set_veneer;
610 #endif
611 ir_loader *loader;
612 ir_strtab strtab;
613 ir_ref prev_insn_chain[IR_LAST_FOLDABLE_OP + 1];
614 ir_ref prev_const_chain[IR_LAST_TYPE];
615 };
616
617 /* Basic IR Construction API (implementation in ir.c) */
618 void ir_init(ir_ctx *ctx, uint32_t flags, ir_ref consts_limit, ir_ref insns_limit);
619 void ir_free(ir_ctx *ctx);
620 void ir_truncate(ir_ctx *ctx);
621
622 ir_ref ir_const(ir_ctx *ctx, ir_val val, uint8_t type);
623 ir_ref ir_const_i8(ir_ctx *ctx, int8_t c);
624 ir_ref ir_const_i16(ir_ctx *ctx, int16_t c);
625 ir_ref ir_const_i32(ir_ctx *ctx, int32_t c);
626 ir_ref ir_const_i64(ir_ctx *ctx, int64_t c);
627 ir_ref ir_const_u8(ir_ctx *ctx, uint8_t c);
628 ir_ref ir_const_u16(ir_ctx *ctx, uint16_t c);
629 ir_ref ir_const_u32(ir_ctx *ctx, uint32_t c);
630 ir_ref ir_const_u64(ir_ctx *ctx, uint64_t c);
631 ir_ref ir_const_bool(ir_ctx *ctx, bool c);
632 ir_ref ir_const_char(ir_ctx *ctx, char c);
633 ir_ref ir_const_float(ir_ctx *ctx, float c);
634 ir_ref ir_const_double(ir_ctx *ctx, double c);
635 ir_ref ir_const_addr(ir_ctx *ctx, uintptr_t c);
636
637 ir_ref ir_const_func_addr(ir_ctx *ctx, uintptr_t c, ir_ref proto);
638 ir_ref ir_const_func(ir_ctx *ctx, ir_ref str, ir_ref proto);
639 ir_ref ir_const_sym(ir_ctx *ctx, ir_ref str);
640 ir_ref ir_const_str(ir_ctx *ctx, ir_ref str);
641
642 ir_ref ir_unique_const_addr(ir_ctx *ctx, uintptr_t c);
643
644 void ir_print_const(const ir_ctx *ctx, const ir_insn *insn, FILE *f, bool quoted);
645
646 ir_ref ir_str(ir_ctx *ctx, const char *s);
647 ir_ref ir_strl(ir_ctx *ctx, const char *s, size_t len);
648 const char *ir_get_str(const ir_ctx *ctx, ir_ref idx);
649 const char *ir_get_strl(const ir_ctx *ctx, ir_ref idx, size_t *len);
650
651 #define IR_MAX_PROTO_PARAMS 255
652
653 typedef struct _ir_proto_t {
654 uint8_t flags;
655 uint8_t ret_type;
656 uint8_t params_count;
657 uint8_t param_types[5];
658 } ir_proto_t;
659
660 ir_ref ir_proto_0(ir_ctx *ctx, uint8_t flags, ir_type ret_type);
661 ir_ref ir_proto_1(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1);
662 ir_ref ir_proto_2(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2);
663 ir_ref ir_proto_3(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2, ir_type t3);
664 ir_ref ir_proto_4(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2, ir_type t3,
665 ir_type t4);
666 ir_ref ir_proto_5(ir_ctx *ctx, uint8_t flags, ir_type ret_type, ir_type t1, ir_type t2, ir_type t3,
667 ir_type t4, ir_type t5);
668 ir_ref ir_proto(ir_ctx *ctx, uint8_t flags, ir_type ret_type, uint32_t params_counts, uint8_t *param_types);
669
670 ir_ref ir_emit(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
671
672 ir_ref ir_emit0(ir_ctx *ctx, uint32_t opt);
673 ir_ref ir_emit1(ir_ctx *ctx, uint32_t opt, ir_ref op1);
674 ir_ref ir_emit2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2);
675 ir_ref ir_emit3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
676
677 ir_ref ir_emit_N(ir_ctx *ctx, uint32_t opt, int32_t count);
678 void ir_set_op(ir_ctx *ctx, ir_ref ref, int32_t n, ir_ref val);
679
ir_set_op1(ir_ctx * ctx,ir_ref ref,ir_ref val)680 IR_ALWAYS_INLINE void ir_set_op1(ir_ctx *ctx, ir_ref ref, ir_ref val)
681 {
682 ctx->ir_base[ref].op1 = val;
683 }
684
ir_set_op2(ir_ctx * ctx,ir_ref ref,ir_ref val)685 IR_ALWAYS_INLINE void ir_set_op2(ir_ctx *ctx, ir_ref ref, ir_ref val)
686 {
687 ctx->ir_base[ref].op2 = val;
688 }
689
ir_set_op3(ir_ctx * ctx,ir_ref ref,ir_ref val)690 IR_ALWAYS_INLINE void ir_set_op3(ir_ctx *ctx, ir_ref ref, ir_ref val)
691 {
692 ctx->ir_base[ref].op3 = val;
693 }
694
ir_insn_op(const ir_insn * insn,int32_t n)695 IR_ALWAYS_INLINE ir_ref ir_insn_op(const ir_insn *insn, int32_t n)
696 {
697 const ir_ref *p = insn->ops + n;
698 return *p;
699 }
700
ir_insn_set_op(ir_insn * insn,int32_t n,ir_ref val)701 IR_ALWAYS_INLINE void ir_insn_set_op(ir_insn *insn, int32_t n, ir_ref val)
702 {
703 ir_ref *p = insn->ops + n;
704 *p = val;
705 }
706
707 ir_ref ir_fold(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
708
709 ir_ref ir_fold0(ir_ctx *ctx, uint32_t opt);
710 ir_ref ir_fold1(ir_ctx *ctx, uint32_t opt, ir_ref op1);
711 ir_ref ir_fold2(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2);
712 ir_ref ir_fold3(ir_ctx *ctx, uint32_t opt, ir_ref op1, ir_ref op2, ir_ref op3);
713
714 ir_ref ir_param(ir_ctx *ctx, ir_type type, ir_ref region, const char *name, int pos);
715 ir_ref ir_var(ir_ctx *ctx, ir_type type, ir_ref region, const char *name);
716 ir_ref ir_bind(ir_ctx *ctx, ir_ref var, ir_ref def);
717
718 /* Def -> Use lists */
719 void ir_build_def_use_lists(ir_ctx *ctx);
720
721 /* CFG - Control Flow Graph (implementation in ir_cfg.c) */
722 int ir_build_cfg(ir_ctx *ctx);
723 int ir_remove_unreachable_blocks(ir_ctx *ctx);
724 int ir_build_dominators_tree(ir_ctx *ctx);
725 int ir_find_loops(ir_ctx *ctx);
726 int ir_schedule_blocks(ir_ctx *ctx);
727 void ir_build_prev_refs(ir_ctx *ctx);
728
729 /* SCCP - Sparse Conditional Constant Propagation (implementation in ir_sccp.c) */
730 int ir_sccp(ir_ctx *ctx);
731
732 /* GCM - Global Code Motion and scheduling (implementation in ir_gcm.c) */
733 int ir_gcm(ir_ctx *ctx);
734 int ir_schedule(ir_ctx *ctx);
735
736 /* Liveness & Register Allocation (implementation in ir_ra.c) */
737 #define IR_REG_NONE -1
738 #define IR_REG_SPILL_LOAD (1<<6)
739 #define IR_REG_SPILL_STORE (1<<6)
740 #define IR_REG_SPILL_SPECIAL (1<<7)
741 #define IR_REG_SPILLED(r) \
742 ((r) & (IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))
743 #define IR_REG_NUM(r) \
744 ((int8_t)((r) == IR_REG_NONE ? IR_REG_NONE : ((r) & ~(IR_REG_SPILL_LOAD|IR_REG_SPILL_STORE|IR_REG_SPILL_SPECIAL))))
745
746 int ir_assign_virtual_registers(ir_ctx *ctx);
747 int ir_compute_live_ranges(ir_ctx *ctx);
748 int ir_coalesce(ir_ctx *ctx);
749 int ir_compute_dessa_moves(ir_ctx *ctx);
750 int ir_reg_alloc(ir_ctx *ctx);
751
752 int ir_regs_number(void);
753 bool ir_reg_is_int(int32_t reg);
754 const char *ir_reg_name(int8_t reg, ir_type type);
755 int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref);
756
757 /* Target CPU instruction selection and code generation (see ir_x86.c) */
758 int ir_match(ir_ctx *ctx);
759 void *ir_emit_code(ir_ctx *ctx, size_t *size);
760
761 bool ir_needs_thunk(ir_code_buffer *code_buffer, void *addr);
762 void *ir_emit_thunk(ir_code_buffer *code_buffer, void *addr, size_t *size_ptr);
763 void ir_fix_thunk(void *thunk_entry, void *addr);
764
765 /* Target address resolution (implementation in ir_emit.c) */
766 void *ir_resolve_sym_name(const char *name);
767
768 /* Target CPU disassembler (implementation in ir_disasm.c) */
769 int ir_disasm_init(void);
770 void ir_disasm_free(void);
771 void ir_disasm_add_symbol(const char *name, uint64_t addr, uint64_t size);
772 const char* ir_disasm_find_symbol(uint64_t addr, int64_t *offset);
773 int ir_disasm(const char *name,
774 const void *start,
775 size_t size,
776 bool asm_addr,
777 ir_ctx *ctx,
778 FILE *f);
779
780 /* Linux perf interface (implementation in ir_perf.c) */
781 int ir_perf_jitdump_open(void);
782 int ir_perf_jitdump_close(void);
783 int ir_perf_jitdump_register(const char *name, const void *start, size_t size);
784 void ir_perf_map_register(const char *name, const void *start, size_t size);
785
786 /* GDB JIT interface (implementation in ir_gdb.c) */
787 int ir_gdb_register(const char *name,
788 const void *start,
789 size_t size,
790 uint32_t sp_offset,
791 uint32_t sp_adjustment);
792 void ir_gdb_unregister_all(void);
793 bool ir_gdb_present(void);
794
795 /* IR load API (implementation in ir_load.c) */
796 struct _ir_loader {
797 uint32_t default_func_flags;
798 bool (*init_module) (ir_loader *loader, const char *name, const char *filename, const char *target);
799 bool (*external_sym_dcl) (ir_loader *loader, const char *name, uint32_t flags);
800 bool (*external_func_dcl) (ir_loader *loader, const char *name,
801 uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types);
802 bool (*forward_func_dcl) (ir_loader *loader, const char *name,
803 uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types);
804 bool (*sym_dcl) (ir_loader *loader, const char *name, uint32_t flags, size_t size, bool has_data);
805 bool (*sym_data) (ir_loader *loader, ir_type type, uint32_t count, const void *data);
806 bool (*sym_data_pad) (ir_loader *loader, size_t offset);
807 bool (*sym_data_ref) (ir_loader *loader, ir_op op, const char *ref, uintptr_t offset);
808 bool (*sym_data_end) (ir_loader *loader);
809 bool (*func_init) (ir_loader *loader, ir_ctx *ctx, const char *name);
810 bool (*func_process) (ir_loader *loader, ir_ctx *ctx, const char *name);
811 void*(*resolve_sym_name) (ir_loader *loader, const char *name, bool add_thunk);
812 bool (*has_sym) (ir_loader *loader, const char *name);
813 bool (*add_sym) (ir_loader *loader, const char *name, void *addr);
814 };
815
816 void ir_loader_init(void);
817 void ir_loader_free(void);
818 int ir_load(ir_loader *loader, FILE *f);
819
820 /* IR LLVM load API (implementation in ir_load_llvm.c) */
821 int ir_load_llvm_bitcode(ir_loader *loader, const char *filename);
822 int ir_load_llvm_asm(ir_loader *loader, const char *filename);
823
824 /* IR save API (implementation in ir_save.c) */
825 void ir_print_proto(const ir_ctx *ctx, ir_ref proto, FILE *f);
826 void ir_save(const ir_ctx *ctx, FILE *f);
827
828 /* IR debug dump API (implementation in ir_dump.c) */
829 void ir_dump(const ir_ctx *ctx, FILE *f);
830 void ir_dump_dot(const ir_ctx *ctx, const char *name, FILE *f);
831 void ir_dump_use_lists(const ir_ctx *ctx, FILE *f);
832 void ir_dump_cfg(ir_ctx *ctx, FILE *f);
833 void ir_dump_cfg_map(const ir_ctx *ctx, FILE *f);
834 void ir_dump_live_ranges(const ir_ctx *ctx, FILE *f);
835 void ir_dump_codegen(const ir_ctx *ctx, FILE *f);
836
837 /* IR to C conversion (implementation in ir_emit_c.c) */
838 int ir_emit_c(ir_ctx *ctx, const char *name, FILE *f);
839 void ir_emit_c_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
840 void ir_emit_c_sym_decl(const char *name, uint32_t flags, bool has_data, FILE *f);
841
842 /* IR to LLVM conversion (implementation in ir_emit_llvm.c) */
843 int ir_emit_llvm(ir_ctx *ctx, const char *name, FILE *f);
844 void ir_emit_llvm_func_decl(const char *name, uint32_t flags, ir_type ret_type, uint32_t params_count, const uint8_t *param_types, FILE *f);
845 void ir_emit_llvm_sym_decl(const char *name, uint32_t flags, bool has_data, FILE *f);
846
847 /* IR verification API (implementation in ir_check.c) */
848 bool ir_check(const ir_ctx *ctx);
849 void ir_consistency_check(void);
850
851 /* Code patching (implementation in ir_patch.c) */
852 int ir_patch(const void *code, size_t size, uint32_t jmp_table_size, const void *from_addr, const void *to_addr);
853
854 /* CPU information (implementation in ir_cpuinfo.c) */
855 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
856 # define IR_X86_SSE2 (1<<0)
857 # define IR_X86_SSE3 (1<<1)
858 # define IR_X86_SSSE3 (1<<2)
859 # define IR_X86_SSE41 (1<<3)
860 # define IR_X86_SSE42 (1<<4)
861 # define IR_X86_AVX (1<<5)
862 # define IR_X86_AVX2 (1<<6)
863 # define IR_X86_BMI1 (1<<7)
864 # define IR_X86_CLDEMOTE (1<<8)
865 #endif
866
867 uint32_t ir_cpuinfo(void);
868
869 /* Deoptimization helpers */
870 const void *ir_emit_exitgroup(uint32_t first_exit_point, uint32_t exit_points_per_group, const void *exit_addr, ir_code_buffer *code_buffer, size_t *size_ptr);
871
872 /* A reference IR JIT compiler */
ir_jit_compile(ir_ctx * ctx,int opt_level,size_t * size)873 IR_ALWAYS_INLINE void *ir_jit_compile(ir_ctx *ctx, int opt_level, size_t *size)
874 {
875 if (opt_level == 0) {
876 if (ctx->flags & IR_OPT_FOLDING) {
877 // IR_ASSERT(0 && "IR_OPT_FOLDING is incompatible with -O0");
878 return NULL;
879 }
880 ctx->flags &= ~(IR_OPT_CFG | IR_OPT_CODEGEN);
881
882 ir_build_def_use_lists(ctx);
883
884 if (!ir_build_cfg(ctx)
885 || !ir_match(ctx)
886 || !ir_assign_virtual_registers(ctx)
887 || !ir_compute_dessa_moves(ctx)) {
888 return NULL;
889 }
890
891 return ir_emit_code(ctx, size);
892 } else if (opt_level == 1 || opt_level == 2) {
893 if (!(ctx->flags & IR_OPT_FOLDING)) {
894 // IR_ASSERT(0 && "IR_OPT_FOLDING must be set in ir_init() for -O1 and -O2");
895 return NULL;
896 }
897 ctx->flags |= IR_OPT_CFG | IR_OPT_CODEGEN;
898
899 ir_build_def_use_lists(ctx);
900
901 if (opt_level == 2
902 && !ir_sccp(ctx)) {
903 return NULL;
904 }
905
906 if (!ir_build_cfg(ctx)
907 || !ir_build_dominators_tree(ctx)
908 || !ir_find_loops(ctx)
909 || !ir_gcm(ctx)
910 || !ir_schedule(ctx)
911 || !ir_match(ctx)
912 || !ir_assign_virtual_registers(ctx)
913 || !ir_compute_live_ranges(ctx)
914 || !ir_coalesce(ctx)
915 || !ir_reg_alloc(ctx)
916 || !ir_schedule_blocks(ctx)) {
917 return NULL;
918 }
919
920 return ir_emit_code(ctx, size);
921 } else {
922 // IR_ASSERT(0 && "wrong optimization level");
923 return NULL;
924 }
925 }
926
927 #define IR_ERROR_CODE_MEM_OVERFLOW 1
928 #define IR_ERROR_FIXED_STACK_FRAME_OVERFLOW 2
929 #define IR_ERROR_UNSUPPORTED_CODE_RULE 3
930 #define IR_ERROR_LINK 4
931 #define IR_ERROR_ENCODE 5
932
933 /* IR Memmory Allocation */
934 #ifndef ir_mem_malloc
935 # define ir_mem_malloc malloc
936 #endif
937 #ifndef ir_mem_calloc
938 # define ir_mem_calloc calloc
939 #endif
940 #ifndef ir_mem_realloc
941 # define ir_mem_realloc realloc
942 #endif
943 #ifndef ir_mem_free
944 # define ir_mem_free free
945 #endif
946
947 #ifndef ir_mem_pmalloc
948 # define ir_mem_pmalloc malloc
949 #endif
950 #ifndef ir_mem_pcalloc
951 # define ir_mem_pcalloc calloc
952 #endif
953 #ifndef ir_mem_prealloc
954 # define ir_mem_prealloc realloc
955 #endif
956 #ifndef ir_mem_pfree
957 # define ir_mem_pfree free
958 #endif
959
960 void *ir_mem_mmap(size_t size);
961 int ir_mem_unmap(void *ptr, size_t size);
962 int ir_mem_protect(void *ptr, size_t size);
963 int ir_mem_unprotect(void *ptr, size_t size);
964 int ir_mem_flush(void *ptr, size_t size);
965
966 #ifdef __cplusplus
967 } /* extern "C" */
968 #endif
969
970 #endif /* IR_H */
971