/*
 * IR - Lightweight JIT Compilation Framework
 * (Native code generator based on DynAsm)
 * Copyright (C) 2022 Zend by Perforce.
 * Authors: Dmitry Stogov <dmitry@php.net>
 */

#include "ir.h"

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_aarch64.h"
#else
# error "Unknown IR target"
#endif

#include "ir_private.h"
#ifndef _WIN32
# include <dlfcn.h>
#else
# define WIN32_LEAN_AND_MEAN
# include <windows.h>
# include <psapi.h>
#endif

#if defined(__linux__) || defined(__sun)
# include <alloca.h>
#endif

#define DASM_M_GROW(ctx, t, p, sz, need) \
    do { \
        size_t _sz = (sz), _need = (need); \
        if (_sz < _need) { \
            if (_sz < 16) _sz = 16; \
            while (_sz < _need) _sz += _sz; \
            (p) = (t *)ir_mem_realloc((p), _sz); \
            (sz) = _sz; \
        } \
    } while (0)
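
/*
 * Illustrative only (not part of DynAsm): DASM_M_GROW doubles the buffer
 * size until it can hold `need` bytes, starting from a 16-byte minimum,
 * so the amortized cost of repeated growth stays linear.
 *
 *     char  *buf = NULL;
 *     size_t cap = 0;
 *     DASM_M_GROW(NULL, char, buf, cap, 100); // cap: 0 -> 16 -> 32 -> 64 -> 128
 *     DASM_M_GROW(NULL, char, buf, cap, 100); // cap already sufficient: no-op
 *     DASM_M_FREE(NULL, buf, cap);
 */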

#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)

#ifdef IR_DEBUG
# define DASM_CHECKS
#endif

typedef struct _ir_copy {
    ir_type type;
    ir_reg  from;
    ir_reg  to;
} ir_copy;

typedef struct _ir_dessa_copy {
    ir_type type;
    int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
    int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
} ir_dessa_copy;
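
/*
 * A hypothetical encoding example (register numbers invented): assuming
 * IR_REG_NUM == 32, the copy {IR_I32, .from = 5, .to = 33} moves CPU
 * register 5 into the spill slot of virtual register 1 (33 - IR_REG_NUM),
 * while {IR_I32, .from = -2, .to = 4} loads constant #2 into CPU register 4.
 */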

#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else
static const int8_t *_ir_int_reg_params;
#endif
#if IR_REG_FP_ARGS
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
#else
static const int8_t *_ir_fp_reg_params;
#endif

static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn)
{
    if (IR_IS_CONST_REF(insn->op2)) {
        const ir_insn *func = &ctx->ir_base[insn->op2];

        if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
            if (func->proto) {
                return (const ir_proto_t *)ir_get_str(ctx, func->proto);
            }
        }
    } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
        return (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);
    }
    return NULL;
}

#ifdef IR_HAVE_FASTCALL
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
static const int8_t *_ir_fp_fc_reg_params;

bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
    if (sizeof(void*) == 4) {
        if (IR_IS_CONST_REF(insn->op2)) {
            const ir_insn *func = &ctx->ir_base[insn->op2];

            if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
                if (func->proto) {
                    const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

                    return (proto->flags & IR_FASTCALL_FUNC) != 0;
                }
            }
        } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
            const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);

            return (proto->flags & IR_FASTCALL_FUNC) != 0;
        }
        return 0;
    }
    return 0;
}
#else
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
    return 0;
}
#endif

bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
{
    const ir_proto_t *proto = ir_call_proto(ctx, insn);

    if (proto) {
        return (proto->flags & IR_VARARG_FUNC) != 0;
    }
    return 0;
}

IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
{
    IR_ASSERT(!IR_IS_CONST_REF(ref));
    return ctx->rules[ref];
}

IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
{
    return ref > ctx->bb_start;
}

static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
    ir_use_list *use_list = &ctx->use_lists[1];
    int i;
    ir_ref use, *p;
    ir_insn *insn;
    int int_param = 0;
    int fp_param = 0;
    int int_reg_params_count = IR_REG_INT_ARGS;
    int fp_reg_params_count = IR_REG_FP_ARGS;
    const int8_t *int_reg_params = _ir_int_reg_params;
    const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
    if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
        int_reg_params_count = IR_REG_INT_FCARGS;
        fp_reg_params_count = IR_REG_FP_FCARGS;
        int_reg_params = _ir_int_fc_reg_params;
        fp_reg_params = _ir_fp_fc_reg_params;
    }
#endif

    for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
        use = *p;
        insn = &ctx->ir_base[use];
        if (insn->op == IR_PARAM) {
            if (IR_IS_TYPE_INT(insn->type)) {
                if (use == ref) {
                    if (int_param < int_reg_params_count) {
                        return int_reg_params[int_param];
                    } else {
                        return IR_REG_NONE;
                    }
                }
                int_param++;
#ifdef _WIN64
                /* The WIN64 calling convention uses a common counter for int and fp registers */
                fp_param++;
#endif
            } else {
                IR_ASSERT(IR_IS_TYPE_FP(insn->type));
                if (use == ref) {
                    if (fp_param < fp_reg_params_count) {
                        return fp_reg_params[fp_param];
                    } else {
                        return IR_REG_NONE;
                    }
                }
                fp_param++;
#ifdef _WIN64
                /* The WIN64 calling convention uses a common counter for int and fp registers */
                int_param++;
#endif
            }
        }
    }
    return IR_REG_NONE;
}
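
/*
 * A sketch of the mapping above, assuming the x86-64 System V convention
 * (integer argument registers rdi, rsi, rdx, ...; FP argument registers
 * xmm0, xmm1, ...):
 *
 *     int32_t f(int32_t a, double b, int32_t c);
 *     a -> rdi (int_param 0), b -> xmm0 (fp_param 0), c -> rsi (int_param 1)
 *
 * Under the WIN64 convention both counters advance together, so the same
 * prototype maps a -> rcx, b -> xmm1, c -> r8.
 */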

static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
{
    int j, n;
    ir_type type;
    int int_param = 0;
    int fp_param = 0;
    int count = 0;
    int int_reg_params_count = IR_REG_INT_ARGS;
    int fp_reg_params_count = IR_REG_FP_ARGS;
    const int8_t *int_reg_params = _ir_int_reg_params;
    const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
    if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
        int_reg_params_count = IR_REG_INT_FCARGS;
        fp_reg_params_count = IR_REG_FP_FCARGS;
        int_reg_params = _ir_int_fc_reg_params;
        fp_reg_params = _ir_fp_fc_reg_params;
    }
#endif

    n = insn->inputs_count;
    n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
    for (j = 3; j <= n; j++) {
        type = ctx->ir_base[ir_insn_op(insn, j)].type;
        if (IR_IS_TYPE_INT(type)) {
            if (int_param < int_reg_params_count) {
                regs[j] = int_reg_params[int_param];
                count = j + 1;
            } else {
                regs[j] = IR_REG_NONE;
            }
            int_param++;
#ifdef _WIN64
            /* The WIN64 calling convention uses a common counter for int and fp registers */
            fp_param++;
#endif
        } else {
            IR_ASSERT(IR_IS_TYPE_FP(type));
            if (fp_param < fp_reg_params_count) {
                regs[j] = fp_reg_params[fp_param];
                count = j + 1;
            } else {
                regs[j] = IR_REG_NONE;
            }
            fp_param++;
#ifdef _WIN64
            /* The WIN64 calling convention uses a common counter for int and fp registers */
            int_param++;
#endif
        }
    }
    return count;
}

static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
{
    ir_live_interval *ival1;
    int32_t o1;

    if (IR_IS_CONST_REF(r1)) {
        return 0;
    }

    IR_ASSERT(ctx->vregs[r1]);
    ival1 = ctx->live_intervals[ctx->vregs[r1]];
    IR_ASSERT(ival1);
    o1 = ival1->stack_spill_pos;
    IR_ASSERT(o1 != -1);
    return o1 == offset;
}

void *ir_resolve_sym_name(const char *name)
{
    void *handle = NULL;
    void *addr;

#ifndef _WIN32
# ifdef RTLD_DEFAULT
    handle = RTLD_DEFAULT;
# endif
    addr = dlsym(handle, name);
#else
    HMODULE mods[256];
    DWORD cbNeeded;
    uint32_t i = 0;

    addr = NULL;

    EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded);

    while (i < (cbNeeded / sizeof(HMODULE))) {
        addr = GetProcAddress(mods[i], name);
        if (addr) {
            return addr;
        }
        i++;
    }
#endif
    return addr;
}
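
/*
 * A minimal usage sketch (illustrative only): resolving a symbol from the
 * modules already loaded into the current process.
 *
 *     void *p = ir_resolve_sym_name("printf");
 *     if (!p) {
 *         // the symbol is not exported by any loaded module
 *     }
 */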

#ifdef IR_SNAPSHOT_HANDLER_DCL
IR_SNAPSHOT_HANDLER_DCL();
#endif

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
static void *ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
    const char *name = ir_get_str(ctx, addr_insn->val.name);
    void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
        ctx->loader->resolve_sym_name(ctx->loader, name, 0) :
        ir_resolve_sym_name(name);

    return addr;
}
#endif

static void *ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{
    const char *name = ir_get_str(ctx, addr_insn->val.name);
    void *addr = (ctx->loader && ctx->loader->resolve_sym_name) ?
        ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC) :
        ir_resolve_sym_name(name);

    IR_ASSERT(addr);
    return addr;
}

static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
    void *addr;

    IR_ASSERT(addr_insn->type == IR_ADDR);
    if (addr_insn->op == IR_FUNC) {
        addr = ir_sym_val(ctx, addr_insn);
    } else {
        IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
        addr = (void*)addr_insn->val.addr;
    }
    return addr;
}

static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
    void *addr = ir_call_addr(ctx, insn, addr_insn);

#ifdef IR_SNAPSHOT_HANDLER
    if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
        addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr);
    }
#endif
    return addr;
}

static int8_t ir_get_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op)
{
    if (ctx->fused_regs) {
        char key[10];
        ir_ref val;

        memcpy(key, &root, sizeof(ir_ref));
        memcpy(key + 4, &ref_and_op, sizeof(ir_ref));

        val = ir_strtab_find(ctx->fused_regs, key, 8);
        if (val) {
            return val;
        }
    }
    return ((int8_t*)ctx->regs)[ref_and_op];
}
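
/*
 * The strtab key built above is just the two 32-bit refs laid out
 * back-to-back. An equivalent lookup for hypothetical values root == 10
 * and ref_and_op == 42 would be:
 *
 *     char   key[8];
 *     ir_ref root = 10, ref_and_op = 42;
 *     memcpy(key, &root, sizeof(ir_ref));
 *     memcpy(key + sizeof(ir_ref), &ref_and_op, sizeof(ir_ref));
 *     ir_strtab_find(ctx->fused_regs, key, 8); // 0 => fall back to ctx->regs
 */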

#if defined(__GNUC__)
# pragma GCC diagnostic push
# pragma GCC diagnostic ignored "-Warray-bounds"
# pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
#endif

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "dynasm/dasm_proto.h"
# include "dynasm/dasm_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "dynasm/dasm_proto.h"
static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset);
# define DASM_ADD_VENEER ir_add_veneer
# include "dynasm/dasm_arm64.h"
#else
# error "Unknown IR target"
#endif

#if defined(__GNUC__)
# pragma GCC diagnostic pop
#endif

/* Forward Declarations */
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg);
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);

typedef struct _ir_common_backend_data {
    ir_reg_alloc_data ra_data;
    uint32_t dessa_from_block;
    dasm_State *dasm_state;
    ir_bitset emit_constants;
} ir_common_backend_data;

static int ir_const_label(ir_ctx *ctx, ir_ref ref)
{
    ir_common_backend_data *data = ctx->data;
    int label = ctx->cfg_blocks_count - ref;

    IR_ASSERT(IR_IS_CONST_REF(ref));
    ir_bitset_incl(data->emit_constants, -ref);
    return label;
}

#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
# include "ir_emit_x86.h"
#elif defined(IR_TARGET_AARCH64)
# include "ir_emit_aarch64.h"
#else
# error "Unknown IR target"
#endif

static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
    ir_list *list = (ir_list*)ctx->osr_entry_loads;
    int pos = 0, count, i;
    ir_ref ref;

    IR_ASSERT(ctx->binding);
    IR_ASSERT(list);
    while (1) {
        i = ir_list_at(list, pos);
        if (b == i) {
            break;
        }
        IR_ASSERT(i != 0); /* end marker */
        pos++;
        count = ir_list_at(list, pos);
        pos += count + 1;
    }
    pos++;
    count = ir_list_at(list, pos);
    pos++;

    for (i = 0; i < count; i++, pos++) {
        ref = ir_list_at(list, pos);
        IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
        if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
            /* not spilled */
            ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
            ir_type type = ctx->ir_base[ref].type;
            int32_t offset = -ir_binding_find(ctx, ref);

            IR_ASSERT(offset > 0);
            ir_emit_load_mem(ctx, type, reg, IR_MEM_BO(ctx->spill_base, offset));
        } else {
            IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
        }
    }
}

/*
 * Parallel copy sequentialization algorithm
 *
 * The implementation is based on Algorithm 1 described in
 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
 * pp. 114-125, doi: 10.1109/CGO.2009.19.
 */
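/*
 * A worked example (register numbers are hypothetical): the parallel copy
 *
 *     r1 -> r2, r2 -> r3, r3 -> r1, r4 -> r5
 *
 * contains one tree (r4 -> r5: r5 is not a source) and one cycle. The tree
 * is emitted first, then the cycle is broken with the temporary register:
 *
 *     mov r5, r4    ; "windmill blade"
 *     mov tmp, r1   ; free one destination of the cycle
 *     mov r1, r3
 *     mov r3, r2
 *     mov r2, tmp
 */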
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
    int i;
    int8_t *pred, *loc, *types;
    ir_reg to, from;
    ir_type type;
    ir_regset todo, ready, srcs;

    if (count == 1) {
        to = copies[0].to;
        from = copies[0].from;
        IR_ASSERT(from != to);
        type = copies[0].type;
        if (IR_IS_TYPE_INT(type)) {
            ir_emit_mov(ctx, type, to, from);
        } else {
            ir_emit_fp_mov(ctx, type, to, from);
        }
        return 1;
    }

    loc = alloca(IR_REG_NUM * 3 * sizeof(int8_t));
    pred = loc + IR_REG_NUM;
    types = pred + IR_REG_NUM;
    todo = IR_REGSET_EMPTY;
    srcs = IR_REGSET_EMPTY;

    for (i = 0; i < count; i++) {
        from = copies[i].from;
        to = copies[i].to;
        IR_ASSERT(from != to);
        IR_REGSET_INCL(srcs, from);
        loc[from] = from;
        pred[to] = from;
        types[from] = copies[i].type;
        IR_ASSERT(!IR_REGSET_IN(todo, to));
        IR_REGSET_INCL(todo, to);
    }

    ready = IR_REGSET_DIFFERENCE(todo, srcs);

    if (ready == todo) {
        for (i = 0; i < count; i++) {
            from = copies[i].from;
            to = copies[i].to;
            IR_ASSERT(from != to);
            type = copies[i].type;
            if (IR_IS_TYPE_INT(type)) {
                ir_emit_mov(ctx, type, to, from);
            } else {
                ir_emit_fp_mov(ctx, type, to, from);
            }
        }
        return 1;
    }

    /* the temporary registers can't be among the destinations */
    IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
    IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));

    /* first we resolve all "windmill blades" - trees (this doesn't require temporary registers) */
    while (ready != IR_REGSET_EMPTY) {
        ir_reg r;

        to = ir_regset_pop_first(&ready);
        from = pred[to];
        r = loc[from];
        type = types[from];
        if (IR_IS_TYPE_INT(type)) {
            ir_emit_mov_ext(ctx, type, to, r);
        } else {
            ir_emit_fp_mov(ctx, type, to, r);
        }
        IR_REGSET_EXCL(todo, to);
        loc[from] = to;
        if (from == r && IR_REGSET_IN(todo, from)) {
            IR_REGSET_INCL(ready, from);
        }
    }
    if (todo == IR_REGSET_EMPTY) {
        return 1;
    }

    /* at this point the sources that are the same as the temporaries are already moved */
    IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
    IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);

    /* now we resolve all "windmill axles" - cycles (this requires temporary registers) */
    while (todo != IR_REGSET_EMPTY) {
        to = ir_regset_pop_first(&todo);
        from = pred[to];
        IR_ASSERT(to != loc[from]);
        type = types[from];
        if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
            if (pred[from] == to) {
                ir_emit_swap(ctx, type, to, from);
                IR_REGSET_EXCL(todo, from);
                loc[to] = from;
                loc[from] = to;
                continue;
            }
#endif
            IR_ASSERT(tmp_reg != IR_REG_NONE);
            IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
            ir_emit_mov(ctx, type, tmp_reg, to);
            loc[to] = tmp_reg;
        } else {
#ifdef IR_HAVE_SWAP_FP
            if (pred[from] == to) {
                ir_emit_swap_fp(ctx, type, to, from);
                IR_REGSET_EXCL(todo, from);
                loc[to] = from;
                loc[from] = to;
                continue;
            }
#endif
            IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
            IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
            ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
            loc[to] = tmp_fp_reg;
        }
        while (1) {
            ir_reg r;

            from = pred[to];
            r = loc[from];
            type = types[from];
            if (IR_IS_TYPE_INT(type)) {
                ir_emit_mov_ext(ctx, type, to, r);
            } else {
                ir_emit_fp_mov(ctx, type, to, r);
            }
            IR_REGSET_EXCL(todo, to);
            loc[from] = to;
            if (from == r && IR_REGSET_IN(todo, from)) {
                to = from;
            } else {
                break;
            }
        }
    }

    return 1;
}

static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
    ir_mem mem_from, mem_to;

    IR_ASSERT(from != to);
    if (to < IR_REG_NUM) {
        if (IR_IS_CONST_REF(from)) {
            if (-from < ctx->consts_count) {
                /* constant reference */
                ir_emit_load(ctx, type, to, from);
            } else {
                /* local variable address */
                ir_load_local_addr(ctx, to, -from - ctx->consts_count);
            }
        } else if (from < IR_REG_NUM) {
            if (IR_IS_TYPE_INT(type)) {
                ir_emit_mov(ctx, type, to, from);
            } else {
                ir_emit_fp_mov(ctx, type, to, from);
            }
        } else {
            mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
            ir_emit_load_mem(ctx, type, to, mem_from);
        }
    } else {
        mem_to = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);
        if (IR_IS_CONST_REF(from)) {
            if (-from < ctx->consts_count) {
                /* constant reference */
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
                if (IR_IS_TYPE_INT(type)
                 && !IR_IS_SYM_CONST(ctx->ir_base[from].op)
                 && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
                    ir_emit_store_mem_imm(ctx, type, mem_to, ctx->ir_base[from].val.i32);
                    return;
                }
#endif
                ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
                IR_ASSERT(tmp != IR_REG_NONE);
                ir_emit_load(ctx, type, tmp, from);
                ir_emit_store_mem(ctx, type, mem_to, tmp);
            } else {
                /* local variable address */
                IR_ASSERT(IR_IS_TYPE_INT(type));
                IR_ASSERT(tmp_reg != IR_REG_NONE);
                ir_load_local_addr(ctx, tmp_reg, -from - ctx->consts_count);
                ir_emit_store_mem(ctx, type, mem_to, tmp_reg);
            }
        } else if (from < IR_REG_NUM) {
            ir_emit_store_mem(ctx, type, mem_to, from);
        } else {
            mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
            IR_ASSERT(IR_MEM_VAL(mem_to) != IR_MEM_VAL(mem_from));
            ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
            IR_ASSERT(tmp != IR_REG_NONE);
            ir_emit_load_mem(ctx, type, tmp, mem_from);
            ir_emit_store_mem(ctx, type, mem_to, tmp);
        }
    }
}
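
/*
 * A hypothetical register/memory cycle, encoded as the copies
 * {.from = r0, .to = IR_REG_NUM + 1} and {.from = IR_REG_NUM + 1, .to = r0}
 * (virtual register v1 lives in a spill slot), is resolved by
 * ir_dessa_resolve_cycle() below roughly as:
 *
 *     mov tmp, r0          ; park r0 in the temporary register
 *     mov r0, [slot(v1)]
 *     mov [slot(v1)], tmp
 *
 * A memory-to-memory move inside a cycle additionally borrows a stack slot
 * so that the temporary register itself can be freed.
 */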

IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, int8_t *types, ir_bitset todo, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
    ir_ref from;
    ir_mem tmp_spill_slot;
    ir_type type;

    IR_MEM_VAL(tmp_spill_slot) = 0;
    IR_ASSERT(!IR_IS_CONST_REF(to));
    from = pred[to];
    type = types[from];
    IR_ASSERT(!IR_IS_CONST_REF(from));
    IR_ASSERT(from != to);
    IR_ASSERT(loc[from] == from);

    if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
        if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
            /* a simple cycle of 2 elements */
            if (ir_type_size[types[to]] > ir_type_size[type]) {
                type = types[to];
            }
            ir_emit_swap(ctx, type, to, from);
            ir_bitset_excl(todo, from);
            ir_bitset_excl(todo, to);
            loc[to] = from;
            loc[from] = to;
            return;
        }
#endif
        IR_ASSERT(tmp_reg != IR_REG_NONE);
        IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
        loc[to] = tmp_reg;
        if (to < IR_REG_NUM) {
            ir_emit_mov(ctx, type, tmp_reg, to);
        } else {
            ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
        }
    } else {
#ifdef IR_HAVE_SWAP_FP
        if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM && types[to] == type) {
            /* a simple cycle of 2 elements */
            ir_emit_swap_fp(ctx, type, to, from);
            ir_bitset_excl(todo, from);
            ir_bitset_excl(todo, to);
            loc[to] = from;
            loc[from] = to;
            return;
        }
#endif
        IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
        IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
        loc[to] = tmp_fp_reg;
        if (to < IR_REG_NUM) {
            ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
        } else {
            ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
        }
    }

    while (1) {
        int32_t r;

        from = pred[to];
        r = loc[from];
        type = types[to];

        if (from == r && ir_bitset_in(todo, from)) {
            /* A memory-to-memory move inside an isolated or "blocked" cycle requires an additional temporary register */
            if (to >= IR_REG_NUM && r >= IR_REG_NUM) {
                ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;

                if (!IR_MEM_VAL(tmp_spill_slot)) {
                    /* Free a register, saving it in a temporary spill slot */
                    tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16);
                    ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp);
                }
                ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
            } else {
                ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE);
            }
            ir_bitset_excl(todo, to);
            loc[from] = to;
            to = from;
        } else {
            break;
        }
    }

    type = types[to];
    if (IR_MEM_VAL(tmp_spill_slot)) {
        ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
    }
    ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE);
    ir_bitset_excl(todo, to);
    loc[from] = to;
}

static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
    int i;
    int32_t *pred, *loc, to, from;
    int8_t *types;
    ir_type type;
    uint32_t len;
    ir_bitset todo, ready, srcs, visited;

    if (count == 1) {
        to = copies[0].to;
        from = copies[0].from;
        IR_ASSERT(from != to);
        type = copies[0].type;
        ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
        return 1;
    }

    len = IR_REG_NUM + ctx->vregs_count + 1;
    todo = ir_bitset_malloc(len);
    srcs = ir_bitset_malloc(len);
    loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
    pred = loc + len;
    types = (int8_t*)(pred + len);

    for (i = 0; i < count; i++) {
        from = copies[i].from;
        to = copies[i].to;
        IR_ASSERT(from != to);
        if (!IR_IS_CONST_REF(from)) {
            ir_bitset_incl(srcs, from);
            loc[from] = from;
        }
        pred[to] = from;
        types[to] = copies[i].type;
        IR_ASSERT(!ir_bitset_in(todo, to));
        ir_bitset_incl(todo, to);
    }

    /* the temporary registers can't be among the sources */
    IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
    IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));

    /* first we resolve all "windmill blades" - trees that don't target the temporary registers */
    ready = ir_bitset_malloc(len);
    ir_bitset_copy(ready, todo, ir_bitset_len(len));
    ir_bitset_difference(ready, srcs, ir_bitset_len(len));
    if (tmp_reg != IR_REG_NONE) {
        ir_bitset_excl(ready, tmp_reg);
    }
    if (tmp_fp_reg != IR_REG_NONE) {
        ir_bitset_excl(ready, tmp_fp_reg);
    }
    while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
        ir_bitset_excl(todo, to);
        type = types[to];
        from = pred[to];
        if (IR_IS_CONST_REF(from)) {
            ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
        } else {
            int32_t r = loc[from];
            ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
            loc[from] = to;
            if (from == r && ir_bitset_in(todo, from) && from != tmp_reg && from != tmp_fp_reg) {
                ir_bitset_incl(ready, from);
            }
        }
    }

    /* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
    visited = ir_bitset_malloc(len);
    ir_bitset_copy(ready, todo, ir_bitset_len(len));
    ir_bitset_intersection(ready, srcs, ir_bitset_len(len));
    while ((to = ir_bitset_first(ready, ir_bitset_len(len))) >= 0) {
        ir_bitset_clear(visited, ir_bitset_len(len));
        ir_bitset_incl(visited, to);
        to = pred[to];
        while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
            to = pred[to];
            if (IR_IS_CONST_REF(to)) {
                break;
            } else if (ir_bitset_in(visited, to)) {
                /* We found a cycle. Resolve it. */
                ir_bitset_incl(visited, to);
                ir_dessa_resolve_cycle(ctx, pred, loc, types, todo, to, tmp_reg, tmp_fp_reg);
                break;
            }
            ir_bitset_incl(visited, to);
        }
        ir_bitset_difference(ready, visited, ir_bitset_len(len));
    }

    /* finally we resolve the remaining "windmill blades" - trees that target the temporary registers */
    ir_bitset_copy(ready, todo, ir_bitset_len(len));
    ir_bitset_difference(ready, srcs, ir_bitset_len(len));
    while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
        ir_bitset_excl(todo, to);
        type = types[to];
        from = pred[to];
        if (IR_IS_CONST_REF(from)) {
            ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
        } else {
            int32_t r = loc[from];
            ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
            loc[from] = to;
            if (from == r && ir_bitset_in(todo, from)) {
                ir_bitset_incl(ready, from);
            }
        }
    }

    IR_ASSERT(ir_bitset_empty(todo, ir_bitset_len(len)));

    ir_mem_free(visited);
    ir_mem_free(ready);
    ir_mem_free(loc);
    ir_mem_free(srcs);
    ir_mem_free(todo);
    return 1;
}

static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
    uint32_t succ, k, n = 0;
    ir_block *succ_bb;
    ir_use_list *use_list;
    ir_ref i, *p;
    ir_dessa_copy *copies;
    ir_reg tmp_reg = ctx->regs[bb->end][0];
    ir_reg tmp_fp_reg = ctx->regs[bb->end][1];

    IR_ASSERT(bb->successors_count == 1);
    succ = ctx->cfg_edges[bb->successors];
    succ_bb = &ctx->cfg_blocks[succ];
    IR_ASSERT(succ_bb->predecessors_count > 1);
    use_list = &ctx->use_lists[succ_bb->start];
    k = ir_phi_input_number(ctx, succ_bb, b);

    copies = alloca(use_list->count * sizeof(ir_dessa_copy));

    for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
        ir_ref ref = *p;
        ir_insn *insn = &ctx->ir_base[ref];

        if (insn->op == IR_PHI) {
            ir_ref input = ir_insn_op(insn, k);
            ir_reg src = ir_get_alocated_reg(ctx, ref, k);
            ir_reg dst = ctx->regs[ref][0];
            ir_ref from, to;

            IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));
            if (IR_IS_CONST_REF(input)) {
                from = input;
            } else if (ir_rule(ctx, input) == IR_STATIC_ALLOCA) {
                /* encode local variable address */
                from = -(ctx->consts_count + input);
            } else {
                from = (src != IR_REG_NONE && !IR_REG_SPILLED(src)) ?
                    (ir_ref)src : (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
            }
            to = (dst != IR_REG_NONE) ?
                (ir_ref)dst : (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
            if (to != from) {
                if (to >= IR_REG_NUM
                 && from >= IR_REG_NUM
                 && IR_MEM_VAL(ir_vreg_spill_slot(ctx, from - IR_REG_NUM)) ==
                    IR_MEM_VAL(ir_vreg_spill_slot(ctx, to - IR_REG_NUM))) {
                    /* It's possible that different virtual registers share the same special spill slot */
                    // TODO: See ext/opcache/tests/jit/gh11917.phpt failure on Linux 32-bit
                    continue;
                }
                copies[n].type = insn->type;
                copies[n].from = from;
                copies[n].to = to;
                n++;
            }
        }
    }

    if (n > 0) {
        ir_dessa_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
    }
}

int ir_match(ir_ctx *ctx)
{
    uint32_t b;
    ir_ref start, ref, *prev_ref;
    ir_block *bb;
    ir_insn *insn;
    uint32_t entries_count = 0;

    ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

    prev_ref = ctx->prev_ref;
    if (!prev_ref) {
        ir_build_prev_refs(ctx);
        prev_ref = ctx->prev_ref;
    }

    if (ctx->entries_count) {
        ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
    }

    for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
        IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
        start = bb->start;
        if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
            IR_ASSERT(entries_count < ctx->entries_count);
            insn = &ctx->ir_base[start];
            IR_ASSERT(insn->op == IR_ENTRY);
            insn->op3 = entries_count;
            ctx->entries[entries_count] = b;
            entries_count++;
        }
        ctx->rules[start] = IR_SKIPPED | IR_NOP;
        ref = bb->end;
        if (bb->successors_count == 1) {
            insn = &ctx->ir_base[ref];
            if (insn->op == IR_END || insn->op == IR_LOOP_END) {
                ctx->rules[ref] = insn->op;
                ref = prev_ref[ref];
                if (ref == start && ctx->cfg_edges[bb->successors] != b) {
                    if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
                        bb->flags |= IR_BB_EMPTY;
                    } else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
                        bb->flags |= IR_BB_EMPTY;
                        if (ctx->cfg_edges[bb->successors] == b + 1) {
                            (bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
                        }
                    }
                    continue;
                }
            }
        }

        ctx->bb_start = start; /* bb_start is used by the matcher to avoid fusion of insns from different blocks */

        while (ref != start) {
            uint32_t rule = ctx->rules[ref];

            if (!rule) {
                ctx->rules[ref] = rule = ir_match_insn(ctx, ref);
            }
            ir_match_insn2(ctx, ref, rule);
            ref = prev_ref[ref];
        }
    }

    if (ctx->entries_count) {
        ctx->entries_count = entries_count;
        if (!entries_count) {
            ir_mem_free(ctx->entries);
            ctx->entries = NULL;
        }
    }

    return 1;
}

int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
{
    int32_t offset;

    IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
    offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
    IR_ASSERT(offset != -1);
    return IR_SPILL_POS_TO_OFFSET(offset);
}