1 /*
2 * IR - Lightweight JIT Compilation Framework
3 * (Native code generator based on DynAsm)
4 * Copyright (C) 2022 Zend by Perforce.
5 * Authors: Dmitry Stogov <dmitry@php.net>
6 */
7
8 #include "ir.h"
9
10 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
11 # include "ir_x86.h"
12 #elif defined(IR_TARGET_AARCH64)
13 # include "ir_aarch64.h"
14 #else
15 # error "Unknown IR target"
16 #endif
17
18 #include "ir_private.h"
19 #ifndef _WIN32
20 # include <dlfcn.h>
21 #else
22 # define WIN32_LEAN_AND_MEAN
23 # include <windows.h>
24 # include <psapi.h>
25 #endif
26
27 #if defined(__linux__) || defined(__sun)
28 # include <alloca.h>
29 #endif
30
/* Grow the DynAsm buffer (p) of element type (t) so that it can hold at
 * least (need) bytes; (sz) is updated to the new capacity.  Capacity is
 * doubled from a minimum of 16, giving amortized O(1) growth. */
#define DASM_M_GROW(ctx, t, p, sz, need) \
	do { \
		size_t _sz = (sz), _need = (need); \
		if (_sz < _need) { \
			if (_sz < 16) _sz = 16; \
			while (_sz < _need) _sz += _sz; \
			(p) = (t *)ir_mem_realloc((p), _sz); \
			(sz) = _sz; \
		} \
	} while(0)

/* Release a DynAsm buffer; (ctx) and (sz) are unused by our allocator. */
#define DASM_M_FREE(ctx, p, sz) ir_mem_free(p)

/* Enable DynAsm's internal consistency checks in debug builds only. */
#if IR_DEBUG
# define DASM_CHECKS
#endif
47
/* A single register-to-register move of a parallel copy
 * (see ir_parallel_copy()). */
typedef struct _ir_copy {
	ir_type type;
	ir_reg from;
	ir_reg to;
} ir_copy;

/* A single move of an out-of-SSA (DeSSA) parallel copy; unlike ir_copy,
 * operands may also be constants or spilled virtual registers
 * (see ir_dessa_parallel_copy()). */
typedef struct _ir_dessa_copy {
	ir_type type;
	int32_t from; /* negative - constant ref, [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
	int32_t to;   /* [0..IR_REG_NUM) - CPU reg, [IR_REG_NUM...) - virtual reg */
} ir_dessa_copy;
59
/* Registers used for passing integer/FP arguments per the target ABI.
 * NOTE(review): these look like C tentative definitions - the actual
 * initializers are presumably supplied by the target-specific emit header
 * included below; confirm against ir_emit_x86.h / ir_emit_aarch64.h. */
#if IR_REG_INT_ARGS
static const int8_t _ir_int_reg_params[IR_REG_INT_ARGS];
#else
static const int8_t *_ir_int_reg_params;
#endif
#if IR_REG_FP_ARGS
static const int8_t _ir_fp_reg_params[IR_REG_FP_ARGS];
#else
static const int8_t *_ir_fp_reg_params;
#endif
70
ir_call_proto(const ir_ctx * ctx,ir_insn * insn)71 static const ir_proto_t *ir_call_proto(const ir_ctx *ctx, ir_insn *insn)
72 {
73 if (IR_IS_CONST_REF(insn->op2)) {
74 const ir_insn *func = &ctx->ir_base[insn->op2];
75
76 if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
77 if (func->proto) {
78 return (const ir_proto_t *)ir_get_str(ctx, func->proto);
79 }
80 }
81 } else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
82 return (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);
83 }
84 return NULL;
85 }
86
#ifdef IR_HAVE_FASTCALL
/* Argument registers for the 32-bit fastcall convention.
 * NOTE(review): like _ir_int_reg_params above, the initializer is presumably
 * supplied by the target-specific emit header - confirm. */
static const int8_t _ir_int_fc_reg_params[IR_REG_INT_FCARGS];
static const int8_t *_ir_fp_fc_reg_params;

/* Check whether the callee of a CALL/TAILCALL instruction uses the fastcall
 * calling convention (only relevant on 32-bit targets). */
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
	if (sizeof(void*) == 4) {
		if (IR_IS_CONST_REF(insn->op2)) {
			const ir_insn *func = &ctx->ir_base[insn->op2];

			if (func->op == IR_FUNC || func->op == IR_FUNC_ADDR) {
				if (func->proto) {
					const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, func->proto);

					return (proto->flags & IR_FASTCALL_FUNC) != 0;
				}
			}
		} else if (ctx->ir_base[insn->op2].op == IR_PROTO) {
			const ir_proto_t *proto = (const ir_proto_t *)ir_get_str(ctx, ctx->ir_base[insn->op2].op2);

			return (proto->flags & IR_FASTCALL_FUNC) != 0;
		}
		return 0;
	}
	return 0;
}
#else
/* fastcall is not supported on this target */
bool ir_is_fastcall(const ir_ctx *ctx, const ir_insn *insn)
{
	return 0;
}
#endif
119
ir_is_vararg(const ir_ctx * ctx,ir_insn * insn)120 bool ir_is_vararg(const ir_ctx *ctx, ir_insn *insn)
121 {
122 const ir_proto_t *proto = ir_call_proto(ctx, insn);
123
124 if (proto) {
125 return (proto->flags & IR_VARARG_FUNC) != 0;
126 }
127 return 0;
128 }
129
/* Return the code-generation rule assigned to instruction "ref" by
 * ir_match(); constants never have rules. */
IR_ALWAYS_INLINE uint32_t ir_rule(const ir_ctx *ctx, ir_ref ref)
{
	IR_ASSERT(!IR_IS_CONST_REF(ref));
	return ctx->rules[ref];
}
135
/* Check whether "ref" belongs to the basic block currently being matched
 * (ctx->bb_start is set by ir_match() to prevent cross-block fusion). */
IR_ALWAYS_INLINE bool ir_in_same_block(ir_ctx *ctx, ir_ref ref)
{
	return ref > ctx->bb_start;
}
140
141
/* Return the CPU register in which the PARAM instruction "ref" is passed
 * according to the calling convention, or IR_REG_NONE when it is passed on
 * the stack.  Walks all PARAMs (uses of the START node) in order, counting
 * integer and FP parameters separately. */
static ir_reg ir_get_param_reg(const ir_ctx *ctx, ir_ref ref)
{
	ir_use_list *use_list = &ctx->use_lists[1];
	int i;
	ir_ref use, *p;
	ir_insn *insn;
	int int_param = 0;
	int fp_param = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
	if (sizeof(void*) == 4 && (ctx->flags & IR_FASTCALL_FUNC)) {
		/* the 32-bit fastcall convention has its own (smaller) register sets */
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
		use = *p;
		insn = &ctx->ir_base[use];
		if (insn->op == IR_PARAM) {
			if (IR_IS_TYPE_INT(insn->type)) {
				if (use == ref) {
					if (int_param < int_reg_params_count) {
						return int_reg_params[int_param];
					} else {
						return IR_REG_NONE;
					}
				}
				int_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				fp_param++;
#endif
			} else {
				IR_ASSERT(IR_IS_TYPE_FP(insn->type));
				if (use == ref) {
					if (fp_param < fp_reg_params_count) {
						return fp_reg_params[fp_param];
					} else {
						return IR_REG_NONE;
					}
				}
				fp_param++;
#ifdef _WIN64
				/* WIN64 calling convention uses a common counter for int and fp registers */
				int_param++;
#endif
			}
		}
	}
	return IR_REG_NONE;
}
200
/* Fill regs[3..] with the CPU registers used to pass the arguments of a
 * CALL/TAILCALL instruction (argument operands start at op3); IR_REG_NONE
 * marks stack-passed arguments.  Returns one past the index of the last
 * register-passed argument, or 0 when no argument goes in a register. */
static int ir_get_args_regs(const ir_ctx *ctx, const ir_insn *insn, int8_t *regs)
{
	int j, n;
	ir_type type;
	int int_param = 0;
	int fp_param = 0;
	int count = 0;
	int int_reg_params_count = IR_REG_INT_ARGS;
	int fp_reg_params_count = IR_REG_FP_ARGS;
	const int8_t *int_reg_params = _ir_int_reg_params;
	const int8_t *fp_reg_params = _ir_fp_reg_params;

#ifdef IR_HAVE_FASTCALL
	if (sizeof(void*) == 4 && ir_is_fastcall(ctx, insn)) {
		/* the 32-bit fastcall convention has its own (smaller) register sets */
		int_reg_params_count = IR_REG_INT_FCARGS;
		fp_reg_params_count = IR_REG_FP_FCARGS;
		int_reg_params = _ir_int_fc_reg_params;
		fp_reg_params = _ir_fp_fc_reg_params;
	}
#endif

	n = insn->inputs_count;
	/* only the first IR_MAX_REG_ARGS arguments can be passed in registers */
	n = IR_MIN(n, IR_MAX_REG_ARGS + 2);
	for (j = 3; j <= n; j++) {
		type = ctx->ir_base[ir_insn_op(insn, j)].type;
		if (IR_IS_TYPE_INT(type)) {
			if (int_param < int_reg_params_count) {
				regs[j] = int_reg_params[int_param];
				count = j + 1;
			} else {
				regs[j] = IR_REG_NONE;
			}
			int_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			fp_param++;
#endif
		} else {
			IR_ASSERT(IR_IS_TYPE_FP(type));
			if (fp_param < fp_reg_params_count) {
				regs[j] = fp_reg_params[fp_param];
				count = j + 1;
			} else {
				regs[j] = IR_REG_NONE;
			}
			fp_param++;
#ifdef _WIN64
			/* WIN64 calling convention uses a common counter for int and fp registers */
			int_param++;
#endif
		}
	}
	return count;
}
255
ir_is_same_mem_var(const ir_ctx * ctx,ir_ref r1,int32_t offset)256 static bool ir_is_same_mem_var(const ir_ctx *ctx, ir_ref r1, int32_t offset)
257 {
258 ir_live_interval *ival1;
259 int32_t o1;
260
261 if (IR_IS_CONST_REF(r1)) {
262 return 0;
263 }
264
265 IR_ASSERT(ctx->vregs[r1]);
266 ival1 = ctx->live_intervals[ctx->vregs[r1]];
267 IR_ASSERT(ival1);
268 o1 = ival1->stack_spill_pos;
269 IR_ASSERT(o1 != -1);
270 return o1 == offset;
271 }
272
/* Resolve a symbol name to an address using the platform's dynamic linker.
 * Returns NULL when the symbol cannot be found. */
void *ir_resolve_sym_name(const char *name)
{
	void *handle = NULL;
	void *addr;

#ifndef _WIN32
# ifdef RTLD_DEFAULT
	/* search the global scope: the main program and all loaded libraries */
	handle = RTLD_DEFAULT;
# endif
	addr = dlsym(handle, name);
#else
	HMODULE mods[256];
	DWORD cbNeeded;
	uint32_t i = 0;

	addr = NULL;

	/* NOTE(review): the return value of EnumProcessModules is not checked
	 * and modules beyond the first 256 are ignored - confirm this is an
	 * acceptable best-effort lookup */
	EnumProcessModules(GetCurrentProcess(), mods, sizeof(mods), &cbNeeded);

	while(i < (cbNeeded / sizeof(HMODULE))) {
		addr = GetProcAddress(mods[i], name);
		if (addr) {
			return addr;
		}
		i++;
	}
#endif
	return addr;
}
302
/* Optional embedder-provided declaration of the snapshot handler
 * (IR_SNAPSHOT_HANDLER is invoked from ir_jmp_addr()). */
#ifdef IR_SNAPSHOT_HANDLER_DCL
	IR_SNAPSHOT_HANDLER_DCL();
#endif
306
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
/* Resolve the address of a symbolic constant, preferring the embedder's
 * loader callback; may return NULL when resolution fails. */
static void* ir_sym_addr(ir_ctx *ctx, const ir_insn *addr_insn)
{
	const char *name = ir_get_str(ctx, addr_insn->val.name);

	if (ctx->loader && ctx->loader->resolve_sym_name) {
		return ctx->loader->resolve_sym_name(ctx->loader, name, 0);
	}
	return ir_resolve_sym_name(name);
}
#endif
318
/* Resolve the address of a symbolic constant, preferring the embedder's
 * loader callback; resolution must succeed (asserted). */
static void* ir_sym_val(ir_ctx *ctx, const ir_insn *addr_insn)
{
	const char *name = ir_get_str(ctx, addr_insn->val.name);
	void *addr;

	if (ctx->loader && ctx->loader->resolve_sym_name) {
		addr = ctx->loader->resolve_sym_name(ctx->loader, name, addr_insn->op == IR_FUNC);
	} else {
		addr = ir_resolve_sym_name(name);
	}
	IR_ASSERT(addr);
	return addr;
}
329
/* Return the concrete target address of a call: named functions are
 * resolved through ir_sym_val(), raw addresses are used as-is. */
static void *ir_call_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
	IR_ASSERT(addr_insn->type == IR_ADDR);
	if (addr_insn->op == IR_FUNC) {
		return ir_sym_val(ctx, addr_insn);
	}
	IR_ASSERT(addr_insn->op == IR_ADDR || addr_insn->op == IR_FUNC_ADDR);
	return (void*)addr_insn->val.addr;
}
343
/* Resolve the target address for a tail-call jump, letting the embedder's
 * snapshot handler override the address when the jump follows a SNAPSHOT. */
static void *ir_jmp_addr(ir_ctx *ctx, ir_insn *insn, ir_insn *addr_insn)
{
	void *addr = ir_call_addr(ctx, insn, addr_insn);

#ifdef IR_SNAPSHOT_HANDLER
	if (ctx->ir_base[insn->op1].op == IR_SNAPSHOT) {
		addr = IR_SNAPSHOT_HANDLER(ctx, insn->op1, &ctx->ir_base[insn->op1], addr);
	}
#endif
	return addr;
}
355
/* Return the register assigned to operand "ref_and_op" of an instruction
 * fused into "root".  Fused operands may be given distinct registers
 * recorded in the ctx->fused_regs string table, keyed by the concatenated
 * (root, ref_and_op) pair; otherwise fall back to the regular ctx->regs
 * assignment.
 *
 * Improvement: the key layout previously hard-coded the offset 4 and the
 * length 8, silently assuming sizeof(ir_ref) == 4; derive both from
 * sizeof(ir_ref) instead (behavior is unchanged while ir_ref is 32-bit). */
static int8_t ir_get_fused_reg(ir_ctx *ctx, ir_ref root, ir_ref ref_and_op)
{
	if (ctx->fused_regs) {
		char key[2 * sizeof(ir_ref)];
		ir_ref val;

		memcpy(key, &root, sizeof(ir_ref));
		memcpy(key + sizeof(ir_ref), &ref_and_op, sizeof(ir_ref));

		val = ir_strtab_find(ctx->fused_regs, key, sizeof(key));
		if (val) {
			return val;
		}
	}
	return ((int8_t*)ctx->regs)[ref_and_op];
}
372
373 #if defined(__GNUC__)
374 # pragma GCC diagnostic push
375 # pragma GCC diagnostic ignored "-Warray-bounds"
376 # pragma GCC diagnostic ignored "-Wimplicit-fallthrough"
377 #endif
378
379 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
380 # include "dynasm/dasm_proto.h"
381 # include "dynasm/dasm_x86.h"
382 #elif defined(IR_TARGET_AARCH64)
383 # include "dynasm/dasm_proto.h"
384 static int ir_add_veneer(dasm_State *Dst, void *buffer, uint32_t ins, int *b, uint32_t *cp, ptrdiff_t offset);
385 # define DASM_ADD_VENEER ir_add_veneer
386 # include "dynasm/dasm_arm64.h"
387 #else
388 # error "Unknown IR target"
389 #endif
390
391 #if defined(__GNUC__)
392 # pragma GCC diagnostic pop
393 #endif
394
/* Forward Declarations */
static void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb);
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg);
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb);

/* State shared by the target-independent parts of the code generator
 * (reached through ctx->data, see ir_const_label()). */
typedef struct _ir_common_backend_data {
	ir_reg_alloc_data ra_data;
	uint32_t dessa_from_block; /* presumably the source block of the DeSSA moves being emitted - confirm at use site */
	dasm_State *dasm_state;    /* DynAsm assembler state */
	ir_bitset emit_constants;  /* constants that must be materialized in the code (see ir_const_label()) */
} ir_common_backend_data;
406
/* Return the DynAsm label used for the constant "ref" and record that the
 * constant must be emitted (labels for constants are allocated after the
 * per-block labels, hence cfg_blocks_count - ref with ref negative). */
static int ir_const_label(ir_ctx *ctx, ir_ref ref)
{
	ir_common_backend_data *data = ctx->data;

	IR_ASSERT(IR_IS_CONST_REF(ref));
	ir_bitset_incl(data->emit_constants, -ref);
	return ctx->cfg_blocks_count - ref;
}
416
417 #if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
418 # include "ir_emit_x86.h"
419 #elif defined(IR_TARGET_AARCH64)
420 # include "ir_emit_aarch64.h"
421 #else
422 # error "Unknown IR target"
423 #endif
424
/* Emit the register loads needed at the OSR (on-stack replacement) entry of
 * block "b": every bound value assigned to a register is re-loaded from its
 * binding slot relative to ctx->spill_base.
 * ctx->osr_entry_loads is a flat list of records (block, count, ref1..refN)
 * terminated by a 0 block number. */
static IR_NEVER_INLINE void ir_emit_osr_entry_loads(ir_ctx *ctx, int b, ir_block *bb)
{
	ir_list *list = (ir_list*)ctx->osr_entry_loads;
	int pos = 0, count, i;
	ir_ref ref;

	IR_ASSERT(ctx->binding);
	IR_ASSERT(list);
	/* locate the record for block "b" */
	while (1) {
		i = ir_list_at(list, pos);
		if (b == i) {
			break;
		}
		IR_ASSERT(i != 0); /* end marker */
		pos++;
		count = ir_list_at(list, pos);
		pos += count + 1;
	}
	pos++;
	count = ir_list_at(list, pos);
	pos++;

	for (i = 0; i < count; i++, pos++) {
		ref = ir_list_at(list, pos);
		IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
		if (!(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILLED)) {
			/* not spilled */
			ir_reg reg = ctx->live_intervals[ctx->vregs[ref]]->reg;
			ir_type type = ctx->ir_base[ref].type;
			int32_t offset = -ir_binding_find(ctx, ref);

			IR_ASSERT(offset > 0);
			ir_emit_load_mem(ctx, type, reg, IR_MEM_BO(ctx->spill_base, offset));
		} else {
			/* spilled values already live in their binding slots */
			IR_ASSERT(ctx->live_intervals[ctx->vregs[ref]]->flags & IR_LIVE_INTERVAL_SPILL_SPECIAL);
		}
	}
}
463
464 /*
465 * Parallel copy sequentialization algorithm
466 *
 * The implementation is based on algorithm 1 described in
468 * "Revisiting Out-of-SSA Translation for Correctness, Code Quality and Efficiency",
469 * Benoit Boissinot, Alain Darte, Fabrice Rastello, Benoit Dupont de Dinechin, Christophe Guillon.
470 * 2009 International Symposium on Code Generation and Optimization, Seattle, WA, USA, 2009,
471 * pp. 114-125, doi: 10.1109/CGO.2009.19.
472 */
/* Sequentialize a parallel copy between CPU registers (see the algorithm
 * reference above).  tmp_reg/tmp_fp_reg are scratch registers used to break
 * copy cycles; they must not collide with any destination.  Returns 1. */
static int ir_parallel_copy(ir_ctx *ctx, ir_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	int i;
	int8_t *pred, *loc, *types;
	ir_reg to, from;
	ir_type type;
	ir_regset todo, ready, srcs;

	/* fast path: a single copy can never conflict */
	if (count == 1) {
		to = copies[0].to;
		from = copies[0].from;
		IR_ASSERT(from != to);
		type = copies[0].type;
		if (IR_IS_TYPE_INT(type)) {
			ir_emit_mov(ctx, type, to, from);
		} else {
			ir_emit_fp_mov(ctx, type, to, from);
		}
		return 1;
	}

	/* per-register tables: loc[r] - current location of the value that
	 * started in r; pred[r] - source of the copy targeting r;
	 * types[r] - type of the value that started in r */
	loc = alloca(IR_REG_NUM * 3 * sizeof(int8_t));
	pred = loc + IR_REG_NUM;
	types = pred + IR_REG_NUM;
	todo = IR_REGSET_EMPTY;
	srcs = IR_REGSET_EMPTY;

	for (i = 0; i < count; i++) {
		from = copies[i].from;
		to = copies[i].to;
		IR_ASSERT(from != to);
		IR_REGSET_INCL(srcs, from);
		loc[from] = from;
		pred[to] = from;
		types[from] = copies[i].type;
		IR_ASSERT(!IR_REGSET_IN(todo, to));
		IR_REGSET_INCL(todo, to);
	}

	/* destinations that are not also sources can be written immediately */
	ready = IR_REGSET_DIFFERENCE(todo, srcs);

	if (ready == todo) {
		/* no overlap at all - emit the moves in the given order */
		for (i = 0; i < count; i++) {
			from = copies[i].from;
			to = copies[i].to;
			IR_ASSERT(from != to);
			type = copies[i].type;
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov(ctx, type, to, from);
			} else {
				ir_emit_fp_mov(ctx, type, to, from);
			}
		}
		return 1;
	}

	/* temporary registers can't be the same as some of the destinations */
	IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_reg));
	IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(todo, tmp_fp_reg));

	/* first we resolve all "windmill blades" - trees (this doesn't require temporary registers) */
	while (ready != IR_REGSET_EMPTY) {
		ir_reg r;

		to = ir_regset_pop_first(&ready);
		from = pred[to];
		r = loc[from];
		type = types[from];
		if (IR_IS_TYPE_INT(type)) {
			ir_emit_mov_ext(ctx, type, to, r);
		} else {
			ir_emit_fp_mov(ctx, type, to, r);
		}
		IR_REGSET_EXCL(todo, to);
		loc[from] = to;
		/* emitting this move may unblock the copy that overwrites "from" */
		if (from == r && IR_REGSET_IN(todo, from)) {
			IR_REGSET_INCL(ready, from);
		}
	}
	if (todo == IR_REGSET_EMPTY) {
		return 1;
	}

	/* at this point the sources that are the same as temporaries are already moved */
	IR_ASSERT(tmp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_reg) || pred[loc[tmp_reg]] == tmp_reg);
	IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !IR_REGSET_IN(srcs, tmp_fp_reg) || pred[loc[tmp_fp_reg]] == tmp_fp_reg);

	/* now we resolve all "windmill axles" - cycles (this requires temporary registers) */
	while (todo != IR_REGSET_EMPTY) {
		to = ir_regset_pop_first(&todo);
		from = pred[to];
		IR_ASSERT(to != loc[from]);
		type = types[from];
		if (IR_IS_TYPE_INT(type)) {
#ifdef IR_HAVE_SWAP_INT
			/* a 2-element cycle is resolved by a single swap */
			if (pred[from] == to) {
				ir_emit_swap(ctx, type, to, from);
				IR_REGSET_EXCL(todo, from);
				loc[to] = from;
				loc[from] = to;
				continue;
			}
#endif
			IR_ASSERT(tmp_reg != IR_REG_NONE);
			IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
			/* break the cycle by saving "to" into the temporary register */
			ir_emit_mov(ctx, type, tmp_reg, to);
			loc[to] = tmp_reg;
		} else {
#ifdef IR_HAVE_SWAP_FP
			/* a 2-element cycle is resolved by a single swap */
			if (pred[from] == to) {
				ir_emit_swap_fp(ctx, type, to, from);
				IR_REGSET_EXCL(todo, from);
				loc[to] = from;
				loc[from] = to;
				continue;
			}
#endif
			IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
			IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
			/* break the cycle by saving "to" into the temporary register */
			ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
			loc[to] = tmp_fp_reg;
		}
		/* emit the remaining moves of the cycle in reverse order */
		while (1) {
			ir_reg r;

			from = pred[to];
			r = loc[from];
			type = types[from];
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov_ext(ctx, type, to, r);
			} else {
				ir_emit_fp_mov(ctx, type, to, r);
			}
			IR_REGSET_EXCL(todo, to);
			loc[from] = to;
			if (from == r && IR_REGSET_IN(todo, from)) {
				to = from;
			} else {
				break;
			}
		}
	}

	return 1;
}
618
/* Emit a single DeSSA move.  "to"/"from" use the ir_dessa_copy encoding:
 * negative - constant reference, [0..IR_REG_NUM) - CPU register,
 * [IR_REG_NUM...) - spilled virtual register (IR_REG_NUM + vreg).
 * tmp_reg/tmp_fp_reg are scratch registers needed for constant->memory and
 * memory->memory moves (IR_REG_NONE when the caller guarantees they are
 * not needed). */
static void ir_emit_dessa_move(ir_ctx *ctx, ir_type type, ir_ref to, ir_ref from, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	ir_mem mem_from, mem_to;

	IR_ASSERT(from != to);
	if (to < IR_REG_NUM) {
		/* destination is a CPU register */
		if (IR_IS_CONST_REF(from)) {
			ir_emit_load(ctx, type, to, from);
		} else if (from < IR_REG_NUM) {
			if (IR_IS_TYPE_INT(type)) {
				ir_emit_mov(ctx, type, to, from);
			} else {
				ir_emit_fp_mov(ctx, type, to, from);
			}
		} else {
			mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
			ir_emit_load_mem(ctx, type, to, mem_from);
		}
	} else {
		/* destination is a spill slot */
		mem_to = ir_vreg_spill_slot(ctx, to - IR_REG_NUM);
		if (IR_IS_CONST_REF(from)) {
#if defined(IR_TARGET_X86) || defined(IR_TARGET_X64)
			/* on x86 a small non-symbolic integer constant can be stored directly */
			if (IR_IS_TYPE_INT(type)
			 && !IR_IS_SYM_CONST(ctx->ir_base[from].op)
			 && (ir_type_size[type] != 8 || IR_IS_SIGNED_32BIT(ctx->ir_base[from].val.i64))) {
				ir_emit_store_mem_imm(ctx, type, mem_to, ctx->ir_base[from].val.i32);
				return;
			}
#endif
			ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
			IR_ASSERT(tmp != IR_REG_NONE);
			ir_emit_load(ctx, type, tmp, from);
			ir_emit_store_mem(ctx, type, mem_to, tmp);
		} else if (from < IR_REG_NUM) {
			ir_emit_store_mem(ctx, type, mem_to, from);
		} else {
			/* memory-to-memory move through a scratch register */
			mem_from = ir_vreg_spill_slot(ctx, from - IR_REG_NUM);
			IR_ASSERT(IR_MEM_VAL(mem_to) != IR_MEM_VAL(mem_from));
			ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
			IR_ASSERT(tmp != IR_REG_NONE);
			ir_emit_load_mem(ctx, type, tmp, mem_from);
			ir_emit_store_mem(ctx, type, mem_to, tmp);
		}
	}
}
664
ir_dessa_resolve_cycle(ir_ctx * ctx,int32_t * pred,int32_t * loc,ir_bitset todo,ir_type type,int32_t to,ir_reg tmp_reg,ir_reg tmp_fp_reg)665 IR_ALWAYS_INLINE void ir_dessa_resolve_cycle(ir_ctx *ctx, int32_t *pred, int32_t *loc, ir_bitset todo, ir_type type, int32_t to, ir_reg tmp_reg, ir_reg tmp_fp_reg)
666 {
667 ir_reg from;
668 ir_mem tmp_spill_slot;
669
670 IR_MEM_VAL(tmp_spill_slot) = 0;
671 IR_ASSERT(!IR_IS_CONST_REF(to));
672 from = pred[to];
673 IR_ASSERT(!IR_IS_CONST_REF(from));
674 IR_ASSERT(from != to);
675 IR_ASSERT(loc[from] == from);
676
677 if (IR_IS_TYPE_INT(type)) {
678 #ifdef IR_HAVE_SWAP_INT
679 if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
680 /* a simple cycle from 2 elements */
681 ir_emit_swap(ctx, type, to, from);
682 ir_bitset_excl(todo, from);
683 ir_bitset_excl(todo, to);
684 loc[to] = from;
685 loc[from] = to;
686 return;
687 }
688 #endif
689 IR_ASSERT(tmp_reg != IR_REG_NONE);
690 IR_ASSERT(tmp_reg >= IR_REG_GP_FIRST && tmp_reg <= IR_REG_GP_LAST);
691 loc[to] = tmp_reg;
692 if (to < IR_REG_NUM) {
693 ir_emit_mov(ctx, type, tmp_reg, to);
694 } else {
695 ir_emit_load_mem_int(ctx, type, tmp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
696 }
697 } else {
698 #ifdef IR_HAVE_SWAP_FP
699 if (pred[from] == to && to < IR_REG_NUM && from < IR_REG_NUM) {
700 /* a simple cycle from 2 elements */
701 ir_emit_swap_fp(ctx, type, to, from);
702 IR_REGSET_EXCL(todo, from);
703 IR_REGSET_EXCL(todo, to);
704 loc[to] = from;
705 loc[from] = to;
706 return;
707 }
708 #endif
709 IR_ASSERT(tmp_fp_reg != IR_REG_NONE);
710 IR_ASSERT(tmp_fp_reg >= IR_REG_FP_FIRST && tmp_fp_reg <= IR_REG_FP_LAST);
711 loc[to] = tmp_fp_reg;
712 if (to < IR_REG_NUM) {
713 ir_emit_fp_mov(ctx, type, tmp_fp_reg, to);
714 } else {
715 ir_emit_load_mem_fp(ctx, type, tmp_fp_reg, ir_vreg_spill_slot(ctx, to - IR_REG_NUM));
716 }
717 }
718
719 while (1) {
720 int32_t r;
721
722 from = pred[to];
723 r = loc[from];
724
725 if (from == r && ir_bitset_in(todo, from)) {
726 /* Memory to memory move inside an isolated or "blocked" cycle requres an additional temporary register */
727 if (to >= IR_REG_NUM && r >= IR_REG_NUM) {
728 ir_reg tmp = IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg;
729
730 if (!IR_MEM_VAL(tmp_spill_slot)) {
731 /* Free a register, saving it in a temporary spill slot */
732 tmp_spill_slot = IR_MEM_BO(IR_REG_STACK_POINTER, -16);
733 ir_emit_store_mem(ctx, type, tmp_spill_slot, tmp);
734 }
735 ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
736 } else {
737 ir_emit_dessa_move(ctx, type, to, r, IR_REG_NONE, IR_REG_NONE);
738 }
739 ir_bitset_excl(todo, to);
740 loc[from] = to;
741 to = from;
742 } else {
743 break;
744 }
745 }
746 if (IR_MEM_VAL(tmp_spill_slot)) {
747 ir_emit_load_mem(ctx, type, IR_IS_TYPE_INT(type) ? tmp_reg : tmp_fp_reg, tmp_spill_slot);
748 }
749 ir_emit_dessa_move(ctx, type, to, loc[from], IR_REG_NONE, IR_REG_NONE);
750 ir_bitset_excl(todo, to);
751 loc[from] = to;
752 }
753
/* Sequentialize a DeSSA parallel copy (same algorithm reference as
 * ir_parallel_copy()).  Unlike ir_parallel_copy(), operands may also be
 * constants and spilled virtual registers, so the worklists are kept in
 * heap-allocated bitsets indexed by the ir_dessa_copy encoding.
 * Returns 1. */
static int ir_dessa_parallel_copy(ir_ctx *ctx, ir_dessa_copy *copies, int count, ir_reg tmp_reg, ir_reg tmp_fp_reg)
{
	int i;
	int32_t *pred, *loc, to, from;
	int8_t *types;
	ir_type type;
	uint32_t len;
	ir_bitset todo, ready, srcs, visited;

	/* fast path: a single copy can never conflict */
	if (count == 1) {
		to = copies[0].to;
		from = copies[0].from;
		IR_ASSERT(from != to);
		type = copies[0].type;
		ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
		return 1;
	}

	/* tables indexed by location: loc[l] - current location of the value
	 * that started in l; pred[l] - source of the copy targeting l;
	 * types[l] - type of the value copied into l */
	len = IR_REG_NUM + ctx->vregs_count + 1;
	todo = ir_bitset_malloc(len);
	srcs = ir_bitset_malloc(len);
	loc = ir_mem_malloc(len * 2 * sizeof(int32_t) + len * sizeof(int8_t));
	pred = loc + len;
	types = (int8_t*)(pred + len);

	for (i = 0; i < count; i++) {
		from = copies[i].from;
		to = copies[i].to;
		IR_ASSERT(from != to);
		if (!IR_IS_CONST_REF(from)) {
			ir_bitset_incl(srcs, from);
			loc[from] = from;
		}
		pred[to] = from;
		types[to] = copies[i].type;
		IR_ASSERT(!ir_bitset_in(todo, to));
		ir_bitset_incl(todo, to);
	}

	/* temporary registers can't be the same as some of the sources */
	IR_ASSERT(tmp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_reg));
	IR_ASSERT(tmp_fp_reg == IR_REG_NONE || !ir_bitset_in(srcs, tmp_fp_reg));

	/* first we resolve all "windmill blades" - trees, that don't set temporary registers */
	ready = ir_bitset_malloc(len);
	ir_bitset_copy(ready, todo, ir_bitset_len(len));
	ir_bitset_difference(ready, srcs, ir_bitset_len(len));
	if (tmp_reg != IR_REG_NONE) {
		ir_bitset_excl(ready, tmp_reg);
	}
	if (tmp_fp_reg != IR_REG_NONE) {
		ir_bitset_excl(ready, tmp_fp_reg);
	}
	while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
		ir_bitset_excl(todo, to);
		type = types[to];
		from = pred[to];
		if (IR_IS_CONST_REF(from)) {
			ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
		} else {
			int32_t r = loc[from];
			ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
			loc[from] = to;
			/* emitting this move may unblock the copy that overwrites "from" */
			if (from == r && ir_bitset_in(todo, from) && from != tmp_reg && from != tmp_fp_reg) {
				ir_bitset_incl(ready, from);
			}
		}
	}

	/* then we resolve all "windmill axles" - cycles (this requires temporary registers) */
	visited = ir_bitset_malloc(len);
	ir_bitset_copy(ready, todo, ir_bitset_len(len));
	ir_bitset_intersection(ready, srcs, ir_bitset_len(len));
	while ((to = ir_bitset_first(ready, ir_bitset_len(len))) >= 0) {
		/* follow the predecessor chain; revisiting a node means a cycle */
		ir_bitset_clear(visited, ir_bitset_len(len));
		ir_bitset_incl(visited, to);
		to = pred[to];
		while (!IR_IS_CONST_REF(to) && ir_bitset_in(ready, to)) {
			to = pred[to];
			if (!IR_IS_CONST_REF(to) && ir_bitset_in(visited, to)) {
				/* We found a cycle. Resolve it. */
				ir_bitset_incl(visited, to);
				type = types[to];
				ir_dessa_resolve_cycle(ctx, pred, loc, todo, type, to, tmp_reg, tmp_fp_reg);
				break;
			}
			ir_bitset_incl(visited, to);
		}
		ir_bitset_difference(ready, visited, ir_bitset_len(len));
	}

	/* finally we resolve remaining "windmill blades" - trees that set temporary registers */
	ir_bitset_copy(ready, todo, ir_bitset_len(len));
	ir_bitset_difference(ready, srcs, ir_bitset_len(len));
	while ((to = ir_bitset_pop_first(ready, ir_bitset_len(len))) >= 0) {
		ir_bitset_excl(todo, to);
		type = types[to];
		from = pred[to];
		if (IR_IS_CONST_REF(from)) {
			ir_emit_dessa_move(ctx, type, to, from, tmp_reg, tmp_fp_reg);
		} else {
			int32_t r = loc[from];
			ir_emit_dessa_move(ctx, type, to, r, tmp_reg, tmp_fp_reg);
			loc[from] = to;
			if (from == r && ir_bitset_in(todo, from)) {
				ir_bitset_incl(ready, from);
			}
		}
	}

	IR_ASSERT(ir_bitset_empty(todo, ir_bitset_len(len)));

	ir_mem_free(visited);
	ir_mem_free(ready);
	ir_mem_free(loc);
	ir_mem_free(srcs);
	ir_mem_free(todo);
	return 1;
}
873
/* Emit the DeSSA moves required on the edge from block "b" to its single
 * successor: one parallel copy that feeds every PHI of the successor with
 * the value coming from "b". */
static void ir_emit_dessa_moves(ir_ctx *ctx, int b, ir_block *bb)
{
	uint32_t succ, k, n = 0;
	ir_block *succ_bb;
	ir_use_list *use_list;
	ir_ref i, *p;
	ir_dessa_copy *copies;
	/* scratch registers reserved at the block's end instruction */
	ir_reg tmp_reg = ctx->regs[bb->end][0];
	ir_reg tmp_fp_reg = ctx->regs[bb->end][1];

	IR_ASSERT(bb->successors_count == 1);
	succ = ctx->cfg_edges[bb->successors];
	succ_bb = &ctx->cfg_blocks[succ];
	IR_ASSERT(succ_bb->predecessors_count > 1);
	use_list = &ctx->use_lists[succ_bb->start];
	/* PHI operand index corresponding to the edge from "b" */
	k = ir_phi_input_number(ctx, succ_bb, b);

	copies = alloca(use_list->count * sizeof(ir_dessa_copy));

	for (i = 0, p = &ctx->use_edges[use_list->refs]; i < use_list->count; i++, p++) {
		ir_ref ref = *p;
		ir_insn *insn = &ctx->ir_base[ref];

		if (insn->op == IR_PHI) {
			ir_ref input = ir_insn_op(insn, k);
			ir_reg src = ir_get_alocated_reg(ctx, ref, k);
			ir_reg dst = ctx->regs[ref][0];
			ir_ref from, to;

			IR_ASSERT(dst == IR_REG_NONE || !IR_REG_SPILLED(dst));
			/* encode both operands in the ir_dessa_copy scheme:
			 * constant ref, CPU register, or IR_REG_NUM + vreg */
			if (IR_IS_CONST_REF(input)) {
				from = input;
			} else {
				from = (src != IR_REG_NONE && !IR_REG_SPILLED(src)) ?
					(ir_ref)src : (ir_ref)(IR_REG_NUM + ctx->vregs[input]);
			}
			to = (dst != IR_REG_NONE) ?
				(ir_ref)dst : (ir_ref)(IR_REG_NUM + ctx->vregs[ref]);
			if (to != from) {
				if (to >= IR_REG_NUM
				 && from >= IR_REG_NUM
				 && IR_MEM_VAL(ir_vreg_spill_slot(ctx, from - IR_REG_NUM)) ==
						IR_MEM_VAL(ir_vreg_spill_slot(ctx, to - IR_REG_NUM))) {
					/* It's possible that different virtual registers share the same special spill slot */
					// TODO: See ext/opcache/tests/jit/gh11917.phpt failure on Linux 32-bit
					continue;
				}
				copies[n].type = insn->type;
				copies[n].from = from;
				copies[n].to = to;
				n++;
			}
		}
	}

	if (n > 0) {
		ir_dessa_parallel_copy(ctx, copies, n, tmp_reg, tmp_fp_reg);
	}
}
933
/* Instruction-selection pass: assign a code-generation rule to every
 * instruction (stored in ctx->rules), walking blocks and instructions
 * backwards so operands can be fused into their users.  Also collects
 * ENTRY blocks into ctx->entries and marks empty blocks.
 * Returns 1 on success. */
int ir_match(ir_ctx *ctx)
{
	uint32_t b;
	ir_ref start, ref, *prev_ref;
	ir_block *bb;
	ir_insn *insn;
	uint32_t entries_count = 0;

	ctx->rules = ir_mem_calloc(ctx->insns_count, sizeof(uint32_t));

	/* backward walking needs the prev-ref links */
	prev_ref = ctx->prev_ref;
	if (!prev_ref) {
		ir_build_prev_refs(ctx);
		prev_ref = ctx->prev_ref;
	}

	if (ctx->entries_count) {
		ctx->entries = ir_mem_malloc(ctx->entries_count * sizeof(ir_ref));
	}

	for (b = ctx->cfg_blocks_count, bb = ctx->cfg_blocks + b; b > 0; b--, bb--) {
		IR_ASSERT(!(bb->flags & IR_BB_UNREACHABLE));
		start = bb->start;
		if (UNEXPECTED(bb->flags & IR_BB_ENTRY)) {
			/* record ENTRY blocks; insn->op3 becomes the entry index */
			IR_ASSERT(entries_count < ctx->entries_count);
			insn = &ctx->ir_base[start];
			IR_ASSERT(insn->op == IR_ENTRY);
			insn->op3 = entries_count;
			ctx->entries[entries_count] = b;
			entries_count++;
		}
		ctx->rules[start] = IR_SKIPPED | IR_NOP;
		ref = bb->end;
		if (bb->successors_count == 1) {
			insn = &ctx->ir_base[ref];
			if (insn->op == IR_END || insn->op == IR_LOOP_END) {
				ctx->rules[ref] = insn->op;
				ref = prev_ref[ref];
				/* a block holding only its start and END (and not looping
				 * back to itself) can be marked "empty" */
				if (ref == start && ctx->cfg_edges[bb->successors] != b) {
					if (EXPECTED(!(bb->flags & IR_BB_ENTRY))) {
						bb->flags |= IR_BB_EMPTY;
					} else if (ctx->flags & IR_MERGE_EMPTY_ENTRIES) {
						bb->flags |= IR_BB_EMPTY;
						if (ctx->cfg_edges[bb->successors] == b + 1) {
							(bb + 1)->flags |= IR_BB_PREV_EMPTY_ENTRY;
						}
					}
					continue;
				}
			}
		}

		ctx->bb_start = start; /* bb_start is used by matcher to avoid fusion of insns from different blocks */

		/* match backwards; instructions already assigned a rule (fused
		 * by a later user) keep it */
		while (ref != start) {
			uint32_t rule = ctx->rules[ref];

			if (!rule) {
				ctx->rules[ref] = rule = ir_match_insn(ctx, ref);
			}
			ir_match_insn2(ctx, ref, rule);
			ref = prev_ref[ref];
		}
	}

	if (ctx->entries_count) {
		/* shrink to the number of ENTRY blocks actually found */
		ctx->entries_count = entries_count;
		if (!entries_count) {
			ir_mem_free(ctx->entries);
			ctx->entries = NULL;
		}
	}

	return 1;
}
1009
ir_get_spill_slot_offset(ir_ctx * ctx,ir_ref ref)1010 int32_t ir_get_spill_slot_offset(ir_ctx *ctx, ir_ref ref)
1011 {
1012 int32_t offset;
1013
1014 IR_ASSERT(ref >= 0 && ctx->vregs[ref] && ctx->live_intervals[ctx->vregs[ref]]);
1015 offset = ctx->live_intervals[ctx->vregs[ref]]->stack_spill_pos;
1016 IR_ASSERT(offset != -1);
1017 return IR_SPILL_POS_TO_OFFSET(offset);
1018 }
1019