1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /* x86 64-bit arch dependent functions. */
28
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
32
/* Emits a 64 bit move-immediate (REX.W + B8+rd imm64) loading the
   constant 'imm' into general purpose register 'reg'. */
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	/* Two opcode bytes (REX prefix + MOV_r_i32) followed by the 8 byte immediate. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	/* REX.B extends the register field for r8-r15 (reg_map >= 8). */
	inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	inst[1] = U8(MOV_r_i32 | reg_lmap[reg]);
	sljit_unaligned_store_sw(inst + 2, imm);
	return SLJIT_SUCCESS;
}
45
/* Emits an optional REX prefix, a single opcode byte, and a 32 bit
   immediate taken from the low 32 bits of 'imm'. */
static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	/* Optional REX + opcode + imm32. */
	sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, (sljit_s32)imm);
	return SLJIT_SUCCESS;
}
60
/* Core instruction encoder: emits the legacy prefixes, REX prefix,
   mod r/m byte, optional SIB byte, displacement and immediate fields
   of one instruction. The low four bits of 'size' hold the opcode
   byte count; the upper bits hold EX86_* flags. Operand 'a' is a
   register or SLJIT_IMM (value in 'imma'); operand 'b' is a register
   or memory operand (never an immediate).

   Returns a pointer to the position where the caller must write the
   opcode byte(s), or NULL on allocation failure. The size of the
   instruction is computed first (inst_size), then the bytes are
   encoded into the reserved buffer space. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
	/* At most one of the F2/F3/66 mandatory prefixes may be requested. */
	SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
	SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);

	size &= 0xf;
	/* The mod r/m byte is always present. */
	inst_size = size + 1;

	/* REX.W selects 64 bit operand size unless suppressed by EX86_NO_REXW. */
	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		inst_size++;

	/* Calculate size of b. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
			/* A displacement that does not fit in 32 bits must be
			   materialized in TMP_REG2 and used as (index) register. */
			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
			immb = 0;
			if (b & REG_MASK)
				b |= TO_OFFS_REG(TMP_REG2);
			else
				b |= TMP_REG2;
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			} else if (reg_lmap[b & REG_MASK] == 5) {
				/* Base register 5 (rbp/r13) cannot be encoded with mod=00,
				   so a zero displacement byte is needed unless the base
				   and index registers can be swapped. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			/* Base register 4 (rsp/r12) always requires a SIB byte. */
			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	} else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	} else if (freg_map[b] >= 8)
		rex |= REX_B;

	/* VEX extended opcodes need one more byte when REX.W/R/X/B-like
	   bits are present (three byte VEX form, detected later). */
	if ((flags & EX86_VEX_EXT) && (rex & 0x3)) {
		SLJIT_ASSERT(size == 2);
		size++;
		inst_size++;
	}

	/* Calculate size of the immediate operand 'a'. */
	if (a == SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		} else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
			/* Shift by 1 has a dedicated opcode without an immediate. */
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	} else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding prefixes. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	else if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	else if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* Rex is always the last prefix. */
	if (rex)
		*inst++ = rex;

	/* 'inst' now points to the opcode position (returned to the caller);
	   buf_ptr points to the mod r/m byte after the opcode. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a == SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	} else {
		if (a == SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register direct form (mod = 11). */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* 0x40: mod = 01 (disp8); 0x80: mod = 10 (disp32). */
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				/* rm = 100 selects the SIB byte. */
				buf_ptr[0] |= 0x04;
				buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
				buf_ptr += 2;
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		} else {
			/* Base + scaled index; immb holds the scale shift (0-3). */
			if (reg_lmap_b == 5)
				*buf_ptr |= 0x40;

			buf_ptr[0] |= 0x04;
			buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
			buf_ptr += 2;

			if (reg_lmap_b == 5)
				*buf_ptr++ = 0;
		}
	} else {
		/* Absolute addressing: SIB byte 0x25 (no base, no index) + disp32. */
		buf_ptr[0] |= 0x04;
		buf_ptr[1] = 0x25;
		buf_ptr += 2;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	/* Finally the immediate operand, if any. */
	if (a == SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	return inst;
}
285
/* Emits a VEX encoded instruction. The instruction is first emitted
   through emit_x86_instruction with a forced REX prefix, then the
   REX byte (and the 0x0f escape space) is rewritten in place into a
   two byte (0xc5) or three byte (0xc4) VEX prefix. Operand 'v' is
   the extra VEX.vvvv register operand. */
static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
	/* The first and second register operand. */
	sljit_s32 a, sljit_s32 v,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 vex = 0;
	sljit_u8 vex_m = 0;
	sljit_uw size;

	/* At most one mandatory prefix can be selected. */
	SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);

	/* Force a REX prefix so a byte is reserved for the VEX rewrite below. */
	op |= EX86_REX;

	/* VEX.mmmmm opcode map selector. */
	if (op & VEX_OP_0F38)
		vex_m = 0x2;
	else if (op & VEX_OP_0F3A)
		vex_m = 0x3;

	if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {
		/* VEX.W requires the three byte form (map >= 1). */
		if (vex_m == 0)
			vex_m = 0x1;

		vex |= 0x80;
	}

	/* VEX.pp implied mandatory prefix bits. */
	if (op & EX86_PREF_66)
		vex |= 0x1;
	else if (op & EX86_PREF_F2)
		vex |= 0x3;
	else if (op & EX86_PREF_F3)
		vex |= 0x2;

	op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);

	/* VEX.L: 256 bit vector length. */
	if (op & VEX_256)
		vex |= 0x4;

	/* VEX.vvvv is the inverted register id of 'v'. */
	vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));

	size = op & ~(sljit_uw)0xff;
	size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;

	inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
	FAIL_IF(!inst);

	SLJIT_ASSERT((inst[-1] & 0xf0) == REX);

	/* If X or B is present in REX prefix. */
	if (vex_m == 0 && inst[-1] & 0x3)
		vex_m = 0x1;

	if (vex_m == 0) {
		/* Two byte VEX: only the inverted R bit is carried over. */
		vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);

		inst[-1] = 0xc5;
		inst[0] = vex;
		inst[1] = U8(op);
		return SLJIT_SUCCESS;
	}

	/* Three byte VEX: inverted R/X/B bits plus the map selector. */
	vex_m |= U8((inst[-1] ^ 0x7) << 5);
	inst[-1] = 0xc4;
	inst[0] = vex_m;
	inst[1] = vex;
	inst[2] = U8(op);
	return SLJIT_SUCCESS;
}
356
357 /* --------------------------------------------------------------------- */
358 /* Enter / return */
359 /* --------------------------------------------------------------------- */
360
/* Generates a far (absolute target) jump or call: the target address
   is loaded into TMP_REG2 with a mov immediate, then an indirect
   jmp/call through TMP_REG2 is emitted. Conditional jumps are
   preceded by an inverted short conditional jump that skips the far
   jump sequence. Returns the code pointer after the emitted bytes. */
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	/* A fixed (non-rewritable) target that fits in 32 bits can use the
	   shorter 32 bit mov immediate form. */
	sljit_s32 short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff);

	/* The relative jump below specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG);

	if (type < SLJIT_JUMP) {
		/* Invert type. */
		code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10);
		/* Skip distance: mov (6 or 10 bytes) + indirect jump (3 bytes). */
		code_ptr[1] = short_addr ? (6 + 3) : (10 + 3);
		code_ptr += 2;
	}

	code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B);
	code_ptr[1] = U8(MOV_r_i32 | reg_lmap[TMP_REG2]);
	code_ptr += 2;
	jump->addr = (sljit_uw)code_ptr;

	if (!(jump->flags & JUMP_ADDR))
		jump->flags |= PATCH_MD;
	else if (short_addr)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);

	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

	/* FF /4 (jmp) or FF /2 (call) with register direct mod r/m. */
	code_ptr[0] = REX_B;
	code_ptr[1] = GROUP_FF;
	code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);

	return code_ptr + 3;
}
397
/* Post-processes an already emitted "mov reg, imm64" that loads a
   label/target address: shrinks it in place to a RIP relative lea
   (when the target is within +-2GB) or to a 32 bit mov immediate
   (when the address fits in 32 bits), marking the jump with the
   matching patch flag. Returns the adjusted end-of-code pointer. */
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10);
	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size;

	if (addr > 0xffffffffl) {
		/* Address does not fit in 32 bits; try a RIP relative lea. */
		diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

		if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) {
			SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7);
			/* Shrink the imm64 (8 bytes) to a disp32 (4 bytes) operand. */
			code_ptr -= SSIZE_OF(s32) - 1;

			SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W);
			SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32);

			/* Rewrite "REX.W mov r, imm64" into "REX.W lea r, [rip+disp32]":
			   the B bit of REX moves to R, and the register id moves from
			   the opcode byte into the mod r/m reg field (rm = 101 = RIP). */
			code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2));
			code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5);
			code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m;

			jump->flags |= PATCH_MW;
			return code_ptr;
		}

		/* Keep the full 64 bit mov; only the immediate is patched later. */
		jump->flags |= PATCH_MD;
		return code_ptr;
	}

	/* Address fits in 32 bits: shrink to the 32 bit mov immediate form. */
	code_ptr -= 2 + sizeof(sljit_uw);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

	if ((code_ptr[0] & 0x07) != 0) {
		/* High register (r8-r15): keep REX.B, drop REX.W. */
		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6);
		code_ptr[0] = U8(code_ptr[0] & ~0x08);
		code_ptr += 2 + sizeof(sljit_s32);
	} else {
		/* Low register: the REX prefix can be dropped entirely. */
		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5);
		code_ptr[0] = code_ptr[1];
		code_ptr += 1 + sizeof(sljit_s32);
	}

	return code_ptr;
}
449
#ifdef _WIN64
/* Two machine words (16 bytes): the stack space reserved per saved
   SSE2 (xmm) register; used with GET_SAVED_FLOAT_REGISTERS_SIZE. */
typedef struct {
	sljit_sw regs[2];
} sljit_sse2_reg;
#endif /* _WIN64 */
455
/* Emits the function prologue: pushes the saved registers, moves the
   incoming arguments (SysV or Windows x64 ABI) into their SLJIT
   registers, allocates the 16 byte aligned local area, and on Windows
   saves the callee-saved xmm registers and probes large stacks page
   by page. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw size;
	sljit_s32 word_arg_count = 0;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
	sljit_s32 saved_float_regs_offset = 0;
	sljit_s32 float_arg_count = 0;
#endif /* _WIN64 */
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Register-argument calling convention: no argument shuffling needed. */
	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	/* Emit ENDBR64 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	compiler->mode32 = 0;

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	/* Push saved registers (excluding the kept ones), then saved scratches. */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_size > 0) {
		/* xmm save area is placed 16 byte aligned above the locals. */
		saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
		local_size = saved_float_regs_offset + saved_float_regs_size;
	}
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	arg_types >>= SLJIT_ARG_SHIFT;

	/* Move incoming arguments from their ABI registers into SLJIT
	   argument/saved registers. 'tmp' is the ABI register holding the
	   current argument (expressed as an SLJIT register). */
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			tmp = 0;
#ifndef _WIN64
			switch (word_arg_count) {
			case 0:
				tmp = SLJIT_R2;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = TMP_REG1;
				break;
			default:
				tmp = SLJIT_R3;
				break;
			}
#else /* !_WIN64 */
			/* Windows x64: argument position counts both word and float args. */
			switch (word_arg_count + float_arg_count) {
			case 0:
				tmp = SLJIT_R3;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = SLJIT_R2;
				break;
			default:
				tmp = TMP_REG1;
				break;
			}
#endif /* _WIN64 */
			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
				if (tmp != SLJIT_R0 + word_arg_count)
					EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
				saved_arg_count++;
			}
			word_arg_count++;
		} else {
#ifdef _WIN64
			SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
			float_arg_count++;
			/* The incoming xmm slot index differs from the target register
			   whenever word arguments precede this float argument. */
			if (float_arg_count != float_arg_count + word_arg_count)
				FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
					float_arg_count, float_arg_count + word_arg_count, 0));
#endif /* _WIN64 */
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Keep the stack 16 byte aligned including the pushed registers. */
	local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 0) {
		/* Stack probing: touch each 4K page so the guard page mechanism
		   can grow the stack. Up to four pages are probed inline. */
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			/* Larger areas are probed in a loop: TMP_REG1 counts pages. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);

			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
			BINARY_IMM32(SUB, 1, TMP_REG1, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			/* -21: byte distance back to the mov at the loop top. */
			inst[1] = (sljit_u8)-21;
			local_size &= 0xfff;
		}

		if (local_size > 0)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif /* _WIN64 */

	if (local_size > 0)
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#ifdef _WIN64
	if (saved_float_regs_size > 0) {
		compiler->mode32 = 1;

		/* Save callee-saved and saved-scratch xmm registers. */
		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}
	}
#endif /* _WIN64 */

	return SLJIT_SUCCESS;
}
631
/* Records the function context (register/local usage) without emitting
   code; computes the same 16 byte aligned local size as sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_regs_size;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
#endif /* _WIN64 */

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	/* Mirror the xmm save area layout computed in sljit_emit_enter. */
	if (saved_float_regs_size > 0)
		local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	return SLJIT_SUCCESS;
}
660
/* Emits the function epilogue (without the final ret/jump): restores
   the callee-saved xmm registers on Windows, releases the local area
   and pops the saved registers in the reverse of the prologue order.
   When 'is_return_to' is set, one extra stack word is kept (or later
   released) for the return-to target. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_uw size;
	sljit_s32 local_size, i, tmp;
	sljit_u8 *inst;
#ifdef _WIN64
	sljit_s32 saved_float_regs_offset;
	sljit_s32 fscratches = compiler->fscratches;
	sljit_s32 fsaveds = compiler->fsaveds;
#endif /* _WIN64 */

#ifdef _WIN64
	saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_offset > 0) {
		compiler->mode32 = 1;
		/* Same 16 byte aligned offset as computed in sljit_emit_enter. */
		saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		compiler->mode32 = 0;
	}
#endif /* _WIN64 */

	local_size = compiler->local_size;

	/* If no registers were pushed, the extra word for return-to can be
	   folded into the local size adjustment. */
	if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	/* Pop saved scratches, then saved registers (reverse of prologue). */
	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	/* Discard the return address slot when tail-jumping elsewhere. */
	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}
731
/* Emits a void return: releases the stack frame and emits a near ret. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	compiler->mode32 = 0;

	FAIL_IF(emit_stack_frame_release(compiler, 0));
	return emit_byte(compiler, RET_near);
}
742
/* Releases the stack frame and jumps to 'src' (tail transfer). Memory
   operands and saved registers (which are popped by the frame release)
   are first copied into TMP_REG2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	compiler->mode32 = 0;

	if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		ADJUST_LOCAL_OFFSET(src, srcw);

		/* The source would be clobbered by the frame release below. */
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
764
765 /* --------------------------------------------------------------------- */
766 /* Call / return instructions */
767 /* --------------------------------------------------------------------- */
768
769 #ifndef _WIN64
770
/* SysV x86-64: moves the SLJIT argument registers into the ABI
   argument registers before a call. Only SLJIT_R0 -> rdx (third ABI
   argument register, mapped to SLJIT_R2) needs moving; R2 is saved in
   TMP_REG1 first when three or more word arguments are passed. If the
   call target register would be clobbered, *src_ptr is redirected. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 word_arg_count = 0;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count == 0)
		return SLJIT_SUCCESS;

	if (word_arg_count >= 3) {
		/* The third argument lives in SLJIT_R2 and is about to be
		   overwritten; preserve it (and redirect the call target). */
		if (src == SLJIT_R2)
			*src_ptr = TMP_REG1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	}

	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}
799
800 #else
801
/* Windows x64: moves the SLJIT argument registers/xmm registers into
   the ABI argument positions before a call. The first pass scans the
   argument types (recording them reversed in 'types') and decides
   whether any transfer is needed; the second pass performs the moves
   from last argument to first. If the call target register would be
   clobbered, it is preserved in TMP_REG2 and *src_ptr is redirected. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 arg_count = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_s32 data_transfer = 0;
	/* ABI register for the Nth (1-based) argument position. */
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		/* Collect the types in reverse order for the second pass. */
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
		case SLJIT_ARG_TYPE_F32:
			arg_count++;
			float_arg_count++;

			/* The xmm slot index differs from the SLJIT float register. */
			if (arg_count != float_arg_count)
				data_transfer = 1;
			break;
		default:
			arg_count++;
			word_arg_count++;

			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
				data_transfer = 1;

				/* Preserve the call target before it is overwritten. */
				if (src == word_arg_regs[arg_count]) {
					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
					*src_ptr = TMP_REG2;
				}
			}
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (!data_transfer)
		return SLJIT_SUCCESS;

	/* Second pass: move arguments into place, last argument first. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		default:
			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
			arg_count--;
			word_arg_count--;
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
877
878 #endif
879
/* Emits a direct call: shuffles arguments into ABI registers (unless
   the register-argument convention is used), optionally releases the
   frame for a tail call, then emits the jump/call. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	compiler->mode32 = 0;

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: release the frame and turn the call into a jump. */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
899
/* Emits an indirect call through 'src': loads memory/saved-register
   targets into TMP_REG2 (they would be invalidated by the frame
   release or argument shuffling), then emits the indirect jump/call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	compiler->mode32 = 0;

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* Saved registers are popped by the frame release below. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler, 0));
	}

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(call_with_args(compiler, arg_types, &src));

	if (type & SLJIT_CALL_RETURN)
		type = SLJIT_JUMP;

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
933
/* Pops the return address pushed by a fast call into 'dst':
   a single pop reg (with REX.B for r8-r15) or pop r/m for memory. */
static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8)
			return emit_byte(compiler, U8(POP_r + reg_lmap[dst]));

		/* r8-r15 need a REX.B prefix before the pop opcode. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = POP_rm;
	return SLJIT_SUCCESS;
}
957
/* Pushes the return address from 'src' back onto the stack and emits
   a ret, returning control to the fast-call return point. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			/* push reg + the trailing ret byte. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			/* r8-r15 need a REX.B prefix before the push opcode. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		/* push r/m (FF /6). */
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		/* Reserve the byte for the ret below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
995
/* Loads the caller's return address into 'dst': it is stored on the
   stack just above the local area and the pushed saved registers. */
static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 saved_regs_size;

	compiler->mode32 = 0;
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
	return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size);
}
1005
1006 /* --------------------------------------------------------------------- */
1007 /* Other operations */
1008 /* --------------------------------------------------------------------- */
1009
/* Emits a conditional select: dst_reg = condition(type) ? src1 : src2_reg.
   Uses CMOVcc when the CPU supports it, otherwise falls back to a generic
   branch-based sequence. Inverting the low bit of type inverts the
   condition (jump codes come in taken/not-taken pairs). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	compiler->mode32 = type & SLJIT_32;
	type &= ~SLJIT_32;

	if (dst_reg != src2_reg) {
		if (dst_reg == src1) {
			/* dst already holds src1: conditionally move src2_reg instead,
			   with the condition inverted. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
			/* Writing dst_reg first would corrupt src1's address: load src1
			   now and conditionally move src2_reg with inverted condition. */
			EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else
			EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0);
	}

	if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) {
		/* CMOVcc has no immediate form; materialize the constant first. */
		if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
			EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w);
			src1 = TMP_REG2;
			src1w = 0;
		}

		/* CMOVcc opcode (0F 40+cc) = Jcc opcode (0F 80+cc) - 0x40. */
		return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w);
	}

	return emit_cmov_generic(compiler, type, dst_reg, src1, src1w);
}
1049
/* Loads or stores a register pair (REG_PAIR) as two consecutive machine
   words at [mem + memw] and [mem + memw + sizeof(sljit_sw)]; non-pair
   accesses are forwarded to sljit_emit_mem_unaligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	compiler->mode32 = 0;

	if ((mem & REG_MASK) == 0) {
		/* Absolute address: materialize it in TMP_REG1. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM1(TMP_REG1);
		memw = 0;
	} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
		/* Displacement (including the second word's extra offset) does not
		   fit in a signed 32-bit field: move it into an index register. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
		memw = 0;
	}

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	/* next > 0: process pair in order; next < 0: reversed (see below). */
	next = SSIZE_OF(sw);

	/* For loads, a destination register may also serve as the base or index
	   of the address; order the two moves (or copy the index to TMP_REG1)
	   so the address is not clobbered before the second access. */
	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* Base and offset cannot be TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			/* Load the second word first, so the register used in the
			   address is overwritten last. */
			next = -SSIZE_OF(sw);

			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			/* Second word of a base+index access: hand-encode
			   REX.W MOV with a SIB byte and disp8 = sizeof(sljit_sw). */
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
			FAIL_IF(!inst);

			INC_SIZE(5);

			inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
			inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			/* ModRM: mod=01 (disp8 follows), r/m=100 (SIB follows). */
			inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
			/* SIB: scale taken from memw (shift amount), index, base. */
			inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
			inst[4] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		if (!(mem & OFFS_REG_MASK))
			memw += next;
	}

	return SLJIT_SUCCESS;
}
1129
/* Emits a 32-bit move, zero- or sign-extending into a 64-bit destination
   register when 'sign' is nonzero (MOVSXD) or via a plain 32-bit move
   (implicit zero extension) otherwise. */
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			/* Value fits in 32 bits unsigned (or is non-negative when
			   sign-extending): a plain MOV r32, imm32 suffices. */
			if (!sign || ((sljit_u32)srcw <= 0x7fffffff))
				return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw);

			/* Negative constant: REX.W MOV r/m64, imm32 sign-extends. */
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
		}
		/* Memory destination: store only the low 32 bits. */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		/* Register to memory: store src directly, no extension needed. */
		dst_r = src;
	else {
		if (sign) {
			/* MOVSXD r64, r/m32: sign-extend into the full register. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOVSXD_r_rm;
		} else {
			/* 32-bit MOV implicitly zero-extends to 64 bits. */
			compiler->mode32 = 1;
			EMIT_MOV(compiler, dst_r, 0, src, srcw);
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		/* Write the (possibly extended) value back as a 32-bit store. */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}
1183
/* Converts an unsigned integer to floating point. CVTSI2SD only accepts
   signed operands, so for 64-bit values with the top bit set the code
   halves the value first ((x >> 1) | (x & 1), keeping the low bit for
   correct rounding), converts, then doubles the result with an ADDSD. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst, *jump_inst1, *jump_inst2;
	sljit_uw size1, size2;

	compiler->mode32 = 0;

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
		/* A u32 zero-extended into a 64-bit register is always a
		   non-negative signed value: one 64-bit CVTSI2SD is enough. */
		if (src != SLJIT_IMM) {
			compiler->mode32 = 1;
			EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
			compiler->mode32 = 0;
		} else
			FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw));

		FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));

		compiler->mode32 = 1;

		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	/* SLJIT_CONV_F64_FROM_UW (full 64-bit unsigned source). */
	if (!FAST_IS_REG(src)) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
	}

	/* Sign test: values < 0 (as signed) take the halve-and-double path. */
	BINARY_IMM32(CMP, 0, src, 0);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JL_i8;
	jump_inst1 = inst;	/* displacement patched below */

	size1 = compiler->size;

	/* Fast path: top bit clear, direct signed conversion is correct. */
	compiler->mode32 = 0;
	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	jump_inst2 = inst;	/* displacement patched below */

	size2 = compiler->size;

	/* Patch JL to land on the slow path that starts here. */
	jump_inst1[1] = U8(size2 - size1);

	/* Slow path: TMP_REG1 = src >> 1, TMP_REG2 = src & 1, then OR them so
	   the halved value still rounds correctly. */
	if (src != TMP_REG1)
		EMIT_MOV(compiler, TMP_REG1, 0, src, 0);

	EMIT_MOV(compiler, TMP_REG2, 0, src, 0);

	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= SHR;

	compiler->mode32 = 1;
	BINARY_IMM32(AND, 1, TMP_REG2, 0);

	compiler->mode32 = 0;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0);
	FAIL_IF(!inst);
	inst[0] = OR_r_rm;

	/* Convert the halved value, then double it: dst_r += dst_r. */
	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
	compiler->mode32 = 1;
	FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));

	/* Patch the fast path's JMP to skip the slow path. */
	jump_inst2[1] = U8(compiler->size - size2);

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
1266
sljit_emit_fset(struct sljit_compiler * compiler,sljit_s32 freg,sljit_u8 rex,sljit_s32 is_zero)1267 static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler,
1268 sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero)
1269 {
1270 sljit_u8 *inst;
1271 sljit_u32 size;
1272
1273 if (is_zero) {
1274 rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0;
1275 } else {
1276 if (freg_map[freg] >= 8)
1277 rex |= REX_R;
1278 if (reg_map[TMP_REG1] >= 8)
1279 rex |= REX_B;
1280 }
1281
1282 size = (rex != 0) ? 5 : 4;
1283
1284 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1285 FAIL_IF(!inst);
1286 INC_SIZE(size);
1287
1288 *inst++ = GROUP_66;
1289 if (rex != 0)
1290 *inst++ = rex;
1291 inst[0] = GROUP_0F;
1292
1293 if (is_zero) {
1294 inst[1] = PXOR_x_xm;
1295 inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG);
1296 } else {
1297 inst[1] = MOVD_x_rm;
1298 inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG);
1299 }
1300
1301 return SLJIT_SUCCESS;
1302 }
1303
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)1304 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
1305 sljit_s32 freg, sljit_f32 value)
1306 {
1307 union {
1308 sljit_s32 imm;
1309 sljit_f32 value;
1310 } u;
1311
1312 CHECK_ERROR();
1313 CHECK(check_sljit_emit_fset32(compiler, freg, value));
1314
1315 u.value = value;
1316
1317 if (u.imm != 0) {
1318 compiler->mode32 = 1;
1319 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
1320 }
1321
1322 return sljit_emit_fset(compiler, freg, 0, u.imm == 0);
1323 }
1324
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)1325 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
1326 sljit_s32 freg, sljit_f64 value)
1327 {
1328 union {
1329 sljit_sw imm;
1330 sljit_f64 value;
1331 } u;
1332
1333 CHECK_ERROR();
1334 CHECK(check_sljit_emit_fset64(compiler, freg, value));
1335
1336 u.value = value;
1337
1338 if (u.imm != 0) {
1339 compiler->mode32 = 0;
1340 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
1341 }
1342
1343 return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0);
1344 }
1345
/* Copies raw bits between a general register and a float register using
   MOVD/MOVQ (direction chosen by SLJIT_COPY_TO_F64 vs. the reverse op);
   REX.W selects the 64-bit form when SLJIT_32 is not set. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_u32 size;
	sljit_u8 rex = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (!(op & SLJIT_32))
		rex = REX_W;

	if (freg_map[freg] >= 8)
		rex |= REX_R;

	if (reg_map[reg] >= 8)
		rex |= REX_B;

	/* 66 [REX] 0F <op> <modrm>: 4 bytes, 5 with a REX prefix. */
	size = (rex != 0) ? 5 : 4;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

	*inst++ = GROUP_66;
	if (rex != 0)
		*inst++ = rex;
	inst[0] = GROUP_0F;
	inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;
	inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG);

	return SLJIT_SUCCESS;
}
1380
skip_frames_before_return(struct sljit_compiler * compiler)1381 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1382 {
1383 sljit_s32 tmp, size;
1384
1385 /* Don't adjust shadow stack if it isn't enabled. */
1386 if (!cpu_has_shadow_stack())
1387 return SLJIT_SUCCESS;
1388
1389 size = compiler->local_size;
1390 tmp = compiler->scratches;
1391 if (tmp >= SLJIT_FIRST_SAVED_REG)
1392 size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
1393 tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
1394 if (SLJIT_S0 >= tmp)
1395 size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);
1396
1397 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1398 }
1399