1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /* x86 64-bit arch dependent functions. */
28
/* Emits a 10 byte long "mov reg, imm64" (REX.W + B8+rd io) instruction
   which loads the full 64 bit immediate into reg. */
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_u8 rex = REX_W;

	/* Registers mapped to r8-r15 need the REX.B extension bit. */
	if (reg_map[reg] > 7)
		rex |= REX_B;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	*inst++ = rex;
	/* The register id is encoded in the low 3 bits of the opcode. */
	*inst++ = MOV_r_i32 | (reg_map[reg] & 0x7);
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
41
/* Emits a "far" (register indirect) jump or call: the 64 bit target is
   loaded into TMP_REG2, then a "jmp/call reg" is emitted. A conditional
   jump is handled by an inverted short conditional jump which skips the
   far jump sequence. Returns the updated code_ptr. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type)
{
	if (type < SLJIT_JUMP) {
		/* Invert type. get_jump_code returns the two byte 0x0F 0x8x near
		   form; subtracting 0x10 converts it to the one byte 0x7x short form. */
		*code_ptr++ = get_jump_code(type ^ 0x1) - 0x10;
		/* Displacement: skip the mov (2 + 8 bytes) and the indirect jump
		   (3 bytes). NOTE(review): the 3 byte jump length assumes
		   reg_map[TMP_REG2] >= 8, so the REX prefix below is always
		   emitted -- verify against the register mapping. */
		*code_ptr++ = 10 + 3;
	}

	/* mov TMP_REG2, imm64 (immediate patched later when targeting a label). */
	*code_ptr++ = REX_W | ((reg_map[TMP_REG2] <= 7) ? 0 : REX_B);
	*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
	/* Record the address of the 64 bit immediate for later patching. */
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else
		sljit_unaligned_store_sw(code_ptr, jump->u.target);

	code_ptr += sizeof(sljit_sw);
	/* jmp/call TMP_REG2 (FF /4 or FF /2). */
	if (reg_map[TMP_REG2] >= 8)
		*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2];

	return code_ptr;
}
67
/* Emits the function prologue: pushes the used saved (and ABI callee
   saved scratch) registers, moves the incoming arguments into
   SLJIT_S0-S2, and allocates the 16 byte aligned local area on the
   stack. On WIN64 large frames are grown page-by-page through
   sljit_grow_stack and the xmm6 register is spilled. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, size, saved_register_size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);

	/* Push the used saved registers, S0 downwards. A REX.B prefix is
	   needed for registers mapped to r8-r15. */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	/* Push the used scratch registers which are callee saved in the ABI
	   (SLJIT_FIRST_SAVED_REG .. scratches). */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	/* Move the incoming arguments into S0-S2. Each mov is 3 bytes:
	   REX prefix + opcode + mod/rm byte. */
	if (args > 0) {
		size = args * 3;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);

		INC_SIZE(size);

#ifndef _WIN64
		/* System V AMD64 ABI: arguments arrive in rdi, rsi, rdx. */
		if (args > 0) {
			/* mov S0, rdi (condition is always true here; kept for symmetry). */
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x7 /* rdi */;
		}
		if (args > 1) {
			/* mov S1, rsi. REX.R extends the reg field: NOTE(review) this
			   relies on S1 being mapped to an r8-r15 register (reg_lmap is
			   used) -- verify against reg_map[]. */
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S1] << 3) | 0x6 /* rsi */;
		}
		if (args > 2) {
			/* mov S2, rdx (same REX.R note as above). */
			*inst++ = REX_W | REX_R;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_lmap[SLJIT_S2] << 3) | 0x2 /* rdx */;
		}
#else
		/* Microsoft x64 ABI: arguments arrive in rcx, rdx, r8. */
		if (args > 0) {
			/* mov S0, rcx. */
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S0] << 3) | 0x1 /* rcx */;
		}
		if (args > 1) {
			/* mov S1, rdx. */
			*inst++ = REX_W;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S1] << 3) | 0x2 /* rdx */;
		}
		if (args > 2) {
			/* mov S2, r8 (REX.B selects r8 as the r/m operand). */
			*inst++ = REX_W | REX_B;
			*inst++ = MOV_r_rm;
			*inst++ = MOD_REG | (reg_map[SLJIT_S2] << 3) | 0x0 /* r8 */;
		}
#endif
	}

	/* Round the whole frame (locals + SLJIT_LOCALS_OFFSET + saved
	   registers including the return address) up to 16 bytes, then
	   subtract the part already occupied by the pushes/call. */
	local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	compiler->local_size = local_size;

#ifdef _WIN64
	if (local_size > 1024) {
		/* Allocate stack for the callback, which grows the stack. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + (3 + sizeof(sljit_s32)));
		FAIL_IF(!inst);
		INC_SIZE(4 + (3 + sizeof(sljit_s32)));
		/* sub rsp, imm8: just enough to keep 16 byte alignment. */
		*inst++ = REX_W;
		*inst++ = GROUP_BINARY_83;
		*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
		/* Allocated size for registers must be divisible by 8. */
		SLJIT_ASSERT(!(saved_register_size & 0x7));
		/* Aligned to 16 byte. */
		if (saved_register_size & 0x8) {
			*inst++ = 5 * sizeof(sljit_sw);
			local_size -= 5 * sizeof(sljit_sw);
		} else {
			*inst++ = 4 * sizeof(sljit_sw);
			local_size -= 4 * sizeof(sljit_sw);
		}
		/* Second instruction */
		/* mov R0, local_size: the remaining size is passed to
		   sljit_grow_stack as its first argument. */
		SLJIT_ASSERT(reg_map[SLJIT_R0] < 8);
		*inst++ = REX_W;
		*inst++ = MOV_rm_i32;
		*inst++ = MOD_REG | reg_lmap[SLJIT_R0];
		sljit_unaligned_store_s32(inst, local_size);
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
	|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_CALL1, SLJIT_IMM, SLJIT_FUNC_OFFSET(sljit_grow_stack)));
	}
#endif

	/* sub rsp, local_size: allocate the locals. The 8 bit (83 /5) or
	   32 bit (81 /5) immediate form is chosen by the size. */
	if (local_size > 0) {
		if (local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			*inst++ = local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | SUB | reg_map[SLJIT_SP];
			sljit_unaligned_store_s32(inst, local_size);
			inst += sizeof(sljit_s32);
		}
	}

#ifdef _WIN64
	/* Save xmm6 register: movaps [rsp + 0x20], xmm6 */
	if (fscratches >= 6 || fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		/* Bytes 0x29 0x74 0x24 0x20 stored little endian. */
		sljit_unaligned_store_s32(inst, 0x20247429);
	}
#endif

	return SLJIT_SUCCESS;
}
224
/* Records the frame layout without emitting any code. The local_size
   (and locals_offset on WIN64) computation must match sljit_emit_enter
   exactly, because sljit_emit_return relies on compiler->local_size. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_register_size;

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	/* Two/four register slots for parameters plus space for xmm6 register if needed. */
	if (fscratches >= 6 || fsaveds >= 1)
		compiler->locals_offset = 6 * sizeof(sljit_sw);
	else
		compiler->locals_offset = ((scratches > 2) ? 4 : 2) * sizeof(sljit_sw);
#endif

	/* Including the return address saved by the call instruction. */
	saved_register_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
	compiler->local_size = ((local_size + SLJIT_LOCALS_OFFSET + saved_register_size + 15) & ~15) - saved_register_size;
	return SLJIT_SUCCESS;
}
248
/* Emits the function epilogue: moves the return value into place,
   restores xmm6 (WIN64), releases the locals, pops the saved registers
   in the reverse order of sljit_emit_enter, and returns. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	/* Move src into the ABI return register first. */
	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

#ifdef _WIN64
	/* Restore xmm6 register: movaps xmm6, [rsp + 0x20] */
	if (compiler->fscratches >= 6 || compiler->fsaveds >= 1) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);
		*inst++ = GROUP_0F;
		/* Bytes 0x28 0x74 0x24 0x20 stored little endian. */
		sljit_unaligned_store_s32(inst, 0x20247428);
	}
#endif

	/* add rsp, local_size: release the locals (rsp is r/m encoding 4).
	   The 8 bit or 32 bit immediate form matches sljit_emit_enter. */
	if (compiler->local_size > 0) {
		if (compiler->local_size <= 127) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_83;
			*inst++ = MOD_REG | ADD | 4;
			*inst = compiler->local_size;
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
			FAIL_IF(!inst);
			INC_SIZE(7);
			*inst++ = REX_W;
			*inst++ = GROUP_BINARY_81;
			*inst++ = MOD_REG | ADD | 4;
			sljit_unaligned_store_s32(inst, compiler->local_size);
		}
	}

	/* Pop the callee saved scratch registers (reverse of the prologue
	   pushes). REX.B is needed for registers mapped to r8-r15. */
	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	/* Pop the saved registers, up to S0. */
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}
319
320 /* --------------------------------------------------------------------- */
321 /* Operators */
322 /* --------------------------------------------------------------------- */
323
emit_do_imm32(struct sljit_compiler * compiler,sljit_u8 rex,sljit_u8 opcode,sljit_sw imm)324 static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
325 {
326 sljit_u8 *inst;
327 sljit_s32 length = 1 + (rex ? 1 : 0) + sizeof(sljit_s32);
328
329 inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
330 FAIL_IF(!inst);
331 INC_SIZE(length);
332 if (rex)
333 *inst++ = rex;
334 *inst++ = opcode;
335 sljit_unaligned_store_s32(inst, imm);
336 return SLJIT_SUCCESS;
337 }
338
/* The central instruction encoder. The low 4 bits of size give the
   opcode length in bytes; the remaining bits carry EX86_* flags.
   Operand a is a register or immediate, operand b is a register or
   memory operand (never an immediate). The function emits the prefixes,
   REX byte, mod/rm, SIB, displacement and immediate, and returns a
   pointer to the reserved opcode position so the caller can store the
   opcode itself (NULL on allocation failure). NOTE(review): for shift
   instructions the group opcode is written here and inst + 1 is
   returned instead -- presumably so the caller can OR the shift type
   into the mod/rm byte; confirm against the call sites. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_s32 size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_s32 flags = size & ~0xf;
	sljit_s32 inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* Low 4 bits: opcode length in bytes. */
	size &= 0xf;
	inst_size = size;

	/* 64 bit operation unless mode32 is set or REX.W is suppressed. */
	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	/* Count the mandatory prefix bytes. */
	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				/* The displacement does not fit into 32 bits: load it
				   into TMP_REG2 and address through that register. */
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG2);
				else
					b |= TMP_REG2;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				/* Base encodes as 4 (rsp/r12): a SIB byte is mandatory. */
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if ((b & REG_MASK) == SLJIT_UNUSED)
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				/* Base encodes as 5 (rbp/r13): mod == 0 is unavailable,
				   an explicit 8 bit zero displacement is required. */
				inst_size += sizeof(sljit_s8);

			if ((b & OFFS_REG_MASK) != SLJIT_UNUSED) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2) && reg_map[b] >= 8)
		rex |= REX_B;

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			/* The hardware masks the shift count; a count of 1 uses the
			   dedicated one byte shorter encoding. */
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1) && reg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	/* Leave room for the opcode byte(s); buf_ptr addresses the mod/rm. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if ((a & SLJIT_IMM) || (a == 0))
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = reg_lmap[a] << 3;
		else
			*buf_ptr = a << 3;
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM))
		/* Register direct: mod == 11. */
		*buf_ptr++ |= MOD_REG + ((!(flags & EX86_SSE2_OP2)) ? reg_lmap[b] : b);
	else if ((b & REG_MASK) != SLJIT_UNUSED) {
		if ((b & OFFS_REG_MASK) == SLJIT_UNUSED || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* mod: 01 for disp8, 10 for disp32 (rbp/r13 base always
			   needs a displacement, see the size calculation above). */
			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if ((b & OFFS_REG_MASK) == SLJIT_UNUSED)
				*buf_ptr++ |= reg_lmap[b & REG_MASK];
			else {
				/* r/m == 4 signals a SIB byte. */
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3);
			}

			if (immb != 0 || reg_lmap[b & REG_MASK] == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = immb; /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			/* Scaled index addressing: immb (0-3) is the scale shift,
			   placed into the SIB scale field. */
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr |= 0x40;
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = reg_lmap[b & REG_MASK] | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6);
			if (reg_lmap[b & REG_MASK] == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		/* Absolute 32 bit address: mod == 00, SIB with base == 101 and
		   no index (avoids RIP relative addressing). */
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	/* Append the immediate operand, if any. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = imma;
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
543
544 /* --------------------------------------------------------------------- */
545 /* Call / return instructions */
546 /* --------------------------------------------------------------------- */
547
/* Loads the C calling convention argument registers before a call of
   type SLJIT_CALL0-CALL3. SLJIT_R1 already resides in the second
   argument register (see the asserts below), so only the first and
   (for CALL3) third arguments need to be moved. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;

	/* After any change update IS_REG_CHANGED_BY_CALL as well. */
#ifndef _WIN64
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 2);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		/* Move third argument to TMP_REG1. */
		/* mov rdx, R2 (rdx is TMP_REG1, see the assert above). */
		*inst++ = REX_W;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x2 /* rdx */ << 3) | reg_lmap[SLJIT_R2];
	}
	/* mov rdi, R0: first argument in the System V AMD64 ABI. */
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x7 /* rdi */ << 3) | reg_lmap[SLJIT_R0];
#else
	SLJIT_ASSERT(reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R0] < 8 && reg_map[SLJIT_R2] < 8 && reg_map[TMP_REG1] == 8);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + ((type < SLJIT_CALL3) ? 3 : 6));
	FAIL_IF(!inst);
	INC_SIZE((type < SLJIT_CALL3) ? 3 : 6);
	if (type >= SLJIT_CALL3) {
		/* Move third argument to TMP_REG1. */
		/* mov r8, R2 (r8 is TMP_REG1; REX.R selects r8 in the reg field). */
		*inst++ = REX_W | REX_R;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (0x0 /* r8 */ << 3) | reg_lmap[SLJIT_R2];
	}
	/* mov rcx, R0: first argument in the Microsoft x64 ABI. */
	*inst++ = REX_W;
	*inst++ = MOV_r_rm;
	*inst++ = MOD_REG | (0x1 /* rcx */ << 3) | reg_lmap[SLJIT_R0];
#endif
	return SLJIT_SUCCESS;
}
586
/* Emits a "pop dst": moves the return address pushed by the fast call
   into dst, which may be a register or a memory operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* For UNUSED dst. Uncommon, but possible. */
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			/* One byte pop reg. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		/* pop reg with a REX.B prefix (dst is mapped to r8-r15). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* pop [mem] (opcode stored by the caller into the reserved byte). */
	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
623
/* Emits a return to the address given in src: the address is pushed
   onto the stack and a following "ret" pops it and jumps there. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* An immediate which does not fit into 32 bits must be loaded into
	   a register first ("push" only takes a 32 bit immediate). */
	if ((src & SLJIT_IMM) && NOT_HALFWORD(srcw)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			/* push reg; the extra byte is reserved for the ret below. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			/* push reg with REX.B (src is mapped to r8-r15). */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else if (src & SLJIT_MEM) {
		/* push [mem] (FF /6). */
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		/* Reserve one byte for the ret below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}
	else {
		SLJIT_ASSERT(IS_HALFWORD(srcw));
		/* SLJIT_IMM. */
		/* push imm32 (sign extended to 64 bits by the hardware). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5 + 1);
		FAIL_IF(!inst);

		INC_SIZE(5 + 1);
		*inst++ = PUSH_i32;
		sljit_unaligned_store_s32(inst, srcw);
		inst += sizeof(sljit_s32);
	}

	/* ret: pops the pushed address and jumps to it. */
	RET();
	return SLJIT_SUCCESS;
}
681
682
683 /* --------------------------------------------------------------------- */
684 /* Extend input */
685 /* --------------------------------------------------------------------- */
686
/* Emits a 32 -> 64 bit move: a sign extending movsxd when sign is
   nonzero, otherwise a plain 32 bit mov (32 bit register writes zero
   the upper half on x86-64). Immediates are truncated to 32 bits. */
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			/* Signed, or the value survives sign extension unchanged:
			   a 64 bit "mov r64, imm32" (sign extended) is enough. */
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			/* Otherwise the full 64 bit immediate load is required. */
			return emit_load_imm64(compiler, dst, srcw);
		}
		/* Memory destination: only the low 32 bits are stored. */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		/* Register to memory: only the low 32 bits are written below,
		   so no extension step is needed. */
		dst_r = src;
	else {
		if (sign) {
			/* movsxd dst_r, src: sign extend 32 -> 64 bits. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			/* 32 bit mov: zero extends into the upper half. */
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		/* Store the 32 bit result to the memory destination. */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}
743