1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /* x86 32-bit arch dependent functions. */
28
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
32
/* Emits a one byte opcode followed by a machine-word immediate
   (e.g. "push imm32" style encodings). Returns SLJIT_SUCCESS or an
   error code propagated by FAIL_IF. */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	/* 1 extra byte is reserved for the buffer bookkeeping done by INC_SIZE. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	/* The immediate may land on an unaligned address inside the code buffer. */
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
44
45 /* Size contains the flags as well. */
/* Size contains the flags as well. */
/* Emits the prefix bytes, mod r/m (+ optional SIB and displacement) and
   any immediate for one x86 instruction, leaving the opcode byte(s)
   themselves for the caller to fill in. Returns a pointer to the first
   opcode byte inside the code buffer, or NULL on allocation failure. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one mandatory prefix (F2/F3/66) may be selected. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* The low nibble of 'size' is the opcode length in bytes. */
	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw); /* Absolute address: 32 bit displacement. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_sw);
			}
			else if (reg_map[b & REG_MASK] == 5) {
				/* Base register 5 (ebp) cannot be encoded without a
				   displacement byte. Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			/* Base register 4 (esp) always requires a SIB byte. */
			if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK)
				inst_size += 1; /* SIB byte. */
		}
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG; /* Use the sign-extended imm8 form (group 0x83). */
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= 0x1f);
			if (imma != 1) {
				/* Shift by 1 has a dedicated opcode with no immediate byte. */
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* buf_ptr points past the opcode byte(s) left for the caller. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3);
		else
			*buf_ptr = U8(a << 3); /* SSE2 registers are encoded by their own index. */
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand: mod = 11. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* Select displacement width via the mod bits. */
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40; /* mod = 01: 8 bit displacement follows. */
				else
					*buf_ptr |= 0x80; /* mod = 10: 32 bit displacement follows. */
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				*buf_ptr++ |= 0x04; /* rm = 100: SIB byte follows. */
				*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			/* Base + scaled index form; immb holds the scale shift here. */
			if (reg_map_b == 5)
				*buf_ptr |= 0x40; /* ebp base needs an explicit (zero) disp8. */

			*buf_ptr++ |= 0x04; /* rm = 100: SIB byte follows. */
			*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));

			if (reg_map_b == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		/* No base register: absolute addressing (mod = 00, rm = 101). */
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	/* Append the immediate of operand a, if any. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	/* For shift instructions skip the group prefix byte, so the caller
	   writes the real opcode at the returned position. */
	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
227
228 /* --------------------------------------------------------------------- */
229 /* Enter / return */
230 /* --------------------------------------------------------------------- */
231
/* Writes a far (32 bit relative) jump or call at code_ptr for the given
   jump record and returns the advanced code pointer. The displacement is
   either resolved immediately (known target) or marked for patching. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	/* Emit the opcode part; jump->addr is advanced past it so it ends up
	   pointing at the displacement word. */
	if (type == SLJIT_JUMP)
		*code_ptr++ = JMP_i32;
	else if (type >= SLJIT_FAST_CALL)
		*code_ptr++ = CALL_i32;
	else {
		/* Conditional jumps use the two byte 0x0f escape form. */
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr++;
	}
	jump->addr++;

	if (!(jump->flags & JUMP_LABEL))
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	else
		jump->flags |= PATCH_MW; /* Label target: patched later. */

	return code_ptr + 4;
}
258
259 #define ENTER_TMP_TO_R4 0x00001
260 #define ENTER_TMP_TO_S 0x00002
261
/* Emits the function prologue: pushes the used saved registers, loads the
   incoming (stack-passed) arguments into their registers, and allocates
   the 16-byte aligned local frame (with page-by-page stack probing on
   Windows). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
	sljit_s32 size, args_size, types, status;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_u8 *inst;
#ifdef _WIN32
	/* Stack offset where SLJIT_R2 is parked during stack probing; -1 when unused. */
	sljit_s32 r2_offset = -1;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	status = 0;

	if (options & SLJIT_ENTER_REG_ARG) {
		args_size = 3 * SSIZE_OF(sw);

		/* With register arguments only the fourth (and later) word
		   argument must be fetched from the stack via TMP_REG1. */
		while (arg_types) {
			if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
				word_arg_count++;
				if (word_arg_count >= 4)
					status |= ENTER_TMP_TO_R4;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}

		compiler->args_size = 0;
	} else {
		types = arg_types;
		saved_arg_count = 0;
		float_arg_count = 0;
		args_size = SSIZE_OF(sw); /* Start past the return address slot. */
		while (types) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;

				if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
					saved_arg_count++;

				/* A fourth word argument has no register of its own:
				   it is kept in TMP_REG1 and spilled at the end, and is
				   masked out of arg_types so the copy loop below skips it. */
				if (word_arg_count == 4) {
					if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
						status |= ENTER_TMP_TO_R4;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					} else if (saved_arg_count == 4) {
						status |= ENTER_TMP_TO_S;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					}
				}

				args_size += SSIZE_OF(sw);
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		args_size -= SSIZE_OF(sw);
		compiler->args_size = args_size;
	}

	/* Number of registers pushed in the prologue. */
	size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
	if (!(options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 is pushed as well. */

	if (size != 0) {
		inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
		FAIL_IF(!inst);

		INC_SIZE((sljit_uw)size);

		if (!(options & SLJIT_ENTER_REG_ARG))
			PUSH_REG(reg_map[TMP_REG1]);

		if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9)
			PUSH_REG(reg_map[SLJIT_S2]);
		if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
			PUSH_REG(reg_map[SLJIT_S1]);
		if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
			PUSH_REG(reg_map[SLJIT_S0]);

		/* From here 'size' is the byte size of the pushed register area. */
		size *= SSIZE_OF(sw);
	}

	/* Fetch the fourth word argument into TMP_REG1 while its stack
	   offset is still easy to compute. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);

	size += SSIZE_OF(sw); /* Include the return address. */

	/* Round the frame so the stack stays 16-byte aligned. */
	local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size;
	compiler->local_size = local_size;

	/* Load the remaining stack-passed word arguments into their registers. */
	word_arg_count = 0;
	saved_arg_count = 0;
	args_size = size;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));

			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
#ifdef _WIN32
				/* R2 is needed as the probe-loop counter below:
				   remember the offset and load it afterwards instead. */
				if (word_arg_count == 3 && local_size > 4 * 4096)
					r2_offset = local_size + args_size;
				else
#endif
					EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);

			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
				saved_arg_count++;
			}

			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#ifdef _WIN32
	SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);

	/* Windows: each new 4 KiB stack page must be touched in order
	   (stack probing) before esp moves past it. */
	if (local_size > 4096) {
		if (local_size <= 4 * 4096) {
			/* Up to three pages are probed with direct accesses. */
			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);

			if (local_size > 2 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			/* Larger frames are probed in a loop with R2 as page counter. */
			if (options & SLJIT_ENTER_REG_ARG) {
				SLJIT_ASSERT(r2_offset == -1);

				inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1));
				FAIL_IF(!inst);
				INC_SIZE(1);
				PUSH_REG(reg_map[SLJIT_R2]);

				local_size -= SSIZE_OF(sw);
				r2_offset = local_size;
			}

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);

			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = LOOP_i8;
			inst[1] = (sljit_u8)-16; /* Branch back over the probe + sub pair. */
			local_size &= 0xfff; /* Remaining partial page. */
		}
	}

	if (local_size > 0) {
		BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
	}

	/* Reload R2 if its load was deferred or it was pushed for probing. */
	if (r2_offset != -1)
		EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);

#else /* !_WIN32 */

	SLJIT_ASSERT(local_size > 0);

	BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#endif /* _WIN32 */

	/* Store extra saved arguments (beyond the three S registers) into the
	   local area. */
	size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw);
	kept_saveds_count = SLJIT_R3 - kept_saveds_count;

	while (saved_arg_count > 3) {
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0);
		kept_saveds_count++;
		size -= SSIZE_OF(sw);
		saved_arg_count--;
	}

	/* Finally spill the fourth word argument kept in TMP_REG1. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
		if (status & ENTER_TMP_TO_R4)
			size = 2 * SSIZE_OF(sw);

		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
486
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)487 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
488 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
489 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
490 {
491 sljit_s32 args_size;
492
493 CHECK_ERROR();
494 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
495 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
496
497 arg_types >>= SLJIT_ARG_SHIFT;
498 args_size = 0;
499
500 if (!(options & SLJIT_ENTER_REG_ARG)) {
501 while (arg_types) {
502 switch (arg_types & SLJIT_ARG_MASK) {
503 case SLJIT_ARG_TYPE_F64:
504 args_size += SSIZE_OF(f64);
505 break;
506 case SLJIT_ARG_TYPE_F32:
507 args_size += SSIZE_OF(f32);
508 break;
509 default:
510 args_size += SSIZE_OF(sw);
511 break;
512 }
513 arg_types >>= SLJIT_ARG_SHIFT;
514 }
515 }
516
517 compiler->args_size = args_size;
518
519 /* [esp+0] for saving temporaries and for function calls. */
520
521 saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
522
523 /* Saving ebp. */
524 if (!(options & SLJIT_ENTER_REG_ARG))
525 saveds += SSIZE_OF(sw);
526
527 compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds;
528 return SLJIT_SUCCESS;
529 }
530
/* Emits the epilogue up to (but not including) the return instruction:
   releases the local frame and pops the saved registers, mirroring the
   pushes done by sljit_emit_enter. When is_return_to is set, one extra
   stack word is released as well. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	sljit_s32 local_size, saveds;
	sljit_uw size;
	sljit_u8 *inst;

	/* Number of registers to pop. */
	size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);

	local_size = compiler->local_size;

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 was pushed as well. */
	else if (is_return_to && size == 0) {
		/* Nothing to pop: fold the extra word into the frame release. */
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	if (size == 0)
		return SLJIT_SUCCESS;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	saveds = compiler->saveds;

	/* Pop order is the reverse of the push order in sljit_emit_enter. */
	if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		POP_REG(reg_map[TMP_REG1]);

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}
578
/* Emits the full epilogue followed by a one byte 'ret' instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

	FAIL_IF(emit_stack_frame_release(compiler, 0));

	/* Single byte: ret. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}
597
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)598 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
599 sljit_s32 src, sljit_sw srcw)
600 {
601 sljit_s32 src_r;
602
603 CHECK_ERROR();
604 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
605
606 if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
607 ADJUST_LOCAL_OFFSET(src, srcw);
608 CHECK_EXTRA_REGS(src, srcw, (void)0);
609
610 src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? TMP_REG1 : SLJIT_R1;
611
612 EMIT_MOV(compiler, src_r, 0, src, srcw);
613 src = src_r;
614 srcw = 0;
615 }
616
617 FAIL_IF(emit_stack_frame_release(compiler, 1));
618
619 SLJIT_SKIP_CHECKS(compiler);
620 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
621 }
622
623 /* --------------------------------------------------------------------- */
624 /* Call / return instructions */
625 /* --------------------------------------------------------------------- */
626
/* Computes how much extra stack space a call with the given argument
   types needs (16-byte aligned), and optionally reports the number of
   word-sized arguments through word_arg_count_ptr. */
static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw total = 0;
	sljit_s32 words = 0;
	sljit_s32 masked;

	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		masked = arg_types & SLJIT_ARG_MASK;

		if (masked == SLJIT_ARG_TYPE_F64)
			total += SSIZE_OF(f64);
		else if (masked == SLJIT_ARG_TYPE_F32)
			total += SSIZE_OF(f32);
		else {
			words++;
			total += SSIZE_OF(sw);
		}
	}

	if (word_arg_count_ptr != NULL)
		*word_arg_count_ptr = words;

	/* Up to four machine words fit without extra space. */
	if (total <= 4 * SSIZE_OF(sw))
		return 0;

	/* Round the overflow area up to a 16 byte boundary. */
	return ((total - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf);
}
659
/* Copies the outgoing call arguments from their registers onto the
   stack. The fourth word argument lives at [esp + 2*sizeof(sw)] of the
   current frame; when keep_tmp1 is set, TMP_REG1 must stay intact and
   SLJIT_R0 is used to shuttle that argument instead. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1)
{
	sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset;
	/* NOTE(review): 'inst' has no direct use below; presumably referenced
	   inside the BINARY_IMM32/EMIT_MOV macro expansions — verify before removing. */
	sljit_u8 *inst;

	if (word_arg_count >= 4) {
		arg4_reg = SLJIT_R0;

		if (!keep_tmp1) {
			/* Fetch the fourth word argument into TMP_REG1 up front. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
			arg4_reg = TMP_REG1;
		}
	}

	if (stack_size > 0)
		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

	arg_offset = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0);

			/* When R0 doubles as arg4_reg: store R0's own value first,
			   then reload R0 with the fourth argument (note the offset is
			   adjusted by the stack_size subtracted above). */
			if (word_arg_count == 1 && arg4_reg == SLJIT_R0)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size);

			arg_offset += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
710
/* Cleans up after a call: releases the argument area and, when the call
   returns a floating point value, stores the x87 top-of-stack to [esp]
   and reloads it into the SSE2 register SLJIT_FR0. The return type is
   encoded in the low bits of arg_types. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);

	/* Done unless the return type is a floating point value. */
	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	/* fstps/fstpl [esp]: pop the x87 stack into the scratch slot. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04; /* ModRM: reg = 3 (fstp), rm = 100 (SIB follows). */
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; /* SIB: no index, base = esp. */

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
734
/* Prepares a tail call: moves the outgoing arguments into the caller's
   argument area, restores the saved registers and releases (most of) the
   frame so a plain jump can transfer control. On return *extra_space is 0
   when the stack is fully restored; otherwise it is the byte count that
   emit_tail_call_end() must release after the target returns. */
static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 *extra_space, sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw args_size, saved_regs_size;
	sljit_sw types, word_arg_count, float_arg_count;
	sljit_sw stack_size, prev_stack_size, min_size, offset;
	sljit_sw word_arg4_offset;
	sljit_u8 r2_offset = 0;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	/* NOTE(review): 'inst' has no direct use below; presumably referenced
	   inside the BINARY_IMM32 macro expansion — verify before removing. */
	sljit_u8* inst;

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);

	/* Count the arguments; 'types' collects them in reverse order so the
	   copy loops below can process the last argument first (highest offset). */
	word_arg_count = 0;
	float_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;
	types = 0;
	args_size = 0;

	while (arg_types != 0) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			float_arg_count++;
			break;
		default:
			word_arg_count++;
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (args_size <= compiler->args_size) {
		/* The new arguments fit into the incoming argument area: write
		   them in place, then release the whole frame normally. */
		*extra_space = 0;
		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;

		offset = stack_size + compiler->local_size;

		if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
			if (word_arg_count >= 1) {
				/* Park R0 at [esp] so R0 can hold the target address. */
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				r2_offset = sizeof(sljit_sw);
			}
			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
		}

		while (types != 0) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				offset -= SSIZE_OF(f64);
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			case SLJIT_ARG_TYPE_F32:
				offset -= SSIZE_OF(f32);
				/* BUGFIX: store single precision (4 bytes) — a double
				   precision store here would write 8 bytes into the 4 byte
				   slot reserved above and clobber the neighbouring
				   argument. Matches the F32 handling in call_with_args(). */
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			default:
				switch (word_arg_count) {
				case 1:
					offset -= SSIZE_OF(sw);
					if (r2_offset != 0) {
						/* R0 was parked above; copy it from [esp]. */
						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					} else
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
					break;
				case 2:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
					break;
				case 3:
					offset -= SSIZE_OF(sw);
					break;
				case 4:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					break;
				}
				word_arg_count--;
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		return emit_stack_frame_release(compiler, 0);
	}

	/* The new arguments do not fit into the incoming area: compute the
	   frame layout manually, restore the saved registers by explicit
	   loads and move the arguments into place before the jump. */
	stack_size = args_size + SSIZE_OF(sw);

	if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
		r2_offset = SSIZE_OF(sw);
		stack_size += SSIZE_OF(sw);
	}

	if (word_arg_count >= 3)
		stack_size += SSIZE_OF(sw);

	prev_stack_size = SSIZE_OF(sw) + saved_regs_size;
	min_size = prev_stack_size + compiler->local_size;

	word_arg4_offset = 2 * SSIZE_OF(sw);

	if (stack_size > min_size) {
		BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
		if (src == SLJIT_MEM1(SLJIT_SP))
			srcw += stack_size - min_size;
		word_arg4_offset += stack_size - min_size;
	}
	else
		stack_size = min_size;

	if (word_arg_count >= 3) {
		/* Spill R2 so it can serve as a temporary below. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);

		if (word_arg_count >= 4)
			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
	}

	if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
		if (word_arg_count >= 1) {
			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
		}
		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
	}

	/* Restore saved registers. */
	offset = stack_size - 2 * SSIZE_OF(sw);
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);

	/* NOTE(review): unlike emit_stack_frame_release() and the S1/S0 cases
	   below, this condition does not test kept_saveds_count for S2 —
	   verify the asymmetry is intended. */
	if (compiler->saveds > 2 || compiler->scratches > 9) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}

	/* Copy fourth argument and return address. */
	offset = stack_size - SSIZE_OF(sw);
	*extra_space = args_size;

	if (word_arg_count >= 4) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
	}

	while (types != 0) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			offset -= SSIZE_OF(f64);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			offset -= SSIZE_OF(f32);
			/* BUGFIX: single precision store for the 4 byte F32 slot
			   (see the identical fix in the in-place branch above). */
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		default:
			switch (word_arg_count) {
			case 1:
				offset -= SSIZE_OF(sw);
				if (r2_offset != 0) {
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				} else
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
				break;
			case 2:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
				break;
			case 3:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				break;
			}
			word_arg_count--;
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(offset >= 0);

	if (offset == 0)
		return SLJIT_SUCCESS;

	BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
	return SLJIT_SUCCESS;
}
948
/* Emitted after a non-tail-converted jump of a tail call: releases the
   extra argument space left on the stack and returns to the original
   caller. */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst;

	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);

	/* Single byte: ret. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}
963
/* Tail call preparation for the register-argument convention: only a
   fourth (or later) word argument needs moving, from its slot in the
   current frame into the corresponding slot of the caller's frame. */
static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_s32 words = 0;
	sljit_s32 kept, offset;

	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			words++;
	}

	/* The first three word arguments are passed in registers. */
	if (words <= 3)
		return SLJIT_SUCCESS;

	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));

	/* Compute the destination slot: skip the local area plus every
	   register the epilogue will pop. */
	kept = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	offset = compiler->local_size + 3 * SSIZE_OF(sw);

	if ((compiler->saveds > 0 && kept == 0) || compiler->scratches > 11)
		offset += SSIZE_OF(sw);
	if ((compiler->saveds > 1 && kept <= 1) || compiler->scratches > 10)
		offset += SSIZE_OF(sw);
	if ((compiler->saveds > 2 && kept <= 2) || compiler->scratches > 9)
		offset += SSIZE_OF(sw);

	return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
}
995
/* Emits a function call of the given type. For tail calls (SLJIT_CALL_RETURN)
   the current frame is released first and the call is lowered to a plain jump
   when no temporary argument area is needed; otherwise the arguments are
   placed by call_with_args() and the stack is restored after the call. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			/* Register-argument tail call: preserve the stack-passed word
			   argument (if any), tear the frame down and jump. */
			PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));
			PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
		}

		/* tail_call_with_args() takes the call type in stack_size and
		   presumably returns the remaining temporary stack size in it
		   (0 when the call can become a plain jump) — see its definition. */
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

		SLJIT_SKIP_CHECKS(compiler);

		if (stack_size == 0)
			return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		/* Release the temporary argument area before the final return. */
		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		/* Arguments are already in registers; nothing to set up. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_jump(compiler, type);
	}

	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

	SLJIT_SKIP_CHECKS(compiler);
	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	/* Restore the stack pointer (and handle the return value) after the call. */
	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
1045
/* Emits an indirect call through src/srcw. Mirrors sljit_emit_call, with
   extra care because the call target may live in a register or memory
   location that is clobbered while arguments are set up or the frame is
   released — in those cases the target is moved to TMP_REG1 (or SLJIT_R0
   for non-register-arg tail calls) first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_CALL_RETURN) {
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));

			/* A memory operand or a saved register that is restored by the
			   frame release would be invalid after it; copy the target into
			   TMP_REG1 beforehand. */
			if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
				ADJUST_LOCAL_OFFSET(src, srcw);
				CHECK_EXTRA_REGS(src, srcw, (void)0);

				EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
				src = TMP_REG1;
				srcw = 0;
			}

			FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
		}

		/* As in sljit_emit_call: stack_size carries the type in, and the
		   remaining temporary stack size out. */
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		/* Non-immediate targets were moved into SLJIT_R0 by
		   tail_call_with_args — presumably; verify against its definition. */
		if (!(src & SLJIT_IMM)) {
			src = SLJIT_R0;
			srcw = 0;
		}

		SLJIT_SKIP_CHECKS(compiler);

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_ijump(compiler, type, src, srcw);
	}

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src & SLJIT_MEM) {
		/* Load the target before argument setup can clobber the address. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1));

	/* Argument setup moved SP down; SP-relative target addresses must be
	   rebased by the same amount. */
	if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP))
		srcw += stack_size;

	SLJIT_SKIP_CHECKS(compiler);
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	return post_call_with_args(compiler, arg_types, stack_size);
}
1117
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1118 static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1119 {
1120 sljit_u8* inst;
1121
1122 if (compiler->options & SLJIT_ENTER_REG_ARG) {
1123 if (src == SLJIT_FR0)
1124 return SLJIT_SUCCESS;
1125
1126 SLJIT_SKIP_CHECKS(compiler);
1127 return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
1128 }
1129
1130 if (FAST_IS_REG(src)) {
1131 FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src));
1132
1133 src = SLJIT_MEM1(SLJIT_SP);
1134 srcw = 0;
1135 } else {
1136 ADJUST_LOCAL_OFFSET(src, srcw);
1137 }
1138
1139 inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw);
1140 *inst = (op & SLJIT_32) ? FLDS : FLDL;
1141
1142 return SLJIT_SUCCESS;
1143 }
1144
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)1145 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1146 {
1147 sljit_u8 *inst;
1148
1149 CHECK_ERROR();
1150 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1151 ADJUST_LOCAL_OFFSET(dst, dstw);
1152
1153 CHECK_EXTRA_REGS(dst, dstw, (void)0);
1154
1155 if (FAST_IS_REG(dst)) {
1156 /* Unused dest is possible here. */
1157 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1158 FAIL_IF(!inst);
1159
1160 INC_SIZE(1);
1161 POP_REG(reg_map[dst]);
1162 return SLJIT_SUCCESS;
1163 }
1164
1165 /* Memory. */
1166 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1167 FAIL_IF(!inst);
1168 *inst++ = POP_rm;
1169 return SLJIT_SUCCESS;
1170 }
1171
/* Returns through an address stored earlier (see sljit_emit_fast_enter):
   pushes the target address onto the stack and executes a near RET. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		/* push reg (1 byte) + ret (1 byte). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		/* push [mem]: GROUP_FF opcode with PUSH_rm OR-ed into the ModRM
		   reg field that emit_x86_instruction left behind. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		/* Reserve one more byte for the RET emitted below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
1199
1200 /* --------------------------------------------------------------------- */
1201 /* Other operations */
1202 /* --------------------------------------------------------------------- */
1203
/* Loads or stores a register pair (REG_PAIR_MASK set) from/to two adjacent
   words at mem/memw; plain (non-pair) accesses are delegated to
   sljit_emit_mem_unaligned. For loads, the transfer order is arranged so
   that a destination register that is also part of the address is written
   last (or the address is copied to TMP_REG1 first). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx, offset;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	/* Displacement step between the two words; negated below when the
	   second register must be transferred first. */
	next = SSIZE_OF(sw);

	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		/* Loading into the first register would clobber the address. */
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* Both registers overlap the address: keep a copy of the index
			   register in TMP_REG1. None of them are virtual register so
			   TMP_REG1 will not be used. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			/* Transfer the second word first so regs[0] is written last. */
			next = -SSIZE_OF(sw);

			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		/* Iterate the pair forwards or backwards depending on next. */
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		offset = -1;

		if (reg >= SLJIT_R3 && reg <= SLJIT_S3) {
			/* Virtual register: its value lives in a stack slot, accessed
			   through TMP_REG1. */
			offset = (2 * SSIZE_OF(sw)) + ((reg) - SLJIT_R3) * SSIZE_OF(sw);
			reg = TMP_REG1;

			if (type & SLJIT_MEM_STORE)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
		}

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			/* Second word with an index register: hand-encode
			   mov with ModRM mod=01 (8-bit disp) + SIB, where memw is the
			   scale and the displacement is one word. */
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 4));
			FAIL_IF(!inst);

			INC_SIZE(4);

			inst[0] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			inst[1] = 0x44 | U8(reg_map[reg] << 3);
			inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK];
			inst[3] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		if (!(mem & OFFS_REG_MASK))
			memw += next;

		/* For a load into a virtual register, write the loaded value back
		   into its stack slot. */
		if (!(type & SLJIT_MEM_STORE) && offset != -1)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
1281
skip_frames_before_return(struct sljit_compiler * compiler)1282 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1283 {
1284 sljit_sw size;
1285
1286 /* Don't adjust shadow stack if it isn't enabled. */
1287 if (!cpu_has_shadow_stack())
1288 return SLJIT_SUCCESS;
1289
1290 SLJIT_ASSERT(compiler->args_size >= 0);
1291 SLJIT_ASSERT(compiler->local_size > 0);
1292
1293 size = compiler->local_size;
1294 size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1295 + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1296
1297 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1298 }
1299