1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /* x86 32-bit arch dependent functions. */
28
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
32
emit_do_imm(struct sljit_compiler * compiler,sljit_u8 opcode,sljit_sw imm)33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
34 {
35 sljit_u8 *inst;
36
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(1 + sizeof(sljit_sw));
40 *inst++ = opcode;
41 sljit_unaligned_store_sw(inst, imm);
42 return SLJIT_SUCCESS;
43 }
44
45 /* Size contains the flags as well. */
emit_x86_instruction(struct sljit_compiler * compiler,sljit_uw size,sljit_s32 a,sljit_sw imma,sljit_s32 b,sljit_sw immb)46 static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
47 /* The register or immediate operand. */
48 sljit_s32 a, sljit_sw imma,
49 /* The general operand (not immediate). */
50 sljit_s32 b, sljit_sw immb)
51 {
52 sljit_u8 *inst;
53 sljit_u8 *buf_ptr;
54 sljit_u8 reg_map_b;
55 sljit_uw flags = size;
56 sljit_uw inst_size;
57
58 /* Both cannot be switched on. */
59 SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
60 /* Size flags not allowed for typed instructions. */
61 SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
62 /* Both size flags cannot be switched on. */
63 SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
64 /* SSE2 and immediate is not possible. */
65 SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
66 SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
67 && (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
68 && (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));
69
70 size &= 0xf;
71 inst_size = size;
72
73 if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
74 inst_size++;
75 if (flags & EX86_PREF_66)
76 inst_size++;
77
78 /* Calculate size of b. */
79 inst_size += 1; /* mod r/m byte. */
80 if (b & SLJIT_MEM) {
81 if (!(b & REG_MASK))
82 inst_size += sizeof(sljit_sw);
83 else if (immb != 0 && !(b & OFFS_REG_MASK)) {
84 /* Immediate operand. */
85 if (immb <= 127 && immb >= -128)
86 inst_size += sizeof(sljit_s8);
87 else
88 inst_size += sizeof(sljit_sw);
89 }
90 else if (reg_map[b & REG_MASK] == 5)
91 inst_size += sizeof(sljit_s8);
92
93 if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
94 b |= TO_OFFS_REG(SLJIT_SP);
95
96 if (b & OFFS_REG_MASK)
97 inst_size += 1; /* SIB byte. */
98 }
99
100 /* Calculate size of a. */
101 if (a & SLJIT_IMM) {
102 if (flags & EX86_BIN_INS) {
103 if (imma <= 127 && imma >= -128) {
104 inst_size += 1;
105 flags |= EX86_BYTE_ARG;
106 } else
107 inst_size += 4;
108 }
109 else if (flags & EX86_SHIFT_INS) {
110 imma &= 0x1f;
111 if (imma != 1) {
112 inst_size ++;
113 flags |= EX86_BYTE_ARG;
114 }
115 } else if (flags & EX86_BYTE_ARG)
116 inst_size++;
117 else if (flags & EX86_HALF_ARG)
118 inst_size += sizeof(short);
119 else
120 inst_size += sizeof(sljit_sw);
121 }
122 else
123 SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
124
125 inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
126 PTR_FAIL_IF(!inst);
127
128 /* Encoding the byte. */
129 INC_SIZE(inst_size);
130 if (flags & EX86_PREF_F2)
131 *inst++ = 0xf2;
132 if (flags & EX86_PREF_F3)
133 *inst++ = 0xf3;
134 if (flags & EX86_PREF_66)
135 *inst++ = 0x66;
136
137 buf_ptr = inst + size;
138
139 /* Encode mod/rm byte. */
140 if (!(flags & EX86_SHIFT_INS)) {
141 if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
142 *inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;
143
144 if (a & SLJIT_IMM)
145 *buf_ptr = 0;
146 else if (!(flags & EX86_SSE2_OP1))
147 *buf_ptr = U8(reg_map[a] << 3);
148 else
149 *buf_ptr = U8(a << 3);
150 }
151 else {
152 if (a & SLJIT_IMM) {
153 if (imma == 1)
154 *inst = GROUP_SHIFT_1;
155 else
156 *inst = GROUP_SHIFT_N;
157 } else
158 *inst = GROUP_SHIFT_CL;
159 *buf_ptr = 0;
160 }
161
162 if (!(b & SLJIT_MEM)) {
163 *buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
164 buf_ptr++;
165 } else if (b & REG_MASK) {
166 reg_map_b = reg_map[b & REG_MASK];
167
168 if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) {
169 if (immb != 0 || reg_map_b == 5) {
170 if (immb <= 127 && immb >= -128)
171 *buf_ptr |= 0x40;
172 else
173 *buf_ptr |= 0x80;
174 }
175
176 if (!(b & OFFS_REG_MASK))
177 *buf_ptr++ |= reg_map_b;
178 else {
179 *buf_ptr++ |= 0x04;
180 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
181 }
182
183 if (immb != 0 || reg_map_b == 5) {
184 if (immb <= 127 && immb >= -128)
185 *buf_ptr++ = U8(immb); /* 8 bit displacement. */
186 else {
187 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
188 buf_ptr += sizeof(sljit_sw);
189 }
190 }
191 }
192 else {
193 *buf_ptr++ |= 0x04;
194 *buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
195 }
196 }
197 else {
198 *buf_ptr++ |= 0x05;
199 sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
200 buf_ptr += sizeof(sljit_sw);
201 }
202
203 if (a & SLJIT_IMM) {
204 if (flags & EX86_BYTE_ARG)
205 *buf_ptr = U8(imma);
206 else if (flags & EX86_HALF_ARG)
207 sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
208 else if (!(flags & EX86_SHIFT_INS))
209 sljit_unaligned_store_sw(buf_ptr, imma);
210 }
211
212 return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
213 }
214
215 /* --------------------------------------------------------------------- */
216 /* Enter / return */
217 /* --------------------------------------------------------------------- */
218
/* Writes the opcode of a jump / call / conditional jump with a 32 bit
   operand at code_ptr and advances jump->addr past the opcode bytes.
   When JUMP_LABEL is set the operand is left unwritten and PATCH_MW is
   set on the jump; otherwise the operand is stored now, computed from
   jump->u.target, the end of the instruction and executable_offset.
   Returns the position after the four operand bytes. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	sljit_uw opcode_size = 1;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
	} else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
	} else {
		/* Conditional jumps use a two byte opcode. */
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		opcode_size = 2;
	}

	jump->addr += opcode_size;

	if (jump->flags & JUMP_LABEL) {
		jump->flags |= PATCH_MW;
	} else {
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	}

	return code_ptr + 4;
}
245
/* Status bits collected by sljit_emit_enter while it scans the arguments. */

/* Fastcall builds: the third word argument is a scratch argument, so the
   first word argument is moved out of R2 as soon as it is processed. */
#define ENTER_R2_USED	0x01
/* The first word argument is moved from R2 to S0 after the argument loop. */
#define ENTER_R2_TO_S	0x02
/* The first word argument is moved from R2 to R0 after the argument loop. */
#define ENTER_R2_TO_R0	0x04
/* The second word argument is moved from R1 to a saved register. */
#define ENTER_R1_TO_S	0x08
/* The fourth word argument is a scratch argument; it travels through
   TMP_REG1 and is stored at [SP + scratches_offset]. */
#define ENTER_TMP_TO_R4	0x10
/* The fourth saved argument travels through TMP_REG1 and is stored at
   [SP + locals_offset - sizeof(sljit_sw)]. */
#define ENTER_TMP_TO_S	0x20
252
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)253 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
254 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
255 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
256 {
257 sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
258 sljit_s32 size, locals_offset, args_size, types, status;
259 sljit_u8 *inst;
260 #ifdef _WIN32
261 sljit_s32 r2_offset = -1;
262 #endif
263
264 CHECK_ERROR();
265 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
266 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
267
268 /* Emit ENDBR32 at function entry if needed. */
269 FAIL_IF(emit_endbranch(compiler));
270
271 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
272
273 arg_types >>= SLJIT_ARG_SHIFT;
274 types = arg_types;
275 word_arg_count = 0;
276 saved_arg_count = 0;
277 float_arg_count = 0;
278 args_size = SSIZE_OF(sw);
279 status = 0;
280 while (types) {
281 switch (types & SLJIT_ARG_MASK) {
282 case SLJIT_ARG_TYPE_F64:
283 float_arg_count++;
284 FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
285 args_size += SSIZE_OF(f64);
286 break;
287 case SLJIT_ARG_TYPE_F32:
288 float_arg_count++;
289 FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
290 args_size += SSIZE_OF(f32);
291 break;
292 default:
293 word_arg_count++;
294
295 if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
296 saved_arg_count++;
297 if (saved_arg_count == 4)
298 status |= ENTER_TMP_TO_S;
299 } else {
300 if (word_arg_count == 4)
301 status |= ENTER_TMP_TO_R4;
302 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
303 if (word_arg_count == 3)
304 status |= ENTER_R2_USED;
305 #endif
306 }
307
308 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
309 if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL))
310 break;
311 #endif
312
313 args_size += SSIZE_OF(sw);
314 break;
315 }
316 types >>= SLJIT_ARG_SHIFT;
317 }
318
319 args_size -= SSIZE_OF(sw);
320 compiler->args_size = args_size;
321
322 /* [esp+0] for saving temporaries and function calls. */
323 locals_offset = 2 * SSIZE_OF(sw);
324
325 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
326 if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
327 locals_offset = 4 * SSIZE_OF(sw);
328 #else
329 if (scratches >= 3)
330 locals_offset = 4 * SSIZE_OF(sw);
331 #endif
332
333 compiler->scratches_offset = locals_offset;
334
335 if (scratches > 3)
336 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
337
338 if (saveds > 3)
339 locals_offset += (saveds - 3) * SSIZE_OF(sw);
340
341 compiler->locals_offset = locals_offset;
342
343 size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
344 inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
345 FAIL_IF(!inst);
346
347 INC_SIZE((sljit_uw)size);
348 PUSH_REG(reg_map[TMP_REG1]);
349 if (saveds > 2 || scratches > 9)
350 PUSH_REG(reg_map[SLJIT_S2]);
351 if (saveds > 1 || scratches > 10)
352 PUSH_REG(reg_map[SLJIT_S1]);
353 if (saveds > 0 || scratches > 11)
354 PUSH_REG(reg_map[SLJIT_S0]);
355
356 size *= SSIZE_OF(sw);
357
358 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
359 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);
360
361 size += SSIZE_OF(sw);
362
363 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
364 if (!(options & SLJIT_ENTER_CDECL))
365 size += args_size;
366 #endif
367
368 local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size;
369 compiler->local_size = local_size;
370
371 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
372 if (!(options & SLJIT_ENTER_CDECL))
373 size -= args_size;
374 #endif
375
376 word_arg_count = 0;
377 saved_arg_count = 0;
378 args_size = size;
379 while (arg_types) {
380 switch (arg_types & SLJIT_ARG_MASK) {
381 case SLJIT_ARG_TYPE_F64:
382 args_size += SSIZE_OF(f64);
383 break;
384 case SLJIT_ARG_TYPE_F32:
385 args_size += SSIZE_OF(f32);
386 break;
387 default:
388 word_arg_count++;
389
390 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
391 if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) {
392 if (word_arg_count == 1) {
393 if (status & ENTER_R2_USED) {
394 EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0);
395 } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
396 status |= ENTER_R2_TO_S;
397 saved_arg_count++;
398 } else
399 status |= ENTER_R2_TO_R0;
400 } else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
401 status |= ENTER_R1_TO_S;
402 saved_arg_count++;
403 }
404 break;
405 }
406 #endif
407 if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
408 SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4));
409
410 if (word_arg_count <= 3) {
411 #ifdef _WIN32
412 if (word_arg_count == 3 && local_size > 4 * 4096)
413 r2_offset = local_size + args_size;
414 else
415 #endif
416 EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
417 }
418 } else {
419 SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S));
420
421 if (saved_arg_count <= 3)
422 EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
423 saved_arg_count++;
424 }
425 args_size += SSIZE_OF(sw);
426 break;
427 }
428 arg_types >>= SLJIT_ARG_SHIFT;
429 }
430
431 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
432 if (!(options & SLJIT_ENTER_CDECL)) {
433 if (status & ENTER_R2_TO_R0)
434 EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);
435
436 saved_arg_count = 0;
437 if (status & ENTER_R2_TO_S) {
438 EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
439 saved_arg_count++;
440 }
441
442 if (status & ENTER_R1_TO_S)
443 EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0);
444 }
445 #endif
446
447 SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);
448
449 #ifdef _WIN32
450 SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);
451
452 if (local_size > 4096) {
453 if (local_size <= 4 * 4096) {
454 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
455
456 if (local_size > 2 * 4096)
457 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
458 if (local_size > 3 * 4096)
459 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
460 }
461 else {
462 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);
463
464 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
465 BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
466
467 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
468 FAIL_IF(!inst);
469
470 INC_SIZE(2);
471 inst[0] = LOOP_i8;
472 inst[1] = (sljit_u8)-16;
473 local_size &= 0xfff;
474 }
475 }
476
477 if (local_size > 0) {
478 BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
479 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
480 }
481
482 if (r2_offset != -1)
483 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
484
485 #else /* !_WIN32 */
486
487 SLJIT_ASSERT(local_size > 0);
488
489 BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
490
491 #endif /* _WIN32 */
492
493 if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
494 size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw);
495 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
496 }
497
498 return SLJIT_SUCCESS;
499 }
500
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)501 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
502 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
503 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
504 {
505 sljit_s32 args_size, locals_offset;
506 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
507 sljit_s32 word_arg_count = 0;
508 #endif
509
510 CHECK_ERROR();
511 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
512 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
513
514 arg_types >>= SLJIT_ARG_SHIFT;
515 args_size = 0;
516 while (arg_types) {
517 switch (arg_types & SLJIT_ARG_MASK) {
518 case SLJIT_ARG_TYPE_F64:
519 args_size += SSIZE_OF(f64);
520 break;
521 case SLJIT_ARG_TYPE_F32:
522 args_size += SSIZE_OF(f32);
523 break;
524 default:
525 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
526 if (word_arg_count >= 2)
527 args_size += SSIZE_OF(sw);
528 word_arg_count++;
529 #else
530 args_size += SSIZE_OF(sw);
531 #endif
532 break;
533 }
534 arg_types >>= SLJIT_ARG_SHIFT;
535 }
536
537 compiler->args_size = args_size;
538
539 /* [esp+0] for saving temporaries and function calls. */
540 locals_offset = 2 * SSIZE_OF(sw);
541
542 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
543 if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
544 locals_offset = 4 * SSIZE_OF(sw);
545 #else
546 if (scratches >= 3)
547 locals_offset = 4 * SSIZE_OF(sw);
548 #endif
549
550 compiler->scratches_offset = locals_offset;
551
552 if (scratches > 3)
553 locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);
554
555 if (saveds > 3)
556 locals_offset += (saveds - 3) * SSIZE_OF(sw);
557
558 compiler->locals_offset = locals_offset;
559
560 saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);
561
562 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
563 if (!(options & SLJIT_ENTER_CDECL))
564 saveds += args_size;
565 #endif
566
567 compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds;
568 return SLJIT_SUCCESS;
569 }
570
emit_stack_frame_release(struct sljit_compiler * compiler)571 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
572 {
573 sljit_uw size;
574 sljit_u8 *inst;
575
576 size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
577 (compiler->saveds <= 3 ? compiler->saveds : 3));
578 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
579 FAIL_IF(!inst);
580
581 INC_SIZE(size);
582
583 if (compiler->saveds > 0 || compiler->scratches > 11)
584 POP_REG(reg_map[SLJIT_S0]);
585 if (compiler->saveds > 1 || compiler->scratches > 10)
586 POP_REG(reg_map[SLJIT_S1]);
587 if (compiler->saveds > 2 || compiler->scratches > 9)
588 POP_REG(reg_map[SLJIT_S2]);
589 POP_REG(reg_map[TMP_REG1]);
590
591 return SLJIT_SUCCESS;
592 }
593
sljit_emit_return_void(struct sljit_compiler * compiler)594 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
595 {
596 sljit_uw size;
597 sljit_u8 *inst;
598
599 CHECK_ERROR();
600 CHECK(check_sljit_emit_return_void(compiler));
601
602 SLJIT_ASSERT(compiler->args_size >= 0);
603 SLJIT_ASSERT(compiler->local_size > 0);
604
605 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
606
607 FAIL_IF(emit_stack_frame_release(compiler));
608
609 size = 1;
610 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
611 if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL))
612 size = 3;
613 #endif
614 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
615 FAIL_IF(!inst);
616
617 INC_SIZE(size);
618
619 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
620 if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) {
621 RET_I16(U8(compiler->args_size));
622 return SLJIT_SUCCESS;
623 }
624 #endif
625
626 RET();
627 return SLJIT_SUCCESS;
628 }
629
630 /* --------------------------------------------------------------------- */
631 /* Call / return instructions */
632 /* --------------------------------------------------------------------- */
633
634 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
635
c_fast_call_get_stack_size(sljit_s32 arg_types,sljit_s32 * word_arg_count_ptr)636 static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
637 {
638 sljit_sw stack_size = 0;
639 sljit_s32 word_arg_count = 0;
640
641 arg_types >>= SLJIT_ARG_SHIFT;
642
643 while (arg_types) {
644 switch (arg_types & SLJIT_ARG_MASK) {
645 case SLJIT_ARG_TYPE_F64:
646 stack_size += SSIZE_OF(f64);
647 break;
648 case SLJIT_ARG_TYPE_F32:
649 stack_size += SSIZE_OF(f32);
650 break;
651 default:
652 word_arg_count++;
653 if (word_arg_count > 2)
654 stack_size += SSIZE_OF(sw);
655 break;
656 }
657
658 arg_types >>= SLJIT_ARG_SHIFT;
659 }
660
661 if (word_arg_count_ptr)
662 *word_arg_count_ptr = word_arg_count;
663
664 return stack_size;
665 }
666
/* Prepares the arguments of a fastcall call.
   When the only stack argument is the third word argument, R2 is simply
   pushed. Otherwise, if stack space is needed, stack_size bytes are
   reserved and the floating point arguments and the third / fourth word
   arguments are stored there in argument order (a fourth word argument
   is fetched from [SP + scratches_offset] through TMP_REG1 before the
   stack pointer is moved).
   At the end, if there is at least one word argument, R0 and R2 are
   either exchanged (swap_args) or R0 is copied to R2.
   Returns SLJIT_SUCCESS, or leaves early through the FAIL_IF style
   macros. */
static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) {
		/* A single push is enough. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	} else if (stack_size > 0) {
		if (word_arg_count >= 4) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
		}

		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

		/* stack_size becomes the offset of the next stack argument;
		   the word arguments are counted again by the loop. */
		stack_size = 0;
		word_arg_count = 0;
		float_arg_count = 0;

		for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
			switch (arg_types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;
				/* Only the third and fourth word arguments go to the stack. */
				if (word_arg_count == 3 || word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count == 3) ? SLJIT_R2 : TMP_REG1, 0);
					stack_size += SSIZE_OF(sw);
				}
				break;
			}
		}
	}

	if (word_arg_count <= 0) {
		return SLJIT_SUCCESS;
	}

	if (!swap_args) {
		/* Copy R0 to R2. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);

		inst[0] = MOV_r_rm;
		inst[1] = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]);
	} else {
		/* One byte exchange with the machine register of R2. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);

		inst[0] = U8(XCHG_EAX_r | reg_map[SLJIT_R2]);
	}

	return SLJIT_SUCCESS;
}
738
739 #endif
740
/* Computes how many extra stack bytes a cdecl call needs. Every argument
   takes stack space. Arguments which fit into the first
   compiler->scratches_offset bytes need no extra space; otherwise the
   part above scratches_offset is returned, rounded up to a multiple of 16.
   When word_arg_count_ptr is not NULL, the number of word arguments is
   stored through it. */
static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;

	/* The first shift drops the return type. */
	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			stack_size += SSIZE_OF(sw);
			break;
		}
	}

	if (word_arg_count_ptr) {
		*word_arg_count_ptr = word_arg_count;
	}

	if (stack_size > compiler->scratches_offset) {
		return ((stack_size - compiler->scratches_offset + 0xf) & ~0xf);
	}

	return 0;
}
773
/* Stores the arguments of a cdecl call on the stack in argument order,
   starting at [SP + 0] after stack_size bytes (if any) are reserved.
   Word arguments 1 - 3 are taken from the registers numbered 1 - 3; a
   fourth word argument is fetched from [SP + scratches_offset] into
   TMP_REG1 before the stack pointer is moved. Floating point arguments
   are taken from the float registers in order.
   Returns SLJIT_SUCCESS, or leaves early through the FAIL_IF style
   macros. */
static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;
	sljit_s32 src_reg;
	sljit_u8 *inst;

	if (word_arg_count >= 4) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
	}

	if (stack_size > 0) {
		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);
	}

	/* stack_size becomes the offset of the next argument; the word
	   arguments are counted again by the loop. */
	stack_size = 0;
	word_arg_count = 0;

	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			src_reg = (word_arg_count >= 4) ? TMP_REG1 : word_arg_count;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, src_reg, 0);
			stack_size += SSIZE_OF(sw);
			break;
		}
	}

	return SLJIT_SUCCESS;
}
814
/* Emits the code which follows a call: releases stack_size bytes of
   argument space (if any) and, when the return type is F64 or F32,
   moves the result into SLJIT_FR0. The result is first written to
   [SP + 0] with FSTPS / FSTPD (presumably an x87 store-and-pop) and then
   loaded from there by emit_sse2_load.
   Returns SLJIT_SUCCESS, the result of emit_sse2_load, or leaves early
   through the FAIL_IF style macros. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	const sljit_s32 return_type = arg_types & SLJIT_ARG_MASK;
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0) {
		BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);
	}

	/* Nothing more to do unless the return value is floating point. */
	if (return_type < SLJIT_ARG_TYPE_F64) {
		return SLJIT_SUCCESS;
	}

	single = (return_type == SLJIT_ARG_TYPE_F32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);

	*inst++ = single ? FSTPS : FSTPD;
	/* mod r/m: register field 3, a SIB byte follows. */
	*inst++ = (0x03 << 3) | 0x04;
	/* SIB: base is the stack pointer, index field 4. */
	*inst = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
838
tail_call_with_args(struct sljit_compiler * compiler,sljit_s32 * extra_space,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)839 static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
840 sljit_s32 *extra_space, sljit_s32 arg_types,
841 sljit_s32 src, sljit_sw srcw)
842 {
843 sljit_sw args_size, prev_args_size, saved_regs_size;
844 sljit_sw types, word_arg_count, float_arg_count;
845 sljit_sw stack_size, prev_stack_size, min_size, offset;
846 sljit_sw word_arg4_offset;
847 sljit_u8 r2_offset = 0;
848 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
849 sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
850 #endif
851 sljit_u8* inst;
852
853 ADJUST_LOCAL_OFFSET(src, srcw);
854 CHECK_EXTRA_REGS(src, srcw, (void)0);
855
856 saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
857 + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
858
859 word_arg_count = 0;
860 float_arg_count = 0;
861 arg_types >>= SLJIT_ARG_SHIFT;
862 types = 0;
863 args_size = 0;
864
865 while (arg_types != 0) {
866 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
867
868 switch (arg_types & SLJIT_ARG_MASK) {
869 case SLJIT_ARG_TYPE_F64:
870 args_size += SSIZE_OF(f64);
871 float_arg_count++;
872 break;
873 case SLJIT_ARG_TYPE_F32:
874 args_size += SSIZE_OF(f32);
875 float_arg_count++;
876 break;
877 default:
878 word_arg_count++;
879 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
880 if (!fast_call || word_arg_count > 2)
881 args_size += SSIZE_OF(sw);
882 #else
883 args_size += SSIZE_OF(sw);
884 #endif
885 break;
886 }
887 arg_types >>= SLJIT_ARG_SHIFT;
888 }
889
890 if (args_size <= compiler->args_size
891 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
892 && (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call)
893 #endif /* SLJIT_X86_32_FASTCALL */
894 && 1) {
895 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
896 *extra_space = fast_call ? 0 : args_size;
897 prev_args_size = compiler->args_size;
898 stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
899 #else /* !SLJIT_X86_32_FASTCALL */
900 *extra_space = 0;
901 stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
902 #endif /* SLJIT_X86_32_FASTCALL */
903
904 offset = stack_size + compiler->local_size;
905
906 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
907 if (word_arg_count >= 1) {
908 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
909 r2_offset = sizeof(sljit_sw);
910 }
911 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
912 }
913
914 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
915 if (!(compiler->options & SLJIT_ENTER_CDECL)) {
916 if (!fast_call)
917 offset -= SSIZE_OF(sw);
918
919 if (word_arg_count >= 3) {
920 word_arg4_offset = SSIZE_OF(sw);
921
922 if (word_arg_count + float_arg_count >= 4) {
923 word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw);
924 if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
925 word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64);
926 }
927
928 /* In cdecl mode, at least one more word value must
929 * be present on the stack before the return address. */
930 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0);
931 }
932
933 if (fast_call) {
934 if (args_size < prev_args_size) {
935 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw));
936 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
937 }
938 } else if (prev_args_size > 0) {
939 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size);
940 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
941 }
942 }
943 #endif /* SLJIT_X86_32_FASTCALL */
944
945 while (types != 0) {
946 switch (types & SLJIT_ARG_MASK) {
947 case SLJIT_ARG_TYPE_F64:
948 offset -= SSIZE_OF(f64);
949 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
950 float_arg_count--;
951 break;
952 case SLJIT_ARG_TYPE_F32:
953 offset -= SSIZE_OF(f32);
954 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
955 float_arg_count--;
956 break;
957 default:
958 switch (word_arg_count) {
959 case 1:
960 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
961 if (fast_call) {
962 EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
963 break;
964 }
965 #endif
966 offset -= SSIZE_OF(sw);
967 if (r2_offset != 0) {
968 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
969 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
970 } else
971 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
972 break;
973 case 2:
974 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
975 if (fast_call)
976 break;
977 #endif
978 offset -= SSIZE_OF(sw);
979 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
980 break;
981 case 3:
982 offset -= SSIZE_OF(sw);
983 break;
984 case 4:
985 offset -= SSIZE_OF(sw);
986 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
987 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
988 break;
989 }
990 word_arg_count--;
991 break;
992 }
993 types >>= SLJIT_ARG_SHIFT;
994 }
995
996 BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
997 FAIL_IF(emit_stack_frame_release(compiler));
998
999 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1000 if (args_size < prev_args_size)
1001 BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0);
1002 #endif
1003
1004 return SLJIT_SUCCESS;
1005 }
1006
1007 stack_size = args_size + SSIZE_OF(sw);
1008
1009 if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
1010 r2_offset = SSIZE_OF(sw);
1011 stack_size += SSIZE_OF(sw);
1012 }
1013
1014 if (word_arg_count >= 3)
1015 stack_size += SSIZE_OF(sw);
1016
1017 prev_args_size = 0;
1018 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1019 if (!(compiler->options & SLJIT_ENTER_CDECL))
1020 prev_args_size = compiler->args_size;
1021 #endif
1022
1023 prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
1024 min_size = prev_stack_size + compiler->local_size;
1025
1026 word_arg4_offset = compiler->scratches_offset;
1027
1028 if (stack_size > min_size) {
1029 BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
1030 if (src == SLJIT_MEM1(SLJIT_SP))
1031 srcw += stack_size - min_size;
1032 word_arg4_offset += stack_size - min_size;
1033 }
1034 else
1035 stack_size = min_size;
1036
1037 if (word_arg_count >= 3) {
1038 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);
1039
1040 if (word_arg_count >= 4)
1041 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
1042 }
1043
1044 if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
1045 if (word_arg_count >= 1) {
1046 SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
1047 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
1048 }
1049 EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
1050 }
1051
1052 /* Restore saved registers. */
1053 offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw);
1054 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1055
1056 if (compiler->saveds > 2 || compiler->scratches > 9) {
1057 offset -= SSIZE_OF(sw);
1058 EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1059 }
1060 if (compiler->saveds > 1 || compiler->scratches > 10) {
1061 offset -= SSIZE_OF(sw);
1062 EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1063 }
1064 if (compiler->saveds > 0 || compiler->scratches > 11) {
1065 offset -= SSIZE_OF(sw);
1066 EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
1067 }
1068
1069 /* Copy fourth argument and return address. */
1070 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1071 if (fast_call) {
1072 offset = stack_size;
1073 *extra_space = 0;
1074
1075 if (word_arg_count >= 4 && prev_args_size == 0) {
1076 offset -= SSIZE_OF(sw);
1077 inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1078 FAIL_IF(!inst);
1079 *inst = XCHG_r_rm;
1080
1081 SLJIT_ASSERT(args_size != prev_args_size);
1082 } else {
1083 if (word_arg_count >= 4) {
1084 offset -= SSIZE_OF(sw);
1085 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1086 }
1087
1088 if (args_size != prev_args_size)
1089 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1090 }
1091
1092 if (args_size != prev_args_size)
1093 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
1094 } else {
1095 #endif /* SLJIT_X86_32_FASTCALL */
1096 offset = stack_size - SSIZE_OF(sw);
1097 *extra_space = args_size;
1098
1099 if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) {
1100 offset -= SSIZE_OF(sw);
1101 inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1102 FAIL_IF(!inst);
1103 *inst = XCHG_r_rm;
1104
1105 SLJIT_ASSERT(prev_args_size > 0);
1106 } else {
1107 if (word_arg_count >= 4) {
1108 offset -= SSIZE_OF(sw);
1109 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1110 }
1111
1112 if (prev_args_size > 0)
1113 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1114 }
1115
1116 /* Copy return address. */
1117 if (prev_args_size > 0)
1118 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0);
1119 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1120 }
1121 #endif /* SLJIT_X86_32_FASTCALL */
1122
1123 while (types != 0) {
1124 switch (types & SLJIT_ARG_MASK) {
1125 case SLJIT_ARG_TYPE_F64:
1126 offset -= SSIZE_OF(f64);
1127 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1128 float_arg_count--;
1129 break;
1130 case SLJIT_ARG_TYPE_F32:
1131 offset -= SSIZE_OF(f32);
1132 FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1133 float_arg_count--;
1134 break;
1135 default:
1136 switch (word_arg_count) {
1137 case 1:
1138 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1139 if (fast_call) {
1140 EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
1141 break;
1142 }
1143 #endif
1144 offset -= SSIZE_OF(sw);
1145 if (r2_offset != 0) {
1146 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
1147 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1148 } else
1149 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
1150 break;
1151 case 2:
1152 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1153 if (fast_call)
1154 break;
1155 #endif
1156 offset -= SSIZE_OF(sw);
1157 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
1158 break;
1159 case 3:
1160 offset -= SSIZE_OF(sw);
1161 EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
1162 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1163 break;
1164 }
1165 word_arg_count--;
1166 break;
1167 }
1168 types >>= SLJIT_ARG_SHIFT;
1169 }
1170
1171 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1172 /* Skip return address. */
1173 if (fast_call)
1174 offset -= SSIZE_OF(sw);
1175 #endif
1176
1177 SLJIT_ASSERT(offset >= 0);
1178
1179 if (offset == 0)
1180 return SLJIT_SUCCESS;
1181
1182 BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
1183 return SLJIT_SUCCESS;
1184 }
1185
/*
 * Emits the code that follows a tail call whose stack consumption could
 * not be reduced to zero: extra_space is added to the stack pointer and a
 * return is emitted through RET().
 *
 * Returns SLJIT_SUCCESS; BINARY_IMM32() and FAIL_IF() may bail out earlier.
 */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* NOTE(review): BINARY_IMM32, INC_SIZE and RET receive no buffer
	   argument, so they presumably work on this local by name - keep it. */
	sljit_u8 *inst;

	/* Add the remaining extra space back to the stack pointer. */
	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(inst == NULL);

	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}
1200
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)1201 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
1202 sljit_s32 arg_types)
1203 {
1204 struct sljit_jump *jump;
1205 sljit_sw stack_size = 0;
1206 sljit_s32 word_arg_count;
1207
1208 CHECK_ERROR_PTR();
1209 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
1210
1211 if (type & SLJIT_CALL_RETURN) {
1212 stack_size = type;
1213 PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));
1214
1215 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1216 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1217 compiler->skip_checks = 1;
1218 #endif
1219
1220 if (stack_size == 0) {
1221 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
1222 return sljit_emit_jump(compiler, type);
1223 }
1224
1225 jump = sljit_emit_jump(compiler, type);
1226 PTR_FAIL_IF(jump == NULL);
1227
1228 PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
1229 return jump;
1230 }
1231
1232 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1233 if ((type & 0xff) == SLJIT_CALL) {
1234 stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
1235 PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));
1236
1237 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1238 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1239 compiler->skip_checks = 1;
1240 #endif
1241
1242 jump = sljit_emit_jump(compiler, type);
1243 PTR_FAIL_IF(jump == NULL);
1244
1245 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
1246 return jump;
1247 }
1248 #endif
1249
1250 stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
1251 PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
1252
1253 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1254 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1255 compiler->skip_checks = 1;
1256 #endif
1257
1258 jump = sljit_emit_jump(compiler, type);
1259 PTR_FAIL_IF(jump == NULL);
1260
1261 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
1262 return jump;
1263 }
1264
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)1265 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
1266 sljit_s32 arg_types,
1267 sljit_s32 src, sljit_sw srcw)
1268 {
1269 sljit_sw stack_size = 0;
1270 sljit_s32 word_arg_count;
1271 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1272 sljit_s32 swap_args;
1273 #endif
1274
1275 CHECK_ERROR();
1276 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
1277
1278 if (type & SLJIT_CALL_RETURN) {
1279 stack_size = type;
1280 FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));
1281
1282 if (!(src & SLJIT_IMM)) {
1283 src = SLJIT_R0;
1284 srcw = 0;
1285 }
1286
1287 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1288 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1289 compiler->skip_checks = 1;
1290 #endif
1291
1292 if (stack_size == 0)
1293 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1294
1295 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1296 return emit_tail_call_end(compiler, stack_size);
1297 }
1298
1299 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1300 SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);
1301
1302 if ((type & 0xff) == SLJIT_CALL) {
1303 stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
1304 swap_args = 0;
1305
1306 if (word_arg_count > 0) {
1307 if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
1308 swap_args = 1;
1309 if (((src & REG_MASK) | 0x2) == SLJIT_R2)
1310 src ^= 0x2;
1311 if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
1312 src ^= TO_OFFS_REG(0x2);
1313 }
1314 }
1315
1316 FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));
1317
1318 compiler->scratches_offset += stack_size;
1319 compiler->locals_offset += stack_size;
1320
1321 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1322 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1323 compiler->skip_checks = 1;
1324 #endif
1325 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1326
1327 compiler->scratches_offset -= stack_size;
1328 compiler->locals_offset -= stack_size;
1329
1330 return post_call_with_args(compiler, arg_types, 0);
1331 }
1332 #endif
1333
1334 stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
1335 FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));
1336
1337 compiler->scratches_offset += stack_size;
1338 compiler->locals_offset += stack_size;
1339
1340 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
1341 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
1342 compiler->skip_checks = 1;
1343 #endif
1344 FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
1345
1346 compiler->scratches_offset -= stack_size;
1347 compiler->locals_offset -= stack_size;
1348
1349 return post_call_with_args(compiler, arg_types, stack_size);
1350 }
1351
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)1352 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1353 {
1354 sljit_u8 *inst;
1355
1356 CHECK_ERROR();
1357 CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1358 ADJUST_LOCAL_OFFSET(dst, dstw);
1359
1360 CHECK_EXTRA_REGS(dst, dstw, (void)0);
1361
1362 if (FAST_IS_REG(dst)) {
1363 /* Unused dest is possible here. */
1364 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1365 FAIL_IF(!inst);
1366
1367 INC_SIZE(1);
1368 POP_REG(reg_map[dst]);
1369 return SLJIT_SUCCESS;
1370 }
1371
1372 /* Memory. */
1373 inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1374 FAIL_IF(!inst);
1375 *inst++ = POP_rm;
1376 return SLJIT_SUCCESS;
1377 }
1378
/*
 * Emits a push of src / srcw followed by a return through RET().
 *
 * Returns SLJIT_SUCCESS; FAIL_IF() may leave the function early when a
 * buffer or instruction could not be obtained.
 */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	/* NOTE(review): INC_SIZE, PUSH_REG and RET receive no buffer argument,
	   so they presumably work on this local by name - keep it. */
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		/* The register push and the return share one buffer request. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(inst == NULL);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	} else {
		/* Memory push: GROUP_FF first, PUSH_rm or-ed into the next byte. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(inst == NULL);
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		/* The return gets a buffer request of its own. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(inst == NULL);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
1406
skip_frames_before_return(struct sljit_compiler * compiler)1407 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1408 {
1409 sljit_sw size;
1410
1411 /* Don't adjust shadow stack if it isn't enabled. */
1412 if (!cpu_has_shadow_stack())
1413 return SLJIT_SUCCESS;
1414
1415 SLJIT_ASSERT(compiler->args_size >= 0);
1416 SLJIT_ASSERT(compiler->local_size > 0);
1417
1418 size = compiler->local_size;
1419 size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1420 + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1421
1422 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1423 }
1424