1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 /* x86 32-bit arch dependent functions. */
28
29 /* --------------------------------------------------------------------- */
30 /* Operators */
31 /* --------------------------------------------------------------------- */
32
emit_do_imm(struct sljit_compiler * compiler,sljit_u8 opcode,sljit_sw imm)33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
34 {
35 sljit_u8 *inst;
36
37 inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 FAIL_IF(!inst);
39 INC_SIZE(1 + sizeof(sljit_sw));
40 *inst++ = opcode;
41 sljit_unaligned_store_sw(inst, imm);
42 return SLJIT_SUCCESS;
43 }
44
/* Size contains the flags as well. */
/* Emits one x86 instruction: [prefix] [opcode byte(s)] modrm [sib] [disp8/32] [imm].
   The low 4 bits of "size" give the opcode byte count reserved for the caller;
   the remaining bits are EX86_* flags. Returns a pointer to the reserved opcode
   byte(s) (after the optional prefix), or NULL if the buffer cannot be grown. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
	/* At most one mandatory prefix may be requested: x & (x - 1) is zero
	   only when x is 0 or a power of two. */
	SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
	SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);

	/* Strip the flag bits; the low nibble is the opcode byte count. */
	size &= 0xf;
	/* The mod r/m byte is always present. */
	inst_size = size + 1;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		inst_size++;

	/* Calculate size of b. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw); /* Absolute address: disp32 only. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_sw);
			} else if (reg_map[b & REG_MASK] == 5) {
				/* A base of ebp (rm=101 with mod=00 means disp32-only) has no
				   displacement-free encoding: either swap base and index or
				   pay for an explicit zero disp8. */
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			/* esp as base always requires a SIB byte (rm=100 selects SIB). */
			if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK)
				inst_size += 1; /* SIB byte. */
		}
	}

	/* Calculate size of a. */
	if (a == SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG; /* Use the sign-extended imm8 form. */
			} else
				inst_size += 4;
		} else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= 0x1f);
			if (imma != 1) {
				/* Shift by 1 has a dedicated opcode without an imm byte. */
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	} else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	else if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	else if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* buf_ptr points just past the opcode byte(s) reserved for the caller. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a == SLJIT_IMM)
			*buf_ptr = 0; /* The /digit extension is or-ed in by the caller. */
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3);
		else
			*buf_ptr = U8(freg_map[a] << 3);
	} else {
		if (a == SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand: mod = 11. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : freg_map[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* mod = 01 (disp8) or 10 (disp32); ebp base always needs a disp. */
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				/* rm = 100 selects the SIB byte. */
				buf_ptr[0] |= 0x04;
				buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
				buf_ptr += 2;
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		} else {
			/* Base + scaled index; here immb holds the scale shift (bits 6-7
			   of the SIB byte). An ebp base needs an explicit zero disp8. */
			if (reg_map_b == 5)
				*buf_ptr |= 0x40;

			buf_ptr[0] |= 0x04;
			buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
			buf_ptr += 2;

			if (reg_map_b == 5)
				*buf_ptr++ = 0;
		}
	} else {
		/* No base register: mod = 00, rm = 101 (absolute disp32). */
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	/* Append the immediate operand, if any. */
	if (a == SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return inst;
}
221
/* Emits a VEX encoded instruction, using the compact two byte (0xc5) form
   when possible and the three byte (0xc4) form otherwise. "op" carries the
   EX86_ / VEX_ flag bits in its upper bits and the opcode in its low byte;
   "v" is the extra source register encoded in the (inverted) vvvv field. */
static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
	/* The first and second register operand. */
	sljit_s32 a, sljit_s32 v,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 vex = 0;
	sljit_u8 vex_m = 0;
	sljit_uw size;

	/* At most one mandatory prefix can be requested. */
	SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);

	/* mmmmm field: selects the implied 0F38 / 0F3A opcode map. */
	if (op & VEX_OP_0F38)
		vex_m = 0x2;
	else if (op & VEX_OP_0F3A)
		vex_m = 0x3;

	if (op & VEX_W) {
		/* VEX.W only exists in the three byte form; force it. */
		if (vex_m == 0)
			vex_m = 0x1;

		vex |= 0x80;
	}

	/* pp field: the implied SIMD prefix (66 / F3 / F2). */
	if (op & EX86_PREF_66)
		vex |= 0x1;
	else if (op & EX86_PREF_F2)
		vex |= 0x3;
	else if (op & EX86_PREF_F3)
		vex |= 0x2;

	/* Clear the prefix flags so emit_x86_instruction does not emit them
	   as legacy prefixes as well. */
	op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);

	if (op & VEX_256)
		vex |= 0x4; /* VEX.L: 256 bit vector length. */

	/* The vvvv register field is stored bit-inverted. */
	vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));

	/* Reserve 3 (two byte form) or 4 (three byte form) opcode bytes while
	   keeping the remaining flag bits for emit_x86_instruction. */
	size = op & ~(sljit_uw)0xff;
	size |= (vex_m == 0) ? 3 : 4;

	inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
	FAIL_IF(!inst);

	if (vex_m == 0) {
		/* Two byte form: c5, R.vvvv.L.pp, opcode. */
		inst[0] = 0xc5;
		inst[1] = U8(vex | 0x80);
		inst[2] = U8(op);
		return SLJIT_SUCCESS;
	}

	/* Three byte form: c4, R.X.B.mmmmm, W.vvvv.L.pp, opcode. */
	inst[0] = 0xc4;
	inst[1] = U8(vex_m | 0xe0);
	inst[2] = vex;
	inst[3] = U8(op);
	return SLJIT_SUCCESS;
}
281
282 /* --------------------------------------------------------------------- */
283 /* Enter / return */
284 /* --------------------------------------------------------------------- */
285
/* Emits the long (rel32) form of a jump or call at code_ptr. Advances
   jump->addr past the opcode byte(s) so it points at the displacement
   slot, and returns the updated code_ptr. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		/* Unconditional: single byte jmp rel32 opcode. */
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		/* Conditional jump: two byte 0f 8x opcode. */
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW; /* Label target: displacement patched later. */
	else
		/* Absolute target known now: store the rel32 displacement relative
		   to the end of the instruction (addr + 4). */
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	code_ptr += 4;

	return code_ptr;
}
312
/* Status bits: the fourth word argument is first loaded into TMP_REG1 and
   later spilled either into the R4 stack slot or into a saved-register
   stack slot once the frame is established. */
#define ENTER_TMP_TO_R4 0x00001
#define ENTER_TMP_TO_S 0x00002

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
	sljit_s32 size, args_size, types, status;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_u8 *inst;
#ifdef _WIN32
	/* Stack offset where R2 is parked while it serves as the page counter
	   of the stack probe loop; -1 when not needed. */
	sljit_s32 r2_offset = -1;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	status = 0;

	if (options & SLJIT_ENTER_REG_ARG) {
		/* Register-argument convention: only the fourth (and later) word
		   argument needs a stack slot. */
		args_size = 3 * SSIZE_OF(sw);

		while (arg_types) {
			if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
				word_arg_count++;
				if (word_arg_count >= 4)
					status |= ENTER_TMP_TO_R4;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}

		compiler->args_size = 0;
	} else {
		/* Stack-argument convention: scan the argument list, load float
		   arguments immediately and compute the argument area size. */
		types = arg_types;
		saved_arg_count = 0;
		float_arg_count = 0;
		args_size = SSIZE_OF(sw); /* Skip over the return address. */
		while (types) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;

				if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
					saved_arg_count++;

				if (word_arg_count == 4) {
					/* There is no fourth argument register: route it through
					   TMP_REG1 and remove it from the second pass below. */
					if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
						status |= ENTER_TMP_TO_R4;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					} else if (saved_arg_count == 4) {
						status |= ENTER_TMP_TO_S;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					}
				}

				args_size += SSIZE_OF(sw);
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		args_size -= SSIZE_OF(sw);
		compiler->args_size = args_size;
	}

	/* Number of registers pushed by the prologue. */
	size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
	if (!(options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 is pushed as well. */

	if (size != 0) {
		inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
		FAIL_IF(!inst);

		INC_SIZE((sljit_uw)size);

		if (!(options & SLJIT_ENTER_REG_ARG))
			PUSH_REG(reg_map[TMP_REG1]);

		if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9)
			PUSH_REG(reg_map[SLJIT_S2]);
		if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
			PUSH_REG(reg_map[SLJIT_S1]);
		if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
			PUSH_REG(reg_map[SLJIT_S0]);

		size *= SSIZE_OF(sw); /* From here on: bytes pushed by the prologue. */
	}

	/* The pushes above shifted the incoming arguments; "size" compensates. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);

	size += SSIZE_OF(sw); /* Include the return address. */

	/* Round the frame so the stack pointer stays 16 byte aligned. */
	local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size;
	compiler->local_size = local_size;

	/* Second pass: move word arguments into their target registers. */
	word_arg_count = 0;
	saved_arg_count = 0;
	args_size = size;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));

			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
#ifdef _WIN32
				/* R2 is needed by the probe loop below: defer the load and
				   remember where the value lives after the esp adjustment. */
				if (word_arg_count == 3 && local_size > 4 * 4096)
					r2_offset = local_size + args_size;
				else
#endif
					EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);

			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
				saved_arg_count++;
			}

			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#ifdef _WIN32
	SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);

	/* Windows requires every new 4K stack page to be touched in order
	   (stack probing), otherwise the guard page mechanism is bypassed. */
	if (local_size > 4096) {
		if (local_size <= 4 * 4096) {
			/* Up to three explicit probes suffice. */
			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);

			if (local_size > 2 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			/* Probe loop with R2 as the page counter. */
			if (options & SLJIT_ENTER_REG_ARG) {
				SLJIT_ASSERT(r2_offset == -1);

				/* R2 carries an incoming argument here: preserve it. */
				inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1));
				FAIL_IF(!inst);
				INC_SIZE(1);
				PUSH_REG(reg_map[SLJIT_R2]);

				local_size -= SSIZE_OF(sw);
				r2_offset = local_size;
			}

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);

			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = LOOP_i8;
			inst[1] = (sljit_u8)-16; /* Branch back over the or + sub pair. */
			local_size &= 0xfff; /* Only the remainder is left to allocate. */
		}
	}

	if (local_size > 0) {
		BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
	}

	/* Reload the argument whose load was deferred during probing. */
	if (r2_offset != -1)
		EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);

#else /* !_WIN32 */

	SLJIT_ASSERT(local_size > 0);

	BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#endif /* _WIN32 */

	/* Spill saved arguments that did not fit into the S registers. */
	size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw);
	kept_saveds_count = SLJIT_R3 - kept_saveds_count;

	while (saved_arg_count > 3) {
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0);
		kept_saveds_count++;
		size -= SSIZE_OF(sw);
		saved_arg_count--;
	}

	/* Store the fourth word argument kept in TMP_REG1 until now. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
		if (status & ENTER_TMP_TO_R4)
			size = 2 * SSIZE_OF(sw);

		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
540
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)541 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
542 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
543 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
544 {
545 sljit_s32 args_size;
546
547 CHECK_ERROR();
548 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
549 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
550
551 arg_types >>= SLJIT_ARG_SHIFT;
552 args_size = 0;
553
554 if (!(options & SLJIT_ENTER_REG_ARG)) {
555 while (arg_types) {
556 switch (arg_types & SLJIT_ARG_MASK) {
557 case SLJIT_ARG_TYPE_F64:
558 args_size += SSIZE_OF(f64);
559 break;
560 case SLJIT_ARG_TYPE_F32:
561 args_size += SSIZE_OF(f32);
562 break;
563 default:
564 args_size += SSIZE_OF(sw);
565 break;
566 }
567 arg_types >>= SLJIT_ARG_SHIFT;
568 }
569 }
570
571 compiler->args_size = args_size;
572
573 /* [esp+0] for saving temporaries and for function calls. */
574
575 saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
576
577 /* Saving ebp. */
578 if (!(options & SLJIT_ENTER_REG_ARG))
579 saveds += SSIZE_OF(sw);
580
581 compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds;
582 return SLJIT_SUCCESS;
583 }
584
/* Releases the current stack frame: frees the local area and pops the
   saved registers in reverse push order. When "is_return_to" is non-zero
   one extra word (used by sljit_emit_return_to) is discarded as well. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	sljit_s32 local_size, saveds;
	sljit_uw size;
	sljit_u8 *inst;

	/* Number of registers popped by the epilogue. */
	size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);

	local_size = compiler->local_size;

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 was pushed by the prologue as well. */
	else if (is_return_to && size == 0) {
		/* Nothing is popped: fold the extra word into the local area. */
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	if (size == 0)
		return SLJIT_SUCCESS;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	saveds = compiler->saveds;

	/* Reverse order of the pushes in sljit_emit_enter. */
	if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		POP_REG(reg_map[TMP_REG1]);

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}
632
/* Emits a void return: tears down the stack frame, then a near ret. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

	FAIL_IF(emit_stack_frame_release(compiler, 0));

	return emit_byte(compiler, RET_near);
}
645
/* Returns by jumping to "src": the frame is released first, then an
   indirect jump transfers control to the target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	/* Memory operands and the restored saved registers become invalid once
	   the frame is released: move the target into a surviving register. */
	if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		CHECK_EXTRA_REGS(src, srcw, (void)0);

		src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? TMP_REG1 : SLJIT_R1;

		EMIT_MOV(compiler, src_r, 0, src, srcw);
		src = src_r;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
670
671 /* --------------------------------------------------------------------- */
672 /* Call / return instructions */
673 /* --------------------------------------------------------------------- */
674
/* Computes the extra stack bytes (16 byte aligned) a call needs beyond
   the four-word scratch area, and optionally reports the number of word
   arguments through word_arg_count_ptr. Returns 0 when the scratch area
   is sufficient. */
static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 type;

	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		type = arg_types & SLJIT_ARG_MASK;

		if (type == SLJIT_ARG_TYPE_F64)
			stack_size += SSIZE_OF(f64);
		else if (type == SLJIT_ARG_TYPE_F32)
			stack_size += SSIZE_OF(f32);
		else {
			word_arg_count++;
			stack_size += SSIZE_OF(sw);
		}
	}

	if (word_arg_count_ptr != NULL)
		*word_arg_count_ptr = word_arg_count;

	/* The first four words fit into the always-reserved area. */
	if (stack_size <= 4 * SSIZE_OF(sw))
		return 0;

	return ((stack_size - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf);
}
707
/* Copies the outgoing arguments of a call into the stack argument area.
   "stack_size" is the extra space computed by call_get_stack_size; when
   "keep_tmp1" is non-zero TMP_REG1 must be preserved, so the fourth word
   argument is routed through R0 instead of TMP_REG1. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1)
{
	sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset;
	/* NOTE(review): "inst" appears unused here but is presumably referenced
	   inside the BINARY_IMM32 macro expansion - verify before removing. */
	sljit_u8 *inst;

	if (word_arg_count >= 4) {
		arg4_reg = SLJIT_R0;

		if (!keep_tmp1) {
			/* The fourth word argument lives at [esp + 2 * sizeof(sw)]. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
			arg4_reg = TMP_REG1;
		}
	}

	if (stack_size > 0)
		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

	arg_offset = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0);

			/* When R0 carries the fourth argument, reload R0's own value
			   from above the newly reserved area. */
			if (word_arg_count == 1 && arg4_reg == SLJIT_R0)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size);

			arg_offset += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
758
/* Cleans up after a call: pops the argument area and, for a floating
   point return value, moves the result from the x87 stack to SLJIT_FR0.
   "arg_types" is inspected only for its return type field. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);

	/* Only F64/F32 return values need the fstp + SSE reload below. */
	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	/* fstps/fstpd [esp]: modrm = mod 00, /3 extension, rm 100 (SIB),
	   then a SIB byte selecting esp with no index. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	/* Load the spilled value into the SSE return register. */
	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
782
/* Prepares a tail call: rewrites the current frame so the outgoing
   arguments overlay the incoming argument area when possible. On return
   *extra_space is 0 when the frame could be fully released, otherwise it
   holds the stack bytes the jump target must release (see
   emit_tail_call_end). "src"/"srcw" describe the call target, which is
   moved into R0 when it would be clobbered by the argument shuffling. */
static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 *extra_space, sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw args_size, saved_regs_size;
	sljit_sw types, word_arg_count, float_arg_count;
	sljit_sw stack_size, prev_stack_size, min_size, offset;
	sljit_sw word_arg4_offset;
	sljit_u8 r2_offset = 0;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	/* NOTE(review): "inst" is presumably referenced inside the BINARY_IMM32
	   macro expansion - verify before removing. */
	sljit_u8* inst;

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	/* Bytes occupied by TMP_REG1 plus the pushed saved registers. */
	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);

	/* First pass: reverse the argument list into "types" (so it can be
	   processed back to front) and measure the outgoing argument area. */
	word_arg_count = 0;
	float_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;
	types = 0;
	args_size = 0;

	while (arg_types != 0) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			float_arg_count++;
			break;
		default:
			word_arg_count++;
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (args_size <= compiler->args_size) {
		/* The outgoing arguments fit into the incoming argument area:
		   store them in place (top down) and release the whole frame. */
		*extra_space = 0;
		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;

		offset = stack_size + compiler->local_size;

		/* The target would be clobbered below: park R0 at [esp] if it
		   carries an argument, then load the target into R0. */
		if (src != SLJIT_IMM && src != SLJIT_R0) {
			if (word_arg_count >= 1) {
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				r2_offset = sizeof(sljit_sw);
			}
			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
		}

		/* Process the reversed list: last argument first, offsets shrink. */
		while (types != 0) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				offset -= SSIZE_OF(f64);
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			case SLJIT_ARG_TYPE_F32:
				offset -= SSIZE_OF(f32);
				/* NOTE(review): passes single=0 (double store) for an F32 slot,
				   unlike the single=1 loads in sljit_emit_enter - confirm this
				   is intentional against upstream sljit. */
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			default:
				switch (word_arg_count) {
				case 1:
					/* First argument: currently in R0, or parked at [esp]. */
					offset -= SSIZE_OF(sw);
					if (r2_offset != 0) {
						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					} else
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
					break;
				case 2:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
					break;
				case 3:
					/* Third argument is stored later; only account for it. */
					offset -= SSIZE_OF(sw);
					break;
				case 4:
					/* Fourth argument lives at [esp + 2 * sizeof(sw)]. */
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					break;
				}
				word_arg_count--;
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		return emit_stack_frame_release(compiler, 0);
	}

	/* The outgoing arguments do not fit: a larger area is needed and part
	   of the stack cannot be released before the jump. */
	stack_size = args_size + SSIZE_OF(sw);

	if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) {
		r2_offset = SSIZE_OF(sw);
		stack_size += SSIZE_OF(sw);
	}

	if (word_arg_count >= 3)
		stack_size += SSIZE_OF(sw);

	prev_stack_size = SSIZE_OF(sw) + saved_regs_size;
	min_size = prev_stack_size + compiler->local_size;

	word_arg4_offset = 2 * SSIZE_OF(sw);

	if (stack_size > min_size) {
		/* Grow the stack; adjust esp-relative operands accordingly. */
		BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
		if (src == SLJIT_MEM1(SLJIT_SP))
			srcw += stack_size - min_size;
		word_arg4_offset += stack_size - min_size;
	}
	else
		stack_size = min_size;

	if (word_arg_count >= 3) {
		/* Park R2 (it is needed as a scratch below). */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);

		if (word_arg_count >= 4)
			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
	}

	if (src != SLJIT_IMM && src != SLJIT_R0) {
		if (word_arg_count >= 1) {
			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
		}
		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
	}

	/* Restore saved registers. */
	offset = stack_size - 2 * SSIZE_OF(sw);
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);

	if (compiler->saveds > 2 || compiler->scratches > 9) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}

	/* Copy fourth argument and return address. */
	offset = stack_size - SSIZE_OF(sw);
	*extra_space = args_size;

	if (word_arg_count >= 4) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
	}

	/* Second store pass over the reversed argument list. */
	while (types != 0) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			offset -= SSIZE_OF(f64);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			offset -= SSIZE_OF(f32);
			/* NOTE(review): single=0 for an F32 slot here as well - see the
			   note in the first pass above. */
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		default:
			switch (word_arg_count) {
			case 1:
				offset -= SSIZE_OF(sw);
				if (r2_offset != 0) {
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				} else
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
				break;
			case 2:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
				break;
			case 3:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				break;
			}
			word_arg_count--;
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(offset >= 0);

	if (offset == 0)
		return SLJIT_SUCCESS;

	/* Drop the slack between esp and the copied argument block. */
	BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
	return SLJIT_SUCCESS;
}
996
/* Emitted after the tail-call jump target returns here: releases the
   remaining "extra_space" bytes and returns to the original caller. */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	/* NOTE(review): "inst" is presumably referenced inside the BINARY_IMM32
	   macro expansion - verify before removing. */
	sljit_u8 *inst;

	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);
	return emit_byte(compiler, RET_near);
}
1005
/* Prepares a register-argument tail call: when a fourth word argument
   exists, its value is copied from the current frame's R4 slot into the
   caller frame's R4 slot (just above this function's saved registers and
   return address). */
static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_s32 word_arg_count = 0;
	sljit_s32 kept_saveds_count, offset, pushed_size;

	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;
	}

	/* With at most three word arguments everything stays in registers. */
	if (word_arg_count <= 3)
		return SLJIT_SUCCESS;

	/* Load the fourth argument from this frame's R4 slot. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));

	kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);

	/* Bytes occupied by the registers pushed in the prologue. */
	pushed_size = 0;
	if (compiler->scratches > 11 || (compiler->saveds > 0 && kept_saveds_count == 0))
		pushed_size += SSIZE_OF(sw);
	if (compiler->scratches > 10 || (compiler->saveds > 1 && kept_saveds_count <= 1))
		pushed_size += SSIZE_OF(sw);
	if (compiler->scratches > 9 || (compiler->saveds > 2 && kept_saveds_count <= 2))
		pushed_size += SSIZE_OF(sw);

	/* Skip the local area, three reserved words and the pushed registers. */
	offset = compiler->local_size + 3 * SSIZE_OF(sw) + pushed_size;

	return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
}
1037
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)1038 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
1039 sljit_s32 arg_types)
1040 {
1041 struct sljit_jump *jump;
1042 sljit_sw stack_size = 0;
1043 sljit_s32 word_arg_count;
1044
1045 CHECK_ERROR_PTR();
1046 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
1047
1048 if (type & SLJIT_CALL_RETURN) {
1049 if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
1050 PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));
1051 PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
1052
1053 SLJIT_SKIP_CHECKS(compiler);
1054 return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
1055 }
1056
1057 stack_size = type;
1058 PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));
1059
1060 SLJIT_SKIP_CHECKS(compiler);
1061
1062 if (stack_size == 0)
1063 return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
1064
1065 jump = sljit_emit_jump(compiler, type);
1066 PTR_FAIL_IF(jump == NULL);
1067
1068 PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
1069 return jump;
1070 }
1071
1072 if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
1073 SLJIT_SKIP_CHECKS(compiler);
1074 return sljit_emit_jump(compiler, type);
1075 }
1076
1077 stack_size = call_get_stack_size(arg_types, &word_arg_count);
1078 PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));
1079
1080 SLJIT_SKIP_CHECKS(compiler);
1081 jump = sljit_emit_jump(compiler, type);
1082 PTR_FAIL_IF(jump == NULL);
1083
1084 PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
1085 return jump;
1086 }
1087
/* Emits an indirect call (or an indirect tail call when SLJIT_CALL_RETURN
   is set) to the address in src/srcw, preparing the arguments described by
   arg_types first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: release the frame and jump to the target. */
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));

			/* The target must survive the frame release: move it into
			   TMP_REG1 when it is in memory or in a saved register that
			   is about to be restored. */
			if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
				ADJUST_LOCAL_OFFSET(src, srcw);
				CHECK_EXTRA_REGS(src, srcw, (void)0);

				EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
				src = TMP_REG1;
				srcw = 0;
			}

			FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
		}

		/* tail_call_with_args takes 'type' in stack_size and reports back
		   the bytes that still must be released after the jump. */
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		/* Non-immediate targets were presumably relocated into SLJIT_R0
		   by tail_call_with_args — confirm against its implementation. */
		if (src != SLJIT_IMM) {
			src = SLJIT_R0;
			srcw = 0;
		}

		SLJIT_SKIP_CHECKS(compiler);

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		/* Register arguments: jump directly to the target. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_ijump(compiler, type, src, srcw);
	}

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src & SLJIT_MEM) {
		/* Load the target before the argument pushes disturb the stack. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1));

	/* Rebase an SP-relative target by the space the arguments consumed. */
	if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP))
		srcw += stack_size;

	SLJIT_SKIP_CHECKS(compiler);
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	return post_call_with_args(compiler, arg_types, stack_size);
}
1159
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1160 static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1161 {
1162 sljit_u8* inst;
1163
1164 if (compiler->options & SLJIT_ENTER_REG_ARG) {
1165 if (src == SLJIT_FR0)
1166 return SLJIT_SUCCESS;
1167
1168 SLJIT_SKIP_CHECKS(compiler);
1169 return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
1170 }
1171
1172 if (FAST_IS_REG(src)) {
1173 FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src));
1174
1175 src = SLJIT_MEM1(SLJIT_SP);
1176 srcw = 0;
1177 } else {
1178 ADJUST_LOCAL_OFFSET(src, srcw);
1179 }
1180
1181 inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw);
1182 *inst = (op & SLJIT_32) ? FLDS : FLDL;
1183
1184 return SLJIT_SUCCESS;
1185 }
1186
/* Pops the return address pushed by the caller into dst/dstw. */
static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (!FAST_IS_REG(dst)) {
		/* POP r/m: pop directly into the memory operand. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = POP_rm;
		return SLJIT_SUCCESS;
	}

	/* Register destination; unused dest is possible here. */
	return emit_byte(compiler, U8(POP_r + reg_map[dst]));
}
1203
/* Emits a "fast return": pushes the return address in src/srcw and then
   executes a near RET, transferring control to that address. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		/* One byte PUSH reg plus one byte RET. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		/* PUSH r/m: GROUP_FF with the /6 extension OR-ed into ModRM. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		/* Reserve one more byte for the RET below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
1231
/* Loads the caller's return address (stored above the locals and the
   saved registers) into dst/dstw. */
static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 options = compiler->options;
	sljit_s32 scratches = compiler->scratches;
	sljit_s32 saved_slots;
	sljit_sw offset;

	/* Number of register slots pushed by the prologue. */
	saved_slots = (compiler->saveds <= 3) ? compiler->saveds : 3;
	if (scratches > 9)
		saved_slots += scratches - 9;
	saved_slots -= SLJIT_KEPT_SAVEDS_COUNT(options);

	offset = compiler->local_size + saved_slots * SSIZE_OF(sw);

	/* Saving ebp. */
	if (!(options & SLJIT_ENTER_REG_ARG))
		offset += SSIZE_OF(sw);

	return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), offset);
}
1247
1248 /* --------------------------------------------------------------------- */
1249 /* Other operations */
1250 /* --------------------------------------------------------------------- */
1251
/* Emits a load or store of a register pair to/from memory; a plain
   register is delegated to sljit_emit_mem_unaligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx, offset;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	/* 'next' is the per-word displacement step; a negative step makes the
	   pair transfer run in reverse order (second register first). */
	next = SSIZE_OF(sw);

	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		/* Loading into regs[0] first would clobber the address. */
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* None of them are virtual register so TMP_REG1 will not be used. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			/* Rebuild the address with the index copied into TMP_REG1. */
			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			next = -SSIZE_OF(sw);

			/* Reverse order: start at the upper word. */
			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		/* Walk the pair forward or backward depending on 'next'. */
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		/* offset != -1 marks a virtual register handled via TMP_REG1. */
		offset = -1;

		if (reg >= SLJIT_R3 && reg <= SLJIT_S3) {
			/* Virtual register: its value lives in a stack slot. */
			offset = (2 * SSIZE_OF(sw)) + ((reg) - SLJIT_R3) * SSIZE_OF(sw);
			reg = TMP_REG1;

			if (type & SLJIT_MEM_STORE)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
		}

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			/* Second word of a base+index address: hand-encode
			   MOV with ModRM 0x44 (disp8 + SIB) and a displacement of
			   one machine word. */
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 4));
			FAIL_IF(!inst);

			INC_SIZE(4);

			inst[0] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			inst[1] = 0x44 | U8(reg_map[reg] << 3);
			/* SIB: scale bits from memw, then index and base registers. */
			inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK];
			inst[3] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		if (!(mem & OFFS_REG_MASK))
			memw += next;

		/* A loaded virtual register is written back to its stack slot. */
		if (!(type & SLJIT_MEM_STORE) && offset != -1)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
1329
/* Converts an unsigned machine word to a floating point value. x86 only
   offers signed conversions (CVTSI2SD/SS), so values with the top bit set
   need a correction step. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst, *jump_inst1, *jump_inst2;
	sljit_uw size1, size2;

	/* Binary representation of 0x80000000. */
	static const sljit_f64 f64_high_bit = (sljit_f64)0x80000000ul;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (!(op & SLJIT_32)) {
		/* f64 result: convert the value with its top bit cleared, then
		   add 2^31 back when that bit was set. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);

		/* ROL 1 ... */
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= ROL;

		/* ... then SHR 1: clears the top bit of the value and leaves the
		   original top bit in the carry flag. */
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= SHR;

		FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));

		/* Short conditional jump over the correction when the carry is
		   clear; subtracting 0x10 turns the jcc opcode into its short
		   (rel8) form. The displacement is patched below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10);

		size1 = compiler->size;
		/* dst += 2^31 (restores the cleared top bit). */
		FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit));

		/* Patch the short jump displacement. */
		inst[1] = U8(compiler->size - size1);

		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, 0, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (!FAST_IS_REG(src)) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
	}

	/* Values that are non-negative as signed integers convert directly. */
	BINARY_IMM32(CMP, 0, src, 0);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JL_i8;
	jump_inst1 = inst;

	size1 = compiler->size;

	/* Fast path: plain signed conversion. */
	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	jump_inst2 = inst;

	size2 = compiler->size;

	/* Patch the JL displacement to reach the slow path below. */
	jump_inst1[1] = U8(size2 - size1);

	if (src != TMP_REG1)
		EMIT_MOV(compiler, TMP_REG1, 0, src, 0);

	/* Slow path: halve the value (SHR 1) ... */
	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= SHR;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JNC_i8;
	jump_inst1 = inst;

	size1 = compiler->size;

	/* ... keeping the shifted-out bit as a sticky rounding bit. */
	BINARY_IMM32(OR, 1, TMP_REG1, 0);
	jump_inst1[1] = U8(compiler->size - size1);

	/* Convert the halved value and double it by adding it to itself. */
	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
	FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));

	/* Patch the fast-path JMP to skip over the slow path. */
	jump_inst2[1] = U8(compiler->size - size2);

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
1425
/* Loads an immediate 32-bit float constant into freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	sljit_u8 *inst;
	union {
		sljit_s32 imm; /* Raw bit pattern of 'value'. */
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	/* Type-pun through the union to obtain the bits of the float. */
	u.value = value;

	/* Non-zero constants travel through TMP_REG1. */
	if (u.imm != 0)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);

	/* 66 0F xx /r - both PXOR and MOVD share this prefix pair. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4);

	inst[0] = GROUP_66;
	inst[1] = GROUP_0F;

	if (u.imm == 0) {
		/* +0.0f: PXOR freg, freg clears the register. */
		inst[2] = PXOR_x_xm;
		inst[3] = U8(freg | (freg << 3) | MOD_REG);
	} else {
		/* MOVD freg, TMP_REG1 copies the bit pattern. */
		inst[2] = MOVD_x_rm;
		inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG);
	}

	return SLJIT_SUCCESS;
}
1460
/* Loads an immediate 64-bit float constant into freg; zero halves are
   materialized with PXOR/shuffles instead of an extra MOVD. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	sljit_u8 *inst;
	sljit_s32 tmp_freg = freg;
	union {
		sljit_s32 imm[2]; /* imm[0] is the low word (x86 is little endian). */
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	/* Type-pun the double into two 32-bit words. */
	u.value = value;

	if (u.imm[0] == 0) {
		if (u.imm[1] == 0)
			/* +0.0: PXOR freg, freg clears the register. */
			return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
	} else
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);

	/* MOVD writes the word into lane 0 of freg. */
	FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));

	/* High word zero: the MOVD above already produced the full value. */
	if (u.imm[1] == 0)
		return SLJIT_SUCCESS;

	if (u.imm[0] == 0) {
		/* Low word zero: SHUFPS with imm 0x51 moves the loaded word into
		   lane 1 and a zero into lane 0. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		inst[0] = GROUP_0F;
		inst[1] = SHUFPS_x_xm;
		inst[2] = U8(MOD_REG | (freg << 3) | freg);
		inst[3] = 0x51;
		return SLJIT_SUCCESS;
	}

	if (u.imm[0] != u.imm[1]) {
		SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0);

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);

		if (cpu_feature_list & CPU_FEATURE_SSE41) {
			/* PINSRD freg, TMP_REG1, 1 writes the high word directly. */
			FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
			return emit_byte(compiler, 1);
		}

		/* No SSE4.1: stage the high word in TMP_FREG, merge below. */
		FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0));
		tmp_freg = TMP_FREG;
	}

	/* UNPCKLPS interleaves the low lanes of freg and tmp_freg. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);

	inst[0] = GROUP_0F;
	inst[1] = UNPCKLPS_x_xm;
	inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg);
	return SLJIT_SUCCESS;
}
1524
/* Transfers raw bits between freg and an integer register (or a register
   pair for the 64-bit case) without numeric conversion. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 reg2;
	sljit_sw regw, reg2w;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	regw = 0;
	reg2 = 0;
	reg2w = 0;

	SLJIT_ASSERT(cpu_feature_list != 0);

	if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) {
		/* 64-bit copy with SSE4.1: MOVD moves one 32-bit half and
		   PINSRD/PEXTRD with lane index 1 the other. */
		if (reg & REG_PAIR_MASK) {
			reg2 = REG_PAIR_FIRST(reg);
			reg = REG_PAIR_SECOND(reg);

			CHECK_EXTRA_REGS(reg, regw, (void)0);

			FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
				| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw));
		} else
			reg2 = reg;

		CHECK_EXTRA_REGS(reg2, reg2w, (void)0);

		FAIL_IF(emit_groupf_ext(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8)
			| EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w));
		/* Lane index immediate for PINSRD/PEXTRD. */
		return emit_byte(compiler, 1);
	}

	if (reg & REG_PAIR_MASK) {
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		/* Same register twice: a single transfer is enough. */
		if (reg == reg2)
			reg = 0;

		CHECK_EXTRA_REGS(reg2, reg2w, (void)0);
	}

	CHECK_EXTRA_REGS(reg, regw, (void)0);

	if (op & SLJIT_32)
		/* 32-bit copy: one MOVD in the requested direction. */
		return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
			| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw);

	if (op == SLJIT_COPY_FROM_F64) {
		/* PSHUFD TMP_FREG, freg, 1: bring the upper word into lane 0 of
		   TMP_FREG so both halves can be read with MOVD. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		inst[0] = GROUP_66;
		inst[1] = GROUP_0F;
		inst[2] = PSHUFD_x_xm;
		inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg);
		inst[4] = 1;
	} else if (reg != 0)
		/* COPY_TO_F64 without SSE4.1: stage one half in TMP_FREG. */
		FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));

	if (reg2 != 0)
		FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
			| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w));

	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
		/* Merge the halves: UNPCKLPS interleaves the low lanes. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!inst);
		INC_SIZE(3);

		inst[0] = GROUP_0F;
		inst[1] = UNPCKLPS_x_xm;
		inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG));
	} else
		/* COPY_FROM_F64: read the shuffled upper half out of TMP_FREG. */
		FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));

	return SLJIT_SUCCESS;
}
1606
skip_frames_before_return(struct sljit_compiler * compiler)1607 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1608 {
1609 sljit_sw size;
1610
1611 /* Don't adjust shadow stack if it isn't enabled. */
1612 if (!cpu_has_shadow_stack())
1613 return SLJIT_SUCCESS;
1614
1615 SLJIT_ASSERT(compiler->args_size >= 0);
1616 SLJIT_ASSERT(compiler->local_size > 0);
1617
1618 size = compiler->local_size;
1619 size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1620 + (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1621
1622 return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1623 }
1624