1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* x86 32-bit arch dependent functions. */
28 
29 /* --------------------------------------------------------------------- */
30 /*  Operators                                                            */
31 /* --------------------------------------------------------------------- */
32 
emit_do_imm(struct sljit_compiler * compiler,sljit_u8 opcode,sljit_sw imm)33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
34 {
35 	sljit_u8 *inst;
36 
37 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 	FAIL_IF(!inst);
39 	INC_SIZE(1 + sizeof(sljit_sw));
40 	*inst++ = opcode;
41 	sljit_unaligned_store_sw(inst, imm);
42 	return SLJIT_SUCCESS;
43 }
44 
45 /* Size contains the flags as well. */
/* Emits one x86 instruction into the compiler buffer and returns the
   address of its first opcode byte (after any prefix byte), or NULL on
   allocation failure.  The caller writes the opcode itself.
     size - instruction length in bytes (low 4 bits) or'ed with EX86_* flags
     a / imma - register or immediate operand (immediate when a == SLJIT_IMM)
     b / immb - the general register/memory operand (never an immediate) */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
	/* At most one of the F2/F3/66 mandatory prefixes may be requested. */
	SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
			& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
	SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);

	/* The low four bits hold the opcode length; the rest are flags. */
	size &= 0xf;
	/* The mod r/m byte is always present. */
	inst_size = size + 1;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		inst_size++;

	/* Calculate size of b. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw); /* Absolute address: 32 bit displacement. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_sw);
			} else if (reg_map[b & REG_MASK] == 5) {
				/* A base of ebp (5) cannot be encoded without a displacement. */
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			/* A base of esp (4) always needs a SIB byte. */
			if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK)
				inst_size += 1; /* SIB byte. */
		}
	}

	/* Calculate size of a. */
	if (a == SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		} else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= 0x1f);
			if (imma != 1) {
				/* Shift by 1 has a dedicated opcode; others take an imm8. */
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	} else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	else if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	else if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* buf_ptr points to the mod r/m byte; inst stays on the opcode slot. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a == SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3);
		else
			*buf_ptr = U8(freg_map[a] << 3);
	} else {
		if (a == SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand: mod = 11. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : freg_map[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* Select mod = 01 (disp8) or mod = 10 (disp32). */
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				buf_ptr[0] |= 0x04;
				buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
				buf_ptr += 2;
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		} else {
			/* Scaled index addressing; immb carries the scale (shift) here. */
			if (reg_map_b == 5)
				*buf_ptr |= 0x40;

			buf_ptr[0] |= 0x04;
			buf_ptr[1] = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
			buf_ptr += 2;

			if (reg_map_b == 5)
				*buf_ptr++ = 0;
		}
	} else {
		/* No base register: mod = 00, rm = 101 with a 32 bit address. */
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	/* Append the immediate operand, if any. */
	if (a == SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	return inst;
}
221 
/* Emits a VEX encoded instruction.  The low byte of op is the opcode;
   the remaining bits carry EX86_ and VEX_ flags.  a is the reg-field
   operand, v the extra (vvvv) register operand, b/immb the general
   register or memory operand. */
static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
	/* The first and second register operand. */
	sljit_s32 a, sljit_s32 v,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 vex = 0;
	sljit_u8 vex_m = 0;
	sljit_uw size;

	/* At most one of the F2/F3/66 mandatory prefixes may be requested. */
	SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
			& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);

	/* Select the opcode map (VEX m-mmmm field). */
	if (op & VEX_OP_0F38)
		vex_m = 0x2;
	else if (op & VEX_OP_0F3A)
		vex_m = 0x3;

	if (op & VEX_W) {
		/* VEX.W forces the three byte prefix form. */
		if (vex_m == 0)
			vex_m = 0x1;

		vex |= 0x80;
	}

	/* Encode the mandatory prefix in the pp field. */
	if (op & EX86_PREF_66)
		vex |= 0x1;
	else if (op & EX86_PREF_F2)
		vex |= 0x3;
	else if (op & EX86_PREF_F3)
		vex |= 0x2;

	op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);

	if (op & VEX_256)
		vex |= 0x4; /* VEX.L: 256 bit vector length. */

	/* vvvv field: register number stored inverted. */
	vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));

	/* Total length: 2 or 3 prefix bytes plus the opcode byte. */
	size = op & ~(sljit_uw)0xff;
	size |= (vex_m == 0) ? 3 : 4;

	inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
	FAIL_IF(!inst);

	if (vex_m == 0) {
		/* Two byte VEX prefix (0xc5). */
		inst[0] = 0xc5;
		inst[1] = U8(vex | 0x80);
		inst[2] = U8(op);
		return SLJIT_SUCCESS;
	}

	/* Three byte VEX prefix (0xc4). */
	inst[0] = 0xc4;
	inst[1] = U8(vex_m | 0xe0);
	inst[2] = vex;
	inst[3] = U8(op);
	return SLJIT_SUCCESS;
}
281 
282 /* --------------------------------------------------------------------- */
283 /*  Enter / return                                                       */
284 /* --------------------------------------------------------------------- */
285 
/* Emits the opcode bytes of a jump or call with a 32 bit displacement.
   When the target is a label the jump is marked for later patching;
   otherwise the relative offset to the known target is stored now. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		/* Unconditional jump: single byte opcode. */
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		/* Conditional jumps use the two byte 0x0f escape form. */
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW; /* Word patched once the label address is known. */
	else
		/* Displacement is relative to the end of the 4 byte operand. */
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	code_ptr += 4;

	return code_ptr;
}
312 
/* Status bits for the fourth word argument: it is fetched into TMP_REG1
   first and later stored either into the stack slot acting as R4 or into
   a saved-register stack slot. */
#define ENTER_TMP_TO_R4		0x00001
#define ENTER_TMP_TO_S		0x00002

/* Emits the function prologue: pushes the used callee saved registers,
   allocates the 16 byte aligned local area (with page-by-page stack
   probing on Windows) and moves the incoming stack arguments into their
   assigned registers or stack slots. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
	sljit_s32 size, args_size, types, status;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_u8 *inst;
#ifdef _WIN32
	/* Stack offset where R2 is reloaded after the probe loop, or -1. */
	sljit_s32 r2_offset = -1;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed.  */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	status = 0;

	if (options & SLJIT_ENTER_REG_ARG) {
		/* Word arguments already arrive in registers; only a fourth one
		   (if present) has to be moved through TMP_REG1. */
		args_size = 3 * SSIZE_OF(sw);

		while (arg_types) {
			if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
				word_arg_count++;
				if (word_arg_count >= 4)
					status |= ENTER_TMP_TO_R4;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}

		compiler->args_size = 0;
	} else {
		/* Stack (cdecl style) arguments: load the float arguments now
		   and compute the total size of the argument area. */
		types = arg_types;
		saved_arg_count = 0;
		float_arg_count = 0;
		args_size = SSIZE_OF(sw); /* Arguments start above the return address. */
		while (types) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;

				if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
					saved_arg_count++;

				/* A fourth word argument has no register assigned; it is
				   moved through TMP_REG1 and removed from arg_types so
				   the register-assignment loop below skips it. */
				if (word_arg_count == 4) {
					if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
						status |= ENTER_TMP_TO_R4;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					} else if (saved_arg_count == 4) {
						status |= ENTER_TMP_TO_S;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					}
				}

				args_size += SSIZE_OF(sw);
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		args_size -= SSIZE_OF(sw);
		compiler->args_size = args_size;
	}

	/* Number of registers pushed below. */
	size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
	if (!(options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 is pushed as well. */

	if (size != 0) {
		inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
		FAIL_IF(!inst);

		INC_SIZE((sljit_uw)size);

		if (!(options & SLJIT_ENTER_REG_ARG))
			PUSH_REG(reg_map[TMP_REG1]);

		/* Kept saveds are preserved by the caller; only push the rest. */
		if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9)
			PUSH_REG(reg_map[SLJIT_S2]);
		if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
			PUSH_REG(reg_map[SLJIT_S1]);
		if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
			PUSH_REG(reg_map[SLJIT_S0]);

		/* From here 'size' is the pushed area in bytes. */
		size *= SSIZE_OF(sw);
	}

	/* Fetch the fourth word argument while its offset is still known. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);

	size += SSIZE_OF(sw); /* Include the return address. */

	/* Round so that esp stays 16 byte aligned after the subtraction. */
	local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size;
	compiler->local_size = local_size;

	/* Move the word arguments into their registers. */
	word_arg_count = 0;
	saved_arg_count = 0;
	args_size = size;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));

			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
#ifdef _WIN32
				/* R2 is needed by the probe loop below; postpone this
				   load until after probing (see r2_offset reload). */
				if (word_arg_count == 3 && local_size > 4 * 4096)
					r2_offset = local_size + args_size;
				else
#endif
					EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);

			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
				saved_arg_count++;
			}

			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#ifdef _WIN32
	SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);

	/* Windows: touch every 4 KiB page of the new stack area so the guard
	   page mechanism can grow the stack (stack probing). */
	if (local_size > 4096) {
		if (local_size <= 4 * 4096) {
			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);

			if (local_size > 2 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			/* Larger areas are probed in a loop, counting pages in R2. */
			if (options & SLJIT_ENTER_REG_ARG) {
				SLJIT_ASSERT(r2_offset == -1);

				inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1));
				FAIL_IF(!inst);
				INC_SIZE(1);
				PUSH_REG(reg_map[SLJIT_R2]);

				local_size -= SSIZE_OF(sw);
				r2_offset = local_size;
			}

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);

			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);

			/* LOOP jumps back 16 bytes (over the or/sub pair above)
			   while the count register is not zero. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = LOOP_i8;
			inst[1] = (sljit_u8)-16;
			local_size &= 0xfff; /* Remaining partial page. */
		}
	}

	if (local_size > 0) {
		BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
	}

	/* Reload the register that was postponed or pushed for the probe. */
	if (r2_offset != -1)
		EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);

#else /* !_WIN32 */

	SLJIT_ASSERT(local_size > 0);

	BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#endif /* _WIN32 */

	/* Store the saved arguments beyond the third into their stack slots. */
	size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw);
	kept_saveds_count = SLJIT_R3 - kept_saveds_count;

	while (saved_arg_count > 3) {
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0);
		kept_saveds_count++;
		size -= SSIZE_OF(sw);
		saved_arg_count--;
	}

	/* Finally place the fourth word argument fetched into TMP_REG1. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
		if (status & ENTER_TMP_TO_R4)
			size = 2 * SSIZE_OF(sw);

		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
540 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)541 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
542 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
543 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
544 {
545 	sljit_s32 args_size;
546 
547 	CHECK_ERROR();
548 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
549 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
550 
551 	arg_types >>= SLJIT_ARG_SHIFT;
552 	args_size = 0;
553 
554 	if (!(options & SLJIT_ENTER_REG_ARG)) {
555 		while (arg_types) {
556 			switch (arg_types & SLJIT_ARG_MASK) {
557 			case SLJIT_ARG_TYPE_F64:
558 				args_size += SSIZE_OF(f64);
559 				break;
560 			case SLJIT_ARG_TYPE_F32:
561 				args_size += SSIZE_OF(f32);
562 				break;
563 			default:
564 				args_size += SSIZE_OF(sw);
565 				break;
566 			}
567 			arg_types >>= SLJIT_ARG_SHIFT;
568 		}
569 	}
570 
571 	compiler->args_size = args_size;
572 
573 	/* [esp+0] for saving temporaries and for function calls. */
574 
575 	saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
576 
577 	/* Saving ebp. */
578 	if (!(options & SLJIT_ENTER_REG_ARG))
579 		saveds += SSIZE_OF(sw);
580 
581 	compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds;
582 	return SLJIT_SUCCESS;
583 }
584 
/* Emits the epilogue that releases the local area and pops the saved
   registers.  When is_return_to is set, one extra machine word (the slot
   consumed by a "return to" transfer) is released as well. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	sljit_s32 local_size, saveds;
	sljit_uw size;
	sljit_u8 *inst;

	/* Number of registers to pop (mirrors the pushes in sljit_emit_enter). */
	size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);

	local_size = compiler->local_size;

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 was pushed as well. */
	else if (is_return_to && size == 0) {
		/* Nothing to pop: fold the extra word into the add below. */
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	if (size == 0)
		return SLJIT_SUCCESS;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	saveds = compiler->saveds;

	/* Pop in the reverse of the push order used in sljit_emit_enter. */
	if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		POP_REG(reg_map[TMP_REG1]);

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}
632 
/* Emits a return with no return value: releases the frame and emits a
   near ret. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

	FAIL_IF(emit_stack_frame_release(compiler, 0));

	return emit_byte(compiler, RET_near);
}
645 
/* Releases the current frame and jumps to src.  If src lives in memory
   or in a register that the frame release restores, it is first moved
   into a register that survives emit_stack_frame_release. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		CHECK_EXTRA_REGS(src, srcw, (void)0);

		/* Pick a register not clobbered by the frame release below. */
		src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? TMP_REG1 : SLJIT_R1;

		EMIT_MOV(compiler, src_r, 0, src, srcw);
		src = src_r;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
670 
671 /* --------------------------------------------------------------------- */
672 /*  Call / return instructions                                           */
673 /* --------------------------------------------------------------------- */
674 
/* Computes how many stack bytes the outgoing arguments need beyond the
   first four machine words, rounded up so the stack stays 16 byte
   aligned.  Optionally reports the number of word sized arguments
   through word_arg_count_ptr. */
static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 type;

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types != 0) {
		type = arg_types & SLJIT_ARG_MASK;

		if (type == SLJIT_ARG_TYPE_F64)
			stack_size += SSIZE_OF(f64);
		else if (type == SLJIT_ARG_TYPE_F32)
			stack_size += SSIZE_OF(f32);
		else {
			word_arg_count++;
			stack_size += SSIZE_OF(sw);
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count_ptr != NULL)
		*word_arg_count_ptr = word_arg_count;

	if (stack_size <= 4 * SSIZE_OF(sw))
		return 0;

	/* Extra space only, 16 byte aligned. */
	return ((stack_size - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf);
}
707 
/* Stores the outgoing call arguments (currently held in registers) into
   the newly allocated stack area of a normal (stack argument) call.
   When keep_tmp1 is set TMP_REG1 must stay intact, so R0 is used to
   shuffle the fourth word argument instead. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1)
{
	sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset;
	sljit_u8 *inst; /* Referenced inside the emit macros (e.g. BINARY_IMM32). */

	if (word_arg_count >= 4) {
		arg4_reg = SLJIT_R0;

		if (!keep_tmp1) {
			/* The fourth word argument lives at [esp + 2 * sizeof(sw)]. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
			arg4_reg = TMP_REG1;
		}
	}

	if (stack_size > 0)
		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

	arg_offset = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0);

			/* When R0 doubles as the fourth-argument carrier, R0 is free
			   after the first argument is stored: load the fourth word
			   argument from its original slot (above the new area). */
			if (word_arg_count == 1 && arg4_reg == SLJIT_R0)
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size);

			arg_offset += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
758 
/* Releases the argument area after a call and, when the callee returns a
   floating point value (delivered on the x87 stack), moves it into FR0. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);

	/* Done unless the return type is a floating point value. */
	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	/* fstps/fstpd [esp]: pop the x87 result into the scratch slot. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04; /* ModRM: /3 opcode extension, SIB follows. */
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; /* SIB: base esp, no index. */

	/* Reload the value from [esp] into the SSE2 register FR0. */
	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
782 
/* Prepares a tail call: moves the outgoing arguments into their stack
   slots, restores the saved registers and releases as much of the
   current frame as possible.  On return *extra_space holds the number
   of bytes the generated code still has to release after the callee
   returns (0 when the caller's incoming argument area could be reused).
   Only used for stack-argument calls (register-argument tail calls are
   handled by tail_call_reg_arg_with_args). */
static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 *extra_space, sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw args_size, saved_regs_size;
	sljit_sw types, word_arg_count, float_arg_count;
	sljit_sw stack_size, prev_stack_size, min_size, offset;
	sljit_sw word_arg4_offset;
	sljit_u8 r2_offset = 0;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	sljit_u8* inst; /* Referenced inside the emit macros (e.g. BINARY_IMM32). */

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);

	/* Collect the argument types in reverse order into 'types' so they
	   can be stored from the last argument backwards, and compute the
	   total outgoing argument size. */
	word_arg_count = 0;
	float_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;
	types = 0;
	args_size = 0;

	while (arg_types != 0) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			float_arg_count++;
			break;
		default:
			word_arg_count++;
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (args_size <= compiler->args_size) {
		/* The outgoing arguments fit into the incoming argument area,
		   so the whole frame can be released before the jump. */
		*extra_space = 0;
		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;

		offset = stack_size + compiler->local_size;

		if (src != SLJIT_IMM && src != SLJIT_R0) {
			if (word_arg_count >= 1) {
				/* Spill R0 to [esp] so it can carry the jump target. */
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				r2_offset = sizeof(sljit_sw);
			}
			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
		}

		/* Store the arguments from the last one to the first. */
		while (types != 0) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				offset -= SSIZE_OF(f64);
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			case SLJIT_ARG_TYPE_F32:
				offset -= SSIZE_OF(f32);
				/* Fix: use a single precision store; the slot is only four
				   bytes wide and a double store would overwrite the next
				   argument (matches the f32 handling in call_with_args). */
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			default:
				switch (word_arg_count) {
				case 1:
					offset -= SSIZE_OF(sw);
					if (r2_offset != 0) {
						/* The first argument was spilled to [esp] above. */
						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					} else
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
					break;
				case 2:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
					break;
				case 3:
					offset -= SSIZE_OF(sw);
					/* Fix: the third word argument (held in R2) was never
					   stored, leaving its slot with stale data.  Mirrors
					   the second-argument handling and the second path
					   below. */
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					break;
				case 4:
					offset -= SSIZE_OF(sw);
					/* Fix: shuffle the fourth argument through TMP_REG1
					   instead of R2.  This case runs before case 3 (the
					   loop goes last-to-first), so R2 must stay intact;
					   TMP_REG1 is restored by the frame release below. */
					EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
					break;
				}
				word_arg_count--;
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		return emit_stack_frame_release(compiler, 0);
	}

	/* The outgoing arguments do not fit into the incoming area: build a
	   new argument block and report its size through *extra_space. */
	stack_size = args_size + SSIZE_OF(sw);

	if (word_arg_count >= 1 && src != SLJIT_IMM && src != SLJIT_R0) {
		r2_offset = SSIZE_OF(sw);
		stack_size += SSIZE_OF(sw);
	}

	if (word_arg_count >= 3)
		stack_size += SSIZE_OF(sw); /* Spill slot for R2. */

	prev_stack_size = SSIZE_OF(sw) + saved_regs_size;
	min_size = prev_stack_size + compiler->local_size;

	word_arg4_offset = 2 * SSIZE_OF(sw);

	if (stack_size > min_size) {
		BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
		if (src == SLJIT_MEM1(SLJIT_SP))
			srcw += stack_size - min_size;
		word_arg4_offset += stack_size - min_size;
	}
	else
		stack_size = min_size;

	if (word_arg_count >= 3) {
		/* Spill R2 (third argument); it is needed as a scratch below. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);

		if (word_arg_count >= 4)
			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
	}

	if (src != SLJIT_IMM && src != SLJIT_R0) {
		if (word_arg_count >= 1) {
			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
		}
		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
	}

	/* Restore saved registers. */
	offset = stack_size - 2 * SSIZE_OF(sw);
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);

	/* Fix: only restore S2 when it was actually pushed in the prologue,
	   i.e. when it is not a kept saved register.  Mirrors the S1/S0
	   checks below and the push conditions in sljit_emit_enter. */
	if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}

	/* Copy fourth argument and return address. */
	offset = stack_size - SSIZE_OF(sw);
	*extra_space = args_size;

	if (word_arg_count >= 4) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
	}

	/* Store the remaining arguments from the last one to the first. */
	while (types != 0) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			offset -= SSIZE_OF(f64);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			offset -= SSIZE_OF(f32);
			/* Fix: single precision store (see the first loop above). */
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		default:
			switch (word_arg_count) {
			case 1:
				offset -= SSIZE_OF(sw);
				if (r2_offset != 0) {
					/* The first argument was spilled to [esp] above. */
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				} else
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
				break;
			case 2:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
				break;
			case 3:
				offset -= SSIZE_OF(sw);
				/* Reload the spilled third argument. */
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				break;
			}
			word_arg_count--;
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(offset >= 0);

	if (offset == 0)
		return SLJIT_SUCCESS;

	/* Drop the gap between [esp] and the new argument block. */
	BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
	return SLJIT_SUCCESS;
}
996 
/* Emitted after the jump of a tail call whose argument area could not
   be fully released: frees the remaining stack space and returns to
   the original caller. */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst; /* Only used by the BINARY_IMM32 macro expansion. */

	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);
	return emit_byte(compiler, RET_near);
}
1005 
/* Tail call preparation for the register argument calling convention
   (SLJIT_CALL_REG_ARG). With fewer than four word arguments nothing
   has to be done. Otherwise the word at [esp + 2 * sizeof(sw)]
   (presumably the stack-passed fourth argument -- confirm against the
   frame layout established by emit_enter) is copied to the slot just
   above the frame that is about to be released, where the tail-called
   function expects it. */
static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_s32 word_arg_count = 0;
	sljit_s32 kept_saveds_count, offset;

	arg_types >>= SLJIT_ARG_SHIFT;

	/* Count only the non-float arguments: float arguments do not
	   occupy word argument slots in this convention. */
	while (arg_types) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count < 4)
		return SLJIT_SUCCESS;

	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));

	/* Destination offset: locals plus three fixed words, plus one word
	   for every saved/extended-scratch register that the epilogue will
	   restore (kept saveds are not stored, hence the count checks). */
	kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	offset = compiler->local_size + 3 * SSIZE_OF(sw);

	if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
		offset += SSIZE_OF(sw);
	if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
		offset += SSIZE_OF(sw);
	if ((compiler->saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
		offset += SSIZE_OF(sw);

	return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
}
1037 
/* Emits a direct function call, or a tail call when SLJIT_CALL_RETURN
   is set in type. Returns the created jump, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			/* Register-argument tail call: fix up the stack-passed
			   argument (if any), release the frame, then jump. */
			PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));
			PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
		}

		/* stack_size is both input (the call type) and output (stack
		   space that could not be released) of tail_call_with_args. */
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

		SLJIT_SKIP_CHECKS(compiler);

		/* Fully released frame: a plain jump performs the tail call. */
		if (stack_size == 0)
			return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));

		/* Otherwise emit a normal call followed by code that frees the
		   remaining space and returns. */
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		/* Arguments are already in registers; just emit the call. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_jump(compiler, type);
	}

	/* Stack-based calling convention: push the arguments, call, then
	   restore the stack pointer. */
	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

	SLJIT_SKIP_CHECKS(compiler);
	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
1087 
/* Emits an indirect function call through src/srcw, or an indirect
   tail call when SLJIT_CALL_RETURN is set in type. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_CALL_RETURN) {
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));

			/* The target must be moved to TMP_REG1 when it lives in
			   memory or in a saved register that the frame release
			   below will restore (kept saveds stay valid). */
			if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
				ADJUST_LOCAL_OFFSET(src, srcw);
				CHECK_EXTRA_REGS(src, srcw, (void)0);

				EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
				src = TMP_REG1;
				srcw = 0;
			}

			FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
		}

		/* stack_size is both input (the call type) and output (stack
		   space that could not be released) of tail_call_with_args. */
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		/* tail_call_with_args moved a non-immediate target into R0. */
		if (src != SLJIT_IMM) {
			src = SLJIT_R0;
			srcw = 0;
		}

		SLJIT_SKIP_CHECKS(compiler);

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		/* Could not release everything: call, then free the remaining
		   stack space and return. */
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		/* Arguments are already in registers; just emit the call. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_ijump(compiler, type, src, srcw);
	}

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src & SLJIT_MEM) {
		/* Load the target before call_with_args moves esp. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1));

	/* If the target is still esp-relative, compensate for the space
	   reserved for the pushed arguments. */
	if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP))
		srcw += stack_size;

	SLJIT_SKIP_CHECKS(compiler);
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	return post_call_with_args(compiler, arg_types, stack_size);
}
1159 
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1160 static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1161 {
1162 	sljit_u8* inst;
1163 
1164 	if (compiler->options & SLJIT_ENTER_REG_ARG) {
1165 		if (src == SLJIT_FR0)
1166 			return SLJIT_SUCCESS;
1167 
1168 		SLJIT_SKIP_CHECKS(compiler);
1169 		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
1170 	}
1171 
1172 	if (FAST_IS_REG(src)) {
1173 		FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src));
1174 
1175 		src = SLJIT_MEM1(SLJIT_SP);
1176 		srcw = 0;
1177 	} else {
1178 		ADJUST_LOCAL_OFFSET(src, srcw);
1179 	}
1180 
1181 	inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw);
1182 	*inst = (op & SLJIT_32) ? FLDS : FLDL;
1183 
1184 	return SLJIT_SUCCESS;
1185 }
1186 
/* Pops the return address pushed by a fast call into dst, which may
   be a register or a memory operand (an unused destination is also
   possible here). */
static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *pop_inst;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (!FAST_IS_REG(dst)) {
		/* Pop directly into the memory operand (8F /0). */
		pop_inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!pop_inst);
		pop_inst[0] = POP_rm;
		return SLJIT_SUCCESS;
	}

	/* Single-byte pop into a general purpose register. */
	return emit_byte(compiler, U8(POP_r + reg_map[dst]));
}
1203 
/* Returns through the address stored in src: pushes the value, then
   executes a near ret, which pops and jumps to it. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		/* Reserve two bytes: push reg (1) + ret (1). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		/* push with a memory operand (FF /6). */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		/* Reserve one byte for the ret emitted below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
1231 
/* Copies the stored return address of the current function into dst.
   Its stack offset is local_size plus the size of the register save
   area established by the prologue. */
static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 options = compiler->options;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;

	/* Save area size: extra scratches (above 9) plus up to three saved
	   registers, minus the "kept" saveds that the prologue does not
	   store. */
	saveds = ((scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);

	/* Saving ebp. */
	if (!(options & SLJIT_ENTER_REG_ARG))
		saveds += SSIZE_OF(sw);

	return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saveds);
}
1247 
1248 /* --------------------------------------------------------------------- */
1249 /*  Other operations                                                     */
1250 /* --------------------------------------------------------------------- */
1251 
/* Loads or stores a register pair from/to memory. Accesses without a
   register pair are forwarded to sljit_emit_mem_unaligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx, offset;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	/* Word stride between the two accesses; its sign also selects the
	   transfer order (positive: first register first). */
	next = SSIZE_OF(sw);

	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		/* Loading the first register would clobber a register used
		   for addressing, so the order (or the address) must change. */
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* None of them are virtual register so TMP_REG1 will not be used. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			/* Keep the index register alive in TMP_REG1. */
			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			/* Load the second register first. */
			next = -SSIZE_OF(sw);

			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		/* Reverse iteration order when next is negative. */
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		/* offset != -1 marks a virtual register whose home slot is on
		   the stack and which is accessed through TMP_REG1. */
		offset = -1;

		if (reg >= SLJIT_R3 && reg <= SLJIT_S3) {
			offset = (2 * SSIZE_OF(sw)) + ((reg) - SLJIT_R3) * SSIZE_OF(sw);
			reg = TMP_REG1;

			if (type & SLJIT_MEM_STORE)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
		}

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			/* Second word with a scaled-index address: encode the mov
			   by hand as [base + index << scale + disp8] where the
			   displacement is one word. */
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 4));
			FAIL_IF(!inst);

			INC_SIZE(4);

			inst[0] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			/* ModR/M: mod = 01 (disp8), rm = 100 (SIB byte follows). */
			inst[1] = 0x44 | U8(reg_map[reg] << 3);
			/* SIB: scale from memw, then index and base registers. */
			inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK];
			inst[3] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		if (!(mem & OFFS_REG_MASK))
			memw += next;

		/* Write a loaded virtual register back to its home slot. */
		if (!(type & SLJIT_MEM_STORE) && offset != -1)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
1329 
/* Converts an unsigned word to a floating point value. SSE2 only has
   signed conversions (cvtsi2sd/ss), so values with bit 31 set need
   compensation code. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst, *jump_inst1, *jump_inst2;
	sljit_uw size1, size2;

	/* Binary representation of 0x80000000 (2^31) as a double. */
	static const sljit_f64 f64_high_bit = (sljit_f64)0x80000000ul;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (!(op & SLJIT_32)) {
		/* Double destination: clear bit 31 before the signed
		   conversion, then add 2^31 when it was set. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);

		/* rol 1 followed by shr 1 clears bit 31 and leaves its value
		   in the carry flag. */
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= ROL;

		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= SHR;

		FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_PREF_F2 | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));

		/* Short conditional jump (jnc) over the compensation; its
		   displacement (inst[1]) is patched below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		inst[0] = U8(get_jump_code(SLJIT_NOT_CARRY) - 0x10);

		size1 = compiler->size;
		FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_PREF_F2 | EX86_SSE2, dst_r, SLJIT_MEM0(), (sljit_sw)&f64_high_bit));

		inst[1] = U8(compiler->size - size1);

		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, 0, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	/* Float destination: values below 2^31 convert directly; larger
	   values are halved (keeping the dropped bit sticky), converted,
	   then doubled. */
	if (!FAST_IS_REG(src)) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
	}

	BINARY_IMM32(CMP, 0, src, 0);

	/* jl: taken when bit 31 is set (value is negative as signed). */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JL_i8;
	jump_inst1 = inst;

	size1 = compiler->size;

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));

	/* Skip the large-value path. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	jump_inst2 = inst;

	size2 = compiler->size;

	jump_inst1[1] = U8(size2 - size1);

	if (src != TMP_REG1)
		EMIT_MOV(compiler, TMP_REG1, 0, src, 0);

	/* shr 1: halve the value; the dropped low bit ends up in carry. */
	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= SHR;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JNC_i8;
	jump_inst1 = inst;

	size1 = compiler->size;

	/* Keep the dropped bit sticky so rounding stays correct. */
	BINARY_IMM32(OR, 1, TMP_REG1, 0);
	jump_inst1[1] = U8(compiler->size - size1);

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
	/* Double the result to compensate the halving above. */
	FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));

	jump_inst2[1] = U8(compiler->size - size2);

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
1425 
/* Sets freg to an immediate 32-bit float constant. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	sljit_u8 *inst;
	/* Bit-exact reinterpretation of the float constant. */
	union {
		sljit_s32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

	/* Non-zero constants are materialized through TMP_REG1. */
	if (u.imm != 0)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4);

	inst[0] = GROUP_66;
	inst[1] = GROUP_0F;

	if (u.imm == 0) {
		/* All bits zero (+0.0): pxor freg, freg. */
		inst[2] = PXOR_x_xm;
		inst[3] = U8(freg | (freg << 3) | MOD_REG);
	} else {
		/* movd freg, TMP_REG1. */
		inst[2] = MOVD_x_rm;
		inst[3] = U8(reg_map[TMP_REG1] | (freg << 3) | MOD_REG);
	}

	return SLJIT_SUCCESS;
}
1460 
/* Sets freg to an immediate 64-bit float constant, assembled from its
   two 32-bit halves (imm[0] is the low word, imm[1] the high word on
   this little-endian target). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	sljit_u8 *inst;
	sljit_s32 tmp_freg = freg;
	/* Bit-exact reinterpretation of the double constant. */
	union {
		sljit_s32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

	if (u.imm[0] == 0) {
		/* All bits zero (+0.0): pxor freg, freg. */
		if (u.imm[1] == 0)
			return emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);
	} else
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[0]);

	/* movd: place the first non-zero word into lane 0 of freg. */
	FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, TMP_REG1, 0));

	/* Zero high word: the movd already zeroed the upper lanes. */
	if (u.imm[1] == 0)
		return SLJIT_SUCCESS;

	if (u.imm[0] == 0) {
		/* Zero low word: shufps with imm8 0x51 moves the loaded high
		   word from lane 0 to lane 1 and leaves zero in lane 0. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		inst[0] = GROUP_0F;
		inst[1] = SHUFPS_x_xm;
		inst[2] = U8(MOD_REG | (freg << 3) | freg);
		inst[3] = 0x51;
		return SLJIT_SUCCESS;
	}

	if (u.imm[0] != u.imm[1]) {
		SLJIT_ASSERT(u.imm[1] != 0 && cpu_feature_list != 0);

		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm[1]);

		if (cpu_feature_list & CPU_FEATURE_SSE41) {
			/* pinsrd freg, TMP_REG1, 1: insert the high word directly. */
			FAIL_IF(emit_groupf_ext(compiler, PINSRD_x_rm_i8 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, TMP_REG1, 0));
			return emit_byte(compiler, 1);
		}

		/* Without SSE4.1 the high word is staged in TMP_FREG. */
		FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, TMP_REG1, 0));
		tmp_freg = TMP_FREG;
	}

	/* unpcklps freg, tmp_freg: interleave the low word (lane 0 of
	   freg) with the high word (lane 0 of tmp_freg). When both halves
	   are equal, freg is combined with itself. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);

	inst[0] = GROUP_0F;
	inst[1] = UNPCKLPS_x_xm;
	inst[2] = U8(MOD_REG | (freg << 3) | tmp_freg);
	return SLJIT_SUCCESS;
}
1524 
/* Copies raw bits between an integer register (a register pair for
   64-bit values) and a floating point register without numeric
   conversion (SLJIT_COPY_TO_F64 / SLJIT_COPY_FROM_F64). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 reg2;
	sljit_sw regw, reg2w;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	regw = 0;
	reg2 = 0;
	reg2w = 0;

	SLJIT_ASSERT(cpu_feature_list != 0);

	if (!(op & SLJIT_32) && (cpu_feature_list & CPU_FEATURE_SSE41)) {
		/* 64-bit copy with SSE4.1: one half via movd (lane 0), the
		   other via pinsrd/pextrd with lane index 1. */
		if (reg & REG_PAIR_MASK) {
			reg2 = REG_PAIR_FIRST(reg);
			reg = REG_PAIR_SECOND(reg);

			CHECK_EXTRA_REGS(reg, regw, (void)0);

			FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
				| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw));
		} else
			reg2 = reg;

		CHECK_EXTRA_REGS(reg2, reg2w, (void)0);

		FAIL_IF(emit_groupf_ext(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? PINSRD_x_rm_i8 : PEXTRD_rm_x_i8)
			| EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2_OP1, freg, reg2, reg2w));
		/* Lane index operand of the pinsrd/pextrd above. */
		return emit_byte(compiler, 1);
	}

	if (reg & REG_PAIR_MASK) {
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		/* Both halves target the same register: handle it only once. */
		if (reg == reg2)
			reg = 0;

		CHECK_EXTRA_REGS(reg2, reg2w, (void)0);
	}

	CHECK_EXTRA_REGS(reg, regw, (void)0);

	/* 32-bit copy: a single movd in the requested direction. */
	if (op & SLJIT_32)
		return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
			| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg, regw);

	if (op == SLJIT_COPY_FROM_F64) {
		/* pshufd TMP_FREG, freg, 1: bring lane 1 of freg into lane 0
		   of TMP_FREG so it can be extracted with a plain movd. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		inst[0] = GROUP_66;
		inst[1] = GROUP_0F;
		inst[2] = PSHUFD_x_xm;
		inst[3] = U8(MOD_REG | (TMP_FREG << 3) | freg);
		inst[4] = 1;
	} else if (reg != 0)
		/* COPY_TO_F64 without SSE4.1: stage one half in TMP_FREG. */
		FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));

	if (reg2 != 0)
		FAIL_IF(emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x)
			| EX86_PREF_66 | EX86_SSE2_OP1, freg, reg2, reg2w));

	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
		/* unpcklps: combine the two staged 32-bit halves into freg. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
		FAIL_IF(!inst);
		INC_SIZE(3);

		inst[0] = GROUP_0F;
		inst[1] = UNPCKLPS_x_xm;
		inst[2] = U8(MOD_REG | (freg << 3) | (reg == 0 ? freg : TMP_FREG));
	} else
		/* COPY_FROM_F64: extract the shuffled half from TMP_FREG. */
		FAIL_IF(emit_groupf(compiler, MOVD_rm_x | EX86_PREF_66 | EX86_SSE2_OP1, TMP_FREG, reg, regw));

	return SLJIT_SUCCESS;
}
1606 
skip_frames_before_return(struct sljit_compiler * compiler)1607 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1608 {
1609 	sljit_sw size;
1610 
1611 	/* Don't adjust shadow stack if it isn't enabled.  */
1612 	if (!cpu_has_shadow_stack())
1613 		return SLJIT_SUCCESS;
1614 
1615 	SLJIT_ASSERT(compiler->args_size >= 0);
1616 	SLJIT_ASSERT(compiler->local_size > 0);
1617 
1618 	size = compiler->local_size;
1619 	size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1620 		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1621 
1622 	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1623 }
1624