1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* x86 32-bit arch dependent functions. */
28 
29 /* --------------------------------------------------------------------- */
30 /*  Operators                                                            */
31 /* --------------------------------------------------------------------- */
32 
/* Emits a single byte opcode followed by a word sized immediate
   (e.g. an opcode whose only operand is an imm32 on 32 bit x86). */
static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;

	/* The first reserved byte is consumed by INC_SIZE. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(1 + sizeof(sljit_sw));
	*inst++ = opcode;
	/* The immediate may be unaligned inside the instruction stream. */
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
44 
/* Emits one x86 instruction. Size contains the flags as well: the low
   four bits hold the opcode length in bytes, the upper bits are EX86_*
   flags describing prefixes and operand kinds. Returns the address
   where the caller must write the opcode byte(s) (for shift
   instructions the group opcode is already stored, so the following
   byte is returned), or NULL on allocation failure. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one mandatory prefix (0xf2 / 0xf3 / 0x66) may be selected. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* The low nibble is the opcode length in bytes. */
	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw); /* Absolute 32 bit address. */
		else if (immb != 0 && !(b & OFFS_REG_MASK)) {
			/* Immediate operand. */
			if (immb <= 127 && immb >= -128)
				inst_size += sizeof(sljit_s8);
			else
				inst_size += sizeof(sljit_sw);
		}
		else if (reg_map[b & REG_MASK] == 5)
			/* Base encoding 5 (ebp) cannot be used without a
			   displacement, so a zero disp8 is emitted. */
			inst_size += sizeof(sljit_s8);

		if ((b & REG_MASK) == SLJIT_SP && !(b & OFFS_REG_MASK))
			/* Base encoding 4 (esp) selects the SIB byte, so an
			   explicit index of SLJIT_SP (meaning "none") is forced. */
			b |= TO_OFFS_REG(SLJIT_SP);

		if (b & OFFS_REG_MASK)
			inst_size += 1; /* SIB byte. */
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			/* Shift counts are masked to 5 bits; a count of 1 has a
			   dedicated short form without an immediate byte. */
			imma &= 0x1f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding prefixes and operands; the opcode bytes themselves are
	   filled in by the caller through the returned pointer. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* buf_ptr points to the mod r/m byte, just after the opcode area. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3);
		else
			*buf_ptr = U8(a << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand: mod = 11. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_map_b == 5) {
			/* 0x40: mod = 01 (8 bit disp), 0x80: mod = 10 (32 bit disp). */
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				/* rm = 100: a SIB byte follows. */
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			/* Base + scaled index, no displacement: immb holds the
			   scale shift which goes into the top two SIB bits. */
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));
		}
	}
	else {
		/* mod = 00, rm = 101: absolute 32 bit address. */
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	/* For shift instructions the group opcode was already written,
	   so the byte after it is returned. */
	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
214 
215 /* --------------------------------------------------------------------- */
216 /*  Enter / return                                                       */
217 /* --------------------------------------------------------------------- */
218 
/* Emits the near (32 bit relative displacement) form of a jump or call.
   jump->addr is advanced past the opcode bytes so it points at the
   displacement field. When the target is a label the displacement is
   filled in later (PATCH_MW); otherwise the relative offset to the
   absolute target address is stored immediately. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		/* Conditional jumps use the two byte 0x0f-prefixed opcodes. */
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW;
	else
		/* The displacement is relative to the end of the instruction
		   (jump->addr + 4), adjusted by the executable offset. */
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	code_ptr += 4;

	return code_ptr;
}
245 
246 #define ENTER_R2_USED	0x00001
247 #define ENTER_R2_TO_S	0x00002
248 #define ENTER_R2_TO_R0	0x00004
249 #define ENTER_R1_TO_S	0x00008
250 #define ENTER_TMP_TO_R4	0x00010
251 #define ENTER_TMP_TO_S	0x00020
252 
/* Emits the function prologue: loads the floating point arguments,
   pushes the callee saved registers, allocates the 16 byte aligned
   locals area (with page-by-page stack probing on Windows) and moves
   the word arguments into their assigned scratch or saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
	sljit_s32 size, locals_offset, args_size, types, status;
	sljit_u8 *inst;
#ifdef _WIN32
	/* Stack offset SLJIT_R2 must be reloaded from after the stack
	   probing loop clobbered it, or -1 when no reload is needed. */
	sljit_s32 r2_offset = -1;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed.  */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* First pass: load the floating point arguments, measure the stack
	   space taken by the incoming arguments (args_size, initialized to
	   SSIZE_OF(sw) to skip the return address) and record in status
	   which word arguments need special handling later. */
	arg_types >>= SLJIT_ARG_SHIFT;
	types = arg_types;
	word_arg_count = 0;
	saved_arg_count = 0;
	float_arg_count = 0;
	args_size = SSIZE_OF(sw);
	status = 0;
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;

			if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				saved_arg_count++;
				/* A fourth word argument goes through TMP_REG1. */
				if (saved_arg_count == 4)
					status |= ENTER_TMP_TO_S;
			} else {
				if (word_arg_count == 4)
					status |= ENTER_TMP_TO_R4;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
				if (word_arg_count == 3)
					status |= ENTER_R2_USED;
#endif
			}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			/* The first two fastcall word arguments arrive in
			   registers and take no stack space. */
			if (word_arg_count <= 2 && !(options & SLJIT_ENTER_CDECL))
				break;
#endif

			args_size += SSIZE_OF(sw);
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	/* Remove the return address slot counted above. */
	args_size -= SSIZE_OF(sw);
	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	locals_offset = 2 * SSIZE_OF(sw);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
		locals_offset = 4 * SSIZE_OF(sw);
#else
	if (scratches >= 3)
		locals_offset = 4 * SSIZE_OF(sw);
#endif

	compiler->scratches_offset = locals_offset;

	/* Reserve stack slots for the scratch/saved registers which have
	   no machine register assigned (beyond the first three of each). */
	if (scratches > 3)
		locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);

	if (saveds > 3)
		locals_offset += (saveds - 3) * SSIZE_OF(sw);

	compiler->locals_offset = locals_offset;

	/* Push the preserved registers; TMP_REG1 is always saved. */
	size = 1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3);
	inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
	FAIL_IF(!inst);

	INC_SIZE((sljit_uw)size);
	PUSH_REG(reg_map[TMP_REG1]);
	if (saveds > 2 || scratches > 9)
		PUSH_REG(reg_map[SLJIT_S2]);
	if (saveds > 1 || scratches > 10)
		PUSH_REG(reg_map[SLJIT_S1]);
	if (saveds > 0 || scratches > 11)
		PUSH_REG(reg_map[SLJIT_S0]);

	/* Bytes pushed above; the stack arguments now start at
	   args_size + size above the stack pointer. */
	size *= SSIZE_OF(sw);

	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);

	/* Include the return address slot. */
	size += SSIZE_OF(sw);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (!(options & SLJIT_ENTER_CDECL))
		size += args_size;
#endif

	/* Keep the stack pointer 16 byte aligned after the prologue. */
	local_size = ((locals_offset + local_size + size + 0xf) & ~0xf) - size;
	compiler->local_size = local_size;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (!(options & SLJIT_ENTER_CDECL))
		size -= args_size;
#endif

	/* Second pass: move the word arguments into their registers.
	   Fastcall register arguments are only recorded in status here
	   and moved after the loop. */
	word_arg_count = 0;
	saved_arg_count = 0;
	args_size = size;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			if (!(options & SLJIT_ENTER_CDECL) && word_arg_count <= 2) {
				if (word_arg_count == 1) {
					if (status & ENTER_R2_USED) {
						EMIT_MOV(compiler, (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) ? SLJIT_R0 : SLJIT_S0, 0, SLJIT_R2, 0);
					} else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
						status |= ENTER_R2_TO_S;
						saved_arg_count++;
					} else
						status |= ENTER_R2_TO_R0;
				} else if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
					status |= ENTER_R1_TO_S;
					saved_arg_count++;
				}
				break;
			}
#endif
			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
				SLJIT_ASSERT(word_arg_count <= 3 || (status & ENTER_TMP_TO_R4));

				if (word_arg_count <= 3) {
#ifdef _WIN32
					/* SLJIT_R2 would be clobbered by the stack
					   probing loop below; reload it afterwards. */
					if (word_arg_count == 3 && local_size > 4 * 4096)
						r2_offset = local_size + args_size;
					else
#endif
						EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
				}
			} else {
				SLJIT_ASSERT(saved_arg_count <= 3 || (status & ENTER_TMP_TO_S));

				if (saved_arg_count <= 3)
					EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
				saved_arg_count++;
			}
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* Move the fastcall register arguments recorded above. */
	if (!(options & SLJIT_ENTER_CDECL)) {
		if (status & ENTER_R2_TO_R0)
			EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_R2, 0);

		saved_arg_count = 0;
		if (status & ENTER_R2_TO_S) {
			EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_R2, 0);
			saved_arg_count++;
		}

		if (status & ENTER_R1_TO_S)
			EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_R1, 0);
	}
#endif

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#ifdef _WIN32
	SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);

	/* Windows guard page handling: every 4K page of a frame larger
	   than one page must be touched in order, so large frames are
	   probed page by page before the stack pointer is moved. */
	if (local_size > 4096) {
		if (local_size <= 4 * 4096) {
			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);

			if (local_size > 2 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			/* Probe loop: SLJIT_R2 counts the remaining pages. */
			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);

			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = LOOP_i8;
			/* Jumps back over the OR (8 bytes) + SUB (6 bytes) +
			   LOOP (2 bytes) above. */
			inst[1] = (sljit_u8)-16;
			local_size &= 0xfff;
		}
	}

	if (local_size > 0) {
		BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
	}

	/* Restore SLJIT_R2 if the probe loop clobbered it. */
	if (r2_offset != -1)
		EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);

#else /* !_WIN32 */

	SLJIT_ASSERT(local_size > 0);

	BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#endif /* _WIN32 */

	/* Store the fourth word argument kept in TMP_REG1 into its
	   register slot inside the frame. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
		size = (status & ENTER_TMP_TO_R4) ? compiler->scratches_offset : compiler->locals_offset - SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
500 
/* Recomputes the frame layout fields of the compiler (args_size,
   scratches_offset, locals_offset, local_size) without emitting any
   code. The calculations must mirror those in sljit_emit_enter. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 args_size, locals_offset;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 word_arg_count = 0;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Stack size occupied by the arguments.
	   NOTE(review): unlike sljit_emit_enter, this count skips the
	   first two word arguments even when SLJIT_ENTER_CDECL is set on
	   a fastcall-enabled build — confirm the two computations are
	   meant to agree for that combination. */
	arg_types >>= SLJIT_ARG_SHIFT;
	args_size = 0;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
			/* The first two word arguments are in registers. */
			if (word_arg_count >= 2)
				args_size += SSIZE_OF(sw);
			word_arg_count++;
#else
			args_size += SSIZE_OF(sw);
#endif
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = args_size;

	/* [esp+0] for saving temporaries and function calls. */
	locals_offset = 2 * SSIZE_OF(sw);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((options & SLJIT_ENTER_CDECL) && scratches >= 3)
		locals_offset = 4 * SSIZE_OF(sw);
#else
	if (scratches >= 3)
		locals_offset = 4 * SSIZE_OF(sw);
#endif

	compiler->scratches_offset = locals_offset;

	/* Slots for registers without a machine register assignment. */
	if (scratches > 3)
		locals_offset += ((scratches > (3 + 6)) ? 6 : (scratches - 3)) * SSIZE_OF(sw);

	if (saveds > 3)
		locals_offset += (saveds - 3) * SSIZE_OF(sw);

	compiler->locals_offset = locals_offset;

	/* Bytes taken by the pushed registers plus the return address
	   (mirrors the push sequence of sljit_emit_enter). */
	saveds = (2 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3)) * SSIZE_OF(sw);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (!(options & SLJIT_ENTER_CDECL))
		saveds += args_size;
#endif

	/* Keep the stack pointer 16 byte aligned. */
	compiler->local_size = ((locals_offset + local_size + saveds + 0xf) & ~0xf) - saveds;
	return SLJIT_SUCCESS;
}
570 
/* Pops the registers saved by sljit_emit_enter, in reverse order of
   the pushes. The caller must free the locals area beforehand. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_u8 *inst;

	/* One pop for TMP_REG1 plus one for each preserved register. */
	size = (sljit_uw)(1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3));
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	if (compiler->saveds > 0 || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if (compiler->saveds > 1 || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if (compiler->saveds > 2 || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);
	POP_REG(reg_map[TMP_REG1]);

	return SLJIT_SUCCESS;
}
593 
/* Emits the epilogue: frees the locals area, restores the saved
   registers and returns. With fastcall, the callee also pops its
   stack arguments, hence the ret imm16 form. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_uw size;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

	/* Free the locals area. */
	BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);

	FAIL_IF(emit_stack_frame_release(compiler));

	size = 1;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	/* ret imm16 needs two extra bytes. */
	if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL))
		size = 3;
#endif
	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if (compiler->args_size > 0 && !(compiler->options & SLJIT_ENTER_CDECL)) {
		/* NOTE(review): args_size is truncated to 8 bits by U8 here;
		   presumably the argument area is guaranteed to be small
		   enough — confirm against the argument count limits. */
		RET_I16(U8(compiler->args_size));
		return SLJIT_SUCCESS;
	}
#endif

	RET();
	return SLJIT_SUCCESS;
}
629 
630 /* --------------------------------------------------------------------- */
631 /*  Call / return instructions                                           */
632 /* --------------------------------------------------------------------- */
633 
634 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
635 
c_fast_call_get_stack_size(sljit_s32 arg_types,sljit_s32 * word_arg_count_ptr)636 static sljit_sw c_fast_call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
637 {
638 	sljit_sw stack_size = 0;
639 	sljit_s32 word_arg_count = 0;
640 
641 	arg_types >>= SLJIT_ARG_SHIFT;
642 
643 	while (arg_types) {
644 		switch (arg_types & SLJIT_ARG_MASK) {
645 		case SLJIT_ARG_TYPE_F64:
646 			stack_size += SSIZE_OF(f64);
647 			break;
648 		case SLJIT_ARG_TYPE_F32:
649 			stack_size += SSIZE_OF(f32);
650 			break;
651 		default:
652 			word_arg_count++;
653 			if (word_arg_count > 2)
654 				stack_size += SSIZE_OF(sw);
655 			break;
656 		}
657 
658 		arg_types >>= SLJIT_ARG_SHIFT;
659 	}
660 
661 	if (word_arg_count_ptr)
662 		*word_arg_count_ptr = word_arg_count;
663 
664 	return stack_size;
665 }
666 
/* Moves the arguments of a fastcall style call into place: stack
   arguments are stored below the stack pointer, then SLJIT_R0 is
   copied into SLJIT_R2 (or exchanged with it when swap_args is set) —
   presumably SLJIT_R2 maps to the first fastcall argument register. */
static sljit_s32 c_fast_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 swap_args)
{
	sljit_u8 *inst;
	sljit_s32 float_arg_count;

	/* Exactly one word on the stack: a single push is shorter than
	   the generic sub + mov sequence below. */
	if (stack_size == SSIZE_OF(sw) && word_arg_count == 3) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		PUSH_REG(reg_map[SLJIT_R2]);
	}
	else if (stack_size > 0) {
		/* The fourth word argument is kept in a frame slot; fetch it
		   before the stack pointer moves. */
		if (word_arg_count >= 4)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);

		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

		stack_size = 0;
		arg_types >>= SLJIT_ARG_SHIFT;
		word_arg_count = 0;
		float_arg_count = 0;
		while (arg_types) {
			switch (arg_types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
				stack_size += SSIZE_OF(f32);
				break;
			default:
				/* Only the third and fourth word arguments go on
				   the stack; the first two stay in registers. */
				word_arg_count++;
				if (word_arg_count == 3) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, SLJIT_R2, 0);
					stack_size += SSIZE_OF(sw);
				}
				else if (word_arg_count == 4) {
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, TMP_REG1, 0);
					stack_size += SSIZE_OF(sw);
				}
				break;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}
	}

	if (word_arg_count > 0) {
		if (swap_args) {
			/* xchg eax, r2 (single byte form). */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);

			*inst++ = U8(XCHG_EAX_r | reg_map[SLJIT_R2]);
		}
		else {
			/* mov r2, r0 */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2);

			*inst++ = MOV_r_rm;
			*inst++ = U8(MOD_REG | (reg_map[SLJIT_R2] << 3) | reg_map[SLJIT_R0]);
		}
	}

	return SLJIT_SUCCESS;
}
738 
739 #endif
740 
/* Returns the number of extra stack bytes needed for the arguments of
   a cdecl call. Every argument is passed on the stack; the total is
   reduced by the area already reserved at the bottom of the frame
   (scratches_offset) and rounded up to a multiple of 16. The number of
   word arguments is reported through word_arg_count_ptr when it is
   not NULL. */
static sljit_s32 cdecl_call_get_stack_size(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw size = 0;
	sljit_s32 word_args = 0;
	sljit_s32 type;

	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		type = arg_types & SLJIT_ARG_MASK;

		if (type == SLJIT_ARG_TYPE_F64)
			size += SSIZE_OF(f64);
		else if (type == SLJIT_ARG_TYPE_F32)
			size += SSIZE_OF(f32);
		else {
			word_args++;
			size += SSIZE_OF(sw);
		}
	}

	if (word_arg_count_ptr != NULL)
		*word_arg_count_ptr = word_args;

	/* The reserved area can hold the arguments without growing the stack. */
	if (size <= compiler->scratches_offset)
		return 0;

	return ((size - compiler->scratches_offset + 0xf) & ~0xf);
}
773 
/* Stores the outgoing arguments of a cdecl call into their stack
   slots. stack_size is the extra space to allocate, as computed by
   cdecl_call_get_stack_size (0 when the reserved bottom-of-frame area
   is already large enough). */
static sljit_s32 cdecl_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count)
{
	sljit_s32 float_arg_count = 0;
	sljit_u8 *inst; /* Used implicitly by the BINARY_IMM32 macro. */

	/* The fourth word argument lives in a frame slot; load it into
	   TMP_REG1 before the stack pointer is adjusted. */
	if (word_arg_count >= 4)
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);

	if (stack_size > 0)
		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

	stack_size = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), stack_size, float_arg_count));
			stack_size += SSIZE_OF(f32);
			break;
		default:
			/* Word arguments 1-3 come from registers, the fourth
			   from TMP_REG1 loaded above. */
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size, (word_arg_count >= 4) ? TMP_REG1 : word_arg_count, 0);
			stack_size += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
814 
/* Adjusts the stack pointer after a call and, when the callee returned
   a floating point value, transfers it from the x87 top of stack
   through memory into the SSE2 register SLJIT_FR0. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);

	/* Integer and void results need no further work. */
	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	/* fstps/fstpl [esp]: opcode extension /3, mod = 00, rm = 100
	   (a SIB byte follows), SIB base = esp with no index. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04;
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP];

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
838 
tail_call_with_args(struct sljit_compiler * compiler,sljit_s32 * extra_space,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)839 static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
840 	sljit_s32 *extra_space, sljit_s32 arg_types,
841 	sljit_s32 src, sljit_sw srcw)
842 {
843 	sljit_sw args_size, prev_args_size, saved_regs_size;
844 	sljit_sw types, word_arg_count, float_arg_count;
845 	sljit_sw stack_size, prev_stack_size, min_size, offset;
846 	sljit_sw word_arg4_offset;
847 	sljit_u8 r2_offset = 0;
848 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
849 	sljit_u8 fast_call = (*extra_space & 0xff) == SLJIT_CALL;
850 #endif
851 	sljit_u8* inst;
852 
853 	ADJUST_LOCAL_OFFSET(src, srcw);
854 	CHECK_EXTRA_REGS(src, srcw, (void)0);
855 
856 	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
857 		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
858 
859 	word_arg_count = 0;
860 	float_arg_count = 0;
861 	arg_types >>= SLJIT_ARG_SHIFT;
862 	types = 0;
863 	args_size = 0;
864 
865 	while (arg_types != 0) {
866 		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
867 
868 		switch (arg_types & SLJIT_ARG_MASK) {
869 		case SLJIT_ARG_TYPE_F64:
870 			args_size += SSIZE_OF(f64);
871 			float_arg_count++;
872 			break;
873 		case SLJIT_ARG_TYPE_F32:
874 			args_size += SSIZE_OF(f32);
875 			float_arg_count++;
876 			break;
877 		default:
878 			word_arg_count++;
879 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
880 			if (!fast_call || word_arg_count > 2)
881 				args_size += SSIZE_OF(sw);
882 #else
883 			args_size += SSIZE_OF(sw);
884 #endif
885 			break;
886 		}
887 		arg_types >>= SLJIT_ARG_SHIFT;
888 	}
889 
890 	if (args_size <= compiler->args_size
891 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
892 			&& (!(compiler->options & SLJIT_ENTER_CDECL) || args_size == 0 || !fast_call)
893 #endif /* SLJIT_X86_32_FASTCALL */
894 			&& 1) {
895 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
896 		*extra_space = fast_call ? 0 : args_size;
897 		prev_args_size = compiler->args_size;
898 		stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
899 #else /* !SLJIT_X86_32_FASTCALL */
900 		*extra_space = 0;
901 		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;
902 #endif /* SLJIT_X86_32_FASTCALL */
903 
904 		offset = stack_size + compiler->local_size;
905 
906 		if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
907 			if (word_arg_count >= 1) {
908 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
909 				r2_offset = sizeof(sljit_sw);
910 			}
911 			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
912 		}
913 
914 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
915 		if (!(compiler->options & SLJIT_ENTER_CDECL)) {
916 			if (!fast_call)
917 				offset -= SSIZE_OF(sw);
918 
919 			if (word_arg_count >= 3) {
920 				word_arg4_offset = SSIZE_OF(sw);
921 
922 				if (word_arg_count + float_arg_count >= 4) {
923 					word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(sw);
924 					if ((types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
925 						word_arg4_offset = SSIZE_OF(sw) + SSIZE_OF(f64);
926 				}
927 
928 				/* In cdecl mode, at least one more word value must
929 				 * be present on the stack before the return address. */
930 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - word_arg4_offset, SLJIT_R2, 0);
931 			}
932 
933 			if (fast_call) {
934 				if (args_size < prev_args_size) {
935 					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size - SSIZE_OF(sw));
936 					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
937 				}
938 			} else if (prev_args_size > 0) {
939 				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset - prev_args_size);
940 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
941 			}
942 		}
943 #endif /* SLJIT_X86_32_FASTCALL */
944 
945 		while (types != 0) {
946 			switch (types & SLJIT_ARG_MASK) {
947 			case SLJIT_ARG_TYPE_F64:
948 				offset -= SSIZE_OF(f64);
949 				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
950 				float_arg_count--;
951 				break;
952 			case SLJIT_ARG_TYPE_F32:
953 				offset -= SSIZE_OF(f32);
954 				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
955 				float_arg_count--;
956 				break;
957 			default:
958 				switch (word_arg_count) {
959 				case 1:
960 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
961 					if (fast_call) {
962 						EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
963 						break;
964 					}
965 #endif
966 					offset -= SSIZE_OF(sw);
967 					if (r2_offset != 0) {
968 						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
969 						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
970 					} else
971 						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
972 					break;
973 				case 2:
974 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
975 					if (fast_call)
976 						break;
977 #endif
978 					offset -= SSIZE_OF(sw);
979 					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
980 					break;
981 				case 3:
982 					offset -= SSIZE_OF(sw);
983 					break;
984 				case 4:
985 					offset -= SSIZE_OF(sw);
986 					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), compiler->scratches_offset);
987 					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
988 					break;
989 				}
990 				word_arg_count--;
991 				break;
992 			}
993 			types >>= SLJIT_ARG_SHIFT;
994 		}
995 
996 		BINARY_IMM32(ADD, compiler->local_size, SLJIT_SP, 0);
997 		FAIL_IF(emit_stack_frame_release(compiler));
998 
999 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1000 		if (args_size < prev_args_size)
1001 			BINARY_IMM32(ADD, prev_args_size - args_size, SLJIT_SP, 0);
1002 #endif
1003 
1004 		return SLJIT_SUCCESS;
1005 	}
1006 
1007 	stack_size = args_size + SSIZE_OF(sw);
1008 
1009 	if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
1010 		r2_offset = SSIZE_OF(sw);
1011 		stack_size += SSIZE_OF(sw);
1012 	}
1013 
1014 	if (word_arg_count >= 3)
1015 		stack_size += SSIZE_OF(sw);
1016 
1017 	prev_args_size = 0;
1018 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1019 	if (!(compiler->options & SLJIT_ENTER_CDECL))
1020 		prev_args_size = compiler->args_size;
1021 #endif
1022 
1023 	prev_stack_size = prev_args_size + SSIZE_OF(sw) + saved_regs_size;
1024 	min_size = prev_stack_size + compiler->local_size;
1025 
1026 	word_arg4_offset = compiler->scratches_offset;
1027 
1028 	if (stack_size > min_size) {
1029 		BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
1030 		if (src == SLJIT_MEM1(SLJIT_SP))
1031 			srcw += stack_size - min_size;
1032 		word_arg4_offset += stack_size - min_size;
1033 	}
1034 	else
1035 		stack_size = min_size;
1036 
1037 	if (word_arg_count >= 3) {
1038 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);
1039 
1040 		if (word_arg_count >= 4)
1041 			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
1042 	}
1043 
1044 	if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
1045 		if (word_arg_count >= 1) {
1046 			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
1047 			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
1048 		}
1049 		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
1050 	}
1051 
1052 	/* Restore saved registers. */
1053 	offset = stack_size - prev_args_size - 2 * SSIZE_OF(sw);
1054 	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1055 
1056 	if (compiler->saveds > 2 || compiler->scratches > 9) {
1057 		offset -= SSIZE_OF(sw);
1058 		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1059 	}
1060 	if (compiler->saveds > 1 || compiler->scratches > 10) {
1061 		offset -= SSIZE_OF(sw);
1062 		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
1063 	}
1064 	if (compiler->saveds > 0 || compiler->scratches > 11) {
1065 		offset -= SSIZE_OF(sw);
1066 		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
1067 	}
1068 
1069 	/* Copy fourth argument and return address. */
1070 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1071 	if (fast_call) {
1072 		offset = stack_size;
1073 		*extra_space = 0;
1074 
1075 		if (word_arg_count >= 4 && prev_args_size == 0) {
1076 			offset -= SSIZE_OF(sw);
1077 			inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1078 			FAIL_IF(!inst);
1079 			*inst = XCHG_r_rm;
1080 
1081 			SLJIT_ASSERT(args_size != prev_args_size);
1082 		} else {
1083 			if (word_arg_count >= 4) {
1084 				offset -= SSIZE_OF(sw);
1085 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1086 			}
1087 
1088 			if (args_size != prev_args_size)
1089 				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1090 		}
1091 
1092 		if (args_size != prev_args_size)
1093 			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - args_size - SSIZE_OF(sw), SLJIT_R2, 0);
1094 	} else {
1095 #endif /* SLJIT_X86_32_FASTCALL */
1096 		offset = stack_size - SSIZE_OF(sw);
1097 		*extra_space = args_size;
1098 
1099 		if (word_arg_count >= 4 && prev_args_size == SSIZE_OF(sw)) {
1100 			offset -= SSIZE_OF(sw);
1101 			inst = emit_x86_instruction(compiler, 1, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), offset);
1102 			FAIL_IF(!inst);
1103 			*inst = XCHG_r_rm;
1104 
1105 			SLJIT_ASSERT(prev_args_size > 0);
1106 		} else {
1107 			if (word_arg_count >= 4) {
1108 				offset -= SSIZE_OF(sw);
1109 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1110 			}
1111 
1112 			if (prev_args_size > 0)
1113 				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), stack_size - prev_args_size - SSIZE_OF(sw));
1114 		}
1115 
1116 		/* Copy return address. */
1117 		if (prev_args_size > 0)
1118 			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), stack_size - SSIZE_OF(sw), SLJIT_R2, 0);
1119 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1120 	}
1121 #endif /* SLJIT_X86_32_FASTCALL */
1122 
1123 	while (types != 0) {
1124 		switch (types & SLJIT_ARG_MASK) {
1125 		case SLJIT_ARG_TYPE_F64:
1126 			offset -= SSIZE_OF(f64);
1127 			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1128 			float_arg_count--;
1129 			break;
1130 		case SLJIT_ARG_TYPE_F32:
1131 			offset -= SSIZE_OF(f32);
1132 			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
1133 			float_arg_count--;
1134 			break;
1135 		default:
1136 			switch (word_arg_count) {
1137 			case 1:
1138 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1139 				if (fast_call) {
1140 					EMIT_MOV(compiler, SLJIT_R2, 0, r2_offset != 0 ? SLJIT_MEM1(SLJIT_SP) : SLJIT_R0, 0);
1141 					break;
1142 				}
1143 #endif
1144 				offset -= SSIZE_OF(sw);
1145 				if (r2_offset != 0) {
1146 					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
1147 					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1148 				} else
1149 					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
1150 				break;
1151 			case 2:
1152 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1153 				if (fast_call)
1154 					break;
1155 #endif
1156 				offset -= SSIZE_OF(sw);
1157 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
1158 				break;
1159 			case 3:
1160 				offset -= SSIZE_OF(sw);
1161 				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
1162 				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
1163 				break;
1164 			}
1165 			word_arg_count--;
1166 			break;
1167 		}
1168 		types >>= SLJIT_ARG_SHIFT;
1169 	}
1170 
1171 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
1172 	/* Skip return address. */
1173 	if (fast_call)
1174 		offset -= SSIZE_OF(sw);
1175 #endif
1176 
1177 	SLJIT_ASSERT(offset >= 0);
1178 
1179 	if (offset == 0)
1180 		return SLJIT_SUCCESS;
1181 
1182 	BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
1183 	return SLJIT_SUCCESS;
1184 }
1185 
/* Emits the trailing code of a tail call whose argument area could not be
   folded into the current frame: releases the leftover stack space and
   returns to the original caller. */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst;

	/* sp += extra_space: drop the stack area that is still occupied. */
	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);

	/* Emit a one byte near RET. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}
1200 
/* Emits a direct call (target patched later through the returned jump).
   Three paths: tail call (SLJIT_CALL_RETURN), x86-32 fastcall (SLJIT_CALL)
   and cdecl. Returns NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the current frame first. stack_size carries
		   the call type in, and on return it holds the stack space which
		   could not be released (0 when the frame is fully gone). */
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		/* Arguments were validated above; skip re-checking inside the
		   nested sljit_emit_jump call. */
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0) {
			/* Frame fully released: the tail call becomes a plain jump. */
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
			return sljit_emit_jump(compiler, type);
		}

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		/* Release the remaining stack space and return to the caller. */
		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	if ((type & 0xff) == SLJIT_CALL) {
		/* Fastcall convention. */
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		PTR_FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		/* Note the 0: no caller-side stack adjustment is requested here
		   (unlike the cdecl path below, which passes stack_size). */
		PTR_FAIL_IF(post_call_with_args(compiler, arg_types, 0));
		return jump;
	}
#endif

	/* cdecl convention: all arguments on the stack, caller cleans up. */
	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	PTR_FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
1264 
/* Emits an indirect call (target given by src/srcw). Mirrors
   sljit_emit_call, with extra care because argument shuffling may clobber
   the register(s) which address the call target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	sljit_s32 swap_args;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: stack_size carries the call type in, and the
		   unreleased stack space (possibly 0) out. */
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		if (!(src & SLJIT_IMM)) {
			/* tail_call_with_args has moved any non-immediate target
			   into R0, so jump through R0 from here on. */
			src = SLJIT_R0;
			srcw = 0;
		}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
			|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
	SLJIT_ASSERT(reg_map[SLJIT_R0] == 0 && reg_map[SLJIT_R2] == 1 && SLJIT_R0 == 1 && SLJIT_R2 == 3);

	if ((type & 0xff) == SLJIT_CALL) {
		stack_size = c_fast_call_get_stack_size(arg_types, &word_arg_count);
		swap_args = 0;

		/* If the target address uses R2 (which the fastcall argument
		   loader overwrites), rewrite the address to use R0 instead and
		   tell c_fast_call_with_args to swap the two registers. Since
		   SLJIT_R0 == 1 and SLJIT_R2 == 3 (asserted above), xoring the
		   register field with 0x2 exchanges R0 and R2. */
		if (word_arg_count > 0) {
			if ((src & REG_MASK) == SLJIT_R2 || OFFS_REG(src) == SLJIT_R2) {
				swap_args = 1;
				if (((src & REG_MASK) | 0x2) == SLJIT_R2)
					src ^= 0x2;
				if ((OFFS_REG(src) | 0x2) == SLJIT_R2)
					src ^= TO_OFFS_REG(0x2);
			}
		}

		FAIL_IF(c_fast_call_with_args(compiler, arg_types, stack_size, word_arg_count, swap_args));

		/* The pushed arguments shift every SP-relative offset by
		   stack_size; compensate while the jump is emitted, then undo. */
		compiler->scratches_offset += stack_size;
		compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
		compiler->skip_checks = 1;
#endif
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		compiler->scratches_offset -= stack_size;
		compiler->locals_offset -= stack_size;

		return post_call_with_args(compiler, arg_types, 0);
	}
#endif

	/* cdecl convention. */
	stack_size = cdecl_call_get_stack_size(compiler, arg_types, &word_arg_count);
	FAIL_IF(cdecl_call_with_args(compiler, arg_types, stack_size, word_arg_count));

	/* Same SP-relative offset compensation as in the fastcall path. */
	compiler->scratches_offset += stack_size;
	compiler->locals_offset += stack_size;

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	compiler->scratches_offset -= stack_size;
	compiler->locals_offset -= stack_size;

	return post_call_with_args(compiler, arg_types, stack_size);
}
1351 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)1352 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
1353 {
1354 	sljit_u8 *inst;
1355 
1356 	CHECK_ERROR();
1357 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
1358 	ADJUST_LOCAL_OFFSET(dst, dstw);
1359 
1360 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
1361 
1362 	if (FAST_IS_REG(dst)) {
1363 		/* Unused dest is possible here. */
1364 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1365 		FAIL_IF(!inst);
1366 
1367 		INC_SIZE(1);
1368 		POP_REG(reg_map[dst]);
1369 		return SLJIT_SUCCESS;
1370 	}
1371 
1372 	/* Memory. */
1373 	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1374 	FAIL_IF(!inst);
1375 	*inst++ = POP_rm;
1376 	return SLJIT_SUCCESS;
1377 }
1378 
/* Returns from a fast call: pushes the return address held in src back
   onto the stack, then emits a near RET. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (!FAST_IS_REG(src)) {
		/* Return address in memory: PUSH [mem] followed by RET. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		RET();
		return SLJIT_SUCCESS;
	}

	/* Return address in a register: PUSH reg followed by RET,
	 * emitted as a single two byte sequence. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
	FAIL_IF(!inst);

	INC_SIZE(1 + 1);
	PUSH_REG(reg_map[src]);
	RET();
	return SLJIT_SUCCESS;
}
1406 
skip_frames_before_return(struct sljit_compiler * compiler)1407 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1408 {
1409 	sljit_sw size;
1410 
1411 	/* Don't adjust shadow stack if it isn't enabled.  */
1412 	if (!cpu_has_shadow_stack())
1413 		return SLJIT_SUCCESS;
1414 
1415 	SLJIT_ASSERT(compiler->args_size >= 0);
1416 	SLJIT_ASSERT(compiler->local_size > 0);
1417 
1418 	size = compiler->local_size;
1419 	size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1420 		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1421 
1422 	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1423 }
1424