1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* x86 32-bit arch dependent functions. */
28 
29 /* --------------------------------------------------------------------- */
30 /*  Operators                                                            */
31 /* --------------------------------------------------------------------- */
32 
emit_do_imm(struct sljit_compiler * compiler,sljit_u8 opcode,sljit_sw imm)33 static sljit_s32 emit_do_imm(struct sljit_compiler *compiler, sljit_u8 opcode, sljit_sw imm)
34 {
35 	sljit_u8 *inst;
36 
37 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + sizeof(sljit_sw));
38 	FAIL_IF(!inst);
39 	INC_SIZE(1 + sizeof(sljit_sw));
40 	*inst++ = opcode;
41 	sljit_unaligned_store_sw(inst, imm);
42 	return SLJIT_SUCCESS;
43 }
44 
45 /* Size contains the flags as well. */
/* Core instruction encoder: computes the encoded length, reserves buffer
   space, emits the prefixes and the mod/rm (plus optional SIB) bytes, and
   stores any immediate operands.  Returns a pointer to the opcode
   position (the caller writes the opcode byte(s) there), or NULL on
   allocation failure.  The low nibble of 'size' is the opcode length in
   bytes; the upper bits carry EX86_* flags. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 reg_map_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one instruction prefix may be requested. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* Low nibble is the opcode byte count; the rest are flags. */
	size &= 0xf;
	inst_size = size;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & REG_MASK))
			inst_size += sizeof(sljit_sw); /* Absolute address: 32 bit displacement. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_sw);
			}
			else if (reg_map[b & REG_MASK] == 5) {
				/* Base ebp (101) cannot be encoded without a displacement. */
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_map[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8); /* A zero 8 bit displacement is needed. */
			}

			/* Base esp (100) can only be encoded through a SIB byte. */
			if (reg_map[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK)
				inst_size += 1; /* SIB byte. */
		}
	}

	/* Calculate size of a. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG; /* Use the sign-extended 8 bit immediate form. */
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= 0x1f);
			if (imma != 1) {
				/* Shift-by-1 has a dedicated opcode and carries no immediate. */
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_sw);
	}
	else
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* The mod/rm byte follows the opcode byte(s) written by the caller. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_map[a] << 3); /* Register operand in the 'reg' field. */
		else
			*buf_ptr = U8(a << 3); /* SSE2 registers are encoded by their own index. */
	}
	else {
		/* Shift group opcode is selected here; the caller gets inst + 1. */
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand: mod = 11. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_map[b] : b));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_map_b = reg_map[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* Select the displacement width through the mod field. */
			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40; /* mod = 01: 8 bit displacement follows. */
				else
					*buf_ptr |= 0x80; /* mod = 10: 32 bit displacement follows. */
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_map_b;
			else {
				*buf_ptr++ |= 0x04; /* r/m = 100: SIB byte follows. */
				*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_map_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_sw);
				}
			}
		}
		else {
			/* Base + scaled index; immb holds the scale shift (0-3). */
			if (reg_map_b == 5)
				*buf_ptr |= 0x40; /* ebp base always needs a displacement. */

			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_map_b | (reg_map[OFFS_REG(b)] << 3) | (immb << 6));

			if (reg_map_b == 5)
				*buf_ptr++ = 0; /* Zero 8 bit displacement. */
		}
	}
	else {
		/* No base register: absolute 32 bit address (mod = 00, r/m = 101). */
		*buf_ptr++ |= 0x05;
		sljit_unaligned_store_sw(buf_ptr, immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_sw);
	}

	/* Append the immediate operand after the addressing bytes. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_sw(buf_ptr, imma);
	}

	/* For shift instructions the group opcode byte was already written above. */
	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
227 
228 /* --------------------------------------------------------------------- */
229 /*  Enter / return                                                       */
230 /* --------------------------------------------------------------------- */
231 
/* Emits a jump or call with a 32 bit relative offset.  The opcode byte(s)
   are written here; the 4 byte offset field is either filled in now (known
   target) or marked with PATCH_MW for the label patching pass.  jump->addr
   is advanced past the opcode so it points at the offset field. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	if (type == SLJIT_JUMP) {
		*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else {
		/* Conditional jumps use the two byte, 0x0f prefixed form. */
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MW; /* Label target: resolved by the patch pass. */
	else
		/* Known target: store the pc-relative offset immediately. */
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)(jump->u.target - (jump->addr + 4) - (sljit_uw)executable_offset));
	code_ptr += 4;

	return code_ptr;
}
258 
259 #define ENTER_TMP_TO_R4		0x00001
260 #define ENTER_TMP_TO_S		0x00002
261 
/* Emits the function prologue: pushes the saved registers, allocates the
   local area (with page-by-page stack probing on Windows), and moves the
   incoming stack-passed arguments into their assigned registers.  On 32 bit
   x86 all arguments arrive on the stack unless SLJIT_ENTER_REG_ARG is set. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count, saved_arg_count, float_arg_count;
	sljit_s32 size, args_size, types, status;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_u8 *inst;
#ifdef _WIN32
	/* Stack offset of a postponed R2 load (or its spill), or -1 if unused. */
	sljit_s32 r2_offset = -1;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Emit ENDBR32 at function entry if needed.  */
	FAIL_IF(emit_endbranch(compiler));

	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	status = 0;

	if (options & SLJIT_ENTER_REG_ARG) {
		/* First three word arguments arrive in registers; only a fourth
		   one must be fetched from the stack (through TMP_REG1). */
		args_size = 3 * SSIZE_OF(sw);

		while (arg_types) {
			if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
				word_arg_count++;
				if (word_arg_count >= 4)
					status |= ENTER_TMP_TO_R4;
			}

			arg_types >>= SLJIT_ARG_SHIFT;
		}

		compiler->args_size = 0;
	} else {
		/* First pass: load float arguments, measure the argument area,
		   and decide how the fourth word argument is handled. */
		types = arg_types;
		saved_arg_count = 0;
		float_arg_count = 0;
		args_size = SSIZE_OF(sw); /* Skip the return address slot. */
		while (types) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 0, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f64);
				break;
			case SLJIT_ARG_TYPE_F32:
				float_arg_count++;
				FAIL_IF(emit_sse2_load(compiler, 1, float_arg_count, SLJIT_MEM1(SLJIT_SP), args_size));
				args_size += SSIZE_OF(f32);
				break;
			default:
				word_arg_count++;

				if (!(types & SLJIT_ARG_TYPE_SCRATCH_REG))
					saved_arg_count++;

				/* The fourth word argument has no register of its own:
				   it travels through TMP_REG1 and is stored after the
				   frame is set up; drop it from the second pass below. */
				if (word_arg_count == 4) {
					if (types & SLJIT_ARG_TYPE_SCRATCH_REG) {
						status |= ENTER_TMP_TO_R4;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					} else if (saved_arg_count == 4) {
						status |= ENTER_TMP_TO_S;
						arg_types &= ~(SLJIT_ARG_FULL_MASK << 3 * SLJIT_ARG_SHIFT);
					}
				}

				args_size += SSIZE_OF(sw);
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		args_size -= SSIZE_OF(sw);
		compiler->args_size = args_size;
	}

	/* Number of registers pushed below (TMP/ebp plus saved registers). */
	size = (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - kept_saveds_count;
	if (!(options & SLJIT_ENTER_REG_ARG))
		size++;

	if (size != 0) {
		inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(size + 1));
		FAIL_IF(!inst);

		INC_SIZE((sljit_uw)size);

		if (!(options & SLJIT_ENTER_REG_ARG))
			PUSH_REG(reg_map[TMP_REG1]);

		if ((saveds > 2 && kept_saveds_count <= 2) || scratches > 9)
			PUSH_REG(reg_map[SLJIT_S2]);
		if ((saveds > 1 && kept_saveds_count <= 1) || scratches > 10)
			PUSH_REG(reg_map[SLJIT_S1]);
		if ((saveds > 0 && kept_saveds_count == 0) || scratches > 11)
			PUSH_REG(reg_map[SLJIT_S0]);

		/* From here 'size' is the byte size of the pushed area. */
		size *= SSIZE_OF(sw);
	}

	/* Fetch the fourth word argument before esp moves further. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), args_size + size);

	size += SSIZE_OF(sw); /* Include the return address slot. */

	local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + size + 0xf) & ~0xf) - size;
	compiler->local_size = local_size;

	/* Second pass: move the remaining stack arguments into registers. */
	word_arg_count = 0;
	saved_arg_count = 0;
	args_size = size;
	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			SLJIT_ASSERT(word_arg_count <= 3 || (word_arg_count == 4 && !(status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S))));

			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
#ifdef _WIN32
				/* R2 is needed by the stack probe loop below: postpone
				   the load until after probing. */
				if (word_arg_count == 3 && local_size > 4 * 4096)
					r2_offset = local_size + args_size;
				else
#endif
					EMIT_MOV(compiler, word_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);

			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, SLJIT_MEM1(SLJIT_SP), args_size);
				saved_arg_count++;
			}

			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET > 0);

#ifdef _WIN32
	SLJIT_ASSERT(r2_offset == -1 || local_size > 4 * 4096);

	/* Windows requires every page of a growing stack to be touched
	   (stack probing) when the frame is larger than a page. */
	if (local_size > 4096) {
		if (local_size <= 4 * 4096) {
			/* Up to four pages: probe each page individually. */
			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);

			if (local_size > 2 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			/* Larger frames: probe in a loop, page count in R2 (ecx). */
			if (options & SLJIT_ENTER_REG_ARG) {
				SLJIT_ASSERT(r2_offset == -1);

				/* R2 carries an incoming argument: preserve it. */
				inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 1));
				FAIL_IF(!inst);
				INC_SIZE(1);
				PUSH_REG(reg_map[SLJIT_R2]);

				local_size -= SSIZE_OF(sw);
				r2_offset = local_size;
			}

			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_IMM, local_size >> 12);

			BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);

			/* LOOP back -16 bytes to the OR/SUB pair above. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = LOOP_i8;
			inst[1] = (sljit_u8)-16;
			local_size &= 0xfff; /* Remaining sub-page amount. */
		}
	}

	if (local_size > 0) {
		BINARY_IMM32(OR, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
	}

	/* Load (or reload) R2 if its handling was postponed above. */
	if (r2_offset != -1)
		EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);

#else /* !_WIN32 */

	SLJIT_ASSERT(local_size > 0);

	BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#endif /* _WIN32 */

	size = SLJIT_LOCALS_OFFSET_BASE - SSIZE_OF(sw);
	kept_saveds_count = SLJIT_R3 - kept_saveds_count;

	/* More than three saved word arguments: the extra ones live in frame
	   slots instead of registers. */
	while (saved_arg_count > 3) {
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, kept_saveds_count, 0);
		kept_saveds_count++;
		size -= SSIZE_OF(sw);
		saved_arg_count--;
	}

	/* Store the fourth word argument (held in TMP_REG1) into its slot. */
	if (status & (ENTER_TMP_TO_R4 | ENTER_TMP_TO_S)) {
		if (status & ENTER_TMP_TO_R4)
			size = 2 * SSIZE_OF(sw);

		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), size, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
486 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)487 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
488 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
489 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
490 {
491 	sljit_s32 args_size;
492 
493 	CHECK_ERROR();
494 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
495 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
496 
497 	arg_types >>= SLJIT_ARG_SHIFT;
498 	args_size = 0;
499 
500 	if (!(options & SLJIT_ENTER_REG_ARG)) {
501 		while (arg_types) {
502 			switch (arg_types & SLJIT_ARG_MASK) {
503 			case SLJIT_ARG_TYPE_F64:
504 				args_size += SSIZE_OF(f64);
505 				break;
506 			case SLJIT_ARG_TYPE_F32:
507 				args_size += SSIZE_OF(f32);
508 				break;
509 			default:
510 				args_size += SSIZE_OF(sw);
511 				break;
512 			}
513 			arg_types >>= SLJIT_ARG_SHIFT;
514 		}
515 	}
516 
517 	compiler->args_size = args_size;
518 
519 	/* [esp+0] for saving temporaries and for function calls. */
520 
521 	saveds = (1 + (scratches > 9 ? (scratches - 9) : 0) + (saveds <= 3 ? saveds : 3) - SLJIT_KEPT_SAVEDS_COUNT(options)) * SSIZE_OF(sw);
522 
523 	/* Saving ebp. */
524 	if (!(options & SLJIT_ENTER_REG_ARG))
525 		saveds += SSIZE_OF(sw);
526 
527 	compiler->local_size = ((SLJIT_LOCALS_OFFSET_BASE + local_size + saveds + 0xf) & ~0xf) - saveds;
528 	return SLJIT_SUCCESS;
529 }
530 
/* Emits the epilogue up to (but not including) the return or jump:
   releases the local area and pops the saved registers.  When
   'is_return_to' is set the extra stack word used by return_to is also
   released. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	sljit_s32 local_size, saveds;
	sljit_uw size;
	sljit_u8 *inst;

	/* Number of registers to pop (mirrors the pushes in sljit_emit_enter). */
	size = (sljit_uw)((compiler->scratches > 9 ? (compiler->scratches - 9) : 0) +
		(compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count);

	local_size = compiler->local_size;

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		size++; /* TMP_REG1 (ebp) was pushed as well. */
	else if (is_return_to && size == 0) {
		/* Nothing to pop: fold the extra word into the local area. */
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	if (size == 0)
		return SLJIT_SUCCESS;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);

	INC_SIZE(size);

	saveds = compiler->saveds;

	/* Pop in reverse order of the pushes in sljit_emit_enter. */
	if ((saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11)
		POP_REG(reg_map[SLJIT_S0]);
	if ((saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10)
		POP_REG(reg_map[SLJIT_S1]);
	if ((saveds > 2 && kept_saveds_count <= 2) || compiler->scratches > 9)
		POP_REG(reg_map[SLJIT_S2]);

	if (!(compiler->options & SLJIT_ENTER_REG_ARG))
		POP_REG(reg_map[TMP_REG1]);

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}
578 
/* Emits the full epilogue followed by a one byte 'ret' instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	SLJIT_ASSERT(compiler->args_size >= 0);
	SLJIT_ASSERT(compiler->local_size > 0);

	FAIL_IF(emit_stack_frame_release(compiler, 0));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}
597 
/* Releases the stack frame and then jumps to 'src'.  When the target is
   in memory or in a register that the frame release restores, it is first
   moved into a register that survives the release. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		CHECK_EXTRA_REGS(src, srcw, (void)0);

		/* Pick a register which is not restored by the frame release. */
		src_r = (compiler->options & SLJIT_ENTER_REG_ARG) ? TMP_REG1 : SLJIT_R1;

		EMIT_MOV(compiler, src_r, 0, src, srcw);
		src = src_r;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
622 
623 /* --------------------------------------------------------------------- */
624 /*  Call / return instructions                                           */
625 /* --------------------------------------------------------------------- */
626 
/* Computes the extra stack space (16 byte aligned) needed by a call with
   the given argument list; stores the number of word arguments in
   *word_arg_count_ptr when the pointer is provided.  Returns 0 when the
   arguments fit into the four reserved words. */
static sljit_s32 call_get_stack_size(sljit_s32 arg_types, sljit_s32 *word_arg_count_ptr)
{
	sljit_sw total = 0;
	sljit_s32 words = 0;
	sljit_s32 type;

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types != 0) {
		type = arg_types & SLJIT_ARG_MASK;

		if (type == SLJIT_ARG_TYPE_F64)
			total += SSIZE_OF(f64);
		else if (type == SLJIT_ARG_TYPE_F32)
			total += SSIZE_OF(f32);
		else {
			words++;
			total += SSIZE_OF(sw);
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count_ptr)
		*word_arg_count_ptr = words;

	if (total <= 4 * SSIZE_OF(sw))
		return 0;

	return ((total - (4 * SSIZE_OF(sw)) + 0xf) & ~0xf);
}
659 
/* Stores the outgoing arguments of a (non tail) call onto the stack.
   'stack_size' extra bytes (from call_get_stack_size) are reserved first.
   The fourth word argument is fetched from its frame slot through
   TMP_REG1, or staged through R0 when 'keep_tmp1' forbids clobbering
   TMP_REG1. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_sw stack_size, sljit_s32 word_arg_count, sljit_s32 keep_tmp1)
{
	sljit_s32 float_arg_count = 0, arg4_reg = 0, arg_offset;
	sljit_u8 *inst;

	if (word_arg_count >= 4) {
		arg4_reg = SLJIT_R0;

		if (!keep_tmp1) {
			/* Fetch the fourth argument from its frame slot now. */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
			arg4_reg = TMP_REG1;
		}
	}

	if (stack_size > 0)
		BINARY_IMM32(SUB, stack_size, SLJIT_SP, 0);

	arg_offset = 0;
	word_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f64);
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count++;
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), arg_offset, float_arg_count));
			arg_offset += SSIZE_OF(f32);
			break;
		default:
			word_arg_count++;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), arg_offset, (word_arg_count >= 4) ? arg4_reg : word_arg_count, 0);

			if (word_arg_count == 1 && arg4_reg == SLJIT_R0)
				/* keep_tmp1 case: the first argument (R0) is stored now,
				   so R0 can stage the fourth argument from its frame
				   slot (offset adjusted for the SUB above). */
				EMIT_MOV(compiler, SLJIT_R0, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw) + stack_size);

			arg_offset += SSIZE_OF(sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
710 
/* Cleans up after a call: releases the argument area and, when the call
   returns a floating point value, moves it from the x87 top of stack
   (through the scratch slot at [esp]) into the SSE2 register FR0. */
static sljit_s32 post_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 arg_types, sljit_s32 stack_size)
{
	sljit_u8 *inst;
	sljit_s32 single;

	if (stack_size > 0)
		BINARY_IMM32(ADD, stack_size, SLJIT_SP, 0);

	/* Word (or void) return values need no conversion. */
	if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
		return SLJIT_SUCCESS;

	single = ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32);

	/* fstps/fstpl [esp]: pop st0 into the scratch slot. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3);
	inst[0] = single ? FSTPS : FSTPD;
	inst[1] = (0x03 << 3) | 0x04; /* mod/rm: opcode ext 3, SIB follows. */
	inst[2] = (0x04 << 3) | reg_map[SLJIT_SP]; /* SIB: base esp, no index. */

	return emit_sse2_load(compiler, single, SLJIT_FR0, SLJIT_MEM1(SLJIT_SP), 0);
}
734 
/* Prepares the stack for a tail call: the outgoing arguments overwrite the
   incoming argument area, the saved registers are restored, and the final
   esp adjustment is either emitted here or reported through '*extra_space'
   (number of bytes the emitted jump sequence must still release; 0 when
   the outgoing arguments fit into the current argument area).  'src'/'srcw'
   describe the call target; when it is neither an immediate nor R0 it is
   moved into R0, spilling R0 to [esp] first if R0 carries an argument. */
static sljit_s32 tail_call_with_args(struct sljit_compiler *compiler,
	sljit_s32 *extra_space, sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw args_size, saved_regs_size;
	sljit_sw types, word_arg_count, float_arg_count;
	sljit_sw stack_size, prev_stack_size, min_size, offset;
	sljit_sw word_arg4_offset;
	/* Doubles as "R0 was spilled to [esp]" flag in the fast path. */
	sljit_u8 r2_offset = 0;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	sljit_u8* inst;

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	/* Bytes occupied by the pushed registers (TMP/ebp included). */
	saved_regs_size = (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
		+ (compiler->saveds <= 3 ? compiler->saveds : 3) - kept_saveds_count) * SSIZE_OF(sw);

	/* First pass: measure the outgoing arguments; 'types' records them in
	   reverse order so they can be stored from the last one downwards. */
	word_arg_count = 0;
	float_arg_count = 0;
	arg_types >>= SLJIT_ARG_SHIFT;
	types = 0;
	args_size = 0;

	while (arg_types != 0) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			args_size += SSIZE_OF(f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			args_size += SSIZE_OF(f32);
			float_arg_count++;
			break;
		default:
			word_arg_count++;
			args_size += SSIZE_OF(sw);
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (args_size <= compiler->args_size) {
		/* Fast path: the outgoing arguments fit into the incoming
		   argument area, so they are stored there and the whole frame
		   can be released normally. */
		*extra_space = 0;
		stack_size = args_size + SSIZE_OF(sw) + saved_regs_size;

		offset = stack_size + compiler->local_size;

		if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
			if (word_arg_count >= 1) {
				/* Spill R0 so it can carry the call target. */
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
				r2_offset = sizeof(sljit_sw);
			}
			EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
		}

		/* Store the arguments from the last one to the first one. */
		while (types != 0) {
			switch (types & SLJIT_ARG_MASK) {
			case SLJIT_ARG_TYPE_F64:
				offset -= SSIZE_OF(f64);
				FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			case SLJIT_ARG_TYPE_F32:
				offset -= SSIZE_OF(f32);
				/* Fix: must be a single precision (32 bit) store; the
				   double form writes 8 bytes into this 4 byte slot and
				   clobbers the argument stored just above it. */
				FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
				float_arg_count--;
				break;
			default:
				switch (word_arg_count) {
				case 1:
					offset -= SSIZE_OF(sw);
					if (r2_offset != 0) {
						/* The first argument was spilled to [esp] above. */
						EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					} else
						EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
					break;
				case 2:
					offset -= SSIZE_OF(sw);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
					break;
				case 3:
					offset -= SSIZE_OF(sw);
					/* Fix: the third word argument (R2) was silently
					   dropped here; store it into its slot. */
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
					break;
				case 4:
					offset -= SSIZE_OF(sw);
					/* Fix: stage the fourth argument through TMP_REG1
					   instead of R2 — R2 still holds the third argument,
					   which is stored by 'case 3' afterwards. */
					EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
					break;
				}
				word_arg_count--;
				break;
			}
			types >>= SLJIT_ARG_SHIFT;
		}

		return emit_stack_frame_release(compiler, 0);
	}

	/* Slow path: the outgoing arguments are larger than the incoming
	   argument area; esp may have to move and the registers are restored
	   manually. */
	stack_size = args_size + SSIZE_OF(sw);

	if (word_arg_count >= 1 && !(src & SLJIT_IMM) && src != SLJIT_R0) {
		r2_offset = SSIZE_OF(sw);
		stack_size += SSIZE_OF(sw);
	}

	if (word_arg_count >= 3)
		stack_size += SSIZE_OF(sw);

	prev_stack_size = SSIZE_OF(sw) + saved_regs_size;
	min_size = prev_stack_size + compiler->local_size;

	word_arg4_offset = 2 * SSIZE_OF(sw);

	if (stack_size > min_size) {
		BINARY_IMM32(SUB, stack_size - min_size, SLJIT_SP, 0);
		if (src == SLJIT_MEM1(SLJIT_SP))
			srcw += stack_size - min_size;
		word_arg4_offset += stack_size - min_size;
	}
	else
		stack_size = min_size;

	if (word_arg_count >= 3) {
		/* Spill R2 (third argument) so R2 can stage the fourth one. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), r2_offset, SLJIT_R2, 0);

		if (word_arg_count >= 4)
			EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), word_arg4_offset);
	}

	if (!(src & SLJIT_IMM) && src != SLJIT_R0) {
		if (word_arg_count >= 1) {
			SLJIT_ASSERT(r2_offset == sizeof(sljit_sw));
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R0, 0);
		}
		EMIT_MOV(compiler, SLJIT_R0, 0, src, srcw);
	}

	/* Restore saved registers. */
	offset = stack_size - 2 * SSIZE_OF(sw);
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);

	/* NOTE(review): unlike the S1/S0 checks below, this condition ignores
	   kept_saveds_count — confirm S2 can never be a kept register here. */
	if (compiler->saveds > 2 || compiler->scratches > 9) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S2, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 1 && kept_saveds_count <= 1) || compiler->scratches > 10) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S1, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}
	if ((compiler->saveds > 0 && kept_saveds_count == 0) || compiler->scratches > 11) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_S0, 0, SLJIT_MEM1(SLJIT_SP), offset);
	}

	/* Copy fourth argument and return address. */
	offset = stack_size - SSIZE_OF(sw);
	*extra_space = args_size;

	if (word_arg_count >= 4) {
		offset -= SSIZE_OF(sw);
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
	}

	/* Store the remaining arguments from the last one downwards. */
	while (types != 0) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			offset -= SSIZE_OF(f64);
			FAIL_IF(emit_sse2_store(compiler, 0, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			offset -= SSIZE_OF(f32);
			/* Fix: single precision store (see the fast path above). */
			FAIL_IF(emit_sse2_store(compiler, 1, SLJIT_MEM1(SLJIT_SP), offset, float_arg_count));
			float_arg_count--;
			break;
		default:
			switch (word_arg_count) {
			case 1:
				offset -= SSIZE_OF(sw);
				if (r2_offset != 0) {
					/* The first argument was spilled to [esp] above. */
					EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), 0);
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				} else
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R0, 0);
				break;
			case 2:
				offset -= SSIZE_OF(sw);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R1, 0);
				break;
			case 3:
				offset -= SSIZE_OF(sw);
				/* Reload the spilled third argument. */
				EMIT_MOV(compiler, SLJIT_R2, 0, SLJIT_MEM1(SLJIT_SP), r2_offset);
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, SLJIT_R2, 0);
				break;
			}
			word_arg_count--;
			break;
		}
		types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT(offset >= 0);

	if (offset == 0)
		return SLJIT_SUCCESS;

	BINARY_IMM32(ADD, offset, SLJIT_SP, 0);
	return SLJIT_SUCCESS;
}
948 
/* Finishes a tail call whose argument area could not be folded away:
   releases the remaining 'extra_space' bytes and emits 'ret'. */
static sljit_s32 emit_tail_call_end(struct sljit_compiler *compiler, sljit_s32 extra_space)
{
	/* Called when stack consumption cannot be reduced to 0. */
	sljit_u8 *inst;

	BINARY_IMM32(ADD, extra_space, SLJIT_SP, 0);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();

	return SLJIT_SUCCESS;
}
963 
/* Prepares a tail call under the register-argument convention. Only needed
 * when there are four or more word arguments: the fourth one travels on the
 * stack and must be copied into the caller's frame before it is released. */
static sljit_s32 tail_call_reg_arg_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_s32 types = arg_types >> SLJIT_ARG_SHIFT;
	sljit_s32 word_args = 0;
	sljit_s32 kept_saveds, stack_offset, i;

	/* Count the word sized (non floating point) arguments. */
	for (; types != 0; types >>= SLJIT_ARG_SHIFT) {
		if ((types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_args++;
	}

	/* Three or fewer word arguments fit in registers; nothing to relocate. */
	if (word_args < 4)
		return SLJIT_SUCCESS;

	/* NOTE(review): presumably the stack-passed fourth word argument;
	   load it before the frame layout is touched. */
	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), 2 * SSIZE_OF(sw));

	kept_saveds = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	stack_offset = compiler->local_size + 3 * SSIZE_OF(sw);

	/* Skip one extra slot for every callee-saved register the prologue
	   pushed (either a real saved register or an overflow scratch). */
	for (i = 0; i < 3; i++) {
		if ((compiler->saveds > i && kept_saveds <= i) || compiler->scratches > 11 - i)
			stack_offset += SSIZE_OF(sw);
	}

	return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), stack_offset, TMP_REG1, 0);
}
995 
/* Emits a call (or a tail call when SLJIT_CALL_RETURN is set) to a target
 * resolved later through the returned jump record. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	struct sljit_jump *jump;
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: release the current frame and jump instead of call. */
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			PTR_FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));
			PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));
		}

		/* In/out argument: tail_call_with_args receives the call type in
		   stack_size and stores the remaining stack adjustment back. */
		stack_size = type;
		PTR_FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, SLJIT_IMM, 0));

		SLJIT_SKIP_CHECKS(compiler);

		if (stack_size == 0)
			return sljit_emit_jump(compiler, SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP));

		/* The stack could not be fully released before the transfer:
		   perform a real call, then the deferred adjustment and RET. */
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		PTR_FAIL_IF(emit_tail_call_end(compiler, stack_size));
		return jump;
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		/* Register-argument convention: arguments are already in place. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_jump(compiler, type);
	}

	/* Stack-argument convention: push the arguments, call, then restore
	   the stack pointer after the call returns. */
	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	PTR_FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, 0));

	SLJIT_SKIP_CHECKS(compiler);
	jump = sljit_emit_jump(compiler, type);
	PTR_FAIL_IF(jump == NULL);

	PTR_FAIL_IF(post_call_with_args(compiler, arg_types, stack_size));
	return jump;
}
1045 
/* Emits an indirect call (or tail call when SLJIT_CALL_RETURN is set) whose
 * target address is given by src/srcw. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_sw stack_size = 0;
	sljit_s32 word_arg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (type & SLJIT_CALL_RETURN) {
		if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
			FAIL_IF(tail_call_reg_arg_with_args(compiler, arg_types));

			/* The target must survive the frame release: copy it to
			   TMP_REG1 when it lives in memory or in a saved register
			   that is about to be restored. */
			if ((src & SLJIT_MEM) || (src > SLJIT_R2 && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
				ADJUST_LOCAL_OFFSET(src, srcw);
				CHECK_EXTRA_REGS(src, srcw, (void)0);

				EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
				src = TMP_REG1;
				srcw = 0;
			}

			FAIL_IF(emit_stack_frame_release(compiler, 0));

			SLJIT_SKIP_CHECKS(compiler);
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
		}

		/* In/out argument: tail_call_with_args receives the call type in
		   stack_size and stores the remaining stack adjustment back. */
		stack_size = type;
		FAIL_IF(tail_call_with_args(compiler, &stack_size, arg_types, src, srcw));

		/* NOTE(review): non-immediate targets appear to have been moved
		   into SLJIT_R0 by tail_call_with_args — confirm in its body. */
		if (!(src & SLJIT_IMM)) {
			src = SLJIT_R0;
			srcw = 0;
		}

		SLJIT_SKIP_CHECKS(compiler);

		if (stack_size == 0)
			return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);

		/* Stack could not be fully released: real call followed by the
		   deferred stack adjustment and RET. */
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));
		return emit_tail_call_end(compiler, stack_size);
	}

	if ((type & 0xff) == SLJIT_CALL_REG_ARG) {
		/* Register-argument convention: arguments are already in place. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_ijump(compiler, type, src, srcw);
	}

	ADJUST_LOCAL_OFFSET(src, srcw);
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src & SLJIT_MEM) {
		/* Load the target before pushing arguments changes the layout. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	stack_size = call_get_stack_size(arg_types, &word_arg_count);
	FAIL_IF(call_with_args(compiler, arg_types, stack_size, word_arg_count, src == TMP_REG1));

	/* Compensate SP-relative addressing for the argument area pushed by
	   call_with_args. NOTE(review): after the SLJIT_MEM move above, src
	   seems unable to be SLJIT_MEM1(SLJIT_SP) here — confirm whether this
	   branch is reachable. */
	if (stack_size > 0 && src == SLJIT_MEM1(SLJIT_SP))
		srcw += stack_size;

	SLJIT_SKIP_CHECKS(compiler);
	FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

	return post_call_with_args(compiler, arg_types, stack_size);
}
1117 
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1118 static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
1119 {
1120 	sljit_u8* inst;
1121 
1122 	if (compiler->options & SLJIT_ENTER_REG_ARG) {
1123 		if (src == SLJIT_FR0)
1124 			return SLJIT_SUCCESS;
1125 
1126 		SLJIT_SKIP_CHECKS(compiler);
1127 		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
1128 	}
1129 
1130 	if (FAST_IS_REG(src)) {
1131 		FAIL_IF(emit_sse2_store(compiler, op & SLJIT_32, SLJIT_MEM1(SLJIT_SP), 0, src));
1132 
1133 		src = SLJIT_MEM1(SLJIT_SP);
1134 		srcw = 0;
1135 	} else {
1136 		ADJUST_LOCAL_OFFSET(src, srcw);
1137 	}
1138 
1139 	inst = emit_x86_instruction(compiler, 1 | EX86_SSE2_OP1, 0, 0, src, srcw);
1140 	*inst = (op & SLJIT_32) ? FLDS : FLDL;
1141 
1142 	return SLJIT_SUCCESS;
1143 }
1144 
/* Pops the return address pushed by a "fast call" into dst (register or
 * memory), so the callee can later jump back through it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Remap virtual registers to their stack slots where necessary. */
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	if (FAST_IS_REG(dst)) {
		/* Unused dest is possible here. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1);
		/* Single byte pop reg. */
		POP_REG(reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Memory. */
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
1171 
/* Counterpart of sljit_emit_fast_enter: pushes the saved return address
 * from src back onto the stack and executes RET through it. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	/* Remap virtual registers to their stack slots where necessary. */
	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (FAST_IS_REG(src)) {
		/* push reg (1 byte) followed by ret (1 byte). */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
		FAIL_IF(!inst);

		INC_SIZE(1 + 1);
		PUSH_REG(reg_map[src]);
	}
	else {
		/* push [mem]: group 0xFF instruction with the PUSH opcode
		   extension in the ModRM reg field. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		/* Separate one byte buffer for the ret. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
1199 
1200 /* --------------------------------------------------------------------- */
1201 /*  Other operations                                                     */
1202 /* --------------------------------------------------------------------- */
1203 
/* Loads or stores a register pair (REG_PAIR) from/to memory; plain single
 * register accesses are forwarded to sljit_emit_mem_unaligned. The two words
 * are transferred one at a time, sw bytes apart. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	/* next: +sw or -sw, the direction the offset moves between the two
	   transfers; reg_idx selects which pair member goes first. */
	sljit_s32 i, next, reg_idx, offset;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	next = SSIZE_OF(sw);

	/* For loads, a destination register that is also part of the address
	   would be clobbered by the first transfer; reorder or copy the
	   address register to avoid that. */
	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* None of them are virtual register so TMP_REG1 will not be used. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			/* Load the second word first so regs[0] is clobbered last. */
			next = -SSIZE_OF(sw);

			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		/* Iterate the pair forward or backward depending on 'next'. */
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		offset = -1;

		/* NOTE(review): SLJIT_R3..SLJIT_S3 appear to be stack-mapped
		   (virtual) registers on x86-32; go through TMP_REG1 and their
		   stack slot at SP + offset — confirm against reg_map. */
		if (reg >= SLJIT_R3 && reg <= SLJIT_S3) {
			offset = (2 * SSIZE_OF(sw)) + ((reg) - SLJIT_R3) * SSIZE_OF(sw);
			reg = TMP_REG1;

			if (type & SLJIT_MEM_STORE)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), offset);
		}

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			/* Hand-encoded mov with base+index addressing and a disp8 of
			   sizeof(sljit_sw): opcode, ModRM (mod=01, rm=100 -> SIB),
			   SIB (scale from memw, index, base), disp8. */
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 4));
			FAIL_IF(!inst);

			INC_SIZE(4);

			inst[0] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			inst[1] = 0x44 | U8(reg_map[reg] << 3);
			inst[2] = U8(memw << 6) | U8(reg_map[OFFS_REG(mem)] << 3) | reg_map[mem & REG_MASK];
			inst[3] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		/* Only plain base+displacement addressing advances the offset. */
		if (!(mem & OFFS_REG_MASK))
			memw += next;

		/* Loads into a virtual register: forward the value from TMP_REG1
		   to its stack slot. */
		if (!(type & SLJIT_MEM_STORE) && offset != -1)
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), offset, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
1281 
skip_frames_before_return(struct sljit_compiler * compiler)1282 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1283 {
1284 	sljit_sw size;
1285 
1286 	/* Don't adjust shadow stack if it isn't enabled.  */
1287 	if (!cpu_has_shadow_stack())
1288 		return SLJIT_SUCCESS;
1289 
1290 	SLJIT_ASSERT(compiler->args_size >= 0);
1291 	SLJIT_ASSERT(compiler->local_size > 0);
1292 
1293 	size = compiler->local_size;
1294 	size += (1 + (compiler->scratches > 9 ? (compiler->scratches - 9) : 0)
1295 		+ (compiler->saveds <= 3 ? compiler->saveds : 3)) * SSIZE_OF(sw);
1296 
1297 	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1298 }
1299