1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* x86 64-bit arch dependent functions. */
28 
29 /* --------------------------------------------------------------------- */
30 /*  Operators                                                            */
31 /* --------------------------------------------------------------------- */
32 
/* Emits a move of a full machine-word immediate into reg: one REX prefix
   byte, the MOV_r_i32 opcode with the low three register bits folded in,
   then the sizeof(sljit_sw) byte immediate. */
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	const sljit_uw length = 2 + sizeof(sljit_sw);
	sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + length);

	FAIL_IF(!inst);
	INC_SIZE(length);

	/* REX_B is added when the mapped register index is 8 or above. */
	inst[0] = (reg_map[reg] >= 8) ? (REX_W | REX_B) : REX_W;
	inst[1] = U8(MOV_r_i32 | (reg_map[reg] & 0x7));
	sljit_unaligned_store_sw(inst + 2, imm);
	return SLJIT_SUCCESS;
}
45 
emit_do_imm32(struct sljit_compiler * compiler,sljit_u8 rex,sljit_u8 opcode,sljit_sw imm)46 static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
47 {
48 	sljit_u8 *inst;
49 	sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);
50 
51 	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
52 	FAIL_IF(!inst);
53 	INC_SIZE(length);
54 	if (rex)
55 		*inst++ = rex;
56 	*inst++ = opcode;
57 	sljit_unaligned_store_s32(inst, (sljit_s32)imm);
58 	return SLJIT_SUCCESS;
59 }
60 
/* Generic encoder for an instruction that has a mod r/m operand.

   size:    the low four bits hold the number of opcode bytes reserved for
            the caller; the remaining bits are EX86_* flags.
   a, imma: the register operand, or SLJIT_IMM with its value in imma.
   b, immb: the general operand: a register, or a SLJIT_MEM operand whose
            displacement is immb. When b has an offset (index) register,
            immb is stored in the scale field of the SIB byte instead.

   The function first computes the total length, reserves that many bytes,
   and then writes the optional 0xf2 / 0xf3 / 0x66 prefix, the optional REX
   prefix, the mod r/m byte, the SIB byte and displacement when needed, and
   the immediate. The opcode bytes are left for the caller to fill in,
   except that the group opcode is stored here for EX86_BIN_INS with an
   immediate and for EX86_SHIFT_INS.

   Returns a pointer to the first opcode byte, or to the byte after it for
   EX86_SHIFT_INS. Failure paths leave through PTR_FAIL_IF; callers in this
   file test the result with FAIL_IF(!inst). */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one of the 0xf2 / 0xf3 / 0x66 prefixes may be requested. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* Drop the flag bits: size is now the number of opcode bytes. */
	size &= 0xf;
	inst_size = size;

	/* REX_W is used unless the compiler is in 32 bit mode or the caller
	   disabled it; otherwise a plain REX prefix is emitted on request. */
	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		/* A displacement which does not fit into 32 bits is loaded into
		   TMP_REG2, which then serves as the offset register (when a base
		   register exists) or as the base register. */
		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
			immb = 0;
			if (b & REG_MASK)
				b |= TO_OFFS_REG(TMP_REG2);
			else
				b |= TMP_REG2;
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5) {
				/* A base register whose low bits are 5 is never encoded here
				   without a displacement: either base and offset registers
				   are exchanged (only when the scale is zero), or room for an
				   8 bit displacement is reserved. */
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			/* A base register whose low bits are 4 always gets a SIB byte;
			   SLJIT_SP as the offset register is handled below as the
			   form without scaling. */
			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	}
	else if (freg_map[b] >= 8)
		rex |= REX_B;

	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			/* Immediates in the signed 8 bit range use the one byte form
			   (GROUP_BINARY_83 below), all others take four bytes. */
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
			/* A shift by one uses GROUP_SHIFT_1 and has no immediate byte. */
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	/* inst now points to the first opcode byte, buf_ptr to the mod r/m
	   byte which follows the opcode bytes. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		/* The register field (bits 3-5) holds operand a; it is zeroed
		   for immediates. */
		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			/* Base register with optional displacement: 0x40 selects the
			   8 bit, 0x80 the 32 bit displacement form. */
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				/* 0x04 in the r/m field announces a SIB byte. */
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			/* Base + offset register form: immb is the scale and is stored
			   in the two topmost bits of the SIB byte. A base register with
			   low bits 5 gets an explicit zero 8 bit displacement. */
			if (reg_lmap_b == 5)
				*buf_ptr |= 0x40;

			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));

			if (reg_lmap_b == 5)
				*buf_ptr++ = 0;
		}
	}
	else {
		/* No base register: SIB byte 0x25 followed by a 32 bit
		   displacement (see the size calculation above). */
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	/* The immediate value comes last. A shift by one stores nothing since
	   EX86_BYTE_ARG is not set for it. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
283 
284 /* --------------------------------------------------------------------- */
285 /*  Enter / return                                                       */
286 /* --------------------------------------------------------------------- */
287 
/* Writes a jump / call sequence which goes through TMP_REG2: the target is
   loaded into TMP_REG2 and an indirect jump or call follows. The 32 bit
   load is used only when the target is a known address (no JUMP_LABEL),
   the jump is not rewritable and the address fits into 32 bits.

   Conditional types (below SLJIT_JUMP) are preceded by a two byte jump
   with the inverted condition which skips the sequence.

   jump->addr is set to the position of the immediate. Returns the position
   following the generated code. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_uw jump_type = jump->flags >> TYPE_SHIFT;
	int use_imm32 = 0;

	if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_LABEL)) && jump->u.target <= 0xffffffff)
		use_imm32 = 1;

	/* The relative jump below specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);

	if (jump_type < SLJIT_JUMP) {
		/* Inverted condition; the distance covers the load (6 or 10 bytes)
		   and the three byte indirect jump. */
		code_ptr[0] = U8(get_jump_code(jump_type ^ 0x1) - 0x10);
		code_ptr[1] = use_imm32 ? (6 + 3) : (10 + 3);
		code_ptr += 2;
	}

	code_ptr[0] = use_imm32 ? REX_B : (REX_W | REX_B);
	code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2];
	code_ptr += 2;
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL) {
		/* The value is not stored here; the jump is flagged with PATCH_MD. */
		jump->flags |= PATCH_MD;
	}
	else if (use_imm32)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);

	code_ptr += use_imm32 ? sizeof(sljit_s32) : sizeof(sljit_sw);

	code_ptr[0] = REX_B;
	code_ptr[1] = GROUP_FF;
	code_ptr[2] = U8(MOD_REG | (jump_type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);

	return code_ptr + 3;
}
322 
/* Finalizes the code generated for a put_label.

   When max_label is above HALFWORD_MAX the emitted code is kept as it is:
   put_label->addr is moved back by put_label->flags bytes and the entry is
   flagged with PATCH_MD.

   Otherwise the instructions are rewritten in place to use a 32 bit
   immediate. A zero put_label->flags means the destination is a register;
   a non-zero value is the byte length of the instruction(s) which follow
   the two byte + sizeof(sljit_uw) immediate load. In both cases
   put_label->addr is updated and the new end of the code is returned. */
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
	/* Length of the REX + opcode + machine-word immediate load. */
	const sljit_uw load_size = 2 + sizeof(sljit_uw);
	sljit_u8 *patch;

	if (max_label > HALFWORD_MAX) {
		put_label->addr -= put_label->flags;
		put_label->flags = PATCH_MD;
		return code_ptr;
	}

	if (put_label->flags != 0) {
		/* Destination is memory: move the trailing instruction(s) over
		   the immediate load. */
		patch = code_ptr - (put_label->flags + load_size);
		SLJIT_MEMMOVE(patch, patch + load_size, put_label->flags);

		SLJIT_ASSERT((patch[0] & 0xf8) == REX_W);

		/* Skip a further immediate load if the moved code starts with one. */
		if ((patch[1] & 0xf8) == MOV_r_i32) {
			patch += load_size;
			SLJIT_ASSERT((patch[0] & 0xf8) == REX_W);
		}

		SLJIT_ASSERT(patch[1] == MOV_rm_r);

		/* Turn the register store into a store of a 32 bit immediate:
		   clear bit 0x4 of the prefix and the register field of the
		   mod r/m byte. */
		patch[0] = U8(patch[0] & ~0x4);
		patch[1] = MOV_rm_i32;
		patch[2] = U8(patch[2] & ~(0x7 << 3));

		patch = (sljit_u8*)(put_label->addr - load_size + sizeof(sljit_s32));
		put_label->addr = (sljit_uw)patch;
		put_label->flags = 0;
		return patch;
	}

	/* Destination is register. */
	patch = (sljit_u8*)put_label->addr - load_size;

	SLJIT_ASSERT((patch[0] & 0xf8) == REX_W);
	SLJIT_ASSERT((patch[1] & 0xf8) == MOV_r_i32);

	if ((patch[0] & 0x07) == 0) {
		/* No other prefix bits are set: the opcode replaces the prefix. */
		patch[0] = patch[1];
		patch += 1 + sizeof(sljit_s32);
	}
	else {
		/* Keep the prefix, only clear its 0x08 bit. */
		patch[0] = U8(patch[0] & ~0x08);
		patch += 2 + sizeof(sljit_s32);
	}

	put_label->addr = (sljit_uw)patch;
	return patch;
}
372 
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)373 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
374 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
375 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
376 {
377 	sljit_uw size;
378 	sljit_s32 word_arg_count = 0;
379 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
380 	sljit_s32 saved_regs_size, tmp, i;
381 #ifdef _WIN64
382 	sljit_s32 saved_float_regs_size;
383 	sljit_s32 saved_float_regs_offset = 0;
384 	sljit_s32 float_arg_count = 0;
385 #endif /* _WIN64 */
386 	sljit_u8 *inst;
387 
388 	CHECK_ERROR();
389 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
390 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
391 
392 	if (options & SLJIT_ENTER_REG_ARG)
393 		arg_types = 0;
394 
395 	/* Emit ENDBR64 at function entry if needed.  */
396 	FAIL_IF(emit_endbranch(compiler));
397 
398 	compiler->mode32 = 0;
399 
400 	/* Including the return address saved by the call instruction. */
401 	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
402 
403 	tmp = SLJIT_S0 - saveds;
404 	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
405 		size = reg_map[i] >= 8 ? 2 : 1;
406 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
407 		FAIL_IF(!inst);
408 		INC_SIZE(size);
409 		if (reg_map[i] >= 8)
410 			*inst++ = REX_B;
411 		PUSH_REG(reg_lmap[i]);
412 	}
413 
414 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
415 		size = reg_map[i] >= 8 ? 2 : 1;
416 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
417 		FAIL_IF(!inst);
418 		INC_SIZE(size);
419 		if (reg_map[i] >= 8)
420 			*inst++ = REX_B;
421 		PUSH_REG(reg_lmap[i]);
422 	}
423 
424 #ifdef _WIN64
425 	local_size += SLJIT_LOCALS_OFFSET;
426 	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
427 
428 	if (saved_float_regs_size > 0) {
429 		saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
430 		local_size = saved_float_regs_offset + saved_float_regs_size;
431 	}
432 #else /* !_WIN64 */
433 	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
434 #endif /* _WIN64 */
435 
436 	arg_types >>= SLJIT_ARG_SHIFT;
437 
438 	while (arg_types > 0) {
439 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
440 			tmp = 0;
441 #ifndef _WIN64
442 			switch (word_arg_count) {
443 			case 0:
444 				tmp = SLJIT_R2;
445 				break;
446 			case 1:
447 				tmp = SLJIT_R1;
448 				break;
449 			case 2:
450 				tmp = TMP_REG1;
451 				break;
452 			default:
453 				tmp = SLJIT_R3;
454 				break;
455 			}
456 #else /* !_WIN64 */
457 			switch (word_arg_count + float_arg_count) {
458 			case 0:
459 				tmp = SLJIT_R3;
460 				break;
461 			case 1:
462 				tmp = SLJIT_R1;
463 				break;
464 			case 2:
465 				tmp = SLJIT_R2;
466 				break;
467 			default:
468 				tmp = TMP_REG1;
469 				break;
470 			}
471 #endif /* _WIN64 */
472 			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
473 				if (tmp != SLJIT_R0 + word_arg_count)
474 					EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
475 			} else {
476 				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
477 				saved_arg_count++;
478 			}
479 			word_arg_count++;
480 		} else {
481 #ifdef _WIN64
482 			SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
483 			float_arg_count++;
484 			if (float_arg_count != float_arg_count + word_arg_count)
485 				FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
486 					float_arg_count, float_arg_count + word_arg_count, 0));
487 #endif /* _WIN64 */
488 		}
489 		arg_types >>= SLJIT_ARG_SHIFT;
490 	}
491 
492 	local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
493 	compiler->local_size = local_size;
494 
495 #ifdef _WIN64
496 	if (local_size > 0) {
497 		if (local_size <= 4 * 4096) {
498 			if (local_size > 4096)
499 				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
500 			if (local_size > 2 * 4096)
501 				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
502 			if (local_size > 3 * 4096)
503 				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
504 		}
505 		else {
506 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);
507 
508 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
509 			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
510 			BINARY_IMM32(SUB, 1, TMP_REG1, 0);
511 
512 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
513 			FAIL_IF(!inst);
514 
515 			INC_SIZE(2);
516 			inst[0] = JNE_i8;
517 			inst[1] = (sljit_u8)-21;
518 			local_size &= 0xfff;
519 		}
520 
521 		if (local_size > 0)
522 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
523 	}
524 #endif /* _WIN64 */
525 
526 	if (local_size > 0)
527 		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
528 
529 #ifdef _WIN64
530 	if (saved_float_regs_size > 0) {
531 		compiler->mode32 = 1;
532 
533 		tmp = SLJIT_FS0 - fsaveds;
534 		for (i = SLJIT_FS0; i > tmp; i--) {
535 			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
536 			*inst++ = GROUP_0F;
537 			*inst = MOVAPS_xm_x;
538 			saved_float_regs_offset += 16;
539 		}
540 
541 		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
542 			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
543 			*inst++ = GROUP_0F;
544 			*inst = MOVAPS_xm_x;
545 			saved_float_regs_offset += 16;
546 		}
547 	}
548 #endif /* _WIN64 */
549 
550 	return SLJIT_SUCCESS;
551 }
552 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)553 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
554 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
555 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
556 {
557 	sljit_s32 saved_regs_size;
558 #ifdef _WIN64
559 	sljit_s32 saved_float_regs_size;
560 #endif /* _WIN64 */
561 
562 	CHECK_ERROR();
563 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
564 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
565 
566 #ifdef _WIN64
567 	local_size += SLJIT_LOCALS_OFFSET;
568 	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
569 
570 	if (saved_float_regs_size > 0)
571 		local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
572 #else /* !_WIN64 */
573 	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
574 #endif /* _WIN64 */
575 
576 	/* Including the return address saved by the call instruction. */
577 	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
578 	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
579 	return SLJIT_SUCCESS;
580 }
581 
/* Emits the code which undoes sljit_emit_enter: on _WIN64 the floating
   point registers are reloaded with MOVAPS, then the local area is
   released and the pushed registers are popped in the reverse order of
   the pushes.

   is_return_to: when non-zero, one more machine word is removed from the
   stack (presumably the return address, since sljit_emit_return_to leaves
   with a jump - confirm). If no register has to be popped, this word is
   folded into the release of the local area; otherwise a separate ADD is
   emitted after the pops.

   Returns SLJIT_SUCCESS, or leaves through FAIL_IF. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_uw size;
	sljit_s32 local_size, i, tmp;
	sljit_u8 *inst;
#ifdef _WIN64
	sljit_s32 saved_float_regs_offset;
	sljit_s32 fscratches = compiler->fscratches;
	sljit_s32 fsaveds = compiler->fsaveds;
#endif /* _WIN64 */

#ifdef _WIN64
	saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);

	if (saved_float_regs_offset > 0) {
		compiler->mode32 = 1;
		/* From here the variable holds the offset of the save area: it
		   sits at the 16 byte aligned end of the local area. */
		saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
			/* emit_x86_instruction can fail; do not write through its
			   result without checking it. */
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = MOVAPS_x_xm;
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = MOVAPS_x_xm;
			saved_float_regs_offset += 16;
		}

		compiler->mode32 = 0;
	}
#endif /* _WIN64 */

	local_size = compiler->local_size;

	/* Nothing to pop: remove the extra word together with the locals. */
	if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	/* Pop the scratch registers starting from SLJIT_FIRST_SAVED_REG. */
	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	/* Pop the saved registers, except the kept ones. */
	tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}
656 
sljit_emit_return_void(struct sljit_compiler * compiler)657 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
658 {
659 	sljit_u8 *inst;
660 
661 	CHECK_ERROR();
662 	CHECK(check_sljit_emit_return_void(compiler));
663 
664 	compiler->mode32 = 0;
665 
666 	FAIL_IF(emit_stack_frame_release(compiler, 0));
667 
668 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
669 	FAIL_IF(!inst);
670 	INC_SIZE(1);
671 	RET();
672 	return SLJIT_SUCCESS;
673 }
674 
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)675 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
676 	sljit_s32 src, sljit_sw srcw)
677 {
678 	CHECK_ERROR();
679 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
680 
681 	compiler->mode32 = 0;
682 
683 	if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
684 		ADJUST_LOCAL_OFFSET(src, srcw);
685 
686 		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
687 		src = TMP_REG2;
688 		srcw = 0;
689 	}
690 
691 	FAIL_IF(emit_stack_frame_release(compiler, 1));
692 
693 	SLJIT_SKIP_CHECKS(compiler);
694 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
695 }
696 
697 /* --------------------------------------------------------------------- */
698 /*  Call / return instructions                                           */
699 /* --------------------------------------------------------------------- */
700 
701 #ifndef _WIN64
702 
/* Moves the word arguments of a call into the registers expected by the
   callee (non-Windows variant). Only the number of word arguments matters:
   with at least one, SLJIT_R0 is copied into SLJIT_R2; with three or more,
   SLJIT_R2 is saved into TMP_REG1 before it is overwritten.

   src_ptr may be NULL. Otherwise it points to the register holding the
   call target, which is redirected to TMP_REG1 when it is SLJIT_R2 and
   that register gets moved. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = 0;
	sljit_s32 word_args = 0;
	sljit_s32 remaining;

	if (src_ptr)
		src = *src_ptr;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* The lowest entry is the return type, which is not counted. */
	for (remaining = arg_types >> SLJIT_ARG_SHIFT; remaining; remaining >>= SLJIT_ARG_SHIFT) {
		if ((remaining & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_args++;
	}

	if (word_args == 0)
		return SLJIT_SUCCESS;

	if (word_args >= 3) {
		if (src == SLJIT_R2)
			*src_ptr = TMP_REG1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	}

	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}
731 
732 #else
733 
/* Moves the arguments of a call into the registers expected by the callee
   (_WIN64 variant). Here the register of an argument depends on its
   position among all arguments, so word and float arguments may both have
   to be moved.

   The first pass counts the arguments, records their types in reverse
   order and decides whether any move is needed. If the call target (*src_ptr)
   lives in a register which is going to be overwritten, it is copied to
   TMP_REG2 and *src_ptr is updated. The second pass emits the moves
   starting from the last argument.

   src_ptr may be NULL when there is no register call target. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = 0;
	sljit_s32 total = 0;
	sljit_s32 words = 0;
	sljit_s32 floats = 0;
	sljit_s32 reversed_types = 0;
	sljit_s32 needs_move = 0;
	sljit_s32 kind;
	/* Destination register indexed by the (one based) argument position. */
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	if (src_ptr)
		src = *src_ptr;

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* The lowest entry is the return type, which is skipped. */
	for (arg_types >>= SLJIT_ARG_SHIFT; arg_types; arg_types >>= SLJIT_ARG_SHIFT) {
		kind = arg_types & SLJIT_ARG_MASK;
		reversed_types = (reversed_types << SLJIT_ARG_SHIFT) | kind;
		total++;

		if (kind == SLJIT_ARG_TYPE_F64 || kind == SLJIT_ARG_TYPE_F32) {
			floats++;

			if (total != floats)
				needs_move = 1;
			continue;
		}

		words++;

		/* Already in the right register. */
		if (total == words && total == word_arg_regs[total])
			continue;

		needs_move = 1;

		if (src == word_arg_regs[total]) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
			*src_ptr = TMP_REG2;
		}
	}

	if (!needs_move)
		return SLJIT_SUCCESS;

	/* The counters now describe the last argument; walk backwards. */
	for (; reversed_types; reversed_types >>= SLJIT_ARG_SHIFT) {
		kind = reversed_types & SLJIT_ARG_MASK;

		if (kind == SLJIT_ARG_TYPE_F64 || kind == SLJIT_ARG_TYPE_F32) {
			if (total != floats)
				FAIL_IF(emit_sse2_load(compiler, kind == SLJIT_ARG_TYPE_F32, total, floats, 0));
			floats--;
		}
		else {
			if (total != words || total != word_arg_regs[total])
				EMIT_MOV(compiler, word_arg_regs[total], 0, words, 0);
			words--;
		}

		total--;
	}

	return SLJIT_SUCCESS;
}
809 
810 #endif
811 
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)812 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
813 	sljit_s32 arg_types)
814 {
815 	CHECK_ERROR_PTR();
816 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
817 
818 	compiler->mode32 = 0;
819 
820 	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
821 		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));
822 
823 	if (type & SLJIT_CALL_RETURN) {
824 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
825 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
826 	}
827 
828 	SLJIT_SKIP_CHECKS(compiler);
829 	return sljit_emit_jump(compiler, type);
830 }
831 
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)832 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
833 	sljit_s32 arg_types,
834 	sljit_s32 src, sljit_sw srcw)
835 {
836 	CHECK_ERROR();
837 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
838 
839 	compiler->mode32 = 0;
840 
841 	if (src & SLJIT_MEM) {
842 		ADJUST_LOCAL_OFFSET(src, srcw);
843 		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
844 		src = TMP_REG2;
845 	}
846 
847 	if (type & SLJIT_CALL_RETURN) {
848 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
849 			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
850 			src = TMP_REG2;
851 		}
852 
853 		FAIL_IF(emit_stack_frame_release(compiler, 0));
854 	}
855 
856 	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
857 		FAIL_IF(call_with_args(compiler, arg_types, &src));
858 
859 	if (type & SLJIT_CALL_RETURN)
860 		type = SLJIT_JUMP;
861 
862 	SLJIT_SKIP_CHECKS(compiler);
863 	return sljit_emit_ijump(compiler, type, src, srcw);
864 }
865 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)866 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
867 {
868 	sljit_u8 *inst;
869 
870 	CHECK_ERROR();
871 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
872 	ADJUST_LOCAL_OFFSET(dst, dstw);
873 
874 	if (FAST_IS_REG(dst)) {
875 		if (reg_map[dst] < 8) {
876 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
877 			FAIL_IF(!inst);
878 			INC_SIZE(1);
879 			POP_REG(reg_lmap[dst]);
880 			return SLJIT_SUCCESS;
881 		}
882 
883 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
884 		FAIL_IF(!inst);
885 		INC_SIZE(2);
886 		*inst++ = REX_B;
887 		POP_REG(reg_lmap[dst]);
888 		return SLJIT_SUCCESS;
889 	}
890 
891 	/* REX_W is not necessary (src is not immediate). */
892 	compiler->mode32 = 1;
893 	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
894 	FAIL_IF(!inst);
895 	*inst++ = POP_rm;
896 	return SLJIT_SUCCESS;
897 }
898 
/* Pushes src onto the stack and emits a return instruction after it. For a
   register source the push (with REX_B when the mapped register index is 8
   or above) and the return are reserved as one buffer entry; for any other
   source the push is encoded by emit_x86_instruction and the return gets
   its own entry. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_uw length;

	if (FAST_IS_REG(src)) {
		/* Push (one or two bytes) plus the one byte return. */
		length = (reg_map[src] < 8) ? (1 + 1) : (2 + 1);

		inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
		FAIL_IF(!inst);
		INC_SIZE(length);

		if (reg_map[src] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[src]);
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
936 
937 /* --------------------------------------------------------------------- */
938 /*  Other operations                                                     */
939 /* --------------------------------------------------------------------- */
940 
/* Emits a memory access for reg.

   When reg is not a register pair the request is forwarded to
   sljit_emit_mem_unaligned(). For a register pair two word sized moves
   are emitted: the first register of the pair is transferred at the given
   address and the second one SSIZE_OF(sw) bytes after it. The direction is
   selected by SLJIT_MEM_STORE in type (set: registers to memory, clear:
   memory to registers).

   Returns SLJIT_SUCCESS; the CHECK / FAIL_IF / EMIT_MOV macros presumably
   return early with an error code when checking or emitting fails. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	/* Single register accesses are handled by the unaligned variant. */
	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	compiler->mode32 = 0;

	if ((mem & REG_MASK) == 0) {
		/* No base register: load memw into TMP_REG1 and use
		   TMP_REG1 as the base with a zero offset. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM1(TMP_REG1);
		memw = 0;
	} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
		/* Base register only, but memw (or memw plus one word, which the
		   second access needs) is outside the HALFWORD_MIN..HALFWORD_MAX
		   range: load memw into TMP_REG1 and use it as the offset register. */
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
		memw = 0;
	}

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	/* Step applied to memw between the two accesses; a negative value
	   also means the second register of the pair is processed first. */
	next = SSIZE_OF(sw);

	/* A load into the first register would overwrite part of the address
	   when that register is also the base or the offset register. */
	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* Both registers of the pair take part in the address: address
			   through a copy of the offset register kept in TMP_REG1, and
			   choose the order so the base register is loaded last. */
			/* Base and offset cannot be TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			/* Only the first register is part of the address: load the
			   second register first and the first register last. */
			next = -SSIZE_OF(sw);

			/* Without an offset register memw is a byte offset, so start
			   at the second word and step back to the first. */
			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		/* Forward order: 0 then 1; reversed order: 1 then 0. */
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			/* Second word of a base + offset register access: memw is used
			   as the SIB scale field here, so the one word displacement is
			   encoded by hand as a five byte instruction. */
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
			FAIL_IF(!inst);

			INC_SIZE(5);

			/* REX prefix: REX_W plus the extension bits of the three registers. */
			inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
			/* Opcode: store or load form of mov. */
			inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			/* ModRM: 0x44 selects mod = 01 (8 bit displacement) and
			   r/m = 100 (SIB byte follows); reg field holds the register. */
			inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
			/* SIB: scale from memw, index = offset register, base = base register. */
			inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
			/* 8 bit displacement: one machine word. */
			inst[4] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		/* Advance (or step back) the byte offset for the other word. */
		if (!(mem & OFFS_REG_MASK))
			memw += next;
	}

	return SLJIT_SUCCESS;
}
1020 
/* Emits a move of a 32 bit value from src to dst.

   sign selects how a value is widened when it ends up in a register:
   non-zero uses the sign extending forms (MOVSXD, or a sign extended
   32 bit immediate), zero uses a 32 bit mode move or, for immediates
   above 0x7fffffff, emit_load_imm64(). Stores to memory are always
   emitted in 32 bit mode.

   compiler->mode32 is 0 again on every successful return.
   Returns SLJIT_SUCCESS, or leaves early through FAIL_IF on failure. */
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
	sljit_s32 dst_is_reg = FAST_IS_REG(dst);

	compiler->mode32 = 0;

	if (src & SLJIT_IMM) {
		/* A register target with an unsigned immediate that does not fit
		   into 31 bits cannot use the sign extended 32 bit immediate form. */
		if (dst_is_reg && !sign && (sljit_uw)srcw > 0x7fffffff)
			return emit_load_imm64(compiler, dst, srcw);

		/* Non-register targets get the immediate in 32 bit mode. */
		if (!dst_is_reg)
			compiler->mode32 = 1;

		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;

		if (!dst_is_reg)
			compiler->mode32 = 0;

		return SLJIT_SUCCESS;
	}

	/* Register holding the value: dst itself when it is a register,
	   otherwise a temporary (or src, see below). */
	dst_r = dst_is_reg ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
		/* Register to memory: src can be stored directly. */
		dst_r = src;
	} else if (sign) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOVSXD_r_rm;
	} else {
		compiler->mode32 = 1;
		FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
		compiler->mode32 = 0;
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Write the value held in dst_r to the memory target. */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	compiler->mode32 = 0;

	return SLJIT_SUCCESS;
}
1074 
skip_frames_before_return(struct sljit_compiler * compiler)1075 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
1076 {
1077 	sljit_s32 tmp, size;
1078 
1079 	/* Don't adjust shadow stack if it isn't enabled.  */
1080 	if (!cpu_has_shadow_stack())
1081 		return SLJIT_SUCCESS;
1082 
1083 	size = compiler->local_size;
1084 	tmp = compiler->scratches;
1085 	if (tmp >= SLJIT_FIRST_SAVED_REG)
1086 		size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
1087 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
1088 	if (SLJIT_S0 >= tmp)
1089 		size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);
1090 
1091 	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
1092 }
1093