/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

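/* emit_load_imm64 materializes a 64 bit immediate with a MOV r64, imm64
   instruction: REX.W (combined with REX.B for r8-r15) enables the 64 bit
   operand size, the opcode is B8+rd, and the full 8 byte immediate follows. */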
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	inst[1] = U8(MOV_r_i32 | reg_lmap[reg]);
	sljit_unaligned_store_sw(inst + 2, imm);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, (sljit_s32)imm);
	return SLJIT_SUCCESS;
}

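/* emit_x86_instruction reserves room for a whole instruction: the legacy
   prefixes, the opcode bytes (their count is in the low bits of size), the
   ModRM byte, an optional SIB byte, the displacement and the immediate.
   It returns a pointer to the opcode position, so the caller stores the
   opcode byte(s) through the returned pointer. */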
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
	SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
			& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
	SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);

	size &= 0xf;
	/* The mod r/m byte is always present. */
	inst_size = size + 1;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		inst_size++;

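	/* x86-64 addressing quirks handled below: a memory operand without a
	   base register needs a SIB byte and a 32 bit displacement, since plain
	   ModRM with mod=00 and rm=101 would mean RIP relative addressing. A
	   base whose low three encoding bits are 5 (RBP/R13) always needs an
	   explicit displacement, and a base whose low bits are 4 (RSP/R12)
	   always needs a SIB byte. */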
	/* Calculate size of b. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
			immb = 0;
			if (b & REG_MASK)
				b |= TO_OFFS_REG(TMP_REG2);
			else
				b |= TMP_REG2;
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			} else if (reg_lmap[b & REG_MASK] == 5) {
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	} else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	} else if (freg_map[b] >= 8)
		rex |= REX_B;

	if ((flags & EX86_VEX_EXT) && (rex & 0x3)) {
		SLJIT_ASSERT(size == 2);
		size++;
		inst_size++;
	}

	if (a == SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		} else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	} else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding prefixes. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	else if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	else if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* Rex is always the last prefix. */
	if (rex)
		*inst++ = rex;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a == SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	} else {
		if (a == SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				buf_ptr[0] |= 0x04;
				buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
				buf_ptr += 2;
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		} else {
			if (reg_lmap_b == 5)
				*buf_ptr |= 0x40;

			buf_ptr[0] |= 0x04;
			buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
			buf_ptr += 2;

			if (reg_lmap_b == 5)
				*buf_ptr++ = 0;
		}
	} else {
		buf_ptr[0] |= 0x04;
		buf_ptr[1] = 0x25;
		buf_ptr += 2;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a == SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	return inst;
}

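/* VEX encoded instructions use either the two byte (0xc5) or the three
   byte (0xc4) prefix form. The three byte form is required when the
   opcode map is 0F38/0F3A, when VEX.W must be set, or when the REX.X or
   REX.B bits are needed; otherwise the shorter 0xc5 form is emitted. The
   prefix packs the inverted register extension bits, the inverted second
   source register (vvvv), the vector length (L), and the 66/F2/F3 prefix
   selector (pp). */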
static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
	/* The first and second register operand. */
	sljit_s32 a, sljit_s32 v,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 vex = 0;
	sljit_u8 vex_m = 0;
	sljit_uw size;

	SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
			& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);

	op |= EX86_REX;

	if (op & VEX_OP_0F38)
		vex_m = 0x2;
	else if (op & VEX_OP_0F3A)
		vex_m = 0x3;

	if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {
		if (vex_m == 0)
			vex_m = 0x1;

		vex |= 0x80;
	}

	if (op & EX86_PREF_66)
		vex |= 0x1;
	else if (op & EX86_PREF_F2)
		vex |= 0x3;
	else if (op & EX86_PREF_F3)
		vex |= 0x2;

	op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);

	if (op & VEX_256)
		vex |= 0x4;

	vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));

	size = op & ~(sljit_uw)0xff;
	size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;

	inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
	FAIL_IF(!inst);

	SLJIT_ASSERT((inst[-1] & 0xf0) == REX);

	/* If X or B is present in REX prefix. */
	if (vex_m == 0 && inst[-1] & 0x3)
		vex_m = 0x1;

	if (vex_m == 0) {
		vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);

		inst[-1] = 0xc5;
		inst[0] = vex;
		inst[1] = U8(op);
		return SLJIT_SUCCESS;
	}

	vex_m |= U8((inst[-1] ^ 0x7) << 5);
	inst[-1] = 0xc4;
	inst[0] = vex_m;
	inst[1] = vex;
	inst[2] = U8(op);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Enter / return                                                       */
/* --------------------------------------------------------------------- */

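/* A far jump loads the 64 bit (or zero extended 32 bit) target into
   TMP_REG2 and jumps indirectly. For conditional jumps an inverted short
   jump skips the whole sequence: the mov is 6 bytes (REX.B, B8+rd, imm32)
   or 10 bytes (REX.W with imm64), and the indirect jump or call (REX.B,
   FF /4 or /2, ModRM) is 3 bytes. */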
static sljit_u8* detect_far_jump_type(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && (jump->flags & JUMP_ADDR) && (jump->u.target <= 0xffffffff);

	/* The relative jump below is specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8 && TMP_REG2 != SLJIT_TMP_DEST_REG);

	if (type < SLJIT_JUMP) {
		/* Invert type. */
		code_ptr[0] = U8(get_jump_code(type ^ 0x1) - 0x10);
		code_ptr[1] = short_addr ? (6 + 3) : (10 + 3);
		code_ptr += 2;
	}

	code_ptr[0] = short_addr ? REX_B : (REX_W | REX_B);
	code_ptr[1] = MOV_r_i32 | reg_lmap[TMP_REG2];
	code_ptr += 2;
	jump->addr = (sljit_uw)code_ptr;

	if (!(jump->flags & JUMP_ADDR))
		jump->flags |= PATCH_MD;
	else if (short_addr)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);

	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

	code_ptr[0] = REX_B;
	code_ptr[1] = GROUP_FF;
	code_ptr[2] = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);

	return code_ptr + 3;
}

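/* Rewrites a "mov reg, imm64" whose immediate is a code address into a
   shorter form when possible: a RIP relative lea when the target is within
   a signed 32 bit range, or a 32 bit mov when the address itself fits in
   32 bits. */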
static sljit_u8* generate_mov_addr_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) <= 10);
	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + jump->u.label->size;

	if (addr > 0xffffffffl) {
		diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

		if (diff <= HALFWORD_MAX && diff >= HALFWORD_MIN) {
			SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 7);
			code_ptr -= SSIZE_OF(s32) - 1;

			SLJIT_ASSERT((code_ptr[-3 - SSIZE_OF(s32)] & 0xf8) == REX_W);
			SLJIT_ASSERT((code_ptr[-2 - SSIZE_OF(s32)] & 0xf8) == MOV_r_i32);

			code_ptr[-3 - SSIZE_OF(s32)] = U8(REX_W | ((code_ptr[-3 - SSIZE_OF(s32)] & 0x1) << 2));
			code_ptr[-1 - SSIZE_OF(s32)] = U8(((code_ptr[-2 - SSIZE_OF(s32)] & 0x7) << 3) | 0x5);
			code_ptr[-2 - SSIZE_OF(s32)] = LEA_r_m;

			jump->flags |= PATCH_MW;
			return code_ptr;
		}

		jump->flags |= PATCH_MD;
		return code_ptr;
	}

	code_ptr -= 2 + sizeof(sljit_uw);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

	if ((code_ptr[0] & 0x07) != 0) {
		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 6);
		code_ptr[0] = U8(code_ptr[0] & ~0x08);
		code_ptr += 2 + sizeof(sljit_s32);
	} else {
		SLJIT_ASSERT(((jump->flags >> JUMP_SIZE_SHIFT) & 0x1f) >= 5);
		code_ptr[0] = code_ptr[1];
		code_ptr += 1 + sizeof(sljit_s32);
	}

	return code_ptr;
}

#ifdef _WIN64
typedef struct {
	sljit_sw regs[2];
} sljit_sse2_reg;
#endif /* _WIN64 */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw size;
	sljit_s32 word_arg_count = 0;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
	sljit_s32 saved_float_regs_offset = 0;
	sljit_s32 float_arg_count = 0;
#endif /* _WIN64 */
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	/* Emit ENDBR64 at function entry if needed.  */
	FAIL_IF(emit_endbranch(compiler));

	compiler->mode32 = 0;

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_size > 0) {
		saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
		local_size = saved_float_regs_offset + saved_float_regs_size;
	}
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			tmp = 0;
#ifndef _WIN64
			switch (word_arg_count) {
			case 0:
				tmp = SLJIT_R2;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = TMP_REG1;
				break;
			default:
				tmp = SLJIT_R3;
				break;
			}
#else /* !_WIN64 */
			switch (word_arg_count + float_arg_count) {
			case 0:
				tmp = SLJIT_R3;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = SLJIT_R2;
				break;
			default:
				tmp = TMP_REG1;
				break;
			}
#endif /* _WIN64 */
			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
				if (tmp != SLJIT_R0 + word_arg_count)
					EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
				saved_arg_count++;
			}
			word_arg_count++;
		} else {
#ifdef _WIN64
			SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
			float_arg_count++;
			if (float_arg_count != float_arg_count + word_arg_count)
				FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
					float_arg_count, float_arg_count + word_arg_count, 0));
#endif /* _WIN64 */
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	compiler->local_size = local_size;

#ifdef _WIN64
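	/* Windows requires stack pages to be touched in order, since pages
	   beyond the stack guard page are not committed yet. Frames up to four
	   pages are probed with explicit loads; larger frames use a probe loop,
	   where the -21 displacement of the jne jumps back over the load, the
	   two subtractions and the jne itself (21 bytes in total). */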
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);

			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
			BINARY_IMM32(SUB, 1, TMP_REG1, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_u8)-21;
			local_size &= 0xfff;
		}

		if (local_size > 0)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif /* _WIN64 */

	if (local_size > 0)
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#ifdef _WIN64
	if (saved_float_regs_size > 0) {
		compiler->mode32 = 1;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}
	}
#endif /* _WIN64 */

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_regs_size;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
#endif /* _WIN64 */

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_size > 0)
		local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_uw size;
	sljit_s32 local_size, i, tmp;
	sljit_u8 *inst;
#ifdef _WIN64
	sljit_s32 saved_float_regs_offset;
	sljit_s32 fscratches = compiler->fscratches;
	sljit_s32 fsaveds = compiler->fsaveds;
#endif /* _WIN64 */

#ifdef _WIN64
	saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_offset > 0) {
		compiler->mode32 = 1;
		saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		compiler->mode32 = 0;
	}
#endif /* _WIN64 */

	local_size = compiler->local_size;

	if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	compiler->mode32 = 0;

	FAIL_IF(emit_stack_frame_release(compiler, 0));
	return emit_byte(compiler, RET_near);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	compiler->mode32 = 0;

	if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		ADJUST_LOCAL_OFFSET(src, srcw);

		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

#ifndef _WIN64

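/* System V AMD64: because of sljit's register mapping the second and
   fourth word arguments already live in their argument registers (rsi
   and rcx, see the assert below). Only the third argument has to be
   moved into rdx (TMP_REG1), and SLJIT_R0 is copied into the first
   argument register last; an indirect call target held in the register
   about to be overwritten is redirected to TMP_REG1. */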
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 word_arg_count = 0;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count == 0)
		return SLJIT_SUCCESS;

	if (word_arg_count >= 3) {
		if (src == SLJIT_R2)
			*src_ptr = TMP_REG1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	}

	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}

#else

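/* Win64 passes the first four arguments in rcx, rdx, r8 and r9, and float
   arguments consume argument positions as well (the assert below pins this
   mapping). The first pass detects whether any shuffling is needed and
   protects an indirect call target from being overwritten; the second pass
   performs the moves in reverse argument order. */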
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 arg_count = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_s32 data_transfer = 0;
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
		case SLJIT_ARG_TYPE_F32:
			arg_count++;
			float_arg_count++;

			if (arg_count != float_arg_count)
				data_transfer = 1;
			break;
		default:
			arg_count++;
			word_arg_count++;

			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
				data_transfer = 1;

				if (src == word_arg_regs[arg_count]) {
					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
					*src_ptr = TMP_REG2;
				}
			}
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (!data_transfer)
		return SLJIT_SUCCESS;

	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		default:
			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
			arg_count--;
			word_arg_count--;
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	compiler->mode32 = 0;

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	compiler->mode32 = 0;

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler, 0));
	}

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(call_with_args(compiler, arg_types, &src));

	if (type & SLJIT_CALL_RETURN)
		type = SLJIT_JUMP;

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}

static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8)
			return emit_byte(compiler, U8(POP_r + reg_lmap[dst]));

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (dst is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = POP_rm;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}

static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 saved_regs_size;

	compiler->mode32 = 0;
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
	return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size);
}

/* --------------------------------------------------------------------- */
/*  Other operations                                                     */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	compiler->mode32 = type & SLJIT_32;
	type &= ~SLJIT_32;

	if (dst_reg != src2_reg) {
		if (dst_reg == src1) {
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
			EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else
			EMIT_MOV(compiler, dst_reg, 0, src2_reg, 0);
	}

	if (sljit_has_cpu_feature(SLJIT_HAS_CMOV)) {
		if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
			EMIT_MOV(compiler, TMP_REG2, 0, src1, src1w);
			src1 = TMP_REG2;
			src1w = 0;
		}

		return emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w);
	}

	return emit_cmov_generic(compiler, type, dst_reg, src1, src1w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	compiler->mode32 = 0;

	if ((mem & REG_MASK) == 0) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM1(TMP_REG1);
		memw = 0;
	} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
		memw = 0;
	}

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	next = SSIZE_OF(sw);

	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* Base and offset cannot be TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			next = -SSIZE_OF(sw);

			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

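	/* The second access of a [base + index] pair cannot be expressed by
	   EMIT_MOV with an extra displacement, so it is encoded by hand below:
	   REX prefix, mov opcode, ModRM with mod=01 (8 bit displacement), the
	   SIB byte, and a displacement of sizeof(sljit_sw). */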
	for (i = 0; i < 2; i++) {
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
			FAIL_IF(!inst);

			INC_SIZE(5);

			inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
			inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
			inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
			inst[4] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		if (!(mem & OFFS_REG_MASK))
			memw += next;
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (!sign || ((sljit_u32)srcw <= 0x7fffffff))
				return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw);

			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			EMIT_MOV(compiler, dst_r, 0, src, srcw);
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst, *jump_inst1, *jump_inst2;
	sljit_uw size1, size2;

	compiler->mode32 = 0;

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
		if (src != SLJIT_IMM) {
			compiler->mode32 = 1;
			EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
			compiler->mode32 = 0;
		} else
			FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw));

		FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));

		compiler->mode32 = 1;

		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

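	/* Unsigned 64 bit to double conversion: cvtsi2sd only accepts signed
	   input. The sign test below converts values without the top bit set
	   directly; otherwise the value is halved as (x >> 1) | (x & 1),
	   keeping the low bit so the rounding stays correct, converted, and
	   then doubled with an addition. */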
	if (!FAST_IS_REG(src)) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
	}

	BINARY_IMM32(CMP, 0, src, 0);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JL_i8;
	jump_inst1 = inst;

	size1 = compiler->size;

	compiler->mode32 = 0;
	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	jump_inst2 = inst;

	size2 = compiler->size;

	jump_inst1[1] = U8(size2 - size1);

	if (src != TMP_REG1)
		EMIT_MOV(compiler, TMP_REG1, 0, src, 0);

	EMIT_MOV(compiler, TMP_REG2, 0, src, 0);

	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= SHR;

	compiler->mode32 = 1;
	BINARY_IMM32(AND, 1, TMP_REG2, 0);

	compiler->mode32 = 0;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0);
	FAIL_IF(!inst);
	inst[0] = OR_r_rm;

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
	compiler->mode32 = 1;
	FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));

	jump_inst2[1] = U8(compiler->size - size2);

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

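/* Loads a float or double immediate bit pattern into an XMM register:
   zero values are created with pxor xmm, xmm, anything else is moved from
   TMP_REG1 with movd/movq (66 [REX] 0F 6E /r, where REX.W selects the
   64 bit form). */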
static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero)
{
	sljit_u8 *inst;
	sljit_u32 size;

	if (is_zero) {
		rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0;
	} else {
		if (freg_map[freg] >= 8)
			rex |= REX_R;
		if (reg_map[TMP_REG1] >= 8)
			rex |= REX_B;
	}

	size = (rex != 0) ? 5 : 4;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

	*inst++ = GROUP_66;
	if (rex != 0)
		*inst++ = rex;
	inst[0] = GROUP_0F;

	if (is_zero) {
		inst[1] = PXOR_x_xm;
		inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG);
	} else {
		inst[1] = MOVD_x_rm;
		inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	union {
		sljit_s32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

	if (u.imm != 0) {
		compiler->mode32 = 1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
	}

	return sljit_emit_fset(compiler, freg, 0, u.imm == 0);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	union {
		sljit_sw imm;
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

	if (u.imm != 0) {
		compiler->mode32 = 0;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
	}

	return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_u32 size;
	sljit_u8 rex = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (!(op & SLJIT_32))
		rex = REX_W;

	if (freg_map[freg] >= 8)
		rex |= REX_R;

	if (reg_map[reg] >= 8)
		rex |= REX_B;

	size = (rex != 0) ? 5 : 4;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

	*inst++ = GROUP_66;
	if (rex != 0)
		*inst++ = rex;
	inst[0] = GROUP_0F;
	inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;
	inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG);

	return SLJIT_SUCCESS;
}

static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_s32 tmp, size;

	/* Don't adjust shadow stack if it isn't enabled.  */
	if (!cpu_has_shadow_stack())
		return SLJIT_SUCCESS;

	size = compiler->local_size;
	tmp = compiler->scratches;
	if (tmp >= SLJIT_FIRST_SAVED_REG)
		size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	if (SLJIT_S0 >= tmp)
		size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}