/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* x86 64-bit arch dependent functions. */

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

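/* Emits the 10-byte "movabs reg, imm64" form: a REX.W prefix (plus REX.B for
   r8-r15), the MOV_r_i32 (B8+rd) opcode and a full 8-byte immediate. */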
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	inst[0] = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	inst[1] = U8(MOV_r_i32 | reg_lmap[reg]);
	sljit_unaligned_store_sw(inst + 2, imm);
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, (sljit_s32)imm);
	return SLJIT_SUCCESS;
}

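/* Generic instruction emitter. It first computes the encoded size (optional
   66/F2/F3 legacy prefix, optional REX, opcode bytes, ModR/M, optional SIB,
   displacement and immediate), reserves that much buffer space, then encodes
   everything around the opcode itself, which the caller stores through the
   returned pointer (see e.g. the BINARY_IMM32() uses below). */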
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(a != SLJIT_IMM || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* An SSE2 operand and an immediate cannot be combined. */
	SLJIT_ASSERT(a != SLJIT_IMM || !(flags & EX86_SSE2));
	SLJIT_ASSERT(((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
			& ((flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);
	SLJIT_ASSERT((flags & (EX86_VEX_EXT | EX86_REX)) != EX86_VEX_EXT);

	size &= 0xf;
	/* The mod r/m byte is always present. */
	inst_size = size + 1;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
		inst_size++;

	/* Calculate size of b. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK) && NOT_HALFWORD(immb)) {
			PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
			immb = 0;
			if (b & REG_MASK)
				b |= TO_OFFS_REG(TMP_REG2);
			else
				b |= TMP_REG2;
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (immb != 0 && !(b & OFFS_REG_MASK)) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			} else if (reg_lmap[b & REG_MASK] == 5) {
				/* Swap registers if possible. */
				if ((b & OFFS_REG_MASK) && (immb & 0x3) == 0 && reg_lmap[OFFS_REG(b)] != 5)
					b = SLJIT_MEM | OFFS_REG(b) | TO_OFFS_REG(b & REG_MASK);
				else
					inst_size += sizeof(sljit_s8);
			}

			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (reg_lmap[b & REG_MASK] == 4 && !(b & OFFS_REG_MASK))
				b |= TO_OFFS_REG(SLJIT_SP);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	} else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	} else if (freg_map[b] >= 8)
		rex |= REX_B;

	if ((flags & EX86_VEX_EXT) && (rex & 0x3)) {
		SLJIT_ASSERT(size == 2);
		size++;
		inst_size++;
	}

	if (a == SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		} else if (flags & EX86_SHIFT_INS) {
			SLJIT_ASSERT(imma <= (compiler->mode32 ? 0x1f : 0x3f));
			if (imma != 1) {
				inst_size++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	} else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding prefixes. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	else if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	else if (flags & EX86_PREF_66)
		*inst++ = 0x66;

	/* Rex is always the last prefix. */
	if (rex)
		*inst++ = rex;

	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && a == SLJIT_IMM)
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a == SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	} else {
		if (a == SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP)) {
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				buf_ptr[0] |= 0x04;
				buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
				buf_ptr += 2;
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		} else {
			if (reg_lmap_b == 5)
				*buf_ptr |= 0x40;

			buf_ptr[0] |= 0x04;
			buf_ptr[1] = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
			buf_ptr += 2;

			if (reg_lmap_b == 5)
				*buf_ptr++ = 0;
		}
	} else {
		buf_ptr[0] |= 0x04;
		buf_ptr[1] = 0x25;
		buf_ptr += 2;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a == SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	return inst;
}

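/* Rewrites the REX byte produced by emit_x86_instruction() into a VEX prefix:
   the two-byte 0xc5 form when only REX.R and the 0F opcode map are needed,
   otherwise the three-byte 0xc4 form. The vvvv field (the extra "v" register
   operand) and the R/X/B bits are stored inverted, as the encoding requires. */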
static sljit_s32 emit_vex_instruction(struct sljit_compiler *compiler, sljit_uw op,
	/* The first and second register operands. */
	sljit_s32 a, sljit_s32 v,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 vex = 0;
	sljit_u8 vex_m = 0;
	sljit_uw size;

	SLJIT_ASSERT(((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66))
			& ((op & (EX86_PREF_F2 | EX86_PREF_F3 | EX86_PREF_66)) - 1)) == 0);

	op |= EX86_REX;

	if (op & VEX_OP_0F38)
		vex_m = 0x2;
	else if (op & VEX_OP_0F3A)
		vex_m = 0x3;

	if ((op & VEX_W) || ((op & VEX_AUTO_W) && !compiler->mode32)) {
		if (vex_m == 0)
			vex_m = 0x1;

		vex |= 0x80;
	}

	if (op & EX86_PREF_66)
		vex |= 0x1;
	else if (op & EX86_PREF_F2)
		vex |= 0x3;
	else if (op & EX86_PREF_F3)
		vex |= 0x2;

	op &= ~(EX86_PREF_66 | EX86_PREF_F2 | EX86_PREF_F3);

	if (op & VEX_256)
		vex |= 0x4;

	vex = U8(vex | ((((op & VEX_SSE2_OPV) ? freg_map[v] : reg_map[v]) ^ 0xf) << 3));

	size = op & ~(sljit_uw)0xff;
	size |= (vex_m == 0) ? (EX86_VEX_EXT | 2) : 3;

	inst = emit_x86_instruction(compiler, size, a, 0, b, immb);
	FAIL_IF(!inst);

	SLJIT_ASSERT((inst[-1] & 0xf0) == REX);

	/* If X or B is present in REX prefix. */
	if (vex_m == 0 && inst[-1] & 0x3)
		vex_m = 0x1;

	if (vex_m == 0) {
		vex |= U8(((inst[-1] >> 2) ^ 0x1) << 7);

		inst[-1] = 0xc5;
		inst[0] = vex;
		inst[1] = U8(op);
		return SLJIT_SUCCESS;
	}

	vex_m |= U8((inst[-1] ^ 0x7) << 5);
	inst[-1] = 0xc4;
	inst[0] = vex_m;
	inst[1] = vex;
	inst[2] = U8(op);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Enter / return                                                       */
/* --------------------------------------------------------------------- */

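/* A far jump is emitted as "movabs TMP_REG2, target" followed by an indirect
   call/jmp through TMP_REG2. For conditional types an inverted short jump is
   emitted first, so the far jump can be skipped; its 8-bit offset is the size
   of the mov (6 or 10 bytes, depending on the immediate width) plus the
   3-byte indirect jump. */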
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);

	/* The relative jump below is specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);

	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10);
		*code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
	}

	*code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
	*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else if (short_addr)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);

	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);

	return code_ptr;
}

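/* When every label address fits in 32 bits, the long mov sequence emitted for
   the put_label is rewritten in place into a shorter form with a 32-bit
   immediate, and the instructions emitted in between are moved back to close
   the gap. */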
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
	if (max_label > HALFWORD_MAX) {
		put_label->addr -= put_label->flags;
		put_label->flags = PATCH_MD;
		return code_ptr;
	}

	if (put_label->flags == 0) {
		/* Destination is register. */
		code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);

		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
		SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

		if ((code_ptr[0] & 0x07) != 0) {
			code_ptr[0] = U8(code_ptr[0] & ~0x08);
			code_ptr += 2 + sizeof(sljit_s32);
		}
		else {
			code_ptr[0] = code_ptr[1];
			code_ptr += 1 + sizeof(sljit_s32);
		}

		put_label->addr = (sljit_uw)code_ptr;
		return code_ptr;
	}

	code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
	SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);

	if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
		code_ptr += 2 + sizeof(sljit_uw);
		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	}

	SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);

	code_ptr[0] = U8(code_ptr[0] & ~0x4);
	code_ptr[1] = MOV_rm_i32;
	code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3));

	code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
	put_label->addr = (sljit_uw)code_ptr;
	put_label->flags = 0;
	return code_ptr;
}

#ifdef _WIN64
typedef struct {
	sljit_sw regs[2];
} sljit_sse2_reg;
#endif /* _WIN64 */

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw size;
	sljit_s32 word_arg_count = 0;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 saved_regs_size, tmp, i;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
	sljit_s32 saved_float_regs_offset = 0;
	sljit_s32 float_arg_count = 0;
#endif /* _WIN64 */
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	/* Emit ENDBR64 at function entry if needed. */
	FAIL_IF(emit_endbranch(compiler));

	compiler->mode32 = 0;

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		PUSH_REG(reg_lmap[i]);
	}

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_size > 0) {
		saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
		local_size = saved_float_regs_offset + saved_float_regs_size;
	}
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			tmp = 0;
#ifndef _WIN64
			switch (word_arg_count) {
			case 0:
				tmp = SLJIT_R2;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = TMP_REG1;
				break;
			default:
				tmp = SLJIT_R3;
				break;
			}
#else /* !_WIN64 */
			switch (word_arg_count + float_arg_count) {
			case 0:
				tmp = SLJIT_R3;
				break;
			case 1:
				tmp = SLJIT_R1;
				break;
			case 2:
				tmp = SLJIT_R2;
				break;
			default:
				tmp = TMP_REG1;
				break;
			}
#endif /* _WIN64 */
			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
				if (tmp != SLJIT_R0 + word_arg_count)
					EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
			} else {
				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
				saved_arg_count++;
			}
			word_arg_count++;
		} else {
#ifdef _WIN64
			SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
			float_arg_count++;
			if (float_arg_count != float_arg_count + word_arg_count)
				FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
					float_arg_count, float_arg_count + word_arg_count, 0));
#endif /* _WIN64 */
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	compiler->local_size = local_size;

#ifdef _WIN64
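	/* On Windows every stack page must be touched in order (one probe per
	   4096-byte page) before rsp may move past it. Frames up to four pages
	   are probed inline; larger frames use a probe loop, whose 8-bit
	   offset of -21 branches back over the two subtractions to the
	   page-touching load at the top of the loop. */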
	if (local_size > 0) {
		if (local_size <= 4 * 4096) {
			if (local_size > 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			if (local_size > 2 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
			if (local_size > 3 * 4096)
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);

			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
			BINARY_IMM32(SUB, 1, TMP_REG1, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
			FAIL_IF(!inst);

			INC_SIZE(2);
			inst[0] = JNE_i8;
			inst[1] = (sljit_u8)-21;
			local_size &= 0xfff;
		}

		if (local_size > 0)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
	}
#endif /* _WIN64 */

	if (local_size > 0)
		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);

#ifdef _WIN64
	if (saved_float_regs_size > 0) {
		compiler->mode32 = 1;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_xm_x | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}
	}
#endif /* _WIN64 */

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_regs_size;
#ifdef _WIN64
	sljit_s32 saved_float_regs_size;
#endif /* _WIN64 */

	CHECK_ERROR();
	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

#ifdef _WIN64
	local_size += SLJIT_LOCALS_OFFSET;
	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_size > 0)
		local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
#else /* !_WIN64 */
	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
#endif /* _WIN64 */

	/* Including the return address saved by the call instruction. */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_uw size;
	sljit_s32 local_size, i, tmp;
	sljit_u8 *inst;
#ifdef _WIN64
	sljit_s32 saved_float_regs_offset;
	sljit_s32 fscratches = compiler->fscratches;
	sljit_s32 fsaveds = compiler->fsaveds;
#endif /* _WIN64 */

#ifdef _WIN64
	saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, sse2_reg);

	if (saved_float_regs_offset > 0) {
		compiler->mode32 = 1;
		saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;

		tmp = SLJIT_FS0 - fsaveds;
		for (i = SLJIT_FS0; i > tmp; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
			FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | EX86_SSE2, i, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset));
			saved_float_regs_offset += 16;
		}

		compiler->mode32 = 0;
	}
#endif /* _WIN64 */

	local_size = compiler->local_size;

	if (is_return_to && compiler->scratches < SLJIT_FIRST_SAVED_REG && (compiler->saveds == SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		local_size += SSIZE_OF(sw);
		is_return_to = 0;
	}

	if (local_size > 0)
		BINARY_IMM32(ADD, local_size, SLJIT_SP, 0);

	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	tmp = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	for (i = SLJIT_S0 + 1 - compiler->saveds; i <= tmp; i++) {
		size = reg_map[i] >= 8 ? 2 : 1;
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
		if (reg_map[i] >= 8)
			*inst++ = REX_B;
		POP_REG(reg_lmap[i]);
	}

	if (is_return_to)
		BINARY_IMM32(ADD, sizeof(sljit_sw), SLJIT_SP, 0);

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	compiler->mode32 = 0;

	FAIL_IF(emit_stack_frame_release(compiler, 0));
	return emit_byte(compiler, RET_near);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	compiler->mode32 = 0;

	if ((src & SLJIT_MEM) || (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		ADJUST_LOCAL_OFFSET(src, srcw);

		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}

/* --------------------------------------------------------------------- */
/*  Call / return instructions                                           */
/* --------------------------------------------------------------------- */

#ifndef _WIN64

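/* System V AMD64: the first word arguments are passed in rdi, rsi, rdx and
   rcx. With the register mapping used here, SLJIT_R1 (rsi) and SLJIT_R3 (rcx)
   are already in place (see the assert below); only the first argument has to
   be moved into SLJIT_R2 (rdi) and, when there are at least three word
   arguments, the third one from SLJIT_R2 into TMP_REG1 (rdx). */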
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 word_arg_count = 0;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (word_arg_count == 0)
		return SLJIT_SUCCESS;

	if (word_arg_count >= 3) {
		if (src == SLJIT_R2)
			*src_ptr = TMP_REG1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	}

	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}

#else

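/* Windows x64: the first four arguments are passed in rcx, rdx, r8 and r9,
   and integer and float arguments share these positions. word_arg_regs maps
   an argument position to the SLJIT register that lives in the corresponding
   argument register (see the assert below). */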
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 arg_count = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_s32 data_transfer = 0;
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
		case SLJIT_ARG_TYPE_F32:
			arg_count++;
			float_arg_count++;

			if (arg_count != float_arg_count)
				data_transfer = 1;
			break;
		default:
			arg_count++;
			word_arg_count++;

			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
				data_transfer = 1;

				if (src == word_arg_regs[arg_count]) {
					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
					*src_ptr = TMP_REG2;
				}
			}
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (!data_transfer)
		return SLJIT_SUCCESS;

	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 0, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, 1, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		default:
			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
			arg_count--;
			word_arg_count--;
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}

#endif

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	compiler->mode32 = 0;

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	compiler->mode32 = 0;

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler, 0));
	}

	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(call_with_args(compiler, arg_types, &src));

	if (type & SLJIT_CALL_RETURN)
		type = SLJIT_JUMP;

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}

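/* Pops the return address pushed by the caller into dst: the one-byte POP_r
   (58+rd) form when the register needs no REX prefix, a REX.B prefixed one
   otherwise, and the POP_rm form for memory destinations. */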
static sljit_s32 emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8)
			return emit_byte(compiler, U8(POP_r + reg_lmap[dst]));

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (dst is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = POP_rm;
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] |= PUSH_rm;

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}

static sljit_s32 sljit_emit_get_return_address(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 saved_regs_size;

	compiler->mode32 = 0;
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
	return emit_mov(compiler, dst, dstw, SLJIT_MEM1(SLJIT_SP), compiler->local_size + saved_regs_size);
}

/* --------------------------------------------------------------------- */
/*  Other operations                                                     */
/* --------------------------------------------------------------------- */

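/* Register pair transfers: the two words are accessed at memw and
   memw + sizeof(sljit_sw). For loads, the order of the two moves is reversed
   when the first register of the pair overlaps the base or index register, so
   the address is not clobbered before the second access; TMP_REG1 replaces
   the index register when both pair registers overlap the address. */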
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_u8* inst;
	sljit_s32 i, next, reg_idx;
	sljit_u8 regs[2];

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	compiler->mode32 = 0;

	if ((mem & REG_MASK) == 0) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM1(TMP_REG1);
		memw = 0;
	} else if (!(mem & OFFS_REG_MASK) && ((memw < HALFWORD_MIN) || (memw > HALFWORD_MAX - SSIZE_OF(sw)))) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, memw);

		mem = SLJIT_MEM2(mem & REG_MASK, TMP_REG1);
		memw = 0;
	}

	regs[0] = U8(REG_PAIR_FIRST(reg));
	regs[1] = U8(REG_PAIR_SECOND(reg));

	next = SSIZE_OF(sw);

	if (!(type & SLJIT_MEM_STORE) && (regs[0] == (mem & REG_MASK) || regs[0] == OFFS_REG(mem))) {
		if (regs[1] == (mem & REG_MASK) || regs[1] == OFFS_REG(mem)) {
			/* Base and offset cannot be TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, OFFS_REG(mem), 0);

			if (regs[1] == OFFS_REG(mem))
				next = -SSIZE_OF(sw);

			mem = (mem & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
		} else {
			next = -SSIZE_OF(sw);

			if (!(mem & OFFS_REG_MASK))
				memw += SSIZE_OF(sw);
		}
	}

	for (i = 0; i < 2; i++) {
		reg_idx = next > 0 ? i : (i ^ 0x1);
		reg = regs[reg_idx];

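		/* Second word of an indexed address: the extra displacement cannot
		   be folded into memw, so the mov is hand-encoded here as
		   REX.W + opcode + ModR/M + SIB + disp8, with sizeof(sljit_sw) as
		   the 8-bit displacement. */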
		if ((mem & OFFS_REG_MASK) && (reg_idx == 1)) {
			inst = (sljit_u8*)ensure_buf(compiler, (sljit_uw)(1 + 5));
			FAIL_IF(!inst);

			INC_SIZE(5);

			inst[0] = U8(REX_W | ((reg_map[reg] >= 8) ? REX_R : 0) | ((reg_map[mem & REG_MASK] >= 8) ? REX_B : 0) | ((reg_map[OFFS_REG(mem)] >= 8) ? REX_X : 0));
			inst[1] = (type & SLJIT_MEM_STORE) ? MOV_rm_r : MOV_r_rm;
			inst[2] = 0x44 | U8(reg_lmap[reg] << 3);
			inst[3] = U8(memw << 6) | U8(reg_lmap[OFFS_REG(mem)] << 3) | reg_lmap[mem & REG_MASK];
			inst[4] = sizeof(sljit_sw);
		} else if (type & SLJIT_MEM_STORE) {
			EMIT_MOV(compiler, mem, memw, reg, 0);
		} else {
			EMIT_MOV(compiler, reg, 0, mem, memw);
		}

		if (!(mem & OFFS_REG_MASK))
			memw += next;
	}

	return SLJIT_SUCCESS;
}

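/* 32-bit moves: on x86-64 a plain 32-bit mov implicitly zero-extends into the
   upper half of the destination, so zero extension only needs mode32, while
   sign extension uses movsxd (MOVSXD_r_rm). */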
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			if (!sign || ((sljit_u32)srcw <= 0x7fffffff))
				return emit_do_imm32(compiler, reg_map[dst] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[dst]), srcw);

			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
		}
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOVSXD_r_rm;
		} else {
			compiler->mode32 = 1;
			EMIT_MOV(compiler, dst_r, 0, src, srcw);
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}

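/* Unsigned to floating point conversion. A 32-bit unsigned value is
   zero-extended and converted with the 64-bit cvtsi2sd. For 64-bit values the
   signed conversion is used directly while the sign bit is clear; otherwise
   the value is halved as (v >> 1) | (v & 1), keeping the shifted-out bit so
   rounding stays correct, converted, and doubled by adding the result to
   itself. */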
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst, *jump_inst1, *jump_inst2;
	sljit_uw size1, size2;

	compiler->mode32 = 0;

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
		if (src != SLJIT_IMM) {
			compiler->mode32 = 1;
			EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
			compiler->mode32 = 0;
		} else
			FAIL_IF(emit_do_imm32(compiler, reg_map[TMP_REG1] <= 7 ? 0 : REX_B, U8(MOV_r_i32 | reg_lmap[TMP_REG1]), srcw));

		FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));

		compiler->mode32 = 1;

		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (!FAST_IS_REG(src)) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
	}

	BINARY_IMM32(CMP, 0, src, 0);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JL_i8;
	jump_inst1 = inst;

	size1 = compiler->size;

	compiler->mode32 = 0;
	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, 0));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = JMP_i8;
	jump_inst2 = inst;

	size2 = compiler->size;

	jump_inst1[1] = U8(size2 - size1);

	if (src != TMP_REG1)
		EMIT_MOV(compiler, TMP_REG1, 0, src, 0);

	EMIT_MOV(compiler, TMP_REG2, 0, src, 0);

	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 1, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= SHR;

	compiler->mode32 = 1;
	BINARY_IMM32(AND, 1, TMP_REG2, 0);

	compiler->mode32 = 0;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG2, 0);
	FAIL_IF(!inst);
	inst[0] = OR_r_rm;

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, TMP_REG1, 0));
	compiler->mode32 = 1;
	FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, dst_r, 0));

	jump_inst2[1] = U8(compiler->size - size2);

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

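/* Materializes a float immediate: "pxor xmm, xmm" when the value is zero,
   otherwise a movd/movq from TMP_REG1, which the callers below preload with
   the bit pattern of the value. */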
static sljit_s32 sljit_emit_fset(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_u8 rex, sljit_s32 is_zero)
{
	sljit_u8 *inst;
	sljit_u32 size;

	if (is_zero) {
		rex = freg_map[freg] >= 8 ? (REX_R | REX_B) : 0;
	} else {
		if (freg_map[freg] >= 8)
			rex |= REX_R;
		if (reg_map[TMP_REG1] >= 8)
			rex |= REX_B;
	}

	size = (rex != 0) ? 5 : 4;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

	*inst++ = GROUP_66;
	if (rex != 0)
		*inst++ = rex;
	inst[0] = GROUP_0F;

	if (is_zero) {
		inst[1] = PXOR_x_xm;
		inst[2] = U8(freg_lmap[freg] | (freg_lmap[freg] << 3) | MOD_REG);
	} else {
		inst[1] = MOVD_x_rm;
		inst[2] = U8(reg_lmap[TMP_REG1] | (freg_lmap[freg] << 3) | MOD_REG);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
	union {
		sljit_s32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

	if (u.imm != 0) {
		compiler->mode32 = 1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
	}

	return sljit_emit_fset(compiler, freg, 0, u.imm == 0);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
	union {
		sljit_sw imm;
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

	if (u.imm != 0) {
		compiler->mode32 = 0;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, u.imm);
	}

	return sljit_emit_fset(compiler, freg, REX_W, u.imm == 0);
}

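/* Copies the raw bits between a general register and a float register using
   the movd/movq encodings (MOVD_x_rm / MOVD_rm_x); the REX.W prefix selects
   the 64-bit movq form. */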
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_u32 size;
	sljit_u8 rex = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (!(op & SLJIT_32))
		rex = REX_W;

	if (freg_map[freg] >= 8)
		rex |= REX_R;

	if (reg_map[reg] >= 8)
		rex |= REX_B;

	size = (rex != 0) ? 5 : 4;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

	*inst++ = GROUP_66;
	if (rex != 0)
		*inst++ = rex;
	inst[0] = GROUP_0F;
	inst[1] = GET_OPCODE(op) == SLJIT_COPY_TO_F64 ? MOVD_x_rm : MOVD_rm_x;
	inst[2] = U8(reg_lmap[reg] | (freg_lmap[freg] << 3) | MOD_REG);

	return SLJIT_SUCCESS;
}

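/* Computes the number of bytes the epilogue will pop (locals plus the saved
   registers restored above the stack pointer) so the shadow stack can be
   unwound by the same amount. */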
static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
{
	sljit_s32 tmp, size;

	/* Don't adjust shadow stack if it isn't enabled. */
	if (!cpu_has_shadow_stack())
		return SLJIT_SUCCESS;

	size = compiler->local_size;
	tmp = compiler->scratches;
	if (tmp >= SLJIT_FIRST_SAVED_REG)
		size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	if (SLJIT_S0 >= tmp)
		size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);

	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
}