1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 /* x86 64-bit arch dependent functions. */
28 
29 /* --------------------------------------------------------------------- */
30 /*  Operators                                                            */
31 /* --------------------------------------------------------------------- */
32 
/* Emits a 10 byte "mov reg, imm64" (REX.W + B8+rd io) instruction which
   loads an arbitrary 64 bit immediate into a general purpose register. */
static sljit_s32 emit_load_imm64(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
{
	sljit_u8 *inst;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + sizeof(sljit_sw));
	FAIL_IF(!inst);
	INC_SIZE(2 + sizeof(sljit_sw));
	/* REX.W selects 64 bit operand size; REX.B extends the register
	   field when the register maps to r8-r15 (reg_map >= 8). */
	*inst++ = REX_W | ((reg_map[reg] <= 7) ? 0 : REX_B);
	*inst++ = U8(MOV_r_i32 | (reg_map[reg] & 0x7));
	sljit_unaligned_store_sw(inst, imm);
	return SLJIT_SUCCESS;
}
45 
/* Emits an instruction consisting of an optional REX prefix, a single
   opcode byte and a 32 bit immediate operand. */
static sljit_s32 emit_do_imm32(struct sljit_compiler *compiler, sljit_u8 rex, sljit_u8 opcode, sljit_sw imm)
{
	sljit_u8 *inst;
	/* One opcode byte, the optional REX prefix and the 32 bit immediate. */
	sljit_uw length = (rex ? 2 : 1) + sizeof(sljit_s32);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + length);
	FAIL_IF(!inst);
	INC_SIZE(length);
	if (rex)
		*inst++ = rex;
	*inst++ = opcode;
	sljit_unaligned_store_s32(inst, (sljit_s32)imm);
	return SLJIT_SUCCESS;
}
60 
/* Central instruction emitter. Reserves and partially fills the encoding of
   one x86-64 instruction: optional legacy prefixes (0xf2/0xf3/0x66), an
   optional REX byte, "size" opcode bytes (left for the caller to fill via
   the returned pointer), the mod r/m byte with optional SIB byte and
   displacement describing operand b, and an optional immediate (operand a).
   The low nibble of "size" is the opcode length; its upper bits carry the
   EX86_* flags. Returns a pointer to the opcode byte(s), or NULL on
   allocation failure. */
static sljit_u8* emit_x86_instruction(struct sljit_compiler *compiler, sljit_uw size,
	/* The register or immediate operand. */
	sljit_s32 a, sljit_sw imma,
	/* The general operand (not immediate). */
	sljit_s32 b, sljit_sw immb)
{
	sljit_u8 *inst;
	sljit_u8 *buf_ptr;
	sljit_u8 rex = 0;
	sljit_u8 reg_lmap_b;
	sljit_uw flags = size;
	sljit_uw inst_size;

	/* The immediate operand must be 32 bit. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || compiler->mode32 || IS_HALFWORD(imma));
	/* Both cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BIN_INS | EX86_SHIFT_INS)) != (EX86_BIN_INS | EX86_SHIFT_INS));
	/* Size flags not allowed for typed instructions. */
	SLJIT_ASSERT(!(flags & (EX86_BIN_INS | EX86_SHIFT_INS)) || (flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) == 0);
	/* Both size flags cannot be switched on. */
	SLJIT_ASSERT((flags & (EX86_BYTE_ARG | EX86_HALF_ARG)) != (EX86_BYTE_ARG | EX86_HALF_ARG));
	/* SSE2 and immediate is not possible. */
	SLJIT_ASSERT(!(a & SLJIT_IMM) || !(flags & EX86_SSE2));
	/* At most one mandatory prefix can be selected. */
	SLJIT_ASSERT((flags & (EX86_PREF_F2 | EX86_PREF_F3)) != (EX86_PREF_F2 | EX86_PREF_F3)
		&& (flags & (EX86_PREF_F2 | EX86_PREF_66)) != (EX86_PREF_F2 | EX86_PREF_66)
		&& (flags & (EX86_PREF_F3 | EX86_PREF_66)) != (EX86_PREF_F3 | EX86_PREF_66));

	/* Keep only the opcode length; the flags live on in "flags". */
	size &= 0xf;
	inst_size = size;

	if (!compiler->mode32 && !(flags & EX86_NO_REXW))
		rex |= REX_W;
	else if (flags & EX86_REX)
		rex |= REX;

	if (flags & (EX86_PREF_F2 | EX86_PREF_F3))
		inst_size++;
	if (flags & EX86_PREF_66)
		inst_size++;

	/* Calculate size of b. */
	inst_size += 1; /* mod r/m byte. */
	if (b & SLJIT_MEM) {
		if (!(b & OFFS_REG_MASK)) {
			if (NOT_HALFWORD(immb)) {
				/* Displacement does not fit in 32 bit: materialize the
				   address in TMP_REG2 and use it as base/index. */
				PTR_FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immb));
				immb = 0;
				if (b & REG_MASK)
					b |= TO_OFFS_REG(TMP_REG2);
				else
					b |= TMP_REG2;
			}
			else if (reg_lmap[b & REG_MASK] == 4)
				/* Base encodes as rsp/r12 (lmap 4): a SIB byte is mandatory. */
				b |= TO_OFFS_REG(SLJIT_SP);
		}

		if (!(b & REG_MASK))
			inst_size += 1 + sizeof(sljit_s32); /* SIB byte required to avoid RIP based addressing. */
		else {
			if (reg_map[b & REG_MASK] >= 8)
				rex |= REX_B;

			if (immb != 0 && (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP))) {
				/* Immediate operand. */
				if (immb <= 127 && immb >= -128)
					inst_size += sizeof(sljit_s8);
				else
					inst_size += sizeof(sljit_s32);
			}
			else if (reg_lmap[b & REG_MASK] == 5)
				/* Base encodes as rbp/r13 (lmap 5): mod 00 would mean no
				   base, so an explicit 8 bit zero displacement is needed. */
				inst_size += sizeof(sljit_s8);

			if (b & OFFS_REG_MASK) {
				inst_size += 1; /* SIB byte. */
				if (reg_map[OFFS_REG(b)] >= 8)
					rex |= REX_X;
			}
		}
	}
	else if (!(flags & EX86_SSE2_OP2)) {
		if (reg_map[b] >= 8)
			rex |= REX_B;
	}
	else if (freg_map[b] >= 8)
		rex |= REX_B;

	/* Calculate size of the immediate operand a, if any. */
	if (a & SLJIT_IMM) {
		if (flags & EX86_BIN_INS) {
			if (imma <= 127 && imma >= -128) {
				inst_size += 1;
				flags |= EX86_BYTE_ARG;
			} else
				inst_size += 4;
		}
		else if (flags & EX86_SHIFT_INS) {
			/* Shift counts are masked to the operand width by hardware. */
			imma &= compiler->mode32 ? 0x1f : 0x3f;
			if (imma != 1) {
				inst_size ++;
				flags |= EX86_BYTE_ARG;
			}
		} else if (flags & EX86_BYTE_ARG)
			inst_size++;
		else if (flags & EX86_HALF_ARG)
			inst_size += sizeof(short);
		else
			inst_size += sizeof(sljit_s32);
	}
	else {
		SLJIT_ASSERT(!(flags & EX86_SHIFT_INS) || a == SLJIT_PREF_SHIFT_REG);
		/* reg_map[SLJIT_PREF_SHIFT_REG] is less than 8. */
		if (!(flags & EX86_SSE2_OP1)) {
			if (reg_map[a] >= 8)
				rex |= REX_R;
		}
		else if (freg_map[a] >= 8)
			rex |= REX_R;
	}

	if (rex)
		inst_size++;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + inst_size);
	PTR_FAIL_IF(!inst);

	/* Encoding the byte. */
	INC_SIZE(inst_size);
	if (flags & EX86_PREF_F2)
		*inst++ = 0xf2;
	if (flags & EX86_PREF_F3)
		*inst++ = 0xf3;
	if (flags & EX86_PREF_66)
		*inst++ = 0x66;
	if (rex)
		*inst++ = rex;
	/* The caller fills the "size" opcode bytes; buf_ptr continues after them. */
	buf_ptr = inst + size;

	/* Encode mod/rm byte. */
	if (!(flags & EX86_SHIFT_INS)) {
		if ((flags & EX86_BIN_INS) && (a & SLJIT_IMM))
			*inst = (flags & EX86_BYTE_ARG) ? GROUP_BINARY_83 : GROUP_BINARY_81;

		if (a & SLJIT_IMM)
			*buf_ptr = 0;
		else if (!(flags & EX86_SSE2_OP1))
			*buf_ptr = U8(reg_lmap[a] << 3);
		else
			*buf_ptr = U8(freg_lmap[a] << 3);
	}
	else {
		if (a & SLJIT_IMM) {
			if (imma == 1)
				*inst = GROUP_SHIFT_1;
			else
				*inst = GROUP_SHIFT_N;
		} else
			*inst = GROUP_SHIFT_CL;
		*buf_ptr = 0;
	}

	if (!(b & SLJIT_MEM)) {
		/* Register operand: mod 11. */
		*buf_ptr = U8(*buf_ptr | MOD_REG | (!(flags & EX86_SSE2_OP2) ? reg_lmap[b] : freg_lmap[b]));
		buf_ptr++;
	} else if (b & REG_MASK) {
		reg_lmap_b = reg_lmap[b & REG_MASK];

		if (!(b & OFFS_REG_MASK) || (b & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_SP) || reg_lmap_b == 5) {
			/* Select mod 01 (disp8) or mod 10 (disp32). */
			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr |= 0x40;
				else
					*buf_ptr |= 0x80;
			}

			if (!(b & OFFS_REG_MASK))
				*buf_ptr++ |= reg_lmap_b;
			else {
				*buf_ptr++ |= 0x04;
				*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3));
			}

			if (immb != 0 || reg_lmap_b == 5) {
				if (immb <= 127 && immb >= -128)
					*buf_ptr++ = U8(immb); /* 8 bit displacement. */
				else {
					sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
					buf_ptr += sizeof(sljit_s32);
				}
			}
		}
		else {
			/* Base + scaled index: immb holds the scale shift here. */
			*buf_ptr++ |= 0x04;
			*buf_ptr++ = U8(reg_lmap_b | (reg_lmap[OFFS_REG(b)] << 3) | (immb << 6));
		}
	}
	else {
		/* No base register: SIB with base 101 (0x25) and disp32 encodes an
		   absolute address instead of RIP relative addressing. */
		*buf_ptr++ |= 0x04;
		*buf_ptr++ = 0x25;
		sljit_unaligned_store_s32(buf_ptr, (sljit_s32)immb); /* 32 bit displacement. */
		buf_ptr += sizeof(sljit_s32);
	}

	if (a & SLJIT_IMM) {
		if (flags & EX86_BYTE_ARG)
			*buf_ptr = U8(imma);
		else if (flags & EX86_HALF_ARG)
			sljit_unaligned_store_s16(buf_ptr, (sljit_s16)imma);
		else if (!(flags & EX86_SHIFT_INS))
			sljit_unaligned_store_s32(buf_ptr, (sljit_s32)imma);
	}

	/* Return the opcode position; GROUP_SHIFT_* opcodes were written above,
	   so the caller's slot starts one byte later for shift instructions. */
	return !(flags & EX86_SHIFT_INS) ? inst : (inst + 1);
}
273 
274 /* --------------------------------------------------------------------- */
275 /*  Enter / return                                                       */
276 /* --------------------------------------------------------------------- */
277 
/* Generates a "far" jump through TMP_REG2 for targets which may be outside
   the +/- 2GB range of a rel32 jump:
       [inverted jcc over]  mov TMP_REG2, target  jmp/call TMP_REG2
   Returns the advanced code_ptr. */
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;

	/* A 32 bit (zero extended) load is enough for fixed targets in the low 4GB. */
	int short_addr = !(jump->flags & SLJIT_REWRITABLE_JUMP) && !(jump->flags & JUMP_LABEL) && (jump->u.target <= 0xffffffff);

	/* The relative jump below specialized for this case. */
	SLJIT_ASSERT(reg_map[TMP_REG2] >= 8);

	if (type < SLJIT_JUMP) {
		/* Invert type. */
		*code_ptr++ = U8(get_jump_code(type ^ 0x1) - 0x10);
		/* Skip the mov (6 or 10 bytes) plus the 3 byte indirect jump. */
		*code_ptr++ = short_addr ? (6 + 3) : (10 + 3);
	}

	*code_ptr++ = short_addr ? REX_B : (REX_W | REX_B);
	*code_ptr++ = MOV_r_i32 | reg_lmap[TMP_REG2];
	jump->addr = (sljit_uw)code_ptr;

	if (jump->flags & JUMP_LABEL)
		jump->flags |= PATCH_MD;
	else if (short_addr)
		sljit_unaligned_store_s32(code_ptr, (sljit_s32)jump->u.target);
	else
		sljit_unaligned_store_sw(code_ptr, (sljit_sw)jump->u.target);

	code_ptr += short_addr ? sizeof(sljit_s32) : sizeof(sljit_sw);

	/* jmp/call TMP_REG2 (group FF, register form). */
	*code_ptr++ = REX_B;
	*code_ptr++ = GROUP_FF;
	*code_ptr++ = U8(MOD_REG | (type >= SLJIT_FAST_CALL ? CALL_rm : JMP_rm) | reg_lmap[TMP_REG2]);

	return code_ptr;
}
312 
/* Post-processes the code emitted for a put_label: when the highest label
   address fits into 32 bits, the 64 bit "mov reg, imm64" (and a possible
   following "mov [mem], reg") sequence is shortened in place to its 32 bit
   immediate form. Returns the (possibly moved back) code_ptr. */
static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label)
{
	if (max_label > HALFWORD_MAX) {
		/* Keep the full 64 bit form; the immediate is patched later. */
		put_label->addr -= put_label->flags;
		put_label->flags = PATCH_MD;
		return code_ptr;
	}

	if (put_label->flags == 0) {
		/* Destination is register. */
		code_ptr = (sljit_u8*)put_label->addr - 2 - sizeof(sljit_uw);

		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
		SLJIT_ASSERT((code_ptr[1] & 0xf8) == MOV_r_i32);

		if ((code_ptr[0] & 0x07) != 0) {
			/* Extended register: keep the REX prefix, clear REX.W (0x08). */
			code_ptr[0] = U8(code_ptr[0] & ~0x08);
			code_ptr += 2 + sizeof(sljit_s32);
		}
		else {
			/* Low register: the REX byte can be dropped entirely. */
			code_ptr[0] = code_ptr[1];
			code_ptr += 1 + sizeof(sljit_s32);
		}

		put_label->addr = (sljit_uw)code_ptr;
		return code_ptr;
	}

	/* Destination is memory: remove the 64 bit load and convert the
	   following "mov [mem], reg" into "mov [mem], imm32". */
	code_ptr -= put_label->flags + (2 + sizeof(sljit_uw));
	SLJIT_MEMMOVE(code_ptr, code_ptr + (2 + sizeof(sljit_uw)), put_label->flags);

	SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);

	if ((code_ptr[1] & 0xf8) == MOV_r_i32) {
		code_ptr += 2 + sizeof(sljit_uw);
		SLJIT_ASSERT((code_ptr[0] & 0xf8) == REX_W);
	}

	SLJIT_ASSERT(code_ptr[1] == MOV_rm_r);

	code_ptr[0] = U8(code_ptr[0] & ~0x4); /* Clear REX.R. */
	code_ptr[1] = MOV_rm_i32;
	code_ptr[2] = U8(code_ptr[2] & ~(0x7 << 3)); /* reg field becomes /0. */

	code_ptr = (sljit_u8*)(put_label->addr - (2 + sizeof(sljit_uw)) + sizeof(sljit_s32));
	put_label->addr = (sljit_uw)code_ptr;
	put_label->flags = 0;
	return code_ptr;
}
362 
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)363 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
364 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
365 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
366 {
367 	sljit_uw size;
368 	sljit_s32 word_arg_count = 0;
369 	sljit_s32 saved_arg_count = 0;
370 	sljit_s32 saved_regs_size, tmp, i;
371 #ifdef _WIN64
372 	sljit_s32 saved_float_regs_size;
373 	sljit_s32 saved_float_regs_offset = 0;
374 	sljit_s32 float_arg_count = 0;
375 #endif /* _WIN64 */
376 	sljit_u8 *inst;
377 
378 	CHECK_ERROR();
379 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
380 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
381 
382 	/* Emit ENDBR64 at function entry if needed.  */
383 	FAIL_IF(emit_endbranch(compiler));
384 
385 	compiler->mode32 = 0;
386 
387 	/* Including the return address saved by the call instruction. */
388 	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
389 
390 	tmp = SLJIT_S0 - saveds;
391 	for (i = SLJIT_S0; i > tmp; i--) {
392 		size = reg_map[i] >= 8 ? 2 : 1;
393 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
394 		FAIL_IF(!inst);
395 		INC_SIZE(size);
396 		if (reg_map[i] >= 8)
397 			*inst++ = REX_B;
398 		PUSH_REG(reg_lmap[i]);
399 	}
400 
401 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
402 		size = reg_map[i] >= 8 ? 2 : 1;
403 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
404 		FAIL_IF(!inst);
405 		INC_SIZE(size);
406 		if (reg_map[i] >= 8)
407 			*inst++ = REX_B;
408 		PUSH_REG(reg_lmap[i]);
409 	}
410 
411 #ifdef _WIN64
412 	local_size += SLJIT_LOCALS_OFFSET;
413 	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
414 
415 	if (saved_float_regs_size > 0) {
416 		saved_float_regs_offset = ((local_size + 0xf) & ~0xf);
417 		local_size = saved_float_regs_offset + saved_float_regs_size;
418 	}
419 #else /* !_WIN64 */
420 	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
421 #endif /* _WIN64 */
422 
423 	arg_types >>= SLJIT_ARG_SHIFT;
424 
425 	while (arg_types > 0) {
426 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
427 			tmp = 0;
428 #ifndef _WIN64
429 			switch (word_arg_count) {
430 			case 0:
431 				tmp = SLJIT_R2;
432 				break;
433 			case 1:
434 				tmp = SLJIT_R1;
435 				break;
436 			case 2:
437 				tmp = TMP_REG1;
438 				break;
439 			default:
440 				tmp = SLJIT_R3;
441 				break;
442 			}
443 #else /* !_WIN64 */
444 			switch (word_arg_count + float_arg_count) {
445 			case 0:
446 				tmp = SLJIT_R3;
447 				break;
448 			case 1:
449 				tmp = SLJIT_R1;
450 				break;
451 			case 2:
452 				tmp = SLJIT_R2;
453 				break;
454 			default:
455 				tmp = TMP_REG1;
456 				break;
457 			}
458 #endif /* _WIN64 */
459 			if (arg_types & SLJIT_ARG_TYPE_SCRATCH_REG) {
460 				if (tmp != SLJIT_R0 + word_arg_count)
461 					EMIT_MOV(compiler, SLJIT_R0 + word_arg_count, 0, tmp, 0);
462 			} else {
463 				EMIT_MOV(compiler, SLJIT_S0 - saved_arg_count, 0, tmp, 0);
464 				saved_arg_count++;
465 			}
466 			word_arg_count++;
467 		} else {
468 #ifdef _WIN64
469 			SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
470 			float_arg_count++;
471 			if (float_arg_count != float_arg_count + word_arg_count)
472 				FAIL_IF(emit_sse2_load(compiler, (arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32,
473 					float_arg_count, float_arg_count + word_arg_count, 0));
474 #endif /* _WIN64 */
475 		}
476 		arg_types >>= SLJIT_ARG_SHIFT;
477 	}
478 
479 	local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
480 	compiler->local_size = local_size;
481 
482 #ifdef _WIN64
483 	if (local_size > 0) {
484 		if (local_size <= 4 * 4096) {
485 			if (local_size > 4096)
486 				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096);
487 			if (local_size > 2 * 4096)
488 				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 2);
489 			if (local_size > 3 * 4096)
490 				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -4096 * 3);
491 		}
492 		else {
493 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, local_size >> 12);
494 
495 			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_MEM1(SLJIT_SP), -4096);
496 			BINARY_IMM32(SUB, 4096, SLJIT_SP, 0);
497 			BINARY_IMM32(SUB, 1, TMP_REG1, 0);
498 
499 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
500 			FAIL_IF(!inst);
501 
502 			INC_SIZE(2);
503 			inst[0] = JNE_i8;
504 			inst[1] = (sljit_u8)-21;
505 			local_size &= 0xfff;
506 		}
507 
508 		if (local_size > 0)
509 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(SLJIT_SP), -local_size);
510 	}
511 #endif /* _WIN64 */
512 
513 	if (local_size > 0)
514 		BINARY_IMM32(SUB, local_size, SLJIT_SP, 0);
515 
516 #ifdef _WIN64
517 	if (saved_float_regs_size > 0) {
518 		compiler->mode32 = 1;
519 
520 		tmp = SLJIT_FS0 - fsaveds;
521 		for (i = SLJIT_FS0; i > tmp; i--) {
522 			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
523 			*inst++ = GROUP_0F;
524 			*inst = MOVAPS_xm_x;
525 			saved_float_regs_offset += 16;
526 		}
527 
528 		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
529 			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
530 			*inst++ = GROUP_0F;
531 			*inst = MOVAPS_xm_x;
532 			saved_float_regs_offset += 16;
533 		}
534 	}
535 #endif /* _WIN64 */
536 
537 	return SLJIT_SUCCESS;
538 }
539 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)540 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
541 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
542 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
543 {
544 	sljit_s32 saved_regs_size;
545 #ifdef _WIN64
546 	sljit_s32 saved_float_regs_size;
547 #endif /* _WIN64 */
548 
549 	CHECK_ERROR();
550 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
551 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
552 
553 #ifdef _WIN64
554 	local_size += SLJIT_LOCALS_OFFSET;
555 	saved_float_regs_size = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
556 
557 	if (saved_float_regs_size > 0)
558 		local_size = ((local_size + 0xf) & ~0xf) + saved_float_regs_size;
559 #else /* !_WIN64 */
560 	SLJIT_ASSERT(SLJIT_LOCALS_OFFSET == 0);
561 #endif /* _WIN64 */
562 
563 	/* Including the return address saved by the call instruction. */
564 	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1);
565 	compiler->local_size = ((local_size + saved_regs_size + 0xf) & ~0xf) - saved_regs_size;
566 	return SLJIT_SUCCESS;
567 }
568 
emit_stack_frame_release(struct sljit_compiler * compiler)569 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
570 {
571 	sljit_uw size;
572 	sljit_s32 i, tmp;
573 	sljit_u8 *inst;
574 #ifdef _WIN64
575 	sljit_s32 saved_float_regs_offset;
576 	sljit_s32 fscratches = compiler->fscratches;
577 	sljit_s32 fsaveds = compiler->fsaveds;
578 #endif /* _WIN64 */
579 
580 #ifdef _WIN64
581 	saved_float_regs_offset = GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, 16);
582 
583 	if (saved_float_regs_offset > 0) {
584 		compiler->mode32 = 1;
585 		saved_float_regs_offset = (compiler->local_size - saved_float_regs_offset) & ~0xf;
586 
587 		tmp = SLJIT_FS0 - fsaveds;
588 		for (i = SLJIT_FS0; i > tmp; i--) {
589 			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
590 			*inst++ = GROUP_0F;
591 			*inst = MOVAPS_x_xm;
592 			saved_float_regs_offset += 16;
593 		}
594 
595 		for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
596 			inst = emit_x86_instruction(compiler, 2 | EX86_SSE2, i, 0, SLJIT_MEM1(SLJIT_SP), saved_float_regs_offset);
597 			*inst++ = GROUP_0F;
598 			*inst = MOVAPS_x_xm;
599 			saved_float_regs_offset += 16;
600 		}
601 	}
602 #endif /* _WIN64 */
603 
604 	if (compiler->local_size > 0) {
605 		if (compiler->local_size <= 127) {
606 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
607 			FAIL_IF(!inst);
608 			INC_SIZE(4);
609 			*inst++ = REX_W;
610 			*inst++ = GROUP_BINARY_83;
611 			*inst++ = MOD_REG | ADD | 4;
612 			*inst = U8(compiler->local_size);
613 		}
614 		else {
615 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 7);
616 			FAIL_IF(!inst);
617 			INC_SIZE(7);
618 			*inst++ = REX_W;
619 			*inst++ = GROUP_BINARY_81;
620 			*inst++ = MOD_REG | ADD | 4;
621 			sljit_unaligned_store_s32(inst, compiler->local_size);
622 		}
623 	}
624 
625 	tmp = compiler->scratches;
626 	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
627 		size = reg_map[i] >= 8 ? 2 : 1;
628 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
629 		FAIL_IF(!inst);
630 		INC_SIZE(size);
631 		if (reg_map[i] >= 8)
632 			*inst++ = REX_B;
633 		POP_REG(reg_lmap[i]);
634 	}
635 
636 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
637 	for (i = tmp; i <= SLJIT_S0; i++) {
638 		size = reg_map[i] >= 8 ? 2 : 1;
639 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
640 		FAIL_IF(!inst);
641 		INC_SIZE(size);
642 		if (reg_map[i] >= 8)
643 			*inst++ = REX_B;
644 		POP_REG(reg_lmap[i]);
645 	}
646 
647 	return SLJIT_SUCCESS;
648 }
649 
/* Releases the stack frame and emits a "ret" instruction. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	FAIL_IF(emit_stack_frame_release(compiler));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1);
	RET();
	return SLJIT_SUCCESS;
}
665 
666 /* --------------------------------------------------------------------- */
667 /*  Call / return instructions                                           */
668 /* --------------------------------------------------------------------- */
669 
670 #ifndef _WIN64
671 
/* System V: moves the first integer argument from SLJIT_R0 into its ABI
   register, saving SLJIT_R2 (the third argument register) beforehand when
   it is still live. When the call target register itself is clobbered,
   *src_ptr is redirected to the copy in TMP_REG1. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 types;

	SLJIT_ASSERT(reg_map[SLJIT_R1] == 6 && reg_map[SLJIT_R3] == 1 && reg_map[TMP_REG1] == 2);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Skip the return type and count the integer arguments. */
	for (types = arg_types >> SLJIT_ARG_SHIFT; types != 0; types >>= SLJIT_ARG_SHIFT) {
		if ((types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64)
			word_arg_count++;
	}

	if (word_arg_count == 0)
		return SLJIT_SUCCESS;

	if (word_arg_count >= 3) {
		/* SLJIT_R2 holds the third argument; preserve it in TMP_REG1. */
		if (src == SLJIT_R2)
			*src_ptr = TMP_REG1;
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R2, 0);
	}

	return emit_mov(compiler, SLJIT_R2, 0, SLJIT_R0, 0);
}
700 
701 #else
702 
/* Win64: moves the integer and floating point arguments into their ABI
   defined home registers before a call. When the call target register is
   clobbered by the shuffle, *src_ptr is redirected to a copy in TMP_REG2. */
static sljit_s32 call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src_ptr)
{
	sljit_s32 src = src_ptr ? (*src_ptr) : 0;
	sljit_s32 arg_count = 0;
	sljit_s32 word_arg_count = 0;
	sljit_s32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_s32 data_transfer = 0;
	static sljit_u8 word_arg_regs[5] = { 0, SLJIT_R3, SLJIT_R1, SLJIT_R2, TMP_REG1 };

	SLJIT_ASSERT(reg_map[SLJIT_R3] == 1 && reg_map[SLJIT_R1] == 2 && reg_map[SLJIT_R2] == 8 && reg_map[TMP_REG1] == 9);
	SLJIT_ASSERT(!(src & SLJIT_MEM));

	/* Remove the return type. */
	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: reverse the argument list into "types" and decide
	   whether any register needs to be moved at all. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
		case SLJIT_ARG_TYPE_F32:
			arg_count++;
			float_arg_count++;

			if (arg_count != float_arg_count)
				data_transfer = 1;
			break;
		default:
			arg_count++;
			word_arg_count++;

			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count]) {
				data_transfer = 1;

				/* Preserve the call target before its register is clobbered. */
				if (src == word_arg_regs[arg_count]) {
					EMIT_MOV(compiler, TMP_REG2, 0, src, 0);
					*src_ptr = TMP_REG2;
				}
			}
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (!data_transfer)
		return SLJIT_SUCCESS;

	/* Second pass: walk the reversed list, emitting the moves from the
	   last argument towards the first. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
		case SLJIT_ARG_TYPE_F32:
			if (arg_count != float_arg_count)
				FAIL_IF(emit_sse2_load(compiler, (types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32, arg_count, float_arg_count, 0));
			arg_count--;
			float_arg_count--;
			break;
		default:
			if (arg_count != word_arg_count || arg_count != word_arg_regs[arg_count])
				EMIT_MOV(compiler, word_arg_regs[arg_count], 0, word_arg_count, 0);
			arg_count--;
			word_arg_count--;
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
778 
779 #endif
780 
/* Emits a direct call. With SLJIT_CALL_RETURN the current frame is
   released first and the call is converted into a tail jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	compiler->mode32 = 0;

	/* Shuffle the arguments into their ABI locations. */
	PTR_FAIL_IF(call_with_args(compiler, arg_types, NULL));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_jump(compiler, type);
}
802 
/* Emits an indirect call through src. With SLJIT_CALL_RETURN the frame is
   released first and the call is converted into a tail jump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	compiler->mode32 = 0;

	/* A memory operand is loaded once into TMP_REG2. */
	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* A saved register source would be overwritten by the frame
		   release below, so its value is preserved in TMP_REG2 first. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP;
	}

	/* Shuffle the arguments; src may be redirected if it gets clobbered. */
	FAIL_IF(call_with_args(compiler, arg_types, &src));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}
837 
/* Pops the return address pushed by a fast call into dst (register or
   memory operand). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (FAST_IS_REG(dst)) {
		if (reg_map[dst] < 8) {
			/* Single byte pop. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			POP_REG(reg_lmap[dst]);
			return SLJIT_SUCCESS;
		}

		/* r8-r15 need a REX.B prefix. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = REX_B;
		POP_REG(reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* REX_W is not necessary (src is not immediate). */
	compiler->mode32 = 1;
	inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst++ = POP_rm;
	return SLJIT_SUCCESS;
}
870 
/* Pushes the return address stored in src back onto the stack and emits a
   "ret", returning to the caller of the fast call. */
static sljit_s32 emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	if (FAST_IS_REG(src)) {
		if (reg_map[src] < 8) {
			/* push reg; ret */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 1);
			FAIL_IF(!inst);

			INC_SIZE(1 + 1);
			PUSH_REG(reg_lmap[src]);
		}
		else {
			/* r8-r15 need a REX.B prefix. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 1);
			FAIL_IF(!inst);

			INC_SIZE(2 + 1);
			*inst++ = REX_B;
			PUSH_REG(reg_lmap[src]);
		}
	}
	else {
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= PUSH_rm;

		/* Reserve one more byte for the ret below. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
	}

	RET();
	return SLJIT_SUCCESS;
}
908 
909 /* --------------------------------------------------------------------- */
910 /*  Extend input                                                         */
911 /* --------------------------------------------------------------------- */
912 
/* Emits a 32 -> 64 bit move: sign extending (movsxd) when "sign" is set,
   zero extending (plain 32 bit mov) otherwise. Immediate sources are
   truncated to 32 bit before being stored. */
static sljit_s32 emit_mov_int(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

	compiler->mode32 = 0;

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
			/* A 64 bit mov with a sign extended imm32 covers both signed
			   values and unsigned values up to 0x7fffffff. */
			if (sign || ((sljit_uw)srcw <= 0x7fffffff)) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
				FAIL_IF(!inst);
				*inst = MOV_rm_i32;
				return SLJIT_SUCCESS;
			}
			return emit_load_imm64(compiler, dst, srcw);
		}
		/* Memory destination: a 32 bit store is enough. */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, (sljit_sw)(sljit_s32)srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		compiler->mode32 = 0;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		if (sign) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src, srcw);
			FAIL_IF(!inst);
			*inst++ = MOVSXD_r_rm;
		} else {
			/* A 32 bit mov implicitly zero extends to 64 bit. */
			compiler->mode32 = 1;
			FAIL_IF(emit_mov(compiler, dst_r, 0, src, srcw));
			compiler->mode32 = 0;
		}
	}

	if (dst & SLJIT_MEM) {
		/* Store the low 32 bits of the result. */
		compiler->mode32 = 1;
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		compiler->mode32 = 0;
	}

	return SLJIT_SUCCESS;
}
966 
skip_frames_before_return(struct sljit_compiler * compiler)967 static sljit_s32 skip_frames_before_return(struct sljit_compiler *compiler)
968 {
969 	sljit_s32 tmp, size;
970 
971 	/* Don't adjust shadow stack if it isn't enabled.  */
972 	if (!cpu_has_shadow_stack())
973 		return SLJIT_SUCCESS;
974 
975 	size = compiler->local_size;
976 	tmp = compiler->scratches;
977 	if (tmp >= SLJIT_FIRST_SAVED_REG)
978 		size += (tmp - SLJIT_FIRST_SAVED_REG + 1) * SSIZE_OF(sw);
979 	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
980 	if (SLJIT_S0 >= tmp)
981 		size += (SLJIT_S0 - tmp + 1) * SSIZE_OF(sw);
982 
983 	return adjust_shadow_stack(compiler, SLJIT_MEM1(SLJIT_SP), size);
984 }
985