1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
30 	return "x86" SLJIT_CPUINFO " ABI:fastcall";
31 #else
32 	return "x86" SLJIT_CPUINFO;
33 #endif
34 }
35 
36 /*
37    32b register indexes:
38      0 - EAX
39      1 - ECX
40      2 - EDX
41      3 - EBX
42      4 - ESP
43      5 - EBP
44      6 - ESI
45      7 - EDI
46 */
47 
48 /*
49    64b register indexes:
50      0 - RAX
51      1 - RCX
52      2 - RDX
53      3 - RBX
54      4 - RSP
55      5 - RBP
56      6 - RSI
57      7 - RDI
58      8 - R8   - From now on REX prefix is required
59      9 - R9
60     10 - R10
61     11 - R11
62     12 - R12
63     13 - R13
64     14 - R14
65     15 - R15
66 */
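/* For illustration: the ModRM reg/rm fields are only 3 bits wide, so registers
   8-15 need one extra bit from a REX prefix (REX_R / REX_X / REX_B, defined
   below). Only the low three bits of the index fit into ModRM, which is why
   the reg_lmap tables below store reg_map & 0x7. */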
67 
68 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
69 
70 /* Last register + 1. */
71 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
72 
73 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
74 	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
75 };
76 
77 #define CHECK_EXTRA_REGS(p, w, do) \
78 	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
79 		if (p <= compiler->scratches) \
80 			w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
81 		else \
82 			w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
83 		p = SLJIT_MEM1(SLJIT_SP); \
84 		do; \
85 	}
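/* For illustration: x86-32 has only 8 general purpose registers, so the SLJIT
   registers between SLJIT_R3 and SLJIT_S3 (the zero entries in reg_map above)
   are virtual and live in stack slots. CHECK_EXTRA_REGS rewrites such an
   operand into a SLJIT_MEM1(SLJIT_SP) access. Sketch, assuming a compiler
   state where p falls into the scratch range:

       sljit_s32 p = SLJIT_R3;
       sljit_sw w = 0;
       CHECK_EXTRA_REGS(p, w, (void)0);

   afterwards p is SLJIT_MEM1(SLJIT_SP) and w holds the stack offset. */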
86 
87 #else /* SLJIT_CONFIG_X86_32 */
88 
89 /* Last register + 1. */
90 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
91 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
92 
93 /* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
94    Note: avoid using r12 and r13 for memory addressing;
95    therefore r12 is better used as a higher saved register. */
96 #ifndef _WIN64
97 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
98 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
99 	0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
100 };
101 /* low-map. reg_map & 0x7. */
102 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
103 	0, 0, 6, 7, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 1
104 };
105 #else
106 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
107 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
108 	0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
109 };
110 /* low-map. reg_map & 0x7. */
111 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
112 	0, 0, 2, 0, 1,  3,  4, 5,  5,  6,  7, 7, 6, 3, 4, 1,  2
113 };
114 #endif
115 
116 /* Args: xmm0-xmm3 */
117 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
118 	4, 0, 1, 2, 3, 5, 6
119 };
120 /* low-map. freg_map & 0x7. */
121 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
122 	4, 0, 1, 2, 3, 5, 6
123 };
124 
125 #define REX_W		0x48
126 #define REX_R		0x44
127 #define REX_X		0x42
128 #define REX_B		0x41
129 #define REX		0x40
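/* For illustration: the REX bits combine by OR-ing. A 64-bit register move
   such as "mov r8, rax" is encoded with REX_W | REX_B = 0x49, opcode
   MOV_rm_r (0x89) and ModRM 0xC0 (mod=11, reg=rax=000, rm=r8 low bits 000),
   i.e. the byte sequence 49 89 C0. */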
130 
131 #ifndef _WIN64
132 #define HALFWORD_MAX 0x7fffffffl
133 #define HALFWORD_MIN -0x80000000l
134 #else
135 #define HALFWORD_MAX 0x7fffffffll
136 #define HALFWORD_MIN -0x80000000ll
137 #endif
138 
139 #define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
140 #define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
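/* For illustration: most x86-64 instructions only accept a sign-extended
   32-bit immediate, so IS_HALFWORD() decides whether a value can be encoded
   directly. IS_HALFWORD(0x7fffffff) is nonzero, while 0x80000000 and larger
   constants are NOT_HALFWORD and must first be loaded into a register (see
   the emit_load_imm64 calls further down). */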
141 
142 #define CHECK_EXTRA_REGS(p, w, do)
143 
144 #endif /* SLJIT_CONFIG_X86_32 */
145 
146 #define TMP_FREG	(0)
147 
148 /* Size flags for emit_x86_instruction: */
149 #define EX86_BIN_INS		0x0010
150 #define EX86_SHIFT_INS		0x0020
151 #define EX86_REX		0x0040
152 #define EX86_NO_REXW		0x0080
153 #define EX86_BYTE_ARG		0x0100
154 #define EX86_HALF_ARG		0x0200
155 #define EX86_PREF_66		0x0400
156 #define EX86_PREF_F2		0x0800
157 #define EX86_PREF_F3		0x1000
158 #define EX86_SSE2_OP1		0x2000
159 #define EX86_SSE2_OP2		0x4000
160 #define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
161 
162 /* --------------------------------------------------------------------- */
163 /*  Instruction forms                                                    */
164 /* --------------------------------------------------------------------- */
165 
166 #define ADD		(/* BINARY */ 0 << 3)
167 #define ADD_EAX_i32	0x05
168 #define ADD_r_rm	0x03
169 #define ADD_rm_r	0x01
170 #define ADDSD_x_xm	0x58
171 #define ADC		(/* BINARY */ 2 << 3)
172 #define ADC_EAX_i32	0x15
173 #define ADC_r_rm	0x13
174 #define ADC_rm_r	0x11
175 #define AND		(/* BINARY */ 4 << 3)
176 #define AND_EAX_i32	0x25
177 #define AND_r_rm	0x23
178 #define AND_rm_r	0x21
179 #define ANDPD_x_xm	0x54
180 #define BSR_r_rm	(/* GROUP_0F */ 0xbd)
181 #define CALL_i32	0xe8
182 #define CALL_rm		(/* GROUP_FF */ 2 << 3)
183 #define CDQ		0x99
184 #define CMOVE_r_rm	(/* GROUP_0F */ 0x44)
185 #define CMP		(/* BINARY */ 7 << 3)
186 #define CMP_EAX_i32	0x3d
187 #define CMP_r_rm	0x3b
188 #define CMP_rm_r	0x39
189 #define CVTPD2PS_x_xm	0x5a
190 #define CVTSI2SD_x_rm	0x2a
191 #define CVTTSD2SI_r_xm	0x2c
192 #define DIV		(/* GROUP_F7 */ 6 << 3)
193 #define DIVSD_x_xm	0x5e
194 #define FSTPS		0xd9
195 #define FSTPD		0xdd
196 #define INT3		0xcc
197 #define IDIV		(/* GROUP_F7 */ 7 << 3)
198 #define IMUL		(/* GROUP_F7 */ 5 << 3)
199 #define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
200 #define IMUL_r_rm_i8	0x6b
201 #define IMUL_r_rm_i32	0x69
202 #define JE_i8		0x74
203 #define JNE_i8		0x75
204 #define JMP_i8		0xeb
205 #define JMP_i32		0xe9
206 #define JMP_rm		(/* GROUP_FF */ 4 << 3)
207 #define LEA_r_m		0x8d
208 #define MOV_r_rm	0x8b
209 #define MOV_r_i32	0xb8
210 #define MOV_rm_r	0x89
211 #define MOV_rm_i32	0xc7
212 #define MOV_rm8_i8	0xc6
213 #define MOV_rm8_r8	0x88
214 #define MOVSD_x_xm	0x10
215 #define MOVSD_xm_x	0x11
216 #define MOVSXD_r_rm	0x63
217 #define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
218 #define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
219 #define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
220 #define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
221 #define MUL		(/* GROUP_F7 */ 4 << 3)
222 #define MULSD_x_xm	0x59
223 #define NEG_rm		(/* GROUP_F7 */ 3 << 3)
224 #define NOP		0x90
225 #define NOT_rm		(/* GROUP_F7 */ 2 << 3)
226 #define OR		(/* BINARY */ 1 << 3)
227 #define OR_r_rm		0x0b
228 #define OR_EAX_i32	0x0d
229 #define OR_rm_r		0x09
230 #define OR_rm8_r8	0x08
231 #define POP_r		0x58
232 #define POP_rm		0x8f
233 #define POPF		0x9d
234 #define PREFETCH	0x18
235 #define PUSH_i32	0x68
236 #define PUSH_r		0x50
237 #define PUSH_rm		(/* GROUP_FF */ 6 << 3)
238 #define PUSHF		0x9c
239 #define RET_near	0xc3
240 #define RET_i16		0xc2
241 #define SBB		(/* BINARY */ 3 << 3)
242 #define SBB_EAX_i32	0x1d
243 #define SBB_r_rm	0x1b
244 #define SBB_rm_r	0x19
245 #define SAR		(/* SHIFT */ 7 << 3)
246 #define SHL		(/* SHIFT */ 4 << 3)
247 #define SHR		(/* SHIFT */ 5 << 3)
248 #define SUB		(/* BINARY */ 5 << 3)
249 #define SUB_EAX_i32	0x2d
250 #define SUB_r_rm	0x2b
251 #define SUB_rm_r	0x29
252 #define SUBSD_x_xm	0x5c
253 #define TEST_EAX_i32	0xa9
254 #define TEST_rm_r	0x85
255 #define UCOMISD_x_xm	0x2e
256 #define UNPCKLPD_x_xm	0x14
257 #define XCHG_EAX_r	0x90
258 #define XCHG_r_rm	0x87
259 #define XOR		(/* BINARY */ 6 << 3)
260 #define XOR_EAX_i32	0x35
261 #define XOR_r_rm	0x33
262 #define XOR_rm_r	0x31
263 #define XORPD_x_xm	0x57
264 
265 #define GROUP_0F	0x0f
266 #define GROUP_F7	0xf7
267 #define GROUP_FF	0xff
268 #define GROUP_BINARY_81	0x81
269 #define GROUP_BINARY_83	0x83
270 #define GROUP_SHIFT_1	0xd1
271 #define GROUP_SHIFT_N	0xc1
272 #define GROUP_SHIFT_CL	0xd3
273 
274 #define MOD_REG		0xc0
275 #define MOD_DISP8	0x40
276 
277 #define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))
278 
279 #define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
280 #define POP_REG(r)			(*inst++ = (POP_r + (r)))
281 #define RET()				(*inst++ = (RET_near))
282 #define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
283 /* r32, r/m32 */
284 #define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
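/* For illustration: MOV_RM packs a standard ModRM byte (mod:2 reg:3 rm:3)
   after the 0x8B opcode. MOV_RM(0x3, 0, 1) emits the bytes 8B C1, which is
   "mov eax, ecx" in 32-bit mode (mod=11 selects the register-direct form). */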
285 
286 /* Multithreading does not affect these static variables, since they store
287    built-in CPU features. Therefore they can safely be overwritten by different
288    threads that detect the CPU features at the same time. */
289 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
290 static sljit_s32 cpu_has_sse2 = -1;
291 #endif
292 static sljit_s32 cpu_has_cmov = -1;
293 
294 #ifdef _WIN32_WCE
295 #include <cmnintrin.h>
296 #elif defined(_MSC_VER) && _MSC_VER >= 1400
297 #include <intrin.h>
298 #endif
299 
300 /******************************************************/
301 /*    Unaligned-store functions                       */
302 /******************************************************/
303 
304 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
305 {
306 	SLJIT_MEMCPY(addr, &value, sizeof(value));
307 }
308 
309 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
310 {
311 	SLJIT_MEMCPY(addr, &value, sizeof(value));
312 }
313 
314 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
315 {
316 	SLJIT_MEMCPY(addr, &value, sizeof(value));
317 }
318 
319 /******************************************************/
320 /*    Utility functions                               */
321 /******************************************************/
322 
323 static void get_cpu_features(void)
324 {
325 	sljit_u32 features;
326 
327 #if defined(_MSC_VER) && _MSC_VER >= 1400
328 
329 	int CPUInfo[4];
330 	__cpuid(CPUInfo, 1);
331 	features = (sljit_u32)CPUInfo[3];
332 
333 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
334 
335 	/* AT&T syntax. */
336 	__asm__ (
337 		"movl $0x1, %%eax\n"
338 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
339 		/* On x86-32, there is no red zone, so this
340 		   should work (no need for a local variable). */
341 		"push %%ebx\n"
342 #endif
343 		"cpuid\n"
344 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
345 		"pop %%ebx\n"
346 #endif
347 		"movl %%edx, %0\n"
348 		: "=g" (features)
349 		:
350 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
351 		: "%eax", "%ecx", "%edx"
352 #else
353 		: "%rax", "%rbx", "%rcx", "%rdx"
354 #endif
355 	);
356 
357 #else /* _MSC_VER && _MSC_VER >= 1400 */
358 
359 	/* Intel syntax. */
360 	__asm {
361 		mov eax, 1
362 		cpuid
363 		mov features, edx
364 	}
365 
366 #endif /* _MSC_VER && _MSC_VER >= 1400 */
367 
368 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
369 	cpu_has_sse2 = (features >> 26) & 0x1;
370 #endif
371 	cpu_has_cmov = (features >> 15) & 0x1;
372 }
373 
374 static sljit_u8 get_jump_code(sljit_s32 type)
375 {
376 	switch (type) {
377 	case SLJIT_EQUAL:
378 	case SLJIT_EQUAL_F64:
379 		return 0x84 /* je */;
380 
381 	case SLJIT_NOT_EQUAL:
382 	case SLJIT_NOT_EQUAL_F64:
383 		return 0x85 /* jne */;
384 
385 	case SLJIT_LESS:
386 	case SLJIT_LESS_F64:
387 		return 0x82 /* jc */;
388 
389 	case SLJIT_GREATER_EQUAL:
390 	case SLJIT_GREATER_EQUAL_F64:
391 		return 0x83 /* jae */;
392 
393 	case SLJIT_GREATER:
394 	case SLJIT_GREATER_F64:
395 		return 0x87 /* jnbe */;
396 
397 	case SLJIT_LESS_EQUAL:
398 	case SLJIT_LESS_EQUAL_F64:
399 		return 0x86 /* jbe */;
400 
401 	case SLJIT_SIG_LESS:
402 		return 0x8c /* jl */;
403 
404 	case SLJIT_SIG_GREATER_EQUAL:
405 		return 0x8d /* jnl */;
406 
407 	case SLJIT_SIG_GREATER:
408 		return 0x8f /* jnle */;
409 
410 	case SLJIT_SIG_LESS_EQUAL:
411 		return 0x8e /* jle */;
412 
413 	case SLJIT_OVERFLOW:
414 	case SLJIT_MUL_OVERFLOW:
415 		return 0x80 /* jo */;
416 
417 	case SLJIT_NOT_OVERFLOW:
418 	case SLJIT_MUL_NOT_OVERFLOW:
419 		return 0x81 /* jno */;
420 
421 	case SLJIT_UNORDERED_F64:
422 		return 0x8a /* jp */;
423 
424 	case SLJIT_ORDERED_F64:
425 		return 0x8b /* jpo */;
426 	}
427 	return 0;
428 }
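/* For illustration: the values above are the second byte of the two-byte
   0F 8x near-jump forms. The one-byte short forms are exactly 0x10 lower
   (0x84 "je rel32" vs 0x74 JE_i8), which is why the short-jump paths below
   emit get_jump_code(type) - 0x10. */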
429 
430 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
431 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
432 #else
433 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
434 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
435 #endif
436 
437 static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
438 {
439 	sljit_s32 type = jump->flags >> TYPE_SHIFT;
440 	sljit_s32 short_jump;
441 	sljit_uw label_addr;
442 
443 	if (jump->flags & JUMP_LABEL)
444 		label_addr = (sljit_uw)(code + jump->u.label->size);
445 	else
446 		label_addr = jump->u.target - executable_offset;
447 
448 	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
449 
450 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
451 	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
452 		return generate_far_jump_code(jump, code_ptr);
453 #endif
454 
455 	if (type == SLJIT_JUMP) {
456 		if (short_jump)
457 			*code_ptr++ = JMP_i8;
458 		else
459 			*code_ptr++ = JMP_i32;
460 		jump->addr++;
461 	}
462 	else if (type >= SLJIT_FAST_CALL) {
463 		short_jump = 0;
464 		*code_ptr++ = CALL_i32;
465 		jump->addr++;
466 	}
467 	else if (short_jump) {
468 		*code_ptr++ = get_jump_code(type) - 0x10;
469 		jump->addr++;
470 	}
471 	else {
472 		*code_ptr++ = GROUP_0F;
473 		*code_ptr++ = get_jump_code(type);
474 		jump->addr += 2;
475 	}
476 
477 	if (short_jump) {
478 		jump->flags |= PATCH_MB;
479 		code_ptr += sizeof(sljit_s8);
480 	} else {
481 		jump->flags |= PATCH_MW;
482 		code_ptr += sizeof(sljit_s32);
483 	}
484 
485 	return code_ptr;
486 }
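/* For illustration: generate_near_jump_code only reserves room for the
   displacement (PATCH_MB marks an 8-bit field, PATCH_MW a 32-bit one); the
   actual values are written by the fix-up loop near the end of
   sljit_generate_code, once every label address is known. */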
487 
488 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
489 {
490 	struct sljit_memory_fragment *buf;
491 	sljit_u8 *code;
492 	sljit_u8 *code_ptr;
493 	sljit_u8 *buf_ptr;
494 	sljit_u8 *buf_end;
495 	sljit_u8 len;
496 	sljit_sw executable_offset;
497 	sljit_sw jump_addr;
498 
499 	struct sljit_label *label;
500 	struct sljit_jump *jump;
501 	struct sljit_const *const_;
502 	struct sljit_put_label *put_label;
503 
504 	CHECK_ERROR_PTR();
505 	CHECK_PTR(check_sljit_generate_code(compiler));
506 	reverse_buf(compiler);
507 
508 	/* Second code generation pass. */
509 	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
510 	PTR_FAIL_WITH_EXEC_IF(code);
511 	buf = compiler->buf;
512 
513 	code_ptr = code;
514 	label = compiler->labels;
515 	jump = compiler->jumps;
516 	const_ = compiler->consts;
517 	put_label = compiler->put_labels;
518 	executable_offset = SLJIT_EXEC_OFFSET(code);
519 
520 	do {
521 		buf_ptr = buf->memory;
522 		buf_end = buf_ptr + buf->used_size;
523 		do {
524 			len = *buf_ptr++;
525 			if (len > 0) {
526 				/* The code is already generated. */
527 				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
528 				code_ptr += len;
529 				buf_ptr += len;
530 			}
531 			else {
532 				switch (*buf_ptr) {
533 				case 0:
534 					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
535 					label->size = code_ptr - code;
536 					label = label->next;
537 					break;
538 				case 1:
539 					jump->addr = (sljit_uw)code_ptr;
540 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
541 						code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
542 					else {
543 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
544 						code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
545 #else
546 						code_ptr = generate_far_jump_code(jump, code_ptr);
547 #endif
548 					}
549 					jump = jump->next;
550 					break;
551 				case 2:
552 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
553 					const_ = const_->next;
554 					break;
555 				default:
556 					SLJIT_ASSERT(*buf_ptr == 3);
557 					SLJIT_ASSERT(put_label->label);
558 					put_label->addr = (sljit_uw)code_ptr;
559 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
560 					code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
561 #endif
562 					put_label = put_label->next;
563 					break;
564 				}
565 				buf_ptr++;
566 			}
567 		} while (buf_ptr < buf_end);
568 		SLJIT_ASSERT(buf_ptr == buf_end);
569 		buf = buf->next;
570 	} while (buf);
571 
572 	SLJIT_ASSERT(!label);
573 	SLJIT_ASSERT(!jump);
574 	SLJIT_ASSERT(!const_);
575 	SLJIT_ASSERT(!put_label);
576 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
577 
578 	jump = compiler->jumps;
579 	while (jump) {
580 		jump_addr = jump->addr + executable_offset;
581 
582 		if (jump->flags & PATCH_MB) {
583 			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
584 			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
585 		} else if (jump->flags & PATCH_MW) {
586 			if (jump->flags & JUMP_LABEL) {
587 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
588 				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
589 #else
590 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
591 				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
592 #endif
593 			}
594 			else {
595 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
596 				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
597 #else
598 				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
599 				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
600 #endif
601 			}
602 		}
603 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
604 		else if (jump->flags & PATCH_MD)
605 			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
606 #endif
607 
608 		jump = jump->next;
609 	}
610 
611 	put_label = compiler->put_labels;
612 	while (put_label) {
613 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
614 		sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
615 #else
616 		if (put_label->flags & PATCH_MD) {
617 			SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
618 			sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
619 		}
620 		else {
621 			SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
622 			sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
623 		}
624 #endif
625 
626 		put_label = put_label->next;
627 	}
628 
629 	compiler->error = SLJIT_ERR_COMPILED;
630 	compiler->executable_offset = executable_offset;
631 	compiler->executable_size = code_ptr - code;
632 	return (void*)(code + executable_offset);
633 }
634 
635 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
636 {
637 	switch (feature_type) {
638 	case SLJIT_HAS_FPU:
639 #ifdef SLJIT_IS_FPU_AVAILABLE
640 		return SLJIT_IS_FPU_AVAILABLE;
641 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
642 		if (cpu_has_sse2 == -1)
643 			get_cpu_features();
644 		return cpu_has_sse2;
645 #else /* SLJIT_DETECT_SSE2 */
646 		return 1;
647 #endif /* SLJIT_DETECT_SSE2 */
648 
649 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
650 	case SLJIT_HAS_VIRTUAL_REGISTERS:
651 		return 1;
652 #endif
653 
654 	case SLJIT_HAS_CLZ:
655 	case SLJIT_HAS_CMOV:
656 		if (cpu_has_cmov == -1)
657 			get_cpu_features();
658 		return cpu_has_cmov;
659 
660 	case SLJIT_HAS_PREFETCH:
661 		return 1;
662 
663 	case SLJIT_HAS_SSE2:
664 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
665 		if (cpu_has_sse2 == -1)
666 			get_cpu_features();
667 		return cpu_has_sse2;
668 #else
669 		return 1;
670 #endif
671 
672 	default:
673 		return 0;
674 	}
675 }
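/* For illustration, a typical caller-side check (use_cmov and use_branch are
   hypothetical helpers, not part of this file):

       if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
           use_cmov();
       else
           use_branch();

   emit_clz below applies the same pattern internally through cpu_has_cmov
   and sljit_emit_cmov_generic. */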
676 
677 /* --------------------------------------------------------------------- */
678 /*  Operators                                                            */
679 /* --------------------------------------------------------------------- */
680 
681 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
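/* For illustration: BINARY_OPCODE(ADD) expands to
   (0x05 << 24) | (0x03 << 16) | (0x01 << 8) | (0 << 3) = 0x05030100, which
   emit_cum_binary/emit_non_cum_binary unpack again into op_eax_imm, op_rm,
   op_mr and op_imm. */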
682 
683 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
684 	sljit_u32 op_types,
685 	sljit_s32 dst, sljit_sw dstw,
686 	sljit_s32 src1, sljit_sw src1w,
687 	sljit_s32 src2, sljit_sw src2w);
688 
689 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
690 	sljit_u32 op_types,
691 	sljit_s32 dst, sljit_sw dstw,
692 	sljit_s32 src1, sljit_sw src1w,
693 	sljit_s32 src2, sljit_sw src2w);
694 
695 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
696 	sljit_s32 dst, sljit_sw dstw,
697 	sljit_s32 src, sljit_sw srcw);
698 
699 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
700 	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
701 
702 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
703 	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
704 
705 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
706 	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
707 
708 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
709 	sljit_s32 src1, sljit_sw src1w,
710 	sljit_s32 src2, sljit_sw src2w);
711 
712 static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
713 {
714 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
715 	/* Emit endbr32/endbr64 when CET is enabled.  */
716 	sljit_u8 *inst;
717 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
718 	FAIL_IF(!inst);
719 	INC_SIZE(4);
720 	*inst++ = 0xf3;
721 	*inst++ = 0x0f;
722 	*inst++ = 0x1e;
723 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
724 	*inst = 0xfb;
725 #else
726 	*inst = 0xfa;
727 #endif
728 #else
729 	SLJIT_UNUSED_ARG(compiler);
730 #endif
731 	return SLJIT_SUCCESS;
732 }
733 
734 static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
735 {
736 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
737 	sljit_u8 *inst;
738 	sljit_s32 size;
739 
740 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
741 	size = 5;
742 #else
743 	size = 4;
744 #endif
745 
746 	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
747 	FAIL_IF(!inst);
748 	INC_SIZE(size);
749 	*inst++ = 0xf3;
750 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
751 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
752 #endif
753 	*inst++ = 0x0f;
754 	*inst++ = 0x1e;
755 	*inst = (0x3 << 6) | (0x1 << 3) | (reg_map[reg] & 0x7);
756 #else
757 	SLJIT_UNUSED_ARG(compiler);
758 	SLJIT_UNUSED_ARG(reg);
759 #endif
760 	return SLJIT_SUCCESS;
761 }
762 
763 static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
764 {
765 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
766 	sljit_u8 *inst;
767 	sljit_s32 size;
768 
769 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
770 	size = 5;
771 #else
772 	size = 4;
773 #endif
774 
775 	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
776 	FAIL_IF(!inst);
777 	INC_SIZE(size);
778 	*inst++ = 0xf3;
779 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
780 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
781 #endif
782 	*inst++ = 0x0f;
783 	*inst++ = 0xae;
784 	*inst = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
785 #else
786 	SLJIT_UNUSED_ARG(compiler);
787 	SLJIT_UNUSED_ARG(reg);
788 #endif
789 	return SLJIT_SUCCESS;
790 }
791 
792 static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
793 {
794 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
795 	return _get_ssp() != 0;
796 #else
797 	return 0;
798 #endif
799 }
800 
801 static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
802 	sljit_s32 src, sljit_sw srcw, sljit_s32 base, sljit_sw disp)
803 {
804 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
805 	sljit_u8 *inst;
806 
807 	sljit_s32 size_before_rdssp_inst = compiler->size;
808 
809 	/* Generate "RDSSP TMP_REG1". */
810 	FAIL_IF(emit_rdssp(compiler, TMP_REG1));
811 
812 	/* Load return address on shadow stack into TMP_REG1. */
813 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
814 	SLJIT_ASSERT(reg_map[TMP_REG1] == 5);
815 
816 	/* Hand code unsupported "mov 0x0(%ebp),%ebp". */
817 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
818 	FAIL_IF(!inst);
819 	INC_SIZE(3);
820 	*inst++ = 0x8b;
821 	*inst++ = 0x6d;
822 	*inst = 0;
823 #else /* !SLJIT_CONFIG_X86_32 */
824 	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
825 #endif /* SLJIT_CONFIG_X86_32 */
826 
827 	if (src == SLJIT_UNUSED) {
828 		/* Return address is on stack.  */
829 		src = SLJIT_MEM1(base);
830 		srcw = disp;
831 	}
832 
833 	/* Compare return address against TMP_REG1. */
834 	FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
835 
836 	/* Generate JZ to skip the shadow stack adjustment when the shadow
837 	   stack matches the normal stack. */
838 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
839 	FAIL_IF(!inst);
840 	INC_SIZE(2);
841 	*inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
842 	sljit_uw size_jz_after_cmp_inst = compiler->size;
843 	sljit_u8 *jz_after_cmp_inst = inst;
844 
845 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
846 	/* REX_W is not necessary. */
847 	compiler->mode32 = 1;
848 #endif
849 	/* Load 1 into TMP_REG1. */
850 	EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
851 
852 	/* Generate "INCSSP TMP_REG1". */
853 	FAIL_IF(emit_incssp(compiler, TMP_REG1));
854 
855 	/* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
856 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
857 	FAIL_IF(!inst);
858 	INC_SIZE(2);
859 	*inst++ = JMP_i8;
860 	*inst = size_before_rdssp_inst - compiler->size;
861 
862 	*jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
863 #else /* SLJIT_CONFIG_X86_CET */
864 	SLJIT_UNUSED_ARG(compiler);
865 	SLJIT_UNUSED_ARG(src);
866 	SLJIT_UNUSED_ARG(srcw);
867 	SLJIT_UNUSED_ARG(base);
868 	SLJIT_UNUSED_ARG(disp);
869 #endif /* SLJIT_CONFIG_X86_CET */
870 	return SLJIT_SUCCESS;
871 }
872 
873 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
874 #include "sljitNativeX86_32.c"
875 #else
876 #include "sljitNativeX86_64.c"
877 #endif
878 
879 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
880 	sljit_s32 dst, sljit_sw dstw,
881 	sljit_s32 src, sljit_sw srcw)
882 {
883 	sljit_u8* inst;
884 
885 	SLJIT_ASSERT(dst != SLJIT_UNUSED);
886 
887 	if (FAST_IS_REG(src)) {
888 		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
889 		FAIL_IF(!inst);
890 		*inst = MOV_rm_r;
891 		return SLJIT_SUCCESS;
892 	}
893 	if (src & SLJIT_IMM) {
894 		if (FAST_IS_REG(dst)) {
895 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
896 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
897 #else
898 			if (!compiler->mode32) {
899 				if (NOT_HALFWORD(srcw))
900 					return emit_load_imm64(compiler, dst, srcw);
901 			}
902 			else
903 				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
904 #endif
905 		}
906 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
907 		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
908 			/* Immediate to memory move. Only SLJIT_MOV operation copies
909 			   an immediate directly into memory so TMP_REG1 can be used. */
910 			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
911 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
912 			FAIL_IF(!inst);
913 			*inst = MOV_rm_r;
914 			return SLJIT_SUCCESS;
915 		}
916 #endif
917 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
918 		FAIL_IF(!inst);
919 		*inst = MOV_rm_i32;
920 		return SLJIT_SUCCESS;
921 	}
922 	if (FAST_IS_REG(dst)) {
923 		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
924 		FAIL_IF(!inst);
925 		*inst = MOV_r_rm;
926 		return SLJIT_SUCCESS;
927 	}
928 
929 	/* Memory to memory move. Only SLJIT_MOV operation copies
930 	   data from memory to memory so TMP_REG1 can be used. */
931 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
932 	FAIL_IF(!inst);
933 	*inst = MOV_r_rm;
934 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
935 	FAIL_IF(!inst);
936 	*inst = MOV_rm_r;
937 	return SLJIT_SUCCESS;
938 }
939 
940 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
941 {
942 	sljit_u8 *inst;
943 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
944 	sljit_s32 size;
945 #endif
946 
947 	CHECK_ERROR();
948 	CHECK(check_sljit_emit_op0(compiler, op));
949 
950 	switch (GET_OPCODE(op)) {
951 	case SLJIT_BREAKPOINT:
952 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
953 		FAIL_IF(!inst);
954 		INC_SIZE(1);
955 		*inst = INT3;
956 		break;
957 	case SLJIT_NOP:
958 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
959 		FAIL_IF(!inst);
960 		INC_SIZE(1);
961 		*inst = NOP;
962 		break;
963 	case SLJIT_LMUL_UW:
964 	case SLJIT_LMUL_SW:
965 	case SLJIT_DIVMOD_UW:
966 	case SLJIT_DIVMOD_SW:
967 	case SLJIT_DIV_UW:
968 	case SLJIT_DIV_SW:
969 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
970 #ifdef _WIN64
971 		SLJIT_ASSERT(
972 			reg_map[SLJIT_R0] == 0
973 			&& reg_map[SLJIT_R1] == 2
974 			&& reg_map[TMP_REG1] > 7);
975 #else
976 		SLJIT_ASSERT(
977 			reg_map[SLJIT_R0] == 0
978 			&& reg_map[SLJIT_R1] < 7
979 			&& reg_map[TMP_REG1] == 2);
980 #endif
981 		compiler->mode32 = op & SLJIT_I32_OP;
982 #endif
983 		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
984 
985 		op = GET_OPCODE(op);
986 		if ((op | 0x2) == SLJIT_DIV_UW) {
987 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
988 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
989 			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
990 #else
991 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
992 #endif
993 			FAIL_IF(!inst);
994 			*inst = XOR_r_rm;
995 		}
996 
997 		if ((op | 0x2) == SLJIT_DIV_SW) {
998 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
999 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
1000 #endif
1001 
1002 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1003 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1004 			FAIL_IF(!inst);
1005 			INC_SIZE(1);
1006 			*inst = CDQ;
1007 #else
1008 			if (compiler->mode32) {
1009 				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1010 				FAIL_IF(!inst);
1011 				INC_SIZE(1);
1012 				*inst = CDQ;
1013 			} else {
1014 				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1015 				FAIL_IF(!inst);
1016 				INC_SIZE(2);
1017 				*inst++ = REX_W;
1018 				*inst = CDQ;
1019 			}
1020 #endif
1021 		}
1022 
1023 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1024 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1025 		FAIL_IF(!inst);
1026 		INC_SIZE(2);
1027 		*inst++ = GROUP_F7;
1028 		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
1029 #else
1030 #ifdef _WIN64
1031 		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
1032 #else
1033 		size = (!compiler->mode32) ? 3 : 2;
1034 #endif
1035 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1036 		FAIL_IF(!inst);
1037 		INC_SIZE(size);
1038 #ifdef _WIN64
1039 		if (!compiler->mode32)
1040 			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
1041 		else if (op >= SLJIT_DIVMOD_UW)
1042 			*inst++ = REX_B;
1043 		*inst++ = GROUP_F7;
1044 		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
1045 #else
1046 		if (!compiler->mode32)
1047 			*inst++ = REX_W;
1048 		*inst++ = GROUP_F7;
1049 		*inst = MOD_REG | reg_map[SLJIT_R1];
1050 #endif
1051 #endif
1052 		switch (op) {
1053 		case SLJIT_LMUL_UW:
1054 			*inst |= MUL;
1055 			break;
1056 		case SLJIT_LMUL_SW:
1057 			*inst |= IMUL;
1058 			break;
1059 		case SLJIT_DIVMOD_UW:
1060 		case SLJIT_DIV_UW:
1061 			*inst |= DIV;
1062 			break;
1063 		case SLJIT_DIVMOD_SW:
1064 		case SLJIT_DIV_SW:
1065 			*inst |= IDIV;
1066 			break;
1067 		}
1068 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
1069 		if (op <= SLJIT_DIVMOD_SW)
1070 			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1071 #else
1072 		if (op >= SLJIT_DIV_UW)
1073 			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
1074 #endif
1075 		break;
1076 	case SLJIT_ENDBR:
1077 		return emit_endbranch(compiler);
1078 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1079 		return skip_frames_before_return(compiler);
1080 	}
1081 
1082 	return SLJIT_SUCCESS;
1083 }
1084 
1085 #define ENCODE_PREFIX(prefix) \
1086 	do { \
1087 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
1088 		FAIL_IF(!inst); \
1089 		INC_SIZE(1); \
1090 		*inst = (prefix); \
1091 	} while (0)
1092 
1093 static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
1094 	sljit_s32 dst, sljit_sw dstw,
1095 	sljit_s32 src, sljit_sw srcw)
1096 {
1097 	sljit_u8* inst;
1098 	sljit_s32 dst_r;
1099 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1100 	sljit_s32 work_r;
1101 #endif
1102 
1103 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1104 	compiler->mode32 = 0;
1105 #endif
1106 
1107 	if (src & SLJIT_IMM) {
1108 		if (FAST_IS_REG(dst)) {
1109 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1110 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1111 #else
1112 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1113 			FAIL_IF(!inst);
1114 			*inst = MOV_rm_i32;
1115 			return SLJIT_SUCCESS;
1116 #endif
1117 		}
1118 		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
1119 		FAIL_IF(!inst);
1120 		*inst = MOV_rm8_i8;
1121 		return SLJIT_SUCCESS;
1122 	}
1123 
1124 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1125 
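	/* For illustration: in 32-bit mode only registers with reg_map values 0-3
	   (EAX, ECX, EDX, EBX) have byte-addressable low halves (AL..BL); there is
	   no REX prefix to reach the others. This is why the reg_map[src] >= 4 and
	   dst_r == TMP_REG1 cases below take detours through TMP_REG1, a shift/and
	   sequence, or an XCHG with a byte-addressable register. */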
1126 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
1127 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1128 		if (reg_map[src] >= 4) {
1129 			SLJIT_ASSERT(dst_r == TMP_REG1);
1130 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
1131 		} else
1132 			dst_r = src;
1133 #else
1134 		dst_r = src;
1135 #endif
1136 	}
1137 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1138 	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
1139 		/* src, dst are registers. */
1140 		SLJIT_ASSERT(SLOW_IS_REG(dst));
1141 		if (reg_map[dst] < 4) {
1142 			if (dst != src)
1143 				EMIT_MOV(compiler, dst, 0, src, 0);
1144 			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
1145 			FAIL_IF(!inst);
1146 			*inst++ = GROUP_0F;
1147 			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1148 		}
1149 		else {
1150 			if (dst != src)
1151 				EMIT_MOV(compiler, dst, 0, src, 0);
1152 			if (sign) {
1153 				/* shl reg, 24 */
1154 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
1155 				FAIL_IF(!inst);
1156 				*inst |= SHL;
1157 				/* sar reg, 24 */
1158 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
1159 				FAIL_IF(!inst);
1160 				*inst |= SAR;
1161 			}
1162 			else {
1163 				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
1164 				FAIL_IF(!inst);
1165 				*(inst + 1) |= AND;
1166 			}
1167 		}
1168 		return SLJIT_SUCCESS;
1169 	}
1170 #endif
1171 	else {
1172 		/* src is either a memory operand or a register with reg_map[src] < 4 on x86-32. */
1173 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1174 		FAIL_IF(!inst);
1175 		*inst++ = GROUP_0F;
1176 		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
1177 	}
1178 
1179 	if (dst & SLJIT_MEM) {
1180 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1181 		if (dst_r == TMP_REG1) {
1182 			/* Find an unused register whose reg_map value is < 4. */
1183 			if ((dst & REG_MASK) == SLJIT_R0) {
1184 				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
1185 					work_r = SLJIT_R2;
1186 				else
1187 					work_r = SLJIT_R1;
1188 			}
1189 			else {
1190 				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
1191 					work_r = SLJIT_R0;
1192 				else if ((dst & REG_MASK) == SLJIT_R1)
1193 					work_r = SLJIT_R2;
1194 				else
1195 					work_r = SLJIT_R1;
1196 			}
1197 
1198 			if (work_r == SLJIT_R0) {
1199 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1200 			}
1201 			else {
1202 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1203 				FAIL_IF(!inst);
1204 				*inst = XCHG_r_rm;
1205 			}
1206 
1207 			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
1208 			FAIL_IF(!inst);
1209 			*inst = MOV_rm8_r8;
1210 
1211 			if (work_r == SLJIT_R0) {
1212 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1213 			}
1214 			else {
1215 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1216 				FAIL_IF(!inst);
1217 				*inst = XCHG_r_rm;
1218 			}
1219 		}
1220 		else {
1221 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1222 			FAIL_IF(!inst);
1223 			*inst = MOV_rm8_r8;
1224 		}
1225 #else
1226 		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1227 		FAIL_IF(!inst);
1228 		*inst = MOV_rm8_r8;
1229 #endif
1230 	}
1231 
1232 	return SLJIT_SUCCESS;
1233 }
1234 
1235 static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1236 	sljit_s32 src, sljit_sw srcw)
1237 {
1238 	sljit_u8* inst;
1239 
1240 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1241 	compiler->mode32 = 1;
1242 #endif
1243 
1244 	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1245 	FAIL_IF(!inst);
1246 	*inst++ = GROUP_0F;
1247 	*inst++ = PREFETCH;
1248 
1249 	if (op == SLJIT_PREFETCH_L1)
1250 		*inst |= (1 << 3);
1251 	else if (op == SLJIT_PREFETCH_L2)
1252 		*inst |= (2 << 3);
1253 	else if (op == SLJIT_PREFETCH_L3)
1254 		*inst |= (3 << 3);
1255 
1256 	return SLJIT_SUCCESS;
1257 }
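/* For illustration: the value OR-ed into the ModRM reg field above selects the
   member of the 0F 18 prefetch group: /1 = prefetcht0 (L1), /2 = prefetcht1
   (L2), /3 = prefetcht2 (L3), and /0 (no hint bits set) = prefetchnta. */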
1258 
1259 static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1260 	sljit_s32 dst, sljit_sw dstw,
1261 	sljit_s32 src, sljit_sw srcw)
1262 {
1263 	sljit_u8* inst;
1264 	sljit_s32 dst_r;
1265 
1266 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1267 	compiler->mode32 = 0;
1268 #endif
1269 
1270 	if (src & SLJIT_IMM) {
1271 		if (FAST_IS_REG(dst)) {
1272 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1273 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1274 #else
1275 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1276 			FAIL_IF(!inst);
1277 			*inst = MOV_rm_i32;
1278 			return SLJIT_SUCCESS;
1279 #endif
1280 		}
1281 		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1282 		FAIL_IF(!inst);
1283 		*inst = MOV_rm_i32;
1284 		return SLJIT_SUCCESS;
1285 	}
1286 
1287 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1288 
1289 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1290 		dst_r = src;
1291 	else {
1292 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1293 		FAIL_IF(!inst);
1294 		*inst++ = GROUP_0F;
1295 		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1296 	}
1297 
1298 	if (dst & SLJIT_MEM) {
1299 		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1300 		FAIL_IF(!inst);
1301 		*inst = MOV_rm_r;
1302 	}
1303 
1304 	return SLJIT_SUCCESS;
1305 }
1306 
1307 static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1308 	sljit_s32 dst, sljit_sw dstw,
1309 	sljit_s32 src, sljit_sw srcw)
1310 {
1311 	sljit_u8* inst;
1312 
1313 	if (dst == src && dstw == srcw) {
1314 		/* Same input and output */
1315 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1316 		FAIL_IF(!inst);
1317 		*inst++ = GROUP_F7;
1318 		*inst |= opcode;
1319 		return SLJIT_SUCCESS;
1320 	}
1321 
1322 	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED))
1323 		dst = TMP_REG1;
1324 
1325 	if (FAST_IS_REG(dst)) {
1326 		EMIT_MOV(compiler, dst, 0, src, srcw);
1327 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1328 		FAIL_IF(!inst);
1329 		*inst++ = GROUP_F7;
1330 		*inst |= opcode;
1331 		return SLJIT_SUCCESS;
1332 	}
1333 
1334 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1335 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1336 	FAIL_IF(!inst);
1337 	*inst++ = GROUP_F7;
1338 	*inst |= opcode;
1339 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1340 	return SLJIT_SUCCESS;
1341 }
1342 
1343 static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1344 	sljit_s32 dst, sljit_sw dstw,
1345 	sljit_s32 src, sljit_sw srcw)
1346 {
1347 	sljit_u8* inst;
1348 
1349 	if (dst == SLJIT_UNUSED)
1350 		dst = TMP_REG1;
1351 
1352 	if (FAST_IS_REG(dst)) {
1353 		EMIT_MOV(compiler, dst, 0, src, srcw);
1354 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1355 		FAIL_IF(!inst);
1356 		*inst++ = GROUP_F7;
1357 		*inst |= NOT_rm;
1358 		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1359 		FAIL_IF(!inst);
1360 		*inst = OR_r_rm;
1361 		return SLJIT_SUCCESS;
1362 	}
1363 
1364 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1365 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1366 	FAIL_IF(!inst);
1367 	*inst++ = GROUP_F7;
1368 	*inst |= NOT_rm;
1369 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1370 	FAIL_IF(!inst);
1371 	*inst = OR_r_rm;
1372 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1373 	return SLJIT_SUCCESS;
1374 }
1375 
1376 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1377 static const sljit_sw emit_clz_arg = 32 + 31;
1378 #endif
1379 
1380 static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1381 	sljit_s32 dst, sljit_sw dstw,
1382 	sljit_s32 src, sljit_sw srcw)
1383 {
1384 	sljit_u8* inst;
1385 	sljit_s32 dst_r;
1386 
1387 	SLJIT_UNUSED_ARG(op_flags);
1388 
1389 	if (cpu_has_cmov == -1)
1390 		get_cpu_features();
1391 
1392 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1393 
1394 	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1395 	FAIL_IF(!inst);
1396 	*inst++ = GROUP_0F;
1397 	*inst = BSR_r_rm;
1398 
1399 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1400 	if (cpu_has_cmov) {
1401 		if (dst_r != TMP_REG1) {
1402 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
1403 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1404 		}
1405 		else
1406 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
1407 
1408 		FAIL_IF(!inst);
1409 		*inst++ = GROUP_0F;
1410 		*inst = CMOVE_r_rm;
1411 	}
1412 	else
1413 		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
1414 
1415 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1416 #else
1417 	if (cpu_has_cmov) {
1418 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
1419 
1420 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1421 		FAIL_IF(!inst);
1422 		*inst++ = GROUP_0F;
1423 		*inst = CMOVE_r_rm;
1424 	}
1425 	else
1426 		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
1427 
1428 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1429 #endif
1430 
1431 	FAIL_IF(!inst);
1432 	*(inst + 1) |= XOR;
1433 
1434 	if (dst & SLJIT_MEM)
1435 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1436 	return SLJIT_SUCCESS;
1437 }
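/* For illustration: BSR returns the index of the highest set bit and only sets
   ZF for a zero input, so clz(x) = 31 - bsr(x) = bsr(x) ^ 31 for nonzero x,
   which is what the final XOR computes. Example: x = 0x10 gives bsr = 4 and
   4 ^ 31 = 27 = clz(0x10). For x = 0 the CMOV (or generic cmov) path loads
   32 + 31 (or 64 + 63 in 64-bit mode), and the XOR then yields 32 (or 64). */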
1438 
1439 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1440 	sljit_s32 dst, sljit_sw dstw,
1441 	sljit_s32 src, sljit_sw srcw)
1442 {
1443 	sljit_s32 op_flags = GET_ALL_FLAGS(op);
1444 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1445 	sljit_s32 dst_is_ereg = 0;
1446 #endif
1447 
1448 	CHECK_ERROR();
1449 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1450 	ADJUST_LOCAL_OFFSET(dst, dstw);
1451 	ADJUST_LOCAL_OFFSET(src, srcw);
1452 
1453 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1454 	CHECK_EXTRA_REGS(src, srcw, (void)0);
1455 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1456 	compiler->mode32 = op_flags & SLJIT_I32_OP;
1457 #endif
1458 
1459 	op = GET_OPCODE(op);
1460 
1461 	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1462 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1463 		compiler->mode32 = 0;
1464 #endif
1465 
1466 		if (FAST_IS_REG(src) && src == dst) {
1467 			if (!TYPE_CAST_NEEDED(op))
1468 				return SLJIT_SUCCESS;
1469 		}
1470 
1471 		if (op_flags & SLJIT_I32_OP) {
1472 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1473 			if (src & SLJIT_MEM) {
1474 				if (op == SLJIT_MOV_S32)
1475 					op = SLJIT_MOV_U32;
1476 			}
1477 			else if (src & SLJIT_IMM) {
1478 				if (op == SLJIT_MOV_U32)
1479 					op = SLJIT_MOV_S32;
1480 			}
1481 #endif
1482 		}
1483 
1484 		if (src & SLJIT_IMM) {
1485 			switch (op) {
1486 			case SLJIT_MOV_U8:
1487 				srcw = (sljit_u8)srcw;
1488 				break;
1489 			case SLJIT_MOV_S8:
1490 				srcw = (sljit_s8)srcw;
1491 				break;
1492 			case SLJIT_MOV_U16:
1493 				srcw = (sljit_u16)srcw;
1494 				break;
1495 			case SLJIT_MOV_S16:
1496 				srcw = (sljit_s16)srcw;
1497 				break;
1498 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1499 			case SLJIT_MOV_U32:
1500 				srcw = (sljit_u32)srcw;
1501 				break;
1502 			case SLJIT_MOV_S32:
1503 				srcw = (sljit_s32)srcw;
1504 				break;
1505 #endif
1506 			}
1507 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1508 			if (SLJIT_UNLIKELY(dst_is_ereg))
1509 				return emit_mov(compiler, dst, dstw, src, srcw);
1510 #endif
1511 		}
1512 
1513 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1514 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1515 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1516 			dst = TMP_REG1;
1517 		}
1518 #endif
1519 
1520 		switch (op) {
1521 		case SLJIT_MOV:
1522 		case SLJIT_MOV_P:
1523 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1524 		case SLJIT_MOV_U32:
1525 		case SLJIT_MOV_S32:
1526 #endif
1527 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1528 			break;
1529 		case SLJIT_MOV_U8:
1530 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1531 			break;
1532 		case SLJIT_MOV_S8:
1533 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1534 			break;
1535 		case SLJIT_MOV_U16:
1536 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1537 			break;
1538 		case SLJIT_MOV_S16:
1539 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1540 			break;
1541 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1542 		case SLJIT_MOV_U32:
1543 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1544 			break;
1545 		case SLJIT_MOV_S32:
1546 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1547 			break;
1548 #endif
1549 		}
1550 
1551 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1552 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1553 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1554 #endif
1555 		return SLJIT_SUCCESS;
1556 	}
1557 
1558 	switch (op) {
1559 	case SLJIT_NOT:
1560 		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1561 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1562 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1563 
1564 	case SLJIT_NEG:
1565 		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1566 
1567 	case SLJIT_CLZ:
1568 		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1569 	}
1570 
1571 	return SLJIT_SUCCESS;
1572 }
1573 
1574 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1575 
1576 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1577 	if (IS_HALFWORD(immw) || compiler->mode32) { \
1578 		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1579 		FAIL_IF(!inst); \
1580 		*(inst + 1) |= (op_imm); \
1581 	} \
1582 	else { \
1583 		FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
1584 		inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
1585 		FAIL_IF(!inst); \
1586 		*inst = (op_mr); \
1587 	}
1588 
1589 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1590 	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1591 
1592 #else
1593 
1594 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1595 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1596 	FAIL_IF(!inst); \
1597 	*(inst + 1) |= (op_imm);
1598 
1599 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1600 	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1601 
1602 #endif
1603 
1604 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1605 	sljit_u32 op_types,
1606 	sljit_s32 dst, sljit_sw dstw,
1607 	sljit_s32 src1, sljit_sw src1w,
1608 	sljit_s32 src2, sljit_sw src2w)
1609 {
1610 	sljit_u8* inst;
1611 	sljit_u8 op_eax_imm = (op_types >> 24);
1612 	sljit_u8 op_rm = (op_types >> 16) & 0xff;
1613 	sljit_u8 op_mr = (op_types >> 8) & 0xff;
1614 	sljit_u8 op_imm = op_types & 0xff;
1615 
1616 	if (dst == SLJIT_UNUSED) {
1617 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1618 		if (src2 & SLJIT_IMM) {
1619 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1620 		}
1621 		else {
1622 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1623 			FAIL_IF(!inst);
1624 			*inst = op_rm;
1625 		}
1626 		return SLJIT_SUCCESS;
1627 	}
1628 
1629 	if (dst == src1 && dstw == src1w) {
1630 		if (src2 & SLJIT_IMM) {
1631 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1632 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1633 #else
1634 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1635 #endif
1636 				BINARY_EAX_IMM(op_eax_imm, src2w);
1637 			}
1638 			else {
1639 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1640 			}
1641 		}
1642 		else if (FAST_IS_REG(dst)) {
1643 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1644 			FAIL_IF(!inst);
1645 			*inst = op_rm;
1646 		}
1647 		else if (FAST_IS_REG(src2)) {
1648 			/* Special exception for sljit_emit_op_flags. */
1649 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1650 			FAIL_IF(!inst);
1651 			*inst = op_mr;
1652 		}
1653 		else {
1654 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1655 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1656 			FAIL_IF(!inst);
1657 			*inst = op_mr;
1658 		}
1659 		return SLJIT_SUCCESS;
1660 	}
1661 
1662 	/* Only for cumulative operations. */
1663 	if (dst == src2 && dstw == src2w) {
1664 		if (src1 & SLJIT_IMM) {
1665 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1666 			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1667 #else
1668 			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1669 #endif
1670 				BINARY_EAX_IMM(op_eax_imm, src1w);
1671 			}
1672 			else {
1673 				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1674 			}
1675 		}
1676 		else if (FAST_IS_REG(dst)) {
1677 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1678 			FAIL_IF(!inst);
1679 			*inst = op_rm;
1680 		}
1681 		else if (FAST_IS_REG(src1)) {
1682 			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1683 			FAIL_IF(!inst);
1684 			*inst = op_mr;
1685 		}
1686 		else {
1687 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1688 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1689 			FAIL_IF(!inst);
1690 			*inst = op_mr;
1691 		}
1692 		return SLJIT_SUCCESS;
1693 	}
1694 
1695 	/* General version. */
1696 	if (FAST_IS_REG(dst)) {
1697 		EMIT_MOV(compiler, dst, 0, src1, src1w);
1698 		if (src2 & SLJIT_IMM) {
1699 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1700 		}
1701 		else {
1702 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1703 			FAIL_IF(!inst);
1704 			*inst = op_rm;
1705 		}
1706 	}
1707 	else {
1708 		/* This version requires fewer memory writes. */
1709 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1710 		if (src2 & SLJIT_IMM) {
1711 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1712 		}
1713 		else {
1714 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1715 			FAIL_IF(!inst);
1716 			*inst = op_rm;
1717 		}
1718 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1719 	}
1720 
1721 	return SLJIT_SUCCESS;
1722 }
1723 
1724 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1725 	sljit_u32 op_types,
1726 	sljit_s32 dst, sljit_sw dstw,
1727 	sljit_s32 src1, sljit_sw src1w,
1728 	sljit_s32 src2, sljit_sw src2w)
1729 {
1730 	sljit_u8* inst;
1731 	sljit_u8 op_eax_imm = (op_types >> 24);
1732 	sljit_u8 op_rm = (op_types >> 16) & 0xff;
1733 	sljit_u8 op_mr = (op_types >> 8) & 0xff;
1734 	sljit_u8 op_imm = op_types & 0xff;
1735 
1736 	if (dst == SLJIT_UNUSED) {
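		/* The result is discarded; only the flags are needed, so compute into TMP_REG1. */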
1737 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1738 		if (src2 & SLJIT_IMM) {
1739 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1740 		}
1741 		else {
1742 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1743 			FAIL_IF(!inst);
1744 			*inst = op_rm;
1745 		}
1746 		return SLJIT_SUCCESS;
1747 	}
1748 
1749 	if (dst == src1 && dstw == src1w) {
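		/* Destination and first source are the same operand: use a read-modify-write form. */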
1750 		if (src2 & SLJIT_IMM) {
1751 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1752 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1753 #else
1754 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1755 #endif
1756 				BINARY_EAX_IMM(op_eax_imm, src2w);
1757 			}
1758 			else {
1759 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1760 			}
1761 		}
1762 		else if (FAST_IS_REG(dst)) {
1763 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1764 			FAIL_IF(!inst);
1765 			*inst = op_rm;
1766 		}
1767 		else if (FAST_IS_REG(src2)) {
1768 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1769 			FAIL_IF(!inst);
1770 			*inst = op_mr;
1771 		}
1772 		else {
1773 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1774 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1775 			FAIL_IF(!inst);
1776 			*inst = op_mr;
1777 		}
1778 		return SLJIT_SUCCESS;
1779 	}
1780 
1781 	/* General version. */
1782 	if (FAST_IS_REG(dst) && dst != src2) {
1783 		EMIT_MOV(compiler, dst, 0, src1, src1w);
1784 		if (src2 & SLJIT_IMM) {
1785 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1786 		}
1787 		else {
1788 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1789 			FAIL_IF(!inst);
1790 			*inst = op_rm;
1791 		}
1792 	}
1793 	else {
1794 		/* This version requires fewer memory writes. */
1795 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1796 		if (src2 & SLJIT_IMM) {
1797 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1798 		}
1799 		else {
1800 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1801 			FAIL_IF(!inst);
1802 			*inst = op_rm;
1803 		}
1804 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1805 	}
1806 
1807 	return SLJIT_SUCCESS;
1808 }
1809 
1810 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1811 	sljit_s32 dst, sljit_sw dstw,
1812 	sljit_s32 src1, sljit_sw src1w,
1813 	sljit_s32 src2, sljit_sw src2w)
1814 {
1815 	sljit_u8* inst;
1816 	sljit_s32 dst_r;
1817 
1818 	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
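	/* When dst is a memory operand, the product is built in TMP_REG1 and stored at the end. */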
1819 
1820 	/* Register destination. */
1821 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1822 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1823 		FAIL_IF(!inst);
1824 		*inst++ = GROUP_0F;
1825 		*inst = IMUL_r_rm;
1826 	}
1827 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1828 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1829 		FAIL_IF(!inst);
1830 		*inst++ = GROUP_0F;
1831 		*inst = IMUL_r_rm;
1832 	}
1833 	else if (src1 & SLJIT_IMM) {
1834 		if (src2 & SLJIT_IMM) {
1835 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1836 			src2 = dst_r;
1837 			src2w = 0;
1838 		}
1839 
1840 		if (src1w <= 127 && src1w >= -128) {
1841 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1842 			FAIL_IF(!inst);
1843 			*inst = IMUL_r_rm_i8;
1844 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1845 			FAIL_IF(!inst);
1846 			INC_SIZE(1);
1847 			*inst = (sljit_s8)src1w;
1848 		}
1849 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1850 		else {
1851 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1852 			FAIL_IF(!inst);
1853 			*inst = IMUL_r_rm_i32;
1854 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1855 			FAIL_IF(!inst);
1856 			INC_SIZE(4);
1857 			sljit_unaligned_store_sw(inst, src1w);
1858 		}
1859 #else
1860 		else if (IS_HALFWORD(src1w)) {
1861 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1862 			FAIL_IF(!inst);
1863 			*inst = IMUL_r_rm_i32;
1864 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1865 			FAIL_IF(!inst);
1866 			INC_SIZE(4);
1867 			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1868 		}
1869 		else {
1870 			if (dst_r != src2)
1871 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1872 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1873 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1874 			FAIL_IF(!inst);
1875 			*inst++ = GROUP_0F;
1876 			*inst = IMUL_r_rm;
1877 		}
1878 #endif
1879 	}
1880 	else if (src2 & SLJIT_IMM) {
1881 		/* Note: src1 is NOT immediate. */
1882 
1883 		if (src2w <= 127 && src2w >= -128) {
1884 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1885 			FAIL_IF(!inst);
1886 			*inst = IMUL_r_rm_i8;
1887 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1888 			FAIL_IF(!inst);
1889 			INC_SIZE(1);
1890 			*inst = (sljit_s8)src2w;
1891 		}
1892 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1893 		else {
1894 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1895 			FAIL_IF(!inst);
1896 			*inst = IMUL_r_rm_i32;
1897 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1898 			FAIL_IF(!inst);
1899 			INC_SIZE(4);
1900 			sljit_unaligned_store_sw(inst, src2w);
1901 		}
1902 #else
1903 		else if (IS_HALFWORD(src2w)) {
1904 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1905 			FAIL_IF(!inst);
1906 			*inst = IMUL_r_rm_i32;
1907 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1908 			FAIL_IF(!inst);
1909 			INC_SIZE(4);
1910 			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1911 		}
1912 		else {
1913 			if (dst_r != src1)
1914 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1915 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1916 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1917 			FAIL_IF(!inst);
1918 			*inst++ = GROUP_0F;
1919 			*inst = IMUL_r_rm;
1920 		}
1921 #endif
1922 	}
1923 	else {
1924 		/* Neither argument is immediate. */
1925 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1926 			dst_r = TMP_REG1;
1927 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1928 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1929 		FAIL_IF(!inst);
1930 		*inst++ = GROUP_0F;
1931 		*inst = IMUL_r_rm;
1932 	}
1933 
1934 	if (dst & SLJIT_MEM)
1935 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1936 
1937 	return SLJIT_SUCCESS;
1938 }
1939 
1940 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1941 	sljit_s32 dst, sljit_sw dstw,
1942 	sljit_s32 src1, sljit_sw src1w,
1943 	sljit_s32 src2, sljit_sw src2w)
1944 {
1945 	sljit_u8* inst;
1946 	sljit_s32 dst_r, done = 0;
1947 
1948 	/* These cases are better handled by the normal code path. */
1949 	if (dst == src1 && dstw == src1w)
1950 		return SLJIT_ERR_UNSUPPORTED;
1951 	if (dst == src2 && dstw == src2w)
1952 		return SLJIT_ERR_UNSUPPORTED;
1953 
1954 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
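	/* LEA computes the sum without modifying the flags; build it into dst or TMP_REG1. */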
1955 
1956 	if (FAST_IS_REG(src1)) {
1957 		if (FAST_IS_REG(src2)) {
1958 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1959 			FAIL_IF(!inst);
1960 			*inst = LEA_r_m;
1961 			done = 1;
1962 		}
1963 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1964 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1965 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1966 #else
1967 		if (src2 & SLJIT_IMM) {
1968 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1969 #endif
1970 			FAIL_IF(!inst);
1971 			*inst = LEA_r_m;
1972 			done = 1;
1973 		}
1974 	}
1975 	else if (FAST_IS_REG(src2)) {
1976 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1977 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1978 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1979 #else
1980 		if (src1 & SLJIT_IMM) {
1981 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1982 #endif
1983 			FAIL_IF(!inst);
1984 			*inst = LEA_r_m;
1985 			done = 1;
1986 		}
1987 	}
1988 
1989 	if (done) {
1990 		if (dst_r == TMP_REG1)
1991 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1992 		return SLJIT_SUCCESS;
1993 	}
1994 	return SLJIT_ERR_UNSUPPORTED;
1995 }
1996 
1997 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1998 	sljit_s32 src1, sljit_sw src1w,
1999 	sljit_s32 src2, sljit_sw src2w)
2000 {
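	/* CMP computes src1 - src2 and sets the flags without storing the result. */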
2001 	sljit_u8* inst;
2002 
2003 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2004 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2005 #else
2006 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
2007 #endif
2008 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
2009 		return SLJIT_SUCCESS;
2010 	}
2011 
2012 	if (FAST_IS_REG(src1)) {
2013 		if (src2 & SLJIT_IMM) {
2014 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
2015 		}
2016 		else {
2017 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2018 			FAIL_IF(!inst);
2019 			*inst = CMP_r_rm;
2020 		}
2021 		return SLJIT_SUCCESS;
2022 	}
2023 
2024 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
2025 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2026 		FAIL_IF(!inst);
2027 		*inst = CMP_rm_r;
2028 		return SLJIT_SUCCESS;
2029 	}
2030 
2031 	if (src2 & SLJIT_IMM) {
2032 		if (src1 & SLJIT_IMM) {
2033 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2034 			src1 = TMP_REG1;
2035 			src1w = 0;
2036 		}
2037 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
2038 	}
2039 	else {
2040 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2041 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2042 		FAIL_IF(!inst);
2043 		*inst = CMP_r_rm;
2044 	}
2045 	return SLJIT_SUCCESS;
2046 }
2047 
2048 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
2049 	sljit_s32 src1, sljit_sw src1w,
2050 	sljit_s32 src2, sljit_sw src2w)
2051 {
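	/* TEST computes src1 & src2 and sets the flags without storing the result. */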
2052 	sljit_u8* inst;
2053 
2054 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2055 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
2056 #else
2057 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
2058 #endif
2059 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
2060 		return SLJIT_SUCCESS;
2061 	}
2062 
2063 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2064 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
2065 #else
2066 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
2067 #endif
2068 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
2069 		return SLJIT_SUCCESS;
2070 	}
2071 
2072 	if (!(src1 & SLJIT_IMM)) {
2073 		if (src2 & SLJIT_IMM) {
2074 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2075 			if (IS_HALFWORD(src2w) || compiler->mode32) {
2076 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2077 				FAIL_IF(!inst);
2078 				*inst = GROUP_F7;
2079 			}
2080 			else {
2081 				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
2082 				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
2083 				FAIL_IF(!inst);
2084 				*inst = TEST_rm_r;
2085 			}
2086 #else
2087 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
2088 			FAIL_IF(!inst);
2089 			*inst = GROUP_F7;
2090 #endif
2091 			return SLJIT_SUCCESS;
2092 		}
2093 		else if (FAST_IS_REG(src1)) {
2094 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
2095 			FAIL_IF(!inst);
2096 			*inst = TEST_rm_r;
2097 			return SLJIT_SUCCESS;
2098 		}
2099 	}
2100 
2101 	if (!(src2 & SLJIT_IMM)) {
2102 		if (src1 & SLJIT_IMM) {
2103 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2104 			if (IS_HALFWORD(src1w) || compiler->mode32) {
2105 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
2106 				FAIL_IF(!inst);
2107 				*inst = GROUP_F7;
2108 			}
2109 			else {
2110 				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
2111 				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2112 				FAIL_IF(!inst);
2113 				*inst = TEST_rm_r;
2114 			}
2115 #else
2116 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
2117 			FAIL_IF(!inst);
2118 			*inst = GROUP_F7;
2119 #endif
2120 			return SLJIT_SUCCESS;
2121 		}
2122 		else if (FAST_IS_REG(src2)) {
2123 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
2124 			FAIL_IF(!inst);
2125 			*inst = TEST_rm_r;
2126 			return SLJIT_SUCCESS;
2127 		}
2128 	}
2129 
2130 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2131 	if (src2 & SLJIT_IMM) {
2132 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2133 		if (IS_HALFWORD(src2w) || compiler->mode32) {
2134 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2135 			FAIL_IF(!inst);
2136 			*inst = GROUP_F7;
2137 		}
2138 		else {
2139 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
2140 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
2141 			FAIL_IF(!inst);
2142 			*inst = TEST_rm_r;
2143 		}
2144 #else
2145 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
2146 		FAIL_IF(!inst);
2147 		*inst = GROUP_F7;
2148 #endif
2149 	}
2150 	else {
2151 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
2152 		FAIL_IF(!inst);
2153 		*inst = TEST_rm_r;
2154 	}
2155 	return SLJIT_SUCCESS;
2156 }
2157 
2158 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
2159 	sljit_u8 mode,
2160 	sljit_s32 dst, sljit_sw dstw,
2161 	sljit_s32 src1, sljit_sw src1w,
2162 	sljit_s32 src2, sljit_sw src2w)
2163 {
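	/* A variable shift count must be in CL (SLJIT_PREF_SHIFT_REG); immediate counts are encoded in the instruction. */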
2164 	sljit_u8* inst;
2165 
2166 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2167 		if (dst == src1 && dstw == src1w) {
2168 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2169 			FAIL_IF(!inst);
2170 			*inst |= mode;
2171 			return SLJIT_SUCCESS;
2172 		}
2173 		if (dst == SLJIT_UNUSED) {
2174 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2175 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2176 			FAIL_IF(!inst);
2177 			*inst |= mode;
2178 			return SLJIT_SUCCESS;
2179 		}
2180 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2181 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2182 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2183 			FAIL_IF(!inst);
2184 			*inst |= mode;
2185 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2186 			return SLJIT_SUCCESS;
2187 		}
2188 		if (FAST_IS_REG(dst)) {
2189 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2190 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2191 			FAIL_IF(!inst);
2192 			*inst |= mode;
2193 			return SLJIT_SUCCESS;
2194 		}
2195 
2196 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2197 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2198 		FAIL_IF(!inst);
2199 		*inst |= mode;
2200 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2201 		return SLJIT_SUCCESS;
2202 	}
2203 
2204 	if (dst == SLJIT_PREF_SHIFT_REG) {
2205 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2206 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2207 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2208 		FAIL_IF(!inst);
2209 		*inst |= mode;
2210 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2211 	}
2212 	else if (SLOW_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2213 		if (src1 != dst)
2214 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2215 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2216 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2217 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2218 		FAIL_IF(!inst);
2219 		*inst |= mode;
2220 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2221 	}
2222 	else {
2223 		/* This case is complex since ecx itself may be used for
2224 		   addressing, and that use must keep working while ecx holds the shift count. */
2225 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2226 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2227 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2228 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2229 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2230 		FAIL_IF(!inst);
2231 		*inst |= mode;
2232 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2233 #else
2234 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2235 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2236 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2237 		FAIL_IF(!inst);
2238 		*inst |= mode;
2239 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2240 #endif
2241 		if (dst != SLJIT_UNUSED)
2242 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2243 	}
2244 
2245 	return SLJIT_SUCCESS;
2246 }
2247 
2248 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2249 	sljit_u8 mode, sljit_s32 set_flags,
2250 	sljit_s32 dst, sljit_sw dstw,
2251 	sljit_s32 src1, sljit_sw src1w,
2252 	sljit_s32 src2, sljit_sw src2w)
2253 {
2254 	/* The CPU does not set flags if the shift count is 0. */
2255 	if (src2 & SLJIT_IMM) {
2256 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2257 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2258 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2259 #else
2260 		if ((src2w & 0x1f) != 0)
2261 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2262 #endif
2263 		if (!set_flags)
2264 			return emit_mov(compiler, dst, dstw, src1, src1w);
2265 		/* OR dst, src, 0 */
2266 		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2267 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
2268 	}
2269 
2270 	if (!set_flags)
2271 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2272 
2273 	if (!FAST_IS_REG(dst))
2274 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2275 
2276 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2277 
2278 	if (FAST_IS_REG(dst))
2279 		return emit_cmp_binary(compiler, (dst == SLJIT_UNUSED) ? TMP_REG1 : dst, dstw, SLJIT_IMM, 0);
2280 	return SLJIT_SUCCESS;
2281 }
2282 
2283 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2284 	sljit_s32 dst, sljit_sw dstw,
2285 	sljit_s32 src1, sljit_sw src1w,
2286 	sljit_s32 src2, sljit_sw src2w)
2287 {
2288 	CHECK_ERROR();
2289 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2290 	ADJUST_LOCAL_OFFSET(dst, dstw);
2291 	ADJUST_LOCAL_OFFSET(src1, src1w);
2292 	ADJUST_LOCAL_OFFSET(src2, src2w);
2293 
2294 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2295 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
2296 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
2297 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2298 	compiler->mode32 = op & SLJIT_I32_OP;
2299 #endif
2300 
2301 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
2302 		return SLJIT_SUCCESS;
2303 
2304 	switch (GET_OPCODE(op)) {
2305 	case SLJIT_ADD:
2306 		if (!HAS_FLAGS(op)) {
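			/* When no flags are requested, a single LEA can perform the addition. */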
2307 			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2308 				return compiler->error;
2309 		}
2310 		return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
2311 			dst, dstw, src1, src1w, src2, src2w);
2312 	case SLJIT_ADDC:
2313 		return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
2314 			dst, dstw, src1, src1w, src2, src2w);
2315 	case SLJIT_SUB:
2316 		if (!HAS_FLAGS(op)) {
2317 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2318 				return compiler->error;
2319 			if (SLOW_IS_REG(dst) && src2 == dst) {
2320 				FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
2321 				return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
2322 			}
2323 		}
2324 
2325 		if (dst == SLJIT_UNUSED)
2326 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2327 		return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
2328 			dst, dstw, src1, src1w, src2, src2w);
2329 	case SLJIT_SUBC:
2330 		return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
2331 			dst, dstw, src1, src1w, src2, src2w);
2332 	case SLJIT_MUL:
2333 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2334 	case SLJIT_AND:
2335 		if (dst == SLJIT_UNUSED)
2336 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
2337 		return emit_cum_binary(compiler, BINARY_OPCODE(AND),
2338 			dst, dstw, src1, src1w, src2, src2w);
2339 	case SLJIT_OR:
2340 		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
2341 			dst, dstw, src1, src1w, src2, src2w);
2342 	case SLJIT_XOR:
2343 		return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
2344 			dst, dstw, src1, src1w, src2, src2w);
2345 	case SLJIT_SHL:
2346 		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2347 			dst, dstw, src1, src1w, src2, src2w);
2348 	case SLJIT_LSHR:
2349 		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2350 			dst, dstw, src1, src1w, src2, src2w);
2351 	case SLJIT_ASHR:
2352 		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2353 			dst, dstw, src1, src1w, src2, src2w);
2354 	}
2355 
2356 	return SLJIT_SUCCESS;
2357 }
2358 
2359 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2360 	sljit_s32 src, sljit_sw srcw)
2361 {
2362 	CHECK_ERROR();
2363 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2364 	ADJUST_LOCAL_OFFSET(src, srcw);
2365 
2366 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2367 
2368 	switch (op) {
2369 	case SLJIT_FAST_RETURN:
2370 		return emit_fast_return(compiler, src, srcw);
2371 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2372 		/* Don't adjust shadow stack if it isn't enabled.  */
2373 		if (!cpu_has_shadow_stack ())
2374 			return SLJIT_SUCCESS;
2375 		return adjust_shadow_stack(compiler, src, srcw, SLJIT_UNUSED, 0);
2376 	case SLJIT_PREFETCH_L1:
2377 	case SLJIT_PREFETCH_L2:
2378 	case SLJIT_PREFETCH_L3:
2379 	case SLJIT_PREFETCH_ONCE:
2380 		return emit_prefetch(compiler, op, src, srcw);
2381 	}
2382 
2383 	return SLJIT_SUCCESS;
2384 }
2385 
2386 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2387 {
2388 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2389 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2390 	if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2391 		return -1;
2392 #endif
2393 	return reg_map[reg];
2394 }
2395 
2396 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2397 {
2398 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2399 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2400 	return reg;
2401 #else
2402 	return freg_map[reg];
2403 #endif
2404 }
2405 
2406 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2407 	void *instruction, sljit_s32 size)
2408 {
2409 	sljit_u8 *inst;
2410 
2411 	CHECK_ERROR();
2412 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2413 
2414 	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2415 	FAIL_IF(!inst);
2416 	INC_SIZE(size);
2417 	SLJIT_MEMCPY(inst, instruction, size);
2418 	return SLJIT_SUCCESS;
2419 }
2420 
2421 /* --------------------------------------------------------------------- */
2422 /*  Floating point operators                                             */
2423 /* --------------------------------------------------------------------- */
2424 
2425 /* Alignment padding (3 words) + 4 constants of 16 bytes each. */
2426 static sljit_s32 sse2_data[3 + (4 * 4)];
2427 static sljit_s32 *sse2_buffer;
2428 
2429 static void init_compiler(void)
2430 {
2431 	/* Align to 16 bytes. */
2432 	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
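	/* The constants below are the sign-bit masks used by SLJIT_NEG_F64/F32 (XORPD)
	   and the clear-sign masks used by SLJIT_ABS_F64/F32 (ANDPD). */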
2433 
2434 	/* Single precision constants (each constant is 16 bytes long). */
2435 	sse2_buffer[0] = 0x80000000;
2436 	sse2_buffer[4] = 0x7fffffff;
2437 	/* Double precision constants (each constant is 16 bytes long). */
2438 	sse2_buffer[8] = 0;
2439 	sse2_buffer[9] = 0x80000000;
2440 	sse2_buffer[12] = 0xffffffff;
2441 	sse2_buffer[13] = 0x7fffffff;
2442 }
2443 
2444 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2445 	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2446 {
2447 	sljit_u8 *inst;
2448 
2449 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2450 	FAIL_IF(!inst);
2451 	*inst++ = GROUP_0F;
2452 	*inst = opcode;
2453 	return SLJIT_SUCCESS;
2454 }
2455 
2456 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2457 	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2458 {
2459 	sljit_u8 *inst;
2460 
2461 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2462 	FAIL_IF(!inst);
2463 	*inst++ = GROUP_0F;
2464 	*inst = opcode;
2465 	return SLJIT_SUCCESS;
2466 }
2467 
2468 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2469 	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2470 {
2471 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2472 }
2473 
2474 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2475 	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2476 {
2477 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2478 }
2479 
2480 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2481 	sljit_s32 dst, sljit_sw dstw,
2482 	sljit_s32 src, sljit_sw srcw)
2483 {
2484 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2485 	sljit_u8 *inst;
2486 
2487 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2488 	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2489 		compiler->mode32 = 0;
2490 #endif
2491 
2492 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2493 	FAIL_IF(!inst);
2494 	*inst++ = GROUP_0F;
2495 	*inst = CVTTSD2SI_r_xm;
2496 
2497 	if (dst & SLJIT_MEM)
2498 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2499 	return SLJIT_SUCCESS;
2500 }
2501 
2502 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2503 	sljit_s32 dst, sljit_sw dstw,
2504 	sljit_s32 src, sljit_sw srcw)
2505 {
2506 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2507 	sljit_u8 *inst;
2508 
2509 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2510 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2511 		compiler->mode32 = 0;
2512 #endif
2513 
2514 	if (src & SLJIT_IMM) {
2515 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2516 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2517 			srcw = (sljit_s32)srcw;
2518 #endif
2519 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2520 		src = TMP_REG1;
2521 		srcw = 0;
2522 	}
2523 
2524 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2525 	FAIL_IF(!inst);
2526 	*inst++ = GROUP_0F;
2527 	*inst = CVTSI2SD_x_rm;
2528 
2529 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2530 	compiler->mode32 = 1;
2531 #endif
2532 	if (dst_r == TMP_FREG)
2533 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2534 	return SLJIT_SUCCESS;
2535 }
2536 
2537 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2538 	sljit_s32 src1, sljit_sw src1w,
2539 	sljit_s32 src2, sljit_sw src2w)
2540 {
2541 	if (!FAST_IS_REG(src1)) {
2542 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2543 		src1 = TMP_FREG;
2544 	}
2545 
2546 	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2547 }
2548 
2549 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2550 	sljit_s32 dst, sljit_sw dstw,
2551 	sljit_s32 src, sljit_sw srcw)
2552 {
2553 	sljit_s32 dst_r;
2554 
2555 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2556 	compiler->mode32 = 1;
2557 #endif
2558 
2559 	CHECK_ERROR();
2560 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2561 
2562 	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2563 		if (FAST_IS_REG(dst))
2564 			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2565 		if (FAST_IS_REG(src))
2566 			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2567 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2568 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2569 	}
2570 
2571 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2572 		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2573 		if (FAST_IS_REG(src)) {
2574 			/* We overwrite the high bits of the source. From the SLJIT point of view,
2575 			   this is not an issue.
2576 			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
2577 			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2578 		}
2579 		else {
2580 			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2581 			src = TMP_FREG;
2582 		}
2583 
2584 		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2585 		if (dst_r == TMP_FREG)
2586 			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2587 		return SLJIT_SUCCESS;
2588 	}
2589 
2590 	if (FAST_IS_REG(dst)) {
2591 		dst_r = dst;
2592 		if (dst != src)
2593 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2594 	}
2595 	else {
2596 		dst_r = TMP_FREG;
2597 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2598 	}
2599 
2600 	switch (GET_OPCODE(op)) {
2601 	case SLJIT_NEG_F64:
2602 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2603 		break;
2604 
2605 	case SLJIT_ABS_F64:
2606 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2607 		break;
2608 	}
2609 
2610 	if (dst_r == TMP_FREG)
2611 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2612 	return SLJIT_SUCCESS;
2613 }
2614 
2615 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2616 	sljit_s32 dst, sljit_sw dstw,
2617 	sljit_s32 src1, sljit_sw src1w,
2618 	sljit_s32 src2, sljit_sw src2w)
2619 {
2620 	sljit_s32 dst_r;
2621 
2622 	CHECK_ERROR();
2623 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2624 	ADJUST_LOCAL_OFFSET(dst, dstw);
2625 	ADJUST_LOCAL_OFFSET(src1, src1w);
2626 	ADJUST_LOCAL_OFFSET(src2, src2w);
2627 
2628 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2629 	compiler->mode32 = 1;
2630 #endif
2631 
2632 	if (FAST_IS_REG(dst)) {
2633 		dst_r = dst;
2634 		if (dst == src1)
2635 			; /* Do nothing here. */
2636 		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2637 			/* Swap arguments. */
2638 			src2 = src1;
2639 			src2w = src1w;
2640 		}
2641 		else if (dst != src2)
2642 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2643 		else {
2644 			dst_r = TMP_FREG;
2645 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2646 		}
2647 	}
2648 	else {
2649 		dst_r = TMP_FREG;
2650 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2651 	}
2652 
2653 	switch (GET_OPCODE(op)) {
2654 	case SLJIT_ADD_F64:
2655 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2656 		break;
2657 
2658 	case SLJIT_SUB_F64:
2659 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2660 		break;
2661 
2662 	case SLJIT_MUL_F64:
2663 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2664 		break;
2665 
2666 	case SLJIT_DIV_F64:
2667 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2668 		break;
2669 	}
2670 
2671 	if (dst_r == TMP_FREG)
2672 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2673 	return SLJIT_SUCCESS;
2674 }
2675 
2676 /* --------------------------------------------------------------------- */
2677 /*  Conditional instructions                                             */
2678 /* --------------------------------------------------------------------- */
2679 
2680 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2681 {
2682 	sljit_u8 *inst;
2683 	struct sljit_label *label;
2684 
2685 	CHECK_ERROR_PTR();
2686 	CHECK_PTR(check_sljit_emit_label(compiler));
2687 
2688 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2689 		return compiler->last_label;
2690 
2691 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2692 	PTR_FAIL_IF(!label);
2693 	set_label(label, compiler);
2694 
2695 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2696 	PTR_FAIL_IF(!inst);
2697 
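	/* Record marker: a zero byte followed by a type byte
	   (0 = label, 1 = jump, 2 = const, 3 = put_label), resolved when the code is generated. */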
2698 	*inst++ = 0;
2699 	*inst++ = 0;
2700 
2701 	return label;
2702 }
2703 
2704 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2705 {
2706 	sljit_u8 *inst;
2707 	struct sljit_jump *jump;
2708 
2709 	CHECK_ERROR_PTR();
2710 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2711 
2712 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2713 	PTR_FAIL_IF_NULL(jump);
2714 	set_jump(jump, compiler, (type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT));
2715 	type &= 0xff;
2716 
2717 	/* Worst case size. */
2718 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2719 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2720 #else
2721 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2722 #endif
2723 
2724 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2725 	PTR_FAIL_IF_NULL(inst);
2726 
2727 	*inst++ = 0;
2728 	*inst++ = 1;
2729 	return jump;
2730 }
2731 
2732 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2733 {
2734 	sljit_u8 *inst;
2735 	struct sljit_jump *jump;
2736 
2737 	CHECK_ERROR();
2738 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2739 	ADJUST_LOCAL_OFFSET(src, srcw);
2740 
2741 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2742 
2743 	if (src == SLJIT_IMM) {
2744 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2745 		FAIL_IF_NULL(jump);
2746 		set_jump(jump, compiler, JUMP_ADDR | (type << TYPE_SHIFT));
2747 		jump->u.target = srcw;
2748 
2749 		/* Worst case size. */
2750 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2751 		compiler->size += 5;
2752 #else
2753 		compiler->size += 10 + 3;
2754 #endif
2755 
2756 		inst = (sljit_u8*)ensure_buf(compiler, 2);
2757 		FAIL_IF_NULL(inst);
2758 
2759 		*inst++ = 0;
2760 		*inst++ = 1;
2761 	}
2762 	else {
2763 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2764 		/* REX_W is not necessary (src is not immediate). */
2765 		compiler->mode32 = 1;
2766 #endif
2767 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2768 		FAIL_IF(!inst);
2769 		*inst++ = GROUP_FF;
2770 		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2771 	}
2772 	return SLJIT_SUCCESS;
2773 }
2774 
2775 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2776 	sljit_s32 dst, sljit_sw dstw,
2777 	sljit_s32 type)
2778 {
2779 	sljit_u8 *inst;
2780 	sljit_u8 cond_set = 0;
2781 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2782 	sljit_s32 reg;
2783 #endif
2784 	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2785 	sljit_s32 dst_save = dst;
2786 	sljit_sw dstw_save = dstw;
2787 
2788 	CHECK_ERROR();
2789 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2790 
2791 	ADJUST_LOCAL_OFFSET(dst, dstw);
2792 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2793 
2794 	type &= 0xff;
2795 	/* setcc = jcc + 0x10. */
2796 	cond_set = get_jump_code(type) + 0x10;
2797 
2798 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2799 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
2800 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2801 		FAIL_IF(!inst);
2802 		INC_SIZE(4 + 3);
2803 		/* Set low register to conditional flag. */
2804 		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2805 		*inst++ = GROUP_0F;
2806 		*inst++ = cond_set;
2807 		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
2808 		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2809 		*inst++ = OR_rm8_r8;
2810 		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2811 		return SLJIT_SUCCESS;
2812 	}
2813 
2814 	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
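	/* For plain moves into a register the flag byte is built directly in dst; otherwise it is built in TMP_REG1. */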
2815 
2816 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2817 	FAIL_IF(!inst);
2818 	INC_SIZE(4 + 4);
2819 	/* Set low register to conditional flag. */
2820 	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2821 	*inst++ = GROUP_0F;
2822 	*inst++ = cond_set;
2823 	*inst++ = MOD_REG | reg_lmap[reg];
2824 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2825 	/* The movzx instruction does not affect flags. */
2826 	*inst++ = GROUP_0F;
2827 	*inst++ = MOVZX_r_rm8;
2828 	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2829 
2830 	if (reg != TMP_REG1)
2831 		return SLJIT_SUCCESS;
2832 
2833 	if (GET_OPCODE(op) < SLJIT_ADD) {
2834 		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2835 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2836 	}
2837 
2838 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2839 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2840 	compiler->skip_checks = 1;
2841 #endif
2842 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2843 
2844 #else
2845 	/* The SLJIT_CONFIG_X86_32 code path starts here. */
2846 	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2847 		if (reg_map[dst] <= 4) {
2848 			/* Low byte is accessible. */
2849 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2850 			FAIL_IF(!inst);
2851 			INC_SIZE(3 + 3);
2852 			/* Set low byte to conditional flag. */
2853 			*inst++ = GROUP_0F;
2854 			*inst++ = cond_set;
2855 			*inst++ = MOD_REG | reg_map[dst];
2856 
2857 			*inst++ = GROUP_0F;
2858 			*inst++ = MOVZX_r_rm8;
2859 			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2860 			return SLJIT_SUCCESS;
2861 		}
2862 
2863 		/* Low byte is not accessible. */
2864 		if (cpu_has_cmov == -1)
2865 			get_cpu_features();
2866 
2867 		if (cpu_has_cmov) {
2868 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2869 			/* A "xor reg, reg" operation would overwrite the flags. */
2870 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2871 
2872 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2873 			FAIL_IF(!inst);
2874 			INC_SIZE(3);
2875 
2876 			*inst++ = GROUP_0F;
2877 			/* cmovcc = setcc - 0x50. */
2878 			*inst++ = cond_set - 0x50;
2879 			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2880 			return SLJIT_SUCCESS;
2881 		}
2882 
2883 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2884 		FAIL_IF(!inst);
2885 		INC_SIZE(1 + 3 + 3 + 1);
2886 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2887 		/* Set al to conditional flag. */
2888 		*inst++ = GROUP_0F;
2889 		*inst++ = cond_set;
2890 		*inst++ = MOD_REG | 0 /* eax */;
2891 
2892 		*inst++ = GROUP_0F;
2893 		*inst++ = MOVZX_r_rm8;
2894 		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2895 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2896 		return SLJIT_SUCCESS;
2897 	}
2898 
2899 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
2900 		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2901 
2902 		if (dst != SLJIT_R0) {
2903 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2904 			FAIL_IF(!inst);
2905 			INC_SIZE(1 + 3 + 2 + 1);
2906 			/* Set low register to conditional flag. */
2907 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2908 			*inst++ = GROUP_0F;
2909 			*inst++ = cond_set;
2910 			*inst++ = MOD_REG | 0 /* eax */;
2911 			*inst++ = OR_rm8_r8;
2912 			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2913 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2914 		}
2915 		else {
2916 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2917 			FAIL_IF(!inst);
2918 			INC_SIZE(2 + 3 + 2 + 2);
2919 			/* Set low register to conditional flag. */
2920 			*inst++ = XCHG_r_rm;
2921 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2922 			*inst++ = GROUP_0F;
2923 			*inst++ = cond_set;
2924 			*inst++ = MOD_REG | 1 /* ecx */;
2925 			*inst++ = OR_rm8_r8;
2926 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2927 			*inst++ = XCHG_r_rm;
2928 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2929 		}
2930 		return SLJIT_SUCCESS;
2931 	}
2932 
2933 	/* Set TMP_REG1 to the conditional bit (0 or 1). */
2934 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2935 	FAIL_IF(!inst);
2936 	INC_SIZE(1 + 3 + 3 + 1);
2937 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2938 	/* Set al to conditional flag. */
2939 	*inst++ = GROUP_0F;
2940 	*inst++ = cond_set;
2941 	*inst++ = MOD_REG | 0 /* eax */;
2942 
2943 	*inst++ = GROUP_0F;
2944 	*inst++ = MOVZX_r_rm8;
2945 	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2946 
2947 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2948 
2949 	if (GET_OPCODE(op) < SLJIT_ADD)
2950 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2951 
2952 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2953 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2954 	compiler->skip_checks = 1;
2955 #endif
2956 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2957 #endif /* SLJIT_CONFIG_X86_64 */
2958 }
2959 
2960 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2961 	sljit_s32 dst_reg,
2962 	sljit_s32 src, sljit_sw srcw)
2963 {
2964 	sljit_u8* inst;
2965 
2966 	CHECK_ERROR();
2967 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2968 
2969 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2970 	dst_reg &= ~SLJIT_I32_OP;
2971 
2972 	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
2973 		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2974 #else
2975 	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
2976 		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2977 #endif
2978 
2979 	/* ADJUST_LOCAL_OFFSET is not needed. */
2980 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2981 
2982 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2983 	compiler->mode32 = dst_reg & SLJIT_I32_OP;
2984 	dst_reg &= ~SLJIT_I32_OP;
2985 #endif
2986 
2987 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2988 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2989 		src = TMP_REG1;
2990 		srcw = 0;
2991 	}
2992 
2993 	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2994 	FAIL_IF(!inst);
2995 	*inst++ = GROUP_0F;
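	/* cmovcc = jcc - 0x40 (both are in the 0F opcode group). */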
2996 	*inst = get_jump_code(type & 0xff) - 0x40;
2997 	return SLJIT_SUCCESS;
2998 }
2999 
3000 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
3001 {
3002 	CHECK_ERROR();
3003 	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
3004 	ADJUST_LOCAL_OFFSET(dst, dstw);
3005 
3006 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
3007 
3008 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3009 	compiler->mode32 = 0;
3010 #endif
3011 
3012 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
3013 
3014 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3015 	if (NOT_HALFWORD(offset)) {
3016 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
3017 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
3018 		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
3019 		return compiler->error;
3020 #else
3021 		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
3022 #endif
3023 	}
3024 #endif
3025 
3026 	if (offset != 0)
3027 		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
3028 	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
3029 }
3030 
3031 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3032 {
3033 	sljit_u8 *inst;
3034 	struct sljit_const *const_;
3035 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3036 	sljit_s32 reg;
3037 #endif
3038 
3039 	CHECK_ERROR_PTR();
3040 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3041 	ADJUST_LOCAL_OFFSET(dst, dstw);
3042 
3043 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
3044 
3045 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3046 	PTR_FAIL_IF(!const_);
3047 	set_const(const_, compiler);
3048 
3049 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3050 	compiler->mode32 = 0;
3051 	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3052 
3053 	if (emit_load_imm64(compiler, reg, init_value))
3054 		return NULL;
3055 #else
3056 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
3057 		return NULL;
3058 #endif
3059 
3060 	inst = (sljit_u8*)ensure_buf(compiler, 2);
3061 	PTR_FAIL_IF(!inst);
3062 
3063 	*inst++ = 0;
3064 	*inst++ = 2;
3065 
3066 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3067 	if (dst & SLJIT_MEM)
3068 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3069 			return NULL;
3070 #endif
3071 
3072 	return const_;
3073 }
3074 
3075 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3076 {
3077 	struct sljit_put_label *put_label;
3078 	sljit_u8 *inst;
3079 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3080 	sljit_s32 reg;
3081 	sljit_uw start_size;
3082 #endif
3083 
3084 	CHECK_ERROR_PTR();
3085 	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3086 	ADJUST_LOCAL_OFFSET(dst, dstw);
3087 
3088 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
3089 
3090 	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3091 	PTR_FAIL_IF(!put_label);
3092 	set_put_label(put_label, compiler, 0);
3093 
3094 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3095 	compiler->mode32 = 0;
3096 	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3097 
3098 	if (emit_load_imm64(compiler, reg, 0))
3099 		return NULL;
3100 #else
3101 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
3102 		return NULL;
3103 #endif
3104 
3105 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3106 	if (dst & SLJIT_MEM) {
3107 		start_size = compiler->size;
3108 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
3109 			return NULL;
3110 		put_label->flags = compiler->size - start_size;
3111 	}
3112 #endif
3113 
3114 	inst = (sljit_u8*)ensure_buf(compiler, 2);
3115 	PTR_FAIL_IF(!inst);
3116 
3117 	*inst++ = 0;
3118 	*inst++ = 3;
3119 
3120 	return put_label;
3121 }
3122 
3123 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3124 {
3125 	SLJIT_UNUSED_ARG(executable_offset);
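	/* x86-32 patches a 32 bit relative displacement; x86-64 patches a 64 bit absolute address. */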
3126 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3127 	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
3128 #else
3129 	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
3130 #endif
3131 }
3132 
3133 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3134 {
3135 	SLJIT_UNUSED_ARG(executable_offset);
3136 	sljit_unaligned_store_sw((void*)addr, new_constant);
3137 }
3138