1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 	return "x86" SLJIT_CPUINFO;
30 }
31 
32 /*
33    32b register indexes:
34      0 - EAX
35      1 - ECX
36      2 - EDX
37      3 - EBX
38      4 - none
39      5 - EBP
40      6 - ESI
41      7 - EDI
42 */
43 
44 /*
45    64b register indexes:
46      0 - RAX
47      1 - RCX
48      2 - RDX
49      3 - RBX
50      4 - none
51      5 - RBP
52      6 - RSI
53      7 - RDI
54      8 - R8   - From now on REX prefix is required
55      9 - R9
56     10 - R10
57     11 - R11
58     12 - R12
59     13 - R13
60     14 - R14
61     15 - R15
62 */
63 
64 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
65 
66 /* Last register + 1. */
67 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
68 
69 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
70 	0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
71 };
72 
73 #define CHECK_EXTRA_REGS(p, w, do) \
74 	if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
75 		if (p <= compiler->scratches) \
76 			w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
77 		else \
78 			w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
79 		p = SLJIT_MEM1(SLJIT_SP); \
80 		do; \
81 	}
82 
83 #else /* SLJIT_CONFIG_X86_32 */
84 
85 /* Last register + 1. */
86 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
87 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
88 
/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
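
/* A hedged byte-level example of the cost mentioned above (encodings taken
   from the Intel manuals, not emitted by this file):

       mov rax, [rbx]   ->  48 8b 03       (mod=00, rm=011, plain base)
       mov rax, [r12]   ->  49 8b 04 24    (rm=100 forces a SIB byte)
       mov rax, [r13]   ->  49 8b 45 00    (mod=00/rm=101 would mean RIP-relative,
                                            so a zero disp8 is needed instead)

   This is why r12 and r13 are avoided as memory bases and r12 is mapped to a
   higher saved register. */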
92 #ifndef _WIN64
93 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
94 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
95 	0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
96 };
97 /* low-map. reg_map & 0x7. */
98 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
99 	0, 0, 6, 1, 7, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 1
100 };
101 #else
102 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
103 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
104 	0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9
105 };
106 /* low-map. reg_map & 0x7. */
107 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
108 	0, 0, 2, 1, 2,  3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 0, 1
109 };
110 #endif
111 
112 #define REX_W		0x48
113 #define REX_R		0x44
114 #define REX_X		0x42
115 #define REX_B		0x41
116 #define REX		0x40
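
/* A hedged composition example (illustration only): the REX bits are simply
   OR-ed together.  A 64-bit "mov r8, rax" needs REX_W for the operand size and
   REX_B because the r/m register index (8) does not fit into three bits:

       REX_W | REX_B == 0x49   ->   49 89 c0   (opcode 0x89, mod=11, reg=rax, rm=r8)

   On x86-32 these prefixes are never emitted. */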
117 
118 #ifndef _WIN64
119 #define HALFWORD_MAX 0x7fffffffl
120 #define HALFWORD_MIN -0x80000000l
121 #else
122 #define HALFWORD_MAX 0x7fffffffll
123 #define HALFWORD_MIN -0x80000000ll
124 #endif
125 
126 #define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
127 #define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
128 
129 #define CHECK_EXTRA_REGS(p, w, do)
130 
131 #endif /* SLJIT_CONFIG_X86_32 */
132 
133 #define TMP_FREG	(0)
134 
135 /* Size flags for emit_x86_instruction: */
136 #define EX86_BIN_INS		0x0010
137 #define EX86_SHIFT_INS		0x0020
138 #define EX86_REX		0x0040
139 #define EX86_NO_REXW		0x0080
140 #define EX86_BYTE_ARG		0x0100
141 #define EX86_HALF_ARG		0x0200
142 #define EX86_PREF_66		0x0400
143 #define EX86_PREF_F2		0x0800
144 #define EX86_PREF_F3		0x1000
145 #define EX86_SSE2_OP1		0x2000
146 #define EX86_SSE2_OP2		0x4000
147 #define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)
148 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */
152 
153 #define ADD		(/* BINARY */ 0 << 3)
154 #define ADD_EAX_i32	0x05
155 #define ADD_r_rm	0x03
156 #define ADD_rm_r	0x01
157 #define ADDSD_x_xm	0x58
158 #define ADC		(/* BINARY */ 2 << 3)
159 #define ADC_EAX_i32	0x15
160 #define ADC_r_rm	0x13
161 #define ADC_rm_r	0x11
162 #define AND		(/* BINARY */ 4 << 3)
163 #define AND_EAX_i32	0x25
164 #define AND_r_rm	0x23
165 #define AND_rm_r	0x21
166 #define ANDPD_x_xm	0x54
167 #define BSR_r_rm	(/* GROUP_0F */ 0xbd)
168 #define CALL_i32	0xe8
169 #define CALL_rm		(/* GROUP_FF */ 2 << 3)
170 #define CDQ		0x99
171 #define CMOVE_r_rm	(/* GROUP_0F */ 0x44)
172 #define CMP		(/* BINARY */ 7 << 3)
173 #define CMP_EAX_i32	0x3d
174 #define CMP_r_rm	0x3b
175 #define CMP_rm_r	0x39
176 #define CVTPD2PS_x_xm	0x5a
177 #define CVTSI2SD_x_rm	0x2a
178 #define CVTTSD2SI_r_xm	0x2c
179 #define DIV		(/* GROUP_F7 */ 6 << 3)
180 #define DIVSD_x_xm	0x5e
181 #define INT3		0xcc
182 #define IDIV		(/* GROUP_F7 */ 7 << 3)
183 #define IMUL		(/* GROUP_F7 */ 5 << 3)
184 #define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
185 #define IMUL_r_rm_i8	0x6b
186 #define IMUL_r_rm_i32	0x69
187 #define JE_i8		0x74
188 #define JNE_i8		0x75
189 #define JMP_i8		0xeb
190 #define JMP_i32		0xe9
191 #define JMP_rm		(/* GROUP_FF */ 4 << 3)
192 #define LEA_r_m		0x8d
193 #define MOV_r_rm	0x8b
194 #define MOV_r_i32	0xb8
195 #define MOV_rm_r	0x89
196 #define MOV_rm_i32	0xc7
197 #define MOV_rm8_i8	0xc6
198 #define MOV_rm8_r8	0x88
199 #define MOVSD_x_xm	0x10
200 #define MOVSD_xm_x	0x11
201 #define MOVSXD_r_rm	0x63
202 #define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
203 #define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
204 #define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
205 #define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
206 #define MUL		(/* GROUP_F7 */ 4 << 3)
207 #define MULSD_x_xm	0x59
208 #define NEG_rm		(/* GROUP_F7 */ 3 << 3)
209 #define NOP		0x90
210 #define NOT_rm		(/* GROUP_F7 */ 2 << 3)
211 #define OR		(/* BINARY */ 1 << 3)
212 #define OR_r_rm		0x0b
213 #define OR_EAX_i32	0x0d
214 #define OR_rm_r		0x09
215 #define OR_rm8_r8	0x08
216 #define POP_r		0x58
217 #define POP_rm		0x8f
218 #define POPF		0x9d
219 #define PREFETCH	0x18
220 #define PUSH_i32	0x68
221 #define PUSH_r		0x50
222 #define PUSH_rm		(/* GROUP_FF */ 6 << 3)
223 #define PUSHF		0x9c
224 #define RET_near	0xc3
225 #define RET_i16		0xc2
226 #define SBB		(/* BINARY */ 3 << 3)
227 #define SBB_EAX_i32	0x1d
228 #define SBB_r_rm	0x1b
229 #define SBB_rm_r	0x19
230 #define SAR		(/* SHIFT */ 7 << 3)
231 #define SHL		(/* SHIFT */ 4 << 3)
232 #define SHR		(/* SHIFT */ 5 << 3)
233 #define SUB		(/* BINARY */ 5 << 3)
234 #define SUB_EAX_i32	0x2d
235 #define SUB_r_rm	0x2b
236 #define SUB_rm_r	0x29
237 #define SUBSD_x_xm	0x5c
238 #define TEST_EAX_i32	0xa9
239 #define TEST_rm_r	0x85
240 #define UCOMISD_x_xm	0x2e
241 #define UNPCKLPD_x_xm	0x14
242 #define XCHG_EAX_r	0x90
243 #define XCHG_r_rm	0x87
244 #define XOR		(/* BINARY */ 6 << 3)
245 #define XOR_EAX_i32	0x35
246 #define XOR_r_rm	0x33
247 #define XOR_rm_r	0x31
248 #define XORPD_x_xm	0x57
249 
250 #define GROUP_0F	0x0f
251 #define GROUP_F7	0xf7
252 #define GROUP_FF	0xff
253 #define GROUP_BINARY_81	0x81
254 #define GROUP_BINARY_83	0x83
255 #define GROUP_SHIFT_1	0xd1
256 #define GROUP_SHIFT_N	0xc1
257 #define GROUP_SHIFT_CL	0xd3
258 
259 #define MOD_REG		0xc0
260 #define MOD_DISP8	0x40
261 
262 #define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))
263 
264 #define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
265 #define POP_REG(r)			(*inst++ = (POP_r + (r)))
266 #define RET()				(*inst++ = (RET_near))
267 #define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
268 /* r32, r/m32 */
269 #define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
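
/* A hedged usage sketch for the helper above (illustration only): MOV_RM()
   emits the 0x8b opcode followed by a hand-built ModRM byte, so after the
   usual ensure_buf()/INC_SIZE() pair has reserved two bytes,

       MOV_RM(0x3, 1, 0);

   stores 8b c8, i.e. "mov ecx, eax" (mod=11 register direct, reg=ecx, rm=eax). */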
270 
/* Multithreading does not affect these static variables, since they store
   built-in CPU features. It is therefore harmless if different threads
   overwrite them while detecting the CPU features at the same time. */
274 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
275 static sljit_s32 cpu_has_sse2 = -1;
276 #endif
277 static sljit_s32 cpu_has_cmov = -1;
278 
279 #ifdef _WIN32_WCE
280 #include <cmnintrin.h>
281 #elif defined(_MSC_VER) && _MSC_VER >= 1400
282 #include <intrin.h>
283 #endif
284 
285 /******************************************************/
286 /*    Unaligned-store functions                       */
287 /******************************************************/
288 
static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
290 {
291 	SLJIT_MEMCPY(addr, &value, sizeof(value));
292 }
293 
static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
295 {
296 	SLJIT_MEMCPY(addr, &value, sizeof(value));
297 }
298 
static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
300 {
301 	SLJIT_MEMCPY(addr, &value, sizeof(value));
302 }
303 
304 /******************************************************/
305 /*    Utility functions                               */
306 /******************************************************/
307 
static void get_cpu_features(void)
309 {
310 	sljit_u32 features;
311 
312 #if defined(_MSC_VER) && _MSC_VER >= 1400
313 
314 	int CPUInfo[4];
315 	__cpuid(CPUInfo, 1);
316 	features = (sljit_u32)CPUInfo[3];
317 
318 #elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)
319 
320 	/* AT&T syntax. */
321 	__asm__ (
322 		"movl $0x1, %%eax\n"
323 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
324 		/* On x86-32, there is no red zone, so this
325 		   should work (no need for a local variable). */
326 		"push %%ebx\n"
327 #endif
328 		"cpuid\n"
329 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
330 		"pop %%ebx\n"
331 #endif
332 		"movl %%edx, %0\n"
333 		: "=g" (features)
334 		:
335 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
336 		: "%eax", "%ecx", "%edx"
337 #else
338 		: "%rax", "%rbx", "%rcx", "%rdx"
339 #endif
340 	);
341 
342 #else /* _MSC_VER && _MSC_VER >= 1400 */
343 
344 	/* Intel syntax. */
345 	__asm {
346 		mov eax, 1
347 		cpuid
348 		mov features, edx
349 	}
350 
351 #endif /* _MSC_VER && _MSC_VER >= 1400 */
352 
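	/* CPUID leaf 1 (EAX = 1) returns the feature flags in EDX: bit 26
	   indicates SSE2 and bit 15 indicates CMOV, which is what the masks
	   below extract. */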
353 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
354 	cpu_has_sse2 = (features >> 26) & 0x1;
355 #endif
356 	cpu_has_cmov = (features >> 15) & 0x1;
357 }
358 
static sljit_u8 get_jump_code(sljit_s32 type)
360 {
361 	switch (type) {
362 	case SLJIT_EQUAL:
363 	case SLJIT_EQUAL_F64:
364 		return 0x84 /* je */;
365 
366 	case SLJIT_NOT_EQUAL:
367 	case SLJIT_NOT_EQUAL_F64:
368 		return 0x85 /* jne */;
369 
370 	case SLJIT_LESS:
371 	case SLJIT_LESS_F64:
372 		return 0x82 /* jc */;
373 
374 	case SLJIT_GREATER_EQUAL:
375 	case SLJIT_GREATER_EQUAL_F64:
376 		return 0x83 /* jae */;
377 
378 	case SLJIT_GREATER:
379 	case SLJIT_GREATER_F64:
380 		return 0x87 /* jnbe */;
381 
382 	case SLJIT_LESS_EQUAL:
383 	case SLJIT_LESS_EQUAL_F64:
384 		return 0x86 /* jbe */;
385 
386 	case SLJIT_SIG_LESS:
387 		return 0x8c /* jl */;
388 
389 	case SLJIT_SIG_GREATER_EQUAL:
390 		return 0x8d /* jnl */;
391 
392 	case SLJIT_SIG_GREATER:
393 		return 0x8f /* jnle */;
394 
395 	case SLJIT_SIG_LESS_EQUAL:
396 		return 0x8e /* jle */;
397 
398 	case SLJIT_OVERFLOW:
399 	case SLJIT_MUL_OVERFLOW:
400 		return 0x80 /* jo */;
401 
402 	case SLJIT_NOT_OVERFLOW:
403 	case SLJIT_MUL_NOT_OVERFLOW:
404 		return 0x81 /* jno */;
405 
406 	case SLJIT_UNORDERED_F64:
407 		return 0x8a /* jp */;
408 
409 	case SLJIT_ORDERED_F64:
410 		return 0x8b /* jpo */;
411 	}
412 	return 0;
413 }
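
/* A hedged note on how the value above is used: get_jump_code() returns the
   second byte of the two-byte "0f 8x rel32" near form.  The one-byte short
   form of the same condition is exactly 0x10 smaller, which is what
   generate_near_jump_code() relies on below, e.g.

       0f 84 rel32  (je near)   becomes   74 rel8  (je short)

   when the target fits into a signed 8-bit displacement. */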
414 
415 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
416 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
417 #else
418 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
419 #endif
420 
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
422 {
423 	sljit_s32 short_jump;
424 	sljit_uw label_addr;
425 
426 	if (jump->flags & JUMP_LABEL)
427 		label_addr = (sljit_uw)(code + jump->u.label->size);
428 	else
429 		label_addr = jump->u.target - executable_offset;
430 
431 	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
432 
433 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
434 	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
435 		return generate_far_jump_code(jump, code_ptr, type);
436 #endif
437 
438 	if (type == SLJIT_JUMP) {
439 		if (short_jump)
440 			*code_ptr++ = JMP_i8;
441 		else
442 			*code_ptr++ = JMP_i32;
443 		jump->addr++;
444 	}
445 	else if (type >= SLJIT_FAST_CALL) {
446 		short_jump = 0;
447 		*code_ptr++ = CALL_i32;
448 		jump->addr++;
449 	}
450 	else if (short_jump) {
451 		*code_ptr++ = get_jump_code(type) - 0x10;
452 		jump->addr++;
453 	}
454 	else {
455 		*code_ptr++ = GROUP_0F;
456 		*code_ptr++ = get_jump_code(type);
457 		jump->addr += 2;
458 	}
459 
460 	if (short_jump) {
461 		jump->flags |= PATCH_MB;
462 		code_ptr += sizeof(sljit_s8);
463 	} else {
464 		jump->flags |= PATCH_MW;
465 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
466 		code_ptr += sizeof(sljit_sw);
467 #else
468 		code_ptr += sizeof(sljit_s32);
469 #endif
470 	}
471 
472 	return code_ptr;
473 }
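
/* A hedged recap of the patching contract set up above: jump->addr is advanced
   so that it points at the not-yet-written displacement field, PATCH_MB marks
   a one-byte (rel8) displacement and PATCH_MW a 32-bit one, and the fix-up
   loop in sljit_generate_code() stores the real values once every label
   address is known. */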
474 
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
476 {
477 	struct sljit_memory_fragment *buf;
478 	sljit_u8 *code;
479 	sljit_u8 *code_ptr;
480 	sljit_u8 *buf_ptr;
481 	sljit_u8 *buf_end;
482 	sljit_u8 len;
483 	sljit_sw executable_offset;
484 	sljit_sw jump_addr;
485 
486 	struct sljit_label *label;
487 	struct sljit_jump *jump;
488 	struct sljit_const *const_;
489 
490 	CHECK_ERROR_PTR();
491 	CHECK_PTR(check_sljit_generate_code(compiler));
492 	reverse_buf(compiler);
493 
494 	/* Second code generation pass. */
495 	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
496 	PTR_FAIL_WITH_EXEC_IF(code);
497 	buf = compiler->buf;
498 
499 	code_ptr = code;
500 	label = compiler->labels;
501 	jump = compiler->jumps;
502 	const_ = compiler->consts;
503 	executable_offset = SLJIT_EXEC_OFFSET(code);
504 
505 	do {
506 		buf_ptr = buf->memory;
507 		buf_end = buf_ptr + buf->used_size;
508 		do {
509 			len = *buf_ptr++;
510 			if (len > 0) {
511 				/* The code is already generated. */
512 				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
513 				code_ptr += len;
514 				buf_ptr += len;
515 			}
516 			else {
517 				if (*buf_ptr >= 2) {
518 					jump->addr = (sljit_uw)code_ptr;
519 					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
520 						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
521 					else {
522 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
523 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
524 #else
525 						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
526 #endif
527 					}
528 					jump = jump->next;
529 				}
530 				else if (*buf_ptr == 0) {
531 					label->addr = ((sljit_uw)code_ptr) + executable_offset;
532 					label->size = code_ptr - code;
533 					label = label->next;
534 				}
535 				else { /* *buf_ptr is 1 */
536 					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
537 					const_ = const_->next;
538 				}
539 				buf_ptr++;
540 			}
541 		} while (buf_ptr < buf_end);
542 		SLJIT_ASSERT(buf_ptr == buf_end);
543 		buf = buf->next;
544 	} while (buf);
545 
546 	SLJIT_ASSERT(!label);
547 	SLJIT_ASSERT(!jump);
548 	SLJIT_ASSERT(!const_);
549 
550 	jump = compiler->jumps;
551 	while (jump) {
552 		jump_addr = jump->addr + executable_offset;
553 
554 		if (jump->flags & PATCH_MB) {
555 			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
556 			*(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
557 		} else if (jump->flags & PATCH_MW) {
558 			if (jump->flags & JUMP_LABEL) {
559 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
560 				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
561 #else
562 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
563 				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
564 #endif
565 			}
566 			else {
567 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
568 				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
569 #else
570 				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
571 				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
572 #endif
573 			}
574 		}
575 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
576 		else if (jump->flags & PATCH_MD)
577 			sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
578 #endif
579 
580 		jump = jump->next;
581 	}
582 
583 	/* Some space may be wasted because of short jumps. */
584 	SLJIT_ASSERT(code_ptr <= code + compiler->size);
585 	compiler->error = SLJIT_ERR_COMPILED;
586 	compiler->executable_offset = executable_offset;
587 	compiler->executable_size = code_ptr - code;
588 	return (void*)(code + executable_offset);
589 }
590 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
592 {
593 	switch (feature_type) {
594 	case SLJIT_HAS_FPU:
595 #ifdef SLJIT_IS_FPU_AVAILABLE
596 		return SLJIT_IS_FPU_AVAILABLE;
597 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
598 		if (cpu_has_sse2 == -1)
599 			get_cpu_features();
600 		return cpu_has_sse2;
601 #else /* SLJIT_DETECT_SSE2 */
602 		return 1;
603 #endif /* SLJIT_DETECT_SSE2 */
604 
605 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
606 	case SLJIT_HAS_VIRTUAL_REGISTERS:
607 		return 1;
608 #endif
609 
610 	case SLJIT_HAS_CLZ:
611 	case SLJIT_HAS_CMOV:
612 		if (cpu_has_cmov == -1)
613 			get_cpu_features();
614 		return cpu_has_cmov;
615 
616 	case SLJIT_HAS_PREF_SHIFT_REG:
617 		return 1;
618 
619 	case SLJIT_HAS_SSE2:
620 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
621 		if (cpu_has_sse2 == -1)
622 			get_cpu_features();
623 		return cpu_has_sse2;
624 #else
625 		return 1;
626 #endif
627 
628 	default:
629 		return 0;
630 	}
631 }
632 
633 /* --------------------------------------------------------------------- */
634 /*  Operators                                                            */
635 /* --------------------------------------------------------------------- */
636 
637 static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
638 	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
639 	sljit_s32 dst, sljit_sw dstw,
640 	sljit_s32 src1, sljit_sw src1w,
641 	sljit_s32 src2, sljit_sw src2w);
642 
643 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
644 	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
645 	sljit_s32 dst, sljit_sw dstw,
646 	sljit_s32 src1, sljit_sw src1w,
647 	sljit_s32 src2, sljit_sw src2w);
648 
649 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
650 	sljit_s32 dst, sljit_sw dstw,
651 	sljit_s32 src, sljit_sw srcw);
652 
653 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
654 	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
655 
656 #ifdef _WIN32
657 #include <malloc.h>
658 
static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
660 {
	/* Workaround for calling the internal _chkstk() function on Windows.
	This function touches all 4K pages that belong to the requested stack
	space, whose size is passed in local_size. This is necessary on Windows,
	where the stack can only grow in 4K steps. If the stack is already large
	enough, the function merely burns CPU cycles; but since that cannot be
	known in advance, it must always be called. I think this is a bad design
	in general, even if it has its reasons. */
668 	*(volatile sljit_s32*)alloca(local_size) = 0;
669 }
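
/* A hedged equivalent of the probe above (sketch only, never compiled): the
   paging requirement could also be spelled out as an explicit loop that
   touches one byte in every 4K page of the requested area,

       volatile sljit_u8 *p = (volatile sljit_u8*)alloca(local_size);
       sljit_sw i;
       for (i = 0; i < local_size; i += 4096)
           p[i] = 0;

   but the compiler-inserted _chkstk() probe triggered by the single alloca()
   store in sljit_grow_stack() already has the same effect. */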
670 
671 #endif
672 
673 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
674 #include "sljitNativeX86_32.c"
675 #else
676 #include "sljitNativeX86_64.c"
677 #endif
678 
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
680 	sljit_s32 dst, sljit_sw dstw,
681 	sljit_s32 src, sljit_sw srcw)
682 {
683 	sljit_u8* inst;
684 
685 	SLJIT_ASSERT(dst != SLJIT_UNUSED);
686 
687 	if (FAST_IS_REG(src)) {
688 		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
689 		FAIL_IF(!inst);
690 		*inst = MOV_rm_r;
691 		return SLJIT_SUCCESS;
692 	}
693 	if (src & SLJIT_IMM) {
694 		if (FAST_IS_REG(dst)) {
695 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
696 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
697 #else
698 			if (!compiler->mode32) {
699 				if (NOT_HALFWORD(srcw))
700 					return emit_load_imm64(compiler, dst, srcw);
701 			}
702 			else
703 				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
704 #endif
705 		}
706 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
707 		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate to memory move. Only the SLJIT_MOV operation copies
			   an immediate directly into memory, so TMP_REG1 can be used. */
710 			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
711 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
712 			FAIL_IF(!inst);
713 			*inst = MOV_rm_r;
714 			return SLJIT_SUCCESS;
715 		}
716 #endif
717 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
718 		FAIL_IF(!inst);
719 		*inst = MOV_rm_i32;
720 		return SLJIT_SUCCESS;
721 	}
722 	if (FAST_IS_REG(dst)) {
723 		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
724 		FAIL_IF(!inst);
725 		*inst = MOV_r_rm;
726 		return SLJIT_SUCCESS;
727 	}
728 
	/* Memory to memory move. Only the SLJIT_MOV operation copies
	   data from memory to memory, so TMP_REG1 can be used. */
731 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
732 	FAIL_IF(!inst);
733 	*inst = MOV_r_rm;
734 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
735 	FAIL_IF(!inst);
736 	*inst = MOV_rm_r;
737 	return SLJIT_SUCCESS;
738 }
739 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
741 {
742 	sljit_u8 *inst;
743 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
744 	sljit_s32 size;
745 #endif
746 
747 	CHECK_ERROR();
748 	CHECK(check_sljit_emit_op0(compiler, op));
749 
750 	switch (GET_OPCODE(op)) {
751 	case SLJIT_BREAKPOINT:
752 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
753 		FAIL_IF(!inst);
754 		INC_SIZE(1);
755 		*inst = INT3;
756 		break;
757 	case SLJIT_NOP:
758 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
759 		FAIL_IF(!inst);
760 		INC_SIZE(1);
761 		*inst = NOP;
762 		break;
763 	case SLJIT_LMUL_UW:
764 	case SLJIT_LMUL_SW:
765 	case SLJIT_DIVMOD_UW:
766 	case SLJIT_DIVMOD_SW:
767 	case SLJIT_DIV_UW:
768 	case SLJIT_DIV_SW:
769 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
770 #ifdef _WIN64
771 		SLJIT_ASSERT(
772 			reg_map[SLJIT_R0] == 0
773 			&& reg_map[SLJIT_R1] == 2
774 			&& reg_map[TMP_REG1] > 7);
775 #else
776 		SLJIT_ASSERT(
777 			reg_map[SLJIT_R0] == 0
778 			&& reg_map[SLJIT_R1] < 7
779 			&& reg_map[TMP_REG1] == 2);
780 #endif
781 		compiler->mode32 = op & SLJIT_I32_OP;
782 #endif
783 		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
784 
785 		op = GET_OPCODE(op);
786 		if ((op | 0x2) == SLJIT_DIV_UW) {
787 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
788 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
789 			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
790 #else
791 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
792 #endif
793 			FAIL_IF(!inst);
794 			*inst = XOR_r_rm;
795 		}
796 
797 		if ((op | 0x2) == SLJIT_DIV_SW) {
798 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
799 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
800 #endif
801 
802 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
803 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
804 			FAIL_IF(!inst);
805 			INC_SIZE(1);
806 			*inst = CDQ;
807 #else
808 			if (compiler->mode32) {
809 				inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
810 				FAIL_IF(!inst);
811 				INC_SIZE(1);
812 				*inst = CDQ;
813 			} else {
814 				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
815 				FAIL_IF(!inst);
816 				INC_SIZE(2);
817 				*inst++ = REX_W;
818 				*inst = CDQ;
819 			}
820 #endif
821 		}
822 
823 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
824 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
825 		FAIL_IF(!inst);
826 		INC_SIZE(2);
827 		*inst++ = GROUP_F7;
828 		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
829 #else
830 #ifdef _WIN64
831 		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
832 #else
833 		size = (!compiler->mode32) ? 3 : 2;
834 #endif
835 		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
836 		FAIL_IF(!inst);
837 		INC_SIZE(size);
838 #ifdef _WIN64
839 		if (!compiler->mode32)
840 			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
841 		else if (op >= SLJIT_DIVMOD_UW)
842 			*inst++ = REX_B;
843 		*inst++ = GROUP_F7;
844 		*inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
845 #else
846 		if (!compiler->mode32)
847 			*inst++ = REX_W;
848 		*inst++ = GROUP_F7;
849 		*inst = MOD_REG | reg_map[SLJIT_R1];
850 #endif
851 #endif
852 		switch (op) {
853 		case SLJIT_LMUL_UW:
854 			*inst |= MUL;
855 			break;
856 		case SLJIT_LMUL_SW:
857 			*inst |= IMUL;
858 			break;
859 		case SLJIT_DIVMOD_UW:
860 		case SLJIT_DIV_UW:
861 			*inst |= DIV;
862 			break;
863 		case SLJIT_DIVMOD_SW:
864 		case SLJIT_DIV_SW:
865 			*inst |= IDIV;
866 			break;
867 		}
868 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
869 		if (op <= SLJIT_DIVMOD_SW)
870 			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
871 #else
872 		if (op >= SLJIT_DIV_UW)
873 			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
874 #endif
875 		break;
876 	}
877 
878 	return SLJIT_SUCCESS;
879 }
880 
881 #define ENCODE_PREFIX(prefix) \
882 	do { \
883 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
884 		FAIL_IF(!inst); \
885 		INC_SIZE(1); \
886 		*inst = (prefix); \
887 	} while (0)
888 
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
890 	sljit_s32 dst, sljit_sw dstw,
891 	sljit_s32 src, sljit_sw srcw)
892 {
893 	sljit_u8* inst;
894 	sljit_s32 dst_r;
895 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
896 	sljit_s32 work_r;
897 #endif
898 
899 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
900 	compiler->mode32 = 0;
901 #endif
902 
903 	if (src & SLJIT_IMM) {
904 		if (FAST_IS_REG(dst)) {
905 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
906 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
907 #else
908 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
909 			FAIL_IF(!inst);
910 			*inst = MOV_rm_i32;
911 			return SLJIT_SUCCESS;
912 #endif
913 		}
914 		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
915 		FAIL_IF(!inst);
916 		*inst = MOV_rm8_i8;
917 		return SLJIT_SUCCESS;
918 	}
919 
920 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
921 
922 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
923 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
924 		if (reg_map[src] >= 4) {
925 			SLJIT_ASSERT(dst_r == TMP_REG1);
926 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
927 		} else
928 			dst_r = src;
929 #else
930 		dst_r = src;
931 #endif
932 	}
933 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
934 	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
935 		/* src, dst are registers. */
936 		SLJIT_ASSERT(SLOW_IS_REG(dst));
937 		if (reg_map[dst] < 4) {
938 			if (dst != src)
939 				EMIT_MOV(compiler, dst, 0, src, 0);
940 			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
941 			FAIL_IF(!inst);
942 			*inst++ = GROUP_0F;
943 			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
944 		}
945 		else {
946 			if (dst != src)
947 				EMIT_MOV(compiler, dst, 0, src, 0);
948 			if (sign) {
949 				/* shl reg, 24 */
950 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
951 				FAIL_IF(!inst);
952 				*inst |= SHL;
953 				/* sar reg, 24 */
954 				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
955 				FAIL_IF(!inst);
956 				*inst |= SAR;
957 			}
958 			else {
959 				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
960 				FAIL_IF(!inst);
961 				*(inst + 1) |= AND;
962 			}
963 		}
964 		return SLJIT_SUCCESS;
965 	}
966 #endif
967 	else {
		/* src is either a memory address or, on the x86-32 architecture,
		   a register with reg_map[src] < 4. */
969 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
970 		FAIL_IF(!inst);
971 		*inst++ = GROUP_0F;
972 		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
973 	}
974 
975 	if (dst & SLJIT_MEM) {
976 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
977 		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
979 			if ((dst & REG_MASK) == SLJIT_R0) {
980 				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
981 					work_r = SLJIT_R2;
982 				else
983 					work_r = SLJIT_R1;
984 			}
985 			else {
986 				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
987 					work_r = SLJIT_R0;
988 				else if ((dst & REG_MASK) == SLJIT_R1)
989 					work_r = SLJIT_R2;
990 				else
991 					work_r = SLJIT_R1;
992 			}
993 
994 			if (work_r == SLJIT_R0) {
995 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
996 			}
997 			else {
998 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
999 				FAIL_IF(!inst);
1000 				*inst = XCHG_r_rm;
1001 			}
1002 
1003 			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
1004 			FAIL_IF(!inst);
1005 			*inst = MOV_rm8_r8;
1006 
1007 			if (work_r == SLJIT_R0) {
1008 				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
1009 			}
1010 			else {
1011 				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
1012 				FAIL_IF(!inst);
1013 				*inst = XCHG_r_rm;
1014 			}
1015 		}
1016 		else {
1017 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
1018 			FAIL_IF(!inst);
1019 			*inst = MOV_rm8_r8;
1020 		}
1021 #else
1022 		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
1023 		FAIL_IF(!inst);
1024 		*inst = MOV_rm8_r8;
1025 #endif
1026 	}
1027 
1028 	return SLJIT_SUCCESS;
1029 }
1030 
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
1032 	sljit_s32 src, sljit_sw srcw)
1033 {
1034 	sljit_u8* inst;
1035 
1036 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1037 	compiler->mode32 = 1;
1038 #endif
1039 
1040 	inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
1041 	FAIL_IF(!inst);
1042 	*inst++ = GROUP_0F;
1043 	*inst++ = PREFETCH;
1044 
1045 	if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
1046 		*inst |= (3 << 3);
1047 	else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
1048 		*inst |= (2 << 3);
1049 	else
1050 		*inst |= (1 << 3);
1051 
1052 	return SLJIT_SUCCESS;
1053 }
1054 
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
1056 	sljit_s32 dst, sljit_sw dstw,
1057 	sljit_s32 src, sljit_sw srcw)
1058 {
1059 	sljit_u8* inst;
1060 	sljit_s32 dst_r;
1061 
1062 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1063 	compiler->mode32 = 0;
1064 #endif
1065 
1066 	if (src & SLJIT_IMM) {
1067 		if (FAST_IS_REG(dst)) {
1068 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1069 			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
1070 #else
1071 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
1072 			FAIL_IF(!inst);
1073 			*inst = MOV_rm_i32;
1074 			return SLJIT_SUCCESS;
1075 #endif
1076 		}
1077 		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
1078 		FAIL_IF(!inst);
1079 		*inst = MOV_rm_i32;
1080 		return SLJIT_SUCCESS;
1081 	}
1082 
1083 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1084 
1085 	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
1086 		dst_r = src;
1087 	else {
1088 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1089 		FAIL_IF(!inst);
1090 		*inst++ = GROUP_0F;
1091 		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
1092 	}
1093 
1094 	if (dst & SLJIT_MEM) {
1095 		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
1096 		FAIL_IF(!inst);
1097 		*inst = MOV_rm_r;
1098 	}
1099 
1100 	return SLJIT_SUCCESS;
1101 }
1102 
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
1104 	sljit_s32 dst, sljit_sw dstw,
1105 	sljit_s32 src, sljit_sw srcw)
1106 {
1107 	sljit_u8* inst;
1108 
1109 	if (dst == src && dstw == srcw) {
1110 		/* Same input and output */
1111 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
1112 		FAIL_IF(!inst);
1113 		*inst++ = GROUP_F7;
1114 		*inst |= opcode;
1115 		return SLJIT_SUCCESS;
1116 	}
1117 
1118 	if (dst == SLJIT_UNUSED)
1119 		dst = TMP_REG1;
1120 
1121 	if (FAST_IS_REG(dst)) {
1122 		EMIT_MOV(compiler, dst, 0, src, srcw);
1123 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1124 		FAIL_IF(!inst);
1125 		*inst++ = GROUP_F7;
1126 		*inst |= opcode;
1127 		return SLJIT_SUCCESS;
1128 	}
1129 
1130 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1131 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1132 	FAIL_IF(!inst);
1133 	*inst++ = GROUP_F7;
1134 	*inst |= opcode;
1135 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1136 	return SLJIT_SUCCESS;
1137 }
1138 
static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
1140 	sljit_s32 dst, sljit_sw dstw,
1141 	sljit_s32 src, sljit_sw srcw)
1142 {
1143 	sljit_u8* inst;
1144 
1145 	if (dst == SLJIT_UNUSED)
1146 		dst = TMP_REG1;
1147 
1148 	if (FAST_IS_REG(dst)) {
1149 		EMIT_MOV(compiler, dst, 0, src, srcw);
1150 		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
1151 		FAIL_IF(!inst);
1152 		*inst++ = GROUP_F7;
1153 		*inst |= NOT_rm;
1154 		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
1155 		FAIL_IF(!inst);
1156 		*inst = OR_r_rm;
1157 		return SLJIT_SUCCESS;
1158 	}
1159 
1160 	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
1161 	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
1162 	FAIL_IF(!inst);
1163 	*inst++ = GROUP_F7;
1164 	*inst |= NOT_rm;
1165 	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
1166 	FAIL_IF(!inst);
1167 	*inst = OR_r_rm;
1168 	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1169 	return SLJIT_SUCCESS;
1170 }
1171 
1172 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1173 static const sljit_sw emit_clz_arg = 32 + 31;
1174 #endif
1175 
static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
1177 	sljit_s32 dst, sljit_sw dstw,
1178 	sljit_s32 src, sljit_sw srcw)
1179 {
1180 	sljit_u8* inst;
1181 	sljit_s32 dst_r;
1182 
1183 	SLJIT_UNUSED_ARG(op_flags);
1184 
1185 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
1186 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
1187 		src = TMP_REG1;
1188 		srcw = 0;
1189 	}
1190 
1191 	if (cpu_has_cmov == -1)
1192 		get_cpu_features();
1193 
1194 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1195 
1196 	inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
1197 	FAIL_IF(!inst);
1198 	*inst++ = GROUP_0F;
1199 	*inst = BSR_r_rm;
1200 
1201 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1202 	if (cpu_has_cmov) {
1203 		if (dst_r != TMP_REG1) {
1204 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
1205 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
1206 		}
1207 		else
1208 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);
1209 
1210 		FAIL_IF(!inst);
1211 		*inst++ = GROUP_0F;
1212 		*inst = CMOVE_r_rm;
1213 	}
1214 	else
1215 		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));
1216 
1217 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
1218 #else
1219 	if (cpu_has_cmov) {
1220 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));
1221 
1222 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1223 		FAIL_IF(!inst);
1224 		*inst++ = GROUP_0F;
1225 		*inst = CMOVE_r_rm;
1226 	}
1227 	else
1228 		FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));
1229 
1230 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
1231 #endif
1232 
1233 	FAIL_IF(!inst);
1234 	*(inst + 1) |= XOR;
1235 
1236 	if (dst & SLJIT_MEM)
1237 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1238 	return SLJIT_SUCCESS;
1239 }
1240 
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1242 	sljit_s32 dst, sljit_sw dstw,
1243 	sljit_s32 src, sljit_sw srcw)
1244 {
1245 	sljit_s32 update = 0;
1246 	sljit_s32 op_flags = GET_ALL_FLAGS(op);
1247 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1248 	sljit_s32 dst_is_ereg = 0;
1249 	sljit_s32 src_is_ereg = 0;
1250 #else
1251 #	define src_is_ereg 0
1252 #endif
1253 
1254 	CHECK_ERROR();
1255 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1256 	ADJUST_LOCAL_OFFSET(dst, dstw);
1257 	ADJUST_LOCAL_OFFSET(src, srcw);
1258 
1259 	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
1260 	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
1261 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1262 	compiler->mode32 = op_flags & SLJIT_I32_OP;
1263 #endif
1264 
1265 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
1266 		if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
1267 			return emit_prefetch(compiler, op, src, srcw);
1268 		return SLJIT_SUCCESS;
1269 	}
1270 
1271 	op = GET_OPCODE(op);
1272 
1273 	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
1274 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1275 		compiler->mode32 = 0;
1276 #endif
1277 
1278 		if (op_flags & SLJIT_I32_OP) {
1279 			if (FAST_IS_REG(src) && src == dst) {
1280 				if (!TYPE_CAST_NEEDED(op))
1281 					return SLJIT_SUCCESS;
1282 			}
1283 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1284 			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
1285 				op = SLJIT_MOV_U32;
1286 			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
1287 				op = SLJIT_MOVU_U32;
1288 			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
1289 				op = SLJIT_MOV_S32;
1290 			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
1291 				op = SLJIT_MOVU_S32;
1292 #endif
1293 		}
1294 
1295 		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
1296 		if (op >= SLJIT_MOVU) {
1297 			update = 1;
1298 			op -= 8;
1299 		}
1300 
1301 		if (src & SLJIT_IMM) {
1302 			switch (op) {
1303 			case SLJIT_MOV_U8:
1304 				srcw = (sljit_u8)srcw;
1305 				break;
1306 			case SLJIT_MOV_S8:
1307 				srcw = (sljit_s8)srcw;
1308 				break;
1309 			case SLJIT_MOV_U16:
1310 				srcw = (sljit_u16)srcw;
1311 				break;
1312 			case SLJIT_MOV_S16:
1313 				srcw = (sljit_s16)srcw;
1314 				break;
1315 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1316 			case SLJIT_MOV_U32:
1317 				srcw = (sljit_u32)srcw;
1318 				break;
1319 			case SLJIT_MOV_S32:
1320 				srcw = (sljit_s32)srcw;
1321 				break;
1322 #endif
1323 			}
1324 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1325 			if (SLJIT_UNLIKELY(dst_is_ereg))
1326 				return emit_mov(compiler, dst, dstw, src, srcw);
1327 #endif
1328 		}
1329 
1330 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1331 		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
1332 			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
1333 			dst = TMP_REG1;
1334 		}
1335 #endif
1336 
1337 		switch (op) {
1338 		case SLJIT_MOV:
1339 		case SLJIT_MOV_P:
1340 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1341 		case SLJIT_MOV_U32:
1342 		case SLJIT_MOV_S32:
1343 #endif
1344 			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
1345 			break;
1346 		case SLJIT_MOV_U8:
1347 			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
1348 			break;
1349 		case SLJIT_MOV_S8:
1350 			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
1351 			break;
1352 		case SLJIT_MOV_U16:
1353 			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
1354 			break;
1355 		case SLJIT_MOV_S16:
1356 			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
1357 			break;
1358 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1359 		case SLJIT_MOV_U32:
1360 			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
1361 			break;
1362 		case SLJIT_MOV_S32:
1363 			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
1364 			break;
1365 #endif
1366 		}
1367 
1368 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1369 		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
1370 			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
1371 #endif
1372 
1373 		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
1374 			if ((src & OFFS_REG_MASK) != 0) {
1375 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1376 						(src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(dst), 0));
1377 			}
1378 			else if (srcw != 0) {
1379 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1380 						(src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
1381 			}
1382 		}
1383 
1384 		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
1385 			if ((dst & OFFS_REG_MASK) != 0) {
1386 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1387 						(dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
1388 			}
1389 			else if (dstw != 0) {
1390 				FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
1391 						(dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
1392 			}
1393 		}
1394 		return SLJIT_SUCCESS;
1395 	}
1396 
1397 	switch (op) {
1398 	case SLJIT_NOT:
1399 		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
1400 			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
1401 		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);
1402 
1403 	case SLJIT_NEG:
1404 		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);
1405 
1406 	case SLJIT_CLZ:
1407 		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
1408 	}
1409 
1410 	return SLJIT_SUCCESS;
1411 
1412 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1413 #	undef src_is_ereg
1414 #endif
1415 }
1416 
1417 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1418 
1419 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1420 	if (IS_HALFWORD(immw) || compiler->mode32) { \
1421 		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1422 		FAIL_IF(!inst); \
1423 		*(inst + 1) |= (op_imm); \
1424 	} \
1425 	else { \
1426 		FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
1427 		inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
1428 		FAIL_IF(!inst); \
1429 		*inst = (op_mr); \
1430 	}
1431 
1432 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1433 	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
1434 
1435 #else
1436 
1437 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
1438 	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
1439 	FAIL_IF(!inst); \
1440 	*(inst + 1) |= (op_imm);
1441 
1442 #define BINARY_EAX_IMM(op_eax_imm, immw) \
1443 	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
1444 
1445 #endif
1446 
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
1448 	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1449 	sljit_s32 dst, sljit_sw dstw,
1450 	sljit_s32 src1, sljit_sw src1w,
1451 	sljit_s32 src2, sljit_sw src2w)
1452 {
1453 	sljit_u8* inst;
1454 
1455 	if (dst == SLJIT_UNUSED) {
1456 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1457 		if (src2 & SLJIT_IMM) {
1458 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1459 		}
1460 		else {
1461 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1462 			FAIL_IF(!inst);
1463 			*inst = op_rm;
1464 		}
1465 		return SLJIT_SUCCESS;
1466 	}
1467 
1468 	if (dst == src1 && dstw == src1w) {
1469 		if (src2 & SLJIT_IMM) {
1470 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1471 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1472 #else
1473 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1474 #endif
1475 				BINARY_EAX_IMM(op_eax_imm, src2w);
1476 			}
1477 			else {
1478 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1479 			}
1480 		}
1481 		else if (FAST_IS_REG(dst)) {
1482 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1483 			FAIL_IF(!inst);
1484 			*inst = op_rm;
1485 		}
1486 		else if (FAST_IS_REG(src2)) {
1487 			/* Special exception for sljit_emit_op_flags. */
1488 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1489 			FAIL_IF(!inst);
1490 			*inst = op_mr;
1491 		}
1492 		else {
1493 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1494 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1495 			FAIL_IF(!inst);
1496 			*inst = op_mr;
1497 		}
1498 		return SLJIT_SUCCESS;
1499 	}
1500 
1501 	/* Only for cumulative operations. */
1502 	if (dst == src2 && dstw == src2w) {
1503 		if (src1 & SLJIT_IMM) {
1504 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1505 			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1506 #else
1507 			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
1508 #endif
1509 				BINARY_EAX_IMM(op_eax_imm, src1w);
1510 			}
1511 			else {
1512 				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
1513 			}
1514 		}
1515 		else if (FAST_IS_REG(dst)) {
1516 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
1517 			FAIL_IF(!inst);
1518 			*inst = op_rm;
1519 		}
1520 		else if (FAST_IS_REG(src1)) {
1521 			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
1522 			FAIL_IF(!inst);
1523 			*inst = op_mr;
1524 		}
1525 		else {
1526 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1527 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1528 			FAIL_IF(!inst);
1529 			*inst = op_mr;
1530 		}
1531 		return SLJIT_SUCCESS;
1532 	}
1533 
1534 	/* General version. */
1535 	if (FAST_IS_REG(dst)) {
1536 		EMIT_MOV(compiler, dst, 0, src1, src1w);
1537 		if (src2 & SLJIT_IMM) {
1538 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1539 		}
1540 		else {
1541 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1542 			FAIL_IF(!inst);
1543 			*inst = op_rm;
1544 		}
1545 	}
1546 	else {
		/* This version requires fewer memory writes. */
1548 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1549 		if (src2 & SLJIT_IMM) {
1550 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1551 		}
1552 		else {
1553 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1554 			FAIL_IF(!inst);
1555 			*inst = op_rm;
1556 		}
1557 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1558 	}
1559 
1560 	return SLJIT_SUCCESS;
1561 }
1562 
1563 static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
1564 	sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
1565 	sljit_s32 dst, sljit_sw dstw,
1566 	sljit_s32 src1, sljit_sw src1w,
1567 	sljit_s32 src2, sljit_sw src2w)
1568 {
1569 	sljit_u8* inst;
1570 
1571 	if (dst == SLJIT_UNUSED) {
1572 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1573 		if (src2 & SLJIT_IMM) {
1574 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1575 		}
1576 		else {
1577 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1578 			FAIL_IF(!inst);
1579 			*inst = op_rm;
1580 		}
1581 		return SLJIT_SUCCESS;
1582 	}
1583 
1584 	if (dst == src1 && dstw == src1w) {
1585 		if (src2 & SLJIT_IMM) {
1586 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1587 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1588 #else
1589 			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
1590 #endif
1591 				BINARY_EAX_IMM(op_eax_imm, src2w);
1592 			}
1593 			else {
1594 				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
1595 			}
1596 		}
1597 		else if (FAST_IS_REG(dst)) {
1598 			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
1599 			FAIL_IF(!inst);
1600 			*inst = op_rm;
1601 		}
1602 		else if (FAST_IS_REG(src2)) {
1603 			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
1604 			FAIL_IF(!inst);
1605 			*inst = op_mr;
1606 		}
1607 		else {
1608 			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
1609 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
1610 			FAIL_IF(!inst);
1611 			*inst = op_mr;
1612 		}
1613 		return SLJIT_SUCCESS;
1614 	}
1615 
1616 	/* General version. */
1617 	if (FAST_IS_REG(dst) && dst != src2) {
1618 		EMIT_MOV(compiler, dst, 0, src1, src1w);
1619 		if (src2 & SLJIT_IMM) {
1620 			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
1621 		}
1622 		else {
1623 			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
1624 			FAIL_IF(!inst);
1625 			*inst = op_rm;
1626 		}
1627 	}
1628 	else {
		/* This version requires fewer memory writes. */
1630 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1631 		if (src2 & SLJIT_IMM) {
1632 			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
1633 		}
1634 		else {
1635 			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1636 			FAIL_IF(!inst);
1637 			*inst = op_rm;
1638 		}
1639 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1640 	}
1641 
1642 	return SLJIT_SUCCESS;
1643 }
1644 
1645 static sljit_s32 emit_mul(struct sljit_compiler *compiler,
1646 	sljit_s32 dst, sljit_sw dstw,
1647 	sljit_s32 src1, sljit_sw src1w,
1648 	sljit_s32 src2, sljit_sw src2w)
1649 {
1650 	sljit_u8* inst;
1651 	sljit_s32 dst_r;
1652 
1653 	dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
1654 
1655 	/* Register destination. */
1656 	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
1657 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1658 		FAIL_IF(!inst);
1659 		*inst++ = GROUP_0F;
1660 		*inst = IMUL_r_rm;
1661 	}
1662 	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
1663 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
1664 		FAIL_IF(!inst);
1665 		*inst++ = GROUP_0F;
1666 		*inst = IMUL_r_rm;
1667 	}
1668 	else if (src1 & SLJIT_IMM) {
1669 		if (src2 & SLJIT_IMM) {
1670 			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
1671 			src2 = dst_r;
1672 			src2w = 0;
1673 		}
1674 
1675 		if (src1w <= 127 && src1w >= -128) {
1676 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1677 			FAIL_IF(!inst);
1678 			*inst = IMUL_r_rm_i8;
1679 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1680 			FAIL_IF(!inst);
1681 			INC_SIZE(1);
1682 			*inst = (sljit_s8)src1w;
1683 		}
1684 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1685 		else {
1686 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1687 			FAIL_IF(!inst);
1688 			*inst = IMUL_r_rm_i32;
1689 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1690 			FAIL_IF(!inst);
1691 			INC_SIZE(4);
1692 			sljit_unaligned_store_sw(inst, src1w);
1693 		}
1694 #else
1695 		else if (IS_HALFWORD(src1w)) {
1696 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
1697 			FAIL_IF(!inst);
1698 			*inst = IMUL_r_rm_i32;
1699 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1700 			FAIL_IF(!inst);
1701 			INC_SIZE(4);
1702 			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
1703 		}
1704 		else {
1705 			if (dst_r != src2)
1706 				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
1707 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1708 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1709 			FAIL_IF(!inst);
1710 			*inst++ = GROUP_0F;
1711 			*inst = IMUL_r_rm;
1712 		}
1713 #endif
1714 	}
1715 	else if (src2 & SLJIT_IMM) {
1716 		/* Note: src1 is NOT immediate. */
1717 
1718 		if (src2w <= 127 && src2w >= -128) {
1719 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1720 			FAIL_IF(!inst);
1721 			*inst = IMUL_r_rm_i8;
1722 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
1723 			FAIL_IF(!inst);
1724 			INC_SIZE(1);
1725 			*inst = (sljit_s8)src2w;
1726 		}
1727 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1728 		else {
1729 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1730 			FAIL_IF(!inst);
1731 			*inst = IMUL_r_rm_i32;
1732 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1733 			FAIL_IF(!inst);
1734 			INC_SIZE(4);
1735 			sljit_unaligned_store_sw(inst, src2w);
1736 		}
1737 #else
1738 		else if (IS_HALFWORD(src2w)) {
1739 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
1740 			FAIL_IF(!inst);
1741 			*inst = IMUL_r_rm_i32;
1742 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
1743 			FAIL_IF(!inst);
1744 			INC_SIZE(4);
1745 			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
1746 		}
1747 		else {
1748 			if (dst_r != src1)
1749 				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1750 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1751 			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
1752 			FAIL_IF(!inst);
1753 			*inst++ = GROUP_0F;
1754 			*inst = IMUL_r_rm;
1755 		}
1756 #endif
1757 	}
1758 	else {
1759 		/* Neither argument is immediate. */
1760 		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
1761 			dst_r = TMP_REG1;
1762 		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
1763 		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
1764 		FAIL_IF(!inst);
1765 		*inst++ = GROUP_0F;
1766 		*inst = IMUL_r_rm;
1767 	}
1768 
1769 	if (dst & SLJIT_MEM)
1770 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
1771 
1772 	return SLJIT_SUCCESS;
1773 }
1774 
1775 static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
1776 	sljit_s32 dst, sljit_sw dstw,
1777 	sljit_s32 src1, sljit_sw src1w,
1778 	sljit_s32 src2, sljit_sw src2w)
1779 {
1780 	sljit_u8* inst;
1781 	sljit_s32 dst_r, done = 0;
1782 
	/* These cases are better left to be handled the normal way. */
1784 	if (dst == src1 && dstw == src1w)
1785 		return SLJIT_ERR_UNSUPPORTED;
1786 	if (dst == src2 && dstw == src2w)
1787 		return SLJIT_ERR_UNSUPPORTED;
1788 
1789 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1790 
1791 	if (FAST_IS_REG(src1)) {
1792 		if (FAST_IS_REG(src2)) {
1793 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1794 			FAIL_IF(!inst);
1795 			*inst = LEA_r_m;
1796 			done = 1;
1797 		}
1798 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1799 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1800 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
1801 #else
1802 		if (src2 & SLJIT_IMM) {
1803 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1804 #endif
1805 			FAIL_IF(!inst);
1806 			*inst = LEA_r_m;
1807 			done = 1;
1808 		}
1809 	}
1810 	else if (FAST_IS_REG(src2)) {
1811 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1812 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1813 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
1814 #else
1815 		if (src1 & SLJIT_IMM) {
1816 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1817 #endif
1818 			FAIL_IF(!inst);
1819 			*inst = LEA_r_m;
1820 			done = 1;
1821 		}
1822 	}
1823 
1824 	if (done) {
1825 		if (dst_r == TMP_REG1)
1826 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1827 		return SLJIT_SUCCESS;
1828 	}
1829 	return SLJIT_ERR_UNSUPPORTED;
1830 }
1831 
1832 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1833 	sljit_s32 src1, sljit_sw src1w,
1834 	sljit_s32 src2, sljit_sw src2w)
1835 {
1836 	sljit_u8* inst;
1837 
1838 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1839 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1840 #else
1841 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1842 #endif
1843 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1844 		return SLJIT_SUCCESS;
1845 	}
1846 
1847 	if (FAST_IS_REG(src1)) {
1848 		if (src2 & SLJIT_IMM) {
1849 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1850 		}
1851 		else {
1852 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1853 			FAIL_IF(!inst);
1854 			*inst = CMP_r_rm;
1855 		}
1856 		return SLJIT_SUCCESS;
1857 	}
1858 
1859 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1860 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1861 		FAIL_IF(!inst);
1862 		*inst = CMP_rm_r;
1863 		return SLJIT_SUCCESS;
1864 	}
1865 
1866 	if (src2 & SLJIT_IMM) {
1867 		if (src1 & SLJIT_IMM) {
1868 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1869 			src1 = TMP_REG1;
1870 			src1w = 0;
1871 		}
1872 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1873 	}
1874 	else {
1875 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1876 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1877 		FAIL_IF(!inst);
1878 		*inst = CMP_r_rm;
1879 	}
1880 	return SLJIT_SUCCESS;
1881 }
1882 
1883 static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
1884 	sljit_s32 src1, sljit_sw src1w,
1885 	sljit_s32 src2, sljit_sw src2w)
1886 {
1887 	sljit_u8* inst;
1888 
1889 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1890 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1891 #else
1892 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1893 #endif
1894 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1895 		return SLJIT_SUCCESS;
1896 	}
1897 
1898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1899 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1900 #else
1901 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1902 #endif
1903 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1904 		return SLJIT_SUCCESS;
1905 	}
1906 
1907 	if (!(src1 & SLJIT_IMM)) {
1908 		if (src2 & SLJIT_IMM) {
1909 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1910 			if (IS_HALFWORD(src2w) || compiler->mode32) {
1911 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1912 				FAIL_IF(!inst);
1913 				*inst = GROUP_F7;
1914 			}
1915 			else {
1916 				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
1917 				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
1918 				FAIL_IF(!inst);
1919 				*inst = TEST_rm_r;
1920 			}
1921 #else
1922 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1923 			FAIL_IF(!inst);
1924 			*inst = GROUP_F7;
1925 #endif
1926 			return SLJIT_SUCCESS;
1927 		}
1928 		else if (FAST_IS_REG(src1)) {
1929 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1930 			FAIL_IF(!inst);
1931 			*inst = TEST_rm_r;
1932 			return SLJIT_SUCCESS;
1933 		}
1934 	}
1935 
1936 	if (!(src2 & SLJIT_IMM)) {
1937 		if (src1 & SLJIT_IMM) {
1938 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1939 			if (IS_HALFWORD(src1w) || compiler->mode32) {
1940 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1941 				FAIL_IF(!inst);
1942 				*inst = GROUP_F7;
1943 			}
1944 			else {
1945 				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
1946 				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1947 				FAIL_IF(!inst);
1948 				*inst = TEST_rm_r;
1949 			}
1950 #else
1951 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1952 			FAIL_IF(!inst);
1953 			*inst = GROUP_F7;
1954 #endif
1955 			return SLJIT_SUCCESS;
1956 		}
1957 		else if (FAST_IS_REG(src2)) {
1958 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1959 			FAIL_IF(!inst);
1960 			*inst = TEST_rm_r;
1961 			return SLJIT_SUCCESS;
1962 		}
1963 	}
1964 
1965 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1966 	if (src2 & SLJIT_IMM) {
1967 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1968 		if (IS_HALFWORD(src2w) || compiler->mode32) {
1969 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1970 			FAIL_IF(!inst);
1971 			*inst = GROUP_F7;
1972 		}
1973 		else {
1974 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1975 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1976 			FAIL_IF(!inst);
1977 			*inst = TEST_rm_r;
1978 		}
1979 #else
1980 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1981 		FAIL_IF(!inst);
1982 		*inst = GROUP_F7;
1983 #endif
1984 	}
1985 	else {
1986 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1987 		FAIL_IF(!inst);
1988 		*inst = TEST_rm_r;
1989 	}
1990 	return SLJIT_SUCCESS;
1991 }
1992 
1993 static sljit_s32 emit_shift(struct sljit_compiler *compiler,
1994 	sljit_u8 mode,
1995 	sljit_s32 dst, sljit_sw dstw,
1996 	sljit_s32 src1, sljit_sw src1w,
1997 	sljit_s32 src2, sljit_sw src2w)
1998 {
1999 	sljit_u8* inst;
2000 
2001 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2002 		if (dst == src1 && dstw == src1w) {
2003 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2004 			FAIL_IF(!inst);
2005 			*inst |= mode;
2006 			return SLJIT_SUCCESS;
2007 		}
2008 		if (dst == SLJIT_UNUSED) {
2009 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2010 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2011 			FAIL_IF(!inst);
2012 			*inst |= mode;
2013 			return SLJIT_SUCCESS;
2014 		}
2015 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2016 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2017 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2018 			FAIL_IF(!inst);
2019 			*inst |= mode;
2020 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2021 			return SLJIT_SUCCESS;
2022 		}
2023 		if (FAST_IS_REG(dst)) {
2024 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2025 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2026 			FAIL_IF(!inst);
2027 			*inst |= mode;
2028 			return SLJIT_SUCCESS;
2029 		}
2030 
2031 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2032 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2033 		FAIL_IF(!inst);
2034 		*inst |= mode;
2035 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2036 		return SLJIT_SUCCESS;
2037 	}
2038 
2039 	if (dst == SLJIT_PREF_SHIFT_REG) {
2040 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2041 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2042 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2043 		FAIL_IF(!inst);
2044 		*inst |= mode;
2045 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2046 	}
2047 	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2048 		if (src1 != dst)
2049 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2050 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2051 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2052 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2053 		FAIL_IF(!inst);
2054 		*inst |= mode;
2055 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2056 	}
2057 	else {
		/* This case is complex since ecx itself may be used for
		   addressing, and that must be supported as well. */
2060 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2061 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2062 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
2063 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2064 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2065 		FAIL_IF(!inst);
2066 		*inst |= mode;
2067 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
2068 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2069 #else
2070 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2071 		EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w);
2072 		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2073 		FAIL_IF(!inst);
2074 		*inst = XCHG_r_rm;
2075 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2076 		FAIL_IF(!inst);
2077 		*inst |= mode;
2078 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2079 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2080 #endif
2081 	}
2082 
2083 	return SLJIT_SUCCESS;
2084 }
2085 
2086 static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
2087 	sljit_u8 mode, sljit_s32 set_flags,
2088 	sljit_s32 dst, sljit_sw dstw,
2089 	sljit_s32 src1, sljit_sw src1w,
2090 	sljit_s32 src2, sljit_sw src2w)
2091 {
2092 	/* The CPU does not set flags if the shift count is 0. */
2093 	if (src2 & SLJIT_IMM) {
2094 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2095 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2096 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2097 #else
2098 		if ((src2w & 0x1f) != 0)
2099 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2100 #endif
2101 		if (!set_flags)
2102 			return emit_mov(compiler, dst, dstw, src1, src1w);
2103 		/* OR dst, src, 0 */
2104 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2105 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
2106 	}
2107 
2108 	if (!set_flags)
2109 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2110 
2111 	if (!FAST_IS_REG(dst))
2112 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2113 
	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2115 
2116 	if (FAST_IS_REG(dst))
2117 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2118 	return SLJIT_SUCCESS;
2119 }
2120 
2121 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2122 	sljit_s32 dst, sljit_sw dstw,
2123 	sljit_s32 src1, sljit_sw src1w,
2124 	sljit_s32 src2, sljit_sw src2w)
2125 {
2126 	CHECK_ERROR();
2127 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2128 	ADJUST_LOCAL_OFFSET(dst, dstw);
2129 	ADJUST_LOCAL_OFFSET(src1, src1w);
2130 	ADJUST_LOCAL_OFFSET(src2, src2w);
2131 
2132 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2133 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
2134 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
2135 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2136 	compiler->mode32 = op & SLJIT_I32_OP;
2137 #endif
2138 
2139 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
2140 		return SLJIT_SUCCESS;
2141 
2142 	switch (GET_OPCODE(op)) {
2143 	case SLJIT_ADD:
2144 		if (!HAS_FLAGS(op)) {
2145 			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2146 				return compiler->error;
2147 		}
2148 		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2149 			dst, dstw, src1, src1w, src2, src2w);
2150 	case SLJIT_ADDC:
2151 		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2152 			dst, dstw, src1, src1w, src2, src2w);
2153 	case SLJIT_SUB:
2154 		if (!HAS_FLAGS(op)) {
2155 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2156 				return compiler->error;
2157 		}
2158 
2159 		if (dst == SLJIT_UNUSED)
2160 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2161 		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2162 			dst, dstw, src1, src1w, src2, src2w);
2163 	case SLJIT_SUBC:
2164 		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2165 			dst, dstw, src1, src1w, src2, src2w);
2166 	case SLJIT_MUL:
2167 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2168 	case SLJIT_AND:
2169 		if (dst == SLJIT_UNUSED)
2170 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
2171 		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2172 			dst, dstw, src1, src1w, src2, src2w);
2173 	case SLJIT_OR:
2174 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2175 			dst, dstw, src1, src1w, src2, src2w);
2176 	case SLJIT_XOR:
2177 		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2178 			dst, dstw, src1, src1w, src2, src2w);
2179 	case SLJIT_SHL:
2180 		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
2181 			dst, dstw, src1, src1w, src2, src2w);
2182 	case SLJIT_LSHR:
2183 		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
2184 			dst, dstw, src1, src1w, src2, src2w);
2185 	case SLJIT_ASHR:
2186 		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
2187 			dst, dstw, src1, src1w, src2, src2w);
2188 	}
2189 
2190 	return SLJIT_SUCCESS;
2191 }
2192 
2193 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2194 {
2195 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2196 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2197 	if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2198 		return -1;
2199 #endif
2200 	return reg_map[reg];
2201 }
2202 
2203 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2204 {
2205 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2206 	return reg;
2207 }
2208 
2209 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2210 	void *instruction, sljit_s32 size)
2211 {
2212 	sljit_u8 *inst;
2213 
2214 	CHECK_ERROR();
2215 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2216 
2217 	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
2218 	FAIL_IF(!inst);
2219 	INC_SIZE(size);
2220 	SLJIT_MEMCPY(inst, instruction, size);
2221 	return SLJIT_SUCCESS;
2222 }
2223 
2224 /* --------------------------------------------------------------------- */
2225 /*  Floating point operators                                             */
2226 /* --------------------------------------------------------------------- */
2227 
/* Alignment padding (3 words) + 4 constants of 16 bytes each. */
2229 static sljit_s32 sse2_data[3 + (4 * 4)];
2230 static sljit_s32 *sse2_buffer;
2231 
2232 static void init_compiler(void)
2233 {
2234 	/* Align to 16 bytes. */
2235 	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);
2236 
	/* Single precision constants (each constant is 16 bytes long). */
2238 	sse2_buffer[0] = 0x80000000;
2239 	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants (each constant is 16 bytes long). */
2241 	sse2_buffer[8] = 0;
2242 	sse2_buffer[9] = 0x80000000;
2243 	sse2_buffer[12] = 0xffffffff;
2244 	sse2_buffer[13] = 0x7fffffff;
2245 }
2246 
2247 static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
2248 	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2249 {
2250 	sljit_u8 *inst;
2251 
2252 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2253 	FAIL_IF(!inst);
2254 	*inst++ = GROUP_0F;
2255 	*inst = opcode;
2256 	return SLJIT_SUCCESS;
2257 }
2258 
2259 static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
2260 	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
2261 {
2262 	sljit_u8 *inst;
2263 
2264 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2265 	FAIL_IF(!inst);
2266 	*inst++ = GROUP_0F;
2267 	*inst = opcode;
2268 	return SLJIT_SUCCESS;
2269 }
2270 
2271 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
2272 	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
2273 {
2274 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2275 }
2276 
2277 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
2278 	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
2279 {
2280 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2281 }
2282 
2283 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2284 	sljit_s32 dst, sljit_sw dstw,
2285 	sljit_s32 src, sljit_sw srcw)
2286 {
2287 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2288 	sljit_u8 *inst;
2289 
2290 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2291 	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
2292 		compiler->mode32 = 0;
2293 #endif
2294 
2295 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2296 	FAIL_IF(!inst);
2297 	*inst++ = GROUP_0F;
2298 	*inst = CVTTSD2SI_r_xm;
2299 
2300 	if (dst & SLJIT_MEM)
2301 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2302 	return SLJIT_SUCCESS;
2303 }
2304 
2305 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2306 	sljit_s32 dst, sljit_sw dstw,
2307 	sljit_s32 src, sljit_sw srcw)
2308 {
2309 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2310 	sljit_u8 *inst;
2311 
2312 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2313 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
2314 		compiler->mode32 = 0;
2315 #endif
2316 
2317 	if (src & SLJIT_IMM) {
2318 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2319 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2320 			srcw = (sljit_s32)srcw;
2321 #endif
2322 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2323 		src = TMP_REG1;
2324 		srcw = 0;
2325 	}
2326 
2327 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2328 	FAIL_IF(!inst);
2329 	*inst++ = GROUP_0F;
2330 	*inst = CVTSI2SD_x_rm;
2331 
2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2333 	compiler->mode32 = 1;
2334 #endif
2335 	if (dst_r == TMP_FREG)
2336 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2337 	return SLJIT_SUCCESS;
2338 }
2339 
2340 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2341 	sljit_s32 src1, sljit_sw src1w,
2342 	sljit_s32 src2, sljit_sw src2w)
2343 {
2344 	if (!FAST_IS_REG(src1)) {
2345 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2346 		src1 = TMP_FREG;
2347 	}
2348 	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
2349 }
2350 
2351 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2352 	sljit_s32 dst, sljit_sw dstw,
2353 	sljit_s32 src, sljit_sw srcw)
2354 {
2355 	sljit_s32 dst_r;
2356 
2357 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2358 	compiler->mode32 = 1;
2359 #endif
2360 
2361 	CHECK_ERROR();
2362 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2363 
2364 	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
2365 		if (FAST_IS_REG(dst))
2366 			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
2367 		if (FAST_IS_REG(src))
2368 			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
2369 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
2370 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2371 	}
2372 
2373 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
2374 		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2375 		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of the source. From the SLJIT point
			   of view, this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP or MOVSLDUP. */
2379 			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
2380 		}
2381 		else {
2382 			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
2383 			src = TMP_FREG;
2384 		}
2385 
2386 		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
2387 		if (dst_r == TMP_FREG)
2388 			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2389 		return SLJIT_SUCCESS;
2390 	}
2391 
2392 	if (FAST_IS_REG(dst)) {
2393 		dst_r = dst;
2394 		if (dst != src)
2395 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2396 	}
2397 	else {
2398 		dst_r = TMP_FREG;
2399 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
2400 	}
2401 
2402 	switch (GET_OPCODE(op)) {
2403 	case SLJIT_NEG_F64:
2404 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
2405 		break;
2406 
2407 	case SLJIT_ABS_F64:
2408 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2409 		break;
2410 	}
2411 
2412 	if (dst_r == TMP_FREG)
2413 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2414 	return SLJIT_SUCCESS;
2415 }
2416 
2417 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2418 	sljit_s32 dst, sljit_sw dstw,
2419 	sljit_s32 src1, sljit_sw src1w,
2420 	sljit_s32 src2, sljit_sw src2w)
2421 {
2422 	sljit_s32 dst_r;
2423 
2424 	CHECK_ERROR();
2425 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2426 	ADJUST_LOCAL_OFFSET(dst, dstw);
2427 	ADJUST_LOCAL_OFFSET(src1, src1w);
2428 	ADJUST_LOCAL_OFFSET(src2, src2w);
2429 
2430 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2431 	compiler->mode32 = 1;
2432 #endif
2433 
2434 	if (FAST_IS_REG(dst)) {
2435 		dst_r = dst;
2436 		if (dst == src1)
2437 			; /* Do nothing here. */
2438 		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
2439 			/* Swap arguments. */
2440 			src2 = src1;
2441 			src2w = src1w;
2442 		}
2443 		else if (dst != src2)
2444 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
2445 		else {
2446 			dst_r = TMP_FREG;
2447 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2448 		}
2449 	}
2450 	else {
2451 		dst_r = TMP_FREG;
2452 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
2453 	}
2454 
2455 	switch (GET_OPCODE(op)) {
2456 	case SLJIT_ADD_F64:
2457 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2458 		break;
2459 
2460 	case SLJIT_SUB_F64:
2461 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2462 		break;
2463 
2464 	case SLJIT_MUL_F64:
2465 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2466 		break;
2467 
2468 	case SLJIT_DIV_F64:
2469 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
2470 		break;
2471 	}
2472 
2473 	if (dst_r == TMP_FREG)
2474 		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
2475 	return SLJIT_SUCCESS;
2476 }
2477 
2478 /* --------------------------------------------------------------------- */
2479 /*  Conditional instructions                                             */
2480 /* --------------------------------------------------------------------- */
2481 
2482 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2483 {
2484 	sljit_u8 *inst;
2485 	struct sljit_label *label;
2486 
2487 	CHECK_ERROR_PTR();
2488 	CHECK_PTR(check_sljit_emit_label(compiler));
2489 
2490 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2491 		return compiler->last_label;
2492 
2493 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2494 	PTR_FAIL_IF(!label);
2495 	set_label(label, compiler);
2496 
2497 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2498 	PTR_FAIL_IF(!inst);
2499 
2500 	*inst++ = 0;
2501 	*inst++ = 0;
2502 
2503 	return label;
2504 }
2505 
2506 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2507 {
2508 	sljit_u8 *inst;
2509 	struct sljit_jump *jump;
2510 
2511 	CHECK_ERROR_PTR();
2512 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2513 
2514 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2515 	PTR_FAIL_IF_NULL(jump);
2516 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2517 	type &= 0xff;
2518 
2519 	if (type >= SLJIT_CALL1)
2520 		PTR_FAIL_IF(call_with_args(compiler, type));
2521 
2522 	/* Worst case size. */
2523 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2524 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2525 #else
2526 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2527 #endif
2528 
2529 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2530 	PTR_FAIL_IF_NULL(inst);
2531 
2532 	*inst++ = 0;
2533 	*inst++ = type + 2;
2534 	return jump;
2535 }
2536 
2537 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2538 #ifndef _WIN64
2539 #define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3)
2540 #else
2541 #define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2)
2542 #endif
2543 #endif
2544 
2545 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2546 {
2547 	sljit_u8 *inst;
2548 	struct sljit_jump *jump;
2549 
2550 	CHECK_ERROR();
2551 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2552 	ADJUST_LOCAL_OFFSET(src, srcw);
2553 
2554 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2555 
2556 	if (type >= SLJIT_CALL1) {
2557 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2558 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2559 		if (src == SLJIT_R2) {
2560 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2561 			src = TMP_REG1;
2562 		}
2563 		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2564 			srcw += sizeof(sljit_sw);
2565 #endif
2566 #else
2567 		if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) {
2568 			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
2569 			src = TMP_REG2;
2570 		}
2571 #endif
2572 		FAIL_IF(call_with_args(compiler, type));
2573 	}
2574 
2575 	if (src == SLJIT_IMM) {
2576 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2577 		FAIL_IF_NULL(jump);
2578 		set_jump(jump, compiler, JUMP_ADDR);
2579 		jump->u.target = srcw;
2580 
2581 		/* Worst case size. */
2582 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2583 		compiler->size += 5;
2584 #else
2585 		compiler->size += 10 + 3;
2586 #endif
2587 
2588 		inst = (sljit_u8*)ensure_buf(compiler, 2);
2589 		FAIL_IF_NULL(inst);
2590 
2591 		*inst++ = 0;
2592 		*inst++ = type + 2;
2593 	}
2594 	else {
2595 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2596 		/* REX_W is not necessary (src is not immediate). */
2597 		compiler->mode32 = 1;
2598 #endif
2599 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2600 		FAIL_IF(!inst);
2601 		*inst++ = GROUP_FF;
2602 		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2603 	}
2604 	return SLJIT_SUCCESS;
2605 }
2606 
2607 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2608 	sljit_s32 dst, sljit_sw dstw,
2609 	sljit_s32 type)
2610 {
2611 	sljit_u8 *inst;
2612 	sljit_u8 cond_set = 0;
2613 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2614 	sljit_s32 reg;
2615 #endif
2616 	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
2617 	sljit_s32 dst_save = dst;
2618 	sljit_sw dstw_save = dstw;
2619 
2620 	CHECK_ERROR();
2621 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2622 
2623 	ADJUST_LOCAL_OFFSET(dst, dstw);
2624 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2625 
2626 	type &= 0xff;
2627 	/* setcc = jcc + 0x10. */
2628 	cond_set = get_jump_code(type) + 0x10;
2629 
2630 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2631 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
2632 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
2633 		FAIL_IF(!inst);
2634 		INC_SIZE(4 + 3);
2635 		/* Set low register to conditional flag. */
2636 		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2637 		*inst++ = GROUP_0F;
2638 		*inst++ = cond_set;
2639 		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
2640 		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2641 		*inst++ = OR_rm8_r8;
2642 		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2643 		return SLJIT_SUCCESS;
2644 	}
2645 
2646 	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2647 
2648 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
2649 	FAIL_IF(!inst);
2650 	INC_SIZE(4 + 4);
2651 	/* Set low register to conditional flag. */
2652 	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2653 	*inst++ = GROUP_0F;
2654 	*inst++ = cond_set;
2655 	*inst++ = MOD_REG | reg_lmap[reg];
2656 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2657 	/* The movzx instruction does not affect flags. */
2658 	*inst++ = GROUP_0F;
2659 	*inst++ = MOVZX_r_rm8;
2660 	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2661 
2662 	if (reg != TMP_REG1)
2663 		return SLJIT_SUCCESS;
2664 
2665 	if (GET_OPCODE(op) < SLJIT_ADD) {
2666 		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2667 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2668 	}
2669 
2670 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2671 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2672 	compiler->skip_checks = 1;
2673 #endif
2674 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2675 
2676 #else
2677 	/* The SLJIT_CONFIG_X86_32 code path starts here. */
2678 	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2679 		if (reg_map[dst] <= 4) {
2680 			/* Low byte is accessible. */
2681 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
2682 			FAIL_IF(!inst);
2683 			INC_SIZE(3 + 3);
2684 			/* Set low byte to conditional flag. */
2685 			*inst++ = GROUP_0F;
2686 			*inst++ = cond_set;
2687 			*inst++ = MOD_REG | reg_map[dst];
2688 
2689 			*inst++ = GROUP_0F;
2690 			*inst++ = MOVZX_r_rm8;
2691 			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2692 			return SLJIT_SUCCESS;
2693 		}
2694 
2695 		/* Low byte is not accessible. */
2696 		if (cpu_has_cmov == -1)
2697 			get_cpu_features();
2698 
2699 		if (cpu_has_cmov) {
2700 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* A xor reg, reg operation would overwrite the flags. */
2702 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2703 
2704 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
2705 			FAIL_IF(!inst);
2706 			INC_SIZE(3);
2707 
2708 			*inst++ = GROUP_0F;
2709 			/* cmovcc = setcc - 0x50. */
2710 			*inst++ = cond_set - 0x50;
2711 			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2712 			return SLJIT_SUCCESS;
2713 		}
2714 
2715 		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2716 		FAIL_IF(!inst);
2717 		INC_SIZE(1 + 3 + 3 + 1);
2718 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2719 		/* Set al to conditional flag. */
2720 		*inst++ = GROUP_0F;
2721 		*inst++ = cond_set;
2722 		*inst++ = MOD_REG | 0 /* eax */;
2723 
2724 		*inst++ = GROUP_0F;
2725 		*inst++ = MOVZX_r_rm8;
2726 		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2727 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2728 		return SLJIT_SUCCESS;
2729 	}
2730 
2731 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
2732 		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);
2733 
2734 		if (dst != SLJIT_R0) {
2735 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2736 			FAIL_IF(!inst);
2737 			INC_SIZE(1 + 3 + 2 + 1);
2738 			/* Set low register to conditional flag. */
2739 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2740 			*inst++ = GROUP_0F;
2741 			*inst++ = cond_set;
2742 			*inst++ = MOD_REG | 0 /* eax */;
2743 			*inst++ = OR_rm8_r8;
2744 			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2745 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2746 		}
2747 		else {
2748 			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2749 			FAIL_IF(!inst);
2750 			INC_SIZE(2 + 3 + 2 + 2);
2751 			/* Set low register to conditional flag. */
2752 			*inst++ = XCHG_r_rm;
2753 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2754 			*inst++ = GROUP_0F;
2755 			*inst++ = cond_set;
2756 			*inst++ = MOD_REG | 1 /* ecx */;
2757 			*inst++ = OR_rm8_r8;
2758 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2759 			*inst++ = XCHG_r_rm;
2760 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2761 		}
2762 		return SLJIT_SUCCESS;
2763 	}
2764 
2765 	/* Set TMP_REG1 to the bit. */
2766 	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2767 	FAIL_IF(!inst);
2768 	INC_SIZE(1 + 3 + 3 + 1);
2769 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2770 	/* Set al to conditional flag. */
2771 	*inst++ = GROUP_0F;
2772 	*inst++ = cond_set;
2773 	*inst++ = MOD_REG | 0 /* eax */;
2774 
2775 	*inst++ = GROUP_0F;
2776 	*inst++ = MOVZX_r_rm8;
2777 	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2778 
2779 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2780 
2781 	if (GET_OPCODE(op) < SLJIT_ADD)
2782 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2783 
2784 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2785 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2786 	compiler->skip_checks = 1;
2787 #endif
2788 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2789 #endif /* SLJIT_CONFIG_X86_64 */
2790 }
2791 
2792 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2793 	sljit_s32 dst_reg,
2794 	sljit_s32 src, sljit_sw srcw)
2795 {
2796 	sljit_u8* inst;
2797 
2798 	CHECK_ERROR();
2799 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2800 
2801 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2802 	dst_reg &= ~SLJIT_I32_OP;
2803 
2804 	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
2805 		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2806 #else
2807 	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
2808 		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
2809 #endif
2810 
2811 	/* ADJUST_LOCAL_OFFSET is not needed. */
2812 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2813 
2814 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2815 	compiler->mode32 = dst_reg & SLJIT_I32_OP;
2816 	dst_reg &= ~SLJIT_I32_OP;
2817 #endif
2818 
2819 	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2820 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2821 		src = TMP_REG1;
2822 		srcw = 0;
2823 	}
2824 
2825 	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
2826 	FAIL_IF(!inst);
2827 	*inst++ = GROUP_0F;
2828 	*inst = get_jump_code(type & 0xff) - 0x40;
2829 	return SLJIT_SUCCESS;
2830 }
2831 
2832 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
2833 {
2834 	CHECK_ERROR();
2835 	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2836 	ADJUST_LOCAL_OFFSET(dst, dstw);
2837 
2838 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2839 
2840 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2841 	compiler->mode32 = 0;
2842 #endif
2843 
2844 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2845 
2846 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2847 	if (NOT_HALFWORD(offset)) {
2848 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2849 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2850 		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2851 		return compiler->error;
2852 #else
2853 		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2854 #endif
2855 	}
2856 #endif
2857 
2858 	if (offset != 0)
2859 		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2860 	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2861 }
2862 
2863 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2864 {
2865 	sljit_u8 *inst;
2866 	struct sljit_const *const_;
2867 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2868 	sljit_s32 reg;
2869 #endif
2870 
2871 	CHECK_ERROR_PTR();
2872 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2873 	ADJUST_LOCAL_OFFSET(dst, dstw);
2874 
2875 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2876 
2877 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2878 	PTR_FAIL_IF(!const_);
2879 	set_const(const_, compiler);
2880 
2881 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2882 	compiler->mode32 = 0;
2883 	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
2884 
2885 	if (emit_load_imm64(compiler, reg, init_value))
2886 		return NULL;
2887 #else
2888 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2889 		return NULL;
2890 #endif
2891 
2892 	inst = (sljit_u8*)ensure_buf(compiler, 2);
2893 	PTR_FAIL_IF(!inst);
2894 
2895 	*inst++ = 0;
2896 	*inst++ = 1;
2897 
2898 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2899 	if (dst & SLJIT_MEM)
2900 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2901 			return NULL;
2902 #endif
2903 
2904 	return const_;
2905 }
2906 
2907 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
2908 {
2909 	SLJIT_UNUSED_ARG(executable_offset);
2910 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2911 	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
2912 #else
2913 	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
2914 #endif
2915 }
2916 
2917 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
2918 {
2919 	SLJIT_UNUSED_ARG(executable_offset);
2920 	sljit_unaligned_store_sw((void*)addr, new_constant);
2921 }
2922