/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8   - From now on REX prefix is required
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}
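
/* A note on the macro above (my reading of the code, not an authoritative
   description): on x86-32 only a few scratch registers can be mapped to
   machine registers (SLJIT_R3 .. SLJIT_R6 have a reg_map value of 0 above),
   so these "extra" registers live in the stack frame instead. The macro
   rewrites such register operands into SLJIT_MEM1(SLJIT_SP) accesses at
   their reserved slots, so the rest of the code generator only ever sees
   real registers or memory operands. */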

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as a SIB byte being present.
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map: reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3,  2,  4,  5,  5,  6,  7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map: reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3,  4,  5,  5, 6,  7,  7, 6, 3, 4, 2,  0, 1
};
#endif

#define REX_W		0x48
#define REX_R		0x44
#define REX_X		0x42
#define REX_B		0x41
#define REX		0x40
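
/* The REX prefix has the form 0100WRXB: W selects a 64-bit operand size,
   while R, X and B supply the fourth (high) bit of the ModRM reg field,
   the SIB index field and the ModRM rm / SIB base field respectively.
   The values above can therefore be OR-ed together, e.g.
   REX_W | REX_B == 0x49 encodes a 64-bit operation on r8..r15. */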

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x)		((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x)		((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG	(0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS		0x0010
#define EX86_SHIFT_INS		0x0020
#define EX86_REX		0x0040
#define EX86_NO_REXW		0x0080
#define EX86_BYTE_ARG		0x0100
#define EX86_HALF_ARG		0x0200
#define EX86_PREF_66		0x0400
#define EX86_PREF_F2		0x0800
#define EX86_PREF_F3		0x1000
#define EX86_SSE2_OP1		0x2000
#define EX86_SSE2_OP2		0x4000
#define EX86_SSE2		(EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

#define ADD		(/* BINARY */ 0 << 3)
#define ADD_EAX_i32	0x05
#define ADD_r_rm	0x03
#define ADD_rm_r	0x01
#define ADDSD_x_xm	0x58
#define ADC		(/* BINARY */ 2 << 3)
#define ADC_EAX_i32	0x15
#define ADC_r_rm	0x13
#define ADC_rm_r	0x11
#define AND		(/* BINARY */ 4 << 3)
#define AND_EAX_i32	0x25
#define AND_r_rm	0x23
#define AND_rm_r	0x21
#define ANDPD_x_xm	0x54
#define BSR_r_rm	(/* GROUP_0F */ 0xbd)
#define CALL_i32	0xe8
#define CALL_rm		(/* GROUP_FF */ 2 << 3)
#define CDQ		0x99
#define CMOVNE_r_rm	(/* GROUP_0F */ 0x45)
#define CMP		(/* BINARY */ 7 << 3)
#define CMP_EAX_i32	0x3d
#define CMP_r_rm	0x3b
#define CMP_rm_r	0x39
#define CVTPD2PS_x_xm	0x5a
#define CVTSI2SD_x_rm	0x2a
#define CVTTSD2SI_r_xm	0x2c
#define DIV		(/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm	0x5e
#define INT3		0xcc
#define IDIV		(/* GROUP_F7 */ 7 << 3)
#define IMUL		(/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm	(/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8	0x6b
#define IMUL_r_rm_i32	0x69
#define JE_i8		0x74
#define JNE_i8		0x75
#define JMP_i8		0xeb
#define JMP_i32		0xe9
#define JMP_rm		(/* GROUP_FF */ 4 << 3)
#define LEA_r_m		0x8d
#define MOV_r_rm	0x8b
#define MOV_r_i32	0xb8
#define MOV_rm_r	0x89
#define MOV_rm_i32	0xc7
#define MOV_rm8_i8	0xc6
#define MOV_rm8_r8	0x88
#define MOVSD_x_xm	0x10
#define MOVSD_xm_x	0x11
#define MOVSXD_r_rm	0x63
#define MOVSX_r_rm8	(/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16	(/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8	(/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16	(/* GROUP_0F */ 0xb7)
#define MUL		(/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm	0x59
#define NEG_rm		(/* GROUP_F7 */ 3 << 3)
#define NOP		0x90
#define NOT_rm		(/* GROUP_F7 */ 2 << 3)
#define OR		(/* BINARY */ 1 << 3)
#define OR_r_rm		0x0b
#define OR_EAX_i32	0x0d
#define OR_rm_r		0x09
#define OR_rm8_r8	0x08
#define POP_r		0x58
#define POP_rm		0x8f
#define POPF		0x9d
#define PUSH_i32	0x68
#define PUSH_r		0x50
#define PUSH_rm		(/* GROUP_FF */ 6 << 3)
#define PUSHF		0x9c
#define RET_near	0xc3
#define RET_i16		0xc2
#define SBB		(/* BINARY */ 3 << 3)
#define SBB_EAX_i32	0x1d
#define SBB_r_rm	0x1b
#define SBB_rm_r	0x19
#define SAR		(/* SHIFT */ 7 << 3)
#define SHL		(/* SHIFT */ 4 << 3)
#define SHR		(/* SHIFT */ 5 << 3)
#define SUB		(/* BINARY */ 5 << 3)
#define SUB_EAX_i32	0x2d
#define SUB_r_rm	0x2b
#define SUB_rm_r	0x29
#define SUBSD_x_xm	0x5c
#define TEST_EAX_i32	0xa9
#define TEST_rm_r	0x85
#define UCOMISD_x_xm	0x2e
#define UNPCKLPD_x_xm	0x14
#define XCHG_EAX_r	0x90
#define XCHG_r_rm	0x87
#define XOR		(/* BINARY */ 6 << 3)
#define XOR_EAX_i32	0x35
#define XOR_r_rm	0x33
#define XOR_rm_r	0x31
#define XORPD_x_xm	0x57

#define GROUP_0F	0x0f
#define GROUP_F7	0xf7
#define GROUP_FF	0xff
#define GROUP_BINARY_81	0x81
#define GROUP_BINARY_83	0x83
#define GROUP_SHIFT_1	0xd1
#define GROUP_SHIFT_N	0xc1
#define GROUP_SHIFT_CL	0xd3
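
/* For the GROUP_* opcodes the actual operation is selected by the reg field
   of the ModRM byte: the "/digit" values defined above (ADD, CALL_rm, DIV,
   JMP_rm, MUL, NEG_rm, SHL, ...) are that digit pre-shifted into bits 5..3,
   so they can simply be OR-ed into the emitted ModRM byte. */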

#define MOD_REG		0xc0
#define MOD_DISP8	0x40
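
/* ModRM byte layout: mod (bits 7..6) | reg (bits 5..3) | rm (bits 2..0).
   mod == 11b (MOD_REG) selects a register-direct operand; mod == 01b
   (MOD_DISP8) selects a memory operand with an 8-bit displacement. For
   example, MOV_RM(MOD_REG, reg_map[a], reg_map[b]) below emits the two
   bytes "8B /r", i.e. a plain register-to-register mov a, b. */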

#define INC_SIZE(s)			(*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r)			(*inst++ = (PUSH_r + (r)))
#define POP_REG(r)			(*inst++ = (POP_r + (r)))
#define RET()				(*inst++ = (RET_near))
#define RET_I16(n)			(*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm)		(*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. They may be written by several threads that detect
   the CPU features at the same time, but every thread writes the same
   values, so the races are harmless. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

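	/* CPUID leaf 1 reports the feature flags in EDX: bit 15 is CMOV
	   (and FCMOVcc) support, bit 26 is SSE2 support. */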
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}

static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_D_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_D_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_D_LESS:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_D_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_D_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_D_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_D_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_D_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
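
/* The values returned above are the second bytes of the two-byte "0F 8x"
   near conditional jumps; subtracting 0x10 turns each of them into the
   corresponding one-byte "7x" short form (see generate_near_jump_code). */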

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
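	/* A short jump is two bytes long (opcode + disp8) and its displacement
	   is relative to the end of the instruction; hence the jump->addr + 2
	   above when testing whether the target fits in a signed byte. */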

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
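	/* The first pass recorded a simple byte stream (as the loop below
	   reads it): a non-zero length byte followed by that many already
	   encoded machine-code bytes, or a zero length byte followed by a tag
	   (0 = label, 1 = const, 2/3 = fixed call/jump target, and >= 4 a
	   jump of type tag - 4). This loop replays the stream into the
	   executable buffer, resolving jumps as it goes. */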
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/*  Operators                                                            */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
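
	/* How I read the lea/pushf pair below: it saves EFLAGS into the
	   reserved slot at the current stack top without any net change of
	   esp/rsp. The lea first skips one word upwards, then pushf moves the
	   stack pointer back down and writes the flags into that word.
	   emit_restore_flags undoes this with popf followed by the mirrored
	   lea, again with no net stack pointer change. */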

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	This function touches all 4k pages that belong to the requested stack
	space, whose size is passed in local_size. This is necessary on Windows,
	where the stack can only grow in 4k steps. If the stack is already large
	enough, the call just burns CPU cycles; but since that cannot be known
	in advance, it must always be made. I think this is a bad design in
	general, even if it has some reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: the move does not need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LUMUL:
	case SLJIT_LSMUL:
	case SLJIT_LUDIV:
	case SLJIT_LSDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_LUDIV) {
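			/* Unsigned division expects the upper half of the dividend
			   (edx/rdx; named SLJIT_R1 or TMP_REG1 here, depending on
			   which one the ABI maps to edx/rdx) to be zero, so it is
			   cleared with an xor first. */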
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_LSDIV) {
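			/* Signed division sign-extends the dividend instead: cdq
			   extends eax into edx, and with a REX.W prefix the same
			   opcode acts as cqo, extending rax into rdx. */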
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_LUDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_LUDIV) ? REX_B : 0);
		else if (op >= SLJIT_LUDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LUMUL:
			*inst |= MUL;
			break;
		case SLJIT_LSMUL:
			*inst |= IMUL;
			break;
		case SLJIT_LUDIV:
			*inst |= DIV;
			break;
		case SLJIT_LSDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

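	/* Without a REX prefix, only eax..ebx have byte-addressable low parts
	   (al..bl); a ModRM register field of 4..7 would select ah..bh instead.
	   On x86-32 sources with reg_map value >= 4 must therefore be copied
	   through TMP_REG1, while on x86-64 any REX prefix makes every low
	   byte addressable (spl..dil, r8b..r15b). */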
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* Both src and dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

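	/* clz is computed via bsr, which returns the index of the highest set
	   bit (undefined for a zero input). For 0 <= n <= 31 the identity
	   31 - n == n ^ 31 holds, so the result is converted with a single xor
	   (63 in 64-bit mode). dst_r is preseeded with 32 + 31 (or 64 + 63),
	   which the cmovne below leaves intact only for a zero input; the final
	   xor then turns that sentinel into 32 (or 64). */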
	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
#	define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#	undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
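
/* BINARY_IMM chooses between the two immediate encodings: when the value
   fits in signed 32 bits (always the case on x86-32, or in mode32), the
   group 81 "op r/m, imm" form is used, with the operation selected by the
   ModRM reg field; otherwise, on x86-64, the wide immediate is first loaded
   into TMP_REG2 and the ordinary register form (op_mr) is emitted instead.
   BINARY_EAX_IMM uses the shorter eax-specific opcodes (e.g. ADD_EAX_i32). */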

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

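		/* The imul r, r/m, imm8 form sign-extends its one-byte immediate,
		   so it can be used whenever the multiplier fits in a signed byte;
		   larger values need the imm32 form (or a temporary register). */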
		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
1773 
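/* emit_lea_binary tries to express an addition as a single LEA (8D /r),
   which computes an effective address without modifying the flags. A
   minimal sketch, assuming a mapping to eax and ecx (illustrative only):
     8D 04 08    lea eax, [eax + ecx]     ; eax = eax + ecx, EFLAGS untouched
   This is what makes it usable on the keep_flags paths. */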
1774 static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
1775 	sljit_si dst, sljit_sw dstw,
1776 	sljit_si src1, sljit_sw src1w,
1777 	sljit_si src2, sljit_sw src2w)
1778 {
1779 	sljit_ub* inst;
1780 	sljit_si dst_r, done = 0;
1781 
1782 	/* These cases are better left to be handled the normal way. */
1783 	if (!keep_flags) {
1784 		if (dst == src1 && dstw == src1w)
1785 			return SLJIT_ERR_UNSUPPORTED;
1786 		if (dst == src2 && dstw == src2w)
1787 			return SLJIT_ERR_UNSUPPORTED;
1788 	}
1789 
1790 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1791 
1792 	if (FAST_IS_REG(src1)) {
1793 		if (FAST_IS_REG(src2)) {
1794 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1795 			FAIL_IF(!inst);
1796 			*inst = LEA_r_m;
1797 			done = 1;
1798 		}
1799 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1800 		if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1801 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1802 #else
1803 		if (src2 & SLJIT_IMM) {
1804 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1805 #endif
1806 			FAIL_IF(!inst);
1807 			*inst = LEA_r_m;
1808 			done = 1;
1809 		}
1810 	}
1811 	else if (FAST_IS_REG(src2)) {
1812 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1813 		if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1814 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1815 #else
1816 		if (src1 & SLJIT_IMM) {
1817 			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1818 #endif
1819 			FAIL_IF(!inst);
1820 			*inst = LEA_r_m;
1821 			done = 1;
1822 		}
1823 	}
1824 
1825 	if (done) {
1826 		if (dst_r == TMP_REG1)
1827 			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1828 		return SLJIT_SUCCESS;
1829 	}
1830 	return SLJIT_ERR_UNSUPPORTED;
1831 }
1832 
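/* CMP has a short accumulator form, 3D id (cmp eax, imm32), one byte
   shorter than the generic 81 /7 id. The fast path below uses it when src1
   is SLJIT_R0 (mapped to eax) and the immediate does not fit in a
   sign-extended byte. */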
1833 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1834 	sljit_si src1, sljit_sw src1w,
1835 	sljit_si src2, sljit_sw src2w)
1836 {
1837 	sljit_ub* inst;
1838 
1839 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1840 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1841 #else
1842 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1843 #endif
1844 		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1845 		return SLJIT_SUCCESS;
1846 	}
1847 
1848 	if (FAST_IS_REG(src1)) {
1849 		if (src2 & SLJIT_IMM) {
1850 			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1851 		}
1852 		else {
1853 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1854 			FAIL_IF(!inst);
1855 			*inst = CMP_r_rm;
1856 		}
1857 		return SLJIT_SUCCESS;
1858 	}
1859 
1860 	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1861 		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1862 		FAIL_IF(!inst);
1863 		*inst = CMP_rm_r;
1864 		return SLJIT_SUCCESS;
1865 	}
1866 
1867 	if (src2 & SLJIT_IMM) {
1868 		if (src1 & SLJIT_IMM) {
1869 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1870 			src1 = TMP_REG1;
1871 			src1w = 0;
1872 		}
1873 		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1874 	}
1875 	else {
1876 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1877 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1878 		FAIL_IF(!inst);
1879 		*inst = CMP_r_rm;
1880 	}
1881 	return SLJIT_SUCCESS;
1882 }
1883 
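/* TEST has a similar accumulator form, A9 id (test eax, imm32); the generic
   immediate form is F7 /0 id, which is why GROUP_F7 is written below. */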
1884 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1885 	sljit_si src1, sljit_sw src1w,
1886 	sljit_si src2, sljit_sw src2w)
1887 {
1888 	sljit_ub* inst;
1889 
1890 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1891 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1892 #else
1893 	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1894 #endif
1895 		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1896 		return SLJIT_SUCCESS;
1897 	}
1898 
1899 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1900 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1901 #else
1902 	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1903 #endif
1904 		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1905 		return SLJIT_SUCCESS;
1906 	}
1907 
1908 	if (FAST_IS_REG(src1)) {
1909 		if (src2 & SLJIT_IMM) {
1910 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1911 			if (IS_HALFWORD(src2w) || compiler->mode32) {
1912 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1913 				FAIL_IF(!inst);
1914 				*inst = GROUP_F7;
1915 			}
1916 			else {
1917 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1918 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
1919 				FAIL_IF(!inst);
1920 				*inst = TEST_rm_r;
1921 			}
1922 #else
1923 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
1924 			FAIL_IF(!inst);
1925 			*inst = GROUP_F7;
1926 #endif
1927 		}
1928 		else {
1929 			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1930 			FAIL_IF(!inst);
1931 			*inst = TEST_rm_r;
1932 		}
1933 		return SLJIT_SUCCESS;
1934 	}
1935 
1936 	if (FAST_IS_REG(src2)) {
1937 		if (src1 & SLJIT_IMM) {
1938 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1939 			if (IS_HALFWORD(src1w) || compiler->mode32) {
1940 				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
1941 				FAIL_IF(!inst);
1942 				*inst = GROUP_F7;
1943 			}
1944 			else {
1945 				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1946 				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
1947 				FAIL_IF(!inst);
1948 				*inst = TEST_rm_r;
1949 			}
1950 #else
1951 			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
1952 			FAIL_IF(!inst);
1953 			*inst = GROUP_F7;
1954 #endif
1955 		}
1956 		else {
1957 			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1958 			FAIL_IF(!inst);
1959 			*inst = TEST_rm_r;
1960 		}
1961 		return SLJIT_SUCCESS;
1962 	}
1963 
1964 	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1965 	if (src2 & SLJIT_IMM) {
1966 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1967 		if (IS_HALFWORD(src2w) || compiler->mode32) {
1968 			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1969 			FAIL_IF(!inst);
1970 			*inst = GROUP_F7;
1971 		}
1972 		else {
1973 			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1974 			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1975 			FAIL_IF(!inst);
1976 			*inst = TEST_rm_r;
1977 		}
1978 #else
1979 		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1980 		FAIL_IF(!inst);
1981 		*inst = GROUP_F7;
1982 #endif
1983 	}
1984 	else {
1985 		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1986 		FAIL_IF(!inst);
1987 		*inst = TEST_rm_r;
1988 	}
1989 	return SLJIT_SUCCESS;
1990 }
1991 
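/* Variable-count shifts on x86 take their count in cl (the D3 /r opcode
   group), so src2 must end up in SLJIT_PREF_SHIFT_REG (ecx); most of the
   code below only shuffles values around to satisfy this constraint.
   Illustrative encodings:
     D3 E0       shl eax, cl
     C1 E0 05    shl eax, 5      ; immediate-count form */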
1992 static sljit_si emit_shift(struct sljit_compiler *compiler,
1993 	sljit_ub mode,
1994 	sljit_si dst, sljit_sw dstw,
1995 	sljit_si src1, sljit_sw src1w,
1996 	sljit_si src2, sljit_sw src2w)
1997 {
1998 	sljit_ub* inst;
1999 
2000 	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2001 		if (dst == src1 && dstw == src1w) {
2002 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2003 			FAIL_IF(!inst);
2004 			*inst |= mode;
2005 			return SLJIT_SUCCESS;
2006 		}
2007 		if (dst == SLJIT_UNUSED) {
2008 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2009 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2010 			FAIL_IF(!inst);
2011 			*inst |= mode;
2012 			return SLJIT_SUCCESS;
2013 		}
2014 		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2015 			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2016 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2017 			FAIL_IF(!inst);
2018 			*inst |= mode;
2019 			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2020 			return SLJIT_SUCCESS;
2021 		}
2022 		if (FAST_IS_REG(dst)) {
2023 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2024 			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2025 			FAIL_IF(!inst);
2026 			*inst |= mode;
2027 			return SLJIT_SUCCESS;
2028 		}
2029 
2030 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2031 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2032 		FAIL_IF(!inst);
2033 		*inst |= mode;
2034 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2035 		return SLJIT_SUCCESS;
2036 	}
2037 
2038 	if (dst == SLJIT_PREF_SHIFT_REG) {
2039 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2040 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2041 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2042 		FAIL_IF(!inst);
2043 		*inst |= mode;
2044 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2045 	}
2046 	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2047 		if (src1 != dst)
2048 			EMIT_MOV(compiler, dst, 0, src1, src1w);
2049 		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2050 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2051 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2052 		FAIL_IF(!inst);
2053 		*inst |= mode;
2054 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2055 	}
2056 	else {
2057 		/* This case is really difficult, since ecx itself may be used for
2058 		   addressing, and we must ensure this works even in that case. */
2059 		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2060 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2061 		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2062 #else
2063 		/* [esp+0] contains the flags. */
2064 		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2065 #endif
2066 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2067 		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2068 		FAIL_IF(!inst);
2069 		*inst |= mode;
2070 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2071 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2072 #else
2073 		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2074 #endif
2075 		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2076 	}
2077 
2078 	return SLJIT_SUCCESS;
2079 }
2080 
2081 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2082 	sljit_ub mode, sljit_si set_flags,
2083 	sljit_si dst, sljit_sw dstw,
2084 	sljit_si src1, sljit_sw src1w,
2085 	sljit_si src2, sljit_sw src2w)
2086 {
2087 	/* The CPU does not set flags if the shift count is 0. */
2088 	if (src2 & SLJIT_IMM) {
2089 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2090 		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2091 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2092 #else
2093 		if ((src2w & 0x1f) != 0)
2094 			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2095 #endif
2096 		if (!set_flags)
2097 			return emit_mov(compiler, dst, dstw, src1, src1w);
2098 		/* Emit OR dst, src, 0 to set the flags without changing the value. */
2099 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2100 			dst, dstw, src1, src1w, SLJIT_IMM, 0);
2101 	}
2102 
2103 	if (!set_flags)
2104 		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2105 
2106 	if (!FAST_IS_REG(dst))
2107 		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2108 
2109 	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2110 
2111 	if (FAST_IS_REG(dst))
2112 		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2113 	return SLJIT_SUCCESS;
2114 }
2115 
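/* A hedged usage sketch of the entry point below; SLJIT_ADD, SLJIT_SHL,
   SLJIT_INT_OP, SLJIT_IMM and SLJIT_R0/R1 are part of the public sljit API
   in this version:
     sljit_emit_op2(compiler, SLJIT_ADD, SLJIT_R0, 0, SLJIT_R0, 0, SLJIT_R1, 0);
     sljit_emit_op2(compiler, SLJIT_SHL | SLJIT_INT_OP, SLJIT_R0, 0,
         SLJIT_R0, 0, SLJIT_IMM, 3); */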
2116 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2117 	sljit_si dst, sljit_sw dstw,
2118 	sljit_si src1, sljit_sw src1w,
2119 	sljit_si src2, sljit_sw src2w)
2120 {
2121 	CHECK_ERROR();
2122 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2123 	ADJUST_LOCAL_OFFSET(dst, dstw);
2124 	ADJUST_LOCAL_OFFSET(src1, src1w);
2125 	ADJUST_LOCAL_OFFSET(src2, src2w);
2126 
2127 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2128 	CHECK_EXTRA_REGS(src1, src1w, (void)0);
2129 	CHECK_EXTRA_REGS(src2, src2w, (void)0);
2130 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2131 	compiler->mode32 = op & SLJIT_INT_OP;
2132 #endif
2133 
2134 	if (GET_OPCODE(op) >= SLJIT_MUL) {
2135 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2136 			compiler->flags_saved = 0;
2137 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2138 			FAIL_IF(emit_save_flags(compiler));
2139 	}
2140 
2141 	switch (GET_OPCODE(op)) {
2142 	case SLJIT_ADD:
2143 		if (!GET_FLAGS(op)) {
2144 			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2145 				return compiler->error;
2146 		}
2147 		else
2148 			compiler->flags_saved = 0;
2149 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2150 			FAIL_IF(emit_save_flags(compiler));
2151 		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2152 			dst, dstw, src1, src1w, src2, src2w);
2153 	case SLJIT_ADDC:
2154 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2155 			FAIL_IF(emit_restore_flags(compiler, 1));
2156 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2157 			FAIL_IF(emit_save_flags(compiler));
2158 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2159 			compiler->flags_saved = 0;
2160 		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2161 			dst, dstw, src1, src1w, src2, src2w);
2162 	case SLJIT_SUB:
2163 		if (!GET_FLAGS(op)) {
2164 			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2165 				return compiler->error;
2166 		}
2167 		else
2168 			compiler->flags_saved = 0;
2169 		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2170 			FAIL_IF(emit_save_flags(compiler));
2171 		if (dst == SLJIT_UNUSED)
2172 			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2173 		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2174 			dst, dstw, src1, src1w, src2, src2w);
2175 	case SLJIT_SUBC:
2176 		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2177 			FAIL_IF(emit_restore_flags(compiler, 1));
2178 		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2179 			FAIL_IF(emit_save_flags(compiler));
2180 		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2181 			compiler->flags_saved = 0;
2182 		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2183 			dst, dstw, src1, src1w, src2, src2w);
2184 	case SLJIT_MUL:
2185 		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2186 	case SLJIT_AND:
2187 		if (dst == SLJIT_UNUSED)
2188 			return emit_test_binary(compiler, src1, src1w, src2, src2w);
2189 		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2190 			dst, dstw, src1, src1w, src2, src2w);
2191 	case SLJIT_OR:
2192 		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2193 			dst, dstw, src1, src1w, src2, src2w);
2194 	case SLJIT_XOR:
2195 		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2196 			dst, dstw, src1, src1w, src2, src2w);
2197 	case SLJIT_SHL:
2198 		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2199 			dst, dstw, src1, src1w, src2, src2w);
2200 	case SLJIT_LSHR:
2201 		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2202 			dst, dstw, src1, src1w, src2, src2w);
2203 	case SLJIT_ASHR:
2204 		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2205 			dst, dstw, src1, src1w, src2, src2w);
2206 	}
2207 
2208 	return SLJIT_SUCCESS;
2209 }
2210 
2211 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2212 {
2213 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2214 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2215 	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2216 		return -1;
2217 #endif
2218 	return reg_map[reg];
2219 }
2220 
2221 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2222 {
2223 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2224 	return reg;
2225 }
2226 
2227 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2228 	void *instruction, sljit_si size)
2229 {
2230 	sljit_ub *inst;
2231 
2232 	CHECK_ERROR();
2233 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2234 
2235 	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2236 	FAIL_IF(!inst);
2237 	INC_SIZE(size);
2238 	SLJIT_MEMMOVE(inst, instruction, size);
2239 	return SLJIT_SUCCESS;
2240 }
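/* Usage sketch: sljit_emit_op_custom copies raw machine code verbatim into
   the instruction stream. For example, the bytes below are the standard
   encoding of the x86 'pause' instruction:
     static sljit_ub pause_insn[] = { 0xf3, 0x90 };
     sljit_emit_op_custom(compiler, pause_insn, 2); */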
2241 
2242 /* --------------------------------------------------------------------- */
2243 /*  Floating point operators                                             */
2244 /* --------------------------------------------------------------------- */
2245 
2246 /* Up to 12 bytes of alignment padding + 4 * 16 bytes of constants. */
2247 static sljit_si sse2_data[3 + (4 + 4) * 2];
2248 static sljit_si *sse2_buffer;
2249 
2250 static void init_compiler(void)
2251 {
2252 	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2253 	/* Single precision constants. */
2254 	sse2_buffer[0] = 0x80000000;
2255 	sse2_buffer[4] = 0x7fffffff;
2256 	/* Double precision constants. */
2257 	sse2_buffer[8] = 0;
2258 	sse2_buffer[9] = 0x80000000;
2259 	sse2_buffer[12] = 0xffffffff;
2260 	sse2_buffer[13] = 0x7fffffff;
2261 }
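/* These constants are consumed by SLJIT_DNEG and SLJIT_DABS in
   sljit_emit_fop1: xorpd with the sign-bit mask flips the sign of the low
   lane, and andpd with the complementary mask clears it. Each constant
   occupies its own 16-byte slot so it can serve as an aligned 128-bit
   memory operand. */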
2262 
2263 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2264 {
2265 #ifdef SLJIT_IS_FPU_AVAILABLE
2266 	return SLJIT_IS_FPU_AVAILABLE;
2267 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2268 	if (cpu_has_sse2 == -1)
2269 		get_cpu_features();
2270 	return cpu_has_sse2;
2271 #else /* SLJIT_DETECT_SSE2 */
2272 	return 1;
2273 #endif /* SLJIT_DETECT_SSE2 */
2274 }
2275 
2276 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2277 	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2278 {
2279 	sljit_ub *inst;
2280 
2281 	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2282 	FAIL_IF(!inst);
2283 	*inst++ = GROUP_0F;
2284 	*inst = opcode;
2285 	return SLJIT_SUCCESS;
2286 }
2287 
2288 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2289 	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2290 {
2291 	sljit_ub *inst;
2292 
2293 	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2294 	FAIL_IF(!inst);
2295 	*inst++ = GROUP_0F;
2296 	*inst = opcode;
2297 	return SLJIT_SUCCESS;
2298 }
2299 
2300 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2301 	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2302 {
2303 	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2304 }
2305 
2306 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2307 	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2308 {
2309 	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2310 }
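/* The scalar load/store above use MOVSD (F2 0F 10 /r to load, F2 0F 11 /r
   to store); with the 'single' flag the F3 prefix is emitted instead, which
   turns them into MOVSS. */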
2311 
2312 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
2313 	sljit_si dst, sljit_sw dstw,
2314 	sljit_si src, sljit_sw srcw)
2315 {
2316 	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2317 	sljit_ub *inst;
2318 
2319 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2320 	if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
2321 		compiler->mode32 = 0;
2322 #endif
2323 
2324 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2325 	FAIL_IF(!inst);
2326 	*inst++ = GROUP_0F;
2327 	*inst = CVTTSD2SI_r_xm;
2328 
2329 	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2330 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2331 	return SLJIT_SUCCESS;
2332 }
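/* The conversion above is CVTTSD2SI (F2 0F 2C /r), which truncates toward
   zero; with the F3 prefix selected for single precision it becomes
   CVTTSS2SI. */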
2333 
2334 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
2335 	sljit_si dst, sljit_sw dstw,
2336 	sljit_si src, sljit_sw srcw)
2337 {
2338 	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2339 	sljit_ub *inst;
2340 
2341 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2342 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
2343 		compiler->mode32 = 0;
2344 #endif
2345 
2346 	if (src & SLJIT_IMM) {
2347 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2348 		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
2349 			srcw = (sljit_si)srcw;
2350 #endif
2351 		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2352 		src = TMP_REG1;
2353 		srcw = 0;
2354 	}
2355 
2356 	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2357 	FAIL_IF(!inst);
2358 	*inst++ = GROUP_0F;
2359 	*inst = CVTSI2SD_x_rm;
2360 
2361 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2362 	compiler->mode32 = 1;
2363 #endif
2364 	if (dst_r == TMP_FREG)
2365 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2366 	return SLJIT_SUCCESS;
2367 }
2368 
2369 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
2370 	sljit_si src1, sljit_sw src1w,
2371 	sljit_si src2, sljit_sw src2w)
2372 {
2373 	compiler->flags_saved = 0;
2374 	if (!FAST_IS_REG(src1)) {
2375 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2376 		src1 = TMP_FREG;
2377 	}
2378 	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
2379 }
2380 
2381 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2382 	sljit_si dst, sljit_sw dstw,
2383 	sljit_si src, sljit_sw srcw)
2384 {
2385 	sljit_si dst_r;
2386 
2387 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2388 	compiler->mode32 = 1;
2389 #endif
2390 
2391 	CHECK_ERROR();
2392 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2393 
2394 	if (GET_OPCODE(op) == SLJIT_DMOV) {
2395 		if (FAST_IS_REG(dst))
2396 			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2397 		if (FAST_IS_REG(src))
2398 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
2399 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2400 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2401 	}
2402 
2403 	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
2404 		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2405 		if (FAST_IS_REG(src)) {
2406 			/* We overwrite the high bits of the source register. From SLJIT's
2407 			   point of view, this is not an issue.
2408 			   Note: in SSE3, we could also use MOVDDUP and MOVSLDUP. */
2409 			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
2410 		}
2411 		else {
2412 			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
2413 			src = TMP_FREG;
2414 		}
2415 
2416 		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
2417 		if (dst_r == TMP_FREG)
2418 			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2419 		return SLJIT_SUCCESS;
2420 	}
2421 
2422 	if (SLOW_IS_REG(dst)) {
2423 		dst_r = dst;
2424 		if (dst != src)
2425 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2426 	}
2427 	else {
2428 		dst_r = TMP_FREG;
2429 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2430 	}
2431 
2432 	switch (GET_OPCODE(op)) {
2433 	case SLJIT_DNEG:
2434 		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
2435 		break;
2436 
2437 	case SLJIT_DABS:
2438 		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2439 		break;
2440 	}
2441 
2442 	if (dst_r == TMP_FREG)
2443 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2444 	return SLJIT_SUCCESS;
2445 }
2446 
2447 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2448 	sljit_si dst, sljit_sw dstw,
2449 	sljit_si src1, sljit_sw src1w,
2450 	sljit_si src2, sljit_sw src2w)
2451 {
2452 	sljit_si dst_r;
2453 
2454 	CHECK_ERROR();
2455 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2456 	ADJUST_LOCAL_OFFSET(dst, dstw);
2457 	ADJUST_LOCAL_OFFSET(src1, src1w);
2458 	ADJUST_LOCAL_OFFSET(src2, src2w);
2459 
2460 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2461 	compiler->mode32 = 1;
2462 #endif
2463 
2464 	if (FAST_IS_REG(dst)) {
2465 		dst_r = dst;
2466 		if (dst == src1)
2467 			; /* Do nothing here. */
2468 		else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
2469 			/* Swap arguments. */
2470 			src2 = src1;
2471 			src2w = src1w;
2472 		}
2473 		else if (dst != src2)
2474 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
2475 		else {
2476 			dst_r = TMP_FREG;
2477 			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2478 		}
2479 	}
2480 	else {
2481 		dst_r = TMP_FREG;
2482 		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2483 	}
2484 
2485 	switch (GET_OPCODE(op)) {
2486 	case SLJIT_DADD:
2487 		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2488 		break;
2489 
2490 	case SLJIT_DSUB:
2491 		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2492 		break;
2493 
2494 	case SLJIT_DMUL:
2495 		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2496 		break;
2497 
2498 	case SLJIT_DDIV:
2499 		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2500 		break;
2501 	}
2502 
2503 	if (dst_r == TMP_FREG)
2504 		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2505 	return SLJIT_SUCCESS;
2506 }
2507 
2508 /* --------------------------------------------------------------------- */
2509 /*  Conditional instructions                                             */
2510 /* --------------------------------------------------------------------- */
2511 
2512 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2513 {
2514 	sljit_ub *inst;
2515 	struct sljit_label *label;
2516 
2517 	CHECK_ERROR_PTR();
2518 	CHECK_PTR(check_sljit_emit_label(compiler));
2519 
2520 	/* We should restore the flags before the label,
2521 	   since other taken jumps have their own flags as well. */
2522 	if (SLJIT_UNLIKELY(compiler->flags_saved))
2523 		PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2524 
2525 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2526 		return compiler->last_label;
2527 
2528 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2529 	PTR_FAIL_IF(!label);
2530 	set_label(label, compiler);
2531 
2532 	inst = (sljit_ub*)ensure_buf(compiler, 2);
2533 	PTR_FAIL_IF(!inst);
2534 
2535 	*inst++ = 0;
2536 	*inst++ = 0;
2537 
2538 	return label;
2539 }
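/* A zero byte in the instruction buffer acts as an internal record marker:
   the byte that follows selects the record type (0 for the label above,
   1 for constants in sljit_emit_const, and type + 4 for jumps). These
   markers are resolved later, during final code generation. */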
2540 
2541 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2542 {
2543 	sljit_ub *inst;
2544 	struct sljit_jump *jump;
2545 
2546 	CHECK_ERROR_PTR();
2547 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2548 
2549 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2550 		if ((type & 0xff) <= SLJIT_JUMP)
2551 			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2552 		compiler->flags_saved = 0;
2553 	}
2554 
2555 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2556 	PTR_FAIL_IF_NULL(jump);
2557 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2558 	type &= 0xff;
2559 
2560 	if (type >= SLJIT_CALL1)
2561 		PTR_FAIL_IF(call_with_args(compiler, type));
2562 
2563 	/* Worst case size. */
2564 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2565 	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2566 #else
2567 	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2568 #endif
2569 
2570 	inst = (sljit_ub*)ensure_buf(compiler, 2);
2571 	PTR_FAIL_IF_NULL(inst);
2572 
2573 	*inst++ = 0;
2574 	*inst++ = type + 4;
2575 	return jump;
2576 }
2577 
2578 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2579 {
2580 	sljit_ub *inst;
2581 	struct sljit_jump *jump;
2582 
2583 	CHECK_ERROR();
2584 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2585 	ADJUST_LOCAL_OFFSET(src, srcw);
2586 
2587 	CHECK_EXTRA_REGS(src, srcw, (void)0);
2588 
2589 	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2590 		if (type <= SLJIT_JUMP)
2591 			FAIL_IF(emit_restore_flags(compiler, 0));
2592 		compiler->flags_saved = 0;
2593 	}
2594 
2595 	if (type >= SLJIT_CALL1) {
2596 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2597 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2598 		if (src == SLJIT_R2) {
2599 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2600 			src = TMP_REG1;
2601 		}
2602 		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2603 			srcw += sizeof(sljit_sw);
2604 #endif
2605 #endif
2606 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2607 		if (src == SLJIT_R2) {
2608 			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2609 			src = TMP_REG1;
2610 		}
2611 #endif
2612 		FAIL_IF(call_with_args(compiler, type));
2613 	}
2614 
2615 	if (src == SLJIT_IMM) {
2616 		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2617 		FAIL_IF_NULL(jump);
2618 		set_jump(jump, compiler, JUMP_ADDR);
2619 		jump->u.target = srcw;
2620 
2621 		/* Worst case size. */
2622 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2623 		compiler->size += 5;
2624 #else
2625 		compiler->size += 10 + 3;
2626 #endif
2627 
2628 		inst = (sljit_ub*)ensure_buf(compiler, 2);
2629 		FAIL_IF_NULL(inst);
2630 
2631 		*inst++ = 0;
2632 		*inst++ = type + 4;
2633 	}
2634 	else {
2635 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2636 		/* REX_W is not necessary (src is not immediate). */
2637 		compiler->mode32 = 1;
2638 #endif
2639 		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2640 		FAIL_IF(!inst);
2641 		*inst++ = GROUP_FF;
2642 		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2643 	}
2644 	return SLJIT_SUCCESS;
2645 }
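/* The indirect forms emitted above are FF /2 (call r/m) and FF /4 (jmp r/m),
   e.g. FF D0 = call eax and FF E0 = jmp eax. */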
2646 
2647 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2648 	sljit_si dst, sljit_sw dstw,
2649 	sljit_si src, sljit_sw srcw,
2650 	sljit_si type)
2651 {
2652 	sljit_ub *inst;
2653 	sljit_ub cond_set = 0;
2654 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2655 	sljit_si reg;
2656 #else
2657 	/* CHECK_EXTRA_REGS might overwrite these values. */
2658 	sljit_si dst_save = dst;
2659 	sljit_sw dstw_save = dstw;
2660 #endif
2661 
2662 	CHECK_ERROR();
2663 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2664 	SLJIT_UNUSED_ARG(srcw);
2665 
2666 	if (dst == SLJIT_UNUSED)
2667 		return SLJIT_SUCCESS;
2668 
2669 	ADJUST_LOCAL_OFFSET(dst, dstw);
2670 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2671 	if (SLJIT_UNLIKELY(compiler->flags_saved))
2672 		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2673 
2674 	type &= 0xff;
2675 	/* setcc = jcc + 0x10. */
2676 	cond_set = get_jump_code(type) + 0x10;
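	/* Example: for an equal comparison, get_jump_code() returns 0x84 (the
	   second byte of the two-byte 'je rel32' opcode 0F 84), so cond_set
	   becomes 0x94, i.e. 'sete' (0F 94). */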
2677 
2678 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2679 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2680 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
2681 		FAIL_IF(!inst);
2682 		INC_SIZE(4 + 3);
2683 		/* Set low register to conditional flag. */
2684 		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2685 		*inst++ = GROUP_0F;
2686 		*inst++ = cond_set;
2687 		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
2688 		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2689 		*inst++ = OR_rm8_r8;
2690 		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2691 		return SLJIT_SUCCESS;
2692 	}
2693 
2694 	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2695 
2696 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2697 	FAIL_IF(!inst);
2698 	INC_SIZE(4 + 4);
2699 	/* Set low register to conditional flag. */
2700 	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2701 	*inst++ = GROUP_0F;
2702 	*inst++ = cond_set;
2703 	*inst++ = MOD_REG | reg_lmap[reg];
2704 	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2705 	*inst++ = GROUP_0F;
2706 	*inst++ = MOVZX_r_rm8;
2707 	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2708 
2709 	if (reg != TMP_REG1)
2710 		return SLJIT_SUCCESS;
2711 
2712 	if (GET_OPCODE(op) < SLJIT_ADD) {
2713 		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2714 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2715 	}
2716 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2717 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2718 	compiler->skip_checks = 1;
2719 #endif
2720 	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2721 #else /* SLJIT_CONFIG_X86_64 */
2722 	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2723 		if (reg_map[dst] <= 4) {
2724 			/* Low byte is accessible. */
2725 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2726 			FAIL_IF(!inst);
2727 			INC_SIZE(3 + 3);
2728 			/* Set low byte to conditional flag. */
2729 			*inst++ = GROUP_0F;
2730 			*inst++ = cond_set;
2731 			*inst++ = MOD_REG | reg_map[dst];
2732 
2733 			*inst++ = GROUP_0F;
2734 			*inst++ = MOVZX_r_rm8;
2735 			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2736 			return SLJIT_SUCCESS;
2737 		}
2738 
2739 		/* Low byte is not accessible. */
2740 		if (cpu_has_cmov == -1)
2741 			get_cpu_features();
2742 
2743 		if (cpu_has_cmov) {
2744 			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2745 			/* A xor reg, reg operation would overwrite the flags, so mov is used instead. */
2746 			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2747 
2748 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2749 			FAIL_IF(!inst);
2750 			INC_SIZE(3);
2751 
2752 			*inst++ = GROUP_0F;
2753 			/* cmovcc = setcc - 0x50. */
2754 			*inst++ = cond_set - 0x50;
2755 			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2756 			return SLJIT_SUCCESS;
2757 		}
2758 
2759 		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2760 		FAIL_IF(!inst);
2761 		INC_SIZE(1 + 3 + 3 + 1);
2762 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2763 		/* Set al to conditional flag. */
2764 		*inst++ = GROUP_0F;
2765 		*inst++ = cond_set;
2766 		*inst++ = MOD_REG | 0 /* eax */;
2767 
2768 		*inst++ = GROUP_0F;
2769 		*inst++ = MOVZX_r_rm8;
2770 		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2771 		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2772 		return SLJIT_SUCCESS;
2773 	}
2774 
2775 	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2776 		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2777 		if (dst != SLJIT_R0) {
2778 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2779 			FAIL_IF(!inst);
2780 			INC_SIZE(1 + 3 + 2 + 1);
2781 			/* Set low register to conditional flag. */
2782 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2783 			*inst++ = GROUP_0F;
2784 			*inst++ = cond_set;
2785 			*inst++ = MOD_REG | 0 /* eax */;
2786 			*inst++ = OR_rm8_r8;
2787 			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2788 			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2789 		}
2790 		else {
2791 			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2792 			FAIL_IF(!inst);
2793 			INC_SIZE(2 + 3 + 2 + 2);
2794 			/* Set low register to conditional flag. */
2795 			*inst++ = XCHG_r_rm;
2796 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2797 			*inst++ = GROUP_0F;
2798 			*inst++ = cond_set;
2799 			*inst++ = MOD_REG | 1 /* ecx */;
2800 			*inst++ = OR_rm8_r8;
2801 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2802 			*inst++ = XCHG_r_rm;
2803 			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2804 		}
2805 		return SLJIT_SUCCESS;
2806 	}
2807 
2808 	/* Set TMP_REG1 to the bit. */
2809 	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2810 	FAIL_IF(!inst);
2811 	INC_SIZE(1 + 3 + 3 + 1);
2812 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2813 	/* Set al to conditional flag. */
2814 	*inst++ = GROUP_0F;
2815 	*inst++ = cond_set;
2816 	*inst++ = MOD_REG | 0 /* eax */;
2817 
2818 	*inst++ = GROUP_0F;
2819 	*inst++ = MOVZX_r_rm8;
2820 	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2821 
2822 	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2823 
2824 	if (GET_OPCODE(op) < SLJIT_ADD)
2825 		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2826 
2827 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2828 		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2829 	compiler->skip_checks = 1;
2830 #endif
2831 	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2832 #endif /* SLJIT_CONFIG_X86_64 */
2833 }
2834 
2835 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2836 {
2837 	CHECK_ERROR();
2838 	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2839 	ADJUST_LOCAL_OFFSET(dst, dstw);
2840 
2841 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2842 
2843 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2844 	compiler->mode32 = 0;
2845 #endif
2846 
2847 	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2848 
2849 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2850 	if (NOT_HALFWORD(offset)) {
2851 		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2852 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2853 		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2854 		return compiler->error;
2855 #else
2856 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2857 #endif
2858 	}
2859 #endif
2860 
2861 	if (offset != 0)
2862 		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2863 	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2864 }
2865 
2866 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2867 {
2868 	sljit_ub *inst;
2869 	struct sljit_const *const_;
2870 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2871 	sljit_si reg;
2872 #endif
2873 
2874 	CHECK_ERROR_PTR();
2875 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2876 	ADJUST_LOCAL_OFFSET(dst, dstw);
2877 
2878 	CHECK_EXTRA_REGS(dst, dstw, (void)0);
2879 
2880 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2881 	PTR_FAIL_IF(!const_);
2882 	set_const(const_, compiler);
2883 
2884 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2885 	compiler->mode32 = 0;
2886 	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2887 
2888 	if (emit_load_imm64(compiler, reg, init_value))
2889 		return NULL;
2890 #else
2891 	if (dst == SLJIT_UNUSED)
2892 		dst = TMP_REG1;
2893 
2894 	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2895 		return NULL;
2896 #endif
2897 
2898 	inst = (sljit_ub*)ensure_buf(compiler, 2);
2899 	PTR_FAIL_IF(!inst);
2900 
2901 	*inst++ = 0;
2902 	*inst++ = 1;
2903 
2904 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2905 	if (dst & SLJIT_MEM)
2906 		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2907 			return NULL;
2908 #endif
2909 
2910 	return const_;
2911 }
2912 
2913 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2914 {
2915 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
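	/* addr points at the rel32 field of the jump; the displacement is
	   relative to the end of that 4-byte field (addr + 4). */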
2916 	*(sljit_sw*)addr = new_addr - (addr + 4);
2917 #else
2918 	*(sljit_uw*)addr = new_addr;
2919 #endif
2920 }
2921 
2922 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2923 {
2924 	*(sljit_sw*)addr = new_constant;
2925 }
2926