/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
   0 - EAX
   1 - ECX
   2 - EDX
   3 - EBX
   4 - none
   5 - EBP
   6 - ESI
   7 - EDI
*/

/*
   64b register indexes:
   0 - RAX
   1 - RCX
   2 - RDX
   3 - RBX
   4 - none
   5 - RBP
   6 - RSI
   7 - RDI
   8 - R8 - From now on REX prefix is required
   9 - R9
   10 - R10
   11 - R11
   12 - R12
   13 - R13
   14 - R14
   15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

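/* SLJIT_R3 - R6 (the "extra" registers, mapped to 0 above) have no machine
   register on x86-32; they live in the stack frame. CHECK_EXTRA_REGS rewrites
   such an operand into an SLJIT_MEM1(SLJIT_SP) access at the matching local
   offset and executes "do" so the caller can record that the operand became a
   memory access. */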
#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif

#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41
#define REX 0x40
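/* REX is encoded as 0b0100WRXB: W selects 64-bit operand size, while R, X and
   B supply the fourth (high) bit of the ModRM reg field, the SIB index field
   and the ModRM rm / SIB base field respectively. The bits are ORed together,
   e.g. REX_W | REX_B for a 64-bit operation on r8-r15 in the rm field. */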

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG (0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS 0x0010
#define EX86_SHIFT_INS 0x0020
#define EX86_REX 0x0040
#define EX86_NO_REXW 0x0080
#define EX86_BYTE_ARG 0x0100
#define EX86_HALF_ARG 0x0200
#define EX86_PREF_66 0x0400
#define EX86_PREF_F2 0x0800
#define EX86_PREF_F3 0x1000
#define EX86_SSE2_OP1 0x2000
#define EX86_SSE2_OP2 0x4000
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */

#define ADD (/* BINARY */ 0 << 3)
#define ADD_EAX_i32 0x05
#define ADD_r_rm 0x03
#define ADD_rm_r 0x01
#define ADDSD_x_xm 0x58
#define ADC (/* BINARY */ 2 << 3)
#define ADC_EAX_i32 0x15
#define ADC_r_rm 0x13
#define ADC_rm_r 0x11
#define AND (/* BINARY */ 4 << 3)
#define AND_EAX_i32 0x25
#define AND_r_rm 0x23
#define AND_rm_r 0x21
#define ANDPD_x_xm 0x54
#define BSR_r_rm (/* GROUP_0F */ 0xbd)
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
#define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
#define CVTPD2PS_x_xm 0x5a
#define CVTSI2SD_x_rm 0x2a
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define INT3 0xcc
#define IDIV (/* GROUP_F7 */ 7 << 3)
#define IMUL (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8 0x6b
#define IMUL_r_rm_i32 0x69
#define JE_i8 0x74
#define JNE_i8 0x75
#define JMP_i8 0xeb
#define JMP_i32 0xe9
#define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d
#define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8
#define MOV_rm_r 0x89
#define MOV_rm_i32 0xc7
#define MOV_rm8_i8 0xc6
#define MOV_rm8_r8 0x88
#define MOVSD_x_xm 0x10
#define MOVSD_xm_x 0x11
#define MOVSXD_r_rm 0x63
#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
#define MUL (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm 0x59
#define NEG_rm (/* GROUP_F7 */ 3 << 3)
#define NOP 0x90
#define NOT_rm (/* GROUP_F7 */ 2 << 3)
#define OR (/* BINARY */ 1 << 3)
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
#define OR_rm8_r8 0x08
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
#define PUSHF 0x9c
#define RET_near 0xc3
#define RET_i16 0xc2
#define SBB (/* BINARY */ 3 << 3)
#define SBB_EAX_i32 0x1d
#define SBB_r_rm 0x1b
#define SBB_rm_r 0x19
#define SAR (/* SHIFT */ 7 << 3)
#define SHL (/* SHIFT */ 4 << 3)
#define SHR (/* SHIFT */ 5 << 3)
#define SUB (/* BINARY */ 5 << 3)
#define SUB_EAX_i32 0x2d
#define SUB_r_rm 0x2b
#define SUB_rm_r 0x29
#define SUBSD_x_xm 0x5c
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define UCOMISD_x_xm 0x2e
#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
#define XOR_EAX_i32 0x35
#define XOR_r_rm 0x33
#define XOR_rm_r 0x31
#define XORPD_x_xm 0x57

#define GROUP_0F 0x0f
#define GROUP_F7 0xf7
#define GROUP_FF 0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1 0xd1
#define GROUP_SHIFT_N 0xc1
#define GROUP_SHIFT_CL 0xd3

#define MOD_REG 0xc0
#define MOD_DISP8 0x40
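/* The ModRM byte is mod(2 bits) | reg(3 bits) | rm(3 bits): MOD_REG (0xc0)
   selects the register-direct form, MOD_DISP8 (0x40) a memory operand with
   an 8-bit displacement. */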

#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
#define POP_REG(r) (*inst++ = (POP_r + (r)))
#define RET() (*inst++ = (RET_near))
#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
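/* Example: MOV_RM(0x3, 1, 0) emits 8B C8, that is "mov ecx, eax":
   mod 11 selects the register-direct form, reg = 1 (ECX), rm = 0 (EAX). */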

/* These static variables are safe even with multithreading, since they only
   store built-in CPU features: if several threads detect the features at the
   same time, they simply overwrite each other with identical values. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

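/* CPUID with EAX = 1 returns the feature flags in EDX: bit 15 indicates CMOV
   and bit 26 indicates SSE2, which is what get_cpu_features extracts below. */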
static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}

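/* get_jump_code returns the second opcode byte of the near conditional jump
   form (0F 8x rel32); subtracting 0x10 from it yields the corresponding
   short form opcode (7x rel8). */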
static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_D_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_D_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_D_LESS:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_D_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_D_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_D_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_D_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_D_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;
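	/* jump->addr + 2 is the end address of the short form (opcode + rel8);
	   a jump counts as short when the displacement measured from that point
	   fits into a signed byte. */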

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
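	/* The instruction buffer is a sequence of records: a non-zero length
	   byte followed by that many bytes of raw machine code, or a zero
	   length byte followed by a tag byte (0 = label, 1 = const,
	   2 / 3 = call / jump to a fixed address stored in the following
	   machine word, >= 4 = jump whose type is tag - 4). */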
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

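/* Flag save/restore trick: LEA (which, unlike ADD or SUB, does not modify
   EFLAGS) temporarily moves the stack pointer above the top-of-stack word, so
   the following PUSHF stores EFLAGS into that word while leaving esp/rsp
   unchanged overall; emit_restore_flags performs the mirror image with POPF.
   This assumes the word at the stack top is reserved as scratch by this
   backend's frame layout. */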
static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4k pages belonging to the requested stack
	   space, whose size is passed in local_size. This is necessary on
	   Windows, where the stack can only grow in 4k steps. If the stack is
	   already large enough, the call just burns CPU cycles; but since that
	   cannot be known in advance, it must always be called. I think this is
	   a bad design in general, even if it has its reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination; there is no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LUMUL:
	case SLJIT_LSMUL:
	case SLJIT_UDIVMOD:
	case SLJIT_SDIVMOD:
	case SLJIT_UDIVI:
	case SLJIT_SDIVI:
		compiler->flags_saved = 0;
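		/* These opcodes are implemented with the one-operand F7 group
		   (MUL/IMUL/DIV/IDIV), which implicitly uses (E/R)AX and (E/R)DX
		   for the product or the quotient/remainder pair; the compile-time
		   asserts below pin the SLJIT register mapping accordingly. */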
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_UDIVMOD & 0x2) == 0 && SLJIT_UDIVI - 0x2 == SLJIT_UDIVMOD, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_UDIVI) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_SDIVI) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_UDIVMOD) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_UDIVMOD) ? REX_B : 0);
		else if (op >= SLJIT_UDIVMOD)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_UDIVMOD) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LUMUL:
			*inst |= MUL;
			break;
		case SLJIT_LSMUL:
			*inst |= IMUL;
			break;
		case SLJIT_UDIVMOD:
		case SLJIT_UDIVI:
			*inst |= DIV;
			break;
		case SLJIT_SDIVMOD:
		case SLJIT_SDIVI:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_SDIVMOD)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_UDIVI)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

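	/* In 32-bit mode only registers with reg_map values below 4 (EAX, ECX,
	   EDX, EBX) have directly addressable low byte forms (AL..BL); the paths
	   below shuffle the value into such a register when necessary. On x86-64
	   a REX prefix makes the low byte of every register addressable. */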
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is below 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif
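	/* BSR sets ZF for a zero input (its destination is undefined in that
	   case), so dst_r is preloaded with 32 + 31 (or 64 + 63) and only
	   overwritten by the conditional move below when the source is non-zero.
	   The later "xor dst_r, 31" (or 63) turns a bit index i into 31 - i
	   (63 - i), the leading zero count, and turns the preloaded value into
	   exactly 32 (64). */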

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
# define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
# undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
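/* x86-64 instructions can encode at most 32-bit immediates, so on that
   target BINARY_IMM falls back to loading the value into TMP_REG2 and using
   the register form whenever the immediate does not fit into a sign-extended
   halfword. */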

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better left to the normal, flag-setting code path. */
1794 if (!keep_flags) {
1795 if (dst == src1 && dstw == src1w)
1796 return SLJIT_ERR_UNSUPPORTED;
1797 if (dst == src2 && dstw == src2w)
1798 return SLJIT_ERR_UNSUPPORTED;
1799 }
1800
1801 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1802
1803 if (FAST_IS_REG(src1)) {
1804 if (FAST_IS_REG(src2)) {
1805 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1806 FAIL_IF(!inst);
1807 *inst = LEA_r_m;
1808 done = 1;
1809 }
1810 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1811 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1812 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1813 #else
1814 if (src2 & SLJIT_IMM) {
1815 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1816 #endif
1817 FAIL_IF(!inst);
1818 *inst = LEA_r_m;
1819 done = 1;
1820 }
1821 }
1822 else if (FAST_IS_REG(src2)) {
1823 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1824 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1825 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1826 #else
1827 if (src1 & SLJIT_IMM) {
1828 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1829 #endif
1830 FAIL_IF(!inst);
1831 *inst = LEA_r_m;
1832 done = 1;
1833 }
1834 }
1835
1836 if (done) {
1837 if (dst_r == TMP_REG1)
1838 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1839 return SLJIT_SUCCESS;
1840 }
1841 return SLJIT_ERR_UNSUPPORTED;
1842 }
1843
1844 static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
1845 sljit_si src1, sljit_sw src1w,
1846 sljit_si src2, sljit_sw src2w)
1847 {
1848 sljit_ub* inst;
1849
1850 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1851 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1852 #else
1853 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1854 #endif
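/* Accumulator short form: cmp eax, imm32 is encoded as 3D id,
   one byte shorter than the generic 81 /7 id. */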
1855 BINARY_EAX_IMM(CMP_EAX_i32, src2w);
1856 return SLJIT_SUCCESS;
1857 }
1858
1859 if (FAST_IS_REG(src1)) {
1860 if (src2 & SLJIT_IMM) {
1861 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
1862 }
1863 else {
1864 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1865 FAIL_IF(!inst);
1866 *inst = CMP_r_rm;
1867 }
1868 return SLJIT_SUCCESS;
1869 }
1870
1871 if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
1872 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1873 FAIL_IF(!inst);
1874 *inst = CMP_rm_r;
1875 return SLJIT_SUCCESS;
1876 }
1877
1878 if (src2 & SLJIT_IMM) {
1879 if (src1 & SLJIT_IMM) {
1880 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1881 src1 = TMP_REG1;
1882 src1w = 0;
1883 }
1884 BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
1885 }
1886 else {
1887 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1888 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1889 FAIL_IF(!inst);
1890 *inst = CMP_r_rm;
1891 }
1892 return SLJIT_SUCCESS;
1893 }
1894
1895 static sljit_si emit_test_binary(struct sljit_compiler *compiler,
1896 sljit_si src1, sljit_sw src1w,
1897 sljit_si src2, sljit_sw src2w)
1898 {
1899 sljit_ub* inst;
1900
1901 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1902 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1903 #else
1904 if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
1905 #endif
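/* Accumulator short form again: test eax, imm32 is A9 id instead
   of the generic F7 /0 id. */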
1906 BINARY_EAX_IMM(TEST_EAX_i32, src2w);
1907 return SLJIT_SUCCESS;
1908 }
1909
1910 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1911 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1912 #else
1913 if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
1914 #endif
1915 BINARY_EAX_IMM(TEST_EAX_i32, src1w);
1916 return SLJIT_SUCCESS;
1917 }
1918
1919 if (!(src1 & SLJIT_IMM)) {
1920 if (src2 & SLJIT_IMM) {
1921 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1922 if (IS_HALFWORD(src2w) || compiler->mode32) {
1923 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1924 FAIL_IF(!inst);
1925 *inst = GROUP_F7;
1926 }
1927 else {
1928 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1929 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, src1w);
1930 FAIL_IF(!inst);
1931 *inst = TEST_rm_r;
1932 }
1933 #else
1934 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
1935 FAIL_IF(!inst);
1936 *inst = GROUP_F7;
1937 #endif
1938 return SLJIT_SUCCESS;
1939 }
1940 else if (FAST_IS_REG(src1)) {
1941 inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
1942 FAIL_IF(!inst);
1943 *inst = TEST_rm_r;
1944 return SLJIT_SUCCESS;
1945 }
1946 }
1947
1948 if (!(src2 & SLJIT_IMM)) {
1949 if (src1 & SLJIT_IMM) {
1950 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1951 if (IS_HALFWORD(src1w) || compiler->mode32) {
1952 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
1953 FAIL_IF(!inst);
1954 *inst = GROUP_F7;
1955 }
1956 else {
1957 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
1958 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, src2w);
1959 FAIL_IF(!inst);
1960 *inst = TEST_rm_r;
1961 }
1962 #else
1963 inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
1964 FAIL_IF(!inst);
1965 *inst = GROUP_F7;
1966 #endif
1967 return SLJIT_SUCCESS;
1968 }
1969 else if (FAST_IS_REG(src2)) {
1970 inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
1971 FAIL_IF(!inst);
1972 *inst = TEST_rm_r;
1973 return SLJIT_SUCCESS;
1974 }
1975 }
1976
1977 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
1978 if (src2 & SLJIT_IMM) {
1979 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1980 if (IS_HALFWORD(src2w) || compiler->mode32) {
1981 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1982 FAIL_IF(!inst);
1983 *inst = GROUP_F7;
1984 }
1985 else {
1986 FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
1987 inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
1988 FAIL_IF(!inst);
1989 *inst = TEST_rm_r;
1990 }
1991 #else
1992 inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
1993 FAIL_IF(!inst);
1994 *inst = GROUP_F7;
1995 #endif
1996 }
1997 else {
1998 inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
1999 FAIL_IF(!inst);
2000 *inst = TEST_rm_r;
2001 }
2002 return SLJIT_SUCCESS;
2003 }
2004
2005 static sljit_si emit_shift(struct sljit_compiler *compiler,
2006 sljit_ub mode,
2007 sljit_si dst, sljit_sw dstw,
2008 sljit_si src1, sljit_sw src1w,
2009 sljit_si src2, sljit_sw src2w)
2010 {
2011 sljit_ub* inst;
2012
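/* x86 variable shifts take their count only from CL, so
   SLJIT_PREF_SHIFT_REG is ecx; any other count operand has to be
   moved through ecx, which the cases below arrange. */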
2013 if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
2014 if (dst == src1 && dstw == src1w) {
2015 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
2016 FAIL_IF(!inst);
2017 *inst |= mode;
2018 return SLJIT_SUCCESS;
2019 }
2020 if (dst == SLJIT_UNUSED) {
2021 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2022 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2023 FAIL_IF(!inst);
2024 *inst |= mode;
2025 return SLJIT_SUCCESS;
2026 }
2027 if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
2028 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2029 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2030 FAIL_IF(!inst);
2031 *inst |= mode;
2032 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2033 return SLJIT_SUCCESS;
2034 }
2035 if (FAST_IS_REG(dst)) {
2036 EMIT_MOV(compiler, dst, 0, src1, src1w);
2037 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
2038 FAIL_IF(!inst);
2039 *inst |= mode;
2040 return SLJIT_SUCCESS;
2041 }
2042
2043 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2044 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
2045 FAIL_IF(!inst);
2046 *inst |= mode;
2047 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2048 return SLJIT_SUCCESS;
2049 }
2050
2051 if (dst == SLJIT_PREF_SHIFT_REG) {
2052 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2053 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2054 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2055 FAIL_IF(!inst);
2056 *inst |= mode;
2057 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2058 }
2059 else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
2060 if (src1 != dst)
2061 EMIT_MOV(compiler, dst, 0, src1, src1w);
2062 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
2063 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2064 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
2065 FAIL_IF(!inst);
2066 *inst |= mode;
2067 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2068 }
2069 else {
2070 /* This case is really difficult, since ecx itself may be used for
2071 addressing, and we must ensure this works even in that case. */
2072 EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
2073 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2074 EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
2075 #else
2076 /* [esp+0] contains the flags. */
2077 EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
2078 #endif
2079 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
2080 inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
2081 FAIL_IF(!inst);
2082 *inst |= mode;
2083 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2084 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
2085 #else
2086 EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
2087 #endif
2088 EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
2089 }
2090
2091 return SLJIT_SUCCESS;
2092 }
2093
2094 static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
2095 sljit_ub mode, sljit_si set_flags,
2096 sljit_si dst, sljit_sw dstw,
2097 sljit_si src1, sljit_sw src1w,
2098 sljit_si src2, sljit_sw src2w)
2099 {
2100 /* The CPU does not set flags if the shift count is 0. */
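/* E.g. "shl eax, 0" leaves ZF/SF/CF untouched, so the shift alone
   could not satisfy a flag-setting request. */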
2101 if (src2 & SLJIT_IMM) {
2102 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2103 if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
2104 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2105 #else
2106 if ((src2w & 0x1f) != 0)
2107 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2108 #endif
2109 if (!set_flags)
2110 return emit_mov(compiler, dst, dstw, src1, src1w);
2111 /* OR dst, src, 0 */
2112 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2113 dst, dstw, src1, src1w, SLJIT_IMM, 0);
2114 }
2115
2116 if (!set_flags)
2117 return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
2118
2119 if (!FAST_IS_REG(dst))
2120 FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));
2121
2122 FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));
2123
2124 if (FAST_IS_REG(dst))
2125 return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
2126 return SLJIT_SUCCESS;
2127 }
2128
2129 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
2130 sljit_si dst, sljit_sw dstw,
2131 sljit_si src1, sljit_sw src1w,
2132 sljit_si src2, sljit_sw src2w)
2133 {
2134 CHECK_ERROR();
2135 CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2136 ADJUST_LOCAL_OFFSET(dst, dstw);
2137 ADJUST_LOCAL_OFFSET(src1, src1w);
2138 ADJUST_LOCAL_OFFSET(src2, src2w);
2139
2140 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2141 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2142 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2143 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2144 compiler->mode32 = op & SLJIT_INT_OP;
2145 #endif
2146
2147 if (GET_OPCODE(op) >= SLJIT_MUL) {
2148 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2149 compiler->flags_saved = 0;
2150 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2151 FAIL_IF(emit_save_flags(compiler));
2152 }
2153
2154 switch (GET_OPCODE(op)) {
2155 case SLJIT_ADD:
2156 if (!GET_FLAGS(op)) {
2157 if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
2158 return compiler->error;
2159 }
2160 else
2161 compiler->flags_saved = 0;
2162 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2163 FAIL_IF(emit_save_flags(compiler));
2164 return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
2165 dst, dstw, src1, src1w, src2, src2w);
2166 case SLJIT_ADDC:
2167 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2168 FAIL_IF(emit_restore_flags(compiler, 1));
2169 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2170 FAIL_IF(emit_save_flags(compiler));
2171 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2172 compiler->flags_saved = 0;
2173 return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
2174 dst, dstw, src1, src1w, src2, src2w);
2175 case SLJIT_SUB:
2176 if (!GET_FLAGS(op)) {
2177 if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
2178 return compiler->error;
2179 }
2180 else
2181 compiler->flags_saved = 0;
2182 if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
2183 FAIL_IF(emit_save_flags(compiler));
2184 if (dst == SLJIT_UNUSED)
2185 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2186 return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
2187 dst, dstw, src1, src1w, src2, src2w);
2188 case SLJIT_SUBC:
2189 if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
2190 FAIL_IF(emit_restore_flags(compiler, 1));
2191 else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
2192 FAIL_IF(emit_save_flags(compiler));
2193 if (SLJIT_UNLIKELY(GET_FLAGS(op)))
2194 compiler->flags_saved = 0;
2195 return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
2196 dst, dstw, src1, src1w, src2, src2w);
2197 case SLJIT_MUL:
2198 return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
2199 case SLJIT_AND:
2200 if (dst == SLJIT_UNUSED)
2201 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2202 return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
2203 dst, dstw, src1, src1w, src2, src2w);
2204 case SLJIT_OR:
2205 return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
2206 dst, dstw, src1, src1w, src2, src2w);
2207 case SLJIT_XOR:
2208 return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
2209 dst, dstw, src1, src1w, src2, src2w);
2210 case SLJIT_SHL:
2211 return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
2212 dst, dstw, src1, src1w, src2, src2w);
2213 case SLJIT_LSHR:
2214 return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
2215 dst, dstw, src1, src1w, src2, src2w);
2216 case SLJIT_ASHR:
2217 return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
2218 dst, dstw, src1, src1w, src2, src2w);
2219 }
2220
2221 return SLJIT_SUCCESS;
2222 }
2223
2224 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
2225 {
2226 CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2227 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2228 if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
2229 return -1;
2230 #endif
2231 return reg_map[reg];
2232 }
2233
2234 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
2235 {
2236 CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2237 return reg;
2238 }
2239
2240 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
2241 void *instruction, sljit_si size)
2242 {
2243 sljit_ub *inst;
2244
2245 CHECK_ERROR();
2246 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2247
2248 inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
2249 FAIL_IF(!inst);
2250 INC_SIZE(size);
2251 SLJIT_MEMMOVE(inst, instruction, size);
2252 return SLJIT_SUCCESS;
2253 }
2254
2255 /* --------------------------------------------------------------------- */
2256 /* Floating point operators */
2257 /* --------------------------------------------------------------------- */
2258
2259 /* Up to 12 bytes of alignment + 4 * 16 bytes of constants. */
2260 static sljit_si sse2_data[3 + (4 + 4) * 2];
2261 static sljit_si *sse2_buffer;
2262
2263 static void init_compiler(void)
2264 {
2265 sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
2266 /* Single precision constants. */
2267 sse2_buffer[0] = 0x80000000;
2268 sse2_buffer[4] = 0x7fffffff;
2269 /* Double precision constants. */
2270 sse2_buffer[8] = 0;
2271 sse2_buffer[9] = 0x80000000;
2272 sse2_buffer[12] = 0xffffffff;
2273 sse2_buffer[13] = 0x7fffffff;
2274 }
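/* These constants feed the XORPD/ANDPD in sljit_emit_fop1 below:
   xoring with a sign-bit mask (sse2_buffer, sse2_buffer + 8) negates,
   anding with its complement (sse2_buffer + 4, sse2_buffer + 12)
   yields the absolute value. Only the low lane of each 16-byte slot
   is significant for these scalar operations. */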
2275
2276 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
2277 {
2278 #ifdef SLJIT_IS_FPU_AVAILABLE
2279 return SLJIT_IS_FPU_AVAILABLE;
2280 #elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2281 if (cpu_has_sse2 == -1)
2282 get_cpu_features();
2283 return cpu_has_sse2;
2284 #else /* SLJIT_DETECT_SSE2 */
2285 return 1;
2286 #endif /* SLJIT_DETECT_SSE2 */
2287 }
2288
2289 static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
2290 sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2291 {
2292 sljit_ub *inst;
2293
2294 inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2295 FAIL_IF(!inst);
2296 *inst++ = GROUP_0F;
2297 *inst = opcode;
2298 return SLJIT_SUCCESS;
2299 }
2300
2301 static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
2302 sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
2303 {
2304 sljit_ub *inst;
2305
2306 inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
2307 FAIL_IF(!inst);
2308 *inst++ = GROUP_0F;
2309 *inst = opcode;
2310 return SLJIT_SUCCESS;
2311 }
2312
2313 static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
2314 sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
2315 {
2316 return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
2317 }
2318
2319 static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
2320 sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
2321 {
2322 return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
2323 }
2324
2325 static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
2326 sljit_si dst, sljit_sw dstw,
2327 sljit_si src, sljit_sw srcw)
2328 {
2329 sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2330 sljit_ub *inst;
2331
2332 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2333 if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
2334 compiler->mode32 = 0;
2335 #endif
2336
2337 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
2338 FAIL_IF(!inst);
2339 *inst++ = GROUP_0F;
2340 *inst = CVTTSD2SI_r_xm;
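/* CVTTSD2SI/CVTTSS2SI truncate toward zero, which matches the C
   semantics of a float-to-integer cast. */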
2341
2342 if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
2343 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2344 return SLJIT_SUCCESS;
2345 }
2346
2347 static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
2348 sljit_si dst, sljit_sw dstw,
2349 sljit_si src, sljit_sw srcw)
2350 {
2351 sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2352 sljit_ub *inst;
2353
2354 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2355 if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
2356 compiler->mode32 = 0;
2357 #endif
2358
2359 if (src & SLJIT_IMM) {
2360 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2361 if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
2362 srcw = (sljit_si)srcw;
2363 #endif
2364 EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
2365 src = TMP_REG1;
2366 srcw = 0;
2367 }
2368
2369 inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
2370 FAIL_IF(!inst);
2371 *inst++ = GROUP_0F;
2372 *inst = CVTSI2SD_x_rm;
2373
2374 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2375 compiler->mode32 = 1;
2376 #endif
2377 if (dst_r == TMP_FREG)
2378 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2379 return SLJIT_SUCCESS;
2380 }
2381
2382 static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
2383 sljit_si src1, sljit_sw src1w,
2384 sljit_si src2, sljit_sw src2w)
2385 {
2386 compiler->flags_saved = 0;
2387 if (!FAST_IS_REG(src1)) {
2388 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2389 src1 = TMP_FREG;
2390 }
2391 return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
2392 }
2393
2394 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
2395 sljit_si dst, sljit_sw dstw,
2396 sljit_si src, sljit_sw srcw)
2397 {
2398 sljit_si dst_r;
2399
2400 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2401 compiler->mode32 = 1;
2402 #endif
2403
2404 CHECK_ERROR();
2405 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2406
2407 if (GET_OPCODE(op) == SLJIT_DMOV) {
2408 if (FAST_IS_REG(dst))
2409 return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
2410 if (FAST_IS_REG(src))
2411 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
2412 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
2413 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2414 }
2415
2416 if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
2417 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
2418 if (FAST_IS_REG(src)) {
2419 /* We overwrite the high bits of the source register. From SLJIT's
2420 point of view this is not an issue.
2421 Note: with SSE3 we could also use MOVDDUP or MOVSLDUP. */
2422 FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
2423 }
2424 else {
2425 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
2426 src = TMP_FREG;
2427 }
2428
2429 FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
2430 if (dst_r == TMP_FREG)
2431 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2432 return SLJIT_SUCCESS;
2433 }
2434
2435 if (SLOW_IS_REG(dst)) {
2436 dst_r = dst;
2437 if (dst != src)
2438 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2439 }
2440 else {
2441 dst_r = TMP_FREG;
2442 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
2443 }
2444
2445 switch (GET_OPCODE(op)) {
2446 case SLJIT_DNEG:
2447 FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
2448 break;
2449
2450 case SLJIT_DABS:
2451 FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
2452 break;
2453 }
2454
2455 if (dst_r == TMP_FREG)
2456 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2457 return SLJIT_SUCCESS;
2458 }
2459
2460 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
2461 sljit_si dst, sljit_sw dstw,
2462 sljit_si src1, sljit_sw src1w,
2463 sljit_si src2, sljit_sw src2w)
2464 {
2465 sljit_si dst_r;
2466
2467 CHECK_ERROR();
2468 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2469 ADJUST_LOCAL_OFFSET(dst, dstw);
2470 ADJUST_LOCAL_OFFSET(src1, src1w);
2471 ADJUST_LOCAL_OFFSET(src2, src2w);
2472
2473 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2474 compiler->mode32 = 1;
2475 #endif
2476
2477 if (FAST_IS_REG(dst)) {
2478 dst_r = dst;
2479 if (dst == src1)
2480 ; /* Do nothing here. */
2481 else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
2482 /* Swap arguments. */
2483 src2 = src1;
2484 src2w = src1w;
2485 }
2486 else if (dst != src2)
2487 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
2488 else {
2489 dst_r = TMP_FREG;
2490 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2491 }
2492 }
2493 else {
2494 dst_r = TMP_FREG;
2495 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
2496 }
2497
2498 switch (GET_OPCODE(op)) {
2499 case SLJIT_DADD:
2500 FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2501 break;
2502
2503 case SLJIT_DSUB:
2504 FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2505 break;
2506
2507 case SLJIT_DMUL:
2508 FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2509 break;
2510
2511 case SLJIT_DDIV:
2512 FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
2513 break;
2514 }
2515
2516 if (dst_r == TMP_FREG)
2517 return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
2518 return SLJIT_SUCCESS;
2519 }
2520
2521 /* --------------------------------------------------------------------- */
2522 /* Conditional instructions */
2523 /* --------------------------------------------------------------------- */
2524
2525 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2526 {
2527 sljit_ub *inst;
2528 struct sljit_label *label;
2529
2530 CHECK_ERROR_PTR();
2531 CHECK_PTR(check_sljit_emit_label(compiler));
2532
2533 /* We should restore the flags before the label,
2534 since other taken jumps have their own flags as well. */
2535 if (SLJIT_UNLIKELY(compiler->flags_saved))
2536 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2537
2538 if (compiler->last_label && compiler->last_label->size == compiler->size)
2539 return compiler->last_label;
2540
2541 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2542 PTR_FAIL_IF(!label);
2543 set_label(label, compiler);
2544
2545 inst = (sljit_ub*)ensure_buf(compiler, 2);
2546 PTR_FAIL_IF(!inst);
2547
2548 *inst++ = 0;
2549 *inst++ = 0;
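/* A (0, tag) pair instead of real code bytes marks this position
   for the final generation pass: tag 0 for labels, 1 for constants
   and type + 4 for jumps (see sljit_emit_jump and sljit_emit_const
   below). */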
2550
2551 return label;
2552 }
2553
2554 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
2555 {
2556 sljit_ub *inst;
2557 struct sljit_jump *jump;
2558
2559 CHECK_ERROR_PTR();
2560 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2561
2562 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2563 if ((type & 0xff) <= SLJIT_JUMP)
2564 PTR_FAIL_IF(emit_restore_flags(compiler, 0));
2565 compiler->flags_saved = 0;
2566 }
2567
2568 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2569 PTR_FAIL_IF_NULL(jump);
2570 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2571 type &= 0xff;
2572
2573 if (type >= SLJIT_CALL1)
2574 PTR_FAIL_IF(call_with_args(compiler, type));
2575
2576 /* Worst case size. */
2577 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2578 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
2579 #else
2580 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
2581 #endif
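/* 32-bit: jmp rel32 is 5 bytes, jcc rel32 is 6. 64-bit: a
   rewritable target needs mov r64, imm64 plus an indirect jump
   (10 + 3 bytes), and a conditional jump presumably adds a 2-byte
   short jcc to skip that sequence. */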
2582
2583 inst = (sljit_ub*)ensure_buf(compiler, 2);
2584 PTR_FAIL_IF_NULL(inst);
2585
2586 *inst++ = 0;
2587 *inst++ = type + 4;
2588 return jump;
2589 }
2590
2591 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
2592 {
2593 sljit_ub *inst;
2594 struct sljit_jump *jump;
2595
2596 CHECK_ERROR();
2597 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2598 ADJUST_LOCAL_OFFSET(src, srcw);
2599
2600 CHECK_EXTRA_REGS(src, srcw, (void)0);
2601
2602 if (SLJIT_UNLIKELY(compiler->flags_saved)) {
2603 if (type <= SLJIT_JUMP)
2604 FAIL_IF(emit_restore_flags(compiler, 0));
2605 compiler->flags_saved = 0;
2606 }
2607
2608 if (type >= SLJIT_CALL1) {
2609 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2610 #if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
2611 if (src == SLJIT_R2) {
2612 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2613 src = TMP_REG1;
2614 }
2615 if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
2616 srcw += sizeof(sljit_sw);
2617 #endif
2618 #endif
2619 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
2620 if (src == SLJIT_R2) {
2621 EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
2622 src = TMP_REG1;
2623 }
2624 #endif
2625 FAIL_IF(call_with_args(compiler, type));
2626 }
2627
2628 if (src == SLJIT_IMM) {
2629 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2630 FAIL_IF_NULL(jump);
2631 set_jump(jump, compiler, JUMP_ADDR);
2632 jump->u.target = srcw;
2633
2634 /* Worst case size. */
2635 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2636 compiler->size += 5;
2637 #else
2638 compiler->size += 10 + 3;
2639 #endif
2640
2641 inst = (sljit_ub*)ensure_buf(compiler, 2);
2642 FAIL_IF_NULL(inst);
2643
2644 *inst++ = 0;
2645 *inst++ = type + 4;
2646 }
2647 else {
2648 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2649 /* REX_W is not necessary (src is not immediate). */
2650 compiler->mode32 = 1;
2651 #endif
2652 inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
2653 FAIL_IF(!inst);
2654 *inst++ = GROUP_FF;
2655 *inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
2656 }
2657 return SLJIT_SUCCESS;
2658 }
2659
2660 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
2661 sljit_si dst, sljit_sw dstw,
2662 sljit_si src, sljit_sw srcw,
2663 sljit_si type)
2664 {
2665 sljit_ub *inst;
2666 sljit_ub cond_set = 0;
2667 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2668 sljit_si reg;
2669 #else
2670 /* CHECK_EXTRA_REGS might overwrite these values. */
2671 sljit_si dst_save = dst;
2672 sljit_sw dstw_save = dstw;
2673 #endif
2674
2675 CHECK_ERROR();
2676 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
2677 SLJIT_UNUSED_ARG(srcw);
2678
2679 if (dst == SLJIT_UNUSED)
2680 return SLJIT_SUCCESS;
2681
2682 ADJUST_LOCAL_OFFSET(dst, dstw);
2683 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2684 if (SLJIT_UNLIKELY(compiler->flags_saved))
2685 FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));
2686
2687 type &= 0xff;
2688 /* setcc = jcc + 0x10. */
2689 cond_set = get_jump_code(type) + 0x10;
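/* get_jump_code() returns the second opcode byte of the near jcc
   (0F 80+cc); adding 0x10 turns it into setcc (0F 90+cc), e.g.
   JE 0F 84 becomes SETE 0F 94. */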
2690
2691 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2692 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
2693 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
2694 FAIL_IF(!inst);
2695 INC_SIZE(4 + 3);
2696 /* Set low register to conditional flag. */
2697 *inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
2698 *inst++ = GROUP_0F;
2699 *inst++ = cond_set;
2700 *inst++ = MOD_REG | reg_lmap[TMP_REG1];
2701 *inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
2702 *inst++ = OR_rm8_r8;
2703 *inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
2704 return SLJIT_SUCCESS;
2705 }
2706
2707 reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;
2708
2709 inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
2710 FAIL_IF(!inst);
2711 INC_SIZE(4 + 4);
2712 /* Set low register to conditional flag. */
2713 *inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
2714 *inst++ = GROUP_0F;
2715 *inst++ = cond_set;
2716 *inst++ = MOD_REG | reg_lmap[reg];
2717 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
2718 *inst++ = GROUP_0F;
2719 *inst++ = MOVZX_r_rm8;
2720 *inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];
2721
2722 if (reg != TMP_REG1)
2723 return SLJIT_SUCCESS;
2724
2725 if (GET_OPCODE(op) < SLJIT_ADD) {
2726 compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
2727 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2728 }
2729 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2730 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2731 compiler->skip_checks = 1;
2732 #endif
2733 return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
2734 #else /* SLJIT_CONFIG_X86_64 */
2735 if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
2736 if (reg_map[dst] <= 4) {
2737 /* Low byte is accessible. */
2738 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
2739 FAIL_IF(!inst);
2740 INC_SIZE(3 + 3);
2741 /* Set low byte to conditional flag. */
2742 *inst++ = GROUP_0F;
2743 *inst++ = cond_set;
2744 *inst++ = MOD_REG | reg_map[dst];
2745
2746 *inst++ = GROUP_0F;
2747 *inst++ = MOVZX_r_rm8;
2748 *inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
2749 return SLJIT_SUCCESS;
2750 }
2751
2752 /* Low byte is not accessible. */
2753 if (cpu_has_cmov == -1)
2754 get_cpu_features();
2755
2756 if (cpu_has_cmov) {
2757 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
2758 /* An xor reg, reg operation would overwrite the flags. */
2759 EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);
2760
2761 inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
2762 FAIL_IF(!inst);
2763 INC_SIZE(3);
2764
2765 *inst++ = GROUP_0F;
2766 /* cmovcc = setcc - 0x50. */
2767 *inst++ = cond_set - 0x50;
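/* E.g. SETE (0F 94) becomes CMOVE (0F 44). */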
2768 *inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
2769 return SLJIT_SUCCESS;
2770 }
2771
2772 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2773 FAIL_IF(!inst);
2774 INC_SIZE(1 + 3 + 3 + 1);
2775 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2776 /* Set al to conditional flag. */
2777 *inst++ = GROUP_0F;
2778 *inst++ = cond_set;
2779 *inst++ = MOD_REG | 0 /* eax */;
2780
2781 *inst++ = GROUP_0F;
2782 *inst++ = MOVZX_r_rm8;
2783 *inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
2784 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2785 return SLJIT_SUCCESS;
2786 }
2787
2788 if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
2789 SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
2790 if (dst != SLJIT_R0) {
2791 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
2792 FAIL_IF(!inst);
2793 INC_SIZE(1 + 3 + 2 + 1);
2794 /* Set low register to conditional flag. */
2795 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2796 *inst++ = GROUP_0F;
2797 *inst++ = cond_set;
2798 *inst++ = MOD_REG | 0 /* eax */;
2799 *inst++ = OR_rm8_r8;
2800 *inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
2801 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2802 }
2803 else {
2804 inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
2805 FAIL_IF(!inst);
2806 INC_SIZE(2 + 3 + 2 + 2);
2807 /* Set low register to conditional flag. */
2808 *inst++ = XCHG_r_rm;
2809 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2810 *inst++ = GROUP_0F;
2811 *inst++ = cond_set;
2812 *inst++ = MOD_REG | 1 /* ecx */;
2813 *inst++ = OR_rm8_r8;
2814 *inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
2815 *inst++ = XCHG_r_rm;
2816 *inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
2817 }
2818 return SLJIT_SUCCESS;
2819 }
2820
2821 /* Set TMP_REG1 to the bit. */
2822 inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
2823 FAIL_IF(!inst);
2824 INC_SIZE(1 + 3 + 3 + 1);
2825 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2826 /* Set al to conditional flag. */
2827 *inst++ = GROUP_0F;
2828 *inst++ = cond_set;
2829 *inst++ = MOD_REG | 0 /* eax */;
2830
2831 *inst++ = GROUP_0F;
2832 *inst++ = MOVZX_r_rm8;
2833 *inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;
2834
2835 *inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
2836
2837 if (GET_OPCODE(op) < SLJIT_ADD)
2838 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
2839
2840 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
2841 || (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2842 compiler->skip_checks = 1;
2843 #endif
2844 return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
2845 #endif /* SLJIT_CONFIG_X86_64 */
2846 }
2847
2848 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
2849 {
2850 CHECK_ERROR();
2851 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
2852 ADJUST_LOCAL_OFFSET(dst, dstw);
2853
2854 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2855
2856 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2857 compiler->mode32 = 0;
2858 #endif
2859
2860 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
2861
2862 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2863 if (NOT_HALFWORD(offset)) {
2864 FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
2865 #if (defined SLJIT_DEBUG && SLJIT_DEBUG)
2866 SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
2867 return compiler->error;
2868 #else
2869 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
2870 #endif
2871 }
2872 #endif
2873
2874 if (offset != 0)
2875 return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
2876 return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
2877 }
2878
2879 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
2880 {
2881 sljit_ub *inst;
2882 struct sljit_const *const_;
2883 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2884 sljit_si reg;
2885 #endif
2886
2887 CHECK_ERROR_PTR();
2888 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2889 ADJUST_LOCAL_OFFSET(dst, dstw);
2890
2891 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2892
2893 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2894 PTR_FAIL_IF(!const_);
2895 set_const(const_, compiler);
2896
2897 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2898 compiler->mode32 = 0;
2899 reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;
2900
2901 if (emit_load_imm64(compiler, reg, init_value))
2902 return NULL;
2903 #else
2904 if (dst == SLJIT_UNUSED)
2905 dst = TMP_REG1;
2906
2907 if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
2908 return NULL;
2909 #endif
2910
2911 inst = (sljit_ub*)ensure_buf(compiler, 2);
2912 PTR_FAIL_IF(!inst);
2913
2914 *inst++ = 0;
2915 *inst++ = 1;
2916
2917 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2918 if (dst & SLJIT_MEM)
2919 if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
2920 return NULL;
2921 #endif
2922
2923 return const_;
2924 }
2925
2926 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
2927 {
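/* On x86-32 the patch site is the rel32 field of a jmp/call, so the
   stored value is a displacement relative to the end of the 4-byte
   field; on x86-64 it is the absolute address inside a
   mov r64, imm64. */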
2928 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2929 *(sljit_sw*)addr = new_addr - (addr + 4);
2930 #else
2931 *(sljit_uw*)addr = new_addr;
2932 #endif
2933 }
2934
2935 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
2936 {
2937 *(sljit_sw*)addr = new_constant;
2938 }
2939
2940 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_sse2_available(void)
2941 {
2942 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
2943 if (cpu_has_sse2 == -1)
2944 get_cpu_features();
2945 return cpu_has_sse2;
2946 #else
2947 return 1;
2948 #endif
2949 }
2950
2951 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_is_cmov_available(void)
2952 {
2953 if (cpu_has_cmov == -1)
2954 get_cpu_features();
2955 return cpu_has_cmov;
2956 }
2957
2958 SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_x86_emit_cmov(struct sljit_compiler *compiler,
2959 sljit_si type,
2960 sljit_si dst_reg,
2961 sljit_si src, sljit_sw srcw)
2962 {
2963 sljit_ub* inst;
2964
2965 CHECK_ERROR();
2966 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
2967 CHECK_ARGUMENT(sljit_x86_is_cmov_available());
2968 CHECK_ARGUMENT(!(type & ~(0xff | SLJIT_INT_OP)));
2969 CHECK_ARGUMENT((type & 0xff) >= SLJIT_EQUAL && (type & 0xff) <= SLJIT_D_ORDERED);
2970 CHECK_ARGUMENT(FUNCTION_CHECK_IS_REG(dst_reg & ~SLJIT_INT_OP));
2971 FUNCTION_CHECK_SRC(src, srcw);
2972 #endif
2973 #if (defined SLJIT_VERBOSE && SLJIT_VERBOSE)
2974 if (SLJIT_UNLIKELY(!!compiler->verbose)) {
2975 fprintf(compiler->verbose, " x86_cmov%s %s%s, ",
2976 !(dst_reg & SLJIT_INT_OP) ? "" : ".i",
2977 JUMP_PREFIX(type), jump_names[type & 0xff]);
2978 sljit_verbose_reg(compiler, dst_reg & ~SLJIT_INT_OP);
2979 fprintf(compiler->verbose, ", ");
2980 sljit_verbose_param(compiler, src, srcw);
2981 fprintf(compiler->verbose, "\n");
2982 }
2983 #endif
2984
2985 ADJUST_LOCAL_OFFSET(src, srcw);
2986 CHECK_EXTRA_REGS(src, srcw, (void)0);
2987
2988 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2989 compiler->mode32 = dst_reg & SLJIT_INT_OP;
2990 #endif
2991 dst_reg &= ~SLJIT_INT_OP;
2992
2993 if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
2994 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
2995 src = TMP_REG1;
2996 srcw = 0;
2997 }
2998
2999 inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
3000 FAIL_IF(!inst);
3001 *inst++ = GROUP_0F;
3002 *inst = get_jump_code(type & 0xff) - 0x40;
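/* The near jcc byte (0F 80+cc) minus 0x40 is cmovcc (0F 40+cc). */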
3003 return SLJIT_SUCCESS;
3004 }
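/* A minimal usage sketch (hypothetical, assuming a compiler "c" built
   with the old flag-based API used in this file):

     sljit_emit_op2(c, SLJIT_SUB | SLJIT_SET_E, SLJIT_UNUSED, 0,
         SLJIT_R0, 0, SLJIT_R1, 0);
     if (sljit_x86_is_cmov_available())
         sljit_x86_emit_cmov(c, SLJIT_EQUAL, SLJIT_R0, SLJIT_R1, 0);
*/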
3005