/*
 *    Stack-less Just-In-Time compiler
 *
 *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 *   1. Redistributions of source code must retain the above copyright notice, this list of
 *      conditions and the following disclaimer.
 *
 *   2. Redistributions in binary form must reproduce the above copyright notice, this list
 *      of conditions and the following disclaimer in the documentation and/or other materials
 *      provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
    return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
     0 - EAX
     1 - ECX
     2 - EDX
     3 - EBX
     4 - none
     5 - EBP
     6 - ESI
     7 - EDI
*/

/*
   64b register indexes:
     0 - RAX
     1 - RCX
     2 - RDX
     3 - RBX
     4 - none
     5 - RBP
     6 - RSI
     7 - RDI
     8 - R8  (from here on the REX prefix is required)
     9 - R9
    10 - R10
    11 - R11
    12 - R12
    13 - R13
    14 - R14
    15 - R15
*/

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)

static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
    0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
    if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
        if (p <= compiler->scratches) \
            w = compiler->saveds_offset - ((p) - SLJIT_R2) * (sljit_sw)sizeof(sljit_sw); \
        else \
            w = compiler->locals_offset + ((p) - SLJIT_S2) * (sljit_sw)sizeof(sljit_sw); \
        p = SLJIT_MEM1(SLJIT_SP); \
        do; \
    }

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better used as a higher saved register. */
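/* Illustrative encodings (assumed examples for clarity, not code emitted by
   this file): "mov rax, [r12]" must carry a SIB byte because its ModRM rm
   field is 100b, giving the four-byte form 49 8b 04 24; and since the
   mod == 00 / base == 101b slot is taken by RIP-relative (or disp32)
   addressing, "mov rax, [r13]" needs an explicit zero disp8: 49 8b 45 00.
   Keeping r12/r13 out of addressing keeps the common memory forms shorter. */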
#ifndef _WIN64
/* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 6, 1, 7, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
};
/* Low map: reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 6, 1, 7, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
};
#else
/* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 2, 1, 10, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 8, 9
};
/* Low map: reg_map & 0x7. */
static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
    0, 0, 2, 1, 2, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 0, 1
};
#endif

#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41
#define REX 0x40
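/* The REX bits combine by OR-ing; for example, a 64-bit operation whose
   ModRM rm field names one of r8-r15 uses REX_W | REX_B (0x49). */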

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
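/* For instance, IS_HALFWORD(0x7fffffff) holds while IS_HALFWORD(0x80000000)
   does not: only values that fit a sign-extended 32-bit immediate can be
   encoded directly by most 64-bit instruction forms. */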

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG (0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS 0x0010
#define EX86_SHIFT_INS 0x0020
#define EX86_REX 0x0040
#define EX86_NO_REXW 0x0080
#define EX86_BYTE_ARG 0x0100
#define EX86_HALF_ARG 0x0200
#define EX86_PREF_66 0x0400
#define EX86_PREF_F2 0x0800
#define EX86_PREF_F3 0x1000
#define EX86_SSE2_OP1 0x2000
#define EX86_SSE2_OP2 0x4000
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

#define ADD (/* BINARY */ 0 << 3)
#define ADD_EAX_i32 0x05
#define ADD_r_rm 0x03
#define ADD_rm_r 0x01
#define ADDSD_x_xm 0x58
#define ADC (/* BINARY */ 2 << 3)
#define ADC_EAX_i32 0x15
#define ADC_r_rm 0x13
#define ADC_rm_r 0x11
#define AND (/* BINARY */ 4 << 3)
#define AND_EAX_i32 0x25
#define AND_r_rm 0x23
#define AND_rm_r 0x21
#define ANDPD_x_xm 0x54
#define BSR_r_rm (/* GROUP_0F */ 0xbd)
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
#define CMOVE_r_rm (/* GROUP_0F */ 0x44)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
#define CVTPD2PS_x_xm 0x5a
#define CVTSI2SD_x_rm 0x2a
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define INT3 0xcc
#define IDIV (/* GROUP_F7 */ 7 << 3)
#define IMUL (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8 0x6b
#define IMUL_r_rm_i32 0x69
#define JE_i8 0x74
#define JNE_i8 0x75
#define JMP_i8 0xeb
#define JMP_i32 0xe9
#define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d
#define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8
#define MOV_rm_r 0x89
#define MOV_rm_i32 0xc7
#define MOV_rm8_i8 0xc6
#define MOV_rm8_r8 0x88
#define MOVSD_x_xm 0x10
#define MOVSD_xm_x 0x11
#define MOVSXD_r_rm 0x63
#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
#define MUL (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm 0x59
#define NEG_rm (/* GROUP_F7 */ 3 << 3)
#define NOP 0x90
#define NOT_rm (/* GROUP_F7 */ 2 << 3)
#define OR (/* BINARY */ 1 << 3)
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
#define OR_rm8_r8 0x08
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
#define PREFETCH 0x18
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
#define PUSHF 0x9c
#define RET_near 0xc3
#define RET_i16 0xc2
#define SBB (/* BINARY */ 3 << 3)
#define SBB_EAX_i32 0x1d
#define SBB_r_rm 0x1b
#define SBB_rm_r 0x19
#define SAR (/* SHIFT */ 7 << 3)
#define SHL (/* SHIFT */ 4 << 3)
#define SHR (/* SHIFT */ 5 << 3)
#define SUB (/* BINARY */ 5 << 3)
#define SUB_EAX_i32 0x2d
#define SUB_r_rm 0x2b
#define SUB_rm_r 0x29
#define SUBSD_x_xm 0x5c
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define UCOMISD_x_xm 0x2e
#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
#define XOR_EAX_i32 0x35
#define XOR_r_rm 0x33
#define XOR_rm_r 0x31
#define XORPD_x_xm 0x57

#define GROUP_0F 0x0f
#define GROUP_F7 0xf7
#define GROUP_FF 0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1 0xd1
#define GROUP_SHIFT_N 0xc1
#define GROUP_SHIFT_CL 0xd3

#define MOD_REG 0xc0
#define MOD_DISP8 0x40

#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
#define POP_REG(r) (*inst++ = (POP_r + (r)))
#define RET() (*inst++ = (RET_near))
#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
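/* Illustrative expansion (operand values assumed for clarity):
   MOV_RM(0x3, 0, 2) emits 8b c2, i.e. "mov eax, edx". The second byte is the
   ModRM byte, mod(2 bits) | reg(3 bits) | rm(3 bits); mod == 3 selects the
   register-direct form (MOD_REG above is the same value pre-shifted). */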

/* Multithreading does not affect these static variables, since they store
   built-in CPU features. They may be written by several threads if those
   threads detect the CPU features at the same time, but they are always
   overwritten with the same values. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_s32 cpu_has_sse2 = -1;
#endif
static sljit_s32 cpu_has_cmov = -1;

#ifdef _WIN32_WCE
#include <cmnintrin.h>
#elif defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

/******************************************************/
/*    Unaligned-store functions                       */
/******************************************************/

static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
{
    SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
{
    SLJIT_MEMCPY(addr, &value, sizeof(value));
}

static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
{
    SLJIT_MEMCPY(addr, &value, sizeof(value));
}

/******************************************************/
/*    Utility functions                               */
/******************************************************/

static void get_cpu_features(void)
{
    sljit_u32 features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

    int CPUInfo[4];
    __cpuid(CPUInfo, 1);
    features = (sljit_u32)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

    /* AT&T syntax. */
    __asm__ (
        "movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        /* On x86-32, there is no red zone, so this
           should work (no need for a local variable). */
        "push %%ebx\n"
#endif
        "cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        "pop %%ebx\n"
#endif
        "movl %%edx, %0\n"
        : "=g" (features)
        :
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        : "%eax", "%ecx", "%edx"
#else
        : "%rax", "%rbx", "%rcx", "%rdx"
#endif
    );

#else /* _MSC_VER && _MSC_VER >= 1400 */

    /* Intel syntax. */
    __asm {
        mov eax, 1
        cpuid
        mov features, edx
    }

#endif /* _MSC_VER && _MSC_VER >= 1400 */

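    /* CPUID leaf 1 reports feature flags in EDX: bit 15 indicates CMOV
       and bit 26 indicates SSE2. */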
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
    cpu_has_sse2 = (features >> 26) & 0x1;
#endif
    cpu_has_cmov = (features >> 15) & 0x1;
}

static sljit_u8 get_jump_code(sljit_s32 type)
{
    switch (type) {
    case SLJIT_EQUAL:
    case SLJIT_EQUAL_F64:
        return 0x84 /* je */;

    case SLJIT_NOT_EQUAL:
    case SLJIT_NOT_EQUAL_F64:
        return 0x85 /* jne */;

    case SLJIT_LESS:
    case SLJIT_LESS_F64:
        return 0x82 /* jc */;

    case SLJIT_GREATER_EQUAL:
    case SLJIT_GREATER_EQUAL_F64:
        return 0x83 /* jae */;

    case SLJIT_GREATER:
    case SLJIT_GREATER_F64:
        return 0x87 /* jnbe */;

    case SLJIT_LESS_EQUAL:
    case SLJIT_LESS_EQUAL_F64:
        return 0x86 /* jbe */;

    case SLJIT_SIG_LESS:
        return 0x8c /* jl */;

    case SLJIT_SIG_GREATER_EQUAL:
        return 0x8d /* jnl */;

    case SLJIT_SIG_GREATER:
        return 0x8f /* jnle */;

    case SLJIT_SIG_LESS_EQUAL:
        return 0x8e /* jle */;

    case SLJIT_OVERFLOW:
    case SLJIT_MUL_OVERFLOW:
        return 0x80 /* jo */;

    case SLJIT_NOT_OVERFLOW:
    case SLJIT_MUL_NOT_OVERFLOW:
        return 0x81 /* jno */;

    case SLJIT_UNORDERED_F64:
        return 0x8a /* jp */;

    case SLJIT_ORDERED_F64:
        return 0x8b /* jpo */;
    }
    return 0;
}
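/* Note: these are the two-byte (0x0f-prefixed) Jcc opcodes; subtracting 0x10
   yields the matching one-byte short form, e.g. 0x84 (je rel32) becomes
   0x74 (je rel8). generate_near_jump_code below relies on this. */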

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type, sljit_sw executable_offset);
#else
static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_s32 type);
#endif

static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_s32 type, sljit_sw executable_offset)
{
    sljit_s32 short_jump;
    sljit_uw label_addr;

    if (jump->flags & JUMP_LABEL)
        label_addr = (sljit_uw)(code + jump->u.label->size);
    else
        label_addr = jump->u.target - executable_offset;

    short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
        return generate_far_jump_code(jump, code_ptr, type);
#endif

    if (type == SLJIT_JUMP) {
        if (short_jump)
            *code_ptr++ = JMP_i8;
        else
            *code_ptr++ = JMP_i32;
        jump->addr++;
    }
    else if (type >= SLJIT_FAST_CALL) {
        short_jump = 0;
        *code_ptr++ = CALL_i32;
        jump->addr++;
    }
    else if (short_jump) {
        *code_ptr++ = get_jump_code(type) - 0x10;
        jump->addr++;
    }
    else {
        *code_ptr++ = GROUP_0F;
        *code_ptr++ = get_jump_code(type);
        jump->addr += 2;
    }

    if (short_jump) {
        jump->flags |= PATCH_MB;
        code_ptr += sizeof(sljit_s8);
    } else {
        jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        code_ptr += sizeof(sljit_sw);
#else
        code_ptr += sizeof(sljit_s32);
#endif
    }

    return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
    struct sljit_memory_fragment *buf;
    sljit_u8 *code;
    sljit_u8 *code_ptr;
    sljit_u8 *buf_ptr;
    sljit_u8 *buf_end;
    sljit_u8 len;
    sljit_sw executable_offset;
    sljit_sw jump_addr;

    struct sljit_label *label;
    struct sljit_jump *jump;
    struct sljit_const *const_;

    CHECK_ERROR_PTR();
    CHECK_PTR(check_sljit_generate_code(compiler));
    reverse_buf(compiler);

    /* Second code generation pass. */
    code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size);
    PTR_FAIL_WITH_EXEC_IF(code);
    buf = compiler->buf;

    code_ptr = code;
    label = compiler->labels;
    jump = compiler->jumps;
    const_ = compiler->consts;
    executable_offset = SLJIT_EXEC_OFFSET(code);

    do {
        buf_ptr = buf->memory;
        buf_end = buf_ptr + buf->used_size;
        do {
            len = *buf_ptr++;
            if (len > 0) {
                /* The code is already generated. */
                SLJIT_MEMCPY(code_ptr, buf_ptr, len);
                code_ptr += len;
                buf_ptr += len;
            }
            else {
                if (*buf_ptr >= 2) {
                    jump->addr = (sljit_uw)code_ptr;
                    if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
                        code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 2, executable_offset);
                    else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2, executable_offset);
#else
                        code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 2);
#endif
                    }
                    jump = jump->next;
                }
                else if (*buf_ptr == 0) {
                    label->addr = ((sljit_uw)code_ptr) + executable_offset;
                    label->size = code_ptr - code;
                    label = label->next;
                }
                else { /* *buf_ptr is 1 */
                    const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
                    const_ = const_->next;
                }
                buf_ptr++;
            }
        } while (buf_ptr < buf_end);
        SLJIT_ASSERT(buf_ptr == buf_end);
        buf = buf->next;
    } while (buf);

    SLJIT_ASSERT(!label);
    SLJIT_ASSERT(!jump);
    SLJIT_ASSERT(!const_);

    jump = compiler->jumps;
    while (jump) {
        jump_addr = jump->addr + executable_offset;

        if (jump->flags & PATCH_MB) {
            SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8))) <= 127);
            *(sljit_u8*)jump->addr = (sljit_u8)(jump->u.label->addr - (jump_addr + sizeof(sljit_s8)));
        } else if (jump->flags & PATCH_MW) {
            if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_sw))));
#else
                SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.label->addr - (jump_addr + sizeof(sljit_s32))));
#endif
            }
            else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
                sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_sw))));
#else
                SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump_addr + sizeof(sljit_s32))) <= HALFWORD_MAX);
                sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)(jump->u.target - (jump_addr + sizeof(sljit_s32))));
#endif
            }
        }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        else if (jump->flags & PATCH_MD)
            sljit_unaligned_store_sw((void*)jump->addr, jump->u.label->addr);
#endif

        jump = jump->next;
    }

    /* Some space may be wasted because of short jumps. */
    SLJIT_ASSERT(code_ptr <= code + compiler->size);
    compiler->error = SLJIT_ERR_COMPILED;
    compiler->executable_offset = executable_offset;
    compiler->executable_size = code_ptr - code;
    return (void*)(code + executable_offset);
}
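/* Minimal usage sketch (illustrative only; entry_f is an assumed name, not
   part of the sljit API): after all instructions have been emitted, a caller
   typically does

       typedef sljit_sw (*entry_f)(void);
       entry_f entry = (entry_f)sljit_generate_code(compiler);

   and may invoke entry() directly, since the returned pointer already has
   the executable offset applied. */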

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
    switch (feature_type) {
    case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
        return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
        if (cpu_has_sse2 == -1)
            get_cpu_features();
        return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
        return 1;
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    case SLJIT_HAS_VIRTUAL_REGISTERS:
        return 1;
#endif

    case SLJIT_HAS_CLZ:
    case SLJIT_HAS_CMOV:
        if (cpu_has_cmov == -1)
            get_cpu_features();
        return cpu_has_cmov;

    case SLJIT_HAS_PREF_SHIFT_REG:
        return 1;

    case SLJIT_HAS_SSE2:
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
        if (cpu_has_sse2 == -1)
            get_cpu_features();
        return cpu_has_sse2;
#else
        return 1;
#endif

    default:
        return 0;
    }
}

/* --------------------------------------------------------------------- */
/*  Operators                                                             */
/* --------------------------------------------------------------------- */

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
    sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
    sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w);

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw);

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
    FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
    /* Workaround for calling the internal _chkstk() function on Windows.
       This function touches all 4k pages that belong to the requested stack
       space, whose size is passed in local_size. This is necessary on
       Windows, where the stack can only grow in 4k steps. However, the
       function just burns CPU cycles if the stack is already large enough.
       Since that cannot be known in advance, it must always be called.
       I think this is a bad design in general, even if it has its reasons. */
    *(volatile sljit_s32*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_s32 emit_mov(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;

    SLJIT_ASSERT(dst != SLJIT_UNUSED);

    if (FAST_IS_REG(src)) {
        inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_r;
        return SLJIT_SUCCESS;
    }
    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
            if (!compiler->mode32) {
                if (NOT_HALFWORD(srcw))
                    return emit_load_imm64(compiler, dst, srcw);
            }
            else
                return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
        }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
            /* Immediate-to-memory move. Only the SLJIT_MOV operation copies
               an immediate directly into memory, so TMP_REG1 can be used. */
            FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
            FAIL_IF(!inst);
            *inst = MOV_rm_r;
            return SLJIT_SUCCESS;
        }
#endif
        inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_i32;
        return SLJIT_SUCCESS;
    }
    if (FAST_IS_REG(dst)) {
        inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
        FAIL_IF(!inst);
        *inst = MOV_r_rm;
        return SLJIT_SUCCESS;
    }

    /* Memory-to-memory move. Only the SLJIT_MOV operation copies
       data from memory to memory, so TMP_REG1 can be used. */
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
    FAIL_IF(!inst);
    *inst = MOV_r_rm;
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
    FAIL_IF(!inst);
    *inst = MOV_rm_r;
    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
    sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    sljit_s32 size;
#endif

    CHECK_ERROR();
    CHECK(check_sljit_emit_op0(compiler, op));

    switch (GET_OPCODE(op)) {
    case SLJIT_BREAKPOINT:
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
        FAIL_IF(!inst);
        INC_SIZE(1);
        *inst = INT3;
        break;
    case SLJIT_NOP:
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
        FAIL_IF(!inst);
        INC_SIZE(1);
        *inst = NOP;
        break;
    case SLJIT_LMUL_UW:
    case SLJIT_LMUL_SW:
    case SLJIT_DIVMOD_UW:
    case SLJIT_DIVMOD_SW:
    case SLJIT_DIV_UW:
    case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
        SLJIT_ASSERT(
            reg_map[SLJIT_R0] == 0
            && reg_map[SLJIT_R1] == 2
            && reg_map[TMP_REG1] > 7);
#else
        SLJIT_ASSERT(
            reg_map[SLJIT_R0] == 0
            && reg_map[SLJIT_R1] < 7
            && reg_map[TMP_REG1] == 2);
#endif
        compiler->mode32 = op & SLJIT_I32_OP;
#endif
        SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

        op = GET_OPCODE(op);
        if ((op | 0x2) == SLJIT_DIV_UW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
            inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
            FAIL_IF(!inst);
            *inst = XOR_r_rm;
        }

        if ((op | 0x2) == SLJIT_DIV_SW) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            FAIL_IF(!inst);
            INC_SIZE(1);
            *inst = CDQ;
#else
            if (compiler->mode32) {
                inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
                FAIL_IF(!inst);
                INC_SIZE(1);
                *inst = CDQ;
            } else {
                inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
                FAIL_IF(!inst);
                INC_SIZE(2);
                *inst++ = REX_W;
                *inst = CDQ;
            }
#endif
        }

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
        FAIL_IF(!inst);
        INC_SIZE(2);
        *inst++ = GROUP_F7;
        *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
        size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else
        size = (!compiler->mode32) ? 3 : 2;
#endif
        inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
        FAIL_IF(!inst);
        INC_SIZE(size);
#ifdef _WIN64
        if (!compiler->mode32)
            *inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
        else if (op >= SLJIT_DIVMOD_UW)
            *inst++ = REX_B;
        *inst++ = GROUP_F7;
        *inst = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
        if (!compiler->mode32)
            *inst++ = REX_W;
        *inst++ = GROUP_F7;
        *inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
        switch (op) {
        case SLJIT_LMUL_UW:
            *inst |= MUL;
            break;
        case SLJIT_LMUL_SW:
            *inst |= IMUL;
            break;
        case SLJIT_DIVMOD_UW:
        case SLJIT_DIV_UW:
            *inst |= DIV;
            break;
        case SLJIT_DIVMOD_SW:
        case SLJIT_DIV_SW:
            *inst |= IDIV;
            break;
        }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
        if (op <= SLJIT_DIVMOD_SW)
            EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
        if (op >= SLJIT_DIV_UW)
            EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
        break;
    }

    return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
    do { \
        inst = (sljit_u8*)ensure_buf(compiler, 1 + 1); \
        FAIL_IF(!inst); \
        INC_SIZE(1); \
        *inst = (prefix); \
    } while (0)

static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;
    sljit_s32 dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    sljit_s32 work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = 0;
#endif

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
            inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
            FAIL_IF(!inst);
            *inst = MOV_rm_i32;
            return SLJIT_SUCCESS;
#endif
        }
        inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm8_i8;
        return SLJIT_SUCCESS;
    }

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (reg_map[src] >= 4) {
            SLJIT_ASSERT(dst_r == TMP_REG1);
            EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
        } else
            dst_r = src;
#else
        dst_r = src;
#endif
    }
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
        /* Both src and dst are registers. */
        SLJIT_ASSERT(SLOW_IS_REG(dst));
        if (reg_map[dst] < 4) {
            if (dst != src)
                EMIT_MOV(compiler, dst, 0, src, 0);
            inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
            FAIL_IF(!inst);
            *inst++ = GROUP_0F;
            *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
        }
        else {
            if (dst != src)
                EMIT_MOV(compiler, dst, 0, src, 0);
            if (sign) {
                /* shl reg, 24 */
                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
                FAIL_IF(!inst);
                *inst |= SHL;
                /* sar reg, 24 */
                inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
                FAIL_IF(!inst);
                *inst |= SAR;
            }
            else {
                inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
                FAIL_IF(!inst);
                *(inst + 1) |= AND;
            }
        }
        return SLJIT_SUCCESS;
    }
#endif
    else {
        /* src is either a memory address or, on x86-32, a register with
           reg_map[src] < 4. */
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
    }

    if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (dst_r == TMP_REG1) {
            /* Find an unused register whose reg_map entry is < 4. */
            if ((dst & REG_MASK) == SLJIT_R0) {
                if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
                    work_r = SLJIT_R2;
                else
                    work_r = SLJIT_R1;
            }
            else {
                if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
                    work_r = SLJIT_R0;
                else if ((dst & REG_MASK) == SLJIT_R1)
                    work_r = SLJIT_R2;
                else
                    work_r = SLJIT_R1;
            }

            if (work_r == SLJIT_R0) {
                ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
            }
            else {
                inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
                FAIL_IF(!inst);
                *inst = XCHG_r_rm;
            }

            inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
            FAIL_IF(!inst);
            *inst = MOV_rm8_r8;

            if (work_r == SLJIT_R0) {
                ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
            }
            else {
                inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
                FAIL_IF(!inst);
                *inst = XCHG_r_rm;
            }
        }
        else {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
            FAIL_IF(!inst);
            *inst = MOV_rm8_r8;
        }
#else
        inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm8_r8;
#endif
    }

    return SLJIT_SUCCESS;
}

static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = 1;
#endif

    inst = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
    FAIL_IF(!inst);
    *inst++ = GROUP_0F;
    *inst++ = PREFETCH;

    if (op >= SLJIT_MOV_U8 && op <= SLJIT_MOV_S8)
        *inst |= (3 << 3);
    else if (op >= SLJIT_MOV_U16 && op <= SLJIT_MOV_S16)
        *inst |= (2 << 3);
    else
        *inst |= (1 << 3);
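    /* The value OR-ed into the ModRM reg field selects the hint:
       /1 = prefetcht0, /2 = prefetcht1, /3 = prefetcht2
       (0f 18 /0 would be prefetchnta). */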

    return SLJIT_SUCCESS;
}

static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;
    sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = 0;
#endif

    if (src & SLJIT_IMM) {
        if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
            inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
            FAIL_IF(!inst);
            *inst = MOV_rm_i32;
            return SLJIT_SUCCESS;
#endif
        }
        inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_i32;
        return SLJIT_SUCCESS;
    }

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
        dst_r = src;
    else {
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
    }

    if (dst & SLJIT_MEM) {
        inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst = MOV_rm_r;
    }

    return SLJIT_SUCCESS;
}

static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;

    if (dst == src && dstw == srcw) {
        /* Same input and output. */
        inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
        FAIL_IF(!inst);
        *inst++ = GROUP_F7;
        *inst |= opcode;
        return SLJIT_SUCCESS;
    }

    if (dst == SLJIT_UNUSED)
        dst = TMP_REG1;

    if (FAST_IS_REG(dst)) {
        EMIT_MOV(compiler, dst, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
        FAIL_IF(!inst);
        *inst++ = GROUP_F7;
        *inst |= opcode;
        return SLJIT_SUCCESS;
    }

    EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
    FAIL_IF(!inst);
    *inst++ = GROUP_F7;
    *inst |= opcode;
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    return SLJIT_SUCCESS;
}

static sljit_s32 emit_not_with_flags(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;

    if (dst == SLJIT_UNUSED)
        dst = TMP_REG1;

    if (FAST_IS_REG(dst)) {
        EMIT_MOV(compiler, dst, 0, src, srcw);
        inst = emit_x86_instruction(compiler, 1, 0, 0, dst, 0);
        FAIL_IF(!inst);
        *inst++ = GROUP_F7;
        *inst |= NOT_rm;
        inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
        FAIL_IF(!inst);
        *inst = OR_r_rm;
        return SLJIT_SUCCESS;
    }

    EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
    inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
    FAIL_IF(!inst);
    *inst++ = GROUP_F7;
    *inst |= NOT_rm;
    inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
    FAIL_IF(!inst);
    *inst = OR_r_rm;
    EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    return SLJIT_SUCCESS;
}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
static const sljit_sw emit_clz_arg = 32 + 31;
#endif
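/* CLZ via BSR: for a non-zero 32-bit input, bsr yields the index b of the
   highest set bit, and b ^ 31 == 31 - b is exactly the leading-zero count.
   bsr leaves its destination undefined for a zero input, so 32 + 31 is
   conditionally loaded first, making zero inputs yield (32 + 31) ^ 31 == 32
   (64 + 63 plays the same role in 64-bit mode). */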

static sljit_s32 emit_clz(struct sljit_compiler *compiler, sljit_s32 op_flags,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_u8* inst;
    sljit_s32 dst_r;

    SLJIT_UNUSED_ARG(op_flags);

    if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
        EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
        src = TMP_REG1;
        srcw = 0;
    }

    if (cpu_has_cmov == -1)
        get_cpu_features();

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
    FAIL_IF(!inst);
    *inst++ = GROUP_0F;
    *inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    if (cpu_has_cmov) {
        if (dst_r != TMP_REG1) {
            EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 32 + 31);
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
        }
        else
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), (sljit_sw)&emit_clz_arg);

        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = CMOVE_r_rm;
    }
    else
        FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, 32 + 31));

    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
    if (cpu_has_cmov) {
        EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31));

        inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = CMOVE_r_rm;
    }
    else
        FAIL_IF(sljit_emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? (64 + 63) : (32 + 31)));

    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_I32_OP) ? 63 : 31, dst_r, 0);
#endif

    FAIL_IF(!inst);
    *(inst + 1) |= XOR;

    if (dst & SLJIT_MEM)
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src, sljit_sw srcw)
{
    sljit_s32 update = 0;
    sljit_s32 op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
    sljit_s32 dst_is_ereg = 0;
    sljit_s32 src_is_ereg = 0;
#else
#   define src_is_ereg 0
#endif

    CHECK_ERROR();
    CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
    ADJUST_LOCAL_OFFSET(dst, dstw);
    ADJUST_LOCAL_OFFSET(src, srcw);

    CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
    CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
    compiler->mode32 = op_flags & SLJIT_I32_OP;
#endif

    if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
        if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
            return emit_prefetch(compiler, op, src, srcw);
        return SLJIT_SUCCESS;
    }

    op = GET_OPCODE(op);

    if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        compiler->mode32 = 0;
#endif

        if (op_flags & SLJIT_I32_OP) {
            if (FAST_IS_REG(src) && src == dst) {
                if (!TYPE_CAST_NEEDED(op))
                    return SLJIT_SUCCESS;
            }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
                op = SLJIT_MOV_U32;
            if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
                op = SLJIT_MOVU_U32;
            if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
                op = SLJIT_MOV_S32;
            if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
                op = SLJIT_MOVU_S32;
#endif
        }

        SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
        if (op >= SLJIT_MOVU) {
            update = 1;
            op -= 8;
        }

        if (src & SLJIT_IMM) {
            switch (op) {
            case SLJIT_MOV_U8:
                srcw = (sljit_u8)srcw;
                break;
            case SLJIT_MOV_S8:
                srcw = (sljit_s8)srcw;
                break;
            case SLJIT_MOV_U16:
                srcw = (sljit_u16)srcw;
                break;
            case SLJIT_MOV_S16:
                srcw = (sljit_s16)srcw;
                break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            case SLJIT_MOV_U32:
                srcw = (sljit_u32)srcw;
                break;
            case SLJIT_MOV_S32:
                srcw = (sljit_s32)srcw;
                break;
#endif
            }
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
            if (SLJIT_UNLIKELY(dst_is_ereg))
                return emit_mov(compiler, dst, dstw, src, srcw);
#endif
        }

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
            SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
            dst = TMP_REG1;
        }
#endif

        switch (op) {
        case SLJIT_MOV:
        case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        case SLJIT_MOV_U32:
        case SLJIT_MOV_S32:
#endif
            FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
            break;
        case SLJIT_MOV_U8:
            FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
            break;
        case SLJIT_MOV_S8:
            FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
            break;
        case SLJIT_MOV_U16:
            FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
            break;
        case SLJIT_MOV_S16:
            FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
            break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        case SLJIT_MOV_U32:
            FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
            break;
        case SLJIT_MOV_S32:
            FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
            break;
#endif
        }

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
            return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

        if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK)) {
            if ((src & OFFS_REG_MASK) != 0) {
                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
                    (src & REG_MASK), 0, (src & REG_MASK), 0, OFFS_REG(src), 0));
            }
            else if (srcw != 0) {
                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
                    (src & REG_MASK), 0, (src & REG_MASK), 0, SLJIT_IMM, srcw));
            }
        }

        if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK)) {
            if ((dst & OFFS_REG_MASK) != 0) {
                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
                    (dst & REG_MASK), 0, (dst & REG_MASK), 0, OFFS_REG(dst), 0));
            }
            else if (dstw != 0) {
                FAIL_IF(emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
                    (dst & REG_MASK), 0, (dst & REG_MASK), 0, SLJIT_IMM, dstw));
            }
        }
        return SLJIT_SUCCESS;
    }

    switch (op) {
    case SLJIT_NOT:
        if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_Z))
            return emit_not_with_flags(compiler, dst, dstw, src, srcw);
        return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

    case SLJIT_NEG:
        return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

    case SLJIT_CLZ:
        return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
    }

    return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#   undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
    if (IS_HALFWORD(immw) || compiler->mode32) { \
        inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
        FAIL_IF(!inst); \
        *(inst + 1) |= (op_imm); \
    } \
    else { \
        FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
        inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
        FAIL_IF(!inst); \
        *inst = (op_mr); \
    }

#define BINARY_EAX_IMM(op_eax_imm, immw) \
    FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
    inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
    FAIL_IF(!inst); \
    *(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
    FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif
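/* Sketch of what the macro expands to (an assumption based on the
   GROUP_BINARY_81/83 forms defined above; the exact opcode choice happens
   inside emit_x86_instruction): BINARY_IMM(ADD, ADD_rm_r, imm, dst, dstw)
   normally becomes an 81 /0 id or 83 /0 ib instruction, with the /digit
   OR-ed into the ModRM byte; on x86-64 an immediate that does not fit
   32 bits is first loaded into a temporary register and the register form
   (ADD_rm_r, 01 /r) is emitted instead. */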

static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
    sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
{
    sljit_u8* inst;

    if (dst == SLJIT_UNUSED) {
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
        }
        else {
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        return SLJIT_SUCCESS;
    }

    if (dst == src1 && dstw == src1w) {
        if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
                BINARY_EAX_IMM(op_eax_imm, src2w);
            }
            else {
                BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
            }
        }
        else if (FAST_IS_REG(dst)) {
            inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        else if (FAST_IS_REG(src2)) {
            /* Special exception for sljit_emit_op_flags. */
            inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
            FAIL_IF(!inst);
            *inst = op_mr;
        }
        else {
            EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
            FAIL_IF(!inst);
            *inst = op_mr;
        }
        return SLJIT_SUCCESS;
    }

    /* Only for cumulative operations. */
    if (dst == src2 && dstw == src2w) {
        if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
            if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
                BINARY_EAX_IMM(op_eax_imm, src1w);
            }
            else {
                BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
            }
        }
        else if (FAST_IS_REG(dst)) {
            inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        else if (FAST_IS_REG(src1)) {
            inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
            FAIL_IF(!inst);
            *inst = op_mr;
        }
        else {
            EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
            FAIL_IF(!inst);
            *inst = op_mr;
        }
        return SLJIT_SUCCESS;
    }

    /* General version. */
    if (FAST_IS_REG(dst)) {
        EMIT_MOV(compiler, dst, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
        }
        else {
            inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
    }
    else {
        /* This version requires fewer memory writes. */
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
        }
        else {
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    }

    return SLJIT_SUCCESS;
}

static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
    sljit_u8 op_rm, sljit_u8 op_mr, sljit_u8 op_imm, sljit_u8 op_eax_imm,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
{
    sljit_u8* inst;

    if (dst == SLJIT_UNUSED) {
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
        }
        else {
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        return SLJIT_SUCCESS;
    }

    if (dst == src1 && dstw == src1w) {
        if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
            if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
                BINARY_EAX_IMM(op_eax_imm, src2w);
            }
            else {
                BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
            }
        }
        else if (FAST_IS_REG(dst)) {
            inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        else if (FAST_IS_REG(src2)) {
            inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
            FAIL_IF(!inst);
            *inst = op_mr;
        }
        else {
            EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
            FAIL_IF(!inst);
            *inst = op_mr;
        }
        return SLJIT_SUCCESS;
    }

    /* General version. */
    if (FAST_IS_REG(dst) && dst != src2) {
        EMIT_MOV(compiler, dst, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
        }
        else {
            inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
    }
    else {
        /* This version requires fewer memory writes. */
        EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
        if (src2 & SLJIT_IMM) {
            BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
        }
        else {
            inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = op_rm;
        }
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
    }

    return SLJIT_SUCCESS;
}

static sljit_s32 emit_mul(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
{
    sljit_u8* inst;
    sljit_s32 dst_r;

    dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;

    /* Register destination. */
    if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = IMUL_r_rm;
    }
    else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = IMUL_r_rm;
    }
    else if (src1 & SLJIT_IMM) {
        if (src2 & SLJIT_IMM) {
            EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
            src2 = dst_r;
            src2w = 0;
        }

        if (src1w <= 127 && src1w >= -128) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = IMUL_r_rm_i8;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            FAIL_IF(!inst);
            INC_SIZE(1);
            *inst = (sljit_s8)src1w;
        }
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        else {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            FAIL_IF(!inst);
            INC_SIZE(4);
            sljit_unaligned_store_sw(inst, src1w);
        }
#else
        else if (IS_HALFWORD(src1w)) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
            FAIL_IF(!inst);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            FAIL_IF(!inst);
            INC_SIZE(4);
            sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
        }
        else {
            if (dst_r != src2)
                EMIT_MOV(compiler, dst_r, 0, src2, src2w);
            FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
            FAIL_IF(!inst);
            *inst++ = GROUP_0F;
            *inst = IMUL_r_rm;
        }
#endif
    }
    else if (src2 & SLJIT_IMM) {
        /* Note: src1 is NOT immediate. */

        if (src2w <= 127 && src2w >= -128) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
            FAIL_IF(!inst);
            *inst = IMUL_r_rm_i8;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
            FAIL_IF(!inst);
            INC_SIZE(1);
            *inst = (sljit_s8)src2w;
        }
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
        else {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
            FAIL_IF(!inst);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            FAIL_IF(!inst);
            INC_SIZE(4);
            sljit_unaligned_store_sw(inst, src2w);
        }
#else
        else if (IS_HALFWORD(src2w)) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
            FAIL_IF(!inst);
            *inst = IMUL_r_rm_i32;
            inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
            FAIL_IF(!inst);
            INC_SIZE(4);
            sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
        }
        else {
            if (dst_r != src1)
                EMIT_MOV(compiler, dst_r, 0, src1, src1w);
            FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
            inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
            FAIL_IF(!inst);
            *inst++ = GROUP_0F;
            *inst = IMUL_r_rm;
        }
#endif
    }
    else {
        /* Neither argument is immediate. */
        if (ADDRESSING_DEPENDS_ON(src2, dst_r))
            dst_r = TMP_REG1;
        EMIT_MOV(compiler, dst_r, 0, src1, src1w);
        inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
        FAIL_IF(!inst);
        *inst++ = GROUP_0F;
        *inst = IMUL_r_rm;
    }

    if (dst & SLJIT_MEM)
        EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

    return SLJIT_SUCCESS;
}

static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
    sljit_s32 dst, sljit_sw dstw,
    sljit_s32 src1, sljit_sw src1w,
    sljit_s32 src2, sljit_sw src2w)
{
    sljit_u8* inst;
    sljit_s32 dst_r, done = 0;

    /* These cases are better left to the normal code path. */
    if (dst == src1 && dstw == src1w)
        return SLJIT_ERR_UNSUPPORTED;
    if (dst == src2 && dstw == src2w)
        return SLJIT_ERR_UNSUPPORTED;

    dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

    if (FAST_IS_REG(src1)) {
        if (FAST_IS_REG(src2)) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
            FAIL_IF(!inst);
            *inst = LEA_r_m;
            done = 1;
        }
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
        if (src2 & SLJIT_IMM) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
            FAIL_IF(!inst);
            *inst = LEA_r_m;
            done = 1;
        }
    }
    else if (FAST_IS_REG(src2)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
        if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
        if (src1 & SLJIT_IMM) {
            inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
            FAIL_IF(!inst);
            *inst = LEA_r_m;
            done = 1;
        }
    }

    if (done) {
        if (dst_r == TMP_REG1)
            return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
        return SLJIT_SUCCESS;
    }
    return SLJIT_ERR_UNSUPPORTED;
}
1831
1832 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
1833 sljit_s32 src1, sljit_sw src1w,
1834 sljit_s32 src2, sljit_sw src2w)
1835 {
1836 sljit_u8* inst;
1837
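	/* The accumulator form (CMP EAX, imm32) lacks a ModR/M byte and
	   is one byte shorter than the generic encoding, so it is used
	   when src1 is EAX and the immediate does not fit in a
	   sign-extended byte. */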
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}
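	/* TEST is commutative, so the accumulator short form is also
	   tried with the operands swapped. */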

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (!(src1 & SLJIT_IMM)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (!(src2 & SLJIT_IMM)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

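	/* A variable shift count must be in CL on x86, so the code below
	   is mostly about routing src2 into SLJIT_PREF_SHIFT_REG (ecx)
	   while preserving the register's previous value. */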
	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is complex, since ecx itself may be used for
		   addressing, and that case must be supported as well. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
#else
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG2, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 - moves the value and sets the flags
		   according to it without changing it. */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_I32_OP;
#endif

	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
		return SLJIT_SUCCESS;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!HAS_FLAGS(op)) {
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!HAS_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}

		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
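	/* On x86-32, these are virtual registers that live in the stack
	   frame (see CHECK_EXTRA_REGS), so there is no machine register
	   index to return. */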
	if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_s32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/* Floating point operators */
/* --------------------------------------------------------------------- */

/* Alignment (3 words) + four 16 byte constants. */
static sljit_s32 sse2_data[3 + (4 * 4)];
static sljit_s32 *sse2_buffer;

static void init_compiler(void)
{
	/* Align to 16 bytes. */
	sse2_buffer = (sljit_s32*)(((sljit_uw)sse2_data + 15) & ~0xf);

	/* Single precision constants (each constant is 16 bytes long). */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants (each constant is 16 bytes long). */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}
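
/* Each precision gets a sign-bit mask (XORed in by SLJIT_NEG_F64) and
   its complement (ANDed in by SLJIT_ABS_F64); see the XORPD and ANDPD
   uses in sljit_emit_fop1 below. */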

static sljit_s32 emit_sse2(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 single, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}
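
/* Scalar SSE2 arithmetic selects its precision with a mandatory
   prefix: F3 for single, F2 for double. The logic/compare forms
   emitted below use the 66 prefix (packed double) or no prefix
   (packed single) instead. */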

static sljit_s32 emit_sse2_logic(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 pref66, sljit_s32 xmm1, sljit_s32 xmm2, sljit_sw xmm2w)
{
	sljit_u8 *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

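	/* CVTTSD2SI / CVTTSS2SI ('T' for truncate) convert toward zero,
	   regardless of the MXCSR rounding mode. */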
	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_u8 *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_F32_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_F32_OP), src1, src2, src2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_MOV_F64) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_F32_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of the source. From SLJIT's
			   point of view, this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_F32_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_F32_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_F32_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_NEG_F64:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_ABS_F64:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_F32_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_F32_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_F32_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_F32_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/* Conditional instructions */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_u8 *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

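	/* A record whose first (length) byte is 0 is not an instruction
	   but a marker: a second byte of 0 denotes a label, 1 a constant,
	   and type + 2 a jump. These records are resolved when the code
	   is generated. */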
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif
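	/* On x86-32: JMP rel32 is 5 bytes and Jcc rel32 is 6. On x86-64
	   the budget allows for a 10 byte MOV r64, imm64 plus a 3 byte
	   indirect jump; conditional jumps presumably add a 2 byte short
	   jump to skip that sequence. */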

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 2;
	return jump;
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifndef _WIN64
#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R3)
#else
#define IS_REG_CHANGED_BY_CALL(src, type) ((src) == SLJIT_R2)
#endif
#endif

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#else
		if ((src & SLJIT_MEM) || IS_REG_CHANGED_BY_CALL(src, type)) {
			EMIT_MOV(compiler, TMP_REG2, 0, src, srcw);
			src = TMP_REG2;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 2;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;
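	/* Jcc is encoded as 0F 80..8F and SETcc as 0F 90..9F, which is
	   why adding 0x10 to the jump opcode yields the set opcode. */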

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else
	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* An xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		SLJIT_ASSERT(reg_map[SLJIT_R0] == 0);

		if (dst != SLJIT_R0) {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	dst_reg &= ~SLJIT_I32_OP;

	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV) || (dst_reg >= SLJIT_R3 && dst_reg <= SLJIT_S3))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#else
	if (!sljit_has_cpu_feature(SLJIT_HAS_CMOV))
		return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
#endif

	/* ADJUST_LOCAL_OFFSET is not needed. */
	CHECK_EXTRA_REGS(src, srcw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = dst_reg & SLJIT_I32_OP;
	dst_reg &= ~SLJIT_I32_OP;
#endif

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

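	/* CMOVcc is encoded as 0F 40..4F, i.e. the matching Jcc opcode
	   (0F 80..8F) minus 0x40. */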
	inst = emit_x86_instruction(compiler, 2, dst_reg, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = get_jump_code(type & 0xff) - 0x40;
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
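	/* On x86-32 the patched field is a rel32 displacement measured
	   from the end of the 4 byte field, while on x86-64 it is the
	   absolute target loaded by a MOV r64, imm64. */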
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_unaligned_store_sw((void*)addr, new_target - (addr + 4) - (sljit_uw)executable_offset);
#else
	sljit_unaligned_store_sw((void*)addr, (sljit_sw) new_target);
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);
	sljit_unaligned_store_sw((void*)addr, new_constant);
}
