/*
 * Stack-less Just-In-Time compiler
 *
 * Copyright 2009-2012 Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without modification, are
 * permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice, this list of
 * conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
 * of conditions and the following disclaimer in the documentation and/or other materials
 * provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

SLJIT_API_FUNC_ATTRIBUTE SLJIT_CONST char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}

/*
   32b register indexes:
   0 - EAX
   1 - ECX
   2 - EDX
   3 - EBX
   4 - none
   5 - EBP
   6 - ESI
   7 - EDI
*/

/*
   64b register indexes:
   0 - RAX
   1 - RCX
   2 - RDX
   3 - RBX
   4 - none
   5 - RBP
   6 - RSI
   7 - RDI
   8 - R8 - From now on REX prefix is required
   9 - R9
   10 - R10
   11 - R11
   12 - R12
   13 - R13
   14 - R14
   15 - R15
*/
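
/* Worked example (illustrative): register indexes >= 8 can only be encoded
   with a REX prefix. "mov rax, rcx" is 48 8b c1 (REX.W, MOV_r_rm, ModRM),
   while "mov rax, r8" is 49 8b c0: REX.B extends the r/m field, and only
   the low three bits of the register index (see reg_lmap below) are put
   into the ModRM byte. */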

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)

static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
	0, 0, 2, 1, 0, 0, 0, 0, 7, 6, 3, 4, 5
};

#define CHECK_EXTRA_REGS(p, w, do) \
	if (p >= SLJIT_R3 && p <= SLJIT_R6) { \
		w = SLJIT_LOCALS_OFFSET + ((p) - (SLJIT_R3 + 4)) * sizeof(sljit_sw); \
		p = SLJIT_MEM1(SLJIT_SP); \
		do; \
	}
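
/* Illustrative note: on x86-32, SLJIT_R3..SLJIT_R6 are virtual registers
   (their reg_map entries above are 0). CHECK_EXTRA_REGS rewrites such an
   operand into a stack slot: e.g. a SLJIT_R3 argument becomes
   SLJIT_MEM1(SLJIT_SP) with an offset derived from SLJIT_LOCALS_OFFSET,
   and "do" is executed so the caller can note that the remapping
   happened. */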

#else /* SLJIT_CONFIG_X86_32 */

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)

/* Note: r12 & 0x7 == 0b100, which is decoded as "SIB byte present".
   Note: avoid using r12 and r13 for memory addressing;
   therefore r12 is better suited for SAVED_EREG than SAVED_REG. */
#ifndef _WIN64
/* 1st argument passed in rdi, 2nd argument passed in rsi, 3rd in rdx. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 7, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 6, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 7, 1
};
#else
/* 1st argument passed in rcx, 2nd argument passed in rdx, 3rd in r8. */
static SLJIT_CONST sljit_ub reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 10, 8, 9
};
/* low-map. reg_map & 0x7. */
static SLJIT_CONST sljit_ub reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 2, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 2, 0, 1
};
#endif

#define REX_W 0x48
#define REX_R 0x44
#define REX_X 0x42
#define REX_B 0x41
#define REX 0x40

#ifndef _WIN64
#define HALFWORD_MAX 0x7fffffffl
#define HALFWORD_MIN -0x80000000l
#else
#define HALFWORD_MAX 0x7fffffffll
#define HALFWORD_MIN -0x80000000ll
#endif

#define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
#define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
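
/* Example (illustrative): most x86-64 instructions sign-extend a 32-bit
   immediate, so IS_HALFWORD() decides whether a constant fits that field:
   IS_HALFWORD(0x7fffffff) holds, while NOT_HALFWORD(0x80000000l) holds and
   forces the 64-bit constant path (emit_load_imm64). */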

#define CHECK_EXTRA_REGS(p, w, do)

#endif /* SLJIT_CONFIG_X86_32 */

#define TMP_FREG (0)

/* Size flags for emit_x86_instruction: */
#define EX86_BIN_INS 0x0010
#define EX86_SHIFT_INS 0x0020
#define EX86_REX 0x0040
#define EX86_NO_REXW 0x0080
#define EX86_BYTE_ARG 0x0100
#define EX86_HALF_ARG 0x0200
#define EX86_PREF_66 0x0400
#define EX86_PREF_F2 0x0800
#define EX86_PREF_F3 0x1000
#define EX86_SSE2_OP1 0x2000
#define EX86_SSE2_OP2 0x4000
#define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)

/* --------------------------------------------------------------------- */
/* Instruction forms */
/* --------------------------------------------------------------------- */

#define ADD (/* BINARY */ 0 << 3)
#define ADD_EAX_i32 0x05
#define ADD_r_rm 0x03
#define ADD_rm_r 0x01
#define ADDSD_x_xm 0x58
#define ADC (/* BINARY */ 2 << 3)
#define ADC_EAX_i32 0x15
#define ADC_r_rm 0x13
#define ADC_rm_r 0x11
#define AND (/* BINARY */ 4 << 3)
#define AND_EAX_i32 0x25
#define AND_r_rm 0x23
#define AND_rm_r 0x21
#define ANDPD_x_xm 0x54
#define BSR_r_rm (/* GROUP_0F */ 0xbd)
#define CALL_i32 0xe8
#define CALL_rm (/* GROUP_FF */ 2 << 3)
#define CDQ 0x99
#define CMOVNE_r_rm (/* GROUP_0F */ 0x45)
#define CMP (/* BINARY */ 7 << 3)
#define CMP_EAX_i32 0x3d
#define CMP_r_rm 0x3b
#define CMP_rm_r 0x39
#define CVTPD2PS_x_xm 0x5a
#define CVTSI2SD_x_rm 0x2a
#define CVTTSD2SI_r_xm 0x2c
#define DIV (/* GROUP_F7 */ 6 << 3)
#define DIVSD_x_xm 0x5e
#define INT3 0xcc
#define IDIV (/* GROUP_F7 */ 7 << 3)
#define IMUL (/* GROUP_F7 */ 5 << 3)
#define IMUL_r_rm (/* GROUP_0F */ 0xaf)
#define IMUL_r_rm_i8 0x6b
#define IMUL_r_rm_i32 0x69
#define JE_i8 0x74
#define JNE_i8 0x75
#define JMP_i8 0xeb
#define JMP_i32 0xe9
#define JMP_rm (/* GROUP_FF */ 4 << 3)
#define LEA_r_m 0x8d
#define MOV_r_rm 0x8b
#define MOV_r_i32 0xb8
#define MOV_rm_r 0x89
#define MOV_rm_i32 0xc7
#define MOV_rm8_i8 0xc6
#define MOV_rm8_r8 0x88
#define MOVSD_x_xm 0x10
#define MOVSD_xm_x 0x11
#define MOVSXD_r_rm 0x63
#define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
#define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
#define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
#define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
#define MUL (/* GROUP_F7 */ 4 << 3)
#define MULSD_x_xm 0x59
#define NEG_rm (/* GROUP_F7 */ 3 << 3)
#define NOP 0x90
#define NOT_rm (/* GROUP_F7 */ 2 << 3)
#define OR (/* BINARY */ 1 << 3)
#define OR_r_rm 0x0b
#define OR_EAX_i32 0x0d
#define OR_rm_r 0x09
#define OR_rm8_r8 0x08
#define POP_r 0x58
#define POP_rm 0x8f
#define POPF 0x9d
#define PUSH_i32 0x68
#define PUSH_r 0x50
#define PUSH_rm (/* GROUP_FF */ 6 << 3)
#define PUSHF 0x9c
#define RET_near 0xc3
#define RET_i16 0xc2
#define SBB (/* BINARY */ 3 << 3)
#define SBB_EAX_i32 0x1d
#define SBB_r_rm 0x1b
#define SBB_rm_r 0x19
#define SAR (/* SHIFT */ 7 << 3)
#define SHL (/* SHIFT */ 4 << 3)
#define SHR (/* SHIFT */ 5 << 3)
#define SUB (/* BINARY */ 5 << 3)
#define SUB_EAX_i32 0x2d
#define SUB_r_rm 0x2b
#define SUB_rm_r 0x29
#define SUBSD_x_xm 0x5c
#define TEST_EAX_i32 0xa9
#define TEST_rm_r 0x85
#define UCOMISD_x_xm 0x2e
#define UNPCKLPD_x_xm 0x14
#define XCHG_EAX_r 0x90
#define XCHG_r_rm 0x87
#define XOR (/* BINARY */ 6 << 3)
#define XOR_EAX_i32 0x35
#define XOR_r_rm 0x33
#define XOR_rm_r 0x31
#define XORPD_x_xm 0x57

#define GROUP_0F 0x0f
#define GROUP_F7 0xf7
#define GROUP_FF 0xff
#define GROUP_BINARY_81 0x81
#define GROUP_BINARY_83 0x83
#define GROUP_SHIFT_1 0xd1
#define GROUP_SHIFT_N 0xc1
#define GROUP_SHIFT_CL 0xd3

#define MOD_REG 0xc0
#define MOD_DISP8 0x40
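
/* ModRM layout reminder (illustrative): bits 7-6 form the mod field,
   bits 5-3 the reg field and bits 2-0 the r/m field. MOD_REG (0xc0)
   selects a register-direct operand, MOD_DISP8 (0x40) a memory operand
   with an 8-bit displacement; MOD_REG | (reg << 3) | rm therefore encodes
   a register-to-register form. */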

#define INC_SIZE(s) (*inst++ = (s), compiler->size += (s))

#define PUSH_REG(r) (*inst++ = (PUSH_r + (r)))
#define POP_REG(r) (*inst++ = (POP_r + (r)))
#define RET() (*inst++ = (RET_near))
#define RET_I16(n) (*inst++ = (RET_i16), *inst++ = n, *inst++ = 0)
/* r32, r/m32 */
#define MOV_RM(mod, reg, rm) (*inst++ = (MOV_r_rm), *inst++ = (mod) << 6 | (reg) << 3 | (rm))
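
/* Usage sketch (illustrative): MOV_RM(MOD_REG, reg_map[dst], reg_map[src])
   emits the two-byte sequence 8b /r, i.e. a register-to-register
   "mov dst, src"; the caller must have reserved the two bytes beforehand
   (ensure_buf() plus INC_SIZE(2)). */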

/* Multithreading does not harm these static variables, since they only
   store detected CPU features. Several threads may overwrite them if they
   detect the features at the same time, but they always write the same
   values. */
#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
static sljit_si cpu_has_sse2 = -1;
#endif
static sljit_si cpu_has_cmov = -1;

#if defined(_MSC_VER) && _MSC_VER >= 1400
#include <intrin.h>
#endif

static void get_cpu_features(void)
{
	sljit_ui features;

#if defined(_MSC_VER) && _MSC_VER >= 1400

	int CPUInfo[4];
	__cpuid(CPUInfo, 1);
	features = (sljit_ui)CPUInfo[3];

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C)

	/* AT&T syntax. */
	__asm__ (
		"movl $0x1, %%eax\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* On x86-32, there is no red zone, so this
		   should work (no need for a local variable). */
		"push %%ebx\n"
#endif
		"cpuid\n"
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"pop %%ebx\n"
#endif
		"movl %%edx, %0\n"
		: "=g" (features)
		:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "%eax", "%ecx", "%edx"
#else
		: "%rax", "%rbx", "%rcx", "%rdx"
#endif
	);

#else /* _MSC_VER && _MSC_VER >= 1400 */

	/* Intel syntax. */
	__asm {
		mov eax, 1
		cpuid
		mov features, edx
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	cpu_has_sse2 = (features >> 26) & 0x1;
#endif
	cpu_has_cmov = (features >> 15) & 0x1;
}
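
/* Usage sketch (illustrative): the feature flags are detected lazily:

	if (cpu_has_cmov == -1)
		get_cpu_features();

   cpuid leaf 1 reports the feature bits in EDX: bit 15 is CMOV and
   bit 26 is SSE2, which matches the shift amounts above. */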

static sljit_ub get_jump_code(sljit_si type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_D_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_D_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_D_LESS:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_D_GREATER_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_D_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_D_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_D_UNORDERED:
		return 0x8a /* jp */;

	case SLJIT_D_ORDERED:
		return 0x8b /* jpo */;
	}
	return 0;
}
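
/* Encoding note (illustrative): the values above are the second byte of
   the long form "0f 8x rel32". The short form is the single byte
   "7x rel8", exactly 0x10 less (JE is 0f 84 long versus 74 short), which
   is what generate_near_jump_code() below exploits when it emits
   get_jump_code(type) - 0x10 for short jumps. */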

static sljit_ub* generate_far_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_si type);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
static sljit_ub* generate_fixed_jump(sljit_ub *code_ptr, sljit_sw addr, sljit_si type);
#endif

static sljit_ub* generate_near_jump_code(struct sljit_jump *jump, sljit_ub *code_ptr, sljit_ub *code, sljit_si type)
{
	sljit_si short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target;
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if ((sljit_sw)(label_addr - (jump->addr + 1)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 1)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr, type);
#endif

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		*code_ptr++ = get_jump_code(type) - 0x10;
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_sb);
	} else {
		jump->flags |= PATCH_MW;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		code_ptr += sizeof(sljit_sw);
#else
		code_ptr += sizeof(sljit_si);
#endif
	}

	return code_ptr;
}

SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ub *code;
	sljit_ub *code_ptr;
	sljit_ub *buf_ptr;
	sljit_ub *buf_end;
	sljit_ub len;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_ub*)SLJIT_MALLOC_EXEC(compiler->size);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
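
	/* Record format of the instruction stream (descriptive note): each
	   record starts with a length byte. A non-zero length is followed by
	   that many bytes of already-generated machine code. A zero length is
	   followed by a tag byte: 0 marks a label, 1 a constant, 2 and 3 a
	   fixed call/jump followed by a machine-word target address, and
	   values >= 4 encode a jump whose type is (tag - 4). */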
	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMMOVE(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				if (*buf_ptr >= 4) {
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, *buf_ptr - 4);
					else
						code_ptr = generate_far_jump_code(jump, code_ptr, *buf_ptr - 4);
					jump = jump->next;
				}
				else if (*buf_ptr == 0) {
					label->addr = (sljit_uw)code_ptr;
					label->size = code_ptr - code;
					label = label->next;
				}
				else if (*buf_ptr == 1) {
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
				}
				else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
					*code_ptr++ = (*buf_ptr == 2) ? CALL_i32 : JMP_i32;
					buf_ptr++;
					*(sljit_sw*)code_ptr = *(sljit_sw*)buf_ptr - ((sljit_sw)code_ptr + sizeof(sljit_sw));
					code_ptr += sizeof(sljit_sw);
					buf_ptr += sizeof(sljit_sw) - 1;
#else
					code_ptr = generate_fixed_jump(code_ptr, *(sljit_sw*)(buf_ptr + 1), *buf_ptr);
					buf_ptr += sizeof(sljit_sw);
#endif
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);

	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & PATCH_MB) {
			SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) >= -128 && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb))) <= 127);
			*(sljit_ub*)jump->addr = (sljit_ub)(jump->u.label->addr - (jump->addr + sizeof(sljit_sb)));
		} else if (jump->flags & PATCH_MW) {
			if (jump->flags & JUMP_LABEL) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.label->addr - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.label->addr - (jump->addr + sizeof(sljit_si)));
#endif
			}
			else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				*(sljit_sw*)jump->addr = (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_sw)));
#else
				SLJIT_ASSERT((sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) >= HALFWORD_MIN && (sljit_sw)(jump->u.target - (jump->addr + sizeof(sljit_si))) <= HALFWORD_MAX);
				*(sljit_si*)jump->addr = (sljit_si)(jump->u.target - (jump->addr + sizeof(sljit_si)));
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD)
			*(sljit_sw*)jump->addr = jump->u.label->addr;
#endif

		jump = jump->next;
	}

	/* Maybe we waste some space because of short jumps. */
	SLJIT_ASSERT(code_ptr <= code + compiler->size);
	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_size = code_ptr - code;
	return (void*)code;
}

/* --------------------------------------------------------------------- */
/* Operators */
/* --------------------------------------------------------------------- */

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w);

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw);

static SLJIT_INLINE sljit_si emit_save_flags(struct sljit_compiler *compiler)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp + sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)sizeof(sljit_sw);
	*inst++ = PUSHF;
	compiler->flags_saved = 1;
	return SLJIT_SUCCESS;
}
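
/* Emitted bytes (illustrative), x86-32: 8d 64 24 04 9c, i.e.
   "lea esp, [esp + 4]" followed by "pushfd"; x86-64 prepends REX.W and
   uses an 8-byte displacement. The net effect is that the stack pointer
   is left unchanged while the flags word is stored in the slot at the
   current stack top, which sljit appears to reserve for this purpose. */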

static SLJIT_INLINE sljit_si emit_restore_flags(struct sljit_compiler *compiler, sljit_si keep_flags)
{
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
	FAIL_IF(!inst);
	INC_SIZE(5);
	*inst++ = POPF;
#else
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 6);
	FAIL_IF(!inst);
	INC_SIZE(6);
	*inst++ = POPF;
	*inst++ = REX_W;
#endif
	*inst++ = LEA_r_m; /* lea esp/rsp, [esp/rsp - sizeof(sljit_sw)] */
	*inst++ = 0x64;
	*inst++ = 0x24;
	*inst++ = (sljit_ub)-(sljit_sb)sizeof(sljit_sw);
	compiler->flags_saved = keep_flags;
	return SLJIT_SUCCESS;
}

#ifdef _WIN32
#include <malloc.h>

static void SLJIT_CALL sljit_grow_stack(sljit_sw local_size)
{
	/* Workaround for calling the internal _chkstk() function on Windows.
	   This function touches all 4K pages belonging to the requested stack
	   space, whose size is passed in local_size. This is necessary on
	   Windows, where the stack can only grow in 4K steps. If the stack is
	   already large enough, the call just burns CPU cycles; but since that
	   cannot be known in advance, it must always be made. I think this is
	   a bad design in general, even if it has its reasons. */
	*(volatile sljit_si*)alloca(local_size) = 0;
}

#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#include "sljitNativeX86_32.c"
#else
#include "sljitNativeX86_64.c"
#endif

static sljit_si emit_mov(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		/* No destination: no need to set up flags. */
		if (src & SLJIT_MEM) {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = MOV_r_rm;
		}
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(src)) {
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}
	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, MOV_r_i32 + reg_lmap[dst], srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Requires two instructions. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}

#define EMIT_MOV(compiler, dst, dstw, src, srcw) \
	FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op0(struct sljit_compiler *compiler, sljit_si op)
{
	sljit_ub *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = INT3;
		break;
	case SLJIT_NOP:
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1);
		*inst = NOP;
		break;
	case SLJIT_LUMUL:
	case SLJIT_LSMUL:
	case SLJIT_LUDIV:
	case SLJIT_LSDIV:
		compiler->flags_saved = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7,
			invalid_register_assignment_for_div_mul);
#else
		SLJIT_COMPILE_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2,
			invalid_register_assignment_for_div_mul);
#endif
		compiler->mode32 = op & SLJIT_INT_OP;
#endif

		op = GET_OPCODE(op);
		if (op == SLJIT_LUDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if (op == SLJIT_LSDIV) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = CDQ;
#else
			if (compiler->mode32) {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
				FAIL_IF(!inst);
				INC_SIZE(1);
				*inst = CDQ;
			} else {
				inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				*inst++ = REX_W;
				*inst = CDQ;
			}
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_LUDIV) ? 3 : 2;
#else
		size = (!compiler->mode32) ? 3 : 2;
#endif
		inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_LUDIV) ? REX_B : 0);
		else if (op >= SLJIT_LUDIV)
			*inst++ = REX_B;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | ((op >= SLJIT_LUDIV) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else
		if (!compiler->mode32)
			*inst++ = REX_W;
		*inst++ = GROUP_F7;
		*inst = MOD_REG | reg_map[SLJIT_R1];
#endif
#endif
		switch (op) {
		case SLJIT_LUMUL:
			*inst |= MUL;
			break;
		case SLJIT_LSMUL:
			*inst |= IMUL;
			break;
		case SLJIT_LUDIV:
			*inst |= DIV;
			break;
		case SLJIT_LSDIV:
			*inst |= IDIV;
			break;
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	}

	return SLJIT_SUCCESS;
}

#define ENCODE_PREFIX(prefix) \
	do { \
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1); \
		FAIL_IF(!inst); \
		INC_SIZE(1); \
		*inst = (prefix); \
	} while (0)

static sljit_si emit_mov_byte(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si work_r;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

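	/* Illustrative note: without a REX prefix only registers with
	   reg_map[r] < 4 (EAX, ECX, EDX, EBX) have an addressable low-byte
	   form on x86-32; the shuffling below routes the value through such a
	   register, in the worst case by swapping it into EAX with XCHG. */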
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	else if (FAST_IS_REG(src) && reg_map[src] >= 4) {
		/* src, dst are registers. */
		SLJIT_ASSERT(SLOW_IS_REG(dst));
		if (reg_map[dst] < 4) {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			inst = emit_x86_instruction(compiler, 2, dst, 0, dst, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
		}
		else {
			if (dst != src)
				EMIT_MOV(compiler, dst, 0, src, 0);
			if (sign) {
				/* shl reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SHL;
				/* sar reg, 24 */
				inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 24, dst, 0);
				FAIL_IF(!inst);
				*inst |= SAR;
			}
			else {
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
			}
		}
		return SLJIT_SUCCESS;
	}
#endif
	else {
		/* src is either a memory operand or, on x86-32, a register with reg_map[src] < 4. */
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm8 : MOVZX_r_rm8;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (dst_r == TMP_REG1) {
			/* Find an unused register whose reg_map value is < 4. */
			if ((dst & REG_MASK) == SLJIT_R0) {
				if ((dst & OFFS_REG_MASK) == TO_OFFS_REG(SLJIT_R1))
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}
			else {
				if ((dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
					work_r = SLJIT_R0;
				else if ((dst & REG_MASK) == SLJIT_R1)
					work_r = SLJIT_R2;
				else
					work_r = SLJIT_R1;
			}

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}

			inst = emit_x86_instruction(compiler, 1, work_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;

			if (work_r == SLJIT_R0) {
				ENCODE_PREFIX(XCHG_EAX_r + reg_map[TMP_REG1]);
			}
			else {
				inst = emit_x86_instruction(compiler, 1, work_r, 0, dst_r, 0);
				FAIL_IF(!inst);
				*inst = XCHG_r_rm;
			}
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm8_r8;
		}
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
#endif
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mov_half(struct sljit_compiler *compiler, sljit_si sign,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (dst == SLJIT_UNUSED && !(src & SLJIT_MEM))
		return SLJIT_SUCCESS; /* Empty instruction. */

	if (src & SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 + reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = sign ? MOVSX_r_rm16 : MOVZX_r_rm16;
	}

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_unary(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (dst == src && dstw == srcw) {
		/* Same input and output */
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= opcode;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= opcode;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_not_with_flags(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = OR_r_rm;
		return SLJIT_SUCCESS;
	}
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst++ = GROUP_F7;
	*inst |= NOT_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	*inst = OR_r_rm;
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

static sljit_si emit_clz(struct sljit_compiler *compiler, sljit_si op_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si dst_r;

	SLJIT_UNUSED_ARG(op_flags);
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		/* Just set the zero flag. */
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		inst = emit_x86_instruction(compiler, 1, 0, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_F7;
		*inst |= NOT_rm;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 31, TMP_REG1, 0);
#else
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, TMP_REG1, 0);
#endif
		FAIL_IF(!inst);
		*inst |= SHR;
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = BSR_r_rm;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (FAST_IS_REG(dst))
		dst_r = dst;
	else {
		/* Find an unused temporary register. */
		if ((dst & REG_MASK) != SLJIT_R0 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R0))
			dst_r = SLJIT_R0;
		else if ((dst & REG_MASK) != SLJIT_R1 && (dst & OFFS_REG_MASK) != TO_OFFS_REG(SLJIT_R1))
			dst_r = SLJIT_R1;
		else
			dst_r = SLJIT_R2;
		EMIT_MOV(compiler, dst, dstw, dst_r, 0);
	}
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, 32 + 31);
#else
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 64 + 63 : 32 + 31);
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif
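	/* Descriptive note: BSR returns the bit index of the most significant
	   set bit and sets ZF when the source is zero. dst_r is preloaded with
	   32 + 31 (or 64 + 63), and CMOVNE below overwrites it with the BSR
	   result only for a non-zero source. The final XOR maps index i to
	   31 - i (31 ^ i == 31 - i for 0 <= i <= 31), i.e. the leading-zero
	   count, while the preloaded value becomes 63 ^ 31 == 32
	   (or 127 ^ 63 == 64) for a zero source. */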

	if (cpu_has_cmov == -1)
		get_cpu_features();

	if (cpu_has_cmov) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = CMOVNE_r_rm;
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
		FAIL_IF(!inst);
		INC_SIZE(4);

		*inst++ = JE_i8;
		*inst++ = 2;
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_map[dst_r] << 3) | reg_map[TMP_REG1];
#else
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 5);
		FAIL_IF(!inst);
		INC_SIZE(5);

		*inst++ = JE_i8;
		*inst++ = 3;
		*inst++ = REX_W | (reg_map[dst_r] >= 8 ? REX_R : 0) | (reg_map[TMP_REG1] >= 8 ? REX_B : 0);
		*inst++ = MOV_r_rm;
		*inst++ = MOD_REG | (reg_lmap[dst_r] << 3) | reg_lmap[TMP_REG1];
#endif
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
#else
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, !(op_flags & SLJIT_INT_OP) ? 63 : 31, dst_r, 0);
#endif
	FAIL_IF(!inst);
	*(inst + 1) |= XOR;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = XCHG_r_rm;
	}
#else
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG2, 0);
#endif
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_ub* inst;
	sljit_si update = 0;
	sljit_si op_flags = GET_ALL_FLAGS(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_si dst_is_ereg = 0;
	sljit_si src_is_ereg = 0;
#else
# define src_is_ereg 0
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, src_is_ereg = 1);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_INT_OP;
#endif

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOVU_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif

		if (op_flags & SLJIT_INT_OP) {
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (op == SLJIT_MOV_SI && (src & SLJIT_MEM))
				op = SLJIT_MOV_UI;
			if (op == SLJIT_MOVU_SI && (src & SLJIT_MEM))
				op = SLJIT_MOVU_UI;
			if (op == SLJIT_MOV_UI && (src & SLJIT_IMM))
				op = SLJIT_MOV_SI;
			if (op == SLJIT_MOVU_UI && (src & SLJIT_IMM))
				op = SLJIT_MOVU_SI;
#endif
		}

		SLJIT_COMPILE_ASSERT(SLJIT_MOV + 8 == SLJIT_MOVU, movu_offset);
		if (op >= SLJIT_MOVU) {
			update = 1;
			op -= 8;
		}

		if (src & SLJIT_IMM) {
			switch (op) {
			case SLJIT_MOV_UB:
				srcw = (sljit_ub)srcw;
				break;
			case SLJIT_MOV_SB:
				srcw = (sljit_sb)srcw;
				break;
			case SLJIT_MOV_UH:
				srcw = (sljit_uh)srcw;
				break;
			case SLJIT_MOV_SH:
				srcw = (sljit_sh)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_UI:
				srcw = (sljit_ui)srcw;
				break;
			case SLJIT_MOV_SI:
				srcw = (sljit_si)srcw;
				break;
#endif
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif
		}

		if (SLJIT_UNLIKELY(update) && (src & SLJIT_MEM) && !src_is_ereg && (src & REG_MASK) && (srcw != 0 || (src & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, src & REG_MASK, 0, src, srcw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			src &= SLJIT_MEM | 0xf;
			srcw = 0;
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_UI || op == SLJIT_MOV_SI || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_UI:
		case SLJIT_MOV_SI:
#endif
			FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UB:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SB:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_UH:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SH:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_UI:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_SI:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
#endif
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif

		if (SLJIT_UNLIKELY(update) && (dst & SLJIT_MEM) && (dst & REG_MASK) && (dstw != 0 || (dst & OFFS_REG_MASK) != 0)) {
			inst = emit_x86_instruction(compiler, 1, dst & REG_MASK, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
		}
		return SLJIT_SUCCESS;
	}

	if (SLJIT_UNLIKELY(GET_FLAGS(op_flags)))
		compiler->flags_saved = 0;

	switch (op) {
	case SLJIT_NOT:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_SET_E))
			return emit_not_with_flags(compiler, dst, dstw, src, srcw);
		return emit_unary(compiler, NOT_rm, dst, dstw, src, srcw);

	case SLJIT_NEG:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_unary(compiler, NEG_rm, dst, dstw, src, srcw);

	case SLJIT_CLZ:
		if (SLJIT_UNLIKELY(op_flags & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_clz(compiler, op_flags, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
# undef src_is_ereg
#endif
}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	if (IS_HALFWORD(immw) || compiler->mode32) { \
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
		FAIL_IF(!inst); \
		*(inst + 1) |= (op_imm); \
	} \
	else { \
		FAIL_IF(emit_load_imm64(compiler, TMP_REG2, immw)); \
		inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, arg, argw); \
		FAIL_IF(!inst); \
		*inst = (op_mr); \
	}

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))

#else

#define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
	inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
	FAIL_IF(!inst); \
	*(inst + 1) |= (op_imm);

#define BINARY_EAX_IMM(op_eax_imm, immw) \
	FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))

#endif

static sljit_si emit_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
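
/* Descriptive note: the cumulative operations (ADD, ADC, AND, OR, XOR) are
   commutative, so emit_cum_binary() above can also handle dst == src2 by
   swapping the operands. SUB and SBB are not, which is why the function
   below lacks that path and why its register fast path additionally
   requires dst != src2. */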

static sljit_si emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_ub op_rm, sljit_ub op_mr, sljit_ub op_imm, sljit_ub op_eax_imm,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if (dst == SLJIT_UNUSED) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (dst == src1 && dstw == src1w) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires fewer memory writes. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_mul(struct sljit_compiler *compiler,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && !(src2 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (dst_r == src2 && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}
	else if (src1 & SLJIT_IMM) {
		if (src2 & SLJIT_IMM) {
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src1w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src1w;
		}
#else
		else if (IS_HALFWORD(src1w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src1w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src1w);
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else if (src2 & SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1);
			*inst = (sljit_sb)src2w;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_sw*)inst = src2w;
		}
#else
		else if (IS_HALFWORD(src2w)) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			*(sljit_si*)inst = (sljit_si)src2w;
		}
		else {
			EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, src2w);
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG2, 0);
			FAIL_IF(!inst);
			*inst++ = GROUP_0F;
			*inst = IMUL_r_rm;
		}
#endif
	}
	else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 2, dst_r, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst++ = GROUP_0F;
		*inst = IMUL_r_rm;
	}

	if (dst_r == TMP_REG1)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}

static sljit_si emit_lea_binary(struct sljit_compiler *compiler, sljit_si keep_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;
	sljit_si dst_r, done = 0;

	/* These cases are better left to be handled by the normal path. */
1783 if (!keep_flags) {
1784 if (dst == src1 && dstw == src1w)
1785 return SLJIT_ERR_UNSUPPORTED;
1786 if (dst == src2 && dstw == src2w)
1787 return SLJIT_ERR_UNSUPPORTED;
1788 }
1789
1790 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1791
1792 if (FAST_IS_REG(src1)) {
1793 if (FAST_IS_REG(src2)) {
1794 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
1795 FAIL_IF(!inst);
1796 *inst = LEA_r_m;
1797 done = 1;
1798 }
1799 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1800 if ((src2 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src2w))) {
1801 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_si)src2w);
1802 #else
1803 if (src2 & SLJIT_IMM) {
1804 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
1805 #endif
1806 FAIL_IF(!inst);
1807 *inst = LEA_r_m;
1808 done = 1;
1809 }
1810 }
1811 else if (FAST_IS_REG(src2)) {
1812 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1813 if ((src1 & SLJIT_IMM) && (compiler->mode32 || IS_HALFWORD(src1w))) {
1814 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_si)src1w);
1815 #else
1816 if (src1 & SLJIT_IMM) {
1817 inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
1818 #endif
1819 FAIL_IF(!inst);
1820 *inst = LEA_r_m;
1821 done = 1;
1822 }
1823 }
1824
1825 if (done) {
1826 if (dst_r == TMP_REG1)
1827 return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
1828 return SLJIT_SUCCESS;
1829 }
1830 return SLJIT_ERR_UNSUPPORTED;
1831 }
1832
static sljit_si emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && !(src1 & SLJIT_IMM)) {
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 & SLJIT_IMM) {
		if (src1 & SLJIT_IMM) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}

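/* Emits a TEST. TEST is commutative, so either operand may provide the
   register. The GROUP_F7 opcode byte selects the TEST r/m, imm form; on
   x86-64 a non-halfword immediate is loaded into TMP_REG2 first, since
   TEST has no 64-bit immediate encoding. */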
static sljit_si emit_test_binary(struct sljit_compiler *compiler,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && (src2 & SLJIT_IMM) && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && (src1 & SLJIT_IMM) && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src1, 0);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2)) {
		if (src1 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, 0);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, src2, 0);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
		}
		else {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
		return SLJIT_SUCCESS;
	}

	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}

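/* Emits a shift or rotate. On x86 a variable shift count must live in CL
   (SLJIT_PREF_SHIFT_REG), so the operands are shuffled through TMP_REG1
   and, where needed, ecx is saved and restored around the instruction. */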
static sljit_si emit_shift(struct sljit_compiler *compiler,
	sljit_ub mode,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_ub* inst;

	if ((src2 & SLJIT_IMM) || (src2 == SLJIT_PREF_SHIFT_REG)) {
		if (dst == src1 && dstw == src1w) {
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_UNUSED) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			*inst |= mode;
			return SLJIT_SUCCESS;
		}

		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	if (dst == SLJIT_PREF_SHIFT_REG) {
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else if (FAST_IS_REG(dst) && dst != src2 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		*inst |= mode;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}
	else {
		/* This case is really difficult, since ecx itself may be used
		   for addressing, and this code must work even in that case. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
#else
		/* [esp+0] contains the flags. */
		EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw), SLJIT_PREF_SHIFT_REG, 0);
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst |= mode;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
#else
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_sw));
#endif
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}

static sljit_si emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_ub mode, sljit_si set_flags,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if ((src2w & 0x3f) != 0 || (compiler->mode32 && (src2w & 0x1f) != 0))
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#else
		if ((src2w & 0x1f) != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);
#endif
		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

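	/* A variable count may be zero at run time, and a zero-count shift
	   leaves the flags unchanged. If dst is a register, a CMP with zero
	   after the shift always produces flags from the result. Otherwise
	   the CMP is done on the source beforehand: a zero-count shift
	   preserves those flags (and the result equals the source), while a
	   nonzero count sets the flags from the shift itself. */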
	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_INT_OP;
#endif

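	/* Opcodes from SLJIT_MUL upwards clobber the flags. If the operation
	   produces new flags, any saved copy becomes stale; if the caller
	   asked to keep the current flags, they must be saved first. */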
	if (GET_OPCODE(op) >= SLJIT_MUL) {
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		if (!GET_FLAGS(op)) {
			if (emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		return emit_cum_binary(compiler, ADD_r_rm, ADD_rm_r, ADD, ADD_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_cum_binary(compiler, ADC_r_rm, ADC_rm_r, ADC, ADC_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		if (!GET_FLAGS(op)) {
			if ((src2 & SLJIT_IMM) && emit_lea_binary(compiler, op & SLJIT_KEEP_FLAGS, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		else
			compiler->flags_saved = 0;
		if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS) && !compiler->flags_saved)
			FAIL_IF(emit_save_flags(compiler));
		if (dst == SLJIT_UNUSED)
			return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
		return emit_non_cum_binary(compiler, SUB_r_rm, SUB_rm_r, SUB, SUB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		if (SLJIT_UNLIKELY(compiler->flags_saved)) /* C flag must be restored. */
			FAIL_IF(emit_restore_flags(compiler, 1));
		else if (SLJIT_UNLIKELY(op & SLJIT_KEEP_FLAGS))
			FAIL_IF(emit_save_flags(compiler));
		if (SLJIT_UNLIKELY(GET_FLAGS(op)))
			compiler->flags_saved = 0;
		return emit_non_cum_binary(compiler, SBB_r_rm, SBB_rm_r, SBB, SBB_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		if (dst == SLJIT_UNUSED)
			return emit_test_binary(compiler, src1, src1w, src2, src2w);
		return emit_cum_binary(compiler, AND_r_rm, AND_rm_r, AND, AND_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, OR_r_rm, OR_rm_r, OR, OR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		return emit_cum_binary(compiler, XOR_r_rm, XOR_rm_r, XOR, XOR_EAX_i32,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
		return emit_shift_with_flags(compiler, SHL, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
		return emit_shift_with_flags(compiler, SHR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
		return emit_shift_with_flags(compiler, SAR, GET_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_register_index(sljit_si reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (reg >= SLJIT_R3 && reg <= SLJIT_R6)
		return -1;
#endif
	return reg_map[reg];
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_float_register_index(sljit_si reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_si size)
{
	sljit_ub *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_ub*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMMOVE(inst, instruction, size);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Floating point operators                                             */
/* --------------------------------------------------------------------- */

/* Alignment + 4 * 16 bytes. */
static sljit_si sse2_data[3 + (4 + 4) * 2];
static sljit_si *sse2_buffer;

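/* sse2_buffer points to the first 16-byte aligned word of sse2_data. The
   buffer holds sign-bit masks (for DNEG via XORPD) at word offsets 0 and
   8, and absolute-value masks (for DABS via ANDPD) at word offsets 4 and
   12, for single and double precision respectively. */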
static void init_compiler(void)
{
	sse2_buffer = (sljit_si*)(((sljit_uw)sse2_data + 15) & ~0xf);
	/* Single precision constants. */
	sse2_buffer[0] = 0x80000000;
	sse2_buffer[4] = 0x7fffffff;
	/* Double precision constants. */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_is_fpu_available(void)
{
#ifdef SLJIT_IS_FPU_AVAILABLE
	return SLJIT_IS_FPU_AVAILABLE;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
	if (cpu_has_sse2 == -1)
		get_cpu_features();
	return cpu_has_sse2;
#else /* SLJIT_DETECT_SSE2 */
	return 1;
#endif /* SLJIT_DETECT_SSE2 */
}

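/* Emits a two-byte (0F xx) SSE2 instruction. The F3 prefix selects the
   scalar single precision form, F2 the scalar double precision form. */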
static sljit_si emit_sse2(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si single, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static sljit_si emit_sse2_logic(struct sljit_compiler *compiler, sljit_ub opcode,
	sljit_si pref66, sljit_si xmm1, sljit_si xmm2, sljit_sw xmm2w)
{
	sljit_ub *inst;

	inst = emit_x86_instruction(compiler, 2 | (pref66 ? EX86_PREF_66 : 0) | EX86_SSE2, xmm1, 0, xmm2, xmm2w);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = opcode;
	return SLJIT_SUCCESS;
}

static SLJIT_INLINE sljit_si emit_sse2_load(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_si src, sljit_sw srcw)
{
	return emit_sse2(compiler, MOVSD_x_xm, single, dst, src, srcw);
}

static SLJIT_INLINE sljit_si emit_sse2_store(struct sljit_compiler *compiler,
	sljit_si single, sljit_si dst, sljit_sw dstw, sljit_si src)
{
	return emit_sse2(compiler, MOVSD_xm_x, single, src, dst, dstw);
}

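/* Converts a floating point value to a signed word using the truncating
   CVTTSD2SI (or, with the F3 prefix, CVTTSS2SI) instruction. */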
static SLJIT_INLINE sljit_si sljit_emit_fop1_convw_fromd(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r = SLOW_IS_REG(dst) ? dst : TMP_REG1;
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONVW_FROMD)
		compiler->mode32 = 0;
#endif

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP2, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTTSD2SI_r_xm;

	if (dst_r == TMP_REG1 && dst != SLJIT_UNUSED)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}

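/* Converts a signed word to a floating point value with CVTSI2SD (or
   CVTSI2SS). Immediate sources are first loaded into TMP_REG1, since the
   instruction only accepts a register or memory source. */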
static SLJIT_INLINE sljit_si sljit_emit_fop1_convd_fromw(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
	sljit_ub *inst;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONVD_FROMW)
		compiler->mode32 = 0;
#endif

	if (src & SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONVD_FROMI)
			srcw = (sljit_si)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	inst = emit_x86_instruction(compiler, 2 | ((op & SLJIT_SINGLE_OP) ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2_OP1, dst_r, 0, src, srcw);
	FAIL_IF(!inst);
	*inst++ = GROUP_0F;
	*inst = CVTSI2SD_x_rm;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

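/* Compares two floating point values with UCOMISD/UCOMISS, which sets ZF,
   PF and CF. The first operand must be an xmm register, so a non-register
   first operand is loaded into TMP_FREG beforehand. */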
static SLJIT_INLINE sljit_si sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_si op,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	compiler->flags_saved = 0;
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}
	return emit_sse2_logic(compiler, UCOMISD_x_xm, !(op & SLJIT_SINGLE_OP), src1, src2, src2w);
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop1(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw)
{
	sljit_si dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	CHECK_ERROR();
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	if (GET_OPCODE(op) == SLJIT_DMOV) {
		if (FAST_IS_REG(dst))
			return emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst, src, srcw);
		if (FAST_IS_REG(src))
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, src);
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src, srcw));
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	}

	if (GET_OPCODE(op) == SLJIT_CONVD_FROMS) {
		dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
		if (FAST_IS_REG(src)) {
			/* We overwrite the high bits of the source. From SLJIT's
			   point of view this is not an issue.
			   Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
			FAIL_IF(emit_sse2_logic(compiler, UNPCKLPD_x_xm, op & SLJIT_SINGLE_OP, src, src, 0));
		}
		else {
			FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_SINGLE_OP), TMP_FREG, src, srcw));
			src = TMP_FREG;
		}

		FAIL_IF(emit_sse2_logic(compiler, CVTPD2PS_x_xm, op & SLJIT_SINGLE_OP, dst_r, src, 0));
		if (dst_r == TMP_FREG)
			return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
		return SLJIT_SUCCESS;
	}

	if (SLOW_IS_REG(dst)) {
		dst_r = dst;
		if (dst != src)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src, srcw));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_DNEG:
		FAIL_IF(emit_sse2_logic(compiler, XORPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer : sse2_buffer + 8)));
		break;

	case SLJIT_DABS:
		FAIL_IF(emit_sse2_logic(compiler, ANDPD_x_xm, 1, dst_r, SLJIT_MEM0(), (sljit_sw)(op & SLJIT_SINGLE_OP ? sse2_buffer + 4 : sse2_buffer + 12)));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_fop2(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src1, sljit_sw src1w,
	sljit_si src2, sljit_sw src2w)
{
	sljit_si dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (FAST_IS_REG(dst)) {
		dst_r = dst;
		if (dst == src1)
			; /* Do nothing here. */
		else if (dst == src2 && (op == SLJIT_DADD || op == SLJIT_DMUL)) {
			/* Swap arguments. */
			src2 = src1;
			src2w = src1w;
		}
		else if (dst != src2)
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, dst_r, src1, src1w));
		else {
			dst_r = TMP_FREG;
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
		}
	}
	else {
		dst_r = TMP_FREG;
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_SINGLE_OP, TMP_FREG, src1, src1w));
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_DADD:
		FAIL_IF(emit_sse2(compiler, ADDSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DSUB:
		FAIL_IF(emit_sse2(compiler, SUBSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DMUL:
		FAIL_IF(emit_sse2(compiler, MULSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;

	case SLJIT_DDIV:
		FAIL_IF(emit_sse2(compiler, DIVSD_x_xm, op & SLJIT_SINGLE_OP, dst_r, src2, src2w));
		break;
	}

	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_SINGLE_OP, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}

/* --------------------------------------------------------------------- */
/*  Conditional instructions                                             */
/* --------------------------------------------------------------------- */

SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	sljit_ub *inst;
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	/* We should restore the flags before the label,
	   since other taken jumps have their own flags as well. */
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		PTR_FAIL_IF(emit_restore_flags(compiler, 0));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 0;

	return label;
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_si type)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if ((type & 0xff) <= SLJIT_JUMP)
			PTR_FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF_NULL(jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type >= SLJIT_CALL1)
		PTR_FAIL_IF(call_with_args(compiler, type));

	/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
#else
	compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF_NULL(inst);

	*inst++ = 0;
	*inst++ = type + 4;
	return jump;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_ijump(struct sljit_compiler *compiler, sljit_si type, sljit_si src, sljit_sw srcw)
{
	sljit_ub *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (SLJIT_UNLIKELY(compiler->flags_saved)) {
		if (type <= SLJIT_JUMP)
			FAIL_IF(emit_restore_flags(compiler, 0));
		compiler->flags_saved = 0;
	}

	if (type >= SLJIT_CALL1) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
#if (defined SLJIT_X86_32_FASTCALL && SLJIT_X86_32_FASTCALL)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
		if (src == SLJIT_MEM1(SLJIT_SP) && type >= SLJIT_CALL3)
			srcw += sizeof(sljit_sw);
#endif
#endif
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && defined(_WIN64)
		if (src == SLJIT_R2) {
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
		}
#endif
		FAIL_IF(call_with_args(compiler, type));
	}

	if (src == SLJIT_IMM) {
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		inst = (sljit_ub*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		*inst++ = 0;
		*inst++ = type + 4;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		*inst++ = GROUP_FF;
		*inst |= (type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm;
	}
	return SLJIT_SUCCESS;
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_si op,
	sljit_si dst, sljit_sw dstw,
	sljit_si src, sljit_sw srcw,
	sljit_si type)
{
	sljit_ub *inst;
	sljit_ub cond_set = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#else
	/* CHECK_EXTRA_REGS might overwrite these values. */
	sljit_si dst_save = dst;
	sljit_sw dstw_save = dstw;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, src, srcw, type));
	SLJIT_UNUSED_ARG(srcw);

	if (dst == SLJIT_UNUSED)
		return SLJIT_SUCCESS;

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	if (SLJIT_UNLIKELY(compiler->flags_saved))
		FAIL_IF(emit_restore_flags(compiler, op & SLJIT_KEEP_FLAGS));

	type &= 0xff;
	/* setcc = jcc + 0x10. */
	cond_set = get_jump_code(type) + 0x10;

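	/* On x86-64 a REX prefix is always emitted before the SETcc so that
	   the low byte of any register (e.g. SIL, DIL, R8B) can be addressed;
	   the byte result is then widened with MOVZX. */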
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src) {
		inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		*inst++ = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | reg_lmap[TMP_REG1];
		*inst++ = REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B);
		*inst++ = OR_rm8_r8;
		*inst++ = MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst];
		return SLJIT_SUCCESS;
	}

	reg = (op == SLJIT_MOV && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_ub*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	*inst++ = (reg_map[reg] <= 7) ? REX : REX_B;
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | reg_lmap[reg];
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst = MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg];

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}
#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst, dstw, dst, dstw, TMP_REG1, 0);
#else /* SLJIT_CONFIG_X86_64 */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) {
		if (reg_map[dst] <= 4) {
			/* Low byte is accessible. */
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3 + 3);
			/* Set low byte to conditional flag. */
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | reg_map[dst];

			*inst++ = GROUP_0F;
			*inst++ = MOVZX_r_rm8;
			*inst = MOD_REG | (reg_map[dst] << 3) | reg_map[dst];
			return SLJIT_SUCCESS;
		}

		/* Low byte is not accessible. */
		if (cpu_has_cmov == -1)
			get_cpu_features();

		if (cpu_has_cmov) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
			/* a xor reg, reg operation would overwrite the flags. */
			EMIT_MOV(compiler, dst, 0, SLJIT_IMM, 0);

			inst = (sljit_ub*)ensure_buf(compiler, 1 + 3);
			FAIL_IF(!inst);
			INC_SIZE(3);

			*inst++ = GROUP_0F;
			/* cmovcc = setcc - 0x50. */
			*inst++ = cond_set - 0x50;
			*inst++ = MOD_REG | (reg_map[dst] << 3) | reg_map[TMP_REG1];
			return SLJIT_SUCCESS;
		}

		inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
		FAIL_IF(!inst);
		INC_SIZE(1 + 3 + 3 + 1);
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		/* Set al to conditional flag. */
		*inst++ = GROUP_0F;
		*inst++ = cond_set;
		*inst++ = MOD_REG | 0 /* eax */;

		*inst++ = GROUP_0F;
		*inst++ = MOVZX_r_rm8;
		*inst++ = MOD_REG | (reg_map[dst] << 3) | 0 /* eax */;
		*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && dst == src && reg_map[dst] <= 4) {
		SLJIT_COMPILE_ASSERT(reg_map[SLJIT_R0] == 0, scratch_reg1_must_be_eax);
		if (dst != SLJIT_R0) {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 2 + 1);
			FAIL_IF(!inst);
			INC_SIZE(1 + 3 + 2 + 1);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 0 /* eax */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (0 /* eax */ << 3) | reg_map[dst];
			*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
		}
		else {
			inst = (sljit_ub*)ensure_buf(compiler, 1 + 2 + 3 + 2 + 2);
			FAIL_IF(!inst);
			INC_SIZE(2 + 3 + 2 + 2);
			/* Set low register to conditional flag. */
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
			*inst++ = GROUP_0F;
			*inst++ = cond_set;
			*inst++ = MOD_REG | 1 /* ecx */;
			*inst++ = OR_rm8_r8;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | 0 /* eax */;
			*inst++ = XCHG_r_rm;
			*inst++ = MOD_REG | (1 /* ecx */ << 3) | reg_map[TMP_REG1];
		}
		return SLJIT_SUCCESS;
	}

	/* Set TMP_REG1 to the bit. */
	inst = (sljit_ub*)ensure_buf(compiler, 1 + 1 + 3 + 3 + 1);
	FAIL_IF(!inst);
	INC_SIZE(1 + 3 + 3 + 1);
	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];
	/* Set al to conditional flag. */
	*inst++ = GROUP_0F;
	*inst++ = cond_set;
	*inst++ = MOD_REG | 0 /* eax */;

	*inst++ = GROUP_0F;
	*inst++ = MOVZX_r_rm8;
	*inst++ = MOD_REG | (0 << 3) /* eax */ | 0 /* eax */;

	*inst++ = XCHG_EAX_r + reg_map[TMP_REG1];

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}

SLJIT_API_FUNC_ATTRIBUTE sljit_si sljit_get_local_base(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, SLJIT_KEEP_FLAGS, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}

SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_si dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_ub *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_si reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
	reg = SLOW_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (dst == SLJIT_UNUSED)
		dst = TMP_REG1;

	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	inst = (sljit_ub*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	*inst++ = 0;
	*inst++ = 1;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}

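/* Patches the target of a previously emitted jump or call. On x86-32 the
   field is a rel32 displacement, measured from the end of the 4-byte
   operand; on x86-64 it is the absolute address loaded by a MOV. */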
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_addr)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	*(sljit_sw*)addr = new_addr - (addr + 4);
#else
	*(sljit_uw*)addr = new_addr;
#endif
}

SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant)
{
	*(sljit_sw*)addr = new_constant;
}