1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #if defined(__has_feature)
28 #if __has_feature(memory_sanitizer)
29 #include <sanitizer/msan_interface.h>
30 #endif /* __has_feature(memory_sanitizer) */
31 #endif /* defined(__has_feature) */
32
/* Returns a human readable name of the target platform. SLJIT_CPUINFO is a
   configuration-provided suffix describing build details (e.g. word size). */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "x86" SLJIT_CPUINFO;
}
37
38 /*
39 32b register indexes:
40 0 - EAX
41 1 - ECX
42 2 - EDX
43 3 - EBX
44 4 - ESP
45 5 - EBP
46 6 - ESI
47 7 - EDI
48 */
49
50 /*
51 64b register indexes:
52 0 - RAX
53 1 - RCX
54 2 - RDX
55 3 - RBX
56 4 - RSP
57 5 - RBP
58 6 - RSI
59 7 - RDI
60 8 - R8 - From now on REX prefix is required
61 9 - R9
62 10 - R10
63 11 - R11
64 12 - R12
65 13 - R13
66 14 - R14
67 15 - R15
68 */
69
70 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
71 #define TMP_FREG (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
72
73 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
74
75
76 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 3] = {
77 0, 0, 2, 1, 0, 0, 0, 0, 0, 0, 5, 7, 6, 4, 3
78 };
79
80 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
81 0, 1, 2, 3, 4, 5, 6, 7, 0
82 };
83
84 #define CHECK_EXTRA_REGS(p, w, do) \
85 if (p >= SLJIT_R3 && p <= SLJIT_S3) { \
86 w = (2 * SSIZE_OF(sw)) + ((p) - SLJIT_R3) * SSIZE_OF(sw); \
87 p = SLJIT_MEM1(SLJIT_SP); \
88 do; \
89 }
90
91 #else /* SLJIT_CONFIG_X86_32 */
92
93 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
94
95 /* Note: r12 & 0x7 == 0b100, which decoded as SIB byte present
96 Note: avoid to use r12 and r13 for memory addressing
97 therefore r12 is better to be a higher saved register. */
98 #ifndef _WIN64
99 /* Args: rdi(=7), rsi(=6), rdx(=2), rcx(=1), r8, r9. Scratches: rax(=0), r10, r11 */
100 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
101 0, 0, 6, 7, 1, 8, 11, 10, 12, 5, 13, 14, 15, 3, 4, 2, 9
102 };
103 /* low-map. reg_map & 0x7. */
104 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
105 0, 0, 6, 7, 1, 0, 3, 2, 4, 5, 5, 6, 7, 3, 4, 2, 1
106 };
107 #else
108 /* Args: rcx(=1), rdx(=2), r8, r9. Scratches: rax(=0), r10, r11 */
109 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
110 0, 0, 2, 8, 1, 11, 12, 5, 13, 14, 15, 7, 6, 3, 4, 9, 10
111 };
112 /* low-map. reg_map & 0x7. */
113 static const sljit_u8 reg_lmap[SLJIT_NUMBER_OF_REGISTERS + 4] = {
114 0, 0, 2, 0, 1, 3, 4, 5, 5, 6, 7, 7, 6, 3, 4, 1, 2
115 };
116 #endif
117
118 /* Args: xmm0-xmm3 */
119 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
120 0, 0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 4
121 };
122 /* low-map. freg_map & 0x7. */
123 static const sljit_u8 freg_lmap[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
124 0, 0, 1, 2, 3, 5, 6, 7, 0, 1, 2, 3, 4, 5, 6, 7, 4
125 };
126
127 #define REX_W 0x48
128 #define REX_R 0x44
129 #define REX_X 0x42
130 #define REX_B 0x41
131 #define REX 0x40
132
133 #ifndef _WIN64
134 #define HALFWORD_MAX 0x7fffffffl
135 #define HALFWORD_MIN -0x80000000l
136 #else
137 #define HALFWORD_MAX 0x7fffffffll
138 #define HALFWORD_MIN -0x80000000ll
139 #endif
140
141 #define IS_HALFWORD(x) ((x) <= HALFWORD_MAX && (x) >= HALFWORD_MIN)
142 #define NOT_HALFWORD(x) ((x) > HALFWORD_MAX || (x) < HALFWORD_MIN)
143
144 #define CHECK_EXTRA_REGS(p, w, do)
145
146 #endif /* SLJIT_CONFIG_X86_32 */
147
148 #define U8(v) ((sljit_u8)(v))
149
150 /* Size flags for emit_x86_instruction: */
151 #define EX86_BIN_INS ((sljit_uw)0x000010)
152 #define EX86_SHIFT_INS ((sljit_uw)0x000020)
153 #define EX86_BYTE_ARG ((sljit_uw)0x000040)
154 #define EX86_HALF_ARG ((sljit_uw)0x000080)
155 /* Size flags for both emit_x86_instruction and emit_vex_instruction: */
156 #define EX86_REX ((sljit_uw)0x000100)
157 #define EX86_NO_REXW ((sljit_uw)0x000200)
158 #define EX86_PREF_66 ((sljit_uw)0x000400)
159 #define EX86_PREF_F2 ((sljit_uw)0x000800)
160 #define EX86_PREF_F3 ((sljit_uw)0x001000)
161 #define EX86_SSE2_OP1 ((sljit_uw)0x002000)
162 #define EX86_SSE2_OP2 ((sljit_uw)0x004000)
163 #define EX86_SSE2 (EX86_SSE2_OP1 | EX86_SSE2_OP2)
164 #define EX86_VEX_EXT ((sljit_uw)0x008000)
165 /* Op flags for emit_vex_instruction: */
166 #define VEX_OP_0F38 ((sljit_uw)0x010000)
167 #define VEX_OP_0F3A ((sljit_uw)0x020000)
168 #define VEX_SSE2_OPV ((sljit_uw)0x040000)
169 #define VEX_AUTO_W ((sljit_uw)0x080000)
170 #define VEX_W ((sljit_uw)0x100000)
171 #define VEX_256 ((sljit_uw)0x200000)
172
173 #define EX86_SELECT_66(op) (((op) & SLJIT_32) ? 0 : EX86_PREF_66)
174 #define EX86_SELECT_F2_F3(op) (((op) & SLJIT_32) ? EX86_PREF_F3 : EX86_PREF_F2)
175
176 /* --------------------------------------------------------------------- */
177 /* Instruction forms */
178 /* --------------------------------------------------------------------- */
179
180 #define ADD (/* BINARY */ 0 << 3)
181 #define ADD_EAX_i32 0x05
182 #define ADD_r_rm 0x03
183 #define ADD_rm_r 0x01
184 #define ADDSD_x_xm 0x58
185 #define ADC (/* BINARY */ 2 << 3)
186 #define ADC_EAX_i32 0x15
187 #define ADC_r_rm 0x13
188 #define ADC_rm_r 0x11
189 #define AND (/* BINARY */ 4 << 3)
190 #define AND_EAX_i32 0x25
191 #define AND_r_rm 0x23
192 #define AND_rm_r 0x21
193 #define ANDPD_x_xm 0x54
194 #define BSR_r_rm (/* GROUP_0F */ 0xbd)
195 #define BSF_r_rm (/* GROUP_0F */ 0xbc)
196 #define BSWAP_r (/* GROUP_0F */ 0xc8)
197 #define CALL_i32 0xe8
198 #define CALL_rm (/* GROUP_FF */ 2 << 3)
199 #define CDQ 0x99
200 #define CMOVE_r_rm (/* GROUP_0F */ 0x44)
201 #define CMP (/* BINARY */ 7 << 3)
202 #define CMP_EAX_i32 0x3d
203 #define CMP_r_rm 0x3b
204 #define CMP_rm_r 0x39
205 #define CMPS_x_xm 0xc2
206 #define CMPXCHG_rm_r 0xb1
207 #define CMPXCHG_rm8_r 0xb0
208 #define CVTPD2PS_x_xm 0x5a
209 #define CVTPS2PD_x_xm 0x5a
210 #define CVTSI2SD_x_rm 0x2a
211 #define CVTTSD2SI_r_xm 0x2c
212 #define DIV (/* GROUP_F7 */ 6 << 3)
213 #define DIVSD_x_xm 0x5e
214 #define EXTRACTPS_x_xm 0x17
215 #define FLDS 0xd9
216 #define FLDL 0xdd
217 #define FSTPS 0xd9
218 #define FSTPD 0xdd
219 #define INSERTPS_x_xm 0x21
220 #define INT3 0xcc
221 #define IDIV (/* GROUP_F7 */ 7 << 3)
222 #define IMUL (/* GROUP_F7 */ 5 << 3)
223 #define IMUL_r_rm (/* GROUP_0F */ 0xaf)
224 #define IMUL_r_rm_i8 0x6b
225 #define IMUL_r_rm_i32 0x69
226 #define JL_i8 0x7c
227 #define JE_i8 0x74
228 #define JNC_i8 0x73
229 #define JNE_i8 0x75
230 #define JMP_i8 0xeb
231 #define JMP_i32 0xe9
232 #define JMP_rm (/* GROUP_FF */ 4 << 3)
233 #define LEA_r_m 0x8d
234 #define LOOP_i8 0xe2
235 #define LZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbd)
236 #define MOV_r_rm 0x8b
237 #define MOV_r_i32 0xb8
238 #define MOV_rm_r 0x89
239 #define MOV_rm_i32 0xc7
240 #define MOV_rm8_i8 0xc6
241 #define MOV_rm8_r8 0x88
242 #define MOVAPS_x_xm 0x28
243 #define MOVAPS_xm_x 0x29
244 #define MOVD_x_rm 0x6e
245 #define MOVD_rm_x 0x7e
246 #define MOVDDUP_x_xm 0x12
247 #define MOVDQA_x_xm 0x6f
248 #define MOVDQA_xm_x 0x7f
249 #define MOVHLPS_x_x 0x12
250 #define MOVHPD_m_x 0x17
251 #define MOVHPD_x_m 0x16
252 #define MOVLHPS_x_x 0x16
253 #define MOVLPD_m_x 0x13
254 #define MOVLPD_x_m 0x12
255 #define MOVMSKPS_r_x (/* GROUP_0F */ 0x50)
256 #define MOVQ_x_xm (/* GROUP_0F */ 0x7e)
257 #define MOVSD_x_xm 0x10
258 #define MOVSD_xm_x 0x11
259 #define MOVSHDUP_x_xm 0x16
260 #define MOVSXD_r_rm 0x63
261 #define MOVSX_r_rm8 (/* GROUP_0F */ 0xbe)
262 #define MOVSX_r_rm16 (/* GROUP_0F */ 0xbf)
263 #define MOVUPS_x_xm 0x10
264 #define MOVZX_r_rm8 (/* GROUP_0F */ 0xb6)
265 #define MOVZX_r_rm16 (/* GROUP_0F */ 0xb7)
266 #define MUL (/* GROUP_F7 */ 4 << 3)
267 #define MULSD_x_xm 0x59
268 #define NEG_rm (/* GROUP_F7 */ 3 << 3)
269 #define NOP 0x90
270 #define NOT_rm (/* GROUP_F7 */ 2 << 3)
271 #define OR (/* BINARY */ 1 << 3)
272 #define OR_r_rm 0x0b
273 #define OR_EAX_i32 0x0d
274 #define OR_rm_r 0x09
275 #define OR_rm8_r8 0x08
276 #define ORPD_x_xm 0x56
277 #define PACKSSWB_x_xm (/* GROUP_0F */ 0x63)
278 #define PAND_x_xm 0xdb
279 #define PCMPEQD_x_xm 0x76
280 #define PINSRB_x_rm_i8 0x20
281 #define PINSRW_x_rm_i8 0xc4
282 #define PINSRD_x_rm_i8 0x22
283 #define PEXTRB_rm_x_i8 0x14
284 #define PEXTRW_rm_x_i8 0x15
285 #define PEXTRD_rm_x_i8 0x16
286 #define PMOVMSKB_r_x (/* GROUP_0F */ 0xd7)
287 #define PMOVSXBD_x_xm 0x21
288 #define PMOVSXBQ_x_xm 0x22
289 #define PMOVSXBW_x_xm 0x20
290 #define PMOVSXDQ_x_xm 0x25
291 #define PMOVSXWD_x_xm 0x23
292 #define PMOVSXWQ_x_xm 0x24
293 #define PMOVZXBD_x_xm 0x31
294 #define PMOVZXBQ_x_xm 0x32
295 #define PMOVZXBW_x_xm 0x30
296 #define PMOVZXDQ_x_xm 0x35
297 #define PMOVZXWD_x_xm 0x33
298 #define PMOVZXWQ_x_xm 0x34
299 #define POP_r 0x58
300 #define POP_rm 0x8f
301 #define POPF 0x9d
302 #define POR_x_xm 0xeb
303 #define PREFETCH 0x18
304 #define PSHUFB_x_xm 0x00
305 #define PSHUFD_x_xm 0x70
306 #define PSHUFLW_x_xm 0x70
307 #define PSRLDQ_x 0x73
308 #define PSLLD_x_i8 0x72
309 #define PSLLQ_x_i8 0x73
310 #define PUSH_i32 0x68
311 #define PUSH_r 0x50
312 #define PUSH_rm (/* GROUP_FF */ 6 << 3)
313 #define PUSHF 0x9c
314 #define PXOR_x_xm 0xef
315 #define ROL (/* SHIFT */ 0 << 3)
316 #define ROR (/* SHIFT */ 1 << 3)
317 #define RET_near 0xc3
318 #define RET_i16 0xc2
319 #define SBB (/* BINARY */ 3 << 3)
320 #define SBB_EAX_i32 0x1d
321 #define SBB_r_rm 0x1b
322 #define SBB_rm_r 0x19
323 #define SAR (/* SHIFT */ 7 << 3)
324 #define SHL (/* SHIFT */ 4 << 3)
325 #define SHLD (/* GROUP_0F */ 0xa5)
326 #define SHRD (/* GROUP_0F */ 0xad)
327 #define SHR (/* SHIFT */ 5 << 3)
328 #define SHUFPS_x_xm 0xc6
329 #define SUB (/* BINARY */ 5 << 3)
330 #define SUB_EAX_i32 0x2d
331 #define SUB_r_rm 0x2b
332 #define SUB_rm_r 0x29
333 #define SUBSD_x_xm 0x5c
334 #define TEST_EAX_i32 0xa9
335 #define TEST_rm_r 0x85
336 #define TZCNT_r_rm (/* GROUP_F3 */ /* GROUP_0F */ 0xbc)
337 #define UCOMISD_x_xm 0x2e
338 #define UNPCKLPD_x_xm 0x14
339 #define UNPCKLPS_x_xm 0x14
340 #define VBROADCASTSD_x_xm 0x19
341 #define VBROADCASTSS_x_xm 0x18
342 #define VEXTRACTF128_x_ym 0x19
343 #define VEXTRACTI128_x_ym 0x39
344 #define VINSERTF128_y_y_xm 0x18
345 #define VINSERTI128_y_y_xm 0x38
346 #define VPBROADCASTB_x_xm 0x78
347 #define VPBROADCASTD_x_xm 0x58
348 #define VPBROADCASTQ_x_xm 0x59
349 #define VPBROADCASTW_x_xm 0x79
350 #define VPERMPD_y_ym 0x01
351 #define VPERMQ_y_ym 0x00
352 #define XCHG_EAX_r 0x90
353 #define XCHG_r_rm 0x87
354 #define XOR (/* BINARY */ 6 << 3)
355 #define XOR_EAX_i32 0x35
356 #define XOR_r_rm 0x33
357 #define XOR_rm_r 0x31
358 #define XORPD_x_xm 0x57
359
360 #define GROUP_0F 0x0f
361 #define GROUP_66 0x66
362 #define GROUP_F3 0xf3
363 #define GROUP_F7 0xf7
364 #define GROUP_FF 0xff
365 #define GROUP_BINARY_81 0x81
366 #define GROUP_BINARY_83 0x83
367 #define GROUP_SHIFT_1 0xd1
368 #define GROUP_SHIFT_N 0xc1
369 #define GROUP_SHIFT_CL 0xd3
370 #define GROUP_LOCK 0xf0
371
372 #define MOD_REG 0xc0
373 #define MOD_DISP8 0x40
374
375 #define INC_SIZE(s) (*inst++ = U8(s), compiler->size += (s))
376
377 #define PUSH_REG(r) (*inst++ = U8(PUSH_r + (r)))
378 #define POP_REG(r) (*inst++ = U8(POP_r + (r)))
379 #define RET() (*inst++ = RET_near)
380 #define RET_I16(n) (*inst++ = RET_i16, *inst++ = U8(n), *inst++ = 0)
381
382 /* Multithreading does not affect these static variables, since they store
383 built-in CPU features. Therefore they can be overwritten by different threads
384 if they detect the CPU features in the same time. */
385 #define CPU_FEATURE_DETECTED 0x001
386 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
387 #define CPU_FEATURE_SSE2 0x002
388 #endif
389 #define CPU_FEATURE_SSE41 0x004
390 #define CPU_FEATURE_LZCNT 0x008
391 #define CPU_FEATURE_TZCNT 0x010
392 #define CPU_FEATURE_CMOV 0x020
393 #define CPU_FEATURE_AVX 0x040
394 #define CPU_FEATURE_AVX2 0x080
395
396 static sljit_u32 cpu_feature_list = 0;
397
398 #ifdef _WIN32_WCE
399 #include <cmnintrin.h>
400 #elif defined(_MSC_VER) && _MSC_VER >= 1400
401 #include <intrin.h>
402 #endif
403
404 /******************************************************/
405 /* Unaligned-store functions */
406 /******************************************************/
407
sljit_unaligned_store_s16(void * addr,sljit_s16 value)408 static SLJIT_INLINE void sljit_unaligned_store_s16(void *addr, sljit_s16 value)
409 {
410 SLJIT_MEMCPY(addr, &value, sizeof(value));
411 }
412
sljit_unaligned_store_s32(void * addr,sljit_s32 value)413 static SLJIT_INLINE void sljit_unaligned_store_s32(void *addr, sljit_s32 value)
414 {
415 SLJIT_MEMCPY(addr, &value, sizeof(value));
416 }
417
sljit_unaligned_store_sw(void * addr,sljit_sw value)418 static SLJIT_INLINE void sljit_unaligned_store_sw(void *addr, sljit_sw value)
419 {
420 SLJIT_MEMCPY(addr, &value, sizeof(value));
421 }
422
423 /******************************************************/
424 /* Utility functions */
425 /******************************************************/
426
/* Executes the CPUID instruction. On entry info[0] holds the requested leaf
   (EAX) and info[2] the sub-leaf (ECX); on return info[0..3] hold the values
   of EAX, EBX, ECX and EDX respectively. */
static void execute_cpu_id(sljit_u32 info[4])
{
#if defined(_MSC_VER) && _MSC_VER >= 1400

	__cpuidex((int*)info, (int)info[0], (int)info[2]);

#elif defined(__GNUC__) || defined(__INTEL_COMPILER) || defined(__SUNPRO_C) || defined(__TINYC__)

	/* AT&T syntax. */
	__asm__ (
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		"movl %0, %%esi\n"
		"movl (%%esi), %%eax\n"
		"movl 8(%%esi), %%ecx\n"
		/* EBX is saved/restored manually because it may be the PIC register. */
		"pushl %%ebx\n"
		"cpuid\n"
		"movl %%eax, (%%esi)\n"
		"movl %%ebx, 4(%%esi)\n"
		"popl %%ebx\n"
		"movl %%ecx, 8(%%esi)\n"
		"movl %%edx, 12(%%esi)\n"
#else /* !SLJIT_CONFIG_X86_32 */
		"movq %0, %%rsi\n"
		"movl (%%rsi), %%eax\n"
		"movl 8(%%rsi), %%ecx\n"
		"cpuid\n"
		"movl %%eax, (%%rsi)\n"
		"movl %%ebx, 4(%%rsi)\n"
		"movl %%ecx, 8(%%rsi)\n"
		"movl %%edx, 12(%%rsi)\n"
#endif /* SLJIT_CONFIG_X86_32 */
		:
		: "r" (info)
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		: "memory", "eax", "ecx", "edx", "esi"
#else /* !SLJIT_CONFIG_X86_32 */
		: "memory", "rax", "rbx", "rcx", "rdx", "rsi"
#endif /* SLJIT_CONFIG_X86_32 */
	);

#else /* _MSC_VER < 1400 */

	/* Intel syntax. */
	__asm {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		mov esi, info
		mov eax, [esi]
		mov ecx, [esi + 8]
		cpuid
		mov [esi], eax
		mov [esi + 4], ebx
		mov [esi + 8], ecx
		mov [esi + 12], edx
#else /* !SLJIT_CONFIG_X86_32 */
		mov rsi, info
		mov eax, [rsi]
		mov ecx, [rsi + 8]
		cpuid
		mov [rsi], eax
		mov [rsi + 4], ebx
		mov [rsi + 8], ecx
		mov [rsi + 12], edx
#endif /* SLJIT_CONFIG_X86_32 */
	}

#endif /* _MSC_VER && _MSC_VER >= 1400 */

#if defined(__has_feature)
#if __has_feature(memory_sanitizer)
	/* MSan cannot track the inline assembly writes; mark info initialized. */
	__msan_unpoison(info, 4 * sizeof(sljit_u32));
#endif /* __has_feature(memory_sanitizer) */
#endif /* defined(__has_feature) */

}
501
get_cpu_features(void)502 static void get_cpu_features(void)
503 {
504 sljit_u32 feature_list = CPU_FEATURE_DETECTED;
505 sljit_u32 info[4];
506 sljit_u32 max_id;
507
508 info[0] = 0;
509 execute_cpu_id(info);
510 max_id = info[0];
511
512 if (max_id >= 7) {
513 info[0] = 7;
514 info[2] = 0;
515 execute_cpu_id(info);
516
517 if (info[1] & 0x8)
518 feature_list |= CPU_FEATURE_TZCNT;
519 if (info[1] & 0x20)
520 feature_list |= CPU_FEATURE_AVX2;
521 }
522
523 if (max_id >= 1) {
524 info[0] = 1;
525 execute_cpu_id(info);
526
527 if (info[2] & 0x80000)
528 feature_list |= CPU_FEATURE_SSE41;
529 if (info[2] & 0x10000000)
530 feature_list |= CPU_FEATURE_AVX;
531 #if (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
532 if (info[3] & 0x4000000)
533 feature_list |= CPU_FEATURE_SSE2;
534 #endif
535 if (info[3] & 0x8000)
536 feature_list |= CPU_FEATURE_CMOV;
537 }
538
539 info[0] = 0x80000001;
540 info[2] = 0; /* Silences an incorrect compiler warning. */
541 execute_cpu_id(info);
542
543 if (info[2] & 0x20)
544 feature_list |= CPU_FEATURE_LZCNT;
545
546 cpu_feature_list = feature_list;
547 }
548
/* Maps an sljit condition type to the second opcode byte of the matching
   long-form (0x0f 0x8x) conditional jump. Several sljit types share one
   x86 condition code. Returns 0 for types with no direct mapping. */
static sljit_u8 get_jump_code(sljit_uw type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x84 /* je */;

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x85 /* jne */;

	case SLJIT_LESS:
	case SLJIT_CARRY:
	case SLJIT_F_LESS:
	case SLJIT_UNORDERED_OR_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x82 /* jc */;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_NOT_CARRY:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x83 /* jae */;

	case SLJIT_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_LESS:
	case SLJIT_ORDERED_GREATER:
		return 0x87 /* jnbe */;

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0x86 /* jbe */;

	case SLJIT_SIG_LESS:
		return 0x8c /* jl */;

	case SLJIT_SIG_GREATER_EQUAL:
		return 0x8d /* jnl */;

	case SLJIT_SIG_GREATER:
		return 0x8f /* jnle */;

	case SLJIT_SIG_LESS_EQUAL:
		return 0x8e /* jle */;

	case SLJIT_OVERFLOW:
		return 0x80 /* jo */;

	case SLJIT_NOT_OVERFLOW:
		return 0x81 /* jno */;

	case SLJIT_UNORDERED:
	case SLJIT_ORDERED_EQUAL: /* NaN. */
		return 0x8a /* jp */;

	case SLJIT_ORDERED:
	case SLJIT_UNORDERED_OR_NOT_EQUAL: /* Not NaN. */
		return 0x8b /* jpo */;
	}
	return 0;
}
618
619 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
620 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_sw executable_offset);
621 #else
622 static sljit_u8* generate_far_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr);
623 static sljit_u8* generate_put_label_code(struct sljit_put_label *put_label, sljit_u8 *code_ptr, sljit_uw max_label);
624 #endif
625
/* Emits the opcode of a jump/call and reserves space for its relative
   displacement, which is patched later in sljit_generate_code (PATCH_MB for
   an 8 bit and PATCH_MW for a 32 bit displacement). On x86-64 it falls back
   to a far jump when the distance does not fit in 32 bits. */
static sljit_u8* generate_near_jump_code(struct sljit_jump *jump, sljit_u8 *code_ptr, sljit_u8 *code, sljit_sw executable_offset)
{
	sljit_uw type = jump->flags >> TYPE_SHIFT;
	sljit_s32 short_jump;
	sljit_uw label_addr;

	if (jump->flags & JUMP_LABEL)
		label_addr = (sljit_uw)(code + jump->u.label->size);
	else
		label_addr = jump->u.target - (sljit_uw)executable_offset;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* The +2 / +6 offsets are the minimum / maximum encoded length of the
	   jump instruction, bounding the displacement in both directions. */
	if ((sljit_sw)(label_addr - (jump->addr + 2)) > HALFWORD_MAX || (sljit_sw)(label_addr - (jump->addr + 6)) < HALFWORD_MIN)
		return generate_far_jump_code(jump, code_ptr);
#endif

	/* A short jump uses a signed 8 bit displacement. */
	short_jump = (sljit_sw)(label_addr - (jump->addr + 2)) >= -128 && (sljit_sw)(label_addr - (jump->addr + 2)) <= 127;

	if (type == SLJIT_JUMP) {
		if (short_jump)
			*code_ptr++ = JMP_i8;
		else
			*code_ptr++ = JMP_i32;
		jump->addr++;
	}
	else if (type >= SLJIT_FAST_CALL) {
		/* Calls have no short form; always use the 32 bit displacement. */
		short_jump = 0;
		*code_ptr++ = CALL_i32;
		jump->addr++;
	}
	else if (short_jump) {
		/* Short conditional jump opcode = long form condition byte - 0x10. */
		*code_ptr++ = U8(get_jump_code(type) - 0x10);
		jump->addr++;
	}
	else {
		*code_ptr++ = GROUP_0F;
		*code_ptr++ = get_jump_code(type);
		jump->addr += 2;
	}

	if (short_jump) {
		jump->flags |= PATCH_MB;
		code_ptr += sizeof(sljit_s8);
	} else {
		jump->flags |= PATCH_MW;
		code_ptr += sizeof(sljit_s32);
	}

	return code_ptr;
}
676
/* Second pass of code generation: copies the buffered instruction stream
   into executable memory, resolves label addresses, emits and patches jump
   displacements and put_label / constant stores, then returns the pointer
   to the executable code. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u8 *code;
	sljit_u8 *code_ptr;
	sljit_u8 *buf_ptr;
	sljit_u8 *buf_end;
	sljit_u8 len;
	sljit_sw executable_offset;
	sljit_uw jump_addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	struct sljit_put_label *put_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
	code = (sljit_u8*)SLJIT_MALLOC_EXEC(compiler->size, compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	put_label = compiler->put_labels;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	do {
		buf_ptr = buf->memory;
		buf_end = buf_ptr + buf->used_size;
		do {
			/* Each record begins with a length byte: a positive length
			   introduces raw machine code; 0 introduces a tagged entry
			   (0 = label, 1 = jump, 2 = const, 3 = put_label). */
			len = *buf_ptr++;
			if (len > 0) {
				/* The code is already generated. */
				SLJIT_MEMCPY(code_ptr, buf_ptr, len);
				code_ptr += len;
				buf_ptr += len;
			}
			else {
				switch (*buf_ptr) {
				case 0:
					/* Label: record executable address and code offset. */
					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					break;
				case 1:
					/* Jump: near form when possible; rewritable jumps
					   always use the far form. */
					jump->addr = (sljit_uw)code_ptr;
					if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
						code_ptr = generate_near_jump_code(jump, code_ptr, code, executable_offset);
					else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
						code_ptr = generate_far_jump_code(jump, code_ptr, executable_offset);
#else
						code_ptr = generate_far_jump_code(jump, code_ptr);
#endif
					}
					jump = jump->next;
					break;
				case 2:
					/* Constant: its word was emitted just before this tag. */
					const_->addr = ((sljit_uw)code_ptr) - sizeof(sljit_sw);
					const_ = const_->next;
					break;
				default:
					SLJIT_ASSERT(*buf_ptr == 3);
					SLJIT_ASSERT(put_label->label);
					put_label->addr = (sljit_uw)code_ptr;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
					code_ptr = generate_put_label_code(put_label, code_ptr, (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size);
#endif
					put_label = put_label->next;
					break;
				}
				buf_ptr++;
			}
		} while (buf_ptr < buf_end);
		SLJIT_ASSERT(buf_ptr == buf_end);
		buf = buf->next;
	} while (buf);

	/* All lists must be fully consumed and the emitted code must fit. */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(!put_label);
	SLJIT_ASSERT(code_ptr <= code + compiler->size);

	/* Patch jump displacements now that every label address is known. */
	jump = compiler->jumps;
	while (jump) {
		if (jump->flags & (PATCH_MB | PATCH_MW)) {
			if (jump->flags & JUMP_LABEL)
				jump_addr = jump->u.label->addr;
			else
				jump_addr = jump->u.target;

			jump_addr -= jump->addr + (sljit_uw)executable_offset;

			if (jump->flags & PATCH_MB) {
				/* 8 bit displacement. */
				jump_addr -= sizeof(sljit_s8);
				SLJIT_ASSERT((sljit_sw)jump_addr >= -128 && (sljit_sw)jump_addr <= 127);
				*(sljit_u8*)jump->addr = U8(jump_addr);
			} else {
				/* 32 bit displacement. */
				jump_addr -= sizeof(sljit_s32);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump_addr);
#else
				SLJIT_ASSERT((sljit_sw)jump_addr >= HALFWORD_MIN && (sljit_sw)jump_addr <= HALFWORD_MAX);
				sljit_unaligned_store_s32((void*)jump->addr, (sljit_s32)jump_addr);
#endif
			}
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		else if (jump->flags & PATCH_MD) {
			/* Absolute 64 bit target address. */
			SLJIT_ASSERT(jump->flags & JUMP_LABEL);
			sljit_unaligned_store_sw((void*)jump->addr, (sljit_sw)jump->u.label->addr);
		}
#endif

		jump = jump->next;
	}

	/* Patch the address words stored by put_label. */
	put_label = compiler->put_labels;
	while (put_label) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
#else
		if (put_label->flags & PATCH_MD) {
			SLJIT_ASSERT(put_label->label->addr > HALFWORD_MAX);
			sljit_unaligned_store_sw((void*)(put_label->addr - sizeof(sljit_sw)), (sljit_sw)put_label->label->addr);
		}
		else {
			SLJIT_ASSERT(put_label->label->addr <= HALFWORD_MAX);
			sljit_unaligned_store_s32((void*)(put_label->addr - sizeof(sljit_s32)), (sljit_s32)put_label->label->addr);
		}
#endif

		put_label = put_label->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code);

	code = (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);

	SLJIT_UPDATE_WX_FLAGS(code, (sljit_u8*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset), 1);
	return (void*)code;
}
828
/* Returns non-zero when the requested CPU feature is available. Feature
   detection runs lazily on the first query and is cached in
   cpu_feature_list (never zero after detection, as CPU_FEATURE_DETECTED
   is always set). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	switch (feature_type) {
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#elif (defined SLJIT_DETECT_SSE2 && SLJIT_DETECT_SSE2)
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_SSE2) != 0;
#else /* SLJIT_DETECT_SSE2 */
		return 1;
#endif /* SLJIT_DETECT_SSE2 */

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	case SLJIT_HAS_VIRTUAL_REGISTERS:
		return 1;
#endif /* SLJIT_CONFIG_X86_32 */

	case SLJIT_HAS_CLZ:
		if (cpu_feature_list == 0)
			get_cpu_features();

		/* 1 when native lzcnt is present; 2 is presumably the emulated
		   (bsr based) form -- see sljitLir.h for the exact contract. */
		return (cpu_feature_list & CPU_FEATURE_LZCNT) ? 1 : 2;

	case SLJIT_HAS_CTZ:
		if (cpu_feature_list == 0)
			get_cpu_features();

		return (cpu_feature_list & CPU_FEATURE_TZCNT) ? 1 : 2;

	case SLJIT_HAS_CMOV:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_CMOV) != 0;

	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_ATOMIC:
		/* Always available on x86. */
		return 1;

#if !(defined SLJIT_IS_FPU_AVAILABLE) || SLJIT_IS_FPU_AVAILABLE
	case SLJIT_HAS_AVX:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_AVX) != 0;
	case SLJIT_HAS_AVX2:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_AVX2) != 0;
	case SLJIT_HAS_SIMD:
		if (cpu_feature_list == 0)
			get_cpu_features();
		return (cpu_feature_list & CPU_FEATURE_SSE41) != 0;
#endif /* SLJIT_IS_FPU_AVAILABLE */
	default:
		return 0;
	}
}
891
sljit_cmp_info(sljit_s32 type)892 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
893 {
894 switch (type) {
895 case SLJIT_ORDERED_EQUAL:
896 case SLJIT_UNORDERED_OR_NOT_EQUAL:
897 return 2;
898 }
899
900 return 0;
901 }
902
903 /* --------------------------------------------------------------------- */
904 /* Operators */
905 /* --------------------------------------------------------------------- */
906
907 #define BINARY_OPCODE(opcode) (((opcode ## _EAX_i32) << 24) | ((opcode ## _r_rm) << 16) | ((opcode ## _rm_r) << 8) | (opcode))
908
909 #define BINARY_IMM32(op_imm, immw, arg, argw) \
910 do { \
911 inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, immw, arg, argw); \
912 FAIL_IF(!inst); \
913 *(inst + 1) |= (op_imm); \
914 } while (0)
915
916 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
917
918 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
919 do { \
920 if (IS_HALFWORD(immw) || compiler->mode32) { \
921 BINARY_IMM32(op_imm, immw, arg, argw); \
922 } \
923 else { \
924 FAIL_IF(emit_load_imm64(compiler, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, immw)); \
925 inst = emit_x86_instruction(compiler, 1, (arg == TMP_REG1) ? TMP_REG2 : TMP_REG1, 0, arg, argw); \
926 FAIL_IF(!inst); \
927 *inst = (op_mr); \
928 } \
929 } while (0)
930
931 #define BINARY_EAX_IMM(op_eax_imm, immw) \
932 FAIL_IF(emit_do_imm32(compiler, (!compiler->mode32) ? REX_W : 0, (op_eax_imm), immw))
933
934 #else /* !SLJIT_CONFIG_X86_64 */
935
936 #define BINARY_IMM(op_imm, op_mr, immw, arg, argw) \
937 BINARY_IMM32(op_imm, immw, arg, argw)
938
939 #define BINARY_EAX_IMM(op_eax_imm, immw) \
940 FAIL_IF(emit_do_imm(compiler, (op_eax_imm), immw))
941
942 #endif /* SLJIT_CONFIG_X86_64 */
943
emit_byte(struct sljit_compiler * compiler,sljit_u8 byte)944 static sljit_s32 emit_byte(struct sljit_compiler *compiler, sljit_u8 byte)
945 {
946 sljit_u8 *inst = (sljit_u8*)ensure_buf(compiler, 1 + 1);
947 FAIL_IF(!inst);
948 INC_SIZE(1);
949 *inst = byte;
950 return SLJIT_SUCCESS;
951 }
952
953 static sljit_s32 emit_mov(struct sljit_compiler *compiler,
954 sljit_s32 dst, sljit_sw dstw,
955 sljit_s32 src, sljit_sw srcw);
956
957 #define EMIT_MOV(compiler, dst, dstw, src, srcw) \
958 FAIL_IF(emit_mov(compiler, dst, dstw, src, srcw));
959
960 static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
961 sljit_uw op,
962 sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
963
964 static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
965 sljit_uw op,
966 sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
967
968 static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
969 sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src);
970
971 static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
972 sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw);
973
974 static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
975 sljit_s32 src1, sljit_sw src1w,
976 sljit_s32 src2, sljit_sw src2w);
977
978 static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
979 sljit_s32 dst_reg,
980 sljit_s32 src, sljit_sw srcw);
981
/* Emits an endbr32 (x86-32) or endbr64 (x86-64) instruction when Intel CET
   support is compiled in; otherwise emits nothing. */
static SLJIT_INLINE sljit_s32 emit_endbranch(struct sljit_compiler *compiler)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET)
	/* Emit endbr32/endbr64 when CET is enabled. */
	sljit_u8 *inst;
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4);
	/* Encoding: f3 0f 1e fb (endbr32) / f3 0f 1e fa (endbr64). */
	inst[0] = GROUP_F3;
	inst[1] = GROUP_0F;
	inst[2] = 0x1e;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	inst[3] = 0xfb;
#else /* !SLJIT_CONFIG_X86_32 */
	inst[3] = 0xfa;
#endif /* SLJIT_CONFIG_X86_32 */
#else /* !SLJIT_CONFIG_X86_CET */
	SLJIT_UNUSED_ARG(compiler);
#endif /* SLJIT_CONFIG_X86_CET */
	return SLJIT_SUCCESS;
}
1003
1004 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1005
/* Emits "rdsspd/rdsspq reg": reads the CET shadow stack pointer into reg.
   On x86-64 an extra REX byte is emitted, hence the larger size. */
static SLJIT_INLINE sljit_s32 emit_rdssp(struct sljit_compiler *compiler, sljit_s32 reg)
{
	sljit_u8 *inst;
	sljit_s32 size;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	size = 5;
#else
	size = 4;
#endif

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	*inst++ = GROUP_F3;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* REX.W for the 64 bit form; REX.B when reg is one of r8-r15. */
	*inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
#endif
	inst[0] = GROUP_0F;
	inst[1] = 0x1e;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* ModRM: register-direct, /1 opcode extension. */
	inst[2] = U8(MOD_REG | (0x1 << 3) | reg_lmap[reg]);
#else
	inst[2] = U8(MOD_REG | (0x1 << 3) | reg_map[reg]);
#endif
	return SLJIT_SUCCESS;
}
1033
emit_incssp(struct sljit_compiler * compiler,sljit_s32 reg)1034 static SLJIT_INLINE sljit_s32 emit_incssp(struct sljit_compiler *compiler, sljit_s32 reg)
1035 {
1036 sljit_u8 *inst;
1037 sljit_s32 size;
1038
1039 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1040 size = 5;
1041 #else
1042 size = 4;
1043 #endif
1044
1045 inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
1046 FAIL_IF(!inst);
1047 INC_SIZE(size);
1048 *inst++ = GROUP_F3;
1049 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1050 *inst++ = REX_W | (reg_map[reg] <= 7 ? 0 : REX_B);
1051 #endif
1052 inst[0] = GROUP_0F;
1053 inst[1] = 0xae;
1054 inst[2] = (0x3 << 6) | (0x5 << 3) | (reg_map[reg] & 0x7);
1055 return SLJIT_SUCCESS;
1056 }
1057
1058 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1059
/* Returns non-zero when the process runs with an active CET shadow stack
   (_get_ssp() reports a non-zero shadow stack pointer); always 0 when
   CET/SHSTK support is not compiled in. */
static SLJIT_INLINE sljit_s32 cpu_has_shadow_stack(void)
{
#if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
	return _get_ssp() != 0;
#else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
	return 0;
#endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
}
1068
adjust_shadow_stack(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1069 static SLJIT_INLINE sljit_s32 adjust_shadow_stack(struct sljit_compiler *compiler,
1070 sljit_s32 src, sljit_sw srcw)
1071 {
1072 #if (defined SLJIT_CONFIG_X86_CET && SLJIT_CONFIG_X86_CET) && defined (__SHSTK__)
1073 sljit_u8 *inst, *jz_after_cmp_inst;
1074 sljit_uw size_jz_after_cmp_inst;
1075
1076 sljit_uw size_before_rdssp_inst = compiler->size;
1077
1078 /* Generate "RDSSP TMP_REG1". */
1079 FAIL_IF(emit_rdssp(compiler, TMP_REG1));
1080
1081 /* Load return address on shadow stack into TMP_REG1. */
1082 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_MEM1(TMP_REG1), 0);
1083
1084 /* Compare return address against TMP_REG1. */
1085 FAIL_IF(emit_cmp_binary (compiler, TMP_REG1, 0, src, srcw));
1086
1087 /* Generate JZ to skip shadow stack ajdustment when shadow
1088 stack matches normal stack. */
1089 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1090 FAIL_IF(!inst);
1091 INC_SIZE(2);
1092 *inst++ = get_jump_code(SLJIT_EQUAL) - 0x10;
1093 size_jz_after_cmp_inst = compiler->size;
1094 jz_after_cmp_inst = inst;
1095
1096 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
1097 /* REX_W is not necessary. */
1098 compiler->mode32 = 1;
1099 #endif
1100 /* Load 1 into TMP_REG1. */
1101 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, 1);
1102
1103 /* Generate "INCSSP TMP_REG1". */
1104 FAIL_IF(emit_incssp(compiler, TMP_REG1));
1105
1106 /* Jump back to "RDSSP TMP_REG1" to check shadow stack again. */
1107 inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
1108 FAIL_IF(!inst);
1109 INC_SIZE(2);
1110 inst[0] = JMP_i8;
1111 inst[1] = size_before_rdssp_inst - compiler->size;
1112
1113 *jz_after_cmp_inst = compiler->size - size_jz_after_cmp_inst;
1114 #else /* !SLJIT_CONFIG_X86_CET || !__SHSTK__ */
1115 SLJIT_UNUSED_ARG(compiler);
1116 SLJIT_UNUSED_ARG(src);
1117 SLJIT_UNUSED_ARG(srcw);
1118 #endif /* SLJIT_CONFIG_X86_CET && __SHSTK__ */
1119 return SLJIT_SUCCESS;
1120 }
1121
1122 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
1123 #include "sljitNativeX86_32.c"
1124 #else
1125 #include "sljitNativeX86_64.c"
1126 #endif
1127
/* Emits a generic word-sized move between any combination of register,
   memory and immediate operands. Only the SLJIT_MOV family copies
   memory to memory or a 64 bit immediate to memory, so TMP_REG1 is
   free to serve as scratch for those cases. */
static sljit_s32 emit_mov(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;

	if (FAST_IS_REG(src)) {
		/* reg -> reg/mem: MOV r/m, r. */
		inst = emit_x86_instruction(compiler, 1, src, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			if (!compiler->mode32) {
				/* Immediates that do not fit a sign extended
				   32 bit field need MOV r64, imm64. */
				if (NOT_HALFWORD(srcw))
					return emit_load_imm64(compiler, dst, srcw);
			}
			else
				return emit_do_imm32(compiler, (reg_map[dst] >= 8) ? REX_B : 0, U8(MOV_r_i32 | reg_lmap[dst]), srcw);
#endif
		}
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (!compiler->mode32 && NOT_HALFWORD(srcw)) {
			/* Immediate to memory move. Only SLJIT_MOV operation copies
			   an immediate directly into memory so TMP_REG1 can be used. */
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = MOV_rm_r;
			return SLJIT_SUCCESS;
		}
#endif
		/* imm -> reg/mem using the imm32 form. */
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}
	if (FAST_IS_REG(dst)) {
		/* mem -> reg: MOV r, r/m. */
		inst = emit_x86_instruction(compiler, 1, dst, 0, src, srcw);
		FAIL_IF(!inst);
		*inst = MOV_r_rm;
		return SLJIT_SUCCESS;
	}

	/* Memory to memory move. Only SLJIT_MOV operation copies
	   data from memory to memory so TMP_REG1 can be used. */
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src, srcw);
	FAIL_IF(!inst);
	*inst = MOV_r_rm;
	inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
	FAIL_IF(!inst);
	*inst = MOV_rm_r;
	return SLJIT_SUCCESS;
}
1187
/* Emulates CMOVcc for CPUs without CMOV support: emits a short Jcc with
   the inverted condition that jumps over a MOV, so the move happens only
   when the original condition holds. The emitted MOV must fit in the
   signed 8 bit jump displacement. */
static sljit_s32 emit_cmov_generic(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_uw size;

	SLJIT_ASSERT(type >= SLJIT_EQUAL && type <= SLJIT_ORDERED_LESS_EQUAL);

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	/* type ^ 0x1 inverts the condition; subtracting 0x10 converts the
	   long Jcc opcode (0F 8x) to the short form (7x). */
	inst[0] = U8(get_jump_code((sljit_uw)type ^ 0x1) - 0x10);

	size = compiler->size;
	EMIT_MOV(compiler, dst_reg, 0, src, srcw);

	/* Patch the jump displacement with the size of the emitted MOV. */
	inst[1] = U8(compiler->size - size);
	return SLJIT_SUCCESS;
}
1208
/* Emits a zero-operand operation: breakpoint, nop, endbranch, frame
   skipping, or the multiply/divide operations that use the fixed x86
   register pairs (low result in SLJIT_R0/EAX, second operand/result in
   the register holding EDX for the current ABI mapping). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_uw size;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	switch (GET_OPCODE(op)) {
	case SLJIT_BREAKPOINT:
		return emit_byte(compiler, INT3);
	case SLJIT_NOP:
		return emit_byte(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		/* The asserts document which ABI register mapping each
		   branch below relies on (EDX is reg_map value 2). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
#ifdef _WIN64
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] == 2
			&& reg_map[TMP_REG1] > 7);
#else
		SLJIT_ASSERT(
			reg_map[SLJIT_R0] == 0
			&& reg_map[SLJIT_R1] < 7
			&& reg_map[TMP_REG1] == 2);
#endif
		compiler->mode32 = op & SLJIT_32;
#endif
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);

		op = GET_OPCODE(op);
		if ((op | 0x2) == SLJIT_DIV_UW) {
			/* Unsigned divide: zero the high half (the EDX register)
			   first. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
			inst = emit_x86_instruction(compiler, 1, SLJIT_R1, 0, SLJIT_R1, 0);
#else
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, TMP_REG1, 0);
#endif
			FAIL_IF(!inst);
			*inst = XOR_r_rm;
		}

		if ((op | 0x2) == SLJIT_DIV_SW) {
			/* Signed divide: sign extend the low half with CDQ/CQO. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32) || defined(_WIN64)
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_R1, 0);
#endif

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			FAIL_IF(emit_byte(compiler, CDQ));
#else
			if (!compiler->mode32) {
				/* REX.W + CDQ encodes CQO. */
				inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
				FAIL_IF(!inst);
				INC_SIZE(2);
				inst[0] = REX_W;
				inst[1] = CDQ;
			} else
				FAIL_IF(emit_byte(compiler, CDQ));
#endif
		}

		/* Emit the F7-group MUL/IMUL/DIV/IDIV instruction itself;
		   the operand register is encoded in the modrm byte. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
		FAIL_IF(!inst);
		INC_SIZE(2);
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_map[TMP_REG1] : reg_map[SLJIT_R1]);
#else /* !SLJIT_CONFIG_X86_32 */
#ifdef _WIN64
		size = (!compiler->mode32 || op >= SLJIT_DIVMOD_UW) ? 3 : 2;
#else /* !_WIN64 */
		size = (!compiler->mode32) ? 3 : 2;
#endif /* _WIN64 */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
		FAIL_IF(!inst);
		INC_SIZE(size);
#ifdef _WIN64
		if (!compiler->mode32)
			*inst++ = REX_W | ((op >= SLJIT_DIVMOD_UW) ? REX_B : 0);
		else if (op >= SLJIT_DIVMOD_UW)
			*inst++ = REX_B;
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | ((op >= SLJIT_DIVMOD_UW) ? reg_lmap[TMP_REG1] : reg_lmap[SLJIT_R1]);
#else /* !_WIN64 */
		if (!compiler->mode32)
			*inst++ = REX_W;
		inst[0] = GROUP_F7;
		inst[1] = MOD_REG | reg_map[SLJIT_R1];
#endif /* _WIN64 */
#endif /* SLJIT_CONFIG_X86_32 */
		/* Select the F7 subopcode through the modrm reg field. */
		switch (op) {
		case SLJIT_LMUL_UW:
			inst[1] |= MUL;
			break;
		case SLJIT_LMUL_SW:
			inst[1] |= IMUL;
			break;
		case SLJIT_DIVMOD_UW:
		case SLJIT_DIV_UW:
			inst[1] |= DIV;
			break;
		case SLJIT_DIVMOD_SW:
		case SLJIT_DIV_SW:
			inst[1] |= IDIV;
			break;
		}
		/* Copy the result held in TMP_REG1 back to SLJIT_R1 where the
		   register mapping requires it. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64) && !defined(_WIN64)
		if (op <= SLJIT_DIVMOD_SW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#else
		if (op >= SLJIT_DIV_UW)
			EMIT_MOV(compiler, SLJIT_R1, 0, TMP_REG1, 0);
#endif
		break;
	case SLJIT_ENDBR:
		return emit_endbranch(compiler);
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return skip_frames_before_return(compiler);
	}

	return SLJIT_SUCCESS;
}
1338
/* Emits a byte-sized move with zero (sign == 0) or sign (sign != 0)
   extension to word size. On x86-32 only registers mapped below 4
   (EAX..EBX) have byte-addressable low parts, so other sources are
   routed through TMP_REG1. */
static sljit_s32 emit_mov_byte(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		/* imm -> mem: store a single byte. */
		inst = emit_x86_instruction(compiler, 1 | EX86_BYTE_ARG | EX86_NO_REXW, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_i8;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if ((dst & SLJIT_MEM) && FAST_IS_REG(src)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (reg_map[src] >= 4) {
			/* src has no byte form; copy it to TMP_REG1 first. */
			SLJIT_ASSERT(dst_r == TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
		} else
			dst_r = src;
#else
		dst_r = src;
#endif
	} else {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		if (FAST_IS_REG(src) && reg_map[src] >= 4) {
			/* Both src and dst are registers. */
			SLJIT_ASSERT(FAST_IS_REG(dst));

			if (src == dst && !sign) {
				/* Zero extend in place: AND dst, 0xff. */
				inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 0xff, dst, 0);
				FAIL_IF(!inst);
				*(inst + 1) |= AND;
				return SLJIT_SUCCESS;
			}

			EMIT_MOV(compiler, TMP_REG1, 0, src, 0);
			src = TMP_REG1;
			srcw = 0;
		}
#endif /* SLJIT_CONFIG_X86_32 */

		/* src can be memory addr or reg_map[src] < 4 on x86_32 architectures. */
		FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm8 : MOVZX_r_rm8, dst_r, src, srcw));
	}

	if (dst & SLJIT_MEM) {
		/* Store the byte result from dst_r into memory. */
		inst = emit_x86_instruction(compiler, 1 | EX86_REX | EX86_NO_REXW, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm8_r8;
	}

	return SLJIT_SUCCESS;
}
1410
/* Emits a PREFETCH hint (0F 18) for the memory operand src/srcw.
   The locality level is encoded in the modrm reg field: /1, /2 and /3
   for the L1, L2 and L3 hints; any other hint keeps the field at /0. */
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* code;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	code = emit_x86_instruction(compiler, 2, 0, 0, src, srcw);
	FAIL_IF(!code);
	code[0] = GROUP_0F;
	code[1] = PREFETCH;

	switch (op) {
	case SLJIT_PREFETCH_L1:
		code[2] |= (1 << 3);
		break;
	case SLJIT_PREFETCH_L2:
		code[2] |= (2 << 3);
		break;
	case SLJIT_PREFETCH_L3:
		code[2] |= (3 << 3);
		break;
	default:
		/* Leave the modrm reg field at /0. */
		break;
	}

	return SLJIT_SUCCESS;
}
1434
/* Emits a 16 bit move with zero (sign == 0) or sign (sign != 0)
   extension to word size. Stores use the 66 prefix (EX86_PREF_66) to
   select the 16 bit operand size. */
static sljit_s32 emit_mov_half(struct sljit_compiler *compiler, sljit_s32 sign,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		if (FAST_IS_REG(dst)) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			return emit_do_imm(compiler, MOV_r_i32 | reg_map[dst], srcw);
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, srcw, dst, 0);
			FAIL_IF(!inst);
			*inst = MOV_rm_i32;
			return SLJIT_SUCCESS;
#endif
		}
		/* imm -> mem: 16 bit store of the immediate. */
		inst = emit_x86_instruction(compiler, 1 | EX86_HALF_ARG | EX86_NO_REXW | EX86_PREF_66, SLJIT_IMM, srcw, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_i32;
		return SLJIT_SUCCESS;
	}

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* reg -> mem needs no widening load; store src directly below. */
	if ((dst & SLJIT_MEM) && FAST_IS_REG(src))
		dst_r = src;
	else
		FAIL_IF(emit_groupf(compiler, sign ? MOVSX_r_rm16 : MOVZX_r_rm16, dst_r, src, srcw));

	if (dst & SLJIT_MEM) {
		inst = emit_x86_instruction(compiler, 1 | EX86_NO_REXW | EX86_PREF_66, dst_r, 0, dst, dstw);
		FAIL_IF(!inst);
		*inst = MOV_rm_r;
	}

	return SLJIT_SUCCESS;
}
1478
/* Helper for emit_unary: emits the one byte F7-group instruction whose
   subopcode ('opcode') is merged into the modrm reg field, operating on
   the location described by arg/argw. */
static sljit_s32 emit_unary_f7_form(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 arg, sljit_sw argw)
{
	sljit_u8* inst = emit_x86_instruction(compiler, 1, 0, 0, arg, argw);
	FAIL_IF(!inst);
	inst[0] = GROUP_F7;
	inst[1] |= opcode;
	return SLJIT_SUCCESS;
}

/* Emits a unary F7-group operation from src/srcw to dst/dstw, routing
   the value through TMP_REG1 when the destination is in memory. */
static sljit_s32 emit_unary(struct sljit_compiler *compiler, sljit_u8 opcode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* Same input and output: operate in place. */
	if (dst == src && dstw == srcw)
		return emit_unary_f7_form(compiler, opcode, dst, dstw);

	/* Register destination: copy there first, then operate. */
	if (FAST_IS_REG(dst)) {
		EMIT_MOV(compiler, dst, 0, src, srcw);
		return emit_unary_f7_form(compiler, opcode, dst, 0);
	}

	/* Memory destination: compute in TMP_REG1, then store back. */
	EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
	FAIL_IF(emit_unary_f7_form(compiler, opcode, TMP_REG1, 0));
	EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
1511
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
/* Zero-input fallback values for emit_clz_ctz on x86-32: BSR/BSF leave
   the destination undefined when the source is zero, so a CMOV loads
   these constants from memory when no scratch register is free. */
static const sljit_sw emit_clz_arg = 32 + 31;
static const sljit_sw emit_ctz_arg = 32;
#endif
1516
/* Emits count-leading-zeros (is_clz != 0) or count-trailing-zeros.
   Uses LZCNT/TZCNT when the CPU supports them; otherwise falls back to
   BSR/BSF plus a (possibly emulated) CMOV that supplies the result for
   zero input, and for CLZ an XOR that converts the BSR bit index into
   a leading zero count. */
static sljit_s32 emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 is_clz,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8* inst;
	sljit_s32 dst_r;
	sljit_sw max;

	SLJIT_ASSERT(cpu_feature_list != 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (is_clz ? (cpu_feature_list & CPU_FEATURE_LZCNT) : (cpu_feature_list & CPU_FEATURE_TZCNT)) {
		/* LZCNT/TZCNT handle zero input directly. */
		FAIL_IF(emit_groupf(compiler, (is_clz ? LZCNT_r_rm : TZCNT_r_rm) | EX86_PREF_F3, dst_r, src, srcw));

		if (dst & SLJIT_MEM)
			EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	/* BSR/BSF leave the destination undefined for zero input; the
	   CMOV below loads the expected result in that case. */
	FAIL_IF(emit_groupf(compiler, is_clz ? BSR_r_rm : BSF_r_rm, dst_r, src, srcw));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	max = is_clz ? (32 + 31) : 32;

	if (cpu_feature_list & CPU_FEATURE_CMOV) {
		if (dst_r != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, max);
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, TMP_REG1, 0);
		}
		else
			/* No free register: CMOV loads the fallback constant
			   directly from memory. */
			inst = emit_x86_instruction(compiler, 2, dst_r, 0, SLJIT_MEM0(), is_clz ? (sljit_sw)&emit_clz_arg : (sljit_sw)&emit_ctz_arg);

		FAIL_IF(!inst);
		inst[0] = GROUP_0F;
		inst[1] = CMOVE_r_rm;
	}
	else
		FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));

	if (is_clz) {
		/* CLZ = 31 - BSR for 32 bit values; XOR with 31 performs
		   the subtraction (also maps the zero-input 32 + 31 to 32). */
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, 31, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}
#else
	if (is_clz)
		max = compiler->mode32 ? (32 + 31) : (64 + 63);
	else
		max = compiler->mode32 ? 32 : 64;

	if (cpu_feature_list & CPU_FEATURE_CMOV) {
		EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_IMM, max);
		FAIL_IF(emit_groupf(compiler, CMOVE_r_rm, dst_r, TMP_REG2, 0));
	} else
		FAIL_IF(emit_cmov_generic(compiler, SLJIT_EQUAL, dst_r, SLJIT_IMM, max));

	if (is_clz) {
		/* CLZ = (width - 1) - BSR; XOR with width - 1 (= max >> 1). */
		inst = emit_x86_instruction(compiler, 1 | EX86_BIN_INS, SLJIT_IMM, max >> 1, dst_r, 0);
		FAIL_IF(!inst);
		*(inst + 1) |= XOR;
	}
#endif

	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
1585
/* Emits a byte order reversal (SLJIT_REV*) using BSWAP. Halfword
   reversals byte swap the full word and then shift the result back down
   (SHR for unsigned, SAR for sign extension). On x86-32 the SLJIT_32
   bit of op marks a stack-resident "ereg" destination. */
static sljit_s32 emit_bswap(struct sljit_compiler *compiler,
	sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	sljit_uw size;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_u8 rex = 0;
#else /* !SLJIT_CONFIG_X86_64 */
	sljit_s32 dst_is_ereg = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (op == SLJIT_REV_U32 || op == SLJIT_REV_S32)
		compiler->mode32 = 1;
#else /* !SLJIT_CONFIG_X86_64 */
	op &= ~SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	if (src != dst_r) {
		/* Only the lower 16 bit is read for eregs. */
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
			FAIL_IF(emit_mov_half(compiler, 0, dst_r, 0, src, srcw));
		else
			EMIT_MOV(compiler, dst_r, 0, src, srcw);
	}

	/* BSWAP r is 0F C8+r; on x86-64 an optional REX prefix selects
	   64 bit operand size (REX.W) and/or the high registers (REX.B). */
	size = 2;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (!compiler->mode32)
		rex = REX_W;

	if (reg_map[dst_r] >= 8)
		rex |= REX_B;

	if (rex != 0)
		size++;
#endif /* SLJIT_CONFIG_X86_64 */

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (rex != 0)
		*inst++ = rex;

	inst[0] = GROUP_0F;
	inst[1] = BSWAP_r | reg_lmap[dst_r];
#else /* !SLJIT_CONFIG_X86_64 */
	inst[0] = GROUP_0F;
	inst[1] = BSWAP_r | reg_map[dst_r];
#endif /* SLJIT_CONFIG_X86_64 */

	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
		/* The swapped halfword now sits in the top bits; shift it
		   back down into the low 16 bits. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		size = compiler->mode32 ? 16 : 48;
#else /* !SLJIT_CONFIG_X86_64 */
		size = 16;
#endif /* SLJIT_CONFIG_X86_64 */

		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, (sljit_sw)size, dst_r, 0);
		FAIL_IF(!inst);
		if (op == SLJIT_REV_U16)
			inst[1] |= SHR;
		else
			inst[1] |= SAR;
	}

	if (dst & SLJIT_MEM) {
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* An ereg destination stores the full word. */
		if (dst_is_ereg)
			op = SLJIT_REV;
#endif /* SLJIT_CONFIG_X86_32 */
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
			return emit_mov_half(compiler, 0, dst, dstw, TMP_REG1, 0);

		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (op == SLJIT_REV_S32) {
		/* Sign extend the reversed 32 bit value to 64 bits. */
		compiler->mode32 = 0;
		inst = emit_x86_instruction(compiler, 1, dst, 0, dst, 0);
		FAIL_IF(!inst);
		*inst = MOVSXD_r_rm;
	}
#endif /* SLJIT_CONFIG_X86_64 */

	return SLJIT_SUCCESS;
}
1679
/* Emits a single-operand operation: the MOV family (with optional
   width conversion), CLZ/CTZ and byte order reversals. On x86-32,
   CHECK_EXTRA_REGS may redirect virtual registers to stack slots
   ("eregs"), which need special handling for sub-word moves. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst_is_ereg = 0;
#else /* !SLJIT_CONFIG_X86_32 */
	sljit_s32 op_flags = GET_ALL_FLAGS(op);
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(dst, dstw, dst_is_ereg = 1);
	CHECK_EXTRA_REGS(src, srcw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op_flags & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	op = GET_OPCODE(op);

	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Moves operate at full register width; the opcode itself
		   encodes the source width. */
		compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */

		if (FAST_IS_REG(src) && src == dst) {
			/* A same-register move is a no-op unless the opcode
			   requests a narrowing/widening cast. */
			if (!TYPE_CAST_NEEDED(op))
				return SLJIT_SUCCESS;
		}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Canonicalize the 32 bit move variant to the cheaper form
		   for memory and immediate sources. */
		if (op_flags & SLJIT_32) {
			if (src & SLJIT_MEM) {
				if (op == SLJIT_MOV_S32)
					op = SLJIT_MOV_U32;
			}
			else if (src == SLJIT_IMM) {
				if (op == SLJIT_MOV_U32)
					op = SLJIT_MOV_S32;
			}
		}
#endif /* SLJIT_CONFIG_X86_64 */

		if (src == SLJIT_IMM) {
			/* Pre-truncate the immediate to the move's width. */
			switch (op) {
			case SLJIT_MOV_U8:
				srcw = (sljit_u8)srcw;
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_s8)srcw;
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_u16)srcw;
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_s16)srcw;
				break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			case SLJIT_MOV_U32:
				srcw = (sljit_u32)srcw;
				break;
			case SLJIT_MOV_S32:
				srcw = (sljit_s32)srcw;
				break;
#endif /* SLJIT_CONFIG_X86_64 */
			}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
			/* Immediate into an ereg: a plain store suffices. */
			if (SLJIT_UNLIKELY(dst_is_ereg))
				return emit_mov(compiler, dst, dstw, src, srcw);
#endif /* SLJIT_CONFIG_X86_32 */
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Sub-word moves into an ereg go through TMP_REG1 and are
		   stored back to the stack slot at the end. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && (!(op == SLJIT_MOV || op == SLJIT_MOV_U32 || op == SLJIT_MOV_S32 || op == SLJIT_MOV_P) || (src & SLJIT_MEM))) {
			SLJIT_ASSERT(dst == SLJIT_MEM1(SLJIT_SP));
			dst = TMP_REG1;
		}
#endif /* SLJIT_CONFIG_X86_32 */

		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
#endif /* SLJIT_CONFIG_X86_32 */
			EMIT_MOV(compiler, dst, dstw, src, srcw);
			break;
		case SLJIT_MOV_U8:
			FAIL_IF(emit_mov_byte(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S8:
			FAIL_IF(emit_mov_byte(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_U16:
			FAIL_IF(emit_mov_half(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S16:
			FAIL_IF(emit_mov_half(compiler, 1, dst, dstw, src, srcw));
			break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		case SLJIT_MOV_U32:
			FAIL_IF(emit_mov_int(compiler, 0, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV_S32:
			FAIL_IF(emit_mov_int(compiler, 1, dst, dstw, src, srcw));
			break;
		case SLJIT_MOV32:
			compiler->mode32 = 1;
			EMIT_MOV(compiler, dst, dstw, src, srcw);
			compiler->mode32 = 0;
			break;
#endif /* SLJIT_CONFIG_X86_64 */
		}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Flush the TMP_REG1 result back into the ereg stack slot. */
		if (SLJIT_UNLIKELY(dst_is_ereg) && dst == TMP_REG1)
			return emit_mov(compiler, SLJIT_MEM1(SLJIT_SP), dstw, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_32 */
		return SLJIT_SUCCESS;
	}

	switch (op) {
	case SLJIT_CLZ:
	case SLJIT_CTZ:
		return emit_clz_ctz(compiler, (op == SLJIT_CLZ), dst, dstw, src, srcw);
	case SLJIT_REV:
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		/* Reuse the SLJIT_32 bit to tell emit_bswap about eregs. */
		if (dst_is_ereg)
			op |= SLJIT_32;
#endif /* SLJIT_CONFIG_X86_32 */
		return emit_bswap(compiler, op, dst, dstw, src, srcw);
	}

	return SLJIT_SUCCESS;
}
1824
/* Emits a cumulative (commutative) binary operation such as ADD/AND/
   OR/XOR. op_types packs four opcode bytes:
     bits 31-24: short form used when the destination is SLJIT_R0 (EAX)
     bits 23-16: reg, r/m form
     bits 15-8 : r/m, reg form
     bits 7-0  : group opcode for immediate operands.
   Because the operation is commutative, an in-place form is used when
   dst matches either source. */
static sljit_s32 emit_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_u8 op_eax_imm = U8(op_types >> 24);
	sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
	sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
	sljit_u8 op_imm = U8(op_types & 0xff);

	/* In-place: dst op= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			/* Special exception for sljit_emit_op_flags. */
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* Only for cumulative operations. */
	if (dst == src2 && dstw == src2w) {
		/* In-place with swapped operands: dst op= src1. */
		if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
			if ((dst == SLJIT_R0) && (src1w > 127 || src1w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src1w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src1w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src1, src1w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, src1w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst)) {
		/* Compute directly in the destination register. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
1931
/* Emits a non-cumulative (non-commutative) binary operation such as
   SUB/SBB. op_types packs the same four opcode bytes as
   emit_cum_binary. Unlike the cumulative variant, dst == src2 cannot
   be handled in place, so that case falls through to the TMP_REG1
   path in the general version. */
static sljit_s32 emit_non_cum_binary(struct sljit_compiler *compiler,
	sljit_u32 op_types,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_u8 op_eax_imm = U8(op_types >> 24);
	sljit_u8 op_rm = U8((op_types >> 16) & 0xff);
	sljit_u8 op_mr = U8((op_types >> 8) & 0xff);
	sljit_u8 op_imm = U8(op_types & 0xff);

	/* In-place: dst op= src2. */
	if (dst == src1 && dstw == src1w) {
		if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
			if ((dst == SLJIT_R0) && (src2w > 127 || src2w < -128)) {
#endif
				BINARY_EAX_IMM(op_eax_imm, src2w);
			}
			else {
				BINARY_IMM(op_imm, op_mr, src2w, dst, dstw);
			}
		}
		else if (FAST_IS_REG(dst)) {
			inst = emit_x86_instruction(compiler, 1, dst, dstw, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		else {
			EMIT_MOV(compiler, TMP_REG1, 0, src2, src2w);
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, dst, dstw);
			FAIL_IF(!inst);
			*inst = op_mr;
		}
		return SLJIT_SUCCESS;
	}

	/* General version. */
	if (FAST_IS_REG(dst) && dst != src2) {
		/* Compute directly in the destination register. */
		EMIT_MOV(compiler, dst, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, dst, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, dst, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
	}
	else {
		/* This version requires less memory writing. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(op_imm, op_mr, src2w, TMP_REG1, 0);
		}
		else {
			inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = op_rm;
		}
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
2004
/* Emits a signed multiplication: dst = src1 * src2. The product is
   computed in a register (dst itself when it is a register, otherwise
   TMP_REG1) and written back to memory at the end when dst is a
   memory operand. */
static sljit_s32 emit_mul(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Register destination. */
	if (dst_r == src1 && src2 != SLJIT_IMM) {
		/* Two operand form: imul dst_r, src2. */
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
	} else if (dst_r == src2 && src1 != SLJIT_IMM) {
		/* Multiplication is commutative: imul dst_r, src1. */
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src1, src1w));
	} else if (src1 == SLJIT_IMM) {
		if (src2 == SLJIT_IMM) {
			/* Both operands are immediates: materialize src2 in dst_r first. */
			EMIT_MOV(compiler, dst_r, 0, SLJIT_IMM, src2w);
			src2 = dst_r;
			src2w = 0;
		}

		if (src1w <= 127 && src1w >= -128) {
			/* Three operand form with a sign extended 8 bit immediate. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;

			FAIL_IF(emit_byte(compiler, U8(src1w)));
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			/* Three operand form; the 32 bit immediate is appended below. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src1w);
		}
#else
		else if (IS_HALFWORD(src1w)) {
			/* Three operand form with a sign extended 32 bit immediate. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;
			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src1w);
		}
		else {
			/* The immediate does not fit in 32 bits: load it into
			   TMP_REG2 and use the two operand form. */
			if (dst_r != src2)
				EMIT_MOV(compiler, dst_r, 0, src2, src2w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src1w));
			FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
		}
#endif
	}
	else if (src2 == SLJIT_IMM) {
		/* Note: src1 is NOT immediate. */

		if (src2w <= 127 && src2w >= -128) {
			/* Three operand form with a sign extended 8 bit immediate. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i8;

			FAIL_IF(emit_byte(compiler, U8(src2w)));
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		else {
			/* Three operand form; the 32 bit immediate is appended below. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_sw(inst, src2w);
		}
#else
		else if (IS_HALFWORD(src2w)) {
			/* Three operand form with a sign extended 32 bit immediate. */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = IMUL_r_rm_i32;

			inst = (sljit_u8*)ensure_buf(compiler, 1 + 4);
			FAIL_IF(!inst);
			INC_SIZE(4);
			sljit_unaligned_store_s32(inst, (sljit_s32)src2w);
		} else {
			/* The immediate does not fit in 32 bits: load it into
			   TMP_REG2 and use the two operand form. */
			if (dst_r != src1)
				EMIT_MOV(compiler, dst_r, 0, src1, src1w);
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, TMP_REG2, 0));
		}
#endif
	} else {
		/* Neither argument is immediate. */
		if (ADDRESSING_DEPENDS_ON(src2, dst_r))
			dst_r = TMP_REG1;
		EMIT_MOV(compiler, dst_r, 0, src1, src1w);
		FAIL_IF(emit_groupf(compiler, IMUL_r_rm, dst_r, src2, src2w));
	}

	/* When dst is a memory operand, the product was computed in TMP_REG1. */
	if (dst & SLJIT_MEM)
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
2111
/* Attempts to emit dst = src1 + src2 as a single LEA instruction,
   which computes the sum without modifying the status flags. Returns
   SLJIT_ERR_UNSUPPORTED when the operand combination cannot be
   expressed as an x86 addressing mode, in which case the caller
   falls back to a normal ADD. */
static sljit_s32 emit_lea_binary(struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;
	sljit_s32 dst_r, done = 0;

	/* These cases better be left to handled by normal way. */
	if (dst == src1 && dstw == src1w)
		return SLJIT_ERR_UNSUPPORTED;
	if (dst == src2 && dstw == src2w)
		return SLJIT_ERR_UNSUPPORTED;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (FAST_IS_REG(src1)) {
		if (FAST_IS_REG(src2)) {
			/* lea dst_r, [src1 + src2] */
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM2(src1, src2), 0);
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
		/* lea dst_r, [src1 + imm]; on 64 bit the displacement must fit in 32 bits. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (src2 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src2w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), (sljit_s32)src2w);
#else
		if (src2 == SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src1), src2w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}
	else if (FAST_IS_REG(src2)) {
		/* lea dst_r, [src2 + imm]; on 64 bit the displacement must fit in 32 bits. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (src1 == SLJIT_IMM && (compiler->mode32 || IS_HALFWORD(src1w))) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), (sljit_s32)src1w);
#else
		if (src1 == SLJIT_IMM) {
			inst = emit_x86_instruction(compiler, 1, dst_r, 0, SLJIT_MEM1(src2), src1w);
#endif
			FAIL_IF(!inst);
			*inst = LEA_r_m;
			done = 1;
		}
	}

	if (done) {
		/* When dst is a memory operand, the sum was computed in TMP_REG1. */
		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}
	return SLJIT_ERR_UNSUPPORTED;
}
2168
/* Emits a CMP instruction (a flags-only subtraction) comparing src1
   with src2. No destination is written; only the status flags are
   updated. */
static sljit_s32 emit_cmp_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* Use the shorter EAX/RAX encoding for immediates that need the
	   32 bit form anyway (small immediates use the imm8 form instead). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(CMP_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src1)) {
		if (src2 == SLJIT_IMM) {
			BINARY_IMM(CMP, CMP_rm_r, src2w, src1, 0);
		}
		else {
			/* cmp src1, src2/m */
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = CMP_r_rm;
		}
		return SLJIT_SUCCESS;
	}

	if (FAST_IS_REG(src2) && src1 != SLJIT_IMM) {
		/* cmp src1/m, src2 */
		inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
		FAIL_IF(!inst);
		*inst = CMP_rm_r;
		return SLJIT_SUCCESS;
	}

	if (src2 == SLJIT_IMM) {
		if (src1 == SLJIT_IMM) {
			/* Both immediates: materialize src1 in a register first. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
		BINARY_IMM(CMP, CMP_rm_r, src2w, src1, src1w);
	}
	else {
		/* Both operands are in memory: load src1 into TMP_REG1. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = CMP_r_rm;
	}
	return SLJIT_SUCCESS;
}
2219
/* Emits a TEST instruction (a flags-only bitwise AND) of src1 and
   src2. TEST is symmetric, so both operand orders are tried before
   falling back to loading src1 into TMP_REG1. */
static sljit_s32 emit_test_binary(struct sljit_compiler *compiler,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_u8* inst;

	/* Shorter EAX/RAX encoding when one operand is EAX and the other a
	   32 bit immediate (small immediates have no imm8 TEST form benefit here). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128) && (compiler->mode32 || IS_HALFWORD(src2w))) {
#else
	if (src1 == SLJIT_R0 && src2 == SLJIT_IMM && (src2w > 127 || src2w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src2w);
		return SLJIT_SUCCESS;
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128) && (compiler->mode32 || IS_HALFWORD(src1w))) {
#else
	if (src2 == SLJIT_R0 && src1 == SLJIT_IMM && (src1w > 127 || src1w < -128)) {
#endif
		BINARY_EAX_IMM(TEST_EAX_i32, src1w);
		return SLJIT_SUCCESS;
	}

	if (src1 != SLJIT_IMM) {
		if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src2w) || compiler->mode32) {
				/* test src1/m, imm32 (GROUP_F7 /0). */
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				/* 64 bit immediate: load into TMP_REG1 and use test r/m, r. */
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src2w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src1, src1w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, src1, src1w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src1)) {
			inst = emit_x86_instruction(compiler, 1, src1, 0, src2, src2w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	if (src2 != SLJIT_IMM) {
		if (src1 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			if (IS_HALFWORD(src1w) || compiler->mode32) {
				/* test src2/m, imm32 (GROUP_F7 /0). */
				inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src1w, src2, src2w);
				FAIL_IF(!inst);
				*inst = GROUP_F7;
			}
			else {
				/* 64 bit immediate: load into TMP_REG1 and use test r/m, r. */
				FAIL_IF(emit_load_imm64(compiler, TMP_REG1, src1w));
				inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
				FAIL_IF(!inst);
				*inst = TEST_rm_r;
			}
#else
			/* src1 is SLJIT_IMM here, so this is the imm32 form as well. */
			inst = emit_x86_instruction(compiler, 1, src1, src1w, src2, src2w);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
#endif
			return SLJIT_SUCCESS;
		}
		else if (FAST_IS_REG(src2)) {
			inst = emit_x86_instruction(compiler, 1, src2, 0, src1, src1w);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
			return SLJIT_SUCCESS;
		}
	}

	/* Neither order could be encoded directly: load src1 into TMP_REG1. */
	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	if (src2 == SLJIT_IMM) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (IS_HALFWORD(src2w) || compiler->mode32) {
			inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = GROUP_F7;
		}
		else {
			FAIL_IF(emit_load_imm64(compiler, TMP_REG2, src2w));
			inst = emit_x86_instruction(compiler, 1, TMP_REG2, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			*inst = TEST_rm_r;
		}
#else
		inst = emit_x86_instruction(compiler, 1, SLJIT_IMM, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		*inst = GROUP_F7;
#endif
	}
	else {
		inst = emit_x86_instruction(compiler, 1, TMP_REG1, 0, src2, src2w);
		FAIL_IF(!inst);
		*inst = TEST_rm_r;
	}
	return SLJIT_SUCCESS;
}
2329
/* Emits a shift or rotate: dst = src1 <shift> src2, where mode is the
   opcode extension (SHL/SHR/SAR/ROL/ROR) OR-ed into the second
   instruction byte. Variable shift counts must live in ECX
   (SLJIT_PREF_SHIFT_REG), so much of this function is devoted to
   saving/restoring ECX when it is occupied by another operand. */
static sljit_s32 emit_shift(struct sljit_compiler *compiler,
	sljit_u8 mode,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 mode32;
#endif
	sljit_u8* inst;

	/* Easy cases: the count is an immediate or already in ECX. */
	if (src2 == SLJIT_IMM || src2 == SLJIT_PREF_SHIFT_REG) {
		if (dst == src1 && dstw == src1w) {
			/* Shift dst in place. */
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, dstw);
			FAIL_IF(!inst);
			inst[1] |= mode;
			return SLJIT_SUCCESS;
		}
		if (dst == SLJIT_PREF_SHIFT_REG && src2 == SLJIT_PREF_SHIFT_REG) {
			/* Destination and count are both ECX: shift a copy in TMP_REG1. */
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			FAIL_IF(!inst);
			inst[1] |= mode;
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
			return SLJIT_SUCCESS;
		}
		if (FAST_IS_REG(dst)) {
			EMIT_MOV(compiler, dst, 0, src1, src1w);
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, dst, 0);
			FAIL_IF(!inst);
			inst[1] |= mode;
			return SLJIT_SUCCESS;
		}

		/* dst is in memory: shift in TMP_REG1 and store the result. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, src2, src2w, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= mode;
		EMIT_MOV(compiler, dst, dstw, TMP_REG1, 0);
		return SLJIT_SUCCESS;
	}

	/* Variable count that is not yet in ECX. */
	if (dst == SLJIT_PREF_SHIFT_REG) {
		/* ECX is the destination: shift TMP_REG1, then move it into ECX. */
		EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
		FAIL_IF(!inst);
		inst[1] |= mode;
		return emit_mov(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}

	if (FAST_IS_REG(dst) && dst != src2 && dst != TMP_REG1 && !ADDRESSING_DEPENDS_ON(src2, dst)) {
		if (src1 != dst)
			EMIT_MOV(compiler, dst, 0, src1, src1w);
		/* Save ECX in TMP_REG1; the whole 64 bit register must be
		   preserved even for 32 bit operations. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		mode32 = compiler->mode32;
		compiler->mode32 = 0;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = mode32;
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
		inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, dst, 0);
		FAIL_IF(!inst);
		inst[1] |= mode;
		/* Restore ECX (again with full register width on 64 bit). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
#endif
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = mode32;
#endif
		return SLJIT_SUCCESS;
	}

	/* This case is complex since ecx itself may be used for
	   addressing, and this case must be supported as well. */
	EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
	/* Save ECX: on 32 bit into the stack (TMP_REG1 is in use),
	   on 64 bit into TMP_REG2 with full register width. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
#else /* !SLJIT_CONFIG_X86_32 */
	mode32 = compiler->mode32;
	compiler->mode32 = 0;
	EMIT_MOV(compiler, TMP_REG2, 0, SLJIT_PREF_SHIFT_REG, 0);
	compiler->mode32 = mode32;
#endif /* SLJIT_CONFIG_X86_32 */

	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src2, src2w);
	inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	FAIL_IF(!inst);
	inst[1] |= mode;

	/* Restore ECX. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, SLJIT_MEM1(SLJIT_SP), 0);
#else
	compiler->mode32 = 0;
	EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG2, 0);
	compiler->mode32 = mode32;
#endif /* SLJIT_CONFIG_X86_32 */

	if (dst != TMP_REG1)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

	return SLJIT_SUCCESS;
}
2436
/* Emits a shift that must leave meaningful status flags. Shift
   instructions do not update flags when the (masked) count is zero,
   so a zero count is replaced by an operation with equivalent flag
   behavior, and register destinations get an explicit compare. */
static sljit_s32 emit_shift_with_flags(struct sljit_compiler *compiler,
	sljit_u8 mode, sljit_s32 set_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The CPU does not set flags if the shift count is 0. */
	if (src2 == SLJIT_IMM) {
		/* Mask the count the same way the hardware does. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		src2w &= compiler->mode32 ? 0x1f : 0x3f;
#else /* !SLJIT_CONFIG_X86_64 */
		src2w &= 0x1f;
#endif /* SLJIT_CONFIG_X86_64 */
		if (src2w != 0)
			return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

		if (!set_flags)
			return emit_mov(compiler, dst, dstw, src1, src1w);
		/* OR dst, src, 0 */
		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
			dst, dstw, src1, src1w, SLJIT_IMM, 0);
	}

	if (!set_flags)
		return emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w);

	/* Memory destination: compare the source before the shift,
	   because the result cannot be compared cheaply afterwards. */
	if (!FAST_IS_REG(dst))
		FAIL_IF(emit_cmp_binary(compiler, src1, src1w, SLJIT_IMM, 0));

	FAIL_IF(emit_shift(compiler, mode, dst, dstw, src1, src1w, src2, src2w));

	/* Register destination: compare the result to set the flags. */
	if (FAST_IS_REG(dst))
		return emit_cmp_binary(compiler, dst, dstw, SLJIT_IMM, 0);
	return SLJIT_SUCCESS;
}
2472
/* Public entry point for two operand operations (dst = src1 op src2).
   Validates and normalizes the operands, then dispatches to the
   specialized emitters, applying a few peephole substitutions (LEA
   for flagless ADD/SUB with immediate, NEG/NOT for special SUB/XOR
   forms) when the flags are not required. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2, src2w, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_32;
#endif

	SLJIT_ASSERT(dst != TMP_REG1 || HAS_FLAGS(op));

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
		/* Flagless add can often be done with a single LEA. */
		if (!HAS_FLAGS(op)) {
			if (emit_lea_binary(compiler, dst, dstw, src1, src1w, src2, src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
		}
		return emit_cum_binary(compiler, BINARY_OPCODE(ADD),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ADDC:
		return emit_cum_binary(compiler, BINARY_OPCODE(ADC),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUB:
		/* 0 - src2 is a negation. */
		if (src1 == SLJIT_IMM && src1w == 0)
			return emit_unary(compiler, NEG_rm, dst, dstw, src2, src2w);

		if (!HAS_FLAGS(op)) {
			/* Flagless subtract of an immediate: try LEA with -imm. */
			if (src2 == SLJIT_IMM && emit_lea_binary(compiler, dst, dstw, src1, src1w, SLJIT_IMM, -src2w) != SLJIT_ERR_UNSUPPORTED)
				return compiler->error;
			/* dst = src1 - dst is computed as -(dst - src1). */
			if (FAST_IS_REG(dst) && src2 == dst) {
				FAIL_IF(emit_non_cum_binary(compiler, BINARY_OPCODE(SUB), dst, 0, dst, 0, src1, src1w));
				return emit_unary(compiler, NEG_rm, dst, 0, dst, 0);
			}
		}

		return emit_non_cum_binary(compiler, BINARY_OPCODE(SUB),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SUBC:
		return emit_non_cum_binary(compiler, BINARY_OPCODE(SBB),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_MUL:
		return emit_mul(compiler, dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_AND:
		return emit_cum_binary(compiler, BINARY_OPCODE(AND),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_OR:
		return emit_cum_binary(compiler, BINARY_OPCODE(OR),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_XOR:
		/* Flagless xor with -1 is a bitwise NOT. */
		if (!HAS_FLAGS(op)) {
			if (src2 == SLJIT_IMM && src2w == -1)
				return emit_unary(compiler, NOT_rm, dst, dstw, src1, src1w);
			if (src1 == SLJIT_IMM && src1w == -1)
				return emit_unary(compiler, NOT_rm, dst, dstw, src2, src2w);
		}

		return emit_cum_binary(compiler, BINARY_OPCODE(XOR),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_SHL:
	case SLJIT_MSHL:
		return emit_shift_with_flags(compiler, SHL, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		return emit_shift_with_flags(compiler, SHR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ASHR:
	case SLJIT_MASHR:
		return emit_shift_with_flags(compiler, SAR, HAS_FLAGS(op),
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ROTL:
		return emit_shift_with_flags(compiler, ROL, 0,
			dst, dstw, src1, src1w, src2, src2w);
	case SLJIT_ROTR:
		return emit_shift_with_flags(compiler, ROR, 0,
			dst, dstw, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
2562
2563 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2564 sljit_s32 src1, sljit_sw src1w,
2565 sljit_s32 src2, sljit_sw src2w)
2566 {
2567 sljit_s32 opcode = GET_OPCODE(op);
2568
2569 CHECK_ERROR();
2570 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2571
2572 if (opcode != SLJIT_SUB && opcode != SLJIT_AND) {
2573 SLJIT_SKIP_CHECKS(compiler);
2574 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
2575 }
2576
2577 ADJUST_LOCAL_OFFSET(src1, src1w);
2578 ADJUST_LOCAL_OFFSET(src2, src2w);
2579
2580 CHECK_EXTRA_REGS(src1, src1w, (void)0);
2581 CHECK_EXTRA_REGS(src2, src2w, (void)0);
2582 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
2583 compiler->mode32 = op & SLJIT_32;
2584 #endif
2585
2586 if (opcode == SLJIT_SUB) {
2587 return emit_cmp_binary(compiler, src1, src1w, src2, src2w);
2588 }
2589 return emit_test_binary(compiler, src1, src1w, src2, src2w);
2590 }
2591
/* Emits a double precision shift (SHLD/SHRD): bits shifted out of
   src1_reg are filled from src2_reg, and the result is written to
   dst_reg. When src1_reg == src2_reg this degenerates into a rotate.
   The variable shift count must be in ECX (SLJIT_PREF_SHIFT_REG), so
   the bulk of this function shuffles operands in and out of ECX,
   using TMP registers and, on 32 bit, stack slots as backup. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_rotate, is_left, move_src1;
	sljit_u8* inst;
	sljit_sw src1w = 0;
	sljit_sw dstw = 0;
	/* The whole register must be saved even for 32 bit operations. */
	sljit_u8 restore_ecx = 0;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_sw src2w = 0;
	/* Register whose value was spilled to the stack and must be reloaded. */
	sljit_s32 restore_sp4 = 0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
	ADJUST_LOCAL_OFFSET(src3, src3w);

	CHECK_EXTRA_REGS(dst_reg, dstw, (void)0);
	CHECK_EXTRA_REGS(src3, src3w, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

	if (src3 == SLJIT_IMM) {
		/* Mask the count the same way the hardware does. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		src3w &= 0x1f;
#else /* !SLJIT_CONFIG_X86_32 */
		src3w &= (op & SLJIT_32) ? 0x1f : 0x3f;
#endif /* SLJIT_CONFIG_X86_32 */

		/* A zero count shift is a no-op. */
		if (src3w == 0)
			return SLJIT_SUCCESS;
	}

	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);

	/* Identical sources mean the operation is a rotate. The check must
	   precede CHECK_EXTRA_REGS, which may rewrite the register values. */
	is_rotate = (src1_reg == src2_reg);
	CHECK_EXTRA_REGS(src1_reg, src1w, (void)0);
	CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);

	if (is_rotate)
		return emit_shift(compiler, is_left ? ROL : ROR, dst_reg, dstw, src1_reg, src1w, src3, src3w);

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* SHLD/SHRD needs its second source in a register. */
	if (src2_reg & SLJIT_MEM) {
		EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);
		src2_reg = TMP_REG1;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	if (dst_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && (src3 != SLJIT_PREF_SHIFT_REG || src1_reg != SLJIT_PREF_SHIFT_REG)) {
		/* The destination is ECX, which is also needed for the count:
		   operate on a copy of src1 instead. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
		src1_reg = TMP_REG1;
		src1w = 0;
#else /* !SLJIT_CONFIG_X86_64 */
		if (src2_reg != TMP_REG1) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
			src1_reg = TMP_REG1;
			src1w = 0;
		} else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
			/* TMP_REG1 is taken: borrow R0 or R1, spilling it to the stack. */
			restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
			EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
			src1_reg = restore_sp4;
			src1w = 0;
		} else {
			/* src1 is a usable register: just save its value. */
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
			restore_sp4 = src1_reg;
		}
#endif /* SLJIT_CONFIG_X86_64 */

		if (src3 != SLJIT_PREF_SHIFT_REG)
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
	} else {
		if (src2_reg == SLJIT_PREF_SHIFT_REG && src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
			/* src2 occupies ECX but the count must go there: move src2
			   to TMP_REG1 (full register width on 64 bit). */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
			compiler->mode32 = op & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
			src2_reg = TMP_REG1;
			restore_ecx = 1;
		}

		/* When the destination is also the count register, moving src1
		   into it must wait until the count has been copied to ECX. */
		move_src1 = 0;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (dst_reg != src1_reg) {
			if (dst_reg != src3) {
				EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
				src1_reg = dst_reg;
				src1w = 0;
			} else
				move_src1 = 1;
		}
#else /* !SLJIT_CONFIG_X86_64 */
		if (dst_reg & SLJIT_MEM) {
			if (src2_reg != TMP_REG1) {
				EMIT_MOV(compiler, TMP_REG1, 0, src1_reg, src1w);
				src1_reg = TMP_REG1;
				src1w = 0;
			} else if ((src1_reg & SLJIT_MEM) || src1_reg == SLJIT_PREF_SHIFT_REG) {
				/* TMP_REG1 is taken: borrow R0 or R1, spilling it to the stack. */
				restore_sp4 = (src3 == SLJIT_R0) ? SLJIT_R1 : SLJIT_R0;
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), restore_sp4, 0);
				EMIT_MOV(compiler, restore_sp4, 0, src1_reg, src1w);
				src1_reg = restore_sp4;
				src1w = 0;
			} else {
				/* src1 is a usable register: just save its value. */
				EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32), src1_reg, 0);
				restore_sp4 = src1_reg;
			}
		} else if (dst_reg != src1_reg) {
			if (dst_reg != src3) {
				EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
				src1_reg = dst_reg;
				src1w = 0;
			} else
				move_src1 = 1;
		}
#endif /* SLJIT_CONFIG_X86_64 */

		if (src3 != SLJIT_IMM && src3 != SLJIT_PREF_SHIFT_REG) {
			if (!restore_ecx) {
				/* Save ECX before loading the count into it. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				compiler->mode32 = 0;
				EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
				compiler->mode32 = op & SLJIT_32;
				restore_ecx = 1;
#else /* !SLJIT_CONFIG_X86_64 */
				if (src1_reg != TMP_REG1 && src2_reg != TMP_REG1) {
					EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_PREF_SHIFT_REG, 0);
					restore_ecx = 1;
				} else {
					/* TMP_REG1 is occupied: save ECX on the stack. */
					EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_PREF_SHIFT_REG, 0);
					restore_ecx = 2;
				}
#endif /* SLJIT_CONFIG_X86_64 */
			}
			EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, src3, src3w);
		}

		if (move_src1) {
			EMIT_MOV(compiler, dst_reg, 0, src1_reg, src1w);
			src1_reg = dst_reg;
			src1w = 0;
		}
	}

	/* Emit the 0F A4/A5 (SHLD) or 0F AC/AD (SHRD) instruction. */
	inst = emit_x86_instruction(compiler, 2, src2_reg, 0, src1_reg, src1w);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;

	if (src3 == SLJIT_IMM) {
		/* The immediate count opcode is one less than the CL form. */
		inst[1] = U8((is_left ? SHLD : SHRD) - 1);

		/* Immediate argument is added separately. */
		FAIL_IF(emit_byte(compiler, U8(src3w)));
	} else
		inst[1] = U8(is_left ? SHLD : SHRD);

	/* Restore clobbered registers and store the result if needed. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (restore_ecx) {
		compiler->mode32 = 0;
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, TMP_REG1, 0);
	}

	if (src1_reg != dst_reg) {
		compiler->mode32 = op & SLJIT_32;
		return emit_mov(compiler, dst_reg, dstw, src1_reg, 0);
	}
#else /* !SLJIT_CONFIG_X86_64 */
	if (restore_ecx)
		EMIT_MOV(compiler, SLJIT_PREF_SHIFT_REG, 0, restore_ecx == 1 ? TMP_REG1 : SLJIT_MEM1(SLJIT_SP), 0);

	if (src1_reg != dst_reg)
		EMIT_MOV(compiler, dst_reg, dstw, src1_reg, 0);

	if (restore_sp4)
		return emit_mov(compiler, restore_sp4, 0, SLJIT_MEM1(SLJIT_SP), sizeof(sljit_s32));
#endif /* SLJIT_CONFIG_X86_32 */

	return SLJIT_SUCCESS;
}
2782
2783 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2784 sljit_s32 src, sljit_sw srcw)
2785 {
2786 CHECK_ERROR();
2787 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2788 ADJUST_LOCAL_OFFSET(src, srcw);
2789
2790 CHECK_EXTRA_REGS(src, srcw, (void)0);
2791
2792 switch (op) {
2793 case SLJIT_FAST_RETURN:
2794 return emit_fast_return(compiler, src, srcw);
2795 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2796 /* Don't adjust shadow stack if it isn't enabled. */
2797 if (!cpu_has_shadow_stack ())
2798 return SLJIT_SUCCESS;
2799 return adjust_shadow_stack(compiler, src, srcw);
2800 case SLJIT_PREFETCH_L1:
2801 case SLJIT_PREFETCH_L2:
2802 case SLJIT_PREFETCH_L3:
2803 case SLJIT_PREFETCH_ONCE:
2804 return emit_prefetch(compiler, op, src, srcw);
2805 }
2806
2807 return SLJIT_SUCCESS;
2808 }
2809
2810 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2811 sljit_s32 dst, sljit_sw dstw)
2812 {
2813 CHECK_ERROR();
2814 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2815 ADJUST_LOCAL_OFFSET(dst, dstw);
2816
2817 CHECK_EXTRA_REGS(dst, dstw, (void)0);
2818
2819 switch (op) {
2820 case SLJIT_FAST_ENTER:
2821 return emit_fast_enter(compiler, dst, dstw);
2822 case SLJIT_GET_RETURN_ADDRESS:
2823 return sljit_emit_get_return_address(compiler, dst, dstw);
2824 }
2825
2826 return SLJIT_SUCCESS;
2827 }
2828
2829 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2830 {
2831 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2832
2833 if (type == SLJIT_GP_REGISTER) {
2834 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
2835 if (reg >= SLJIT_R3 && reg <= SLJIT_R8)
2836 return -1;
2837 #endif /* SLJIT_CONFIG_X86_32 */
2838 return reg_map[reg];
2839 }
2840
2841 if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256 && type != SLJIT_SIMD_REG_512)
2842 return -1;
2843
2844 return freg_map[reg];
2845 }
2846
/* Copies a caller-assembled raw machine instruction of the given size
   into the generated code buffer, unchanged. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	inst = (sljit_u8*)ensure_buf(compiler, 1 + size);
	FAIL_IF(!inst);
	INC_SIZE(size);
	SLJIT_MEMCPY(inst, instruction, size);
	return SLJIT_SUCCESS;
}
2861
2862 /* --------------------------------------------------------------------- */
2863 /* Floating point operators */
2864 /* --------------------------------------------------------------------- */
2865
/* Alignment(3) + 4 * 16 bytes. Storage for four 16 byte SSE2 constants;
   the extra 3 words allow aligning to a 16 byte boundary at runtime. */
static sljit_u32 sse2_data[3 + (4 * 4)];
/* 16 byte aligned pointer into sse2_data, initialized by init_compiler. */
static sljit_u32 *sse2_buffer;
2869
/* One-time backend initialization: detects CPU features and fills the
   aligned SSE2 constant pool used for float sign manipulation
   (sign-bit masks for negation, inverted sign masks for abs). */
static void init_compiler(void)
{
	get_cpu_features();

	/* Align to 16 bytes. */
	sse2_buffer = (sljit_u32*)(((sljit_uw)sse2_data + 15) & ~(sljit_uw)0xf);

	/* Single precision constants (each constant is 16 byte long). */
	sse2_buffer[0] = 0x80000000;	/* sign bit of a 32 bit float */
	sse2_buffer[4] = 0x7fffffff;	/* all bits except the sign bit */
	/* Double precision constants (each constant is 16 byte long). */
	sse2_buffer[8] = 0;
	sse2_buffer[9] = 0x80000000;	/* high word holds the 64 bit sign bit */
	sse2_buffer[12] = 0xffffffff;
	sse2_buffer[13] = 0x7fffffff;	/* all bits except the 64 bit sign bit */
}
2886
/* Emits a two byte (0F xx) instruction. The low byte of op is the
   second opcode byte; the upper bits carry prefix and operand flags
   which are forwarded to emit_x86_instruction. */
static sljit_s32 emit_groupf(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst = emit_x86_instruction(compiler, 2 | (op & ~(sljit_uw)0xff), dst, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = op & 0xff;
	return SLJIT_SUCCESS;
}
2897
/* Emits a three byte (0F 38 xx or 0F 3A xx) instruction. The low byte
   of op is the final opcode byte; VEX_OP_0F38 / VEX_OP_0F3A selects
   the escape byte, and the remaining bits carry prefix and operand
   flags for emit_x86_instruction. */
static sljit_s32 emit_groupf_ext(struct sljit_compiler *compiler,
	sljit_uw op,
	sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;

	SLJIT_ASSERT((op & EX86_SSE2) && ((op & VEX_OP_0F38) || (op & VEX_OP_0F3A)));

	inst = emit_x86_instruction(compiler, 3 | (op & ~((sljit_uw)0xff | VEX_OP_0F38 | VEX_OP_0F3A)), dst, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = U8((op & VEX_OP_0F38) ? 0x38 : 0x3A);
	inst[2] = op & 0xff;
	return SLJIT_SUCCESS;
}
2913
/* Loads a float value into an SSE register: MOVSS (F3 prefix) for
   single precision, MOVSD (F2 prefix) for double precision. */
static SLJIT_INLINE sljit_s32 emit_sse2_load(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_s32 src, sljit_sw srcw)
{
	return emit_groupf(compiler, MOVSD_x_xm | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, dst, src, srcw);
}
2919
/* Stores a float value from an SSE register: MOVSS (F3 prefix) for
   single precision, MOVSD (F2 prefix) for double precision. */
static SLJIT_INLINE sljit_s32 emit_sse2_store(struct sljit_compiler *compiler,
	sljit_s32 single, sljit_s32 dst, sljit_sw dstw, sljit_s32 src)
{
	return emit_groupf(compiler, MOVSD_xm_x | (single ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, src, dst, dstw);
}
2925
/* Converts a floating point value to a signed integer using the
   truncating CVTTSS2SI/CVTTSD2SI instruction. The result is produced
   in a register (TMP_REG1 when dst is memory) and stored afterwards. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* A full word result requires the 64 bit (REX.W) form. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		compiler->mode32 = 0;
#endif

	FAIL_IF(emit_groupf(compiler, CVTTSD2SI_r_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP2, dst_r, src, srcw));

	if (dst & SLJIT_MEM)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	return SLJIT_SUCCESS;
}
2946
/* Converts a signed integer to a floating point value using
   CVTSI2SS/CVTSI2SD. Immediates are first materialized in TMP_REG1;
   the result goes through TMP_FREG when dst is a memory operand. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	/* A full word source requires the 64 bit (REX.W) form. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		compiler->mode32 = 0;
#endif

	if (src == SLJIT_IMM) {
		/* Truncate the immediate for 32 bit source conversions. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			srcw = (sljit_s32)srcw;
#endif
		EMIT_MOV(compiler, TMP_REG1, 0, src, srcw);
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(emit_groupf(compiler, CVTSI2SD_x_rm | EX86_SELECT_F2_F3(op) | EX86_SSE2_OP1, dst_r, src, srcw));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif
	if (dst_r == TMP_FREG)
		return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
	return SLJIT_SUCCESS;
}
2979
/* Emits a floating point comparison that leaves the CPU flags set for the
   requested SLJIT flag type. The common path uses UCOMISD/UCOMISS; the
   ordered-equal family needs an explicit CMPSD/CMPSS mask first, and the
   less/greater family swaps the operand order. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	switch (GET_FLAG_TYPE(op)) {
	case SLJIT_ORDERED_EQUAL:
		/* Also: SLJIT_UNORDERED_OR_NOT_EQUAL */
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		FAIL_IF(emit_groupf(compiler, CMPS_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, TMP_FREG, src2, src2w));

		/* EQ */
		/* Immediate predicate byte 0 selects the "equal (ordered)" compare. */
		FAIL_IF(emit_byte(compiler, 0));

		/* TMP_FREG now holds the compare mask; the UCOMIS below compares it
		   with itself to produce the final flags. */
		src1 = TMP_FREG;
		src2 = TMP_FREG;
		src2w = 0;
		break;

	case SLJIT_ORDERED_LESS:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Also: SLJIT_UNORDERED_OR_GREATER_EQUAL, SLJIT_ORDERED_LESS_EQUAL */
		/* These conditions are tested with the operands reversed, so src2
		   must be in a register. */
		if (!FAST_IS_REG(src2)) {
			FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
			src2 = TMP_FREG;
		}

		return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src2, src1, src1w);
	}

	/* Default: UCOMIS src1, src2 with src1 forced into a register. */
	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
	}

	return emit_groupf(compiler, UCOMISD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, src1, src2, src2w);
}
3016
3017 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3018 sljit_s32 dst, sljit_sw dstw,
3019 sljit_s32 src, sljit_sw srcw)
3020 {
3021 sljit_s32 dst_r;
3022 sljit_u8 *inst;
3023
3024 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3025 compiler->mode32 = 1;
3026 #endif
3027
3028 CHECK_ERROR();
3029 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3030
3031 if (GET_OPCODE(op) == SLJIT_MOV_F64) {
3032 if (FAST_IS_REG(dst))
3033 return emit_sse2_load(compiler, op & SLJIT_32, dst, src, srcw);
3034 if (FAST_IS_REG(src))
3035 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, src);
3036 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3037 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3038 }
3039
3040 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) {
3041 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG;
3042 if (FAST_IS_REG(src)) {
3043 /* We overwrite the high bits of source. From SLJIT point of view,
3044 this is not an issue.
3045 Note: In SSE3, we could also use MOVDDUP and MOVSLDUP. */
3046 FAIL_IF(emit_groupf(compiler, UNPCKLPD_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, src, src, 0));
3047 } else {
3048 FAIL_IF(emit_sse2_load(compiler, !(op & SLJIT_32), TMP_FREG, src, srcw));
3049 src = TMP_FREG;
3050 }
3051
3052 FAIL_IF(emit_groupf(compiler, CVTPD2PS_x_xm | ((op & SLJIT_32) ? EX86_PREF_66 : 0) | EX86_SSE2, dst_r, src, 0));
3053 if (dst_r == TMP_FREG)
3054 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3055 return SLJIT_SUCCESS;
3056 }
3057
3058 if (FAST_IS_REG(dst)) {
3059 dst_r = (dst == src) ? TMP_FREG : dst;
3060
3061 if (src & SLJIT_MEM)
3062 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3063
3064 FAIL_IF(emit_groupf(compiler, PCMPEQD_x_xm | EX86_PREF_66 | EX86_SSE2, dst_r, dst_r, 0));
3065
3066 inst = emit_x86_instruction(compiler, 2 | EX86_PREF_66 | EX86_SSE2_OP2, 0, 0, dst_r, 0);
3067 inst[0] = GROUP_0F;
3068 /* Same as PSRLD_x / PSRLQ_x */
3069 inst[1] = (op & SLJIT_32) ? PSLLD_x_i8 : PSLLQ_x_i8;
3070
3071 if (GET_OPCODE(op) == SLJIT_ABS_F64) {
3072 inst[2] |= 2 << 3;
3073 FAIL_IF(emit_byte(compiler, 1));
3074 } else {
3075 inst[2] |= 6 << 3;
3076 FAIL_IF(emit_byte(compiler, ((op & SLJIT_32) ? 31 : 63)));
3077 }
3078
3079 if (dst_r != TMP_FREG)
3080 dst_r = (src & SLJIT_MEM) ? TMP_FREG : src;
3081 return emit_groupf(compiler, (GET_OPCODE(op) == SLJIT_NEG_F64 ? XORPD_x_xm : ANDPD_x_xm) | EX86_SSE2, dst, dst_r, 0);
3082 }
3083
3084 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src, srcw));
3085
3086 switch (GET_OPCODE(op)) {
3087 case SLJIT_NEG_F64:
3088 FAIL_IF(emit_groupf(compiler, XORPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
3089 break;
3090
3091 case SLJIT_ABS_F64:
3092 FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | EX86_SELECT_66(op) | EX86_SSE2, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer + 4 : sse2_buffer + 12)));
3093 break;
3094 }
3095
3096 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3097 }
3098
3099 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3100 sljit_s32 dst, sljit_sw dstw,
3101 sljit_s32 src1, sljit_sw src1w,
3102 sljit_s32 src2, sljit_sw src2w)
3103 {
3104 sljit_s32 dst_r;
3105
3106 CHECK_ERROR();
3107 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3108 ADJUST_LOCAL_OFFSET(dst, dstw);
3109 ADJUST_LOCAL_OFFSET(src1, src1w);
3110 ADJUST_LOCAL_OFFSET(src2, src2w);
3111
3112 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3113 compiler->mode32 = 1;
3114 #endif
3115
3116 if (FAST_IS_REG(dst)) {
3117 dst_r = dst;
3118 if (dst == src1)
3119 ; /* Do nothing here. */
3120 else if (dst == src2 && (op == SLJIT_ADD_F64 || op == SLJIT_MUL_F64)) {
3121 /* Swap arguments. */
3122 src2 = src1;
3123 src2w = src1w;
3124 }
3125 else if (dst != src2)
3126 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_r, src1, src1w));
3127 else {
3128 dst_r = TMP_FREG;
3129 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3130 }
3131 }
3132 else {
3133 dst_r = TMP_FREG;
3134 FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
3135 }
3136
3137 switch (GET_OPCODE(op)) {
3138 case SLJIT_ADD_F64:
3139 FAIL_IF(emit_groupf(compiler, ADDSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3140 break;
3141
3142 case SLJIT_SUB_F64:
3143 FAIL_IF(emit_groupf(compiler, SUBSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3144 break;
3145
3146 case SLJIT_MUL_F64:
3147 FAIL_IF(emit_groupf(compiler, MULSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3148 break;
3149
3150 case SLJIT_DIV_F64:
3151 FAIL_IF(emit_groupf(compiler, DIVSD_x_xm | EX86_SELECT_F2_F3(op) | EX86_SSE2, dst_r, src2, src2w));
3152 break;
3153 }
3154
3155 if (dst_r == TMP_FREG)
3156 return emit_sse2_store(compiler, op & SLJIT_32, dst, dstw, TMP_FREG);
3157 return SLJIT_SUCCESS;
3158 }
3159
/* Emits a two operand floating point operation with a register destination.
   The XOR/AND/XOR sequence below mixes the sign bit of one operand with the
   magnitude of the other using the sse2_buffer sign-bit mask — presumably
   this implements SLJIT_COPYSIGN; confirm against the sljit op list. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_uw pref;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif

	if (dst_freg == src1) {
		/* src1 must not be clobbered early: compute in TMP_FREG, starting
		   from src2, and move the result at the end. */
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src2, src2w));
		pref = EX86_SELECT_66(op) | EX86_SSE2;
		FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, TMP_FREG, src1, src1w));
		FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, TMP_FREG, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
		return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, TMP_FREG, 0);
	}

	/* The XORs below need src1 in a register. */
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, TMP_FREG, src1, src1w));
		src1 = TMP_FREG;
		src1w = 0;
	}

	if (dst_freg != src2)
		FAIL_IF(emit_sse2_load(compiler, op & SLJIT_32, dst_freg, src2, src2w));

	/* dst = ((src2 ^ src1) & sign_mask) ^ src1. */
	pref = EX86_SELECT_66(op) | EX86_SSE2;
	FAIL_IF(emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w));
	FAIL_IF(emit_groupf(compiler, ANDPD_x_xm | pref, dst_freg, SLJIT_MEM0(), (sljit_sw)((op & SLJIT_32) ? sse2_buffer : sse2_buffer + 8)));
	return emit_groupf(compiler, XORPD_x_xm | pref, dst_freg, src1, src1w);
}
3198
3199 /* --------------------------------------------------------------------- */
3200 /* Conditional instructions */
3201 /* --------------------------------------------------------------------- */
3202
3203 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3204 {
3205 sljit_u8 *inst;
3206 struct sljit_label *label;
3207
3208 CHECK_ERROR_PTR();
3209 CHECK_PTR(check_sljit_emit_label(compiler));
3210
3211 if (compiler->last_label && compiler->last_label->size == compiler->size)
3212 return compiler->last_label;
3213
3214 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3215 PTR_FAIL_IF(!label);
3216 set_label(label, compiler);
3217
3218 inst = (sljit_u8*)ensure_buf(compiler, 2);
3219 PTR_FAIL_IF(!inst);
3220 inst[0] = 0;
3221 inst[1] = 0;
3222
3223 return label;
3224 }
3225
3226 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3227 {
3228 sljit_u8 *inst;
3229 struct sljit_jump *jump;
3230
3231 CHECK_ERROR_PTR();
3232 CHECK_PTR(check_sljit_emit_jump(compiler, type));
3233
3234 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3235 PTR_FAIL_IF_NULL(jump);
3236 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | ((type & 0xff) << TYPE_SHIFT)));
3237 type &= 0xff;
3238
3239 /* Worst case size. */
3240 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3241 compiler->size += (type >= SLJIT_JUMP) ? 5 : 6;
3242 #else
3243 compiler->size += (type >= SLJIT_JUMP) ? (10 + 3) : (2 + 10 + 3);
3244 #endif
3245
3246 inst = (sljit_u8*)ensure_buf(compiler, 2);
3247 PTR_FAIL_IF_NULL(inst);
3248
3249 inst[0] = 0;
3250 inst[1] = 1;
3251 return jump;
3252 }
3253
/* Emits an indirect jump or (fast) call to an address given in a register,
   memory operand, or as an immediate target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 *inst;
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	CHECK_EXTRA_REGS(src, srcw, (void)0);

	if (src == SLJIT_IMM) {
		/* Immediate target: record a jump record whose absolute address is
		   resolved when the code is generated. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF_NULL(jump);
		set_jump(jump, compiler, (sljit_u32)(JUMP_ADDR | (type << TYPE_SHIFT)));
		jump->u.target = (sljit_uw)srcw;

		/* Worst case size. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		compiler->size += 5;
#else
		compiler->size += 10 + 3;
#endif

		/* A 0,1 byte pair records the jump in the buffer. */
		inst = (sljit_u8*)ensure_buf(compiler, 2);
		FAIL_IF_NULL(inst);

		inst[0] = 0;
		inst[1] = 1;
	}
	else {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* REX_W is not necessary (src is not immediate). */
		compiler->mode32 = 1;
#endif
		/* Group FF: CALL r/m (fast call) or JMP r/m, selected by the
		   modrm /digit OR-ed into the second byte. */
		inst = emit_x86_instruction(compiler, 1, 0, 0, src, srcw);
		FAIL_IF(!inst);
		inst[0] = GROUP_FF;
		inst[1] = U8(inst[1] | ((type >= SLJIT_FAST_CALL) ? CALL_rm : JMP_rm));
	}
	return SLJIT_SUCCESS;
}
3296
/* Materializes the current CPU condition flags as a 0/1 value using SETcc,
   then either moves it to dst or combines it with dst via sljit_emit_op2.
   The 32 bit path must additionally cope with registers whose low byte is
   not addressable (reg_map > 4). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 *inst;
	sljit_u8 cond_set;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif /* SLJIT_CONFIG_X86_64 */
	/* ADJUST_LOCAL_OFFSET and CHECK_EXTRA_REGS might overwrite these values. */
	sljit_s32 dst_save = dst;
	sljit_sw dstw_save = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	ADJUST_LOCAL_OFFSET(dst, dstw);
	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	/* setcc = jcc + 0x10. */
	cond_set = U8(get_jump_code((sljit_uw)type) + 0x10);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst)) {
		/* Fast path: SETcc into TMP_REG1's low byte, then OR it into dst.
		   4 bytes for the SETcc, 3 for the OR. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 3);
		FAIL_IF(!inst);
		INC_SIZE(4 + 3);
		/* Set low register to conditional flag. */
		inst[0] = (reg_map[TMP_REG1] <= 7) ? REX : REX_B;
		inst[1] = GROUP_0F;
		inst[2] = cond_set;
		inst[3] = MOD_REG | reg_lmap[TMP_REG1];
		inst[4] = U8(REX | (reg_map[TMP_REG1] <= 7 ? 0 : REX_R) | (reg_map[dst] <= 7 ? 0 : REX_B));
		inst[5] = OR_rm8_r8;
		inst[6] = U8(MOD_REG | (reg_lmap[TMP_REG1] << 3) | reg_lmap[dst]);
		return SLJIT_SUCCESS;
	}

	/* SETcc only writes the low byte: follow it with a MOVZX to get a
	   clean 0/1 value in the full register. */
	reg = (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG1;

	inst = (sljit_u8*)ensure_buf(compiler, 1 + 4 + 4);
	FAIL_IF(!inst);
	INC_SIZE(4 + 4);
	/* Set low register to conditional flag. */
	inst[0] = (reg_map[reg] <= 7) ? REX : REX_B;
	inst[1] = GROUP_0F;
	inst[2] = cond_set;
	inst[3] = MOD_REG | reg_lmap[reg];
	inst[4] = REX_W | (reg_map[reg] <= 7 ? 0 : (REX_B | REX_R));
	/* The movzx instruction does not affect flags. */
	inst[5] = GROUP_0F;
	inst[6] = MOVZX_r_rm8;
	inst[7] = U8(MOD_REG | (reg_lmap[reg] << 3) | reg_lmap[reg]);

	if (reg != TMP_REG1)
		return SLJIT_SUCCESS;

	if (GET_OPCODE(op) < SLJIT_ADD) {
		compiler->mode32 = GET_OPCODE(op) != SLJIT_MOV;
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

	/* Binary op requested: combine the flag value with dst. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);

#else /* !SLJIT_CONFIG_X86_64 */
	/* TMP_REG1's low byte must be addressable on 32 bit. */
	SLJIT_ASSERT(reg_map[TMP_REG1] < 4);

	/* The SLJIT_CONFIG_X86_32 code path starts here. */
	if (GET_OPCODE(op) < SLJIT_ADD && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		/* Low byte is accessible. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
		FAIL_IF(!inst);
		INC_SIZE(3 + 3);
		/* Set low byte to conditional flag. */
		inst[0] = GROUP_0F;
		inst[1] = cond_set;
		inst[2] = U8(MOD_REG | reg_map[dst]);

		inst[3] = GROUP_0F;
		inst[4] = MOVZX_r_rm8;
		inst[5] = U8(MOD_REG | (reg_map[dst] << 3) | reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	if (GET_OPCODE(op) == SLJIT_OR && !GET_ALL_FLAGS(op) && FAST_IS_REG(dst) && reg_map[dst] <= 4) {
		/* Fast path: SETcc into TMP_REG1's low byte, OR it into dst. */
		inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 2);
		FAIL_IF(!inst);
		INC_SIZE(3 + 2);

		/* Set low byte to conditional flag. */
		inst[0] = GROUP_0F;
		inst[1] = cond_set;
		inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);

		inst[3] = OR_rm8_r8;
		inst[4] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[dst]);
		return SLJIT_SUCCESS;
	}

	/* Generic path: SETcc + MOVZX in TMP_REG1, then move or combine. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 3 + 3);
	FAIL_IF(!inst);
	INC_SIZE(3 + 3);
	/* Set low byte to conditional flag. */
	inst[0] = GROUP_0F;
	inst[1] = cond_set;
	inst[2] = U8(MOD_REG | reg_map[TMP_REG1]);

	inst[3] = GROUP_0F;
	inst[4] = MOVZX_r_rm8;
	inst[5] = U8(MOD_REG | (reg_map[TMP_REG1] << 3) | reg_map[TMP_REG1]);

	if (GET_OPCODE(op) < SLJIT_ADD)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, dst_save, dstw_save, dst_save, dstw_save, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
}
3416
/* Emits a conditional select: dst_reg = cond(type) ? src1 : src2_reg.
   Uses CMOVcc when available, otherwise a branch-based fallback
   (emit_cmov_generic). Operand shuffling below arranges for src2 to
   already sit in the destination, so only the "true" case needs a
   conditional move; 'type ^= 0x1' inverts the condition whenever the
   operands are swapped. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 dst = dst_reg;
	sljit_sw dstw = 0;
#endif /* SLJIT_CONFIG_X86_32 */
	sljit_sw src2w = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
	CHECK_EXTRA_REGS(src1, src1w, (void)0);
	CHECK_EXTRA_REGS(src2_reg, src2w, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
	type &= ~SLJIT_32;

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst & SLJIT_MEM) {
		/* Memory destination (32 bit only): select into TMP_REG1 and store
		   at the end. One of the sources is preloaded into TMP_REG1. */
		if (src1 == SLJIT_IMM || (!(src1 & SLJIT_MEM) && (src2_reg & SLJIT_MEM))) {
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = src2_reg;
			src1w = src2w;
			type ^= 0x1;
		} else
			EMIT_MOV(compiler, TMP_REG1, 0, src2_reg, src2w);

		dst_reg = TMP_REG1;
	} else {
#endif /* SLJIT_CONFIG_X86_32 */
		if (dst_reg != src2_reg) {
			if (dst_reg == src1) {
				/* dst already holds src1: cmov the other source instead. */
				src1 = src2_reg;
				src1w = src2w;
				type ^= 0x1;
			} else {
				if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
					/* Writing dst first would corrupt src1's address. */
					EMIT_MOV(compiler, dst_reg, 0, src1, src1w);
					src1 = src2_reg;
					src1w = src2w;
					type ^= 0x1;
				} else
					EMIT_MOV(compiler, dst_reg, 0, src2_reg, src2w);
			}
		}

		/* CMOVcc has no immediate form. */
		if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
			SLJIT_ASSERT(dst_reg != TMP_REG1);
			EMIT_MOV(compiler, TMP_REG1, 0, src1, src1w);
			src1 = TMP_REG1;
			src1w = 0;
		}
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	}
#endif /* SLJIT_CONFIG_X86_32 */

	/* cmovcc = setcc - 0x50 (jcc - 0x40). */
	if (sljit_has_cpu_feature(SLJIT_HAS_CMOV))
		FAIL_IF(emit_groupf(compiler, U8(get_jump_code((sljit_uw)type) - 0x40), dst_reg, src1, src1w));
	else
		FAIL_IF(emit_cmov_generic(compiler, type, dst_reg, src1, src1w));

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if (dst_reg == TMP_REG1)
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_32 */
	return SLJIT_SUCCESS;
}
3492
/* Emits a conditional select for floating point registers:
   dst_freg = cond(type) ? src1 : src2_freg. There is no floating point
   CMOV, so a short conditional branch skips the load of src1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_u8* inst;
	sljit_uw size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	/* Make dst hold the "false" value (src2) up front. */
	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* dst already holds src1: load src2 conditionally instead,
			   with the condition inverted. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src2_freg, 0));
	}

	/* Short Jcc with the inverted condition (jcc = get_jump_code - 0x10);
	   its 8 bit displacement is patched below. */
	inst = (sljit_u8*)ensure_buf(compiler, 1 + 2);
	FAIL_IF(!inst);
	INC_SIZE(2);
	inst[0] = U8(get_jump_code((sljit_uw)(type & ~SLJIT_32) ^ 0x1) - 0x10);

	size = compiler->size;
	FAIL_IF(emit_sse2_load(compiler, type & SLJIT_32, dst_freg, src1, src1w));

	/* Patch the branch to jump over the load just emitted. */
	inst[1] = U8(compiler->size - size);
	return SLJIT_SUCCESS;
}
3530
/* Emits a SIMD register load or store. reg_size 4 selects 16 byte (SSE)
   registers, reg_size 5 selects 32 byte (AVX2) registers. The alignment
   hint chooses between the aligned (MOVAPS/MOVDQA) and unaligned
   (MOVUPS/MOVDQU) instruction forms. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_uw op;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	switch (reg_size) {
	case 4:
		op = EX86_SSE2;
		break;
	case 5:
		/* 256 bit registers require AVX2. */
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
		op = EX86_SSE2 | VEX_256;
		break;
	default:
		return SLJIT_ERR_UNSUPPORTED;
	}

	/* Register-to-register moves are always "aligned". */
	if (!(srcdst & SLJIT_MEM))
		alignment = reg_size;

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 2 || elem_size == 3) {
			op |= alignment >= reg_size ? MOVAPS_x_xm : MOVUPS_x_xm;

			/* The 66 prefix turns MOVAPS/MOVUPS into MOVAPD/MOVUPD. */
			if (elem_size == 3)
				op |= EX86_PREF_66;

			/* The store form is the next opcode (xm_x variant). */
			if (type & SLJIT_SIMD_STORE)
				op += 1;
		} else
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		/* Integer move: 66 prefix = MOVDQA, F3 prefix = MOVDQU. */
		op |= ((type & SLJIT_SIMD_STORE) ? MOVDQA_xm_x : MOVDQA_x_xm)
			| (alignment >= reg_size ? EX86_PREF_66 : EX86_PREF_F3);
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (op & VEX_256)
		return emit_vex_instruction(compiler, op, freg, 0, srcdst, srcdstw);

	return emit_groupf(compiler, op, freg, srcdst, srcdstw);
}
3589
/* Broadcasts a scalar (register, memory, or immediate) into every lane of a
   SIMD register. Prefers the AVX2 VPBROADCAST/VBROADCAST forms when
   available; otherwise falls back to SSE move + shuffle sequences. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_u8 *inst;
	sljit_u8 opcode = 0;
	sljit_uw size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		CHECK_EXTRA_REGS(src, srcw, (void)0);
	}

#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* 32 bit: float lanes must be 4 or 8 bytes, integer lanes at most 4. */
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;
#else /* !SLJIT_CONFIG_X86_32 */
	compiler->mode32 = 1;

	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;
#endif /* SLJIT_CONFIG_X86_32 */

	if (cpu_feature_list & CPU_FEATURE_AVX2) {
		if (reg_size < 4 || reg_size > 5)
			return SLJIT_ERR_UNSUPPORTED;

		/* AVX2 fast path; immediates and the 128 bit double-float case are
		   handled by the SSE fallback below. */
		if (src != SLJIT_IMM && (reg_size == 5 || elem_size < 3 || !(type & SLJIT_SIMD_FLOAT))) {
			if (type & SLJIT_SIMD_TEST)
				return SLJIT_SUCCESS;

			/* Integer register source: move it into freg first, since
			   VPBROADCAST takes an xmm or memory source. */
			if (!(src & SLJIT_MEM) && !(type & SLJIT_SIMD_FLOAT)) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				/* 64 bit element needs a REX.W MOVQ. */
				if (elem_size >= 3)
					compiler->mode32 = 0;
#endif /* SLJIT_CONFIG_X86_64 */
				FAIL_IF(emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, src, srcw));
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
				compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */
				src = freg;
				srcw = 0;
			}

			/* Pick the broadcast instruction matching the lane size. */
			switch (elem_size) {
			case 0:
				size = VPBROADCASTB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
				break;
			case 1:
				size = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
				break;
			case 2:
				size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSS_x_xm : VPBROADCASTD_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
				break;
			default:
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
				size = VBROADCASTSD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
#else /* !SLJIT_CONFIG_X86_32 */
				size = ((type & SLJIT_SIMD_FLOAT) ? VBROADCASTSD_x_xm : VPBROADCASTQ_x_xm) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
#endif /* SLJIT_CONFIG_X86_32 */
				break;
			}

			if (reg_size == 5)
				size |= VEX_256;

			return emit_vex_instruction(compiler, size, freg, 0, src, srcw);
		}
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (type & SLJIT_SIMD_FLOAT) {
		if (src == SLJIT_IMM) {
			/* The checker presumably only allows immediate 0 here:
			   zero the register with XORPS/XORPD. */
			if (reg_size == 5)
				return emit_vex_instruction(compiler, XORPD_x_xm | VEX_256 | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);

			return emit_groupf(compiler, XORPD_x_xm | (elem_size == 3 ? EX86_PREF_66 : 0) | EX86_SSE2, freg, freg, 0);
		}

		/* SHUFPS needs the value in freg itself. */
		if (elem_size == 2 && freg != src) {
			FAIL_IF(emit_sse2_load(compiler, 1, freg, src, srcw));
			src = freg;
			srcw = 0;
		}

		/* Float: SHUFPS imm 0 duplicates lane 0; double: MOVDDUP. */
		FAIL_IF(emit_groupf(compiler, (elem_size == 2 ? SHUFPS_x_xm : MOVDDUP_x_xm) | (elem_size == 2 ? 0 : EX86_PREF_F2) | EX86_SSE2, freg, src, srcw));

		if (elem_size == 2)
			return emit_byte(compiler, 0);
		return SLJIT_SUCCESS;
	}

	if (src == SLJIT_IMM) {
		/* Widen sub-dword immediates to a repeated 32 bit pattern. */
		if (elem_size == 0) {
			srcw = (sljit_u8)srcw;
			srcw |= srcw << 8;
			srcw |= srcw << 16;
			elem_size = 2;
		} else if (elem_size == 1) {
			srcw = (sljit_u16)srcw;
			srcw |= srcw << 16;
			elem_size = 2;
		}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		/* Normalize 0xffffffff to -1 so the check below catches it. */
		if (elem_size == 2 && (sljit_s32)srcw == -1)
			srcw = -1;
#endif /* SLJIT_CONFIG_X86_64 */

		/* All-zeros (PXOR) and all-ones (PCMPEQD) need no source at all. */
		if (srcw == 0 || srcw == -1) {
			if (reg_size == 5)
				return emit_vex_instruction(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0);

			return emit_groupf(compiler, (srcw == 0 ? PXOR_x_xm : PCMPEQD_x_xm) | EX86_PREF_66 | EX86_SSE2, freg, freg, 0);
		}

		/* Other immediates are materialized in TMP_REG1. */
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		if (elem_size == 3)
			FAIL_IF(emit_load_imm64(compiler, TMP_REG1, srcw));
		else
#endif /* SLJIT_CONFIG_X86_64 */
			EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcw);

		src = TMP_REG1;
		srcw = 0;
	}

	/* Default opcode: MOVD/MOVQ (2 byte 0F escape); memory sources of
	   sub-dword lanes use PINSRB/PINSRW instead. */
	size = 2;
	opcode = MOVD_x_rm;

	switch (elem_size) {
	case 0:
		if (!FAST_IS_REG(src)) {
			opcode = 0x3a /* Prefix of PINSRB_x_rm_i8. */;
			size = 3;
		}
		break;
	case 1:
		if (!FAST_IS_REG(src))
			opcode = PINSRW_x_rm_i8;
		break;
	case 2:
		break;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	case 3:
		/* MOVQ */
		compiler->mode32 = 0;
		break;
#endif /* SLJIT_CONFIG_X86_64 */
	}

	inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, src, srcw);
	FAIL_IF(!inst);
	inst[0] = GROUP_0F;
	inst[1] = opcode;

	if (reg_size == 5) {
		/* 256 bit: broadcast the dword/qword just inserted in lane 0. */
		SLJIT_ASSERT(opcode == MOVD_x_rm);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
		size = VPBROADCASTD_x_xm;
#else /* !SLJIT_CONFIG_X86_32 */
		size = (elem_size == 3) ? VPBROADCASTQ_x_xm : VPBROADCASTD_x_xm;
#endif /* SLJIT_CONFIG_X86_32 */
		return emit_vex_instruction(compiler, size | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);
	}

	if (size == 3) {
		/* Third opcode byte of the 0F 3A PINSRB encoding. */
		SLJIT_ASSERT(opcode == 0x3a);
		inst[2] = PINSRB_x_rm_i8;
	}

	/* PINSRB/PINSRW take a lane index immediate (lane 0). */
	if (opcode != MOVD_x_rm)
		FAIL_IF(emit_byte(compiler, 0));

	/* Spread lane 0 over the whole register. */
	switch (elem_size) {
	case 0:
		/* PSHUFB with an all-zero mask replicates byte 0 everywhere. */
		FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
		return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
	case 1:
		/* Duplicate word 0 across the low qword, then fall through to
		   duplicate the dwords. */
		FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, freg, 0));
		FAIL_IF(emit_byte(compiler, 0));
		/* fallthrough */
	default:
		/* PSHUFD imm 0 replicates dword 0. */
		FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
		return emit_byte(compiler, 0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	case 3:
		/* PSHUFD imm 0x44 (01 00 01 00) replicates qword 0. */
		compiler->mode32 = 1;
		FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, freg, 0));
		return emit_byte(compiler, 0x44);
#endif /* SLJIT_CONFIG_X86_64 */
	}
}
3792
3793 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3794 sljit_s32 freg, sljit_s32 lane_index,
3795 sljit_s32 srcdst, sljit_sw srcdstw)
3796 {
3797 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3798 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3799 sljit_u8 *inst;
3800 sljit_u8 opcode = 0;
3801 sljit_uw size;
3802 sljit_s32 freg_orig = freg;
3803 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3804 sljit_s32 srcdst_is_ereg = 0;
3805 sljit_s32 srcdst_orig = 0;
3806 sljit_sw srcdstw_orig = 0;
3807 #endif /* SLJIT_CONFIG_X86_32 */
3808
3809 CHECK_ERROR();
3810 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3811
3812 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3813
3814 if (reg_size == 5) {
3815 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
3816 return SLJIT_ERR_UNSUPPORTED;
3817 } else if (reg_size != 4)
3818 return SLJIT_ERR_UNSUPPORTED;
3819
3820 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
3821 if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : elem_size > 2)
3822 return SLJIT_ERR_UNSUPPORTED;
3823 #else /* SLJIT_CONFIG_X86_32 */
3824 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3825 return SLJIT_ERR_UNSUPPORTED;
3826 #endif /* SLJIT_CONFIG_X86_32 */
3827
3828 if (type & SLJIT_SIMD_TEST)
3829 return SLJIT_SUCCESS;
3830
3831 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3832 compiler->mode32 = 1;
3833 #else /* !SLJIT_CONFIG_X86_64 */
3834 if (!(type & SLJIT_SIMD_FLOAT)) {
3835 CHECK_EXTRA_REGS(srcdst, srcdstw, srcdst_is_ereg = 1);
3836
3837 if ((type & SLJIT_SIMD_STORE) && ((srcdst_is_ereg && elem_size < 2) || (elem_size == 0 && (type & SLJIT_SIMD_LANE_SIGNED) && FAST_IS_REG(srcdst) && reg_map[srcdst] >= 4))) {
3838 srcdst_orig = srcdst;
3839 srcdstw_orig = srcdstw;
3840 srcdst = TMP_REG1;
3841 srcdstw = 0;
3842 }
3843 }
3844 #endif /* SLJIT_CONFIG_X86_64 */
3845
3846 if (type & SLJIT_SIMD_LANE_ZERO) {
3847 if (lane_index == 0) {
3848 if (!(type & SLJIT_SIMD_FLOAT)) {
3849 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
3850 if (elem_size == 3) {
3851 compiler->mode32 = 0;
3852 elem_size = 2;
3853 }
3854 #endif /* SLJIT_CONFIG_X86_64 */
3855 if (srcdst == SLJIT_IMM) {
3856 if (elem_size == 0)
3857 srcdstw = (sljit_u8)srcdstw;
3858 else if (elem_size == 1)
3859 srcdstw = (sljit_u16)srcdstw;
3860
3861 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
3862 srcdst = TMP_REG1;
3863 srcdstw = 0;
3864 elem_size = 2;
3865 }
3866
3867 if (elem_size == 2) {
3868 if (reg_size == 4)
3869 return emit_groupf(compiler, MOVD_x_rm | EX86_PREF_66 | EX86_SSE2_OP1, freg, srcdst, srcdstw);
3870 return emit_vex_instruction(compiler, MOVD_x_rm | VEX_AUTO_W | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
3871 }
3872 } else if (srcdst & SLJIT_MEM) {
3873 SLJIT_ASSERT(elem_size == 2 || elem_size == 3);
3874
3875 if (reg_size == 4)
3876 return emit_groupf(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, srcdst, srcdstw);
3877 return emit_vex_instruction(compiler, MOVSD_x_xm | (elem_size == 2 ? EX86_PREF_F3 : EX86_PREF_F2) | EX86_SSE2, freg, 0, srcdst, srcdstw);
3878 } else if (elem_size == 3) {
3879 if (reg_size == 4)
3880 return emit_groupf(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, srcdst, 0);
3881 return emit_vex_instruction(compiler, MOVQ_x_xm | EX86_PREF_F3 | EX86_SSE2, freg, 0, srcdst, 0);
3882 }
3883 }
3884
3885 if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
3886 freg = TMP_FREG;
3887 lane_index -= (1 << (4 - elem_size));
3888 } else if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
3889 FAIL_IF(emit_sse2_load(compiler, elem_size == 2, TMP_FREG, srcdst, srcdstw));
3890 srcdst = TMP_FREG;
3891 srcdstw = 0;
3892 }
3893
3894 size = ((!(type & SLJIT_SIMD_FLOAT) || elem_size != 2) ? EX86_PREF_66 : 0)
3895 | ((type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm) | EX86_SSE2;
3896
3897 if (reg_size == 5)
3898 FAIL_IF(emit_vex_instruction(compiler, size | VEX_256 | VEX_SSE2_OPV, freg, freg, freg, 0));
3899 else
3900 FAIL_IF(emit_groupf(compiler, size, freg, freg, 0));
3901 } else if (reg_size == 5 && lane_index >= (1 << (4 - elem_size))) {
3902 FAIL_IF(emit_vex_instruction(compiler, ((type & SLJIT_SIMD_FLOAT) ? VEXTRACTF128_x_ym : VEXTRACTI128_x_ym) | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
3903 FAIL_IF(emit_byte(compiler, 1));
3904
3905 freg = TMP_FREG;
3906 lane_index -= (1 << (4 - elem_size));
3907 }
3908
3909 if (type & SLJIT_SIMD_FLOAT) {
3910 if (elem_size == 3) {
3911 if (srcdst & SLJIT_MEM) {
3912 if (type & SLJIT_SIMD_STORE)
3913 size = lane_index == 0 ? MOVLPD_m_x : MOVHPD_m_x;
3914 else
3915 size = lane_index == 0 ? MOVLPD_x_m : MOVHPD_x_m;
3916
3917 FAIL_IF(emit_groupf(compiler, size | EX86_PREF_66 | EX86_SSE2, freg, srcdst, srcdstw));
3918
3919 /* In case of store, freg is not TMP_FREG. */
3920 } else if (type & SLJIT_SIMD_STORE) {
3921 if (lane_index == 1)
3922 return emit_groupf(compiler, MOVHLPS_x_x | EX86_SSE2, srcdst, freg, 0);
3923 return emit_sse2_load(compiler, 0, srcdst, freg, 0);
3924 } else {
3925 if (lane_index == 1)
3926 FAIL_IF(emit_groupf(compiler, MOVLHPS_x_x | EX86_SSE2, freg, srcdst, 0));
3927 else
3928 FAIL_IF(emit_sse2_store(compiler, 0, freg, 0, srcdst));
3929 }
3930 } else if (type & SLJIT_SIMD_STORE) {
3931 if (lane_index == 0)
3932 return emit_sse2_store(compiler, 1, srcdst, srcdstw, freg);
3933
3934 if (srcdst & SLJIT_MEM) {
3935 FAIL_IF(emit_groupf_ext(compiler, EXTRACTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
3936 return emit_byte(compiler, U8(lane_index));
3937 }
3938
3939 if (srcdst == freg)
3940 size = SHUFPS_x_xm | EX86_SSE2;
3941 else {
3942 if (cpu_feature_list & CPU_FEATURE_AVX) {
3943 FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | EX86_SSE2 | VEX_SSE2_OPV, srcdst, freg, freg, 0));
3944 return emit_byte(compiler, U8(lane_index));
3945 }
3946
3947 switch (lane_index) {
3948 case 1:
3949 size = MOVSHDUP_x_xm | EX86_PREF_F3 | EX86_SSE2;
3950 break;
3951 case 2:
3952 size = MOVHLPS_x_x | EX86_SSE2;
3953 break;
3954 default:
3955 SLJIT_ASSERT(lane_index == 3);
3956 size = PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2;
3957 break;
3958 }
3959 }
3960
3961 FAIL_IF(emit_groupf(compiler, size, srcdst, freg, 0));
3962
3963 size &= 0xff;
3964 if (size == SHUFPS_x_xm || size == PSHUFD_x_xm)
3965 return emit_byte(compiler, U8(lane_index));
3966
3967 return SLJIT_SUCCESS;
3968 } else {
3969 if (lane_index != 0 || (srcdst & SLJIT_MEM)) {
3970 FAIL_IF(emit_groupf_ext(compiler, INSERTPS_x_xm | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, srcdst, srcdstw));
3971 FAIL_IF(emit_byte(compiler, U8(lane_index << 4)));
3972 } else
3973 FAIL_IF(emit_sse2_store(compiler, 1, freg, 0, srcdst));
3974 }
3975
3976 if (freg != TMP_FREG || (type & SLJIT_SIMD_STORE))
3977 return SLJIT_SUCCESS;
3978
3979 SLJIT_ASSERT(reg_size == 5);
3980
3981 if (type & SLJIT_SIMD_LANE_ZERO) {
3982 FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
3983 return emit_byte(compiler, 0x4e);
3984 }
3985
3986 FAIL_IF(emit_vex_instruction(compiler, VINSERTF128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
3987 return emit_byte(compiler, 1);
3988 }
3989
3990 if (srcdst == SLJIT_IMM) {
3991 EMIT_MOV(compiler, TMP_REG1, 0, SLJIT_IMM, srcdstw);
3992 srcdst = TMP_REG1;
3993 srcdstw = 0;
3994 }
3995
3996 size = 3;
3997
3998 switch (elem_size) {
3999 case 0:
4000 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRB_rm_x_i8 : PINSRB_x_rm_i8;
4001 break;
4002 case 1:
4003 if (!(type & SLJIT_SIMD_STORE)) {
4004 size = 2;
4005 opcode = PINSRW_x_rm_i8;
4006 } else
4007 opcode = PEXTRW_rm_x_i8;
4008 break;
4009 case 2:
4010 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4011 break;
4012 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4013 case 3:
4014 /* PINSRQ / PEXTRQ */
4015 opcode = (type & SLJIT_SIMD_STORE) ? PEXTRD_rm_x_i8 : PINSRD_x_rm_i8;
4016 compiler->mode32 = 0;
4017 break;
4018 #endif /* SLJIT_CONFIG_X86_64 */
4019 }
4020
4021 inst = emit_x86_instruction(compiler, size | EX86_PREF_66 | EX86_SSE2_OP1, freg, 0, srcdst, srcdstw);
4022 FAIL_IF(!inst);
4023 inst[0] = GROUP_0F;
4024
4025 if (size == 3) {
4026 inst[1] = 0x3a;
4027 inst[2] = opcode;
4028 } else
4029 inst[1] = opcode;
4030
4031 FAIL_IF(emit_byte(compiler, U8(lane_index)));
4032
4033 if (!(type & SLJIT_SIMD_LANE_SIGNED) || (srcdst & SLJIT_MEM)) {
4034 if (freg == TMP_FREG && !(type & SLJIT_SIMD_STORE)) {
4035 SLJIT_ASSERT(reg_size == 5);
4036
4037 if (type & SLJIT_SIMD_LANE_ZERO) {
4038 FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg_orig, 0, TMP_FREG, 0));
4039 return emit_byte(compiler, 0x4e);
4040 }
4041
4042 FAIL_IF(emit_vex_instruction(compiler, VINSERTI128_y_y_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2 | VEX_SSE2_OPV, freg_orig, freg_orig, TMP_FREG, 0));
4043 return emit_byte(compiler, 1);
4044 }
4045
4046 #if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
4047 if (srcdst_orig & SLJIT_MEM)
4048 return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4049 #endif /* SLJIT_CONFIG_X86_32 */
4050 return SLJIT_SUCCESS;
4051 }
4052
4053 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4054 if (elem_size >= 3)
4055 return SLJIT_SUCCESS;
4056
4057 compiler->mode32 = (type & SLJIT_32);
4058
4059 size = 2;
4060
4061 if (elem_size == 0)
4062 size |= EX86_REX;
4063
4064 if (elem_size == 2) {
4065 if (type & SLJIT_32)
4066 return SLJIT_SUCCESS;
4067
4068 SLJIT_ASSERT(!(compiler->mode32));
4069 size = 1;
4070 }
4071
4072 inst = emit_x86_instruction(compiler, size, srcdst, 0, srcdst, 0);
4073 FAIL_IF(!inst);
4074
4075 if (size != 1) {
4076 inst[0] = GROUP_0F;
4077 inst[1] = U8((elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16);
4078 } else
4079 inst[0] = MOVSXD_r_rm;
4080 #else /* !SLJIT_CONFIG_X86_64 */
4081 if (elem_size >= 2)
4082 return SLJIT_SUCCESS;
4083
4084 FAIL_IF(emit_groupf(compiler, (elem_size == 0) ? MOVSX_r_rm8 : MOVSX_r_rm16,
4085 (srcdst_orig != 0 && FAST_IS_REG(srcdst_orig)) ? srcdst_orig : srcdst, srcdst, 0));
4086
4087 if (srcdst_orig & SLJIT_MEM)
4088 return emit_mov(compiler, srcdst_orig, srcdstw_orig, TMP_REG1, 0);
4089 #endif /* SLJIT_CONFIG_X86_64 */
4090 return SLJIT_SUCCESS;
4091 }
4092
/* Replicates (broadcasts) lane 'src_lane_index' of vector register 'src'
   into every lane of vector register 'freg'. Lane width comes from the
   type's element size; reg_size is 4 (128 bit) or 5 (256 bit). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_uw pref;
	sljit_u8 byte;
	/* Any register whose hardware encoding is 3 (asserted below): passed
	   as the modrm reg field to form the /3 opcode extension of PSRLDQ. */
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_s32 opcode3 = TMP_REG1;
#else /* !SLJIT_CONFIG_X86_32 */
	sljit_s32 opcode3 = SLJIT_S0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */
	SLJIT_ASSERT(reg_map[opcode3] == 3);

	/* 256 bit registers need AVX2; only 128/256 bit registers are supported. */
	if (reg_size == 5) {
		if (!(cpu_feature_list & CPU_FEATURE_AVX2))
			return SLJIT_ERR_UNSUPPORTED;
	} else if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_FLOAT) {
		pref = 0;
		byte = U8(src_lane_index);

		if (elem_size == 3) {
			/* 64 bit (double) lanes. */
			if (type & SLJIT_SIMD_TEST)
				return SLJIT_SUCCESS;

			if (reg_size == 5) {
				if (src_lane_index == 0)
					return emit_vex_instruction(compiler, VBROADCASTSD_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);

				/* Non-zero lane: permute the four 64 bit lanes instead. */
				FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));

				/* Duplicate the 2 bit lane selector into all four imm8 fields. */
				byte = U8(byte | (byte << 2));
				return emit_byte(compiler, U8(byte | (byte << 4)));
			}

			if (src_lane_index == 0)
				return emit_groupf(compiler, MOVDDUP_x_xm | EX86_PREF_F2 | EX86_SSE2, freg, src, 0);

			/* Changes it to SHUFPD_x_xm. */
			pref = EX86_PREF_66;
		} else if (elem_size != 2)
			return SLJIT_ERR_UNSUPPORTED;
		else if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		if (reg_size == 5) {
			/* 32 bit (float) lanes in a 256 bit register. */
			SLJIT_ASSERT(elem_size == 2);

			if (src_lane_index == 0)
				return emit_vex_instruction(compiler, VBROADCASTSS_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);

			/* Copy the 128 bit half containing the lane into both halves. */
			FAIL_IF(emit_vex_instruction(compiler, VPERMPD_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));

			/* 0x44 selects the low half, 0xee the high half. */
			byte = 0x44;
			if (src_lane_index >= 4) {
				byte = 0xee;
				src_lane_index -= 4;
			}

			FAIL_IF(emit_byte(compiler, byte));
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | VEX_256 | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, freg, freg, 0));
			byte = U8(src_lane_index);
		} else if (freg != src && (cpu_feature_list & CPU_FEATURE_AVX)) {
			/* Three operand VEX form avoids a separate register copy. */
			FAIL_IF(emit_vex_instruction(compiler, SHUFPS_x_xm | pref | EX86_SSE2 | VEX_SSE2_OPV, freg, src, src, 0));
		} else {
			if (freg != src)
				FAIL_IF(emit_groupf(compiler, MOVAPS_x_xm | pref | EX86_SSE2, freg, src, 0));

			FAIL_IF(emit_groupf(compiler, SHUFPS_x_xm | pref | EX86_SSE2, freg, freg, 0));
		}

		if (elem_size == 2) {
			/* SHUFPS imm8: four 2 bit selectors. */
			byte = U8(byte | (byte << 2));
			byte = U8(byte | (byte << 4));
		} else
			/* SHUFPD imm8: two 1 bit selectors. */
			byte = U8(byte | (byte << 1));

		return emit_byte(compiler, U8(byte));
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (elem_size == 0) {
		/* 8 bit lanes. */
		if (reg_size == 5 && src_lane_index >= 16) {
			/* Move the 64 bit group holding the lane into the low half. */
			FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
			FAIL_IF(emit_byte(compiler, src_lane_index >= 24 ? 0xff : 0xaa));
			src_lane_index &= 0x7;
			src = freg;
		}

		/* Bring the selected byte into lane 0 before broadcasting. */
		if ((freg != src && !(cpu_feature_list & CPU_FEATURE_AVX2)) || src_lane_index != 0) {
			pref = 0;

			if ((src_lane_index & 0x3) == 0) {
				/* 32 bit aligned: selectable by pshufd (66 prefix). */
				pref = EX86_PREF_66;
				byte = U8(src_lane_index >> 2);
			} else if (src_lane_index < 8 && (src_lane_index & 0x1) == 0) {
				/* 16 bit aligned in the low half: pshuflw (F2 prefix). */
				pref = EX86_PREF_F2;
				byte = U8(src_lane_index >> 1);
			} else {
				/* Otherwise shift the byte into position with PSRLDQ. */
				if (freg == src || !(cpu_feature_list & CPU_FEATURE_AVX2)) {
					if (freg != src)
						FAIL_IF(emit_groupf(compiler, MOVDQA_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));

					FAIL_IF(emit_groupf(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2, opcode3, freg, 0));
				} else
					FAIL_IF(emit_vex_instruction(compiler, PSRLDQ_x | EX86_PREF_66 | EX86_SSE2_OP2 | VEX_SSE2_OPV, opcode3, freg, src, 0));

				FAIL_IF(emit_byte(compiler, U8(src_lane_index)));
			}

			if (pref != 0) {
				FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
				FAIL_IF(emit_byte(compiler, byte));
			}

			src = freg;
		}

		if (cpu_feature_list & CPU_FEATURE_AVX2)
			return emit_vex_instruction(compiler, VPBROADCASTB_x_xm | (reg_size == 5 ? VEX_256 : 0) | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, 0);

		/* SSE fallback: PSHUFB with an all-zero mask broadcasts byte 0. */
		SLJIT_ASSERT(reg_size == 4);
		FAIL_IF(emit_groupf(compiler, PXOR_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, TMP_FREG, 0));
		return emit_groupf_ext(compiler, PSHUFB_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, TMP_FREG, 0);
	}

	/* Lane 0 with AVX2: a single direct broadcast instruction suffices. */
	if ((cpu_feature_list & CPU_FEATURE_AVX2) && src_lane_index == 0 && elem_size <= 3) {
		switch (elem_size) {
		case 1:
			pref = VPBROADCASTW_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		case 2:
			pref = VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		default:
			pref = VPBROADCASTQ_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2;
			break;
		}

		if (reg_size == 5)
			pref |= VEX_256;

		return emit_vex_instruction(compiler, pref, freg, 0, src, 0);
	}

	if (reg_size == 5) {
		/* 256 bit registers without a usable direct broadcast. */
		switch (elem_size) {
		case 1:
			/* 16 bit lanes: F2 selects pshuflw, F3 selects pshufhw. */
			byte = U8(src_lane_index & 0x3);
			src_lane_index >>= 2;
			pref = PSHUFLW_x_xm | VEX_256 | ((src_lane_index & 1) == 0 ? EX86_PREF_F2 : EX86_PREF_F3) | EX86_SSE2;
			break;
		case 2:
			byte = U8(src_lane_index & 0x3);
			src_lane_index >>= 1;
			pref = PSHUFD_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2;
			break;
		case 3:
			/* 64 bit lanes: the final VPERMQ below is sufficient. */
			pref = 0;
			break;
		default:
			/* Wider lanes: duplicate the selected 128 bit half. */
			FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
			return emit_byte(compiler, U8(src_lane_index == 0 ? 0x44 : 0xee));
		}

		if (pref != 0) {
			FAIL_IF(emit_vex_instruction(compiler, pref, freg, 0, src, 0));
			/* Duplicate the 2 bit selector into all four imm8 fields. */
			byte = U8(byte | (byte << 2));
			FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));

			if (src_lane_index == 0)
				return emit_vex_instruction(compiler, VPBROADCASTQ_x_xm | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);

			src = freg;
		}

		FAIL_IF(emit_vex_instruction(compiler, VPERMQ_y_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | VEX_W | EX86_SSE2, freg, 0, src, 0));
		byte = U8(src_lane_index);
		byte = U8(byte | (byte << 2));
		return emit_byte(compiler, U8(byte | (byte << 4)));
	}

	/* 128 bit registers, 16/32/64 bit lanes: PSHUF* based replicate. */
	switch (elem_size) {
	case 1:
		/* 16 bit lanes: duplicate the word inside its 32 bit group first
		   (F2 = pshuflw, F3 = pshufhw), then continue like 32 bit lanes. */
		byte = U8(src_lane_index & 0x3);
		src_lane_index >>= 1;
		pref = (src_lane_index & 2) == 0 ? EX86_PREF_F2 : EX86_PREF_F3;

		FAIL_IF(emit_groupf(compiler, PSHUFLW_x_xm | pref | EX86_SSE2, freg, src, 0));
		byte = U8(byte | (byte << 2));
		FAIL_IF(emit_byte(compiler, U8(byte | (byte << 4))));

		if ((cpu_feature_list & CPU_FEATURE_AVX2) && pref == EX86_PREF_F2)
			return emit_vex_instruction(compiler, VPBROADCASTD_x_xm | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, freg, 0);

		src = freg;
		/* fallthrough */
	case 2:
		byte = U8(src_lane_index);
		byte = U8(byte | (byte << 2));
		break;
	default:
		/* 64 bit lanes: select both 32 bit halves of the chosen lane. */
		byte = U8(src_lane_index << 1);
		byte = U8(byte | (byte << 2) | 0x4);
		break;
	}

	FAIL_IF(emit_groupf(compiler, PSHUFD_x_xm | EX86_PREF_66 | EX86_SSE2, freg, src, 0));
	return emit_byte(compiler, U8(byte | (byte << 4)));
}
4316
4317 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4318 sljit_s32 freg,
4319 sljit_s32 src, sljit_sw srcw)
4320 {
4321 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4322 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4323 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4324 sljit_u8 opcode;
4325
4326 CHECK_ERROR();
4327 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4328
4329 ADJUST_LOCAL_OFFSET(src, srcw);
4330
4331 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4332 compiler->mode32 = 1;
4333 #endif /* SLJIT_CONFIG_X86_64 */
4334
4335 if (reg_size == 5) {
4336 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4337 return SLJIT_ERR_UNSUPPORTED;
4338 } else if (reg_size != 4)
4339 return SLJIT_ERR_UNSUPPORTED;
4340
4341 if (type & SLJIT_SIMD_FLOAT) {
4342 if (elem_size != 2 || elem2_size != 3)
4343 return SLJIT_ERR_UNSUPPORTED;
4344
4345 if (type & SLJIT_SIMD_TEST)
4346 return SLJIT_SUCCESS;
4347
4348 if (reg_size == 4)
4349 return emit_groupf(compiler, CVTPS2PD_x_xm | EX86_SSE2, freg, src, srcw);
4350 return emit_vex_instruction(compiler, CVTPS2PD_x_xm | VEX_256 | EX86_SSE2, freg, 0, src, srcw);
4351 }
4352
4353 switch (elem_size) {
4354 case 0:
4355 if (elem2_size == 1)
4356 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBW_x_xm : PMOVZXBW_x_xm;
4357 else if (elem2_size == 2)
4358 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBD_x_xm : PMOVZXBD_x_xm;
4359 else if (elem2_size == 3)
4360 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXBQ_x_xm : PMOVZXBQ_x_xm;
4361 else
4362 return SLJIT_ERR_UNSUPPORTED;
4363 break;
4364 case 1:
4365 if (elem2_size == 2)
4366 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWD_x_xm : PMOVZXWD_x_xm;
4367 else if (elem2_size == 3)
4368 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXWQ_x_xm : PMOVZXWQ_x_xm;
4369 else
4370 return SLJIT_ERR_UNSUPPORTED;
4371 break;
4372 case 2:
4373 if (elem2_size == 3)
4374 opcode = (type & SLJIT_SIMD_EXTEND_SIGNED) ? PMOVSXDQ_x_xm : PMOVZXDQ_x_xm;
4375 else
4376 return SLJIT_ERR_UNSUPPORTED;
4377 break;
4378 default:
4379 return SLJIT_ERR_UNSUPPORTED;
4380 }
4381
4382 if (type & SLJIT_SIMD_TEST)
4383 return SLJIT_SUCCESS;
4384
4385 if (reg_size == 4)
4386 return emit_groupf_ext(compiler, opcode | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, src, srcw);
4387 return emit_vex_instruction(compiler, opcode | VEX_256 | EX86_PREF_66 | VEX_OP_0F38 | EX86_SSE2, freg, 0, src, srcw);
4388 }
4389
/* Collects the sign bit of every lane of 'freg' into the general purpose
   register or memory destination 'dst' (one bit per lane). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 dst_r;
	sljit_uw pref;
	sljit_u8 *inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 1;
#endif /* SLJIT_CONFIG_X86_64 */

	/* Lanes wider than 64 bits and sub-32-bit float lanes are unsupported. */
	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (reg_size == 4) {
		/* 128 bit registers. */
		if (type & SLJIT_SIMD_TEST)
			return SLJIT_SUCCESS;

		pref = EX86_PREF_66 | EX86_SSE2_OP2;

		switch (elem_size) {
		case 1:
			/* 16 bit lanes: narrow to bytes first, then use PMOVMSKB. */
			FAIL_IF(emit_groupf(compiler, PACKSSWB_x_xm | EX86_PREF_66 | EX86_SSE2, TMP_FREG, freg, 0));
			freg = TMP_FREG;
			break;
		case 2:
			/* MOVMSKPS takes no 66 prefix. */
			pref = EX86_SSE2_OP2;
			break;
		}

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
		FAIL_IF(emit_groupf(compiler, (elem_size < 2 ? PMOVMSKB_r_x : MOVMSKPS_r_x) | pref, dst_r, freg, 0));

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */

		if (elem_size == 1) {
			/* The packed lanes of freg ended up in the upper 8 bytes of
			   TMP_FREG; shift their mask bits into the low 8 bits. */
			inst = emit_x86_instruction(compiler, 1 | EX86_SHIFT_INS, SLJIT_IMM, 8, dst_r, 0);
			FAIL_IF(!inst);
			inst[1] |= SHR;
		}

		if (dst_r == TMP_REG1)
			return emit_mov(compiler, dst, dstw, TMP_REG1, 0);

		return SLJIT_SUCCESS;
	}

	/* 256 bit registers need AVX2. */
	if (reg_size != 5 || !(cpu_feature_list & CPU_FEATURE_AVX2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (elem_size == 1) {
		/* 16 bit lanes: narrow all sixteen lanes to bytes (high half
		   extracted into TMP_FREG first), then collect with PMOVMSKB. */
		FAIL_IF(emit_vex_instruction(compiler, VEXTRACTI128_x_ym | VEX_256 | EX86_PREF_66 | VEX_OP_0F3A | EX86_SSE2, freg, 0, TMP_FREG, 0));
		FAIL_IF(emit_byte(compiler, 1));
		FAIL_IF(emit_vex_instruction(compiler, PACKSSWB_x_xm | VEX_256 | EX86_PREF_66 | EX86_SSE2 | VEX_SSE2_OPV, TMP_FREG, freg, TMP_FREG, 0));
		FAIL_IF(emit_groupf(compiler, PMOVMSKB_r_x | EX86_PREF_66 | EX86_SSE2_OP2, dst_r, TMP_FREG, 0));
	} else {
		pref = MOVMSKPS_r_x | VEX_256 | EX86_SSE2_OP2;

		if (elem_size == 0)
			pref = PMOVMSKB_r_x | VEX_256 | EX86_PREF_66 | EX86_SSE2_OP2;
		else if (elem_size == 3)
			/* 66 prefix turns it into MOVMSKPD. */
			pref |= EX86_PREF_66;

		FAIL_IF(emit_vex_instruction(compiler, pref, dst_r, 0, freg, 0));
	}

	if (dst_r == TMP_REG1) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = type & SLJIT_32;
#endif /* SLJIT_CONFIG_X86_64 */
		return emit_mov(compiler, dst, dstw, TMP_REG1, 0);
	}

	return SLJIT_SUCCESS;
}
4481
4482 static sljit_s32 emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
4483 sljit_s32 dst_freg, sljit_s32 src_freg)
4484 {
4485 sljit_uw op = ((type & SLJIT_SIMD_FLOAT) ? MOVAPS_x_xm : MOVDQA_x_xm) | EX86_SSE2;
4486
4487 SLJIT_ASSERT(SLJIT_SIMD_GET_REG_SIZE(type) == 4);
4488
4489 if (!(type & SLJIT_SIMD_FLOAT) || SLJIT_SIMD_GET_ELEM_SIZE(type) == 3)
4490 op |= EX86_PREF_66;
4491
4492 return emit_groupf(compiler, op, dst_freg, src_freg, 0);
4493 }
4494
4495 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4496 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4497 {
4498 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4499 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4500 sljit_s32 needs_move = 0;
4501 sljit_uw op = 0;
4502
4503 CHECK_ERROR();
4504 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4505
4506 #if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
4507 compiler->mode32 = 1;
4508 #endif /* SLJIT_CONFIG_X86_64 */
4509
4510 if (reg_size == 5) {
4511 if (!(cpu_feature_list & CPU_FEATURE_AVX2))
4512 return SLJIT_ERR_UNSUPPORTED;
4513 } else if (reg_size != 4)
4514 return SLJIT_ERR_UNSUPPORTED;
4515
4516 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4517 return SLJIT_ERR_UNSUPPORTED;
4518
4519 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4520 case SLJIT_SIMD_OP2_AND:
4521 op = (type & SLJIT_SIMD_FLOAT) ? ANDPD_x_xm : PAND_x_xm;
4522
4523 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4524 op |= EX86_PREF_66;
4525 break;
4526 case SLJIT_SIMD_OP2_OR:
4527 op = (type & SLJIT_SIMD_FLOAT) ? ORPD_x_xm : POR_x_xm;
4528
4529 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4530 op |= EX86_PREF_66;
4531 break;
4532 case SLJIT_SIMD_OP2_XOR:
4533 op = (type & SLJIT_SIMD_FLOAT) ? XORPD_x_xm : PXOR_x_xm;
4534
4535 if (!(type & SLJIT_SIMD_FLOAT) || elem_size == 3)
4536 op |= EX86_PREF_66;
4537 break;
4538 }
4539
4540 if (type & SLJIT_SIMD_TEST)
4541 return SLJIT_SUCCESS;
4542
4543 needs_move = dst_freg != src1_freg && dst_freg != src2_freg;
4544
4545 if (reg_size == 5 || (needs_move && (cpu_feature_list & CPU_FEATURE_AVX2))) {
4546 if (reg_size == 5)
4547 op |= VEX_256;
4548
4549 return emit_vex_instruction(compiler, op | EX86_SSE2 | VEX_SSE2_OPV, dst_freg, src1_freg, src2_freg, 0);
4550 }
4551
4552 if (needs_move) {
4553 FAIL_IF(emit_simd_mov(compiler, type, dst_freg, src1_freg));
4554 } else if (dst_freg != src1_freg) {
4555 SLJIT_ASSERT(dst_freg == src2_freg);
4556 src2_freg = src1_freg;
4557 }
4558
4559 FAIL_IF(emit_groupf(compiler, op | EX86_SSE2, dst_freg, src2_freg, 0));
4560 return SLJIT_SUCCESS;
4561 }
4562
/* Emits an atomic load from [mem_reg] into dst_reg. A plain mov is
   emitted (NOTE: naturally aligned loads are atomic on x86), so this
   simply delegates to sljit_emit_op1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	/* The arguments were already validated above. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
}
4573
/* Emits an atomic compare-and-swap (LOCK CMPXCHG): if [mem_reg] equals
   temp_reg, src_reg is stored into [mem_reg]. CMPXCHG implicitly uses
   EAX/RAX (SLJIT_R0) as the comparand, so temp_reg is shuffled into
   SLJIT_R0 first and the clobbered registers are restored afterwards. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_uw pref;
	sljit_s32 free_reg = TMP_REG1;
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	sljit_sw srcw = 0;
	sljit_sw tempw = 0;
#endif /* SLJIT_CONFIG_X86_32 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
	CHECK_EXTRA_REGS(src_reg, srcw, (void)0);
	CHECK_EXTRA_REGS(temp_reg, tempw, (void)0);

	/* After CHECK_EXTRA_REGS a virtual register appears as a stack slot. */
	SLJIT_ASSERT(FAST_IS_REG(src_reg) || src_reg == SLJIT_MEM1(SLJIT_SP));
	SLJIT_ASSERT(FAST_IS_REG(temp_reg) || temp_reg == SLJIT_MEM1(SLJIT_SP));

	op = GET_OPCODE(op);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	if ((src_reg & SLJIT_MEM) || (op == SLJIT_MOV_U8 && reg_map[src_reg] >= 4)) {
		/* Src is virtual register or its low byte is not accessible. */
		SLJIT_ASSERT(src_reg != SLJIT_R1);
		free_reg = src_reg;

		EMIT_MOV(compiler, TMP_REG1, 0, src_reg, srcw);
		src_reg = TMP_REG1;

		/* NOTE(review): src_reg was just reassigned to TMP_REG1, so this
		   comparison can never match a user register; it looks like it was
		   meant to test the original src_reg (now in free_reg) — verify. */
		if (mem_reg == src_reg)
			mem_reg = TMP_REG1;
	}
#endif /* SLJIT_CONFIG_X86_32 */

	/* Move the comparand into SLJIT_R0 (EAX/RAX), saving the old value
	   of R0 in free_reg and redirecting any operand that lived in R0. */
	if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;

		EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
		EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, 0);

		if (src_reg == SLJIT_R0)
			src_reg = free_reg;
		if (mem_reg == SLJIT_R0)
			mem_reg = free_reg;
#else /* !SLJIT_CONFIG_X86_64 */
		if (src_reg == TMP_REG1 && mem_reg == SLJIT_R0 && (free_reg & SLJIT_MEM)) {
			/* No register left: spill SLJIT_R1 to the stack and use it
			   for the memory address. */
			EMIT_MOV(compiler, SLJIT_MEM1(SLJIT_SP), 0, SLJIT_R1, 0);
			EMIT_MOV(compiler, SLJIT_R1, 0, SLJIT_R0, 0);
			EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);

			mem_reg = SLJIT_R1;
			free_reg = SLJIT_R1;
		} else {
			EMIT_MOV(compiler, free_reg, 0, SLJIT_R0, 0);
			EMIT_MOV(compiler, SLJIT_R0, 0, temp_reg, tempw);

			if (src_reg == SLJIT_R0)
				src_reg = free_reg;
			if (mem_reg == SLJIT_R0)
				mem_reg = free_reg;
		}
#endif /* SLJIT_CONFIG_X86_64 */
	}

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* 32 bit operand size unless a full word/pointer is stored. */
	compiler->mode32 = op != SLJIT_MOV && op != SLJIT_MOV_P;
#endif /* SLJIT_CONFIG_X86_64 */

	/* Lock prefix. */
	FAIL_IF(emit_byte(compiler, GROUP_LOCK));

	pref = 0;
	if (op == SLJIT_MOV_U16)
		pref = EX86_HALF_ARG | EX86_PREF_66;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Force a REX prefix so the low byte of any register is encodable. */
	if (op == SLJIT_MOV_U8)
		pref = EX86_REX;
#endif /* SLJIT_CONFIG_X86_64 */

	FAIL_IF(emit_groupf(compiler, (op == SLJIT_MOV_U8 ? CMPXCHG_rm8_r : CMPXCHG_rm_r) | pref, src_reg, SLJIT_MEM1(mem_reg), 0));

	/* Restore the original value of SLJIT_R0 (and SLJIT_R1 if spilled). */
	if (temp_reg != SLJIT_R0) {
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
		compiler->mode32 = 0;
		return emit_mov(compiler, SLJIT_R0, 0, TMP_REG1, 0);
#else /* !SLJIT_CONFIG_X86_64 */
		EMIT_MOV(compiler, SLJIT_R0, 0, free_reg, 0);
		if (free_reg != TMP_REG1)
			return emit_mov(compiler, free_reg, 0, (free_reg == SLJIT_R1) ? SLJIT_MEM1(SLJIT_SP) : TMP_REG1, 0);
#endif /* SLJIT_CONFIG_X86_64 */
	}
	return SLJIT_SUCCESS;
}
4669
/* Computes the address of a local variable (SLJIT_SP + offset) into dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
{
	CHECK_ERROR();
	CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	compiler->mode32 = 0;
#endif

	/* Translate the user visible offset to the real frame offset. */
	ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (NOT_HALFWORD(offset)) {
		/* Offset does not fit in a 32 bit immediate: load it into a
		   register first and use a register+register LEA. */
		FAIL_IF(emit_load_imm64(compiler, TMP_REG1, offset));
#if (defined SLJIT_DEBUG && SLJIT_DEBUG)
		SLJIT_ASSERT(emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0) != SLJIT_ERR_UNSUPPORTED);
		return compiler->error;
#else
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, TMP_REG1, 0);
#endif
	}
#endif

	if (offset != 0)
		return emit_lea_binary(compiler, dst, dstw, SLJIT_SP, 0, SLJIT_IMM, offset);
	return emit_mov(compiler, dst, dstw, SLJIT_SP, 0);
}
4700
/* Emits a constant load whose immediate can be patched later by
   sljit_set_const, and records it in the compiler's constant list. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	sljit_u8 *inst;
	struct sljit_const *const_;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* A 64 bit constant must be materialized in a register first. */
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, init_value))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, init_value))
		return NULL;
#endif

	/* Zero-length marker record (length 0, type 2) in the instruction
	   buffer; consumed during code generation to locate the patch site. */
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	inst[0] = 0;
	inst[1] = 2;

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* Memory destination: store the register holding the constant. */
	if (dst & SLJIT_MEM)
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
#endif

	return const_;
}
4744
/* Emits a placeholder address load whose immediate is filled in with a
   label's address when the code is generated. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_u8 *inst;
#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	sljit_s32 reg;
	sljit_uw start_size;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	CHECK_EXTRA_REGS(dst, dstw, (void)0);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	/* The 64 bit address is materialized in a register first
	   (a zero immediate, patched later). */
	compiler->mode32 = 0;
	reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (emit_load_imm64(compiler, reg, 0))
		return NULL;
#else
	if (emit_mov(compiler, dst, dstw, SLJIT_IMM, 0))
		return NULL;
#endif

#if (defined SLJIT_CONFIG_X86_64 && SLJIT_CONFIG_X86_64)
	if (dst & SLJIT_MEM) {
		/* Record the size of the trailing store in 'flags', so the
		   immediate of the load above can be located when patching. */
		start_size = compiler->size;
		if (emit_mov(compiler, dst, dstw, TMP_REG1, 0))
			return NULL;
		put_label->flags = compiler->size - start_size;
	}
#endif

	/* Zero-length marker record (length 0, type 3) consumed during
	   code generation. */
	inst = (sljit_u8*)ensure_buf(compiler, 2);
	PTR_FAIL_IF(!inst);

	inst[0] = 0;
	inst[1] = 3;

	return put_label;
}
4792
/* Patches the machine word at 'addr' so the jump targets 'new_target'. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	/* Only used by the 32 bit (relative) branch below. */
	SLJIT_UNUSED_ARG(executable_offset);

	/* Make the word writable while patching (WX protection). */
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 0);
#if (defined SLJIT_CONFIG_X86_32 && SLJIT_CONFIG_X86_32)
	/* 32 bit: the stored value is relative to the end of the 4 byte field. */
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)(new_target - (addr + 4) - (sljit_uw)executable_offset));
#else
	/* 64 bit: the stored value is the absolute target address. */
	sljit_unaligned_store_sw((void*)addr, (sljit_sw)new_target);
#endif
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_uw)), 1);
}
4805
/* Patches the immediate of a constant previously emitted by
   sljit_emit_const at 'addr' to 'new_constant'. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	SLJIT_UNUSED_ARG(executable_offset);

	/* Make the word writable while patching (WX protection). */
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 0);
	sljit_unaligned_store_sw((void*)addr, new_constant);
	SLJIT_UPDATE_WX_FLAGS((void*)addr, (void*)(addr + sizeof(sljit_sw)), 1);
}
4814