1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 return "LOONGARCH" SLJIT_CPUINFO;
30 }
31
32 typedef sljit_u32 sljit_ins;
33
34 #define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
35 #define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
36 #define TMP_REG3 (SLJIT_NUMBER_OF_REGISTERS + 4)
37 #define TMP_ZERO 0
38
39 /* Flags are kept in volatile registers. */
40 #define EQUAL_FLAG (SLJIT_NUMBER_OF_REGISTERS + 5)
41 #define RETURN_ADDR_REG TMP_REG2
42 #define OTHER_FLAG (SLJIT_NUMBER_OF_REGISTERS + 6)
43
44 #define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45 #define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
46
47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
48 0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
49 };
50
51 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
52 0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
53 };
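/* Added note (not part of the original comments): reg_map translates SLJIT register
   indices into LoongArch machine register numbers, e.g. reg_map[SLJIT_R0] == 4 selects
   $a0, while the trailing TMP_REG1/TMP_REG2/TMP_REG3 slots map to scratch registers
   ($t1, $ra and $t2 in this table) and the two flag registers to $t0/$t3, matching the
   "flags are kept in volatile registers" comment above. freg_map plays the same role
   for the floating-point registers. Index 0 is unused in both tables. */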
54
55 /* --------------------------------------------------------------------- */
56 /* Instruction forms */
57 /* --------------------------------------------------------------------- */
58
59 /*
60 LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):
61
62 | Format name | Composition |
63 | 2R | Opcode + Rj + Rd |
64 | 3R | Opcode + Rk + Rj + Rd |
65 | 4R | Opcode + Ra + Rk + Rj + Rd |
66 | 2RI8 | Opcode + I8 + Rj + Rd |
67 | 2RI12 | Opcode + I12 + Rj + Rd |
68 | 2RI14 | Opcode + I14 + Rj + Rd |
69 | 2RI16 | Opcode + I16 + Rj + Rd |
70 | 1RI21 | Opcode + I21L + Rj + I21H |
71 | I26 | Opcode + I26L + I26H |
72
73 Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
74 I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
75 lower parts in the instruction word, denoted by the “L” and “H” suffixes. */
76
77 #define RD(rd) ((sljit_ins)reg_map[rd])
78 #define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
79 #define RK(rk) ((sljit_ins)reg_map[rk] << 10)
80 #define RA(ra) ((sljit_ins)reg_map[ra] << 15)
81
82 #define FD(fd) ((sljit_ins)reg_map[fd])
83 #define FRD(fd) ((sljit_ins)freg_map[fd])
84 #define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
85 #define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
86 #define FRA(fa) ((sljit_ins)freg_map[fa] << 15)
87
88 #define IMM_V(imm) ((sljit_ins)(imm) << 10)
89 #define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
90 #define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
91 #define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
92 #define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
93 #define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
94 #define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
95 #define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))
96
97 #define OPC_I26(opc) ((sljit_ins)(opc) << 26)
98 #define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
99 #define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
100 #define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
101 #define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
102 #define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
103 #define OPC_4R(opc) ((sljit_ins)(opc) << 20)
104 #define OPC_3R(opc) ((sljit_ins)(opc) << 15)
105 #define OPC_2R(opc) ((sljit_ins)(opc) << 10)
106 #define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
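/* Illustrative sketch (added for clarity): an instruction word is assembled by OR-ing an
   opcode macro with the field macros above. For example, the 3R form "add.d rd, rj, rk"
   is emitted as
       push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2));
   and the 2RI12 form "addi.d rd, rj, imm" as
       push_inst(compiler, ADDI_D | RD(dst) | RJ(src) | IMM_I12(imm));
   where dst/src are SLJIT register indices and imm is a signed 12-bit immediate. */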
107
108 /* Arithmetic operation instructions */
109 #define ADD_W OPC_3R(0x20)
110 #define ADD_D OPC_3R(0x21)
111 #define SUB_W OPC_3R(0x22)
112 #define SUB_D OPC_3R(0x23)
113 #define ADDI_W OPC_2RI12(0xa)
114 #define ADDI_D OPC_2RI12(0xb)
115 #define ANDI OPC_2RI12(0xd)
116 #define ORI OPC_2RI12(0xe)
117 #define XORI OPC_2RI12(0xf)
118 #define ADDU16I_D OPC_2RI16(0x4)
119 #define LU12I_W OPC_1RI20(0xa)
120 #define LU32I_D OPC_1RI20(0xb)
121 #define LU52I_D OPC_2RI12(0xc)
122 #define SLT OPC_3R(0x24)
123 #define SLTU OPC_3R(0x25)
124 #define SLTI OPC_2RI12(0x8)
125 #define SLTUI OPC_2RI12(0x9)
126 #define PCADDI OPC_1RI20(0xc)
127 #define PCALAU12I OPC_1RI20(0xd)
128 #define PCADDU12I OPC_1RI20(0xe)
129 #define PCADDU18I OPC_1RI20(0xf)
130 #define NOR OPC_3R(0x28)
131 #define AND OPC_3R(0x29)
132 #define OR OPC_3R(0x2a)
133 #define XOR OPC_3R(0x2b)
134 #define ORN OPC_3R(0x2c)
135 #define ANDN OPC_3R(0x2d)
136 #define MUL_W OPC_3R(0x38)
137 #define MULH_W OPC_3R(0x39)
138 #define MULH_WU OPC_3R(0x3a)
139 #define MUL_D OPC_3R(0x3b)
140 #define MULH_D OPC_3R(0x3c)
141 #define MULH_DU OPC_3R(0x3d)
142 #define MULW_D_W OPC_3R(0x3e)
143 #define MULW_D_WU OPC_3R(0x3f)
144 #define DIV_W OPC_3R(0x40)
145 #define MOD_W OPC_3R(0x41)
146 #define DIV_WU OPC_3R(0x42)
147 #define MOD_WU OPC_3R(0x43)
148 #define DIV_D OPC_3R(0x44)
149 #define MOD_D OPC_3R(0x45)
150 #define DIV_DU OPC_3R(0x46)
151 #define MOD_DU OPC_3R(0x47)
152
153 /* Bit-shift instructions */
154 #define SLL_W OPC_3R(0x2e)
155 #define SRL_W OPC_3R(0x2f)
156 #define SRA_W OPC_3R(0x30)
157 #define SLL_D OPC_3R(0x31)
158 #define SRL_D OPC_3R(0x32)
159 #define SRA_D OPC_3R(0x33)
160 #define ROTR_W OPC_3R(0x36)
161 #define ROTR_D OPC_3R(0x37)
162 #define SLLI_W OPC_3R(0x81)
163 #define SLLI_D ((sljit_ins)(0x41) << 16)
164 #define SRLI_W OPC_3R(0x89)
165 #define SRLI_D ((sljit_ins)(0x45) << 16)
166 #define SRAI_W OPC_3R(0x91)
167 #define SRAI_D ((sljit_ins)(0x49) << 16)
168 #define ROTRI_W OPC_3R(0x99)
169 #define ROTRI_D ((sljit_ins)(0x4d) << 16)
170
171 /* Bit-manipulation instructions */
172 #define CLO_W OPC_2R(0x4)
173 #define CLZ_W OPC_2R(0x5)
174 #define CTO_W OPC_2R(0x6)
175 #define CTZ_W OPC_2R(0x7)
176 #define CLO_D OPC_2R(0x8)
177 #define CLZ_D OPC_2R(0x9)
178 #define CTO_D OPC_2R(0xa)
179 #define CTZ_D OPC_2R(0xb)
180 #define REVB_2H OPC_2R(0xc)
181 #define REVB_4H OPC_2R(0xd)
182 #define REVB_2W OPC_2R(0xe)
183 #define REVB_D OPC_2R(0xf)
184 #define REVH_2W OPC_2R(0x10)
185 #define REVH_D OPC_2R(0x11)
186 #define BITREV_4B OPC_2R(0x12)
187 #define BITREV_8B OPC_2R(0x13)
188 #define BITREV_W OPC_2R(0x14)
189 #define BITREV_D OPC_2R(0x15)
190 #define EXT_W_H OPC_2R(0x16)
191 #define EXT_W_B OPC_2R(0x17)
192 #define BSTRINS_W (0x1 << 22 | 1 << 21)
193 #define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)
194 #define BSTRINS_D (0x2 << 22)
195 #define BSTRPICK_D (0x3 << 22)
196
197 /* Branch instructions */
198 #define BEQZ OPC_1RI21(0x10)
199 #define BNEZ OPC_1RI21(0x11)
200 #define JIRL OPC_2RI16(0x13)
201 #define B OPC_I26(0x14)
202 #define BL OPC_I26(0x15)
203 #define BEQ OPC_2RI16(0x16)
204 #define BNE OPC_2RI16(0x17)
205 #define BLT OPC_2RI16(0x18)
206 #define BGE OPC_2RI16(0x19)
207 #define BLTU OPC_2RI16(0x1a)
208 #define BGEU OPC_2RI16(0x1b)
209
210 /* Memory access instructions */
211 #define LD_B OPC_2RI12(0xa0)
212 #define LD_H OPC_2RI12(0xa1)
213 #define LD_W OPC_2RI12(0xa2)
214 #define LD_D OPC_2RI12(0xa3)
215
216 #define ST_B OPC_2RI12(0xa4)
217 #define ST_H OPC_2RI12(0xa5)
218 #define ST_W OPC_2RI12(0xa6)
219 #define ST_D OPC_2RI12(0xa7)
220
221 #define LD_BU OPC_2RI12(0xa8)
222 #define LD_HU OPC_2RI12(0xa9)
223 #define LD_WU OPC_2RI12(0xaa)
224
225 #define LDX_B OPC_3R(0x7000)
226 #define LDX_H OPC_3R(0x7008)
227 #define LDX_W OPC_3R(0x7010)
228 #define LDX_D OPC_3R(0x7018)
229
230 #define STX_B OPC_3R(0x7020)
231 #define STX_H OPC_3R(0x7028)
232 #define STX_W OPC_3R(0x7030)
233 #define STX_D OPC_3R(0x7038)
234
235 #define LDX_BU OPC_3R(0x7040)
236 #define LDX_HU OPC_3R(0x7048)
237 #define LDX_WU OPC_3R(0x7050)
238
239 #define PRELD OPC_2RI12(0xab)
240
241 /* Atomic memory access instructions */
242 #define LL_W OPC_2RI14(0x20)
243 #define SC_W OPC_2RI14(0x21)
244 #define LL_D OPC_2RI14(0x22)
245 #define SC_D OPC_2RI14(0x23)
246
247 /* LoongArch V1.10 Instructions */
248 #define AMCAS_B OPC_3R(0x70B0)
249 #define AMCAS_H OPC_3R(0x70B1)
250 #define AMCAS_W OPC_3R(0x70B2)
251 #define AMCAS_D OPC_3R(0x70B3)
252
253 /* Other instructions */
254 #define BREAK OPC_3R(0x54)
255 #define DBGCALL OPC_3R(0x55)
256 #define SYSCALL OPC_3R(0x56)
257
258 /* Basic Floating-Point Instructions */
259 /* Floating-Point Arithmetic Operation Instructions */
260 #define FADD_S OPC_3R(0x201)
261 #define FADD_D OPC_3R(0x202)
262 #define FSUB_S OPC_3R(0x205)
263 #define FSUB_D OPC_3R(0x206)
264 #define FMUL_S OPC_3R(0x209)
265 #define FMUL_D OPC_3R(0x20a)
266 #define FDIV_S OPC_3R(0x20d)
267 #define FDIV_D OPC_3R(0x20e)
268 #define FCMP_COND_S OPC_4R(0xc1)
269 #define FCMP_COND_D OPC_4R(0xc2)
270 #define FCOPYSIGN_S OPC_3R(0x225)
271 #define FCOPYSIGN_D OPC_3R(0x226)
272 #define FSEL OPC_4R(0xd0)
273 #define FABS_S OPC_2R(0x4501)
274 #define FABS_D OPC_2R(0x4502)
275 #define FNEG_S OPC_2R(0x4505)
276 #define FNEG_D OPC_2R(0x4506)
277 #define FMOV_S OPC_2R(0x4525)
278 #define FMOV_D OPC_2R(0x4526)
279
280 /* Floating-Point Conversion Instructions */
281 #define FCVT_S_D OPC_2R(0x4646)
282 #define FCVT_D_S OPC_2R(0x4649)
283 #define FTINTRZ_W_S OPC_2R(0x46a1)
284 #define FTINTRZ_W_D OPC_2R(0x46a2)
285 #define FTINTRZ_L_S OPC_2R(0x46a9)
286 #define FTINTRZ_L_D OPC_2R(0x46aa)
287 #define FFINT_S_W OPC_2R(0x4744)
288 #define FFINT_S_L OPC_2R(0x4746)
289 #define FFINT_D_W OPC_2R(0x4748)
290 #define FFINT_D_L OPC_2R(0x474a)
291
292 /* Floating-Point Move Instructions */
293 #define FMOV_S OPC_2R(0x4525)
294 #define FMOV_D OPC_2R(0x4526)
295 #define MOVGR2FR_W OPC_2R(0x4529)
296 #define MOVGR2FR_D OPC_2R(0x452a)
297 #define MOVGR2FRH_W OPC_2R(0x452b)
298 #define MOVFR2GR_S OPC_2R(0x452d)
299 #define MOVFR2GR_D OPC_2R(0x452e)
300 #define MOVFRH2GR_S OPC_2R(0x452f)
301 #define MOVGR2FCSR OPC_2R(0x4530)
302 #define MOVFCSR2GR OPC_2R(0x4532)
303 #define MOVFR2CF OPC_2R(0x4534)
304 #define MOVCF2FR OPC_2R(0x4535)
305 #define MOVGR2CF OPC_2R(0x4536)
306 #define MOVCF2GR OPC_2R(0x4537)
307
308 /* Floating-Point Branch Instructions */
309 #define BCEQZ OPC_I26(0x12)
310 #define BCNEZ (OPC_I26(0x12) | ((sljit_ins)1 << 8))
311
312 /* Floating-Point Common Memory Access Instructions */
313 #define FLD_S OPC_2RI12(0xac)
314 #define FLD_D OPC_2RI12(0xae)
315 #define FST_S OPC_2RI12(0xad)
316 #define FST_D OPC_2RI12(0xaf)
317
318 #define FLDX_S OPC_3R(0x7060)
319 #define FLDX_D OPC_3R(0x7068)
320 #define FSTX_S OPC_3R(0x7070)
321 #define FSTX_D OPC_3R(0x7078)
322
323 /* Vector Instructions */
324
325 /* Vector Arithmetic Instructions */
326 #define VOR_V OPC_3R(0xe24d)
327 #define VXOR_V OPC_3R(0xe24e)
328 #define VAND_V OPC_3R(0xe24c)
329 #define VMSKLTZ OPC_2R(0x1ca710)
330
331 /* Vector Memory Access Instructions */
332 #define VLD OPC_2RI12(0xb0)
333 #define VST OPC_2RI12(0xb1)
334 #define XVLD OPC_2RI12(0xb2)
335 #define XVST OPC_2RI12(0xb3)
336 #define VSTELM OPC_2RI8(0xc40)
337
338 /* Vector Float Conversion Instructions */
339 #define VFCVTL_D_S OPC_2R(0x1ca77c)
340
341 /* Vector Bit Manipulate Instructions */
342 #define VSLLWIL OPC_2R(0x1cc200)
343
344 /* Vector Move And Shuffle Instructions */
345 #define VLDREPL OPC_2R(0xc0000)
346 #define VINSGR2VR OPC_2R(0x1cbac0)
347 #define VPICKVE2GR_U OPC_2R(0x1cbce0)
348 #define VREPLGR2VR OPC_2R(0x1ca7c0)
349 #define VREPLVE OPC_3R(0xe244)
350 #define VREPLVEI OPC_2R(0x1cbde0)
351 #define XVPERMI OPC_2RI8(0x1dfa)
352
353 #define I12_MAX (0x7ff)
354 #define I12_MIN (-0x800)
355 #define BRANCH16_MAX (0x7fff << 2)
356 #define BRANCH16_MIN (-(0x8000 << 2))
357 #define BRANCH21_MAX (0xfffff << 2)
358 #define BRANCH21_MIN (-(0x100000 << 2))
359 #define JUMP_MAX (0x1ffffff << 2)
360 #define JUMP_MIN (-(0x2000000 << 2))
361 #define JIRL_MAX (0x7fff << 2)
362 #define JIRL_MIN (-(0x8000 << 2))
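/* Rough ranges implied by the limits above (added note): since every offset is stored
   shifted right by two, the 16-bit conditional branches reach about +/-128 KiB, the
   21-bit forms about +/-4 MiB and the 26-bit B/BL about +/-128 MiB. */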
363
364 #define S32_MAX (0x7fffffffl)
365 #define S32_MIN (-0x80000000l)
366 #define S52_MAX (0x7ffffffffffffl)
367
368 #define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))
369
370 /* LoongArch CPUCFG register for feature detection */
371 #define LOONGARCH_CFG2 0x02
372 #define LOONGARCH_CFG2_LAMCAS (1 << 28)
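/* Added note: bit 28 of CPUCFG word 2 advertises support for the amcas.{b,h,w,d}
   instructions defined above; SLJIT_HAS_ATOMIC below reports exactly this bit. */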
373
374 static sljit_u32 cfg2_feature_list = 0;
375
376 /* According to the Software Development and Build Convention for LoongArch Architectures,
377    the status of the LSX and LASX extensions must be checked through HWCAP. */
378 #include <sys/auxv.h>
379
380 #define LOONGARCH_HWCAP_LSX (1 << 4)
381 #define LOONGARCH_HWCAP_LASX (1 << 5)
382
383 static sljit_u32 hwcap_feature_list = 0;
384
385 /* Feature type */
386 #define GET_CFG2 0
387 #define GET_HWCAP 1
388
389 static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)
390 {
391 if (cfg2_feature_list == 0)
392 __asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));
393 if (hwcap_feature_list == 0)
394 hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);
395
396 return feature_type ? hwcap_feature_list : cfg2_feature_list;
397 }
398
399 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
400 {
401 sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
402 FAIL_IF(!ptr);
403 *ptr = ins;
404 compiler->size++;
405 return SLJIT_SUCCESS;
406 }
407
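/* Added overview (summary of detect_jump_type and load_addr_to_reg below): a jump is
   shortened to the densest form that still reaches its target:
     PATCH_B      - single 16-bit conditional branch (the condition is inverted in place)
     PATCH_J      - 26-bit B/BL
     PATCH_REL32  - pcaddu12i + jirl (or addi.d for mov_addr), +/-2 GiB PC-relative
     PATCH_ABS32  - lu12i.w followed by jirl/ori, for absolute targets below 2 GiB
     PATCH_ABS52  - lu12i.w + lu32i.d followed by jirl/ori
     (default)    - full 64-bit address via lu12i.w + lu32i.d + lu52i.d + jirl/ori. */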
408 static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
409 {
410 sljit_sw diff;
411 sljit_uw target_addr;
412 sljit_ins *inst;
413
414 inst = (sljit_ins *)jump->addr;
415
416 if (jump->flags & SLJIT_REWRITABLE_JUMP)
417 goto exit;
418
419 if (jump->flags & JUMP_ADDR)
420 target_addr = jump->u.target;
421 else {
422 SLJIT_ASSERT(jump->u.label != NULL);
423 target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
424 }
425
426 diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
427
428 if (jump->flags & IS_COND) {
429 diff += SSIZE_OF(ins);
430
431 if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
432 inst--;
433 inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
434 jump->flags |= PATCH_B;
435 jump->addr = (sljit_uw)inst;
436 return inst;
437 }
438
439 diff -= SSIZE_OF(ins);
440 }
441
442 if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
443 if (jump->flags & IS_COND) {
444 inst[-1] |= (sljit_ins)IMM_I16(2);
445 }
446
447 jump->flags |= PATCH_J;
448 return inst;
449 }
450
451 if (diff >= S32_MIN && diff <= S32_MAX) {
452 if (jump->flags & IS_COND)
453 inst[-1] |= (sljit_ins)IMM_I16(3);
454
455 jump->flags |= PATCH_REL32;
456 inst[1] = inst[0];
457 return inst + 1;
458 }
459
460 if (target_addr <= (sljit_uw)S32_MAX) {
461 if (jump->flags & IS_COND)
462 inst[-1] |= (sljit_ins)IMM_I16(3);
463
464 jump->flags |= PATCH_ABS32;
465 inst[1] = inst[0];
466 return inst + 1;
467 }
468
469 if (target_addr <= S52_MAX) {
470 if (jump->flags & IS_COND)
471 inst[-1] |= (sljit_ins)IMM_I16(4);
472
473 jump->flags |= PATCH_ABS52;
474 inst[2] = inst[0];
475 return inst + 2;
476 }
477
478 exit:
479 if (jump->flags & IS_COND)
480 inst[-1] |= (sljit_ins)IMM_I16(5);
481 inst[3] = inst[0];
482 return inst + 3;
483 }
484
485 static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
486 {
487 sljit_uw addr;
488 sljit_sw diff;
489 SLJIT_UNUSED_ARG(executable_offset);
490
491 SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
492 if (jump->flags & JUMP_ADDR)
493 addr = jump->u.target;
494 else
495 addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
496
497 diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
498
499 if (diff >= S32_MIN && diff <= S32_MAX) {
500 SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
501 jump->flags |= PATCH_REL32;
502 return 1;
503 }
504
505 if (addr <= S32_MAX) {
506 SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
507 jump->flags |= PATCH_ABS32;
508 return 1;
509 }
510
511 if (addr <= S52_MAX) {
512 SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
513 jump->flags |= PATCH_ABS52;
514 return 2;
515 }
516
517 SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
518 return 3;
519 }
520
521 static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
522 {
523 sljit_uw flags = jump->flags;
524 sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
525 sljit_ins *ins = (sljit_ins*)jump->addr;
526 sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
527 SLJIT_UNUSED_ARG(executable_offset);
528
529 if (flags & PATCH_REL32) {
530 addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);
531
532 SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
533
534 if ((addr & 0x800) != 0)
535 addr += 0x1000;
536
537 ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);
538
539 if (!(flags & JUMP_MOV_ADDR)) {
540 SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
541 ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
542 } else
543 ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);
544 return;
545 }
546
547 if (flags & PATCH_ABS32) {
548 SLJIT_ASSERT(addr <= S32_MAX);
549 ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
550 } else if (flags & PATCH_ABS52) {
551 ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
552 ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
553 ins += 1;
554 } else {
555 ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
556 ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
557 ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
558 ins += 2;
559 }
560
561 if (!(flags & JUMP_MOV_ADDR)) {
562 SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
563 ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
564 } else
565 ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
566 }
567
568 static void reduce_code_size(struct sljit_compiler *compiler)
569 {
570 struct sljit_label *label;
571 struct sljit_jump *jump;
572 struct sljit_const *const_;
573 SLJIT_NEXT_DEFINE_TYPES;
574 sljit_uw total_size;
575 sljit_uw size_reduce = 0;
576 sljit_sw diff;
577
578 label = compiler->labels;
579 jump = compiler->jumps;
580 const_ = compiler->consts;
581
582 SLJIT_NEXT_INIT_TYPES();
583
584 while (1) {
585 SLJIT_GET_NEXT_MIN();
586
587 if (next_min_addr == SLJIT_MAX_ADDRESS)
588 break;
589
590 if (next_min_addr == next_label_size) {
591 label->size -= size_reduce;
592
593 label = label->next;
594 next_label_size = SLJIT_GET_NEXT_SIZE(label);
595 }
596
597 if (next_min_addr == next_const_addr) {
598 const_->addr -= size_reduce;
599 const_ = const_->next;
600 next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
601 continue;
602 }
603
604 if (next_min_addr != next_jump_addr)
605 continue;
606
607 jump->addr -= size_reduce;
608 if (!(jump->flags & JUMP_MOV_ADDR)) {
609 total_size = JUMP_MAX_SIZE;
610
611 if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
612 if (jump->flags & JUMP_ADDR) {
613 if (jump->u.target <= S32_MAX)
614 total_size = 2;
615 else if (jump->u.target <= S52_MAX)
616 total_size = 3;
617 } else {
618 /* Unit size: instruction. */
619 diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
620
621 if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
622 total_size = 0;
623 else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
624 total_size = 1;
625 else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
626 total_size = 2;
627 }
628 }
629
630 size_reduce += JUMP_MAX_SIZE - total_size;
631 jump->flags |= total_size << JUMP_SIZE_SHIFT;
632 } else {
633 total_size = 3;
634
635 if (!(jump->flags & JUMP_ADDR)) {
636 /* Real size minus 1. Unit size: instruction. */
637 diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
638
639 if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
640 total_size = 1;
641 } else if (jump->u.target < S32_MAX)
642 total_size = 1;
643 else if (jump->u.target <= S52_MAX)
644 total_size = 2;
645
646 size_reduce += 3 - total_size;
647 jump->flags |= total_size << JUMP_SIZE_SHIFT;
648 }
649
650 jump = jump->next;
651 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
652 }
653
654 compiler->size -= size_reduce;
655 }
656
657 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
658 {
659 struct sljit_memory_fragment *buf;
660 sljit_ins *code;
661 sljit_ins *code_ptr;
662 sljit_ins *buf_ptr;
663 sljit_ins *buf_end;
664 sljit_uw word_count;
665 SLJIT_NEXT_DEFINE_TYPES;
666 sljit_sw executable_offset;
667 sljit_uw addr;
668
669 struct sljit_label *label;
670 struct sljit_jump *jump;
671 struct sljit_const *const_;
672
673 CHECK_ERROR_PTR();
674 CHECK_PTR(check_sljit_generate_code(compiler));
675
676 reduce_code_size(compiler);
677
678 code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
679 PTR_FAIL_WITH_EXEC_IF(code);
680
681 reverse_buf(compiler);
682 buf = compiler->buf;
683
684 code_ptr = code;
685 word_count = 0;
686 label = compiler->labels;
687 jump = compiler->jumps;
688 const_ = compiler->consts;
689 SLJIT_NEXT_INIT_TYPES();
690 SLJIT_GET_NEXT_MIN();
691
692 do {
693 buf_ptr = (sljit_ins*)buf->memory;
694 buf_end = buf_ptr + (buf->used_size >> 2);
695 do {
696 *code_ptr = *buf_ptr++;
697 if (next_min_addr == word_count) {
698 SLJIT_ASSERT(!label || label->size >= word_count);
699 SLJIT_ASSERT(!jump || jump->addr >= word_count);
700 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
701
702 /* These structures are ordered by their address. */
703 if (next_min_addr == next_label_size) {
704 label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
705 label->size = (sljit_uw)(code_ptr - code);
706 label = label->next;
707 next_label_size = SLJIT_GET_NEXT_SIZE(label);
708 }
709
710 if (next_min_addr == next_jump_addr) {
711 if (!(jump->flags & JUMP_MOV_ADDR)) {
712 word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
713 jump->addr = (sljit_uw)code_ptr;
714 code_ptr = detect_jump_type(jump, code, executable_offset);
715 SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
716 } else {
717 word_count += jump->flags >> JUMP_SIZE_SHIFT;
718 addr = (sljit_uw)code_ptr;
719 code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
720 jump->addr = addr;
721 }
722 jump = jump->next;
723 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
724 } else if (next_min_addr == next_const_addr) {
725 const_->addr = (sljit_uw)code_ptr;
726 const_ = const_->next;
727 next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
728 }
729
730 SLJIT_GET_NEXT_MIN();
731 }
732 code_ptr++;
733 word_count++;
734 } while (buf_ptr < buf_end);
735
736 buf = buf->next;
737 } while (buf);
738
739 if (label && label->size == word_count) {
740 label->u.addr = (sljit_uw)code_ptr;
741 label->size = (sljit_uw)(code_ptr - code);
742 label = label->next;
743 }
744
745 SLJIT_ASSERT(!label);
746 SLJIT_ASSERT(!jump);
747 SLJIT_ASSERT(!const_);
748 SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
749
750 jump = compiler->jumps;
751 while (jump) {
752 do {
753 if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
754 load_addr_to_reg(jump, executable_offset);
755 break;
756 }
757
758 addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
759 buf_ptr = (sljit_ins *)jump->addr;
760 addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
761
762 if (jump->flags & PATCH_B) {
763 SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
764 buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
765 break;
766 }
767
768 SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
769 if (jump->flags & IS_CALL)
770 buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
771 else
772 buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
773 } while (0);
774 jump = jump->next;
775 }
776
777 compiler->error = SLJIT_ERR_COMPILED;
778 compiler->executable_offset = executable_offset;
779 compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
780
781 code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
782 code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
783
784 SLJIT_CACHE_FLUSH(code, code_ptr);
785 SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
786 return code;
787 }
788
789 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
790 {
791 switch (feature_type)
792 {
793 case SLJIT_HAS_FPU:
794 #ifdef SLJIT_IS_FPU_AVAILABLE
795 return (SLJIT_IS_FPU_AVAILABLE) != 0;
796 #else
797 /* Available by default. */
798 return 1;
799 #endif
800
801 case SLJIT_HAS_LASX:
802 return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));
803
804 case SLJIT_HAS_SIMD:
805 return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));
806
807 case SLJIT_HAS_ATOMIC:
808 return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2));
809
810 case SLJIT_HAS_CLZ:
811 case SLJIT_HAS_CTZ:
812 case SLJIT_HAS_REV:
813 case SLJIT_HAS_ROT:
814 case SLJIT_HAS_PREFETCH:
815 case SLJIT_HAS_COPY_F32:
816 case SLJIT_HAS_COPY_F64:
817 return 1;
818
819 default:
820 return 0;
821 }
822 }
823
824 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
825 {
826 SLJIT_UNUSED_ARG(type);
827
828 return 0;
829 }
830
831 /* --------------------------------------------------------------------- */
832 /* Entry, exit */
833 /* --------------------------------------------------------------------- */
834
835 /* Creates an index in data_transfer_insts array. */
836 #define LOAD_DATA 0x01
837 #define WORD_DATA 0x00
838 #define BYTE_DATA 0x02
839 #define HALF_DATA 0x04
840 #define INT_DATA 0x06
841 #define SIGNED_DATA 0x08
842 /* Separates integer and floating point registers */
843 #define GPR_REG 0x0f
844 #define DOUBLE_DATA 0x10
845 #define SINGLE_DATA 0x12
846
847 #define MEM_MASK 0x1f
848
849 #define ARG_TEST 0x00020
850 #define ALT_KEEP_CACHE 0x00040
851 #define CUMULATIVE_OP 0x00080
852 #define IMM_OP 0x00100
853 #define MOVE_OP 0x00200
854 #define SRC2_IMM 0x00400
855
856 #define UNUSED_DEST 0x00800
857 #define REG_DEST 0x01000
858 #define REG1_SOURCE 0x02000
859 #define REG2_SOURCE 0x04000
860 #define SLOW_SRC1 0x08000
861 #define SLOW_SRC2 0x10000
862 #define SLOW_DEST 0x20000
863 #define MEM_USE_TMP2 0x40000
864
865 #define STACK_STORE ST_D
866 #define STACK_LOAD LD_D
867
868 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
869 {
870 if (imm <= I12_MAX && imm >= I12_MIN)
871 return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));
872
873 if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
874 FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
875 return push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));
876 } else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) {
877 FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
878 FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
879 return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));
880 }
881 FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
882 FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
883 FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));
884 return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
885 }
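/* Worked example (illustrative): load_immediate(compiler, reg, 0x12345678) takes the
   32-bit branch above and emits
       lu12i.w  reg, 0x12345     ; bits 31..12, sign-extended to 64 bits
       ori      reg, reg, 0x678  ; fills in the low 12 bits
   Larger values append lu32i.d (bits 51..32) and lu52i.d (bits 63..52) as needed. */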
886
887 #define STACK_MAX_DISTANCE (-I12_MIN)
888
889 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
890
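/* Frame layout built by sljit_emit_enter (added summary): the stack pointer is lowered by
   local_size, the return address is stored in the highest slot, followed downwards by the
   saved and scratch general registers and then the saved and scratch floating-point
   registers; the locals occupy the area below them. When the frame exceeds the 12-bit
   store offset, the stack adjustment is split into two steps. */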
891 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
892 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
893 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
894 {
895 sljit_s32 i, tmp, offset;
896 sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
897
898 CHECK_ERROR();
899 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
900 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
901
902 local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
903 local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
904
905 local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
906 compiler->local_size = local_size;
907
908 if (local_size <= STACK_MAX_DISTANCE) {
909 /* Frequent case. */
910 FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
911 offset = local_size - SSIZE_OF(sw);
912 local_size = 0;
913 } else {
914 FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
915 local_size -= STACK_MAX_DISTANCE;
916
917 if (local_size > STACK_MAX_DISTANCE)
918 FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
919 offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
920 }
921
922 FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
923
924 tmp = SLJIT_S0 - saveds;
925 for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
926 offset -= SSIZE_OF(sw);
927 FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
928 }
929
930 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
931 offset -= SSIZE_OF(sw);
932 FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
933 }
934
935 tmp = SLJIT_FS0 - fsaveds;
936 for (i = SLJIT_FS0; i > tmp; i--) {
937 offset -= SSIZE_OF(f64);
938 FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
939 }
940
941 for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
942 offset -= SSIZE_OF(f64);
943 FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
944 }
945
946 if (local_size > STACK_MAX_DISTANCE)
947 FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
948 else if (local_size > 0)
949 FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
950
951 if (options & SLJIT_ENTER_REG_ARG)
952 return SLJIT_SUCCESS;
953
954 arg_types >>= SLJIT_ARG_SHIFT;
955 saved_arg_count = 0;
956 tmp = SLJIT_R0;
957
958 while (arg_types > 0) {
959 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
960 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
961 FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
962 saved_arg_count++;
963 }
964 tmp++;
965 }
966
967 arg_types >>= SLJIT_ARG_SHIFT;
968 }
969
970 return SLJIT_SUCCESS;
971 }
972
973 #undef STACK_MAX_DISTANCE
974
975 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
976 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
977 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
978 {
979 CHECK_ERROR();
980 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
981 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
982
983 local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
984 local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
985
986 compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
987
988 return SLJIT_SUCCESS;
989 }
990
991 #define STACK_MAX_DISTANCE (-I12_MIN - 16)
992
993 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
994 {
995 sljit_s32 i, tmp, offset;
996 sljit_s32 local_size = compiler->local_size;
997
998 if (local_size > STACK_MAX_DISTANCE) {
999 local_size -= STACK_MAX_DISTANCE;
1000
1001 if (local_size > STACK_MAX_DISTANCE) {
1002 FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
1003 FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
1004 } else
1005 FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
1006
1007 local_size = STACK_MAX_DISTANCE;
1008 }
1009
1010 SLJIT_ASSERT(local_size > 0);
1011
1012 offset = local_size - SSIZE_OF(sw);
1013 if (!is_return_to)
1014 FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
1015
1016 tmp = SLJIT_S0 - compiler->saveds;
1017 for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
1018 offset -= SSIZE_OF(sw);
1019 FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1020 }
1021
1022 for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1023 offset -= SSIZE_OF(sw);
1024 FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1025 }
1026
1027 tmp = SLJIT_FS0 - compiler->fsaveds;
1028 for (i = SLJIT_FS0; i > tmp; i--) {
1029 offset -= SSIZE_OF(f64);
1030 FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1031 }
1032
1033 for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1034 offset -= SSIZE_OF(f64);
1035 FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1036 }
1037
1038 return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
1039 }
1040
1041 #undef STACK_MAX_DISTANCE
1042
1043 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1044 {
1045 CHECK_ERROR();
1046 CHECK(check_sljit_emit_return_void(compiler));
1047
1048 FAIL_IF(emit_stack_frame_release(compiler, 0));
1049 return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
1050 }
1051
1052 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1053 sljit_s32 src, sljit_sw srcw)
1054 {
1055 CHECK_ERROR();
1056 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1057
1058 if (src & SLJIT_MEM) {
1059 ADJUST_LOCAL_OFFSET(src, srcw);
1060 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
1061 src = TMP_REG1;
1062 srcw = 0;
1063 } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1064 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
1065 src = TMP_REG1;
1066 srcw = 0;
1067 }
1068
1069 FAIL_IF(emit_stack_frame_release(compiler, 1));
1070
1071 SLJIT_SKIP_CHECKS(compiler);
1072 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1073 }
1074
1075 /* --------------------------------------------------------------------- */
1076 /* Operators */
1077 /* --------------------------------------------------------------------- */
1078
1079 static const sljit_ins data_transfer_insts[16 + 4] = {
1080 /* u w s */ ST_D /* st.d */,
1081 /* u w l */ LD_D /* ld.d */,
1082 /* u b s */ ST_B /* st.b */,
1083 /* u b l */ LD_BU /* ld.bu */,
1084 /* u h s */ ST_H /* st.h */,
1085 /* u h l */ LD_HU /* ld.hu */,
1086 /* u i s */ ST_W /* st.w */,
1087 /* u i l */ LD_WU /* ld.wu */,
1088
1089 /* s w s */ ST_D /* st.d */,
1090 /* s w l */ LD_D /* ld.d */,
1091 /* s b s */ ST_B /* st.b */,
1092 /* s b l */ LD_B /* ld.b */,
1093 /* s h s */ ST_H /* st.h */,
1094 /* s h l */ LD_H /* ld.h */,
1095 /* s i s */ ST_W /* st.w */,
1096 /* s i l */ LD_W /* ld.w */,
1097
1098 /* d s */ FST_D /* fst.d */,
1099 /* d l */ FLD_D /* fld.d */,
1100 /* s s */ FST_S /* fst.s */,
1101 /* s l */ FLD_S /* fld.s */,
1102 };
1103
1104 static const sljit_ins data_transfer_insts_x[16 + 4] = {
1105 /* u w s */ STX_D /* stx.d */,
1106 /* u w l */ LDX_D /* ldx.d */,
1107 /* u b s */ STX_B /* stx.b */,
1108 /* u b l */ LDX_BU /* ldx.bu */,
1109 /* u h s */ STX_H /* stx.h */,
1110 /* u h l */ LDX_HU /* ldx.hu */,
1111 /* u i s */ STX_W /* stx.w */,
1112 /* u i l */ LDX_WU /* ldx.wu */,
1113
1114 /* s w s */ STX_D /* stx.d */,
1115 /* s w l */ LDX_D /* ldx.d */,
1116 /* s b s */ STX_B /* stx.b */,
1117 /* s b l */ LDX_B /* ldx.b */,
1118 /* s h s */ STX_H /* stx.h */,
1119 /* s h l */ LDX_H /* ldx.h */,
1120 /* s i s */ STX_W /* stx.w */,
1121 /* s i l */ LDX_W /* ldx.w */,
1122
1123 /* d s */ FSTX_D /* fstx.d */,
1124 /* d l */ FLDX_D /* fldx.d */,
1125 /* s s */ FSTX_S /* fstx.s */,
1126 /* s l */ FLDX_S /* fldx.s */,
1127 };
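/* Example of how the table index is formed (added note): the MEM_MASK bits combine the
   type and direction flags defined above, e.g. (WORD_DATA | LOAD_DATA) == 1 selects
   LD_D/LDX_D and (BYTE_DATA | SIGNED_DATA | LOAD_DATA) == 11 selects LD_B/LDX_B, while
   DOUBLE_DATA and SINGLE_DATA index the trailing floating-point entries. */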
1128
1129 static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1130 {
1131 sljit_ins ins;
1132 sljit_s32 base = arg & REG_MASK;
1133
1134 SLJIT_ASSERT(arg & SLJIT_MEM);
1135
1136 if (arg & OFFS_REG_MASK) {
1137 sljit_s32 offs = OFFS_REG(arg);
1138
1139 SLJIT_ASSERT(!argw);
1140 ins = data_transfer_insts_x[flags & MEM_MASK] |
1141 ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
1142 RJ(base) | RK(offs);
1143 } else {
1144 SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);
1145
1146 ins = data_transfer_insts[flags & MEM_MASK] |
1147 ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
1148 RJ(base) | IMM_I12(argw);
1149 }
1150 return push_inst(compiler, ins);
1151 }
1152
1153 /* Can perform an operation using at most 1 instruction. */
1154 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1155 {
1156 SLJIT_ASSERT(arg & SLJIT_MEM);
1157
1158 /* argw == 0 (ldx/stx rd, rj, rk) can be used.
1159 * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1160 if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1161 /* Works for both absolute and relative addresses. */
1162 if (SLJIT_UNLIKELY(flags & ARG_TEST))
1163 return 1;
1164
1165 FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1166 return -1;
1167 }
1168 return 0;
1169 }
1170
1171 #define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
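/* Example (added for clarity): TO_ARGW_HI rounds to the nearest 4 KiB boundary so that
   the remaining displacement fits a signed 12-bit field, e.g. TO_ARGW_HI(0x1801) == 0x2000
   and the ld/st then applies the sign-extended low bits: 0x2000 + (-0x7ff) == 0x1801. */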
1172
1173 /* See getput_arg below.
1174 Note: can_cache is called only for binary operators. */
1175 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1176 {
1177 SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1178
1179 if (arg & OFFS_REG_MASK)
1180 return 0;
1181
1182 if (arg == next_arg) {
1183 if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1184 || TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1185 return 1;
1186 return 0;
1187 }
1188
1189 return 0;
1190 }
1191
1192 /* Emit the necessary instructions. See can_cache above. */
1193 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1194 {
1195 sljit_s32 base = arg & REG_MASK;
1196 sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
1197 sljit_sw offset;
1198
1199 SLJIT_ASSERT(arg & SLJIT_MEM);
1200 if (!(next_arg & SLJIT_MEM)) {
1201 next_arg = 0;
1202 next_argw = 0;
1203 }
1204
1205 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1206 argw &= 0x3;
1207
1208 if (SLJIT_UNLIKELY(argw))
1209 FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1210 return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1211 }
1212
1213 if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
1214 return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
1215
1216 if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
1217 offset = argw - compiler->cache_argw;
1218 } else {
1219 sljit_sw argw_hi = TO_ARGW_HI(argw);
1220 compiler->cache_arg = SLJIT_MEM;
1221
1222 if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1223 FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1224 compiler->cache_argw = argw;
1225 offset = 0;
1226 } else {
1227 FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
1228 compiler->cache_argw = argw_hi;
1229 offset = argw & 0xfff;
1230 argw = argw_hi;
1231 }
1232 }
1233
1234 if (!base)
1235 return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1236
1237 if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
1238 compiler->cache_arg = arg;
1239 FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
1240 return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1241 }
1242
1243 if (!offset)
1244 return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1245
1246 FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
1247 return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
1248 }
1249
1250 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1251 {
1252 sljit_s32 base = arg & REG_MASK;
1253 sljit_s32 tmp_r = TMP_REG1;
1254
1255 if (getput_arg_fast(compiler, flags, reg, arg, argw))
1256 return compiler->error;
1257
1258 if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1259 tmp_r = reg;
1260
1261 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1262 argw &= 0x3;
1263
1264 if (SLJIT_UNLIKELY(argw))
1265 FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1266 return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1267 } else {
1268 FAIL_IF(load_immediate(compiler, tmp_r, argw));
1269
1270 if (base != 0)
1271 return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1272 return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
1273 }
1274 }
1275
1276 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1277 {
1278 if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1279 return compiler->error;
1280 return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1281 }
1282
1283 #define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? (v) : (32 + (v))))
1284
1285 /* andi/ori/xori are zero-extended */
1286 #define EMIT_LOGICAL(op_imm, op_reg) \
1287 if (flags & SRC2_IMM) { \
1288 if (op & SLJIT_SET_Z) {\
1289 FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1290 FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
1291 } \
1292 if (!(flags & UNUSED_DEST)) { \
1293 if (dst == src1) { \
1294 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1295 FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
1296 } else { \
1297 FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1298 FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \
1299 } \
1300 } \
1301 } else { \
1302 if (op & SLJIT_SET_Z) \
1303 FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
1304 if (!(flags & UNUSED_DEST)) \
1305 FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \
1306 } \
1307 while (0)
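/* Note on the macro above (added explanation): because andi/ori/xori zero-extend their
   12-bit immediate, a possibly negative SRC2_IMM value is first materialized with addi.d
   from the zero register (which sign-extends) and the register form of the operation is
   used instead; the op_imm opcode passed to the macro is therefore not used here. */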
1308
1309 #define EMIT_SHIFT(imm, reg) \
1310 op_imm = (imm); \
1311 op_reg = (reg)
1312
1313 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1314 sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1315 {
1316 sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
1317 sljit_ins op_imm, op_reg;
1318 sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);
1319
1320 switch (GET_OPCODE(op)) {
1321 case SLJIT_MOV:
1322 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1323 if (dst != src2)
1324 return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
1325 return SLJIT_SUCCESS;
1326
1327 case SLJIT_MOV_U8:
1328 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1329 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1330 return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
1331 SLJIT_ASSERT(dst == src2);
1332 return SLJIT_SUCCESS;
1333
1334 case SLJIT_MOV_S8:
1335 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1336 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1337 return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
1338 SLJIT_ASSERT(dst == src2);
1339 return SLJIT_SUCCESS;
1340
1341 case SLJIT_MOV_U16:
1342 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1343 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1344 return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
1345 SLJIT_ASSERT(dst == src2);
1346 return SLJIT_SUCCESS;
1347
1348 case SLJIT_MOV_S16:
1349 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1350 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1351 return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
1352 SLJIT_ASSERT(dst == src2);
1353 return SLJIT_SUCCESS;
1354
1355 case SLJIT_MOV_U32:
1356 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1357 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1358 return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
1359 SLJIT_ASSERT(dst == src2);
1360 return SLJIT_SUCCESS;
1361
1362 case SLJIT_MOV_S32:
1363 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1364 if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1365 return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
1366 SLJIT_ASSERT(dst == src2);
1367 return SLJIT_SUCCESS;
1368
1369 case SLJIT_CLZ:
1370 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1371 return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));
1372
1373 case SLJIT_CTZ:
1374 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1375 return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));
1376
1377 case SLJIT_REV:
1378 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1379 return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));
1380
1381 case SLJIT_REV_S16:
1382 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1383 FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1384 return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));
1385
1386 case SLJIT_REV_U16:
1387 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1388 FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1389 return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));
1390
1391 case SLJIT_REV_S32:
1392 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1393 FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1394 return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));
1395
1396 case SLJIT_REV_U32:
1397 SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1398 FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1399 return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));
1400
1401 case SLJIT_ADD:
1402 /* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
1403 is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1404 carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1405
1406 if (flags & SRC2_IMM) {
1407 if (is_overflow) {
1408 if (src2 >= 0)
1409 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1410 else {
1411 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
1412 FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1413 }
1414 } else if (op & SLJIT_SET_Z)
1415 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1416
1417 /* Only the zero flag is needed. */
1418 if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1419 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
1420 } else {
1421 if (is_overflow)
1422 FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1423 else if (op & SLJIT_SET_Z)
1424 FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1425
1426 if (is_overflow || carry_src_r != 0) {
1427 if (src1 != dst)
1428 carry_src_r = (sljit_s32)src1;
1429 else if (src2 != dst)
1430 carry_src_r = (sljit_s32)src2;
1431 else {
1432 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
1433 carry_src_r = OTHER_FLAG;
1434 }
1435 }
1436
1437 /* Only the zero flag is needed. */
1438 if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1439 FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
1440 }
1441
1442 /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1443 if (is_overflow || carry_src_r != 0) {
1444 if (flags & SRC2_IMM)
1445 FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
1446 else
1447 FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
1448 }
1449
1450 if (!is_overflow)
1451 return SLJIT_SUCCESS;
1452
1453 FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1454 if (op & SLJIT_SET_Z)
1455 FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1456 FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1457 return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1458
1459 case SLJIT_ADDC:
1460 carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1461
1462 if (flags & SRC2_IMM) {
1463 FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
1464 } else {
1465 if (carry_src_r != 0) {
1466 if (src1 != dst)
1467 carry_src_r = (sljit_s32)src1;
1468 else if (src2 != dst)
1469 carry_src_r = (sljit_s32)src2;
1470 else {
1471 FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1472 carry_src_r = EQUAL_FLAG;
1473 }
1474 }
1475
1476 FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
1477 }
1478
1479 /* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1480 if (carry_src_r != 0) {
1481 if (flags & SRC2_IMM)
1482 FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
1483 else
1484 FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
1485 }
1486
1487 FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1488
1489 if (carry_src_r == 0)
1490 return SLJIT_SUCCESS;
1491
1492 /* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
1493 FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
1494 /* Set carry flag. */
1495 return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));
1496
1497 case SLJIT_SUB:
1498 if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1499 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1500 src2 = TMP_REG2;
1501 flags &= ~SRC2_IMM;
1502 }
1503
1504 is_handled = 0;
1505
1506 if (flags & SRC2_IMM) {
1507 if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1508 FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1509 is_handled = 1;
1510 } else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1511 FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1512 is_handled = 1;
1513 }
1514 }
1515
1516 if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1517 is_handled = 1;
1518
1519 if (flags & SRC2_IMM) {
1520 reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1521 FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));
1522 src2 = reg;
1523 flags &= ~SRC2_IMM;
1524 }
1525
1526 switch (GET_FLAG_TYPE(op)) {
1527 case SLJIT_LESS:
1528 FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1529 break;
1530 case SLJIT_GREATER:
1531 FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1532 break;
1533 case SLJIT_SIG_LESS:
1534 FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1535 break;
1536 case SLJIT_SIG_GREATER:
1537 FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1538 break;
1539 }
1540 }
1541
1542 if (is_handled) {
1543 if (flags & SRC2_IMM) {
1544 if (op & SLJIT_SET_Z)
1545 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1546 if (!(flags & UNUSED_DEST))
1547 return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
1548 } else {
1549 if (op & SLJIT_SET_Z)
1550 FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1551 if (!(flags & UNUSED_DEST))
1552 return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
1553 }
1554 return SLJIT_SUCCESS;
1555 }
1556
1557 is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1558 is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1559
1560 if (flags & SRC2_IMM) {
1561 if (is_overflow) {
1562 if (src2 >= 0)
1563 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1564 else {
1565 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));
1566 FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1567 }
1568 } else if (op & SLJIT_SET_Z)
1569 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1570
1571 if (is_overflow || is_carry)
1572 FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1573
1574 /* Only the zero flag is needed. */
1575 if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1576 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1577 } else {
1578 if (is_overflow)
1579 FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1580 else if (op & SLJIT_SET_Z)
1581 FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1582
1583 if (is_overflow || is_carry)
1584 FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1585
1586 /* Only the zero flag is needed. */
1587 if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1588 FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1589 }
1590
1591 if (!is_overflow)
1592 return SLJIT_SUCCESS;
1593
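/* Signed overflow of a subtraction equals the borrow out of the sign bit XORed with the borrow
   into the sign bit: OTHER_FLAG already holds the borrow out (src1 <u src2), and the sign bit of
   dst ^ EQUAL_FLAG (prepared above) carries the borrow into the sign bit; the logical shift below
   moves that bit down to position 0 before the final XOR. */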
1594 FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1595 if (op & SLJIT_SET_Z)
1596 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1597 FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1598 return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1599
1600 case SLJIT_SUBC:
1601 if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1602 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1603 src2 = TMP_REG2;
1604 flags &= ~SRC2_IMM;
1605 }
1606
1607 is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1608
1609 if (flags & SRC2_IMM) {
1610 if (is_carry)
1611 FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1612
1613 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1614 } else {
1615 if (is_carry)
1616 FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1617
1618 FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1619 }
1620
1621 if (is_carry)
1622 FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));
1623
1624 FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1625
1626 if (!is_carry)
1627 return SLJIT_SUCCESS;
1628
1629 return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));
1630
1631 case SLJIT_MUL:
1632 SLJIT_ASSERT(!(flags & SRC2_IMM));
1633
1634 if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1635 return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));
1636
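/* Overflow check for multiplication: compute the full-width product and compare it with the
   sign-extended truncated result; OTHER_FLAG ends up non-zero exactly when the product does
   not fit the operand width. */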
1637 if (op & SLJIT_32) {
1638 FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1639 FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
1640 return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
1641 }
1642
1643 FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1644 FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
1645 FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(63)));
1646 return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));
1647
1648 case SLJIT_AND:
1649 EMIT_LOGICAL(ANDI, AND);
1650 return SLJIT_SUCCESS;
1651
1652 case SLJIT_OR:
1653 EMIT_LOGICAL(ORI, OR);
1654 return SLJIT_SUCCESS;
1655
1656 case SLJIT_XOR:
1657 EMIT_LOGICAL(XORI, XOR);
1658 return SLJIT_SUCCESS;
1659
1660 case SLJIT_SHL:
1661 case SLJIT_MSHL:
1662 if (op & SLJIT_32) {
1663 EMIT_SHIFT(SLLI_W, SLL_W);
1664 } else {
1665 EMIT_SHIFT(SLLI_D, SLL_D);
1666 }
1667 break;
1668
1669 case SLJIT_LSHR:
1670 case SLJIT_MLSHR:
1671 if (op & SLJIT_32) {
1672 EMIT_SHIFT(SRLI_W, SRL_W);
1673 } else {
1674 EMIT_SHIFT(SRLI_D, SRL_D);
1675 }
1676 break;
1677
1678 case SLJIT_ASHR:
1679 case SLJIT_MASHR:
1680 if (op & SLJIT_32) {
1681 EMIT_SHIFT(SRAI_W, SRA_W);
1682 } else {
1683 EMIT_SHIFT(SRAI_D, SRA_D);
1684 }
1685 break;
1686
1687 case SLJIT_ROTL:
1688 case SLJIT_ROTR:
1689 if (flags & SRC2_IMM) {
1690 SLJIT_ASSERT(src2 != 0);
1691
1692 if (GET_OPCODE(op) == SLJIT_ROTL)
1693 src2 = word_size - src2;
1694 return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
1695 }
1696
1697 if (src2 == TMP_ZERO) {
1698 if (dst != src1)
1699 return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
1700 return SLJIT_SUCCESS;
1701 }
1702
1703 if (GET_OPCODE(op) == SLJIT_ROTL) {
1704 FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
1705 src2 = OTHER_FLAG;
1706 }
1707 return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));
1708
1709 default:
1710 SLJIT_UNREACHABLE();
1711 return SLJIT_SUCCESS;
1712 }
1713
1714 if (flags & SRC2_IMM) {
1715 if (op & SLJIT_SET_Z)
1716 FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1717
1718 if (flags & UNUSED_DEST)
1719 return SLJIT_SUCCESS;
1720 return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
1721 }
1722
1723 if (op & SLJIT_SET_Z)
1724 FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1725
1726 if (flags & UNUSED_DEST)
1727 return SLJIT_SUCCESS;
1728 return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
1729 }
1730
1731 #undef IMM_EXTEND
1732
1733 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1734 sljit_s32 dst, sljit_sw dstw,
1735 sljit_s32 src1, sljit_sw src1w,
1736 sljit_s32 src2, sljit_sw src2w)
1737 {
1738 /* arg1 goes to TMP_REG1 or a source register
1739 arg2 goes to TMP_REG2, an immediate or a source register
1740 TMP_REG3 can be used for caching
1741 result goes to TMP_REG2, so storing the result can still use TMP_REG1 and TMP_REG3. */
1742 sljit_s32 dst_r = TMP_REG2;
1743 sljit_s32 src1_r;
1744 sljit_sw src2_r = 0;
1745 sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;
1746
1747 if (!(flags & ALT_KEEP_CACHE)) {
1748 compiler->cache_arg = 0;
1749 compiler->cache_argw = 0;
1750 }
1751
1752 if (dst == 0) {
1753 SLJIT_ASSERT(HAS_FLAGS(op));
1754 flags |= UNUSED_DEST;
1755 dst = TMP_REG2;
1756 } else if (FAST_IS_REG(dst)) {
1757 dst_r = dst;
1758 flags |= REG_DEST;
1759 if (flags & MOVE_OP)
1760 src2_tmp_reg = dst_r;
1761 } else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
1762 flags |= SLOW_DEST;
1763
1764 if (flags & IMM_OP) {
1765 if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
1766 flags |= SRC2_IMM;
1767 src2_r = src2w;
1768 } else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
1769 flags |= SRC2_IMM;
1770 src2_r = src1w;
1771
1772 /* And swap arguments. */
1773 src1 = src2;
1774 src1w = src2w;
1775 src2 = SLJIT_IMM;
1776 /* src2w = src2_r unneeded. */
1777 }
1778 }
1779
1780 /* Source 1. */
1781 if (FAST_IS_REG(src1)) {
1782 src1_r = src1;
1783 flags |= REG1_SOURCE;
1784 } else if (src1 == SLJIT_IMM) {
1785 if (src1w) {
1786 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1787 src1_r = TMP_REG1;
1788 }
1789 else
1790 src1_r = TMP_ZERO;
1791 } else {
1792 if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
1793 FAIL_IF(compiler->error);
1794 else
1795 flags |= SLOW_SRC1;
1796 src1_r = TMP_REG1;
1797 }
1798
1799 /* Source 2. */
1800 if (FAST_IS_REG(src2)) {
1801 src2_r = src2;
1802 flags |= REG2_SOURCE;
1803 if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
1804 dst_r = (sljit_s32)src2_r;
1805 } else if (src2 == SLJIT_IMM) {
1806 if (!(flags & SRC2_IMM)) {
1807 if (src2w) {
1808 FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
1809 src2_r = src2_tmp_reg;
1810 } else {
1811 src2_r = TMP_ZERO;
1812 if (flags & MOVE_OP) {
1813 if (dst & SLJIT_MEM)
1814 dst_r = 0;
1815 else
1816 op = SLJIT_MOV;
1817 }
1818 }
1819 }
1820 } else {
1821 if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))
1822 FAIL_IF(compiler->error);
1823 else
1824 flags |= SLOW_SRC2;
1825
1826 src2_r = src2_tmp_reg;
1827 }
1828
1829 if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1830 SLJIT_ASSERT(src2_r == TMP_REG2);
1831 if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
1832 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1833 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
1834 } else {
1835 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1836 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1837 }
1838 }
1839 else if (flags & SLOW_SRC1)
1840 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1841 else if (flags & SLOW_SRC2)
1842 FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));
1843
1844 FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1845
1846 if (dst & SLJIT_MEM) {
1847 if (!(flags & SLOW_DEST)) {
1848 getput_arg_fast(compiler, flags, dst_r, dst, dstw);
1849 return compiler->error;
1850 }
1851 return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
1852 }
1853
1854 return SLJIT_SUCCESS;
1855 }
1856
1857 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1858 {
1859 CHECK_ERROR();
1860 CHECK(check_sljit_emit_op0(compiler, op));
1861
1862 switch (GET_OPCODE(op)) {
1863 case SLJIT_BREAKPOINT:
1864 return push_inst(compiler, BREAK);
1865 case SLJIT_NOP:
1866 return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
1867 case SLJIT_LMUL_UW:
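/* R1:R0 = R0 * R1 (unsigned 128-bit product). R1 is copied to TMP_REG1 first because
   MULH_DU overwrites R1 before the low half of the product is computed. */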
1868 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1869 FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1870 return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1871 case SLJIT_LMUL_SW:
1872 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1873 FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1874 return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1875 case SLJIT_DIVMOD_UW:
1876 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1877 FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1878 return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1879 case SLJIT_DIVMOD_SW:
1880 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1881 FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1882 return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1883 case SLJIT_DIV_UW:
1884 return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1885 case SLJIT_DIV_SW:
1886 return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1887 case SLJIT_ENDBR:
1888 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1889 return SLJIT_SUCCESS;
1890 }
1891
1892 SLJIT_UNREACHABLE();
1893 return SLJIT_ERR_UNSUPPORTED;
1894 }
1895
1896 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1897 sljit_s32 dst, sljit_sw dstw,
1898 sljit_s32 src, sljit_sw srcw)
1899 {
1900 sljit_s32 flags = 0;
1901
1902 CHECK_ERROR();
1903 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1904 ADJUST_LOCAL_OFFSET(dst, dstw);
1905 ADJUST_LOCAL_OFFSET(src, srcw);
1906
1907 if (op & SLJIT_32)
1908 flags = INT_DATA | SIGNED_DATA;
1909
1910 switch (GET_OPCODE(op)) {
1911 case SLJIT_MOV:
1912 case SLJIT_MOV_P:
1913 return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);
1914
1915 case SLJIT_MOV_U32:
1916 return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
1917
1918 case SLJIT_MOV_S32:
1919 /* Logical operators have no W variant, so sign extended input is necessary for them. */
1920 case SLJIT_MOV32:
1921 return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
1922
1923 case SLJIT_MOV_U8:
1924 return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
1925
1926 case SLJIT_MOV_S8:
1927 return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
1928
1929 case SLJIT_MOV_U16:
1930 return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
1931
1932 case SLJIT_MOV_S16:
1933 return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
1934
1935 case SLJIT_CLZ:
1936 case SLJIT_CTZ:
1937 case SLJIT_REV:
1938 return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);
1939
1940 case SLJIT_REV_U16:
1941 case SLJIT_REV_S16:
1942 return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1943
1944 case SLJIT_REV_U32:
1945 case SLJIT_REV_S32:
1946 return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1947 }
1948
1949 SLJIT_UNREACHABLE();
1950 return SLJIT_SUCCESS;
1951 }
1952
1953 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1954 sljit_s32 dst, sljit_sw dstw,
1955 sljit_s32 src1, sljit_sw src1w,
1956 sljit_s32 src2, sljit_sw src2w)
1957 {
1958 sljit_s32 flags = 0;
1959
1960 CHECK_ERROR();
1961 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1962 ADJUST_LOCAL_OFFSET(dst, dstw);
1963 ADJUST_LOCAL_OFFSET(src1, src1w);
1964 ADJUST_LOCAL_OFFSET(src2, src2w);
1965
1966 if (op & SLJIT_32) {
1967 flags |= INT_DATA | SIGNED_DATA;
1968 if (src1 == SLJIT_IMM)
1969 src1w = (sljit_s32)src1w;
1970 if (src2 == SLJIT_IMM)
1971 src2w = (sljit_s32)src2w;
1972 }
1973
1974
1975 switch (GET_OPCODE(op)) {
1976 case SLJIT_ADD:
1977 case SLJIT_ADDC:
1978 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1979 return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1980
1981 case SLJIT_SUB:
1982 case SLJIT_SUBC:
1983 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1984 return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1985
1986 case SLJIT_MUL:
1987 compiler->status_flags_state = 0;
1988 return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
1989
1990 case SLJIT_AND:
1991 case SLJIT_OR:
1992 case SLJIT_XOR:
1993 return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1994
1995 case SLJIT_SHL:
1996 case SLJIT_MSHL:
1997 case SLJIT_LSHR:
1998 case SLJIT_MLSHR:
1999 case SLJIT_ASHR:
2000 case SLJIT_MASHR:
2001 case SLJIT_ROTL:
2002 case SLJIT_ROTR:
2003 if (src2 == SLJIT_IMM) {
2004 if (op & SLJIT_32)
2005 src2w &= 0x1f;
2006 else
2007 src2w &= 0x3f;
2008 }
2009
2010 return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2011 }
2012
2013 SLJIT_UNREACHABLE();
2014 return SLJIT_SUCCESS;
2015 }
2016
2017 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2018 sljit_s32 src1, sljit_sw src1w,
2019 sljit_s32 src2, sljit_sw src2w)
2020 {
2021 CHECK_ERROR();
2022 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2023
2024 SLJIT_SKIP_CHECKS(compiler);
2025 return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
2026 }
2027
2028 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2029 sljit_s32 dst_reg,
2030 sljit_s32 src1, sljit_sw src1w,
2031 sljit_s32 src2, sljit_sw src2w)
2032 {
2033 CHECK_ERROR();
2034 CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2035
2036 switch (GET_OPCODE(op)) {
2037 case SLJIT_MULADD:
2038 SLJIT_SKIP_CHECKS(compiler);
2039 FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
2040 return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));
2041 }
2042
2043 return SLJIT_SUCCESS;
2044 }
2045
2046 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2047 sljit_s32 dst_reg,
2048 sljit_s32 src1_reg,
2049 sljit_s32 src2_reg,
2050 sljit_s32 src3, sljit_sw src3w)
2051 {
2052 sljit_s32 is_left;
2053 sljit_ins ins1, ins2, ins3;
2054 sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2055 sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
2056
2057
2058 CHECK_ERROR();
2059 CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2060
2061 is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2062
2063 if (src1_reg == src2_reg) {
2064 SLJIT_SKIP_CHECKS(compiler);
2065 return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
2066 }
2067
2068 ADJUST_LOCAL_OFFSET(src3, src3w);
2069
2070 if (src3 == SLJIT_IMM) {
2071 src3w &= bit_length - 1;
2072
2073 if (src3w == 0)
2074 return SLJIT_SUCCESS;
2075
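/* Constant shift amount: dst = (src1 << n) | (src2 >> (bits - n)) for a left shift-into,
   and the mirror image for a right shift-into. */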
2076 if (is_left) {
2077 ins1 = INST(SLLI, op) | IMM_I12(src3w);
2078 src3w = bit_length - src3w;
2079 ins2 = INST(SRLI, op) | IMM_I12(src3w);
2080 } else {
2081 ins1 = INST(SRLI, op) | IMM_I12(src3w);
2082 src3w = bit_length - src3w;
2083 ins2 = INST(SLLI, op) | IMM_I12(src3w);
2084 }
2085
2086 FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
2087 FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
2088 return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2089 }
2090
2091 if (src3 & SLJIT_MEM) {
2092 FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
2093 src3 = TMP_REG2;
2094 } else if (dst_reg == src3) {
2095 FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0)));
2096 src3 = TMP_REG2;
2097 }
2098
2099 if (is_left) {
2100 ins1 = INST(SLL, op);
2101 ins2 = INST(SRLI, op);
2102 ins3 = INST(SRL, op);
2103 } else {
2104 ins1 = INST(SRL, op);
2105 ins2 = INST(SLLI, op);
2106 ins3 = INST(SLL, op);
2107 }
2108
2109 FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));
2110
2111 if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
2112 FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
2113 FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
2114 src2_reg = TMP_REG1;
2115 } else
2116 FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));
2117
2118 FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
2119 return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2120 }
2121
2122 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2123 sljit_s32 src, sljit_sw srcw)
2124 {
2125 sljit_s32 base = src & REG_MASK;
2126
2127 CHECK_ERROR();
2128 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2129 ADJUST_LOCAL_OFFSET(src, srcw);
2130
2131 switch (op) {
2132 case SLJIT_FAST_RETURN:
2133 if (FAST_IS_REG(src))
2134 FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
2135 else
2136 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
2137
2138 return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2139 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2140 return SLJIT_SUCCESS;
2141 case SLJIT_PREFETCH_L1:
2142 case SLJIT_PREFETCH_L2:
2143 case SLJIT_PREFETCH_L3:
2144 case SLJIT_PREFETCH_ONCE:
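/* Every prefetch variant is emitted as PRELD; the cache hint occupies the rd field of the
   encoding, and hint 0 (prefetch for load) is used for all variants here. */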
2145 if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
2146 srcw &= 0x3;
2147 if (SLJIT_UNLIKELY(srcw))
2148 FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
2149 FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2150 } else {
2151 if (base && srcw <= I12_MAX && srcw >= I12_MIN)
2152 return push_inst(compiler, PRELD | RJ(base) | IMM_I12(srcw));
2153
2154 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2155 if (base != 0)
2156 FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2157 }
2158 return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
2159 }
2160 return SLJIT_SUCCESS;
2161 }
2162
2163 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2164 sljit_s32 dst, sljit_sw dstw)
2165 {
2166 sljit_s32 dst_r;
2167
2168 CHECK_ERROR();
2169 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2170 ADJUST_LOCAL_OFFSET(dst, dstw);
2171
2172 switch (op) {
2173 case SLJIT_FAST_ENTER:
2174 if (FAST_IS_REG(dst))
2175 return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2176
2177 SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
2178 break;
2179 case SLJIT_GET_RETURN_ADDRESS:
2180 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2181 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
2182 break;
2183 }
2184
2185 if (dst & SLJIT_MEM)
2186 return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
2187
2188 return SLJIT_SUCCESS;
2189 }
2190
2191 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2192 {
2193 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2194
2195 if (type == SLJIT_GP_REGISTER)
2196 return reg_map[reg];
2197
2198 if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)
2199 return -1;
2200
2201 return freg_map[reg];
2202 }
2203
2204 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2205 void *instruction, sljit_u32 size)
2206 {
2207 SLJIT_UNUSED_ARG(size);
2208 CHECK_ERROR();
2209 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2210
2211 return push_inst(compiler, *(sljit_ins*)instruction);
2212 }
2213
2214 /* --------------------------------------------------------------------- */
2215 /* Floating point operators */
2216 /* --------------------------------------------------------------------- */
2217 #define SET_COND(cond) (sljit_ins)(cond << 15)
2218
2219 #define COND_CUN SET_COND(0x8) /* UN */
2220 #define COND_CEQ SET_COND(0x4) /* EQ */
2221 #define COND_CUEQ SET_COND(0xc) /* UN EQ */
2222 #define COND_CLT SET_COND(0x2) /* LT */
2223 #define COND_CULT SET_COND(0xa) /* UN LT */
2224 #define COND_CLE SET_COND(0x6) /* LT EQ */
2225 #define COND_CULE SET_COND(0xe) /* UN LT EQ */
2226 #define COND_CNE SET_COND(0x10) /* GT LT */
2227 #define COND_CUNE SET_COND(0x18) /* UN GT LT */
2228 #define COND_COR SET_COND(0x14) /* GT LT EQ */
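/* SET_COND places the 5-bit FCMP.cond.{s,d} condition code in bits [19:15]; the comment after
   each code lists the relations (unordered / equal / less / greater) for which it evaluates true. */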
2229
2230 #define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? inst##_S : inst##_D)
2231 #define FCD(cd) (sljit_ins)(cd & 0x7)
2232 #define FCJ(cj) (sljit_ins)((cj & 0x7) << 5)
2233 #define FCA(ca) (sljit_ins)((ca & 0x7) << 15)
2234 #define F_OTHER_FLAG 1
2235
2236 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
2237
2238 /* Convert to integer, rounding toward zero. */
2239 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2240 sljit_s32 dst, sljit_sw dstw,
2241 sljit_s32 src, sljit_sw srcw)
2242 {
2243 sljit_ins inst;
2244 sljit_u32 word_data = 0;
2245 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2246
2247 switch (GET_OPCODE(op))
2248 {
2249 case SLJIT_CONV_SW_FROM_F64:
2250 word_data = 1;
2251 inst = FINST(FTINTRZ_L, op);
2252 break;
2253 case SLJIT_CONV_S32_FROM_F64:
2254 inst = FINST(FTINTRZ_W, op);
2255 break;
2256 default:
2257 inst = BREAK;
2258 SLJIT_UNREACHABLE();
2259 }
2260
2261 if (src & SLJIT_MEM) {
2262 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
2263 src = TMP_FREG1;
2264 }
2265
2266 FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
2267 FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));
2268
2269 if (dst & SLJIT_MEM)
2270 return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
2271 return SLJIT_SUCCESS;
2272 }
2273
2274 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
2275 sljit_s32 dst, sljit_sw dstw,
2276 sljit_s32 src, sljit_sw srcw)
2277 {
2278 sljit_ins inst;
2279 sljit_u32 word_data = 0;
2280 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2281
2282 switch (GET_OPCODE(op))
2283 {
2284 case SLJIT_CONV_F64_FROM_SW:
2285 word_data = 1;
2286 inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2287 break;
2288 case SLJIT_CONV_F64_FROM_S32:
2289 inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2290 break;
2291 default:
2292 inst = BREAK;
2293 SLJIT_UNREACHABLE();
2294 }
2295
2296 if (src & SLJIT_MEM) {
2297 FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2298 src = TMP_REG1;
2299 } else if (src == SLJIT_IMM) {
2300 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2301 srcw = (sljit_s32)srcw;
2302
2303 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2304 src = TMP_REG1;
2305 }
2306 FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2307 FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2308
2309 if (dst & SLJIT_MEM)
2310 return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2311 return SLJIT_SUCCESS;
2312 }
2313
2314 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2315 sljit_s32 dst, sljit_sw dstw,
2316 sljit_s32 src, sljit_sw srcw)
2317 {
2318 return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
2319 }
2320
2321 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2322 sljit_s32 dst, sljit_sw dstw,
2323 sljit_s32 src, sljit_sw srcw)
2324 {
2325 sljit_ins inst;
2326 sljit_u32 word_data = 0;
2327 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2328
2329 switch (GET_OPCODE(op))
2330 {
2331 case SLJIT_CONV_F64_FROM_UW:
2332 word_data = 1;
2333 inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2334 break;
2335 case SLJIT_CONV_F64_FROM_U32:
2336 inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2337 break;
2338 default:
2339 inst = BREAK;
2340 SLJIT_UNREACHABLE();
2341 }
2342
2343 if (src & SLJIT_MEM) {
2344 FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2345 src = TMP_REG1;
2346 } else if (src == SLJIT_IMM) {
2347 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
2348 srcw = (sljit_u32)srcw;
2349
2350 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2351 src = TMP_REG1;
2352 }
2353
2354 if (!word_data)
2355 FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));
2356
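/* Unsigned to float: if the value is non-negative as a signed number it is converted directly;
   otherwise it is halved (with the lost low bit OR-ed back in so rounding stays correct),
   converted, and doubled again by the FADD at the end of the sequence. */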
2357 FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));
2358
2359 FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2360 FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2361 FAIL_IF(push_inst(compiler, B | IMM_I26(7)));
2362
2363 FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
2364 FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
2365 FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
2366 FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
2367 FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2368 FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));
2369
2370 if (dst & SLJIT_MEM)
2371 return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2372 return SLJIT_SUCCESS;
2373 }
2374
2375 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2376 sljit_s32 src1, sljit_sw src1w,
2377 sljit_s32 src2, sljit_sw src2w)
2378 {
2379 if (src1 & SLJIT_MEM) {
2380 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2381 src1 = TMP_FREG1;
2382 }
2383
2384 if (src2 & SLJIT_MEM) {
2385 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
2386 src2 = TMP_FREG2;
2387 }
2388
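/* OTHER_FLAG is cleared first; the FP comparison writes condition flag register $fcc1
   (F_OTHER_FLAG), and MOVCF2GR copies that single bit back into OTHER_FLAG. */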
2389 FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));
2390
2391 switch (GET_FLAG_TYPE(op)) {
2392 case SLJIT_F_EQUAL:
2393 case SLJIT_ORDERED_EQUAL:
2394 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2395 break;
2396 case SLJIT_F_LESS:
2397 case SLJIT_ORDERED_LESS:
2398 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2399 break;
2400 case SLJIT_F_GREATER:
2401 case SLJIT_ORDERED_GREATER:
2402 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2403 break;
2404 case SLJIT_UNORDERED_OR_GREATER:
2405 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2406 break;
2407 case SLJIT_UNORDERED_OR_LESS:
2408 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2409 break;
2410 case SLJIT_UNORDERED_OR_EQUAL:
2411 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2412 break;
2413 default: /* SLJIT_UNORDERED */
2414 FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2415 }
2416 return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
2417 }
2418
2419 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2420 sljit_s32 dst, sljit_sw dstw,
2421 sljit_s32 src, sljit_sw srcw)
2422 {
2423 sljit_s32 dst_r;
2424
2425 CHECK_ERROR();
2426 compiler->cache_arg = 0;
2427 compiler->cache_argw = 0;
2428
2429 SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
2430 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2431
2432 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
2433 op ^= SLJIT_32;
2434
2435 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2436
2437 if (src & SLJIT_MEM) {
2438 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
2439 src = dst_r;
2440 }
2441
2442 switch (GET_OPCODE(op)) {
2443 case SLJIT_MOV_F64:
2444 if (src != dst_r) {
2445 if (!(dst & SLJIT_MEM))
2446 FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
2447 else
2448 dst_r = src;
2449 }
2450 break;
2451 case SLJIT_NEG_F64:
2452 FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
2453 break;
2454 case SLJIT_ABS_F64:
2455 FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
2456 break;
2457 case SLJIT_CONV_F64_FROM_F32:
2458 /* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
2459 FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
2460 op ^= SLJIT_32;
2461 break;
2462 }
2463
2464 if (dst & SLJIT_MEM)
2465 return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
2466 return SLJIT_SUCCESS;
2467 }
2468
2469 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2470 sljit_s32 dst, sljit_sw dstw,
2471 sljit_s32 src1, sljit_sw src1w,
2472 sljit_s32 src2, sljit_sw src2w)
2473 {
2474 sljit_s32 dst_r, flags = 0;
2475
2476 CHECK_ERROR();
2477 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2478 ADJUST_LOCAL_OFFSET(dst, dstw);
2479 ADJUST_LOCAL_OFFSET(src1, src1w);
2480 ADJUST_LOCAL_OFFSET(src2, src2w);
2481
2482 compiler->cache_arg = 0;
2483 compiler->cache_argw = 0;
2484
2485 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
2486
2487 if (src1 & SLJIT_MEM) {
2488 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
2489 FAIL_IF(compiler->error);
2490 src1 = TMP_FREG1;
2491 } else
2492 flags |= SLOW_SRC1;
2493 }
2494
2495 if (src2 & SLJIT_MEM) {
2496 if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
2497 FAIL_IF(compiler->error);
2498 src2 = TMP_FREG2;
2499 } else
2500 flags |= SLOW_SRC2;
2501 }
2502
2503 if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2504 if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2505 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
2506 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2507 } else {
2508 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2509 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2510 }
2511 }
2512 else if (flags & SLOW_SRC1)
2513 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2514 else if (flags & SLOW_SRC2)
2515 FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2516
2517 if (flags & SLOW_SRC1)
2518 src1 = TMP_FREG1;
2519 if (flags & SLOW_SRC2)
2520 src2 = TMP_FREG2;
2521
2522 switch (GET_OPCODE(op)) {
2523 case SLJIT_ADD_F64:
2524 FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2525 break;
2526 case SLJIT_SUB_F64:
2527 FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2528 break;
2529 case SLJIT_MUL_F64:
2530 FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2531 break;
2532 case SLJIT_DIV_F64:
2533 FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2534 break;
2535 }
2536
2537 if (dst_r != dst)
2538 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2539 return SLJIT_SUCCESS;
2540 }
2541
2542 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
2543 sljit_s32 dst_freg,
2544 sljit_s32 src1, sljit_sw src1w,
2545 sljit_s32 src2, sljit_sw src2w)
2546 {
2547 sljit_s32 reg;
2548
2549 CHECK_ERROR();
2550 CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
2551 ADJUST_LOCAL_OFFSET(src1, src1w);
2552 ADJUST_LOCAL_OFFSET(src2, src2w);
2553
2554 if (src2 & SLJIT_MEM) {
2555 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
2556 src2 = TMP_FREG1;
2557 }
2558
2559 if (src1 & SLJIT_MEM) {
2560 reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
2561 FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
2562 src1 = reg;
2563 }
2564
2565 return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
2566 }
2567
2568 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2569 sljit_s32 freg, sljit_f32 value)
2570 {
2571 union {
2572 sljit_s32 imm;
2573 sljit_f32 value;
2574 } u;
2575
2576 CHECK_ERROR();
2577 CHECK(check_sljit_emit_fset32(compiler, freg, value));
2578
2579 u.value = value;
2580
2581 if (u.imm == 0)
2582 return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));
2583
2584 FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2585 return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));
2586 }
2587
2588 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2589 sljit_s32 freg, sljit_f64 value)
2590 {
2591 union {
2592 sljit_sw imm;
2593 sljit_f64 value;
2594 } u;
2595
2596 CHECK_ERROR();
2597 CHECK(check_sljit_emit_fset64(compiler, freg, value));
2598
2599 u.value = value;
2600
2601 if (u.imm == 0)
2602 return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));
2603
2604 FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2605 return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));
2606 }
2607
2608 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2609 sljit_s32 freg, sljit_s32 reg)
2610 {
2611 sljit_ins inst;
2612
2613 CHECK_ERROR();
2614 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2615
2616 if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2617 inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
2618 else
2619 inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
2620 return push_inst(compiler, inst);
2621 }
2622
2623 /* --------------------------------------------------------------------- */
2624 /* Conditional instructions */
2625 /* --------------------------------------------------------------------- */
2626
2627 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2628 {
2629 struct sljit_label *label;
2630
2631 CHECK_ERROR_PTR();
2632 CHECK_PTR(check_sljit_emit_label(compiler));
2633
2634 if (compiler->last_label && compiler->last_label->size == compiler->size)
2635 return compiler->last_label;
2636
2637 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2638 PTR_FAIL_IF(!label);
2639 set_label(label, compiler);
2640 return label;
2641 }
2642
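/* Returns the conditional branch that is taken when the condition named by `type` does NOT hold,
   i.e. the branch that skips the (patchable) jump emitted right after it; 0 means the jump is
   unconditional. */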
2643 static sljit_ins get_jump_instruction(sljit_s32 type)
2644 {
2645 switch (type) {
2646 case SLJIT_EQUAL:
2647 case SLJIT_ATOMIC_NOT_STORED:
2648 return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2649 case SLJIT_NOT_EQUAL:
2650 case SLJIT_ATOMIC_STORED:
2651 return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2652 case SLJIT_LESS:
2653 case SLJIT_GREATER:
2654 case SLJIT_SIG_LESS:
2655 case SLJIT_SIG_GREATER:
2656 case SLJIT_OVERFLOW:
2657 case SLJIT_CARRY:
2658 return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2659 case SLJIT_GREATER_EQUAL:
2660 case SLJIT_LESS_EQUAL:
2661 case SLJIT_SIG_GREATER_EQUAL:
2662 case SLJIT_SIG_LESS_EQUAL:
2663 case SLJIT_NOT_OVERFLOW:
2664 case SLJIT_NOT_CARRY:
2665 return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2666 case SLJIT_F_EQUAL:
2667 case SLJIT_ORDERED_EQUAL:
2668 case SLJIT_F_LESS:
2669 case SLJIT_ORDERED_LESS:
2670 case SLJIT_ORDERED_GREATER:
2671 case SLJIT_UNORDERED_OR_GREATER:
2672 case SLJIT_F_GREATER:
2673 case SLJIT_UNORDERED_OR_LESS:
2674 case SLJIT_UNORDERED_OR_EQUAL:
2675 case SLJIT_UNORDERED:
2676 return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2677 case SLJIT_ORDERED_NOT_EQUAL:
2678 case SLJIT_ORDERED_LESS_EQUAL:
2679 case SLJIT_ORDERED_GREATER_EQUAL:
2680 case SLJIT_F_NOT_EQUAL:
2681 case SLJIT_UNORDERED_OR_NOT_EQUAL:
2682 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2683 case SLJIT_UNORDERED_OR_LESS_EQUAL:
2684 case SLJIT_F_LESS_EQUAL:
2685 case SLJIT_F_GREATER_EQUAL:
2686 case SLJIT_ORDERED:
2687 return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2688 default:
2689 /* Not conditional branch. */
2690 return 0;
2691 }
2692 }
2693
2694 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2695 {
2696 struct sljit_jump *jump;
2697 sljit_ins inst;
2698
2699 CHECK_ERROR_PTR();
2700 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2701
2702 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2703 PTR_FAIL_IF(!jump);
2704 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2705 type &= 0xff;
2706
2707 inst = get_jump_instruction(type);
2708
2709 if (inst != 0) {
2710 PTR_FAIL_IF(push_inst(compiler, inst));
2711 jump->flags |= IS_COND;
2712 }
2713
2714 jump->addr = compiler->size;
2715 inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);
2716
2717 if (type >= SLJIT_FAST_CALL) {
2718 jump->flags |= IS_CALL;
2719 inst |= RD(RETURN_ADDR_REG);
2720 }
2721
2722 PTR_FAIL_IF(push_inst(compiler, inst));
2723
2724 /* Maximum number of instructions required for generating a constant. */
2725 compiler->size += JUMP_MAX_SIZE - 1;
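/* The reserved slots give the final code generation pass room to materialize the target address
   into TMP_REG1 ahead of the JIRL above when no short branch can reach the destination. */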
2726 return jump;
2727 }
2728
2729 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2730 sljit_s32 arg_types)
2731 {
2732 SLJIT_UNUSED_ARG(arg_types);
2733 CHECK_ERROR_PTR();
2734 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2735
2736 if (type & SLJIT_CALL_RETURN) {
2737 PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2738 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2739 }
2740
2741 SLJIT_SKIP_CHECKS(compiler);
2742 return sljit_emit_jump(compiler, type);
2743 }
2744
2745 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2746 sljit_s32 src1, sljit_sw src1w,
2747 sljit_s32 src2, sljit_sw src2w)
2748 {
2749 struct sljit_jump *jump;
2750 sljit_s32 flags;
2751 sljit_ins inst;
2752 sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2753
2754 CHECK_ERROR_PTR();
2755 CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2756 ADJUST_LOCAL_OFFSET(src1, src1w);
2757 ADJUST_LOCAL_OFFSET(src2, src2w);
2758
2759 compiler->cache_arg = 0;
2760 compiler->cache_argw = 0;
2761
2762 flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2763
2764 if (src1 & SLJIT_MEM) {
2765 PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2766 src1 = TMP_REG1;
2767 }
2768
2769 if (src2 & SLJIT_MEM) {
2770 PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));
2771 src2 = src2_tmp_reg;
2772 }
2773
2774 if (src1 == SLJIT_IMM) {
2775 if (src1w != 0) {
2776 PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2777 src1 = TMP_REG1;
2778 }
2779 else
2780 src1 = TMP_ZERO;
2781 }
2782
2783 if (src2 == SLJIT_IMM) {
2784 if (src2w != 0) {
2785 PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
2786 src2 = src2_tmp_reg;
2787 }
2788 else
2789 src2 = TMP_ZERO;
2790 }
2791
2792 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2793 PTR_FAIL_IF(!jump);
2794 set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2795 type &= 0xff;
2796
2797 switch (type) {
2798 case SLJIT_EQUAL:
2799 inst = BNE | RJ(src1) | RD(src2);
2800 break;
2801 case SLJIT_NOT_EQUAL:
2802 inst = BEQ | RJ(src1) | RD(src2);
2803 break;
2804 case SLJIT_LESS:
2805 inst = BGEU | RJ(src1) | RD(src2);
2806 break;
2807 case SLJIT_GREATER_EQUAL:
2808 inst = BLTU | RJ(src1) | RD(src2);
2809 break;
2810 case SLJIT_GREATER:
2811 inst = BGEU | RJ(src2) | RD(src1);
2812 break;
2813 case SLJIT_LESS_EQUAL:
2814 inst = BLTU | RJ(src2) | RD(src1);
2815 break;
2816 case SLJIT_SIG_LESS:
2817 inst = BGE | RJ(src1) | RD(src2);
2818 break;
2819 case SLJIT_SIG_GREATER_EQUAL:
2820 inst = BLT | RJ(src1) | RD(src2);
2821 break;
2822 case SLJIT_SIG_GREATER:
2823 inst = BGE | RJ(src2) | RD(src1);
2824 break;
2825 case SLJIT_SIG_LESS_EQUAL:
2826 inst = BLT | RJ(src2) | RD(src1);
2827 break;
2828 default:
2829 inst = BREAK;
2830 SLJIT_UNREACHABLE();
2831 }
2832
2833 PTR_FAIL_IF(push_inst(compiler, inst));
2834
2835 jump->addr = compiler->size;
2836 PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2837
2838 /* Maximum number of instructions required for generating a constant. */
2839 compiler->size += JUMP_MAX_SIZE - 1;
2840
2841 return jump;
2842 }
2843
2844 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2845 {
2846 struct sljit_jump *jump;
2847
2848 CHECK_ERROR();
2849 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2850
2851 if (src != SLJIT_IMM) {
2852 if (src & SLJIT_MEM) {
2853 ADJUST_LOCAL_OFFSET(src, srcw);
2854 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2855 src = TMP_REG1;
2856 }
2857 return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
2858 }
2859
2860 /* These jumps are converted to jump/call instructions when possible. */
2861 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2862 FAIL_IF(!jump);
2863 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2864 jump->u.target = (sljit_uw)srcw;
2865
2866 jump->addr = compiler->size;
2867 FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2868
2869 /* Maximum number of instructions required for generating a constant. */
2870 compiler->size += JUMP_MAX_SIZE - 1;
2871
2872 return SLJIT_SUCCESS;
2873 }
2874
2875 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2876 sljit_s32 arg_types,
2877 sljit_s32 src, sljit_sw srcw)
2878 {
2879 SLJIT_UNUSED_ARG(arg_types);
2880 CHECK_ERROR();
2881 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2882
2883 if (src & SLJIT_MEM) {
2884 ADJUST_LOCAL_OFFSET(src, srcw);
2885 FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2886 src = TMP_REG1;
2887 }
2888
2889 if (type & SLJIT_CALL_RETURN) {
2890 if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2891 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
2892 src = TMP_REG1;
2893 }
2894
2895 FAIL_IF(emit_stack_frame_release(compiler, 0));
2896 type = SLJIT_JUMP;
2897 }
2898
2899 SLJIT_SKIP_CHECKS(compiler);
2900 return sljit_emit_ijump(compiler, type, src, srcw);
2901 }
2902
2903 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2904 sljit_s32 dst, sljit_sw dstw,
2905 sljit_s32 type)
2906 {
2907 sljit_s32 src_r, dst_r, invert;
2908 sljit_s32 saved_op = op;
2909 sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2910
2911 CHECK_ERROR();
2912 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2913 ADJUST_LOCAL_OFFSET(dst, dstw);
2914
2915 op = GET_OPCODE(op);
2916 dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2917
2918 compiler->cache_arg = 0;
2919 compiler->cache_argw = 0;
2920
2921 if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2922 FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
2923
2924 if (type < SLJIT_F_EQUAL) {
2925 src_r = OTHER_FLAG;
2926 invert = type & 0x1;
2927
2928 switch (type) {
2929 case SLJIT_EQUAL:
2930 case SLJIT_NOT_EQUAL:
2931 FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2932 src_r = dst_r;
2933 break;
2934 case SLJIT_ATOMIC_STORED:
2935 case SLJIT_ATOMIC_NOT_STORED:
2936 FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2937 src_r = dst_r;
2938 invert ^= 0x1;
2939 break;
2940 case SLJIT_OVERFLOW:
2941 case SLJIT_NOT_OVERFLOW:
2942 if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
2943 src_r = OTHER_FLAG;
2944 break;
2945 }
2946 FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2947 src_r = dst_r;
2948 invert ^= 0x1;
2949 break;
2950 }
2951 } else {
2952 invert = 0;
2953 src_r = OTHER_FLAG;
2954
2955 switch (type) {
2956 case SLJIT_ORDERED_NOT_EQUAL:
2957 case SLJIT_ORDERED_LESS_EQUAL:
2958 case SLJIT_ORDERED_GREATER_EQUAL:
2959 case SLJIT_F_NOT_EQUAL:
2960 case SLJIT_UNORDERED_OR_NOT_EQUAL:
2961 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2962 case SLJIT_UNORDERED_OR_LESS_EQUAL:
2963 case SLJIT_F_LESS_EQUAL:
2964 case SLJIT_F_GREATER_EQUAL:
2965 case SLJIT_ORDERED:
2966 invert = 1;
2967 break;
2968 }
2969 }
2970
2971 if (invert) {
2972 FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
2973 src_r = dst_r;
2974 }
2975
2976 if (op < SLJIT_ADD) {
2977 if (dst & SLJIT_MEM)
2978 return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
2979
2980 if (src_r != dst_r)
2981 return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
2982 return SLJIT_SUCCESS;
2983 }
2984
2985 mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
2986
2987 if (dst & SLJIT_MEM)
2988 return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
2989 return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
2990 }
2991
2992 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2993 sljit_s32 dst_reg,
2994 sljit_s32 src1, sljit_sw src1w,
2995 sljit_s32 src2_reg)
2996 {
2997 sljit_ins *ptr;
2998 sljit_uw size;
2999 sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
3000
3001 CHECK_ERROR();
3002 CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3003 ADJUST_LOCAL_OFFSET(src1, src1w);
3004
3005 if (dst_reg != src2_reg) {
3006 if (dst_reg == src1) {
3007 src1 = src2_reg;
3008 src1w = 0;
3009 type ^= 0x1;
3010 } else {
3011 if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3012 FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));
3013
3014 if ((src1 & REG_MASK) == dst_reg)
3015 src1 = (src1 & ~REG_MASK) | TMP_REG1;
3016
3017 if (OFFS_REG(src1) == dst_reg)
3018 src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
3019 }
3020
3021 FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
3022 }
3023 }
3024
3025 size = compiler->size;
3026
3027 ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
3028 FAIL_IF(!ptr);
3029 compiler->size++;
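/* One instruction slot is reserved here; after the conditional move of src1 into dst_reg is
   emitted, the slot is patched (at the bottom of this function) with the inverse branch returned
   by get_jump_instruction(), so the move is skipped when the condition does not hold. */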
3030
3031 if (src1 & SLJIT_MEM) {
3032 FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
3033 } else if (src1 == SLJIT_IMM) {
3034 if (type & SLJIT_32)
3035 src1w = (sljit_s32)src1w;
3036 FAIL_IF(load_immediate(compiler, dst_reg, src1w));
3037 } else
3038 FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));
3039
3040 *ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);
3041 return SLJIT_SUCCESS;
3042 }
3043
3044 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3045 sljit_s32 dst_freg,
3046 sljit_s32 src1, sljit_sw src1w,
3047 sljit_s32 src2_freg)
3048 {
3049 sljit_s32 invert = 0;
3050
3051 CHECK_ERROR();
3052 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3053
3054 ADJUST_LOCAL_OFFSET(src1, src1w);
3055
3056 if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
3057 if ((type & ~SLJIT_32) == SLJIT_EQUAL)
3058 invert = 1;
3059 FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
3060 } else {
3061 if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))
3062 invert = 1;
3063 FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
3064 }
3065
3066 if (src1 & SLJIT_MEM) {
3067 FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));
3068 if (invert)
3069 return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3070 return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));
3071 } else {
3072 if (invert)
3073 return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3074 return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
3075 }
3076 }
3077
3078 #undef FLOAT_DATA
3079
3080 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3081 sljit_s32 reg,
3082 sljit_s32 mem, sljit_sw memw)
3083 {
3084 sljit_s32 flags;
3085
3086 CHECK_ERROR();
3087 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3088
3089 if (!(reg & REG_PAIR_MASK))
3090 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3091
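/* Register pair transfer: the two words live at memw and memw + sizeof(sljit_sw), so the base
   register and displacement are normalized below until both offsets fit the signed 12-bit
   immediate of the word load/store (see the assert further down). */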
3092 if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3093 memw &= 0x3;
3094
3095 if (SLJIT_UNLIKELY(memw != 0)) {
3096 FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
3097 FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3098 } else
3099 FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));
3100
3101 mem = TMP_REG1;
3102 memw = 0;
3103 } else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
3104 if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
3105 FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
3106 memw &= 0xfff;
3107 } else {
3108 FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
3109 memw = 0;
3110 }
3111
3112 if (mem & REG_MASK)
3113 FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3114
3115 mem = TMP_REG1;
3116 } else {
3117 mem &= REG_MASK;
3118 memw &= 0xfff;
3119 }
3120
3121 SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));
3122
3123 if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
3124 FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
3125 return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
3126 }
3127
3128 flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
3129
3130 FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
3131 return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
3132 }
3133
3134 #undef TO_ARGW_HI
3135
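/* Reduce a SIMD memory operand to a plain base register: indexed forms and offsets that
   cannot be encoded are folded into TMP_REG3, so callers can use a zero displacement. */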
3136 static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
3137 {
3138 sljit_s32 mem = *mem_ptr;
3139
3140 if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3141 *mem_ptr = TMP_REG3;
3142 FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(mem)) | IMM_I12(memw & 0x3)));
3143 return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem & REG_MASK));
3144 }
3145
3146 if (!(mem & REG_MASK)) {
3147 *mem_ptr = TMP_REG3;
3148 return load_immediate(compiler, TMP_REG3, memw);
3149 }
3150
3151 mem &= REG_MASK;
3152
3153 if (memw == 0) {
3154 *mem_ptr = mem;
3155 return SLJIT_SUCCESS;
3156 }
3157
3158 *mem_ptr = TMP_REG3;
3159
3160 FAIL_IF(load_immediate(compiler, TMP_REG3, memw));
3161 return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem));
3162 }
3163
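/* Full register SIMD move: register-to-register copies are encoded as VOR_V (XVOR_V for
   256 bit registers), memory operands use VLD/VST or XVLD/XVST. */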
3164 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3165 sljit_s32 freg,
3166 sljit_s32 srcdst, sljit_sw srcdstw)
3167 {
3168 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3169 sljit_ins ins = 0;
3170
3171 CHECK_ERROR();
3172 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3173
3174 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3175
3176 if (reg_size != 5 && reg_size != 4)
3177 return SLJIT_ERR_UNSUPPORTED;
3178
3179 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3180 return SLJIT_ERR_UNSUPPORTED;
3181
3182 if (type & SLJIT_SIMD_TEST)
3183 return SLJIT_SUCCESS;
3184
3185 if (!(srcdst & SLJIT_MEM)) {
3186 if (type & SLJIT_SIMD_STORE)
3187 ins = FRD(srcdst) | FRJ(freg) | FRK(freg);
3188 else
3189 ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst);
3190
3191 if (reg_size == 5)
3192 ins |= VOR_V | (sljit_ins)1 << 26;
3193 else
3194 ins |= VOR_V;
3195
3196 return push_inst(compiler, ins);
3197 }
3198
3199 ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3200
3201 if (reg_size == 5)
3202 ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3203
3204 if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))
3205 return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));
3206 else {
3207 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3208 return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0));
3209 }
3210 }
3211
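/* Broadcast a value to all lanes: VLDREPL for memory sources, VREPLGR2VR for general
   registers (and the zero register for float immediates), VREPLVE for float registers;
   on LASX the 128 bit result is duplicated into the upper half with XVPERMI. */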
3212 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3213 sljit_s32 freg,
3214 sljit_s32 src, sljit_sw srcw)
3215 {
3216 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3217 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3218 sljit_ins ins = 0;
3219
3220 CHECK_ERROR();
3221 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3222
3223 ADJUST_LOCAL_OFFSET(src, srcw);
3224
3225 if (reg_size != 5 && reg_size != 4)
3226 return SLJIT_ERR_UNSUPPORTED;
3227
3228 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3229 return SLJIT_ERR_UNSUPPORTED;
3230
3231 if (type & SLJIT_SIMD_TEST)
3232 return SLJIT_SUCCESS;
3233
3234 if (src & SLJIT_MEM) {
3235 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3236
3237 if (reg_size == 5)
3238 ins = (sljit_ins)1 << 25;
3239
3240 return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));
3241 }
3242
3243 if (reg_size == 5)
3244 ins = (sljit_ins)1 << 26;
3245
3246 if (type & SLJIT_SIMD_FLOAT) {
3247 if (src == SLJIT_IMM)
3248 return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);
3249
3250 FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));
3251
3252 if (reg_size == 5) {
3253 ins = (sljit_ins)(0x44 << 10);
3254 return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3255 }
3256
3257 return SLJIT_SUCCESS;
3258 }
3259
3260 ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;
3261
3262 if (src == SLJIT_IMM) {
3263 FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
3264 src = TMP_REG2;
3265 }
3266
3267 return push_inst(compiler, ins | FRD(freg) | RJ(src));
3268 }
3269
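/* Move a single lane between a vector register and a register/memory operand. The narrow
   VINSGR2VR/VPICKVE2GR forms only address the low 128 bits, so LASX accesses to the upper
   half are routed through TMP_FREG1 and XVPERMI. */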
3270 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3271 sljit_s32 freg, sljit_s32 lane_index,
3272 sljit_s32 srcdst, sljit_sw srcdstw)
3273 {
3274 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3275 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3276 sljit_ins ins = 0;
3277
3278 CHECK_ERROR();
3279 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3280
3281 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3282
3283 if (reg_size != 5 && reg_size != 4)
3284 return SLJIT_ERR_UNSUPPORTED;
3285
3286 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3287 return SLJIT_ERR_UNSUPPORTED;
3288
3289 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3290 return SLJIT_ERR_UNSUPPORTED;
3291
3295 if (type & SLJIT_SIMD_TEST)
3296 return SLJIT_SUCCESS;
3297
3298 if (type & SLJIT_SIMD_LANE_ZERO) {
3299 ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0;
3300
3301 if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
3302 FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3303 srcdst = TMP_FREG1;
3304 srcdstw = 0;
3305 }
3306
3307 FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg)));
3308 }
3309
3310 if (srcdst & SLJIT_MEM) {
3311 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3312
3313 if (reg_size == 5)
3314 ins = (sljit_ins)1 << 25;
3315
3316 if (type & SLJIT_SIMD_STORE) {
3317 ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));
3318 return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst));
3319 } else {
3320 FAIL_IF(emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0));
3321 srcdst = TMP_REG1;
3322 ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3323
3324 if (reg_size == 5) {
3325 if (elem_size < 2) {
3326 FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3327 if (lane_index >= (2 << (3 - elem_size))) {
3328 FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3329 FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3330 return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3331 } else {
3332 FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3333 return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3334 }
3335 } else
3336 ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3337 }
3338
3339 return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3340 }
3341 }
3342
3343 if (type & SLJIT_SIMD_FLOAT) {
3344 ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3345
3346 if (type & SLJIT_SIMD_STORE) {
3347 FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index)));
3348 return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));
3349 } else {
3350 FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));
3351 return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index));
3352 }
3353 }
3354
3355 if (srcdst == SLJIT_IMM) {
3356 FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
3357 srcdst = TMP_REG1;
3358 }
3359
3360 if (type & SLJIT_SIMD_STORE) {
3361 ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3362
3363 if (type & SLJIT_SIMD_LANE_SIGNED)
3364 ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3365 else
3366 ins |= VPICKVE2GR_U;
3367
3368 if (reg_size == 5) {
3369 if (elem_size < 2) {
3370 if (lane_index >= (2 << (3 - elem_size))) {
3371 if (type & SLJIT_SIMD_LANE_SIGNED)
3372 ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3373 else
3374 ins |= VPICKVE2GR_U;
3375
3376 FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3377 FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3378 return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));
3379 }
3380 } else {
3381 ins ^= (sljit_ins)1 << (15 - elem_size);
3382 ins |= (sljit_ins)1 << 26;
3383 }
3384 }
3385
3386 return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index));
3387 } else {
3388 ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3389
3390 if (reg_size == 5) {
3391 if (elem_size < 2) {
3392 FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3393 if (lane_index >= (2 << (3 - elem_size))) {
3394 FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3395 FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3396 return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3397 } else {
3398 FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3399 return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3400 }
3401 } else
3402 ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3403 }
3404
3405 return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3406 }
3407
3408 return SLJIT_ERR_UNSUPPORTED;
3409 }
3410
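/* Broadcast one lane of src into every lane of freg with VREPLVEI; on LASX the selected
   128 bit half is then copied across the whole register with XVPERMI. */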
3411 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3412 sljit_s32 freg,
3413 sljit_s32 src, sljit_s32 src_lane_index)
3414 {
3415 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3416 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3417 sljit_ins ins = 0;
3418
3419 CHECK_ERROR();
3420 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3421
3422 if (reg_size != 5 && reg_size != 4)
3423 return SLJIT_ERR_UNSUPPORTED;
3424
3425 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3426 return SLJIT_ERR_UNSUPPORTED;
3427
3428 if (type & SLJIT_SIMD_TEST)
3429 return SLJIT_SUCCESS;
3430
3431 ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3432
3433 if (reg_size == 5) {
3434 FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));
3435
3436 ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);
3437
3438 return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3439 }
3440
3441 return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index));
3442 }
3443
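/* Widen packed elements: integer extends are repeated VSLLWIL steps (the unsigned variant
   when SLJIT_SIMD_EXTEND_SIGNED is not set), float extends use VFCVTL_D_S; on LASX the
   source is repositioned with XVPERMI before each step. */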
3444 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
3445 sljit_s32 freg,
3446 sljit_s32 src, sljit_sw srcw)
3447 {
3448 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3449 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3450 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3451 sljit_ins ins = 0;
3452
3453 CHECK_ERROR();
3454 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
3455
3456 ADJUST_LOCAL_OFFSET(src, srcw);
3457
3458 if (reg_size != 5 && reg_size != 4)
3459 return SLJIT_ERR_UNSUPPORTED;
3460
3461 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3462 return SLJIT_ERR_UNSUPPORTED;
3463
3464 if (type & SLJIT_SIMD_TEST)
3465 return SLJIT_SUCCESS;
3466
3467 if (src & SLJIT_MEM) {
3468 ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3469
3470 if (reg_size == 5)
3471 ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3472
3473 if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))
3474 FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw)));
3475 else {
3476 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3477 FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0)));
3478 }
3479 src = freg;
3480 }
3481
3482 if (type & SLJIT_SIMD_FLOAT) {
3483 if (elem_size != 2 || elem2_size != 3)
3484 return SLJIT_ERR_UNSUPPORTED;
3485
3486 ins = 0;
3487 if (reg_size == 5) {
3488 ins = (sljit_ins)1 << 26;
3489 FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3490 }
3491
3492 return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src));
3493 }
3494
3495 ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);
3496
3497 if (reg_size == 5)
3498 ins |= (sljit_ins)1 << 26;
3499
3500 do {
3501 if (reg_size == 5)
3502 FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3503
3504 FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src)));
3505 src = freg;
3506 } while (++elem_size < elem2_size);
3507
3508 return SLJIT_SUCCESS;
3509 }
3510
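/* Collect the sign bits of every lane: VMSKLTZ builds the mask and VPICKVE2GR_U moves it
   to a general register; on LASX the mask of the upper half is extracted separately,
   shifted by the number of lanes per half and merged with OR. */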
3511 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
3512 sljit_s32 freg,
3513 sljit_s32 dst, sljit_sw dstw)
3514 {
3515 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3516 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3517 sljit_ins ins = 0;
3518 sljit_s32 dst_r;
3519
3520 CHECK_ERROR();
3521 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
3522
3523 ADJUST_LOCAL_OFFSET(dst, dstw);
3524
3525 if (reg_size != 5 && reg_size != 4)
3526 return SLJIT_ERR_UNSUPPORTED;
3527
3528 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3529 return SLJIT_ERR_UNSUPPORTED;
3530
3531 if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3532 return SLJIT_ERR_UNSUPPORTED;
3533
3534 if (type & SLJIT_SIMD_TEST)
3535 return SLJIT_SUCCESS;
3536
3537 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3538
3539 if (reg_size == 5)
3540 ins = (sljit_ins)1 << 26;
3541
3542 FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg)));
3543
3544 FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));
3545
3546 if (reg_size == 5) {
3547 FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));
3548 FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));
3549 FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));
3550 }
3551
3552 if (dst_r == TMP_REG2)
3553 return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);
3554
3555 return SLJIT_SUCCESS;
3556 }
3557
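/* Bitwise two-operand SIMD operations map directly onto VAND_V / VOR_V / VXOR_V
   (their 256 bit XV* counterparts when reg_size is 5). */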
3558 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3559 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3560 {
3561 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3562 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3563 sljit_ins ins = 0;
3564
3565 CHECK_ERROR();
3566 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3567
3568 if (reg_size != 5 && reg_size != 4)
3569 return SLJIT_ERR_UNSUPPORTED;
3570
3571 if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3572 return SLJIT_ERR_UNSUPPORTED;
3573
3574 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3575 return SLJIT_ERR_UNSUPPORTED;
3576
3577 if (type & SLJIT_SIMD_TEST)
3578 return SLJIT_SUCCESS;
3579
3580 switch (SLJIT_SIMD_GET_OPCODE(type)) {
3581 case SLJIT_SIMD_OP2_AND:
3582 ins = VAND_V;
3583 break;
3584 case SLJIT_SIMD_OP2_OR:
3585 ins = VOR_V;
3586 break;
3587 case SLJIT_SIMD_OP2_XOR:
3588 ins = VXOR_V;
3589 break;
3590 }
3591
3592 if (reg_size == 5)
3593 ins |= (sljit_ins)1 << 26;
3594
3595 return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg));
3596 }
3597
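/* An atomic load is a plain load of the requested width; the loaded value is expected back
   in temp_reg of sljit_emit_atomic_store, which performs the actual compare-and-swap. */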
3598 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
3599 sljit_s32 op,
3600 sljit_s32 dst_reg,
3601 sljit_s32 mem_reg)
3602 {
3603 sljit_ins ins;
3604
3605 CHECK_ERROR();
3606 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
3607
3608 switch (GET_OPCODE(op)) {
3609 case SLJIT_MOV_U8:
3610 ins = LD_BU;
3611 break;
3612 case SLJIT_MOV_U16:
3613 ins = LD_HU;
3614 break;
3615 case SLJIT_MOV32:
3616 ins = LD_W;
3617 break;
3618 case SLJIT_MOV_U32:
3619 ins = LD_WU;
3620 break;
3621 default:
3622 ins = LD_D;
3623 break;
3624 }
3625
3626 return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
3627 }
3628
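/* The store is implemented as AMCAS: the expected value (temp_reg) is compared with memory
   and src_reg is written on a match. With SLJIT_SET_ATOMIC_STORED the old memory value
   returned by AMCAS is zero extended for the narrow widths (BSTRPICK_D) and compared with
   temp_reg to set EQUAL_FLAG. */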
3629 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
3630 sljit_s32 op,
3631 sljit_s32 src_reg,
3632 sljit_s32 mem_reg,
3633 sljit_s32 temp_reg)
3634 {
3635 sljit_ins ins = 0;
3636 sljit_ins unsign = 0;
3637 sljit_s32 tmp = temp_reg;
3638
3639 CHECK_ERROR();
3640 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
3641
3642 switch (GET_OPCODE(op)) {
3643 case SLJIT_MOV_U8:
3644 ins = AMCAS_B;
3645 unsign = BSTRPICK_D | (7 << 16);
3646 break;
3647 case SLJIT_MOV_U16:
3648 ins = AMCAS_H;
3649 unsign = BSTRPICK_D | (15 << 16);
3650 break;
3651 case SLJIT_MOV32:
3652 ins = AMCAS_W;
3653 break;
3654 case SLJIT_MOV_U32:
3655 ins = AMCAS_W;
3656 unsign = BSTRPICK_D | (31 << 16);
3657 break;
3658 default:
3659 ins = AMCAS_D;
3660 break;
3661 }
3662
3663 if (op & SLJIT_SET_ATOMIC_STORED) {
3664 FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO)));
3665 tmp = TMP_REG1;
3666 }
3667 FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
3668 if (!(op & SLJIT_SET_ATOMIC_STORED))
3669 return SLJIT_SUCCESS;
3670
3671 if (unsign)
3672 FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));
3673
3674 FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg)));
3675 return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1));
3676 }
3677
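/* Materialize a 64 bit constant with a fixed four instruction sequence:
   LU12I_W (bits 31..12), LU32I_D (bits 51..32), LU52I_D (bits 63..52), ORI (bits 11..0).
   The fixed length allows sljit_set_jump_addr to patch the value in place later. */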
3678 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
3679 {
3680 SLJIT_UNUSED_ARG(last_ins);
3681
3682 FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
3683 FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
3684 FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
3685 return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
3686 }
3687
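/* Patch the four instruction constant sequence produced by emit_const. The last slot is
   either the ORI of a constant load or the JIRL of an indirect jump/call, so both
   encodings are handled. */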
3688 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3689 {
3690 sljit_ins *inst = (sljit_ins*)addr;
3691 SLJIT_UNUSED_ARG(executable_offset);
3692
3693 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3694
3695 SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
3696 inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);
3697
3698 SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
3699 inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target >> 32) & 0xfffff) << 5);
3700
3701 SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
3702 inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);
3703
3704 SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
3705 if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
3706 inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
3707 else
3708 inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);
3709
3710 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3711
3712 inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3713 SLJIT_CACHE_FLUSH(inst, inst + 4);
3714 }
3715
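/* Emit a patchable 64 bit constant load and record it so sljit_set_const can rewrite it;
   memory destinations are stored through TMP_REG2. */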
3716 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3717 {
3718 struct sljit_const *const_;
3719 sljit_s32 dst_r;
3720
3721 CHECK_ERROR_PTR();
3722 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3723 ADJUST_LOCAL_OFFSET(dst, dstw);
3724
3725 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3726 PTR_FAIL_IF(!const_);
3727 set_const(const_, compiler);
3728
3729 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3730 PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));
3731
3732 if (dst & SLJIT_MEM)
3733 PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3734
3735 return const_;
3736 }
3737
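/* Reserve a patchable address load: a placeholder instruction carrying the destination
   register is emitted and compiler->size is advanced to cover JUMP_MAX_SIZE slots, so the
   code generation pass can expand it into the final instruction sequence. */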
3738 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3739 {
3740 struct sljit_jump *jump;
3741 sljit_s32 dst_r;
3742
3743 CHECK_ERROR_PTR();
3744 CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
3745 ADJUST_LOCAL_OFFSET(dst, dstw);
3746
3747 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3748 PTR_FAIL_IF(!jump);
3749 set_mov_addr(jump, compiler, 0);
3750
3751 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3752 PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
3753
3754 compiler->size += JUMP_MAX_SIZE - 1;
3755
3756 if (dst & SLJIT_MEM)
3757 PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3758
3759 return jump;
3760 }
3761
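/* Constants are patched with the same routine as jump targets. */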
3762 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3763 {
3764 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3765 }
3766