1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 	return "LOONGARCH" SLJIT_CPUINFO;
30 }
31 
32 typedef sljit_u32 sljit_ins;
33 
34 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
35 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
36 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
37 #define TMP_ZERO	0
38 
39 /* Flags are kept in volatile registers. */
40 #define EQUAL_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 5)
41 #define RETURN_ADDR_REG	TMP_REG2
42 #define OTHER_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 6)
43 
44 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
46 
47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
48 	0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
49 };
50 
51 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
52 	0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
53 };
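/* Illustrative note (not part of the original tables): reg_map[] translates
   SLJIT register indices into LoongArch hardware register numbers, and
   freg_map[] does the same for floating point registers. A few entries,
   assuming the usual SLJIT index layout (SLJIT_R0 == 1 and
   SLJIT_SP == SLJIT_NUMBER_OF_REGISTERS + 1, defined in sljitLir.h):

     reg_map[SLJIT_R0] == 4    r4 ($a0, first argument register)
     reg_map[SLJIT_SP] == 3    r3 ($sp)
     reg_map[TMP_REG2] == 1    r1 ($ra), which is why RETURN_ADDR_REG
                               above is an alias of TMP_REG2
     reg_map[TMP_ZERO] == 0    r0 ($zero) */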
54 
55 /* --------------------------------------------------------------------- */
56 /*  Instruction forms                                                    */
57 /* --------------------------------------------------------------------- */
58 
59 /*
60 LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):
61 
62 | Format name  | Composition                 |
63 | 2R           | Opcode + Rj + Rd            |
64 | 3R           | Opcode + Rk + Rj + Rd       |
65 | 4R           | Opcode + Ra + Rk + Rj + Rd  |
66 | 2RI8         | Opcode + I8 + Rj + Rd       |
67 | 2RI12        | Opcode + I12 + Rj + Rd      |
68 | 2RI14        | Opcode + I14 + Rj + Rd      |
69 | 2RI16        | Opcode + I16 + Rj + Rd      |
70 | 1RI21        | Opcode + I21L + Rj + I21H   |
71 | I26          | Opcode + I26L + I26H        |
72 
73 Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
74 I8/I12/I14/I16/I21/I26 are immediate operands of the indicated width. The longer I21 and I26 immediates are split into a lower
75 and a higher part within the instruction word, denoted by the “L” and “H” suffixes. */
76 
77 #define RD(rd) ((sljit_ins)reg_map[rd])
78 #define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
79 #define RK(rk) ((sljit_ins)reg_map[rk] << 10)
80 #define RA(ra) ((sljit_ins)reg_map[ra] << 15)
81 
82 #define FD(fd) ((sljit_ins)reg_map[fd])
83 #define FRD(fd) ((sljit_ins)freg_map[fd])
84 #define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
85 #define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
86 #define FRA(fa) ((sljit_ins)freg_map[fa] << 15)
87 
88 #define IMM_V(imm) ((sljit_ins)(imm) << 10)
89 #define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
90 #define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
91 #define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
92 #define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
93 #define IMM_I20(imm) (((sljit_ins)(imm)&0xffffffff) >> 12 << 5)
94 #define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
95 #define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))
96 
97 #define OPC_I26(opc) ((sljit_ins)(opc) << 26)
98 #define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
99 #define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
100 #define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
101 #define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
102 #define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
103 #define OPC_4R(opc) ((sljit_ins)(opc) << 20)
104 #define OPC_3R(opc) ((sljit_ins)(opc) << 15)
105 #define OPC_2R(opc) ((sljit_ins)(opc) << 10)
106 #define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
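/* Illustrative sketch (not used by the compiler itself): an encoded
   instruction is simply the opcode pattern OR-ed with its operand fields.
   A 3R-format add.d with hypothetical operand variables r_dst, r_src1 and
   r_src2 would be assembled as:

     sljit_ins ins = ADD_D        (ADD_D is defined a few lines below)
         | RD(r_dst)              (bits 4..0:   destination register)
         | RJ(r_src1)             (bits 9..5:   first source)
         | RK(r_src2);            (bits 14..10: second source)
     push_inst(compiler, ins);    (push_inst() appears later in this file)

   2RI12-format instructions use IMM_I12() instead of RK(), e.g.
   ADDI_D | RD(r_dst) | RJ(r_src1) | IMM_I12(imm). */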
107 
108 /* Arithmetic operation instructions */
109 #define ADD_W OPC_3R(0x20)
110 #define ADD_D OPC_3R(0x21)
111 #define SUB_W OPC_3R(0x22)
112 #define SUB_D OPC_3R(0x23)
113 #define ADDI_W OPC_2RI12(0xa)
114 #define ADDI_D OPC_2RI12(0xb)
115 #define ANDI OPC_2RI12(0xd)
116 #define ORI OPC_2RI12(0xe)
117 #define XORI OPC_2RI12(0xf)
118 #define ADDU16I_D OPC_2RI16(0x4)
119 #define LU12I_W OPC_1RI20(0xa)
120 #define LU32I_D OPC_1RI20(0xb)
121 #define LU52I_D OPC_2RI12(0xc)
122 #define SLT OPC_3R(0x24)
123 #define SLTU OPC_3R(0x25)
124 #define SLTI OPC_2RI12(0x8)
125 #define SLTUI OPC_2RI12(0x9)
126 #define PCADDI OPC_1RI20(0xc)
127 #define PCALAU12I OPC_1RI20(0xd)
128 #define PCADDU12I OPC_1RI20(0xe)
129 #define PCADDU18I OPC_1RI20(0xf)
130 #define NOR OPC_3R(0x28)
131 #define AND OPC_3R(0x29)
132 #define OR OPC_3R(0x2a)
133 #define XOR OPC_3R(0x2b)
134 #define ORN OPC_3R(0x2c)
135 #define ANDN OPC_3R(0x2d)
136 #define MUL_W OPC_3R(0x38)
137 #define MULH_W OPC_3R(0x39)
138 #define MULH_WU OPC_3R(0x3a)
139 #define MUL_D OPC_3R(0x3b)
140 #define MULH_D OPC_3R(0x3c)
141 #define MULH_DU OPC_3R(0x3d)
142 #define MULW_D_W OPC_3R(0x3e)
143 #define MULW_D_WU OPC_3R(0x3f)
144 #define DIV_W OPC_3R(0x40)
145 #define MOD_W OPC_3R(0x41)
146 #define DIV_WU OPC_3R(0x42)
147 #define MOD_WU OPC_3R(0x43)
148 #define DIV_D OPC_3R(0x44)
149 #define MOD_D OPC_3R(0x45)
150 #define DIV_DU OPC_3R(0x46)
151 #define MOD_DU OPC_3R(0x47)
152 
153 /* Bit-shift instructions */
154 #define SLL_W OPC_3R(0x2e)
155 #define SRL_W OPC_3R(0x2f)
156 #define SRA_W OPC_3R(0x30)
157 #define SLL_D OPC_3R(0x31)
158 #define SRL_D OPC_3R(0x32)
159 #define SRA_D OPC_3R(0x33)
160 #define ROTR_W OPC_3R(0x36)
161 #define ROTR_D OPC_3R(0x37)
162 #define SLLI_W OPC_3R(0x81)
163 #define SLLI_D ((sljit_ins)(0x41) << 16)
164 #define SRLI_W OPC_3R(0x89)
165 #define SRLI_D ((sljit_ins)(0x45) << 16)
166 #define SRAI_W OPC_3R(0x91)
167 #define SRAI_D ((sljit_ins)(0x49) << 16)
168 #define ROTRI_W OPC_3R(0x99)
169 #define ROTRI_D ((sljit_ins)(0x4d) << 16)
170 
171 /* Bit-manipulation instructions */
172 #define CLO_W OPC_2R(0x4)
173 #define CLZ_W OPC_2R(0x5)
174 #define CTO_W OPC_2R(0x6)
175 #define CTZ_W OPC_2R(0x7)
176 #define CLO_D OPC_2R(0x8)
177 #define CLZ_D OPC_2R(0x9)
178 #define CTO_D OPC_2R(0xa)
179 #define CTZ_D OPC_2R(0xb)
180 #define REVB_2H OPC_2R(0xc)
181 #define REVB_4H OPC_2R(0xd)
182 #define REVB_2W OPC_2R(0xe)
183 #define REVB_D OPC_2R(0xf)
184 #define REVH_2W OPC_2R(0x10)
185 #define REVH_D OPC_2R(0x11)
186 #define BITREV_4B OPC_2R(0x12)
187 #define BITREV_8B OPC_2R(0x13)
188 #define BITREV_W OPC_2R(0x14)
189 #define BITREV_D OPC_2R(0x15)
190 #define EXT_W_H OPC_2R(0x16)
191 #define EXT_W_B OPC_2R(0x17)
192 #define BSTRINS_W (0x1 << 22 | 1 << 21)
193 #define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)
194 #define BSTRINS_D (0x2 << 22)
195 #define BSTRPICK_D (0x3 << 22)
196 
197 /* Branch instructions */
198 #define BEQZ  OPC_1RI21(0x10)
199 #define BNEZ  OPC_1RI21(0x11)
200 #define JIRL  OPC_2RI16(0x13)
201 #define B     OPC_I26(0x14)
202 #define BL    OPC_I26(0x15)
203 #define BEQ   OPC_2RI16(0x16)
204 #define BNE   OPC_2RI16(0x17)
205 #define BLT   OPC_2RI16(0x18)
206 #define BGE   OPC_2RI16(0x19)
207 #define BLTU  OPC_2RI16(0x1a)
208 #define BGEU  OPC_2RI16(0x1b)
209 
210 /* Memory access instructions */
211 #define LD_B OPC_2RI12(0xa0)
212 #define LD_H OPC_2RI12(0xa1)
213 #define LD_W OPC_2RI12(0xa2)
214 #define LD_D OPC_2RI12(0xa3)
215 
216 #define ST_B OPC_2RI12(0xa4)
217 #define ST_H OPC_2RI12(0xa5)
218 #define ST_W OPC_2RI12(0xa6)
219 #define ST_D OPC_2RI12(0xa7)
220 
221 #define LD_BU OPC_2RI12(0xa8)
222 #define LD_HU OPC_2RI12(0xa9)
223 #define LD_WU OPC_2RI12(0xaa)
224 
225 #define LDX_B OPC_3R(0x7000)
226 #define LDX_H OPC_3R(0x7008)
227 #define LDX_W OPC_3R(0x7010)
228 #define LDX_D OPC_3R(0x7018)
229 
230 #define STX_B OPC_3R(0x7020)
231 #define STX_H OPC_3R(0x7028)
232 #define STX_W OPC_3R(0x7030)
233 #define STX_D OPC_3R(0x7038)
234 
235 #define LDX_BU OPC_3R(0x7040)
236 #define LDX_HU OPC_3R(0x7048)
237 #define LDX_WU OPC_3R(0x7050)
238 
239 #define PRELD OPC_2RI12(0xab)
240 
241 /* Atomic memory access instructions */
242 #define LL_W OPC_2RI14(0x20)
243 #define SC_W OPC_2RI14(0x21)
244 #define LL_D OPC_2RI14(0x22)
245 #define SC_D OPC_2RI14(0x23)
246 
247 /* LoongArch V1.10 Instructions */
248 #define AMCAS_B OPC_3R(0x70B0)
249 #define AMCAS_H OPC_3R(0x70B1)
250 #define AMCAS_W OPC_3R(0x70B2)
251 #define AMCAS_D OPC_3R(0x70B3)
252 
253 /* Other instructions */
254 #define BREAK OPC_3R(0x54)
255 #define DBGCALL OPC_3R(0x55)
256 #define SYSCALL OPC_3R(0x56)
257 
258 /* Basic Floating-Point Instructions */
259 /* Floating-Point Arithmetic Operation Instructions */
260 #define FADD_S  OPC_3R(0x201)
261 #define FADD_D  OPC_3R(0x202)
262 #define FSUB_S  OPC_3R(0x205)
263 #define FSUB_D  OPC_3R(0x206)
264 #define FMUL_S  OPC_3R(0x209)
265 #define FMUL_D  OPC_3R(0x20a)
266 #define FDIV_S  OPC_3R(0x20d)
267 #define FDIV_D  OPC_3R(0x20e)
268 #define FCMP_COND_S  OPC_4R(0xc1)
269 #define FCMP_COND_D  OPC_4R(0xc2)
270 #define FCOPYSIGN_S  OPC_3R(0x225)
271 #define FCOPYSIGN_D  OPC_3R(0x226)
272 #define FSEL  OPC_4R(0xd0)
273 #define FABS_S  OPC_2R(0x4501)
274 #define FABS_D  OPC_2R(0x4502)
275 #define FNEG_S  OPC_2R(0x4505)
276 #define FNEG_D  OPC_2R(0x4506)
277 #define FMOV_S  OPC_2R(0x4525)
278 #define FMOV_D  OPC_2R(0x4526)
279 
280 /* Floating-Point Conversion Instructions */
281 #define FCVT_S_D  OPC_2R(0x4646)
282 #define FCVT_D_S  OPC_2R(0x4649)
283 #define FTINTRZ_W_S  OPC_2R(0x46a1)
284 #define FTINTRZ_W_D  OPC_2R(0x46a2)
285 #define FTINTRZ_L_S  OPC_2R(0x46a9)
286 #define FTINTRZ_L_D  OPC_2R(0x46aa)
287 #define FFINT_S_W  OPC_2R(0x4744)
288 #define FFINT_S_L  OPC_2R(0x4746)
289 #define FFINT_D_W  OPC_2R(0x4748)
290 #define FFINT_D_L  OPC_2R(0x474a)
291 
292 /* Floating-Point Move Instructions */
293 #define FMOV_S  OPC_2R(0x4525)
294 #define FMOV_D  OPC_2R(0x4526)
295 #define MOVGR2FR_W  OPC_2R(0x4529)
296 #define MOVGR2FR_D  OPC_2R(0x452a)
297 #define MOVGR2FRH_W  OPC_2R(0x452b)
298 #define MOVFR2GR_S  OPC_2R(0x452d)
299 #define MOVFR2GR_D  OPC_2R(0x452e)
300 #define MOVFRH2GR_S  OPC_2R(0x452f)
301 #define MOVGR2FCSR  OPC_2R(0x4530)
302 #define MOVFCSR2GR  OPC_2R(0x4532)
303 #define MOVFR2CF  OPC_2R(0x4534)
304 #define MOVCF2FR  OPC_2R(0x4535)
305 #define MOVGR2CF  OPC_2R(0x4536)
306 #define MOVCF2GR  OPC_2R(0x4537)
307 
308 /* Floating-Point Branch Instructions */
309 #define BCEQZ OPC_I26(0x12)
310 #define BCNEZ (OPC_I26(0x12) | (0x1 << 8))
311 
312 /* Floating-Point Common Memory Access Instructions */
313 #define FLD_S OPC_2RI12(0xac)
314 #define FLD_D OPC_2RI12(0xae)
315 #define FST_S OPC_2RI12(0xad)
316 #define FST_D OPC_2RI12(0xaf)
317 
318 #define FLDX_S OPC_3R(0x7060)
319 #define FLDX_D OPC_3R(0x7068)
320 #define FSTX_S OPC_3R(0x7070)
321 #define FSTX_D OPC_3R(0x7078)
322 
323 /* Vector Instructions */
324 
325 /* Vector Arithmetic Instructions */
326 #define VOR_V OPC_3R(0xe24d)
327 #define VXOR_V OPC_3R(0xe24e)
328 #define VAND_V OPC_3R(0xe24c)
329 #define VMSKLTZ OPC_2R(0x1ca710)
330 
331 /* Vector Memory Access Instructions */
332 #define VLD OPC_2RI12(0xb0)
333 #define VST OPC_2RI12(0xb1)
334 #define XVLD OPC_2RI12(0xb2)
335 #define XVST OPC_2RI12(0xb3)
336 #define VSTELM OPC_2RI8(0xc40)
337 
338 /* Vector Float Conversion Instructions */
339 #define VFCVTL_D_S OPC_2R(0x1ca77c)
340 
341 /* Vector Bit Manipulate Instructions */
342 #define VSLLWIL OPC_2R(0x1cc200)
343 
344 /* Vector Move And Shuffle Instructions */
345 #define VLDREPL OPC_2R(0xc0000)
346 #define VINSGR2VR OPC_2R(0x1cbac0)
347 #define VPICKVE2GR_U OPC_2R(0x1cbce0)
348 #define VREPLGR2VR OPC_2R(0x1ca7c0)
349 #define VREPLVE OPC_3R(0xe244)
350 #define VREPLVEI OPC_2R(0x1cbde0)
351 #define XVPERMI OPC_2RI8(0x1dfa)
352 
353 #define I12_MAX (0x7ff)
354 #define I12_MIN (-0x800)
355 #define BRANCH16_MAX (0x7fff << 2)
356 #define BRANCH16_MIN (-(0x8000 << 2))
357 #define BRANCH21_MAX (0xfffff << 2)
358 #define BRANCH21_MIN (-(0x100000 << 2))
359 #define JUMP_MAX (0x1ffffff << 2)
360 #define JUMP_MIN (-(0x2000000 << 2))
361 #define JIRL_MAX (0x7fff << 2)
362 #define JIRL_MIN (-(0x8000 << 2))
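/* Worked example of the ranges above (informal): branch offsets are stored
   in instruction units, so the raw fields are shifted left by 2 to obtain
   byte distances. A 16-bit conditional branch therefore reaches
   0x7fff << 2 = 131068 bytes (about +/-128 KiB), the 21-bit form about
   +/-4 MiB, and the 26-bit B/BL form 0x1ffffff << 2, about +/-128 MiB. */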
363 
364 #define S32_MAX		(0x7fffffffl)
365 #define S32_MIN		(-0x80000000l)
366 #define S52_MAX		(0x7ffffffffffffl)
367 
368 #define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))
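/* Example of the INST() helper (illustrative): it selects the 32- or 64-bit
   variant of an opcode pair based on the SLJIT_32 bit of the operation:

     INST(ADD, SLJIT_ADD)      evaluates to ADD_D  (64-bit add)
     INST(ADD, SLJIT_ADD32)    evaluates to ADD_W  (32-bit add)

   assuming SLJIT_ADD32 == (SLJIT_ADD | SLJIT_32) as in the public sljit
   API headers. */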
369 
370 /* LoongArch CPUCFG register for feature detection */
371 #define LOONGARCH_CFG2			0x02
372 #define LOONGARCH_CFG2_LAMCAS	(1 << 28)
373 
374 static sljit_u32 cfg2_feature_list = 0;
375 
376 /* According to the Software Development and Build Convention for LoongArch Architectures,
377    the status of the LSX and LASX extensions must be checked through HWCAP. */
378 #include <sys/auxv.h>
379 
380 #define LOONGARCH_HWCAP_LSX		(1 << 4)
381 #define LOONGARCH_HWCAP_LASX	(1 << 5)
382 
383 static sljit_u32 hwcap_feature_list = 0;
384 
385 /* Feature type */
386 #define GET_CFG2 	0
387 #define GET_HWCAP	1
388 
389 static SLJIT_INLINE sljit_u32 get_cpu_features(sljit_u32 feature_type)
390 {
391 	if (cfg2_feature_list == 0)
392 		__asm__ ("cpucfg %0, %1" : "+&r"(cfg2_feature_list) : "r"(LOONGARCH_CFG2));
393 	if (hwcap_feature_list == 0)
394 		hwcap_feature_list = (sljit_u32)getauxval(AT_HWCAP);
395 
396 	return feature_type ? hwcap_feature_list : cfg2_feature_list;
397 }
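/* Usage sketch (illustrative): callers test a single feature bit against the
   cached word, for example

     if (get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LSX)
         ...128-bit vector (LSX) instructions may be emitted...

   sljit_has_cpu_feature() below performs this kind of check for
   SLJIT_HAS_SIMD, SLJIT_HAS_LASX and SLJIT_HAS_ATOMIC. */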
398 
399 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
400 {
401 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
402 	FAIL_IF(!ptr);
403 	*ptr = ins;
404 	compiler->size++;
405 	return SLJIT_SUCCESS;
406 }
407 
408 static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
409 {
410 	sljit_sw diff;
411 	sljit_uw target_addr;
412 	sljit_ins *inst;
413 
414 	inst = (sljit_ins *)jump->addr;
415 
416 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
417 		goto exit;
418 
419 	if (jump->flags & JUMP_ADDR)
420 		target_addr = jump->u.target;
421 	else {
422 		SLJIT_ASSERT(jump->u.label != NULL);
423 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
424 	}
425 
426 	diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
427 
428 	if (jump->flags & IS_COND) {
429 		diff += SSIZE_OF(ins);
430 
431 		if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
432 			inst--;
433 			inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
434 			jump->flags |= PATCH_B;
435 			jump->addr = (sljit_uw)inst;
436 			return inst;
437 		}
438 
439 		diff -= SSIZE_OF(ins);
440 	}
441 
442 	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
443 		if (jump->flags & IS_COND) {
444 			inst[-1] |= (sljit_ins)IMM_I16(2);
445 		}
446 
447 		jump->flags |= PATCH_J;
448 		return inst;
449 	}
450 
451 	if (diff >= S32_MIN && diff <= S32_MAX) {
452 		if (jump->flags & IS_COND)
453 			inst[-1] |= (sljit_ins)IMM_I16(3);
454 
455 		jump->flags |= PATCH_REL32;
456 		inst[1] = inst[0];
457 		return inst + 1;
458 	}
459 
460 	if (target_addr <= (sljit_uw)S32_MAX) {
461 		if (jump->flags & IS_COND)
462 			inst[-1] |= (sljit_ins)IMM_I16(3);
463 
464 		jump->flags |= PATCH_ABS32;
465 		inst[1] = inst[0];
466 		return inst + 1;
467 	}
468 
469 	if (target_addr <= S52_MAX) {
470 		if (jump->flags & IS_COND)
471 			inst[-1] |= (sljit_ins)IMM_I16(4);
472 
473 		jump->flags |= PATCH_ABS52;
474 		inst[2] = inst[0];
475 		return inst + 2;
476 	}
477 
478 exit:
479 	if (jump->flags & IS_COND)
480 		inst[-1] |= (sljit_ins)IMM_I16(5);
481 	inst[3] = inst[0];
482 	return inst + 3;
483 }
484 
485 static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
486 {
487 	sljit_uw addr;
488 	sljit_sw diff;
489 	SLJIT_UNUSED_ARG(executable_offset);
490 
491 	SLJIT_ASSERT(jump->flags < ((sljit_uw)6 << JUMP_SIZE_SHIFT));
492 	if (jump->flags & JUMP_ADDR)
493 		addr = jump->u.target;
494 	else
495 		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
496 
497 	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
498 
499 	if (diff >= S32_MIN && diff <= S32_MAX) {
500 		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
501 		jump->flags |= PATCH_REL32;
502 		return 1;
503 	}
504 
505 	if (addr <= S32_MAX) {
506 		SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
507 		jump->flags |= PATCH_ABS32;
508 		return 1;
509 	}
510 
511 	if (addr <= S52_MAX) {
512 		SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
513 		jump->flags |= PATCH_ABS52;
514 		return 2;
515 	}
516 
517 	SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
518 	return 3;
519 }
520 
521 static SLJIT_INLINE void load_addr_to_reg(struct sljit_jump *jump, sljit_sw executable_offset)
522 {
523 	sljit_uw flags = jump->flags;
524 	sljit_uw addr = (flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
525 	sljit_ins *ins = (sljit_ins*)jump->addr;
526 	sljit_u32 reg = (flags & JUMP_MOV_ADDR) ? *ins : TMP_REG1;
527 	SLJIT_UNUSED_ARG(executable_offset);
528 
529 	if (flags & PATCH_REL32) {
530 		addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(ins, executable_offset);
531 
532 		SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
533 
534 		if ((addr & 0x800) != 0)
535 			addr += 0x1000;
536 
537 		ins[0] = PCADDU12I | RD(reg) | IMM_I20(addr);
538 
539 		if (!(flags & JUMP_MOV_ADDR)) {
540 			SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
541 			ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
542 		} else
543 			ins[1] = ADDI_D | RD(reg) | RJ(reg) | IMM_I12(addr);
544 		return;
545 	}
546 
547 	if (flags & PATCH_ABS32) {
548 		SLJIT_ASSERT(addr <= S32_MAX);
549 		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
550 	} else if (flags & PATCH_ABS52) {
551 		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
552 		ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
553 		ins += 1;
554 	} else {
555 		ins[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
556 		ins[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
557 		ins[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
558 		ins += 2;
559 	}
560 
561 	if (!(flags & JUMP_MOV_ADDR)) {
562 		SLJIT_ASSERT((ins[1] & OPC_2RI16(0x3f)) == JIRL);
563 		ins[1] = (ins[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
564 	} else
565 		ins[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
566 }
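/* Worked example for the PATCH_REL32 case above (informal): PCADDU12I adds a
   20-bit value shifted left by 12 to the PC, and the low 12 bits are applied
   afterwards by ADDI_D or JIRL, whose immediates are sign-extended. When bit
   11 of the distance is set, that second step subtracts up to 0x800, so the
   high part is rounded up first ("addr += 0x1000" above). For a distance of
   0x12abc: the high part becomes 0x13 (0x13000 via PCADDU12I), the low part
   0xabc sign-extends to -0x544, and 0x13000 - 0x544 = 0x12abc. */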
567 
568 static void reduce_code_size(struct sljit_compiler *compiler)
569 {
570 	struct sljit_label *label;
571 	struct sljit_jump *jump;
572 	struct sljit_const *const_;
573 	SLJIT_NEXT_DEFINE_TYPES;
574 	sljit_uw total_size;
575 	sljit_uw size_reduce = 0;
576 	sljit_sw diff;
577 
578 	label = compiler->labels;
579 	jump = compiler->jumps;
580 	const_ = compiler->consts;
581 
582 	SLJIT_NEXT_INIT_TYPES();
583 
584 	while (1) {
585 		SLJIT_GET_NEXT_MIN();
586 
587 		if (next_min_addr == SLJIT_MAX_ADDRESS)
588 			break;
589 
590 		if (next_min_addr == next_label_size) {
591 			label->size -= size_reduce;
592 
593 			label = label->next;
594 			next_label_size = SLJIT_GET_NEXT_SIZE(label);
595 		}
596 
597 		if (next_min_addr == next_const_addr) {
598 			const_->addr -= size_reduce;
599 			const_ = const_->next;
600 			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
601 			continue;
602 		}
603 
604 		if (next_min_addr != next_jump_addr)
605 			continue;
606 
607 		jump->addr -= size_reduce;
608 		if (!(jump->flags & JUMP_MOV_ADDR)) {
609 			total_size = JUMP_MAX_SIZE;
610 
611 			if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
612 				if (jump->flags & JUMP_ADDR) {
613 					if (jump->u.target <= S32_MAX)
614 						total_size = 2;
615 					else if (jump->u.target <= S52_MAX)
616 						total_size = 3;
617 				} else {
618 					/* Unit size: instruction. */
619 					diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
620 
621 					if ((jump->flags & IS_COND) && (diff + 1) <= (BRANCH16_MAX / SSIZE_OF(ins)) && (diff + 1) >= (BRANCH16_MIN / SSIZE_OF(ins)))
622 						total_size = 0;
623 					else if (diff >= (JUMP_MIN / SSIZE_OF(ins)) && diff <= (JUMP_MAX / SSIZE_OF(ins)))
624 						total_size = 1;
625 					else if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
626 						total_size = 2;
627 				}
628 			}
629 
630 			size_reduce += JUMP_MAX_SIZE - total_size;
631 			jump->flags |= total_size << JUMP_SIZE_SHIFT;
632 		} else {
633 			total_size = 3;
634 
635 			if (!(jump->flags & JUMP_ADDR)) {
636 				/* Real size minus 1. Unit size: instruction. */
637 				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
638 
639 				if (diff >= (S32_MIN / SSIZE_OF(ins)) && diff <= (S32_MAX / SSIZE_OF(ins)))
640 					total_size = 1;
641 			} else if (jump->u.target < S32_MAX)
642 				total_size = 1;
643 			else if (jump->u.target <= S52_MAX)
644 				total_size = 2;
645 
646 			size_reduce += 3 - total_size;
647 			jump->flags |= total_size << JUMP_SIZE_SHIFT;
648 		}
649 
650 		jump = jump->next;
651 		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
652 	}
653 
654 	compiler->size -= size_reduce;
655 }
656 
657 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
658 {
659 	struct sljit_memory_fragment *buf;
660 	sljit_ins *code;
661 	sljit_ins *code_ptr;
662 	sljit_ins *buf_ptr;
663 	sljit_ins *buf_end;
664 	sljit_uw word_count;
665 	SLJIT_NEXT_DEFINE_TYPES;
666 	sljit_sw executable_offset;
667 	sljit_uw addr;
668 
669 	struct sljit_label *label;
670 	struct sljit_jump *jump;
671 	struct sljit_const *const_;
672 
673 	CHECK_ERROR_PTR();
674 	CHECK_PTR(check_sljit_generate_code(compiler));
675 
676 	reduce_code_size(compiler);
677 
678 	code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
679 	PTR_FAIL_WITH_EXEC_IF(code);
680 
681 	reverse_buf(compiler);
682 	buf = compiler->buf;
683 
684 	code_ptr = code;
685 	word_count = 0;
686 	label = compiler->labels;
687 	jump = compiler->jumps;
688 	const_ = compiler->consts;
689 	SLJIT_NEXT_INIT_TYPES();
690 	SLJIT_GET_NEXT_MIN();
691 
692 	do {
693 		buf_ptr = (sljit_ins*)buf->memory;
694 		buf_end = buf_ptr + (buf->used_size >> 2);
695 		do {
696 			*code_ptr = *buf_ptr++;
697 			if (next_min_addr == word_count) {
698 				SLJIT_ASSERT(!label || label->size >= word_count);
699 				SLJIT_ASSERT(!jump || jump->addr >= word_count);
700 				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
701 
702 				/* These structures are ordered by their address. */
703 				if (next_min_addr == next_label_size) {
704 					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
705 					label->size = (sljit_uw)(code_ptr - code);
706 					label = label->next;
707 					next_label_size = SLJIT_GET_NEXT_SIZE(label);
708 				}
709 
710 				if (next_min_addr == next_jump_addr) {
711 					if (!(jump->flags & JUMP_MOV_ADDR)) {
712 						word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
713 						jump->addr = (sljit_uw)code_ptr;
714 						code_ptr = detect_jump_type(jump, code, executable_offset);
715 						SLJIT_ASSERT((jump->flags & PATCH_B) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
716 					} else {
717 						word_count += jump->flags >> JUMP_SIZE_SHIFT;
718 						addr = (sljit_uw)code_ptr;
719 						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
720 						jump->addr = addr;
721 					}
722 					jump = jump->next;
723 					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
724 				} else if (next_min_addr == next_const_addr) {
725 					const_->addr = (sljit_uw)code_ptr;
726 					const_ = const_->next;
727 					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
728 				}
729 
730 				SLJIT_GET_NEXT_MIN();
731 			}
732 			code_ptr++;
733 			word_count++;
734 		} while (buf_ptr < buf_end);
735 
736 		buf = buf->next;
737 	} while (buf);
738 
739 	if (label && label->size == word_count) {
740 		label->u.addr = (sljit_uw)code_ptr;
741 		label->size = (sljit_uw)(code_ptr - code);
742 		label = label->next;
743 	}
744 
745 	SLJIT_ASSERT(!label);
746 	SLJIT_ASSERT(!jump);
747 	SLJIT_ASSERT(!const_);
748 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
749 
750 	jump = compiler->jumps;
751 	while (jump) {
752 		do {
753 			if (!(jump->flags & (PATCH_B | PATCH_J)) || (jump->flags & JUMP_MOV_ADDR)) {
754 				load_addr_to_reg(jump, executable_offset);
755 				break;
756 			}
757 
758 			addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
759 			buf_ptr = (sljit_ins *)jump->addr;
760 			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
761 
762 			if (jump->flags & PATCH_B) {
763 				SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
764 				buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
765 				break;
766 			}
767 
768 			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
769 			if (jump->flags & IS_CALL)
770 				buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
771 			else
772 				buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
773 		} while (0);
774 		jump = jump->next;
775 	}
776 
777 	compiler->error = SLJIT_ERR_COMPILED;
778 	compiler->executable_offset = executable_offset;
779 	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
780 
781 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
782 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
783 
784 	SLJIT_CACHE_FLUSH(code, code_ptr);
785 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
786 	return code;
787 }
788 
789 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
790 {
791 	switch (feature_type)
792 	{
793 	case SLJIT_HAS_FPU:
794 #ifdef SLJIT_IS_FPU_AVAILABLE
795 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
796 #else
797 		/* Available by default. */
798 		return 1;
799 #endif
800 
801 	case SLJIT_HAS_LASX:
802 		return (LOONGARCH_HWCAP_LASX & get_cpu_features(GET_HWCAP));
803 
804 	case SLJIT_HAS_SIMD:
805 		return (LOONGARCH_HWCAP_LSX & get_cpu_features(GET_HWCAP));
806 
807 	case SLJIT_HAS_ATOMIC:
808 		return (LOONGARCH_CFG2_LAMCAS & get_cpu_features(GET_CFG2));
809 
810 	case SLJIT_HAS_CLZ:
811 	case SLJIT_HAS_CTZ:
812 	case SLJIT_HAS_REV:
813 	case SLJIT_HAS_ROT:
814 	case SLJIT_HAS_PREFETCH:
815 	case SLJIT_HAS_COPY_F32:
816 	case SLJIT_HAS_COPY_F64:
817 		return 1;
818 
819 	default:
820 		return 0;
821 	}
822 }
823 
824 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
825 {
826 	SLJIT_UNUSED_ARG(type);
827 
828 	return 0;
829 }
830 
831 /* --------------------------------------------------------------------- */
832 /*  Entry, exit                                                          */
833 /* --------------------------------------------------------------------- */
834 
835 /* Creates an index in data_transfer_insts array. */
836 #define LOAD_DATA	0x01
837 #define WORD_DATA	0x00
838 #define BYTE_DATA	0x02
839 #define HALF_DATA	0x04
840 #define INT_DATA	0x06
841 #define SIGNED_DATA	0x08
842 /* Separates integer and floating point registers */
843 #define GPR_REG		0x0f
844 #define DOUBLE_DATA	0x10
845 #define SINGLE_DATA	0x12
846 
847 #define MEM_MASK	0x1f
848 
849 #define ARG_TEST	0x00020
850 #define ALT_KEEP_CACHE	0x00040
851 #define CUMULATIVE_OP	0x00080
852 #define IMM_OP		0x00100
853 #define MOVE_OP		0x00200
854 #define SRC2_IMM	0x00400
855 
856 #define UNUSED_DEST	0x00800
857 #define REG_DEST	0x01000
858 #define REG1_SOURCE	0x02000
859 #define REG2_SOURCE	0x04000
860 #define SLOW_SRC1	0x08000
861 #define SLOW_SRC2	0x10000
862 #define SLOW_DEST	0x20000
863 #define MEM_USE_TMP2	0x40000
864 
865 #define STACK_STORE	ST_D
866 #define STACK_LOAD	LD_D
867 
868 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
869 {
870 	if (imm <= I12_MAX && imm >= I12_MIN)
871 		return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));
872 
873 	if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
874 		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
875 		return push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));
876 	} else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) {
877 		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
878 		FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
879 		return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));
880 	}
881 	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
882 	FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
883 	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));
884 	return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
885 }
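/* Worked example (informal): loading the 64-bit constant 0x0123456789abcdef
   needs the full four-instruction sequence built above:

     lu12i.w  dst, 0x89abc       bits 31..12 (result is sign-extended)
     ori      dst, dst, 0xdef    bits 11..0 (ori zero-extends, so the low
                                 bits are ORed in unchanged)
     lu32i.d  dst, 0x34567       bits 51..32
     lu52i.d  dst, dst, 0x012    bits 63..52

   Constants that already fit in 12, 32 or 52 signed bits stop after the
   first one, two or three steps, as handled by the early returns above. */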
886 
887 #define STACK_MAX_DISTANCE (-I12_MIN)
888 
889 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
890 
891 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
892 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
893 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
894 {
895 	sljit_s32 i, tmp, offset;
896 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
897 
898 	CHECK_ERROR();
899 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
900 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
901 
902 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
903 	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
904 
905 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
906 	compiler->local_size = local_size;
907 
908 	if (local_size <= STACK_MAX_DISTANCE) {
909 		/* Frequent case. */
910 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
911 		offset = local_size - SSIZE_OF(sw);
912 		local_size = 0;
913 	} else {
914 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
915 		local_size -= STACK_MAX_DISTANCE;
916 
917 		if (local_size > STACK_MAX_DISTANCE)
918 			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
919 		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
920 	}
921 
922 	FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
923 
924 	tmp = SLJIT_S0 - saveds;
925 	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
926 		offset -= SSIZE_OF(sw);
927 		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
928 	}
929 
930 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
931 		offset -= SSIZE_OF(sw);
932 		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
933 	}
934 
935 	tmp = SLJIT_FS0 - fsaveds;
936 	for (i = SLJIT_FS0; i > tmp; i--) {
937 		offset -= SSIZE_OF(f64);
938 		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
939 	}
940 
941 	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
942 		offset -= SSIZE_OF(f64);
943 		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
944 	}
945 
946 	if (local_size > STACK_MAX_DISTANCE)
947 		FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
948 	else if (local_size > 0)
949 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
950 
951 	if (options & SLJIT_ENTER_REG_ARG)
952 		return SLJIT_SUCCESS;
953 
954 	arg_types >>= SLJIT_ARG_SHIFT;
955 	saved_arg_count = 0;
956 	tmp = SLJIT_R0;
957 
958 	while (arg_types > 0) {
959 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
960 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
961 				FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
962 				saved_arg_count++;
963 			}
964 			tmp++;
965 		}
966 
967 		arg_types >>= SLJIT_ARG_SHIFT;
968 	}
969 
970 	return SLJIT_SUCCESS;
971 }
972 
973 #undef STACK_MAX_DISTANCE
974 
975 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
976 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
977 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
978 {
979 	CHECK_ERROR();
980 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
981 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
982 
983 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
984 	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
985 
986 	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
987 
988 	return SLJIT_SUCCESS;
989 }
990 
991 #define STACK_MAX_DISTANCE (-I12_MIN - 16)
992 
993 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
994 {
995 	sljit_s32 i, tmp, offset;
996 	sljit_s32 local_size = compiler->local_size;
997 
998 	if (local_size > STACK_MAX_DISTANCE) {
999 		local_size -= STACK_MAX_DISTANCE;
1000 
1001 		if (local_size > STACK_MAX_DISTANCE) {
1002 			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
1003 			FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
1004 		} else
1005 			FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
1006 
1007 		local_size = STACK_MAX_DISTANCE;
1008 	}
1009 
1010 	SLJIT_ASSERT(local_size > 0);
1011 
1012 	offset = local_size - SSIZE_OF(sw);
1013 	if (!is_return_to)
1014 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
1015 
1016 	tmp = SLJIT_S0 - compiler->saveds;
1017 	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
1018 		offset -= SSIZE_OF(sw);
1019 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1020 	}
1021 
1022 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1023 		offset -= SSIZE_OF(sw);
1024 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1025 	}
1026 
1027 	tmp = SLJIT_FS0 - compiler->fsaveds;
1028 	for (i = SLJIT_FS0; i > tmp; i--) {
1029 		offset -= SSIZE_OF(f64);
1030 		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1031 	}
1032 
1033 	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1034 		offset -= SSIZE_OF(f64);
1035 		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
1036 	}
1037 
1038 	return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
1039 }
1040 
1041 #undef STACK_MAX_DISTANCE
1042 
1043 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1044 {
1045 	CHECK_ERROR();
1046 	CHECK(check_sljit_emit_return_void(compiler));
1047 
1048 	FAIL_IF(emit_stack_frame_release(compiler, 0));
1049 	return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
1050 }
1051 
1052 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1053 	sljit_s32 src, sljit_sw srcw)
1054 {
1055 	CHECK_ERROR();
1056 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1057 
1058 	if (src & SLJIT_MEM) {
1059 		ADJUST_LOCAL_OFFSET(src, srcw);
1060 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
1061 		src = TMP_REG1;
1062 		srcw = 0;
1063 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1064 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
1065 		src = TMP_REG1;
1066 		srcw = 0;
1067 	}
1068 
1069 	FAIL_IF(emit_stack_frame_release(compiler, 1));
1070 
1071 	SLJIT_SKIP_CHECKS(compiler);
1072 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1073 }
1074 
1075 /* --------------------------------------------------------------------- */
1076 /*  Operators                                                            */
1077 /* --------------------------------------------------------------------- */
1078 
1079 static const sljit_ins data_transfer_insts[16 + 4] = {
1080 /* u w s */ ST_D /* st.d */,
1081 /* u w l */ LD_D /* ld.d */,
1082 /* u b s */ ST_B /* st.b */,
1083 /* u b l */ LD_BU /* ld.bu */,
1084 /* u h s */ ST_H /* st.h */,
1085 /* u h l */ LD_HU /* ld.hu */,
1086 /* u i s */ ST_W /* st.w */,
1087 /* u i l */ LD_WU /* ld.wu */,
1088 
1089 /* s w s */ ST_D /* st.d */,
1090 /* s w l */ LD_D /* ld.d */,
1091 /* s b s */ ST_B /* st.b */,
1092 /* s b l */ LD_B /* ld.b */,
1093 /* s h s */ ST_H /* st.h */,
1094 /* s h l */ LD_H /* ld.h */,
1095 /* s i s */ ST_W /* st.w */,
1096 /* s i l */ LD_W /* ld.w */,
1097 
1098 /* d   s */ FST_D /* fst.d */,
1099 /* d   l */ FLD_D /* fld.d */,
1100 /* s   s */ FST_S /* fst.s */,
1101 /* s   l */ FLD_S /* fld.s */,
1102 };
1103 
1104 static const sljit_ins data_transfer_insts_x[16 + 4] = {
1105 /* u w s */ STX_D /* stx.d */,
1106 /* u w l */ LDX_D /* ldx.d */,
1107 /* u b s */ STX_B /* stx.b */,
1108 /* u b l */ LDX_BU /* ldx.bu */,
1109 /* u h s */ STX_H /* stx.h */,
1110 /* u h l */ LDX_HU /* ldx.hu */,
1111 /* u i s */ STX_W /* stx.w */,
1112 /* u i l */ LDX_WU /* ldx.wu */,
1113 
1114 /* s w s */ STX_D /* stx.d */,
1115 /* s w l */ LDX_D /* ldx.d */,
1116 /* s b s */ STX_B /* stx.b */,
1117 /* s b l */ LDX_B /* ldx.b */,
1118 /* s h s */ STX_H /* stx.h */,
1119 /* s h l */ LDX_H /* ldx.h */,
1120 /* s i s */ STX_W /* stx.w */,
1121 /* s i l */ LDX_W /* ldx.w */,
1122 
1123 /* d   s */ FSTX_D /* fstx.d */,
1124 /* d   l */ FLDX_D /* fldx.d */,
1125 /* s   s */ FSTX_S /* fstx.s */,
1126 /* s   l */ FLDX_S /* fldx.s */,
1127 };
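/* Example of how these tables are indexed (illustrative): the index is the
   combination of the data-type flags defined after the "Entry, exit"
   header, masked with MEM_MASK:

     WORD_DATA | LOAD_DATA                ->  1 -> LD_D  / LDX_D
     BYTE_DATA | LOAD_DATA                ->  3 -> LD_BU / LDX_BU
     SIGNED_DATA | BYTE_DATA | LOAD_DATA  -> 11 -> LD_B  / LDX_B
     DOUBLE_DATA | LOAD_DATA              -> 17 -> FLD_D / FLDX_D

   The first table is used for base + imm12 addressing, the _x table for
   base + index register addressing (see push_mem_inst below). */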
1128 
1129 static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1130 {
1131 	sljit_ins ins;
1132 	sljit_s32 base = arg & REG_MASK;
1133 
1134 	SLJIT_ASSERT(arg & SLJIT_MEM);
1135 
1136 	if (arg & OFFS_REG_MASK) {
1137 		sljit_s32 offs = OFFS_REG(arg);
1138 
1139 		SLJIT_ASSERT(!argw);
1140 		ins = data_transfer_insts_x[flags & MEM_MASK] |
1141 			  ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
1142 			  RJ(base) | RK(offs);
1143 	} else {
1144 		SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);
1145 
1146 		ins = data_transfer_insts[flags & MEM_MASK] |
1147 			  ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
1148 			  RJ(base) | IMM_I12(argw);
1149 	}
1150 	return push_inst(compiler, ins);
1151 }
1152 
1153 /* Can perform an operation using at most 1 instruction. */
1154 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1155 {
1156 	SLJIT_ASSERT(arg & SLJIT_MEM);
1157 
1158 	/* argw == 0 (ldx/stx rd, rj, rk) can be used.
1159 	 * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1160 	if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1161 		/* Works for both absolute and relative addresses. */
1162 		if (SLJIT_UNLIKELY(flags & ARG_TEST))
1163 			return 1;
1164 
1165 		FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1166 		return -1;
1167 	}
1168 	return 0;
1169 }
1170 
1171 #define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
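/* Worked example (informal): TO_ARGW_HI() rounds an offset to a multiple of
   0x1000, rounding up when bit 11 is set, so that the remaining displacement
   always fits in the sign-extended 12-bit field of ld/st:

     TO_ARGW_HI(0x12345abc) == 0x12346000, remaining displacement -0x544
     TO_ARGW_HI(0x123453ff) == 0x12345000, remaining displacement  0x3ff

   getput_arg() below caches the rounded value in TMP_REG3 and reaches the
   exact address through that 12-bit displacement. */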
1172 
1173 /* See getput_arg below.
1174    Note: can_cache is called only for binary operators. */
1175 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1176 {
1177 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1178 
1179 	if (arg & OFFS_REG_MASK)
1180 		return 0;
1181 
1182 	if (arg == next_arg) {
1183 		if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1184 				|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1185 			return 1;
1186 		return 0;
1187 	}
1188 
1189 	return 0;
1190 }
1191 
1192 /* Emit the necessary instructions. See can_cache above. */
1193 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1194 {
1195 	sljit_s32 base = arg & REG_MASK;
1196 	sljit_s32 tmp_r = (flags & MEM_USE_TMP2) ? TMP_REG2 : TMP_REG1;
1197 	sljit_sw offset;
1198 
1199 	SLJIT_ASSERT(arg & SLJIT_MEM);
1200 	if (!(next_arg & SLJIT_MEM)) {
1201 		next_arg = 0;
1202 		next_argw = 0;
1203 	}
1204 
1205 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1206 		argw &= 0x3;
1207 
1208 		if (SLJIT_UNLIKELY(argw))
1209 			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1210 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1211 	}
1212 
1213 	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
1214 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
1215 
1216 	if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
1217 		offset = argw - compiler->cache_argw;
1218 	} else {
1219 		sljit_sw argw_hi = TO_ARGW_HI(argw);
1220 		compiler->cache_arg = SLJIT_MEM;
1221 
1222 		if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1223 			FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1224 			compiler->cache_argw = argw;
1225 			offset = 0;
1226 		} else {
1227 			FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
1228 			compiler->cache_argw = argw_hi;
1229 			offset = argw & 0xfff;
1230 			argw = argw_hi;
1231 		}
1232 	}
1233 
1234 	if (!base)
1235 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1236 
1237 	if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
1238 		compiler->cache_arg = arg;
1239 		FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
1240 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1241 	}
1242 
1243 	if (!offset)
1244 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1245 
1246 	FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
1247 	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
1248 }
1249 
1250 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1251 {
1252 	sljit_s32 base = arg & REG_MASK;
1253 	sljit_s32 tmp_r = TMP_REG1;
1254 
1255 	if (getput_arg_fast(compiler, flags, reg, arg, argw))
1256 		return compiler->error;
1257 
1258 	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1259 		tmp_r = reg;
1260 
1261 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1262 		argw &= 0x3;
1263 
1264 		if (SLJIT_UNLIKELY(argw))
1265 			FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1266 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1267 	} else {
1268 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1269 
1270 		if (base != 0)
1271 			return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1272 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
1273 	}
1274 }
1275 
1276 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1277 {
1278 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1279 		return compiler->error;
1280 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1281 }
1282 
1283 #define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? (v) : (32 + (v))))
1284 
1285 /* andi/ori/xori are zero-extended */
1286 #define EMIT_LOGICAL(op_imm, op_reg) \
1287 	if (flags & SRC2_IMM) { \
1288 		if (op & SLJIT_SET_Z) {\
1289 			FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1290 			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
1291 		} \
1292 		if (!(flags & UNUSED_DEST)) { \
1293 			if (dst == src1) { \
1294 				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1295 				FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
1296 			} else { \
1297 				FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1298 				FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \
1299 			} \
1300 		} \
1301 	} else { \
1302 		if (op & SLJIT_SET_Z) \
1303 			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
1304 		if (!(flags & UNUSED_DEST)) \
1305 			FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \
1306 	} \
1307 	while (0)
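/* Example of why the immediate is materialized with ADDI_D above
   (illustrative): the hardware andi/ori/xori forms zero-extend their 12-bit
   field, so a small negative constant cannot be encoded directly. An AND
   with -4 must not become "andi dst, src, 0xffc" (that would mask with
   0x0000000000000ffc); instead -4 is loaded into a register with addi.d,
   which sign-extends it to 0xfffffffffffffffc, and the register form of the
   operation is used. */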
1308 
1309 #define EMIT_SHIFT(imm, reg) \
1310 	op_imm = (imm); \
1311 	op_reg = (reg)
1312 
1313 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1314 	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1315 {
1316 	sljit_s32 is_overflow, is_carry, carry_src_r, is_handled, reg;
1317 	sljit_ins op_imm, op_reg;
1318 	sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);
1319 
1320 	switch (GET_OPCODE(op)) {
1321 	case SLJIT_MOV:
1322 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1323 		if (dst != src2)
1324 			return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
1325 		return SLJIT_SUCCESS;
1326 
1327 	case SLJIT_MOV_U8:
1328 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1329 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1330 			return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
1331 		SLJIT_ASSERT(dst == src2);
1332 		return SLJIT_SUCCESS;
1333 
1334 	case SLJIT_MOV_S8:
1335 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1336 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1337 			return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
1338 		SLJIT_ASSERT(dst == src2);
1339 		return SLJIT_SUCCESS;
1340 
1341 	case SLJIT_MOV_U16:
1342 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1343 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1344 			return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
1345 		SLJIT_ASSERT(dst == src2);
1346 		return SLJIT_SUCCESS;
1347 
1348 	case SLJIT_MOV_S16:
1349 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1350 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1351 			return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
1352 		SLJIT_ASSERT(dst == src2);
1353 		return SLJIT_SUCCESS;
1354 
1355 	case SLJIT_MOV_U32:
1356 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1357 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1358 			return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
1359 		SLJIT_ASSERT(dst == src2);
1360 		return SLJIT_SUCCESS;
1361 
1362 	case SLJIT_MOV_S32:
1363 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1364 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1365 			return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
1366 		SLJIT_ASSERT(dst == src2);
1367 		return SLJIT_SUCCESS;
1368 
1369 	case SLJIT_CLZ:
1370 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1371 		return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));
1372 
1373 	case SLJIT_CTZ:
1374 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1375 		return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));
1376 
1377 	case SLJIT_REV:
1378 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1379 		return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));
1380 
1381 	case SLJIT_REV_S16:
1382 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1383 		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1384 		return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));
1385 
1386 	case SLJIT_REV_U16:
1387 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM));
1388 		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1389 		return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));
1390 
1391 	case SLJIT_REV_S32:
1392 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1393 		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1394 		return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));
1395 
1396 	case SLJIT_REV_U32:
1397 		SLJIT_ASSERT(src1 == TMP_ZERO && !(flags & SRC2_IMM) && dst != TMP_REG1);
1398 		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1399 		return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));
1400 
1401 	case SLJIT_ADD:
1402 		/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
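		/* Informal check of the formula with 32-bit operands:
		   0x7fffffff + 1 = 0x80000000 gives src1_sign = 0, src2_sign = 0,
		   result_sign = 1, carry = 0, so overflow = 0 ^ 0 ^ 1 ^ 0 = 1;
		   0xffffffff + 1 = 0 gives 1 ^ 0 ^ 0 ^ 1 = 0 (no signed overflow). */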
1403 		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1404 		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1405 
1406 		if (flags & SRC2_IMM) {
1407 			if (is_overflow) {
1408 				if (src2 >= 0)
1409 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1410 				else {
1411 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
1412 					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1413 				}
1414 			} else if (op & SLJIT_SET_Z)
1415 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1416 
1417 			/* Only the zero flag is needed. */
1418 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1419 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
1420 		} else {
1421 			if (is_overflow)
1422 				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1423 			else if (op & SLJIT_SET_Z)
1424 				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1425 
1426 			if (is_overflow || carry_src_r != 0) {
1427 				if (src1 != dst)
1428 					carry_src_r = (sljit_s32)src1;
1429 				else if (src2 != dst)
1430 					carry_src_r = (sljit_s32)src2;
1431 				else {
1432 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
1433 					carry_src_r = OTHER_FLAG;
1434 				}
1435 			}
1436 
1437 			/* Only the zero flag is needed. */
1438 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1439 				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
1440 		}
1441 
1442 		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1443 		if (is_overflow || carry_src_r != 0) {
1444 			if (flags & SRC2_IMM)
1445 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
1446 			else
1447 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
1448 		}
1449 
1450 		if (!is_overflow)
1451 			return SLJIT_SUCCESS;
1452 
1453 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1454 		if (op & SLJIT_SET_Z)
1455 			FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1456 		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1457 		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1458 
1459 	case SLJIT_ADDC:
1460 		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1461 
1462 		if (flags & SRC2_IMM) {
1463 			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
1464 		} else {
1465 			if (carry_src_r != 0) {
1466 				if (src1 != dst)
1467 					carry_src_r = (sljit_s32)src1;
1468 				else if (src2 != dst)
1469 					carry_src_r = (sljit_s32)src2;
1470 				else {
1471 					FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1472 					carry_src_r = EQUAL_FLAG;
1473 				}
1474 			}
1475 
1476 			FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
1477 		}
1478 
1479 		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1480 		if (carry_src_r != 0) {
1481 			if (flags & SRC2_IMM)
1482 				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
1483 			else
1484 				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
1485 		}
1486 
1487 		FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1488 
1489 		if (carry_src_r == 0)
1490 			return SLJIT_SUCCESS;
1491 
1492 		/* Set ULESS_FLAG (dst == 0) && (OTHER_FLAG == 1). */
1493 		FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
1494 		/* Set carry flag. */
1495 		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));
1496 
1497 	case SLJIT_SUB:
1498 		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1499 			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1500 			src2 = TMP_REG2;
1501 			flags &= ~SRC2_IMM;
1502 		}
1503 
1504 		is_handled = 0;
1505 
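		/* Pure comparisons set OTHER_FLAG directly with SLT/SLTU(I); the subtraction
		   itself is only emitted when the result or the zero flag is also needed. */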
1506 		if (flags & SRC2_IMM) {
1507 			if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1508 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1509 				is_handled = 1;
1510 			} else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1511 				FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1512 				is_handled = 1;
1513 			}
1514 		}
1515 
1516 		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1517 			is_handled = 1;
1518 
1519 			if (flags & SRC2_IMM) {
1520 				reg = (src1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1521 				FAIL_IF(push_inst(compiler, ADDI_D | RD(reg) | RJ(TMP_ZERO) | IMM_I12(src2)));
1522 				src2 = reg;
1523 				flags &= ~SRC2_IMM;
1524 			}
1525 
1526 			switch (GET_FLAG_TYPE(op)) {
1527 			case SLJIT_LESS:
1528 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1529 				break;
1530 			case SLJIT_GREATER:
1531 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1532 				break;
1533 			case SLJIT_SIG_LESS:
1534 				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1535 				break;
1536 			case SLJIT_SIG_GREATER:
1537 				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1538 				break;
1539 			}
1540 		}
1541 
1542 		if (is_handled) {
1543 			if (flags & SRC2_IMM) {
1544 				if (op & SLJIT_SET_Z)
1545 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1546 				if (!(flags & UNUSED_DEST))
1547 					return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
1548 			} else {
1549 				if (op & SLJIT_SET_Z)
1550 					FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1551 				if (!(flags & UNUSED_DEST))
1552 					return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
1553 			}
1554 			return SLJIT_SUCCESS;
1555 		}
1556 
1557 		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1558 		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1559 
1560 		if (flags & SRC2_IMM) {
1561 			if (is_overflow) {
1562 				if (src2 >= 0)
1563 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1564 				else {
1565 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));
1566 					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1567 				}
1568 			} else if (op & SLJIT_SET_Z)
1569 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1570 
1571 			if (is_overflow || is_carry)
1572 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1573 
1574 			/* Only the zero flag is needed. */
1575 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1576 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1577 		} else {
1578 			if (is_overflow)
1579 				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1580 			else if (op & SLJIT_SET_Z)
1581 				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1582 
1583 			if (is_overflow || is_carry)
1584 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1585 
1586 			/* Only the zero flag is needed. */
1587 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1588 				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1589 		}
1590 
1591 		if (!is_overflow)
1592 			return SLJIT_SUCCESS;
1593 
1594 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1595 		if (op & SLJIT_SET_Z)
1596 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1597 		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1598 		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1599 
1600 	case SLJIT_SUBC:
1601 		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1602 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1603 			src2 = TMP_REG2;
1604 			flags &= ~SRC2_IMM;
1605 		}
1606 
1607 		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1608 
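		/* EQUAL_FLAG records the borrow of src1 - src2 and TMP_REG1 the borrow of
		   subtracting the incoming carry (OTHER_FLAG); their OR becomes the new carry. */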
1609 		if (flags & SRC2_IMM) {
1610 			if (is_carry)
1611 				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1612 
1613 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1614 		} else {
1615 			if (is_carry)
1616 				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1617 
1618 			FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1619 		}
1620 
1621 		if (is_carry)
1622 			FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));
1623 
1624 		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1625 
1626 		if (!is_carry)
1627 			return SLJIT_SUCCESS;
1628 
1629 		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));
1630 
1631 	case SLJIT_MUL:
1632 		SLJIT_ASSERT(!(flags & SRC2_IMM));
1633 
1634 		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1635 			return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));
1636 
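		/* Overflow check: the widened product is compared with the sign-extension of the
		   truncated result, so OTHER_FLAG becomes non-zero exactly when the signed
		   multiplication overflowed. */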
1637 		if (op & SLJIT_32) {
1638 			FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1639 			FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
1640 			return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
1641 		}
1642 
1643 		FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1644 		FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
1645 		FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(63)));
1646 		return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));
1647 
1648 	case SLJIT_AND:
1649 		EMIT_LOGICAL(ANDI, AND);
1650 		return SLJIT_SUCCESS;
1651 
1652 	case SLJIT_OR:
1653 		EMIT_LOGICAL(ORI, OR);
1654 		return SLJIT_SUCCESS;
1655 
1656 	case SLJIT_XOR:
1657 		EMIT_LOGICAL(XORI, XOR);
1658 		return SLJIT_SUCCESS;
1659 
1660 	case SLJIT_SHL:
1661 	case SLJIT_MSHL:
1662 		if (op & SLJIT_32) {
1663 			EMIT_SHIFT(SLLI_W, SLL_W);
1664 		} else {
1665 			EMIT_SHIFT(SLLI_D, SLL_D);
1666 		}
1667 		break;
1668 
1669 	case SLJIT_LSHR:
1670 	case SLJIT_MLSHR:
1671 		if (op & SLJIT_32) {
1672 			EMIT_SHIFT(SRLI_W, SRL_W);
1673 		} else {
1674 			EMIT_SHIFT(SRLI_D, SRL_D);
1675 		}
1676 		break;
1677 
1678 	case SLJIT_ASHR:
1679 	case SLJIT_MASHR:
1680 		if (op & SLJIT_32) {
1681 			EMIT_SHIFT(SRAI_W, SRA_W);
1682 		} else {
1683 			EMIT_SHIFT(SRAI_D, SRA_D);
1684 		}
1685 		break;
1686 
1687 	case SLJIT_ROTL:
1688 	case SLJIT_ROTR:
1689 		if (flags & SRC2_IMM) {
1690 			SLJIT_ASSERT(src2 != 0);
1691 
1692 			if (GET_OPCODE(op) == SLJIT_ROTL)
1693 				src2 = word_size - src2;
1694 			return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
1695 		}
1696 
1697 		if (src2 == TMP_ZERO) {
1698 			if (dst != src1)
1699 				return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
1700 			return SLJIT_SUCCESS;
1701 		}
1702 
1703 		if (GET_OPCODE(op) == SLJIT_ROTL) {
1704 			FAIL_IF(push_inst(compiler, INST(SUB, op)| RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
1705 			src2 = OTHER_FLAG;
1706 		}
1707 		return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));
1708 
1709 	default:
1710 		SLJIT_UNREACHABLE();
1711 		return SLJIT_SUCCESS;
1712 	}
1713 
1714 	if (flags & SRC2_IMM) {
1715 		if (op & SLJIT_SET_Z)
1716 			FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1717 
1718 		if (flags & UNUSED_DEST)
1719 			return SLJIT_SUCCESS;
1720 		return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
1721 	}
1722 
1723 	if (op & SLJIT_SET_Z)
1724 		FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1725 
1726 	if (flags & UNUSED_DEST)
1727 		return SLJIT_SUCCESS;
1728 	return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
1729 }
1730 
1731 #undef IMM_EXTEND
1732 
emit_op(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 flags,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1733 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1734 	sljit_s32 dst, sljit_sw dstw,
1735 	sljit_s32 src1, sljit_sw src1w,
1736 	sljit_s32 src2, sljit_sw src2w)
1737 {
1738 	/* arg1 goes to TMP_REG1 or a source register
1739 	   arg2 goes to TMP_REG2, an immediate or a source register
1740 	   TMP_REG3 can be used for caching
1741 	   the result goes to TMP_REG2, so storing the result may still use TMP_REG1 and TMP_REG3. */
1742 	sljit_s32 dst_r = TMP_REG2;
1743 	sljit_s32 src1_r;
1744 	sljit_sw src2_r = 0;
1745 	sljit_s32 src2_tmp_reg = (GET_OPCODE(op) >= SLJIT_OP2_BASE && FAST_IS_REG(src1)) ? TMP_REG1 : TMP_REG2;
1746 
1747 	if (!(flags & ALT_KEEP_CACHE)) {
1748 		compiler->cache_arg = 0;
1749 		compiler->cache_argw = 0;
1750 	}
1751 
1752 	if (dst == 0) {
1753 		SLJIT_ASSERT(HAS_FLAGS(op));
1754 		flags |= UNUSED_DEST;
1755 		dst = TMP_REG2;
1756 	} else if (FAST_IS_REG(dst)) {
1757 		dst_r = dst;
1758 		flags |= REG_DEST;
1759 		if (flags & MOVE_OP)
1760 			src2_tmp_reg = dst_r;
1761 	} else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
1762 		flags |= SLOW_DEST;
1763 
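	/* A zero immediate is never turned into SRC2_IMM; it is handled below by
	   substituting TMP_ZERO. */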
1764 	if (flags & IMM_OP) {
1765 		if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
1766 			flags |= SRC2_IMM;
1767 			src2_r = src2w;
1768 		} else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
1769 			flags |= SRC2_IMM;
1770 			src2_r = src1w;
1771 
1772 			/* And swap arguments. */
1773 			src1 = src2;
1774 			src1w = src2w;
1775 			src2 = SLJIT_IMM;
1776 			/* src2w = src2_r unneeded. */
1777 		}
1778 	}
1779 
1780 	/* Source 1. */
1781 	if (FAST_IS_REG(src1)) {
1782 		src1_r = src1;
1783 		flags |= REG1_SOURCE;
1784 	} else if (src1 == SLJIT_IMM) {
1785 		if (src1w) {
1786 			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1787 			src1_r = TMP_REG1;
1788 		}
1789 		else
1790 			src1_r = TMP_ZERO;
1791 	} else {
1792 		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
1793 			FAIL_IF(compiler->error);
1794 		else
1795 			flags |= SLOW_SRC1;
1796 		src1_r = TMP_REG1;
1797 	}
1798 
1799 	/* Source 2. */
1800 	if (FAST_IS_REG(src2)) {
1801 		src2_r = src2;
1802 		flags |= REG2_SOURCE;
1803 		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
1804 			dst_r = (sljit_s32)src2_r;
1805 	} else if (src2 == SLJIT_IMM) {
1806 		if (!(flags & SRC2_IMM)) {
1807 			if (src2w) {
1808 				FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
1809 				src2_r = src2_tmp_reg;
1810 			} else {
1811 				src2_r = TMP_ZERO;
1812 				if (flags & MOVE_OP) {
1813 					if (dst & SLJIT_MEM)
1814 						dst_r = 0;
1815 					else
1816 						op = SLJIT_MOV;
1817 				}
1818 			}
1819 		}
1820 	} else {
1821 		if (getput_arg_fast(compiler, flags | LOAD_DATA, src2_tmp_reg, src2, src2w))
1822 			FAIL_IF(compiler->error);
1823 		else
1824 			flags |= SLOW_SRC2;
1825 
1826 		src2_r = src2_tmp_reg;
1827 	}
1828 
1829 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1830 		SLJIT_ASSERT(src2_r == TMP_REG2);
1831 		if ((flags & SLOW_DEST) && !can_cache(src2, src2w, src1, src1w) && can_cache(src2, src2w, dst, dstw)) {
1832 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1833 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | MEM_USE_TMP2, TMP_REG2, src2, src2w, dst, dstw));
1834 		} else {
1835 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1836 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1837 		}
1838 	}
1839 	else if (flags & SLOW_SRC1)
1840 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1841 	else if (flags & SLOW_SRC2)
1842 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA | ((src1_r == TMP_REG1) ? MEM_USE_TMP2 : 0), src2_tmp_reg, src2, src2w, dst, dstw));
1843 
1844 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1845 
1846 	if (dst & SLJIT_MEM) {
1847 		if (!(flags & SLOW_DEST)) {
1848 			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
1849 			return compiler->error;
1850 		}
1851 		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
1852 	}
1853 
1854 	return SLJIT_SUCCESS;
1855 }
1856 
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1857 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1858 {
1859 	CHECK_ERROR();
1860 	CHECK(check_sljit_emit_op0(compiler, op));
1861 
1862 	switch (GET_OPCODE(op)) {
1863 	case SLJIT_BREAKPOINT:
1864 		return push_inst(compiler, BREAK);
1865 	case SLJIT_NOP:
1866 		return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
1867 	case SLJIT_LMUL_UW:
1868 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1869 		FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1870 		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1871 	case SLJIT_LMUL_SW:
1872 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1873 		FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1874 		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
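	/* For DIVMOD the dividend is saved in TMP_REG1 first, so the remainder can still
	   be computed after the quotient overwrites SLJIT_R0. */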
1875 	case SLJIT_DIVMOD_UW:
1876 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1877 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1878 		return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1879 	case SLJIT_DIVMOD_SW:
1880 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1881 		FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1882 		return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1883 	case SLJIT_DIV_UW:
1884 		return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1885 	case SLJIT_DIV_SW:
1886 		return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1887 	case SLJIT_ENDBR:
1888 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1889 		return SLJIT_SUCCESS;
1890 	}
1891 
1892 	SLJIT_UNREACHABLE();
1893 	return SLJIT_ERR_UNSUPPORTED;
1894 }
1895 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1896 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1897 	sljit_s32 dst, sljit_sw dstw,
1898 	sljit_s32 src, sljit_sw srcw)
1899 {
1900 	sljit_s32 flags = 0;
1901 
1902 	CHECK_ERROR();
1903 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1904 	ADJUST_LOCAL_OFFSET(dst, dstw);
1905 	ADJUST_LOCAL_OFFSET(src, srcw);
1906 
1907 	if (op & SLJIT_32)
1908 		flags = INT_DATA | SIGNED_DATA;
1909 
1910 	switch (GET_OPCODE(op)) {
1911 	case SLJIT_MOV:
1912 	case SLJIT_MOV_P:
1913 		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, srcw);
1914 
1915 	case SLJIT_MOV_U32:
1916 		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
1917 
1918 	case SLJIT_MOV_S32:
1919 	/* Logical operators have no W variant, so sign-extended input is necessary for them. */
1920 	case SLJIT_MOV32:
1921 		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
1922 
1923 	case SLJIT_MOV_U8:
1924 		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
1925 
1926 	case SLJIT_MOV_S8:
1927 		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
1928 
1929 	case SLJIT_MOV_U16:
1930 		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
1931 
1932 	case SLJIT_MOV_S16:
1933 		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_ZERO, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
1934 
1935 	case SLJIT_CLZ:
1936 	case SLJIT_CTZ:
1937 	case SLJIT_REV:
1938 		return emit_op(compiler, op, flags, dst, dstw, TMP_ZERO, 0, src, srcw);
1939 
1940 	case SLJIT_REV_U16:
1941 	case SLJIT_REV_S16:
1942 		return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1943 
1944 	case SLJIT_REV_U32:
1945 	case SLJIT_REV_S32:
1946 		return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_ZERO, 0, src, srcw);
1947 	}
1948 
1949 	SLJIT_UNREACHABLE();
1950 	return SLJIT_SUCCESS;
1951 }
1952 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1953 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1954 	sljit_s32 dst, sljit_sw dstw,
1955 	sljit_s32 src1, sljit_sw src1w,
1956 	sljit_s32 src2, sljit_sw src2w)
1957 {
1958 	sljit_s32 flags = 0;
1959 
1960 	CHECK_ERROR();
1961 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1962 	ADJUST_LOCAL_OFFSET(dst, dstw);
1963 	ADJUST_LOCAL_OFFSET(src1, src1w);
1964 	ADJUST_LOCAL_OFFSET(src2, src2w);
1965 
1966 	if (op & SLJIT_32) {
1967 		flags |= INT_DATA | SIGNED_DATA;
1968 		if (src1 == SLJIT_IMM)
1969 			src1w = (sljit_s32)src1w;
1970 		if (src2 == SLJIT_IMM)
1971 			src2w = (sljit_s32)src2w;
1972 	}
1973 
1975 	switch (GET_OPCODE(op)) {
1976 	case SLJIT_ADD:
1977 	case SLJIT_ADDC:
1978 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1979 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1980 
1981 	case SLJIT_SUB:
1982 	case SLJIT_SUBC:
1983 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1984 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1985 
1986 	case SLJIT_MUL:
1987 		compiler->status_flags_state = 0;
1988 		return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
1989 
1990 	case SLJIT_AND:
1991 	case SLJIT_OR:
1992 	case SLJIT_XOR:
1993 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1994 
1995 	case SLJIT_SHL:
1996 	case SLJIT_MSHL:
1997 	case SLJIT_LSHR:
1998 	case SLJIT_MLSHR:
1999 	case SLJIT_ASHR:
2000 	case SLJIT_MASHR:
2001 	case SLJIT_ROTL:
2002 	case SLJIT_ROTR:
2003 		if (src2 == SLJIT_IMM) {
2004 			if (op & SLJIT_32)
2005 				src2w &= 0x1f;
2006 			else
2007 				src2w &= 0x3f;
2008 		}
2009 
2010 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
2011 	}
2012 
2013 	SLJIT_UNREACHABLE();
2014 	return SLJIT_SUCCESS;
2015 }
2016 
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2017 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2018 	sljit_s32 src1, sljit_sw src1w,
2019 	sljit_s32 src2, sljit_sw src2w)
2020 {
2021 	CHECK_ERROR();
2022 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2023 
2024 	SLJIT_SKIP_CHECKS(compiler);
2025 	return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
2026 }
2027 
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2028 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2029 	sljit_s32 dst_reg,
2030 	sljit_s32 src1, sljit_sw src1w,
2031 	sljit_s32 src2, sljit_sw src2w)
2032 {
2033 	CHECK_ERROR();
2034 	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2035 
2036 	switch (GET_OPCODE(op)) {
2037 	case SLJIT_MULADD:
2038 		SLJIT_SKIP_CHECKS(compiler);
2039 		FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), TMP_REG2, 0, src1, src1w, src2, src2w));
2040 		return push_inst(compiler, ADD_D | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG2));
2041 	}
2042 
2043 	return SLJIT_SUCCESS;
2044 }
2045 
sljit_emit_shift_into(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1_reg,sljit_s32 src2_reg,sljit_s32 src3,sljit_sw src3w)2046 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2047 	sljit_s32 dst_reg,
2048 	sljit_s32 src1_reg,
2049 	sljit_s32 src2_reg,
2050 	sljit_s32 src3, sljit_sw src3w)
2051 {
2052 	sljit_s32 is_left;
2053 	sljit_ins ins1, ins2, ins3;
2054 	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2055 	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
2056 
2058 	CHECK_ERROR();
2059 	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2060 
2061 	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
2062 
2063 	if (src1_reg == src2_reg) {
2064 		SLJIT_SKIP_CHECKS(compiler);
2065 		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
2066 	}
2067 
2068 	ADJUST_LOCAL_OFFSET(src3, src3w);
2069 
2070 	if (src3 == SLJIT_IMM) {
2071 		src3w &= bit_length - 1;
2072 
2073 		if (src3w == 0)
2074 			return SLJIT_SUCCESS;
2075 
2076 		if (is_left) {
2077 			ins1 = INST(SLLI, op) | IMM_I12(src3w);
2078 			src3w = bit_length - src3w;
2079 			ins2 = INST(SRLI, op) | IMM_I12(src3w);
2080 		} else {
2081 			ins1 = INST(SRLI, op) | IMM_I12(src3w);
2082 			src3w = bit_length - src3w;
2083 			ins2 = INST(SLLI, op) | IMM_I12(src3w);
2084 		}
2085 
2086 		FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
2087 		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
2088 		return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2089 	}
2090 
2091 	if (src3 & SLJIT_MEM) {
2092 		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
2093 		src3 = TMP_REG2;
2094 	} else if (dst_reg == src3) {
2095 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0)));
2096 		src3 = TMP_REG2;
2097 	}
2098 
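	/* dst = (src1 << n) | (src2 >> (bit_length - n)) for left shifts (mirrored for right
	   shifts). When n may be zero (no SLJIT_SHIFT_INTO_NON_ZERO), src2 is pre-shifted by
	   one and the remaining amount is n ^ (bit_length - 1) = bit_length - 1 - n, which
	   avoids a shift by bit_length; otherwise the negated count is used directly. */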
2099 	if (is_left) {
2100 		ins1 = INST(SLL, op);
2101 		ins2 = INST(SRLI, op);
2102 		ins3 = INST(SRL, op);
2103 	} else {
2104 		ins1 = INST(SRL, op);
2105 		ins2 = INST(SLLI, op);
2106 		ins3 = INST(SLL, op);
2107 	}
2108 
2109 	FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));
2110 
2111 	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
2112 		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
2113 		FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
2114 		src2_reg = TMP_REG1;
2115 	} else
2116 		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));
2117 
2118 	FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
2119 	return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
2120 }
2121 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2122 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2123 	sljit_s32 src, sljit_sw srcw)
2124 {
2125 	sljit_s32 base = src & REG_MASK;
2126 
2127 	CHECK_ERROR();
2128 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2129 	ADJUST_LOCAL_OFFSET(src, srcw);
2130 
2131 	switch (op) {
2132 	case SLJIT_FAST_RETURN:
2133 		if (FAST_IS_REG(src))
2134 			FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
2135 		else
2136 			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
2137 
2138 		return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2139 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2140 		return SLJIT_SUCCESS;
2141 	case SLJIT_PREFETCH_L1:
2142 	case SLJIT_PREFETCH_L2:
2143 	case SLJIT_PREFETCH_L3:
2144 	case SLJIT_PREFETCH_ONCE:
2145 		if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
2146 			srcw &= 0x3;
2147 			if (SLJIT_UNLIKELY(srcw))
2148 				FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
2149 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(srcw ? TMP_REG1 : OFFS_REG(src))));
2150 		} else {
2151 			if (base && srcw <= I12_MAX && srcw >= I12_MIN)
2152 				return push_inst(compiler, PRELD | RJ(base) | IMM_I12(srcw));
2153 
2154 			FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2155 			if (base != 0)
2156 				FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2157 		}
2158 		return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
2159 	}
2160 	return SLJIT_SUCCESS;
2161 }
2162 
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)2163 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2164 	sljit_s32 dst, sljit_sw dstw)
2165 {
2166 	sljit_s32 dst_r;
2167 
2168 	CHECK_ERROR();
2169 	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2170 	ADJUST_LOCAL_OFFSET(dst, dstw);
2171 
2172 	switch (op) {
2173 	case SLJIT_FAST_ENTER:
2174 		if (FAST_IS_REG(dst))
2175 			return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2176 
2177 		SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
2178 		break;
2179 	case SLJIT_GET_RETURN_ADDRESS:
2180 		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2181 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
2182 		break;
2183 	}
2184 
2185 	if (dst & SLJIT_MEM)
2186 		return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
2187 
2188 	return SLJIT_SUCCESS;
2189 }
2190 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2191 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2192 {
2193 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2194 
2195 	if (type == SLJIT_GP_REGISTER)
2196 		return reg_map[reg];
2197 
2198 	if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_128 && type != SLJIT_SIMD_REG_256)
2199 		return -1;
2200 
2201 	return freg_map[reg];
2202 }
2203 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2204 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2205 	void *instruction, sljit_u32 size)
2206 {
2207 	SLJIT_UNUSED_ARG(size);
2208 	CHECK_ERROR();
2209 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2210 
2211 	return push_inst(compiler, *(sljit_ins*)instruction);
2212 }
2213 
2214 /* --------------------------------------------------------------------- */
2215 /*  Floating point operators                                             */
2216 /* --------------------------------------------------------------------- */
2217 #define SET_COND(cond) (sljit_ins)(cond << 15)
2218 
2219 #define COND_CUN SET_COND(0x8)	 /* UN */
2220 #define COND_CEQ SET_COND(0x4)	 /* EQ */
2221 #define COND_CUEQ SET_COND(0xc)	 /* UN EQ */
2222 #define COND_CLT SET_COND(0x2)	 /* LT */
2223 #define COND_CULT SET_COND(0xa)	 /* UN LT */
2224 #define COND_CLE SET_COND(0x6)	 /* LT EQ */
2225 #define COND_CULE SET_COND(0xe)	 /* UN LT EQ */
2226 #define COND_CNE SET_COND(0x10)	 /* GT LT */
2227 #define COND_CUNE SET_COND(0x18) /* UN GT LT */
2228 #define COND_COR SET_COND(0x14)	 /* GT LT EQ */
2229 
2230 #define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? inst##_S : inst##_D)
2231 #define FCD(cd) (sljit_ins)(cd & 0x7)
2232 #define FCJ(cj) (sljit_ins)((cj & 0x7) << 5)
2233 #define FCA(ca) (sljit_ins)((ca & 0x7) << 15)
2234 #define F_OTHER_FLAG 1
2235 
2236 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
2237 
2238 /* Convert to integer, rounding toward zero. */
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2239 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2240 	sljit_s32 dst, sljit_sw dstw,
2241 	sljit_s32 src, sljit_sw srcw)
2242 {
2243 	sljit_ins inst;
2244 	sljit_u32 word_data = 0;
2245 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2246 
2247 	switch (GET_OPCODE(op))
2248 	{
2249 	case SLJIT_CONV_SW_FROM_F64:
2250 		word_data = 1;
2251 		inst = FINST(FTINTRZ_L, op);
2252 		break;
2253 	case SLJIT_CONV_S32_FROM_F64:
2254 		inst = FINST(FTINTRZ_W, op);
2255 		break;
2256 	default:
2257 		inst = BREAK;
2258 		SLJIT_UNREACHABLE();
2259 	}
2260 
2261 	if (src & SLJIT_MEM) {
2262 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
2263 		src = TMP_FREG1;
2264 	}
2265 
2266 	FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
2267 	FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));
2268 
2269 	if (dst & SLJIT_MEM)
2270 		return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
2271 	return SLJIT_SUCCESS;
2272 }
2273 
sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2274 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
2275 	sljit_s32 dst, sljit_sw dstw,
2276 	sljit_s32 src, sljit_sw srcw)
2277 {
2278 	sljit_ins inst;
2279 	sljit_u32 word_data = 0;
2280 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2281 
2282 	switch (GET_OPCODE(op))
2283 	{
2284 	case SLJIT_CONV_F64_FROM_SW:
2285 		word_data = 1;
2286 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2287 		break;
2288 	case SLJIT_CONV_F64_FROM_S32:
2289 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2290 		break;
2291 	default:
2292 		inst = BREAK;
2293 		SLJIT_UNREACHABLE();
2294 	}
2295 
2296 	if (src & SLJIT_MEM) {
2297 		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2298 		src = TMP_REG1;
2299 	} else if (src == SLJIT_IMM) {
2300 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2301 			srcw = (sljit_s32)srcw;
2302 
2303 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2304 		src = TMP_REG1;
2305 	}
2306 	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2307 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2308 
2309 	if (dst & SLJIT_MEM)
2310 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2311 	return SLJIT_SUCCESS;
2312 }
2313 
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2314 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2315 	sljit_s32 dst, sljit_sw dstw,
2316 	sljit_s32 src, sljit_sw srcw)
2317 {
2318 	return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
2319 }
2320 
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2321 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2322 	sljit_s32 dst, sljit_sw dstw,
2323 	sljit_s32 src, sljit_sw srcw)
2324 {
2325 	sljit_ins inst;
2326 	sljit_u32 word_data = 0;
2327 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2328 
2329 	switch (GET_OPCODE(op))
2330 	{
2331 	case SLJIT_CONV_F64_FROM_UW:
2332 		word_data = 1;
2333 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2334 		break;
2335 	case SLJIT_CONV_F64_FROM_U32:
2336 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2337 		break;
2338 	default:
2339 		inst = BREAK;
2340 		SLJIT_UNREACHABLE();
2341 	}
2342 
2343 	if (src & SLJIT_MEM) {
2344 		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2345 		src = TMP_REG1;
2346 	} else if (src == SLJIT_IMM) {
2347 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
2348 			srcw = (sljit_u32)srcw;
2349 
2350 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2351 		src = TMP_REG1;
2352 	}
2353 
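	/* A value with the sign bit clear is converted directly as a signed integer.
	   Otherwise (src >> 1) | (src & 1) is converted and doubled with FADD; the OR keeps
	   the dropped bit as a sticky bit for the rounding. */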
2354 	if (!word_data)
2355 		FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));
2356 
2357 	FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));
2358 
2359 	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2360 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2361 	FAIL_IF(push_inst(compiler, B | IMM_I26(7)));
2362 
2363 	FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
2364 	FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
2365 	FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
2366 	FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
2367 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2368 	FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));
2369 
2370 	if (dst & SLJIT_MEM)
2371 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2372 	return SLJIT_SUCCESS;
2373 }
2374 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2375 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2376 	sljit_s32 src1, sljit_sw src1w,
2377 	sljit_s32 src2, sljit_sw src2w)
2378 {
2379 	if (src1 & SLJIT_MEM) {
2380 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2381 		src1 = TMP_FREG1;
2382 	}
2383 
2384 	if (src2 & SLJIT_MEM) {
2385 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
2386 		src2 = TMP_FREG2;
2387 	}
2388 
2389 	FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));
2390 
2391 	switch (GET_FLAG_TYPE(op)) {
2392 	case SLJIT_F_EQUAL:
2393 	case SLJIT_ORDERED_EQUAL:
2394 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2395 		break;
2396 	case SLJIT_F_LESS:
2397 	case SLJIT_ORDERED_LESS:
2398 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2399 		break;
2400 	case SLJIT_F_GREATER:
2401 	case SLJIT_ORDERED_GREATER:
2402 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2403 		break;
2404 	case SLJIT_UNORDERED_OR_GREATER:
2405 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2406 		break;
2407 	case SLJIT_UNORDERED_OR_LESS:
2408 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2409 		break;
2410 	case SLJIT_UNORDERED_OR_EQUAL:
2411 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2412 		break;
2413 	default: /* SLJIT_UNORDERED */
2414 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2415 	}
2416 	return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
2417 }
2418 
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2419 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2420 	sljit_s32 dst, sljit_sw dstw,
2421 	sljit_s32 src, sljit_sw srcw)
2422 {
2423 	sljit_s32 dst_r;
2424 
2425 	CHECK_ERROR();
2426 	compiler->cache_arg = 0;
2427 	compiler->cache_argw = 0;
2428 
2429 	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
2430 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2431 
2432 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
2433 		op ^= SLJIT_32;
2434 
2435 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2436 
2437 	if (src & SLJIT_MEM) {
2438 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
2439 		src = dst_r;
2440 	}
2441 
2442 	switch (GET_OPCODE(op)) {
2443 	case SLJIT_MOV_F64:
2444 		if (src != dst_r) {
2445 			if (!(dst & SLJIT_MEM))
2446 				FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
2447 			else
2448 				dst_r = src;
2449 		}
2450 		break;
2451 	case SLJIT_NEG_F64:
2452 		FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
2453 		break;
2454 	case SLJIT_ABS_F64:
2455 		FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
2456 		break;
2457 	case SLJIT_CONV_F64_FROM_F32:
2458 		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from the memory. */
2459 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
2460 		op ^= SLJIT_32;
2461 		break;
2462 	}
2463 
2464 	if (dst & SLJIT_MEM)
2465 		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
2466 	return SLJIT_SUCCESS;
2467 }
2468 
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2469 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2470 	sljit_s32 dst, sljit_sw dstw,
2471 	sljit_s32 src1, sljit_sw src1w,
2472 	sljit_s32 src2, sljit_sw src2w)
2473 {
2474 	sljit_s32 dst_r, flags = 0;
2475 
2476 	CHECK_ERROR();
2477 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2478 	ADJUST_LOCAL_OFFSET(dst, dstw);
2479 	ADJUST_LOCAL_OFFSET(src1, src1w);
2480 	ADJUST_LOCAL_OFFSET(src2, src2w);
2481 
2482 	compiler->cache_arg = 0;
2483 	compiler->cache_argw = 0;
2484 
2485 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
2486 
2487 	if (src1 & SLJIT_MEM) {
2488 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
2489 			FAIL_IF(compiler->error);
2490 			src1 = TMP_FREG1;
2491 		} else
2492 			flags |= SLOW_SRC1;
2493 	}
2494 
2495 	if (src2 & SLJIT_MEM) {
2496 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
2497 			FAIL_IF(compiler->error);
2498 			src2 = TMP_FREG2;
2499 		} else
2500 			flags |= SLOW_SRC2;
2501 	}
2502 
2503 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2504 		if ((dst & SLJIT_MEM) && !can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2505 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
2506 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2507 		} else {
2508 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2509 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2510 		}
2511 	}
2512 	else if (flags & SLOW_SRC1)
2513 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2514 	else if (flags & SLOW_SRC2)
2515 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2516 
2517 	if (flags & SLOW_SRC1)
2518 		src1 = TMP_FREG1;
2519 	if (flags & SLOW_SRC2)
2520 		src2 = TMP_FREG2;
2521 
2522 	switch (GET_OPCODE(op)) {
2523 	case SLJIT_ADD_F64:
2524 		FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2525 		break;
2526 	case SLJIT_SUB_F64:
2527 		FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2528 		break;
2529 	case SLJIT_MUL_F64:
2530 		FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2531 		break;
2532 	case SLJIT_DIV_F64:
2533 		FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2534 		break;
2535 	}
2536 
2537 	if (dst_r != dst)
2538 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2539 	return SLJIT_SUCCESS;
2540 }
2541 
sljit_emit_fop2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2542 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
2543 	sljit_s32 dst_freg,
2544 	sljit_s32 src1, sljit_sw src1w,
2545 	sljit_s32 src2, sljit_sw src2w)
2546 {
2547 	sljit_s32 reg;
2548 
2549 	CHECK_ERROR();
2550 	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
2551 	ADJUST_LOCAL_OFFSET(src1, src1w);
2552 	ADJUST_LOCAL_OFFSET(src2, src2w);
2553 
2554 	if (src2 & SLJIT_MEM) {
2555 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
2556 		src2 = TMP_FREG1;
2557 	}
2558 
2559 	if (src1 & SLJIT_MEM) {
2560 		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
2561 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
2562 		src1 = reg;
2563 	}
2564 
2565 	return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
2566 }
2567 
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)2568 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2569 	sljit_s32 freg, sljit_f32 value)
2570 {
2571 	union {
2572 		sljit_s32 imm;
2573 		sljit_f32 value;
2574 	} u;
2575 
2576 	CHECK_ERROR();
2577 	CHECK(check_sljit_emit_fset32(compiler, freg, value));
2578 
2579 	u.value = value;
2580 
2581 	if (u.imm == 0)
2582 		return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));
2583 
2584 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2585 	return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));
2586 }
2587 
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)2588 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2589 	sljit_s32 freg, sljit_f64 value)
2590 {
2591 	union {
2592 		sljit_sw imm;
2593 		sljit_f64 value;
2594 	} u;
2595 
2596 	CHECK_ERROR();
2597 	CHECK(check_sljit_emit_fset64(compiler, freg, value));
2598 
2599 	u.value = value;
2600 
2601 	if (u.imm == 0)
2602 		return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));
2603 
2604 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2605 	return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));
2606 }
2607 
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2608 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2609 	sljit_s32 freg, sljit_s32 reg)
2610 {
2611 	sljit_ins inst;
2612 
2613 	CHECK_ERROR();
2614 	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2615 
2616 	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2617 		inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
2618 	else
2619 		inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
2620 	return push_inst(compiler, inst);
2621 }
2622 
2623 /* --------------------------------------------------------------------- */
2624 /*  Conditional instructions                                             */
2625 /* --------------------------------------------------------------------- */
2626 
sljit_emit_label(struct sljit_compiler * compiler)2627 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2628 {
2629 	struct sljit_label *label;
2630 
2631 	CHECK_ERROR_PTR();
2632 	CHECK_PTR(check_sljit_emit_label(compiler));
2633 
2634 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2635 		return compiler->last_label;
2636 
2637 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2638 	PTR_FAIL_IF(!label);
2639 	set_label(label, compiler);
2640 	return label;
2641 }
2642 
get_jump_instruction(sljit_s32 type)2643 static sljit_ins get_jump_instruction(sljit_s32 type)
2644 {
2645 	switch (type) {
2646 	case SLJIT_EQUAL:
2647 	case SLJIT_ATOMIC_NOT_STORED:
2648 		return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2649 	case SLJIT_NOT_EQUAL:
2650 	case SLJIT_ATOMIC_STORED:
2651 		return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2652 	case SLJIT_LESS:
2653 	case SLJIT_GREATER:
2654 	case SLJIT_SIG_LESS:
2655 	case SLJIT_SIG_GREATER:
2656 	case SLJIT_OVERFLOW:
2657 	case SLJIT_CARRY:
2658 		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2659 	case SLJIT_GREATER_EQUAL:
2660 	case SLJIT_LESS_EQUAL:
2661 	case SLJIT_SIG_GREATER_EQUAL:
2662 	case SLJIT_SIG_LESS_EQUAL:
2663 	case SLJIT_NOT_OVERFLOW:
2664 	case SLJIT_NOT_CARRY:
2665 		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2666 	case SLJIT_F_EQUAL:
2667 	case SLJIT_ORDERED_EQUAL:
2668 	case SLJIT_F_LESS:
2669 	case SLJIT_ORDERED_LESS:
2670 	case SLJIT_ORDERED_GREATER:
2671 	case SLJIT_UNORDERED_OR_GREATER:
2672 	case SLJIT_F_GREATER:
2673 	case SLJIT_UNORDERED_OR_LESS:
2674 	case SLJIT_UNORDERED_OR_EQUAL:
2675 	case SLJIT_UNORDERED:
2676 		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2677 	case SLJIT_ORDERED_NOT_EQUAL:
2678 	case SLJIT_ORDERED_LESS_EQUAL:
2679 	case SLJIT_ORDERED_GREATER_EQUAL:
2680 	case SLJIT_F_NOT_EQUAL:
2681 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
2682 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2683 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
2684 	case SLJIT_F_LESS_EQUAL:
2685 	case SLJIT_F_GREATER_EQUAL:
2686 	case SLJIT_ORDERED:
2687 		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2688 	default:
2689 		/* Not conditional branch. */
2690 		return 0;
2691 	}
2692 }
2693 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2694 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2695 {
2696 	struct sljit_jump *jump;
2697 	sljit_ins inst;
2698 
2699 	CHECK_ERROR_PTR();
2700 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2701 
2702 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2703 	PTR_FAIL_IF(!jump);
2704 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2705 	type &= 0xff;
2706 
2707 	inst = get_jump_instruction(type);
2708 
2709 	if (inst != 0) {
2710 		PTR_FAIL_IF(push_inst(compiler, inst));
2711 		jump->flags |= IS_COND;
2712 	}
2713 
2714 	jump->addr = compiler->size;
2715 	inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);
2716 
2717 	if (type >= SLJIT_FAST_CALL) {
2718 		jump->flags |= IS_CALL;
2719 		inst |= RD(RETURN_ADDR_REG);
2720 	}
2721 
2722 	PTR_FAIL_IF(push_inst(compiler, inst));
2723 
2724 	/* Maximum number of instructions required for generating a constant. */
2725 	compiler->size += JUMP_MAX_SIZE - 1;
2726 	return jump;
2727 }
2728 
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2729 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2730 	sljit_s32 arg_types)
2731 {
2732 	SLJIT_UNUSED_ARG(arg_types);
2733 	CHECK_ERROR_PTR();
2734 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2735 
2736 	if (type & SLJIT_CALL_RETURN) {
2737 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2738 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2739 	}
2740 
2741 	SLJIT_SKIP_CHECKS(compiler);
2742 	return sljit_emit_jump(compiler, type);
2743 }
2744 
sljit_emit_cmp(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2745 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2746 	sljit_s32 src1, sljit_sw src1w,
2747 	sljit_s32 src2, sljit_sw src2w)
2748 {
2749 	struct sljit_jump *jump;
2750 	sljit_s32 flags;
2751 	sljit_ins inst;
2752 	sljit_s32 src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
2753 
2754 	CHECK_ERROR_PTR();
2755 	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2756 	ADJUST_LOCAL_OFFSET(src1, src1w);
2757 	ADJUST_LOCAL_OFFSET(src2, src2w);
2758 
2759 	compiler->cache_arg = 0;
2760 	compiler->cache_argw = 0;
2761 
2762 	flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2763 
2764 	if (src1 & SLJIT_MEM) {
2765 		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2766 		src1 = TMP_REG1;
2767 	}
2768 
2769 	if (src2 & SLJIT_MEM) {
2770 		PTR_FAIL_IF(emit_op_mem2(compiler, flags, src2_tmp_reg, src2, src2w, 0, 0));
2771 		src2 = src2_tmp_reg;
2772 	}
2773 
2774 	if (src1 == SLJIT_IMM) {
2775 		if (src1w != 0) {
2776 			PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2777 			src1 = TMP_REG1;
2778 		}
2779 		else
2780 			src1 = TMP_ZERO;
2781 	}
2782 
2783 	if (src2 == SLJIT_IMM) {
2784 		if (src2w != 0) {
2785 			PTR_FAIL_IF(load_immediate(compiler, src2_tmp_reg, src2w));
2786 			src2 = src2_tmp_reg;
2787 		}
2788 		else
2789 			src2 = TMP_ZERO;
2790 	}
2791 
2792 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2793 	PTR_FAIL_IF(!jump);
2794 	set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2795 	type &= 0xff;
2796 
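	/* The emitted branch tests the negated condition: when taken, it skips the JIRL
	   that performs the actual jump. */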
2797 	switch (type) {
2798 	case SLJIT_EQUAL:
2799 		inst = BNE | RJ(src1) | RD(src2);
2800 		break;
2801 	case SLJIT_NOT_EQUAL:
2802 		inst = BEQ | RJ(src1) | RD(src2);
2803 		break;
2804 	case SLJIT_LESS:
2805 		inst = BGEU | RJ(src1) | RD(src2);
2806 		break;
2807 	case SLJIT_GREATER_EQUAL:
2808 		inst = BLTU | RJ(src1) | RD(src2);
2809 		break;
2810 	case SLJIT_GREATER:
2811 		inst = BGEU | RJ(src2) | RD(src1);
2812 		break;
2813 	case SLJIT_LESS_EQUAL:
2814 		inst = BLTU | RJ(src2) | RD(src1);
2815 		break;
2816 	case SLJIT_SIG_LESS:
2817 		inst = BGE | RJ(src1) | RD(src2);
2818 		break;
2819 	case SLJIT_SIG_GREATER_EQUAL:
2820 		inst = BLT | RJ(src1) | RD(src2);
2821 		break;
2822 	case SLJIT_SIG_GREATER:
2823 		inst = BGE | RJ(src2) | RD(src1);
2824 		break;
2825 	case SLJIT_SIG_LESS_EQUAL:
2826 		inst = BLT | RJ(src2) | RD(src1);
2827 		break;
2828 	default:
2829 		inst = BREAK;
2830 		SLJIT_UNREACHABLE();
2831 	}
2832 
2833 	PTR_FAIL_IF(push_inst(compiler, inst));
2834 
2835 	jump->addr = compiler->size;
2836 	PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2837 
2838 	/* Maximum number of instructions required for generating a constant. */
2839 	compiler->size += JUMP_MAX_SIZE - 1;
2840 
2841 	return jump;
2842 }
2843 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2844 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2845 {
2846 	struct sljit_jump *jump;
2847 
2848 	CHECK_ERROR();
2849 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2850 
2851 	if (src != SLJIT_IMM) {
2852 		if (src & SLJIT_MEM) {
2853 			ADJUST_LOCAL_OFFSET(src, srcw);
2854 			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2855 			src = TMP_REG1;
2856 		}
2857 		return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
2858 	}
2859 
2860 	/* These jumps are converted to jump/call instructions when possible. */
2861 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2862 	FAIL_IF(!jump);
2863 	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2864 	jump->u.target = (sljit_uw)srcw;
2865 
2866 	jump->addr = compiler->size;
2867 	FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2868 
2869 	/* Maximum number of instructions required for generating a constant. */
2870 	compiler->size += JUMP_MAX_SIZE - 1;
2871 
2872 	return SLJIT_SUCCESS;
2873 }
2874 
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2875 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2876 	sljit_s32 arg_types,
2877 	sljit_s32 src, sljit_sw srcw)
2878 {
2879 	SLJIT_UNUSED_ARG(arg_types);
2880 	CHECK_ERROR();
2881 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2882 
2883 	if (src & SLJIT_MEM) {
2884 		ADJUST_LOCAL_OFFSET(src, srcw);
2885 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2886 		src = TMP_REG1;
2887 	}
2888 
2889 	if (type & SLJIT_CALL_RETURN) {
2890 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2891 			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
2892 			src = TMP_REG1;
2893 		}
2894 
2895 		FAIL_IF(emit_stack_frame_release(compiler, 0));
2896 		type = SLJIT_JUMP;
2897 	}
2898 
2899 	SLJIT_SKIP_CHECKS(compiler);
2900 	return sljit_emit_ijump(compiler, type, src, srcw);
2901 }
2902 
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)2903 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2904 	sljit_s32 dst, sljit_sw dstw,
2905 	sljit_s32 type)
2906 {
2907 	sljit_s32 src_r, dst_r, invert;
2908 	sljit_s32 saved_op = op;
2909 	sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2910 
2911 	CHECK_ERROR();
2912 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2913 	ADJUST_LOCAL_OFFSET(dst, dstw);
2914 
2915 	op = GET_OPCODE(op);
2916 	dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2917 
2918 	compiler->cache_arg = 0;
2919 	compiler->cache_argw = 0;
2920 
2921 	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2922 		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
2923 
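	/* Integer condition codes come in (condition, negated condition) pairs, so the low
	   bit of the type selects the inverted form. */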
2924 	if (type < SLJIT_F_EQUAL) {
2925 		src_r = OTHER_FLAG;
2926 		invert = type & 0x1;
2927 
2928 		switch (type) {
2929 		case SLJIT_EQUAL:
2930 		case SLJIT_NOT_EQUAL:
2931 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2932 			src_r = dst_r;
2933 			break;
2934 		case SLJIT_ATOMIC_STORED:
2935 		case SLJIT_ATOMIC_NOT_STORED:
2936 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2937 			src_r = dst_r;
2938 			invert ^= 0x1;
2939 			break;
2940 		case SLJIT_OVERFLOW:
2941 		case SLJIT_NOT_OVERFLOW:
2942 			if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
2943 				src_r = OTHER_FLAG;
2944 				break;
2945 			}
2946 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2947 			src_r = dst_r;
2948 			invert ^= 0x1;
2949 			break;
2950 		}
2951 	} else {
2952 		invert = 0;
2953 		src_r = OTHER_FLAG;
2954 
2955 		switch (type) {
2956 		case SLJIT_ORDERED_NOT_EQUAL:
2957 		case SLJIT_ORDERED_LESS_EQUAL:
2958 		case SLJIT_ORDERED_GREATER_EQUAL:
2959 		case SLJIT_F_NOT_EQUAL:
2960 		case SLJIT_UNORDERED_OR_NOT_EQUAL:
2961 		case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2962 		case SLJIT_UNORDERED_OR_LESS_EQUAL:
2963 		case SLJIT_F_LESS_EQUAL:
2964 		case SLJIT_F_GREATER_EQUAL:
2965 		case SLJIT_ORDERED:
2966 			invert = 1;
2967 			break;
2968 		}
2969 	}
2970 
2971 	if (invert) {
2972 		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
2973 		src_r = dst_r;
2974 	}
2975 
2976 	if (op < SLJIT_ADD) {
2977 		if (dst & SLJIT_MEM)
2978 			return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
2979 
2980 		if (src_r != dst_r)
2981 			return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
2982 		return SLJIT_SUCCESS;
2983 	}
2984 
2985 	mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
2986 
2987 	if (dst & SLJIT_MEM)
2988 		return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
2989 	return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
2990 }
2991 
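/* Conditional select: dst_reg is preloaded with src2_reg when necessary,
   one instruction slot is reserved, src1 is loaded into dst_reg, and the
   reserved slot is finally patched with a conditional branch over that
   load. */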
2992 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2993 	sljit_s32 dst_reg,
2994 	sljit_s32 src1, sljit_sw src1w,
2995 	sljit_s32 src2_reg)
2996 {
2997 	sljit_ins *ptr;
2998 	sljit_uw size;
2999 	sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
3000 
3001 	CHECK_ERROR();
3002 	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3003 	ADJUST_LOCAL_OFFSET(src1, src1w);
3004 
3005 	if (dst_reg != src2_reg) {
3006 		if (dst_reg == src1) {
3007 			src1 = src2_reg;
3008 			src1w = 0;
3009 			type ^= 0x1;
3010 		} else {
3011 			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3012 				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(dst_reg) | IMM_I12(0)));
3013 
3014 				if ((src1 & REG_MASK) == dst_reg)
3015 					src1 = (src1 & ~REG_MASK) | TMP_REG1;
3016 
3017 				if (OFFS_REG(src1) == dst_reg)
3018 					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG1);
3019 			}
3020 
3021 			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
3022 		}
3023 	}
3024 
3025 	size = compiler->size;
3026 
3027 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
3028 	FAIL_IF(!ptr);
3029 	compiler->size++;
3030 
3031 	if (src1 & SLJIT_MEM) {
3032 		FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
3033 	} else if (src1 == SLJIT_IMM) {
3034 		if (type & SLJIT_32)
3035 			src1w = (sljit_s32)src1w;
3036 		FAIL_IF(load_immediate(compiler, dst_reg, src1w));
3037 	} else
3038 		FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));
3039 
3040 	*ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);
3041 	return SLJIT_SUCCESS;
3042 }
3043 
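/* Floating point select: the integer flag is transferred to an FCC
   register with MOVGR2CF and FSEL chooses between src1 and src2_freg;
   memory operands are loaded into TMP_FREG2 first. */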
3044 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3045 	sljit_s32 dst_freg,
3046 	sljit_s32 src1, sljit_sw src1w,
3047 	sljit_s32 src2_freg)
3048 {
3049 	sljit_s32 invert = 0;
3050 
3051 	CHECK_ERROR();
3052 	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3053 
3054 	ADJUST_LOCAL_OFFSET(src1, src1w);
3055 
3056 	if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
3057 		if ((type & ~SLJIT_32) == SLJIT_EQUAL)
3058 			invert = 1;
3059 		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
3060 	} else {
3061 		if (get_jump_instruction(type & ~SLJIT_32) == (BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO)))
3062 			invert = 1;
3063 		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
3064 	}
3065 
3066 	if (src1 & SLJIT_MEM) {
3067 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, TMP_FREG2, src1, src1w));
3068 		if (invert)
3069 			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(TMP_FREG2) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3070 		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(TMP_FREG2) | FCA(F_OTHER_FLAG));
3071 	} else {
3072 		if (invert)
3073 			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
3074 		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
3075 	}
3076 }
3077 
3078 #undef FLOAT_DATA
3079 
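/* Register pair load/store. The address is folded into TMP_REG1 when an
   index register is present or the offset does not fit into the 12 bit
   immediate; when the first register of a loaded pair equals the base
   register, the second word is loaded first to keep the base intact. */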
3080 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3081 	sljit_s32 reg,
3082 	sljit_s32 mem, sljit_sw memw)
3083 {
3084 	sljit_s32 flags;
3085 
3086 	CHECK_ERROR();
3087 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3088 
3089 	if (!(reg & REG_PAIR_MASK))
3090 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3091 
3092 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3093 		memw &= 0x3;
3094 
3095 		if (SLJIT_UNLIKELY(memw != 0)) {
3096 			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
3097 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3098 		} else
3099 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));
3100 
3101 		mem = TMP_REG1;
3102 		memw = 0;
3103 	} else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
3104 		if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
3105 			FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
3106 			memw &= 0xfff;
3107 		} else {
3108 			FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
3109 			memw = 0;
3110 		}
3111 
3112 		if (mem & REG_MASK)
3113 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
3114 
3115 		mem = TMP_REG1;
3116 	} else {
3117 		mem &= REG_MASK;
3118 		memw &= 0xfff;
3119 	}
3120 
3121 	SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));
3122 
3123 	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
3124 		FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
3125 		return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
3126 	}
3127 
3128 	flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
3129 
3130 	FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
3131 	return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
3132 }
3133 
3134 #undef TO_ARGW_HI
3135 
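/* Reduces a SIMD memory operand to a single base register: TMP_REG3 is
   used whenever an index register, a missing base, or a non-zero offset
   has to be resolved. */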
3136 static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
3137 {
3138 	sljit_s32 mem = *mem_ptr;
3139 
3140 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
3141 		*mem_ptr = TMP_REG3;
3142 		FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(mem)) | IMM_I12(memw & 0x3)));
3143 		return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem & REG_MASK));
3144 	}
3145 
3146 	if (!(mem & REG_MASK)) {
3147 		*mem_ptr = TMP_REG3;
3148 		return load_immediate(compiler, TMP_REG3, memw);
3149 	}
3150 
3151 	mem &= REG_MASK;
3152 
3153 	if (memw == 0) {
3154 		*mem_ptr = mem;
3155 		return SLJIT_SUCCESS;
3156 	}
3157 
3158 	*mem_ptr = TMP_REG3;
3159 
3160 	FAIL_IF(load_immediate(compiler, TMP_REG3, memw));
3161 	return push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(mem));
3162 }
3163 
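/* SIMD register-to-register moves are emitted as VOR_V (or its 256 bit
   form); memory operands use VLD/VST for 128 bit and XVLD/XVST for
   256 bit (LASX) registers. */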
3164 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3165 	sljit_s32 freg,
3166 	sljit_s32 srcdst, sljit_sw srcdstw)
3167 {
3168 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3169 	sljit_ins ins = 0;
3170 
3171 	CHECK_ERROR();
3172 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3173 
3174 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3175 
3176 	if (reg_size != 5 && reg_size != 4)
3177 		return SLJIT_ERR_UNSUPPORTED;
3178 
3179 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3180 		return SLJIT_ERR_UNSUPPORTED;
3181 
3182 	if (type & SLJIT_SIMD_TEST)
3183 		return SLJIT_SUCCESS;
3184 
3185 	if (!(srcdst & SLJIT_MEM)) {
3186 		if (type & SLJIT_SIMD_STORE)
3187 			ins = FRD(srcdst) | FRJ(freg) | FRK(freg);
3188 		else
3189 			ins = FRD(freg) | FRJ(srcdst) | FRK(srcdst);
3190 
3191 		if (reg_size == 5)
3192 			ins |= VOR_V | (sljit_ins)1 << 26;
3193 		else
3194 			ins |= VOR_V;
3195 
3196 		return push_inst(compiler, ins);
3197 	}
3198 
3199 	ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3200 
3201 	if (reg_size == 5)
3202 		ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3203 
3204 	if (FAST_IS_REG(srcdst) && srcdst >= 0 && (srcdstw >= I12_MIN && srcdstw <= I12_MAX))
3205 		return push_inst(compiler, ins | FRD(freg) | RJ((sljit_u8)srcdst) | IMM_I12(srcdstw));
3206 	else {
3207 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3208 		return push_inst(compiler, ins | FRD(freg) | RJ(srcdst) | IMM_I12(0));
3209 	}
3210 }
3211 
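/* Replicates a scalar into every lane: VLDREPL for memory operands,
   VREPLGR2VR for general purpose registers and immediates, VREPLVE for
   floating point sources; the upper half of 256 bit registers is filled
   with XVPERMI where needed. */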
3212 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3213 	sljit_s32 freg,
3214 	sljit_s32 src, sljit_sw srcw)
3215 {
3216 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3217 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3218 	sljit_ins ins = 0;
3219 
3220 	CHECK_ERROR();
3221 	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3222 
3223 	ADJUST_LOCAL_OFFSET(src, srcw);
3224 
3225 	if (reg_size != 5 && reg_size != 4)
3226 		return SLJIT_ERR_UNSUPPORTED;
3227 
3228 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3229 		return SLJIT_ERR_UNSUPPORTED;
3230 
3231 	if (type & SLJIT_SIMD_TEST)
3232 		return SLJIT_SUCCESS;
3233 
3234 	if (src & SLJIT_MEM) {
3235 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3236 
3237 		if (reg_size == 5)
3238 			ins = (sljit_ins)1 << 25;
3239 
3240 		return push_inst(compiler, VLDREPL | ins | FRD(freg) | RJ(src) | (sljit_ins)1 << (23 - elem_size));
3241 	}
3242 
3243 	if (reg_size == 5)
3244 		ins = (sljit_ins)1 << 26;
3245 
3246 	if (type & SLJIT_SIMD_FLOAT) {
3247 		if (src == SLJIT_IMM)
3248 			return push_inst(compiler, VREPLGR2VR | ins | FRD(freg) | RJ(TMP_ZERO) | (sljit_ins)elem_size << 10);
3249 
3250 		FAIL_IF(push_inst(compiler, VREPLVE | ins | FRD(freg) | FRJ(src) | RK(TMP_ZERO) | (sljit_ins)elem_size << 15));
3251 
3252 		if (reg_size == 5) {
3253 			ins = (sljit_ins)(0x44 << 10);
3254 			return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3255 		}
3256 
3257 		return SLJIT_SUCCESS;
3258 	}
3259 
3260 	ins |= VREPLGR2VR | (sljit_ins)elem_size << 10;
3261 
3262 	if (src == SLJIT_IMM) {
3263 		FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
3264 		src = TMP_REG2;
3265 	}
3266 
3267 	return push_inst(compiler, ins | FRD(freg) | RJ(src));
3268 }
3269 
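/* Moves a single lane between a vector register and a general purpose,
   floating point, immediate or memory operand. Lanes in the upper half of
   a 256 bit register are accessed by copying that half with XVPERMI
   around the VINSGR2VR/VPICKVE2GR instruction. */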
3270 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3271 	sljit_s32 freg, sljit_s32 lane_index,
3272 	sljit_s32 srcdst, sljit_sw srcdstw)
3273 {
3274 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3275 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3276 	sljit_ins ins = 0;
3277 
3278 	CHECK_ERROR();
3279 	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3280 
3281 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3282 
3283 	if (reg_size != 5 && reg_size != 4)
3284 		return SLJIT_ERR_UNSUPPORTED;
3285 
3286 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3287 		return SLJIT_ERR_UNSUPPORTED;
3288 
3289 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3290 		return SLJIT_ERR_UNSUPPORTED;
3294 
3295 	if (type & SLJIT_SIMD_TEST)
3296 		return SLJIT_SUCCESS;
3297 
3298 	if (type & SLJIT_SIMD_LANE_ZERO) {
3299 		ins = (reg_size == 5) ? ((sljit_ins)1 << 26) : 0;
3300 
3301 		if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
3302 			FAIL_IF(push_inst(compiler, VOR_V | ins | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3303 			srcdst = TMP_FREG1;
3304 			srcdstw = 0;
3305 		}
3306 
3307 		FAIL_IF(push_inst(compiler, VXOR_V | ins | FRD(freg) | FRJ(freg) | FRK(freg)));
3308 	}
3309 
3310 	if (srcdst & SLJIT_MEM) {
3311 		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3312 
3313 		if (reg_size == 5)
3314 			ins = (sljit_ins)1 << 25;
3315 
3316 		if (type & SLJIT_SIMD_STORE) {
3317 			ins |= (sljit_ins)lane_index << 18 | (sljit_ins)(1 << (23 - elem_size));
3318 			return push_inst(compiler, VSTELM | ins | FRD(freg) | RJ(srcdst));
3319 		} else {
3320 			FAIL_IF(emit_op_mem(compiler, (elem_size == 3 ? WORD_DATA : (elem_size == 2 ? INT_DATA : (elem_size == 1 ? HALF_DATA : BYTE_DATA))) | LOAD_DATA, TMP_REG1, srcdst | SLJIT_MEM, 0));
3321 			srcdst = TMP_REG1;
3322 			ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3323 
3324 			if (reg_size == 5) {
3325 				if (elem_size < 2) {
3326 					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3327 					if (lane_index >= (2 << (3 - elem_size))) {
3328 						FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3329 						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3330 						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3331 					} else {
3332 						FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3333 						return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3334 					}
3335 				} else
3336 					ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3337 			}
3338 
3339 			return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3340 		}
3341 	}
3342 
3343 	if (type & SLJIT_SIMD_FLOAT) {
3344 		ins = (reg_size == 5) ? (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26 : (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3345 
3346 		if (type & SLJIT_SIMD_STORE) {
3347 			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(freg) | IMM_V(lane_index)));
3348 			return push_inst(compiler, VINSGR2VR | ins | FRD(srcdst) | RJ(TMP_REG1) | IMM_V(0));
3349 		} else {
3350 			FAIL_IF(push_inst(compiler, VPICKVE2GR_U | ins | RD(TMP_REG1) | FRJ(srcdst) | IMM_V(0)));
3351 			return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(TMP_REG1) | IMM_V(lane_index));
3352 		}
3353 	}
3354 
3355 	if (srcdst == SLJIT_IMM) {
3356 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
3357 		srcdst = TMP_REG1;
3358 	}
3359 
3360 	if (type & SLJIT_SIMD_STORE) {
3361 		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3362 
3363 		if (type & SLJIT_SIMD_LANE_SIGNED)
3364 			ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3365 		else
3366 			ins |= VPICKVE2GR_U;
3367 
3368 		if (reg_size == 5) {
3369 			if (elem_size < 2) {
3370 				if (lane_index >= (2 << (3 - elem_size))) {
3371 					if (type & SLJIT_SIMD_LANE_SIGNED)
3372 						ins |= (sljit_ins)(VPICKVE2GR_U ^ (0x7 << 18));
3373 					else
3374 						ins |= VPICKVE2GR_U;
3375 
3376 					FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3377 					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3378 					return push_inst(compiler, ins | RD(srcdst) | FRJ(TMP_FREG1) | IMM_V(lane_index % (2 << (3 - elem_size))));
3379 				}
3380 			} else {
3381 				ins ^= (sljit_ins)1 << (15 - elem_size);
3382 				ins |= (sljit_ins)1 << 26;
3383 			}
3384 		}
3385 
3386 		return push_inst(compiler, ins | RD(srcdst) | FRJ(freg) | IMM_V(lane_index));
3387 	} else {
3388 		ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3389 
3390 		if (reg_size == 5) {
3391 			if (elem_size < 2) {
3392 				FAIL_IF(push_inst(compiler, VOR_V | (sljit_ins)1 << 26 | FRD(TMP_FREG1) | FRJ(freg) | FRK(freg)));
3393 				if (lane_index >= (2 << (3 - elem_size))) {
3394 					FAIL_IF(push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(TMP_FREG1) | FRJ(freg) | IMM_I8(1)));
3395 					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(TMP_FREG1) | RJ(srcdst) | IMM_V(lane_index % (2 << (3 - elem_size)))));
3396 					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(2));
3397 				} else {
3398 					FAIL_IF(push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index)));
3399 					return push_inst(compiler, XVPERMI | (sljit_ins)1 << 18 | FRD(freg) | FRJ(TMP_FREG1) | IMM_I8(18));
3400 				}
3401 			} else
3402 				ins = (sljit_ins)(0x3f ^ (0x3f >> elem_size)) << 10 | (sljit_ins)1 << 26;
3403 		}
3404 
3405 		return push_inst(compiler, VINSGR2VR | ins | FRD(freg) | RJ(srcdst) | IMM_V(lane_index));
3406 	}
3407 
3408 	return SLJIT_ERR_UNSUPPORTED;
3409 }
3410 
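/* Broadcasts the selected lane of 'src' into every lane of 'freg' with
   VREPLVEI; for 256 bit registers XVPERMI then duplicates the 128 bit
   half that contains the source lane. */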
3411 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3412 	sljit_s32 freg,
3413 	sljit_s32 src, sljit_s32 src_lane_index)
3414 {
3415 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3416 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3417 	sljit_ins ins = 0;
3418 
3419 	CHECK_ERROR();
3420 	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3421 
3422 	if (reg_size != 5 && reg_size != 4)
3423 		return SLJIT_ERR_UNSUPPORTED;
3424 
3425 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3426 		return SLJIT_ERR_UNSUPPORTED;
3427 
3428 	if (type & SLJIT_SIMD_TEST)
3429 		return SLJIT_SUCCESS;
3430 
3431 	ins = (sljit_ins)(0x3f ^ (0x1f >> elem_size)) << 10;
3432 
3433 	if (reg_size == 5) {
3434 		FAIL_IF(push_inst(compiler, VREPLVEI | (sljit_ins)1 << 26 | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index % (2 << (3 - elem_size)))));
3435 
3436 		ins = (src_lane_index < (2 << (3 - elem_size))) ? (sljit_ins)(0x44 << 10) : (sljit_ins)(0xee << 10);
3437 
3438 		return push_inst(compiler, XVPERMI | ins | FRD(freg) | FRJ(freg));
3439 	}
3440 
3441 	return push_inst(compiler, VREPLVEI | ins | FRD(freg) | FRJ(src) | IMM_V(src_lane_index));
3442 }
3443 
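/* Widens packed elements: floats are converted with VFCVTL_D_S, integers
   are widened step by step with VSLLWIL (signed or unsigned) until the
   target element size is reached. */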
3444 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
3445 	sljit_s32 freg,
3446 	sljit_s32 src, sljit_sw srcw)
3447 {
3448 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3449 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3450 	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3451 	sljit_ins ins = 0;
3452 
3453 	CHECK_ERROR();
3454 	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
3455 
3456 	ADJUST_LOCAL_OFFSET(src, srcw);
3457 
3458 	if (reg_size != 5 && reg_size != 4)
3459 		return SLJIT_ERR_UNSUPPORTED;
3460 
3461 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3462 		return SLJIT_ERR_UNSUPPORTED;
3463 
3464 	if (type & SLJIT_SIMD_TEST)
3465 		return SLJIT_SUCCESS;
3466 
3467 	if (src & SLJIT_MEM) {
3468 		ins = (type & SLJIT_SIMD_STORE) ? VST : VLD;
3469 
3470 		if (reg_size == 5)
3471 			ins = (type & SLJIT_SIMD_STORE) ? XVST : XVLD;
3472 
3473 		if (FAST_IS_REG(src) && src >= 0 && (srcw >= I12_MIN && srcw <= I12_MAX))
3474 			FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(srcw)));
3475 		else {
3476 			FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3477 			FAIL_IF(push_inst(compiler, ins | FRD(freg) | RJ(src) | IMM_I12(0)));
3478 		}
3479 		src = freg;
3480 	}
3481 
3482 	if (type & SLJIT_SIMD_FLOAT) {
3483 		if (elem_size != 2 || elem2_size != 3)
3484 			return SLJIT_ERR_UNSUPPORTED;
3485 
3486 		ins = 0;
3487 		if (reg_size == 5) {
3488 			ins = (sljit_ins)1 << 26;
3489 			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3490 		}
3491 
3492 		return push_inst(compiler, VFCVTL_D_S | ins | FRD(freg) | FRJ(src));
3493 	}
3494 
3495 	ins = (type & SLJIT_SIMD_EXTEND_SIGNED) ? VSLLWIL : (VSLLWIL | (sljit_ins)1 << 18);
3496 
3497 	if (reg_size == 5)
3498 		ins |= (sljit_ins)1 << 26;
3499 
3500 	do {
3501 		if (reg_size == 5)
3502 			FAIL_IF(push_inst(compiler, XVPERMI | FRD(src) | FRJ(src) | IMM_I8(16)));
3503 
3504 		FAIL_IF(push_inst(compiler, ins | ((sljit_ins)1 << (13 + elem_size)) | FRD(freg) | FRJ(src)));
3505 		src = freg;
3506 	} while (++elem_size < elem2_size);
3507 
3508 	return SLJIT_SUCCESS;
3509 }
3510 
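/* Collects the sign bit of every lane into a general purpose register
   with VMSKLTZ and VPICKVE2GR_U; for 256 bit registers the mask of the
   upper half is shifted and OR-ed into the lower result. */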
3511 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
3512 	sljit_s32 freg,
3513 	sljit_s32 dst, sljit_sw dstw)
3514 {
3515 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3516 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3517 	sljit_ins ins = 0;
3518 	sljit_s32 dst_r;
3519 
3520 	CHECK_ERROR();
3521 	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
3522 
3523 	ADJUST_LOCAL_OFFSET(dst, dstw);
3524 
3525 	if (reg_size != 5 && reg_size != 4)
3526 		return SLJIT_ERR_UNSUPPORTED;
3527 
3528 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3529 		return SLJIT_ERR_UNSUPPORTED;
3530 
3531 	if (elem_size > 3 || ((type & SLJIT_SIMD_FLOAT) && elem_size < 2))
3532 		return SLJIT_ERR_UNSUPPORTED;
3533 
3534 	if (type & SLJIT_SIMD_TEST)
3535 		return SLJIT_SUCCESS;
3536 
3537 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3538 
3539 	if (reg_size == 5)
3540 		ins = (sljit_ins)1 << 26;
3541 
3542 	FAIL_IF(push_inst(compiler, VMSKLTZ | ins | (sljit_ins)(elem_size << 10) | FRD(TMP_FREG1) | FRJ(freg)));
3543 
3544 	FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x3c << 10) | RD(dst_r) | FRJ(TMP_FREG1)));
3545 
3546 	if (reg_size == 5) {
3547 		FAIL_IF(push_inst(compiler, VPICKVE2GR_U | (sljit_ins)(0x38 << 10) | ins | RD(TMP_REG3) | FRJ(TMP_FREG1) | IMM_V(2)));
3548 		FAIL_IF(push_inst(compiler, SLLI_W | RD(TMP_REG3) | RJ(TMP_REG3) | IMM_I12(2 << (3 - elem_size))));
3549 		FAIL_IF(push_inst(compiler, OR | RD(dst_r) | RJ(dst_r) | RK(TMP_REG3)));
3550 	}
3551 
3552 	if (dst_r == TMP_REG2)
3553 		return emit_op_mem(compiler, ((type & SLJIT_32) ? INT_DATA : WORD_DATA), TMP_REG2, dst, dstw);
3554 
3555 	return SLJIT_SUCCESS;
3556 }
3557 
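/* Bitwise AND / OR / XOR of two vector registers; bit 26 selects the
   256 bit (LASX) form of the instruction. */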
3558 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3559 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3560 {
3561 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3562 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3563 	sljit_ins ins = 0;
3564 
3565 	CHECK_ERROR();
3566 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3567 
3568 	if (reg_size != 5 && reg_size != 4)
3569 		return SLJIT_ERR_UNSUPPORTED;
3570 
3571 	if (reg_size == 5 && !(get_cpu_features(GET_HWCAP) & LOONGARCH_HWCAP_LASX))
3572 		return SLJIT_ERR_UNSUPPORTED;
3573 
3574 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3575 		return SLJIT_ERR_UNSUPPORTED;
3576 
3577 	if (type & SLJIT_SIMD_TEST)
3578 		return SLJIT_SUCCESS;
3579 
3580 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
3581 	case SLJIT_SIMD_OP2_AND:
3582 		ins = VAND_V;
3583 		break;
3584 	case SLJIT_SIMD_OP2_OR:
3585 		ins = VOR_V;
3586 		break;
3587 	case SLJIT_SIMD_OP2_XOR:
3588 		ins = VXOR_V;
3589 		break;
3590 	}
3591 
3592 	if (reg_size == 5)
3593 		ins |= (sljit_ins)1 << 26;
3594 
3595 	return push_inst(compiler, ins | FRD(dst_freg) | FRJ(src1_freg) | FRK(src2_freg));
3596 }
3597 
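/* The atomic load is a plain load; sljit_emit_atomic_store below performs
   the actual compare-and-swap with AMCAS against the loaded value. */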
3598 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
3599 	sljit_s32 op,
3600 	sljit_s32 dst_reg,
3601 	sljit_s32 mem_reg)
3602 {
3603 	sljit_ins ins;
3604 
3605 	CHECK_ERROR();
3606 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
3607 
3608 	switch(GET_OPCODE(op)) {
3609 	case SLJIT_MOV_U8:
3610 		ins = LD_BU;
3611 		break;
3612 	case SLJIT_MOV_U16:
3613 		ins = LD_HU;
3614 		break;
3615 	case SLJIT_MOV32:
3616 		ins = LD_W;
3617 		break;
3618 	case SLJIT_MOV_U32:
3619 		ins = LD_WU;
3620 		break;
3621 	default:
3622 		ins = LD_D;
3623 		break;
3624 	}
3625 
3626 	return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
3627 }
3628 
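/* Stores src_reg with AMCAS, comparing memory against the previously
   loaded value in temp_reg. When SLJIT_SET_ATOMIC_STORED is set, the old
   value returned by AMCAS is zero extended with BSTRPICK_D for the
   unsigned forms and compared with temp_reg to produce EQUAL_FLAG. */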
3629 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
3630 	sljit_s32 op,
3631 	sljit_s32 src_reg,
3632 	sljit_s32 mem_reg,
3633 	sljit_s32 temp_reg)
3634 {
3635 	sljit_ins ins = 0;
3636 	sljit_ins unsign = 0;
3637 	sljit_s32 tmp = temp_reg;
3638 
3639 	CHECK_ERROR();
3640 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
3641 
3642 	switch (GET_OPCODE(op)) {
3643 	case SLJIT_MOV_U8:
3644 		ins = AMCAS_B;
3645 		unsign = BSTRPICK_D | (7 << 16);
3646 		break;
3647 	case SLJIT_MOV_U16:
3648 		ins = AMCAS_H;
3649 		unsign = BSTRPICK_D | (15 << 16);
3650 		break;
3651 	case SLJIT_MOV32:
3652 		ins = AMCAS_W;
3653 		break;
3654 	case SLJIT_MOV_U32:
3655 		ins = AMCAS_W;
3656 		unsign = BSTRPICK_D | (31 << 16);
3657 		break;
3658 	default:
3659 		ins = AMCAS_D;
3660 		break;
3661 	}
3662 
3663 	if (op & SLJIT_SET_ATOMIC_STORED) {
3664 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO)));
3665 		tmp = TMP_REG1;
3666 	}
3667 	FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
3668 	if (!(op & SLJIT_SET_ATOMIC_STORED))
3669 		return SLJIT_SUCCESS;
3670 
3671 	if (unsign)
3672 		FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));
3673 
3674 	FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg)));
3675 	return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1));
3676 }
3677 
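/* Emits the four instruction LU12I_W / LU32I_D / LU52I_D / ORI sequence
   that materializes a 64 bit constant and can later be repatched by
   sljit_set_jump_addr. */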
3678 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
3679 {
3680 	SLJIT_UNUSED_ARG(last_ins);
3681 
3682 	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
3683 	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
3684 	FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
3685 	return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
3686 }
3687 
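/* Rewrites the four instruction constant sequence above (or the trailing
   JIRL of an indirect jump) with a new 64 bit target. */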
3688 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3689 {
3690 	sljit_ins *inst = (sljit_ins*)addr;
3691 	SLJIT_UNUSED_ARG(executable_offset);
3692 
3693 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3694 
3695 	SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
3696 	inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);
3697 
3698 	SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
3699 	inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target >> 32) & 0xfffff) << 5);
3700 
3701 	SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
3702 	inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);
3703 
3704 	SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
3705 	if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
3706 		inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
3707 	else
3708 		inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);
3709 
3710 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3711 
3712 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3713 	SLJIT_CACHE_FLUSH(inst, inst + 4);
3714 }
3715 
3716 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3717 {
3718 	struct sljit_const *const_;
3719 	sljit_s32 dst_r;
3720 
3721 	CHECK_ERROR_PTR();
3722 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3723 	ADJUST_LOCAL_OFFSET(dst, dstw);
3724 
3725 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3726 	PTR_FAIL_IF(!const_);
3727 	set_const(const_, compiler);
3728 
3729 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3730 	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));
3731 
3732 	if (dst & SLJIT_MEM)
3733 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3734 
3735 	return const_;
3736 }
3737 
3738 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3739 {
3740 	struct sljit_jump *jump;
3741 	sljit_s32 dst_r;
3742 
3743 	CHECK_ERROR_PTR();
3744 	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
3745 	ADJUST_LOCAL_OFFSET(dst, dstw);
3746 
3747 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3748 	PTR_FAIL_IF(!jump);
3749 	set_mov_addr(jump, compiler, 0);
3750 
3751 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3752 	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
3753 
3754 	compiler->size += JUMP_MAX_SIZE - 1;
3755 
3756 	if (dst & SLJIT_MEM)
3757 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3758 
3759 	return jump;
3760 }
3761 
3762 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3763 {
3764 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3765 }
3766