1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 	return "LOONGARCH" SLJIT_CPUINFO;
30 }
31 
32 typedef sljit_u32 sljit_ins;
33 
34 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
35 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
36 #define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
37 #define TMP_ZERO	0
38 
39 /* Flags are kept in volatile registers. */
40 #define EQUAL_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 5)
41 #define RETURN_ADDR_REG	TMP_REG2
42 #define OTHER_FLAG	(SLJIT_NUMBER_OF_REGISTERS + 6)
43 
44 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
45 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
46 
47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
48 	0, 4, 5, 6, 7, 8, 9, 10, 11, 16, 17, 18, 19, 20, 22, 31, 30, 29, 28, 27, 26, 25, 24, 23, 3, 13, 1, 14, 12, 15
49 };
50 
51 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
52 	0, 0, 1, 2, 3, 4, 5, 6, 7, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 31, 30, 29, 28, 27, 26, 25, 24, 8, 9
53 };
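/* A reading aid (assuming the standard LoongArch64 calling convention): reg_map[]
   translates SLJIT register indices to hardware GPR numbers, e.g. reg_map[SLJIT_R0] == 4
   ($a0), reg_map[TMP_ZERO] == 0 ($zero) and reg_map[RETURN_ADDR_REG] == 1 ($ra), with
   the saved registers mapped onto the callee-saved $s registers (r23-r31). freg_map[]
   does the same for the FPRs. */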
54 
55 /* --------------------------------------------------------------------- */
56 /*  Instruction forms                                                    */
57 /* --------------------------------------------------------------------- */
58 
59 /*
60 LoongArch instructions are 32 bits wide, belonging to 9 basic instruction formats (and variants of them):
61 
62 | Format name  | Composition                 |
63 | 2R           | Opcode + Rj + Rd            |
64 | 3R           | Opcode + Rk + Rj + Rd       |
65 | 4R           | Opcode + Ra + Rk + Rj + Rd  |
66 | 2RI8         | Opcode + I8 + Rj + Rd       |
67 | 2RI12        | Opcode + I12 + Rj + Rd      |
68 | 2RI14        | Opcode + I14 + Rj + Rd      |
69 | 2RI16        | Opcode + I16 + Rj + Rd      |
70 | 1RI21        | Opcode + I21L + Rj + I21H   |
71 | I26          | Opcode + I26L + I26H        |
72 
73 Rd is the destination register operand, while Rj, Rk and Ra (“a” stands for “additional”) are the source register operands.
74 I8/I12/I14/I16/I21/I26 are immediate operands of respective width. The longer I21 and I26 are stored in separate higher and
75 lower parts in the instruction word, denoted by the “L” and “H” suffixes. */
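/* A worked encoding example (a sketch based on the table above): the 2RI12 instruction
   addi.d rd, rj, si12 is laid out as

       [31:22] opcode   [21:10] si12   [9:5] rj   [4:0] rd

   so, with the macros defined below and the register numbers already translated
   through reg_map, it is assembled as

       ADDI_D | IMM_I12(si12) | RJ(rj) | RD(rd)

   i.e. (0xb << 22) | ((si12 & 0xfff) << 10) | (reg_map[rj] << 5) | reg_map[rd]. */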
76 
77 #define RD(rd) ((sljit_ins)reg_map[rd])
78 #define RJ(rj) ((sljit_ins)reg_map[rj] << 5)
79 #define RK(rk) ((sljit_ins)reg_map[rk] << 10)
80 #define RA(ra) ((sljit_ins)reg_map[ra] << 15)
81 
82 #define FD(fd) ((sljit_ins)reg_map[fd])
83 #define FRD(fd) ((sljit_ins)freg_map[fd])
84 #define FRJ(fj) ((sljit_ins)freg_map[fj] << 5)
85 #define FRK(fk) ((sljit_ins)freg_map[fk] << 10)
86 #define FRA(fa) ((sljit_ins)freg_map[fa] << 15)
87 
88 #define IMM_I8(imm) (((sljit_ins)(imm)&0xff) << 10)
89 #define IMM_I12(imm) (((sljit_ins)(imm)&0xfff) << 10)
90 #define IMM_I14(imm) (((sljit_ins)(imm)&0x3fff) << 10)
91 #define IMM_I16(imm) (((sljit_ins)(imm)&0xffff) << 10)
92 #define IMM_I21(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x1f))
93 #define IMM_I26(imm) ((((sljit_ins)(imm)&0xffff) << 10) | (((sljit_ins)(imm) >> 16) & 0x3ff))
94 
95 #define OPC_I26(opc) ((sljit_ins)(opc) << 26)
96 #define OPC_1RI21(opc) ((sljit_ins)(opc) << 26)
97 #define OPC_2RI16(opc) ((sljit_ins)(opc) << 26)
98 #define OPC_2RI14(opc) ((sljit_ins)(opc) << 24)
99 #define OPC_2RI12(opc) ((sljit_ins)(opc) << 22)
100 #define OPC_2RI8(opc) ((sljit_ins)(opc) << 18)
101 #define OPC_4R(opc) ((sljit_ins)(opc) << 20)
102 #define OPC_3R(opc) ((sljit_ins)(opc) << 15)
103 #define OPC_2R(opc) ((sljit_ins)(opc) << 10)
104 #define OPC_1RI20(opc) ((sljit_ins)(opc) << 25)
105 
106 /* Arithmetic operation instructions */
107 #define ADD_W OPC_3R(0x20)
108 #define ADD_D OPC_3R(0x21)
109 #define SUB_W OPC_3R(0x22)
110 #define SUB_D OPC_3R(0x23)
111 #define ADDI_W OPC_2RI12(0xa)
112 #define ADDI_D OPC_2RI12(0xb)
113 #define ANDI OPC_2RI12(0xd)
114 #define ORI OPC_2RI12(0xe)
115 #define XORI OPC_2RI12(0xf)
116 #define ADDU16I_D OPC_2RI16(0x4)
117 #define LU12I_W OPC_1RI20(0xa)
118 #define LU32I_D OPC_1RI20(0xb)
119 #define LU52I_D OPC_2RI12(0xc)
120 #define SLT OPC_3R(0x24)
121 #define SLTU OPC_3R(0x25)
122 #define SLTI OPC_2RI12(0x8)
123 #define SLTUI OPC_2RI12(0x9)
124 #define PCADDI OPC_1RI20(0xc)
125 #define PCALAU12I OPC_1RI20(0xd)
126 #define PCADDU12I OPC_1RI20(0xe)
127 #define PCADDU18I OPC_1RI20(0xf)
128 #define NOR OPC_3R(0x28)
129 #define AND OPC_3R(0x29)
130 #define OR OPC_3R(0x2a)
131 #define XOR OPC_3R(0x2b)
132 #define ORN OPC_3R(0x2c)
133 #define ANDN OPC_3R(0x2d)
134 #define MUL_W OPC_3R(0x38)
135 #define MULH_W OPC_3R(0x39)
136 #define MULH_WU OPC_3R(0x3a)
137 #define MUL_D OPC_3R(0x3b)
138 #define MULH_D OPC_3R(0x3c)
139 #define MULH_DU OPC_3R(0x3d)
140 #define MULW_D_W OPC_3R(0x3e)
141 #define MULW_D_WU OPC_3R(0x3f)
142 #define DIV_W OPC_3R(0x40)
143 #define MOD_W OPC_3R(0x41)
144 #define DIV_WU OPC_3R(0x42)
145 #define MOD_WU OPC_3R(0x43)
146 #define DIV_D OPC_3R(0x44)
147 #define MOD_D OPC_3R(0x45)
148 #define DIV_DU OPC_3R(0x46)
149 #define MOD_DU OPC_3R(0x47)
150 
151 /* Bit-shift instructions */
152 #define SLL_W OPC_3R(0x2e)
153 #define SRL_W OPC_3R(0x2f)
154 #define SRA_W OPC_3R(0x30)
155 #define SLL_D OPC_3R(0x31)
156 #define SRL_D OPC_3R(0x32)
157 #define SRA_D OPC_3R(0x33)
158 #define ROTR_W OPC_3R(0x36)
159 #define ROTR_D OPC_3R(0x37)
160 #define SLLI_W OPC_3R(0x81)
161 #define SLLI_D ((sljit_ins)(0x41) << 16)
162 #define SRLI_W OPC_3R(0x89)
163 #define SRLI_D ((sljit_ins)(0x45) << 16)
164 #define SRAI_W OPC_3R(0x91)
165 #define SRAI_D ((sljit_ins)(0x49) << 16)
166 #define ROTRI_W OPC_3R(0x99)
167 #define ROTRI_D ((sljit_ins)(0x4d) << 16)
168 
169 /* Bit-manipulation instructions */
170 #define CLO_W OPC_2R(0x4)
171 #define CLZ_W OPC_2R(0x5)
172 #define CTO_W OPC_2R(0x6)
173 #define CTZ_W OPC_2R(0x7)
174 #define CLO_D OPC_2R(0x8)
175 #define CLZ_D OPC_2R(0x9)
176 #define CTO_D OPC_2R(0xa)
177 #define CTZ_D OPC_2R(0xb)
178 #define REVB_2H OPC_2R(0xc)
179 #define REVB_4H OPC_2R(0xd)
180 #define REVB_2W OPC_2R(0xe)
181 #define REVB_D OPC_2R(0xf)
182 #define REVH_2W OPC_2R(0x10)
183 #define REVH_D OPC_2R(0x11)
184 #define BITREV_4B OPC_2R(0x12)
185 #define BITREV_8B OPC_2R(0x13)
186 #define BITREV_W OPC_2R(0x14)
187 #define BITREV_D OPC_2R(0x15)
188 #define EXT_W_H OPC_2R(0x16)
189 #define EXT_W_B OPC_2R(0x17)
190 #define BSTRINS_W (0x1 << 22 | 1 << 21)
191 #define BSTRPICK_W (0x1 << 22 | 1 << 21 | 1 << 15)
192 #define BSTRINS_D (0x2 << 22)
193 #define BSTRPICK_D (0x3 << 22)
194 
195 /* Branch instructions */
196 #define BEQZ  OPC_1RI21(0x10)
197 #define BNEZ  OPC_1RI21(0x11)
198 #define JIRL  OPC_2RI16(0x13)
199 #define B     OPC_I26(0x14)
200 #define BL    OPC_I26(0x15)
201 #define BEQ   OPC_2RI16(0x16)
202 #define BNE   OPC_2RI16(0x17)
203 #define BLT   OPC_2RI16(0x18)
204 #define BGE   OPC_2RI16(0x19)
205 #define BLTU  OPC_2RI16(0x1a)
206 #define BGEU  OPC_2RI16(0x1b)
207 
208 /* Memory access instructions */
209 #define LD_B OPC_2RI12(0xa0)
210 #define LD_H OPC_2RI12(0xa1)
211 #define LD_W OPC_2RI12(0xa2)
212 #define LD_D OPC_2RI12(0xa3)
213 
214 #define ST_B OPC_2RI12(0xa4)
215 #define ST_H OPC_2RI12(0xa5)
216 #define ST_W OPC_2RI12(0xa6)
217 #define ST_D OPC_2RI12(0xa7)
218 
219 #define LD_BU OPC_2RI12(0xa8)
220 #define LD_HU OPC_2RI12(0xa9)
221 #define LD_WU OPC_2RI12(0xaa)
222 
223 #define LDX_B OPC_3R(0x7000)
224 #define LDX_H OPC_3R(0x7008)
225 #define LDX_W OPC_3R(0x7010)
226 #define LDX_D OPC_3R(0x7018)
227 
228 #define STX_B OPC_3R(0x7020)
229 #define STX_H OPC_3R(0x7028)
230 #define STX_W OPC_3R(0x7030)
231 #define STX_D OPC_3R(0x7038)
232 
233 #define LDX_BU OPC_3R(0x7040)
234 #define LDX_HU OPC_3R(0x7048)
235 #define LDX_WU OPC_3R(0x7050)
236 
237 #define PRELD OPC_2RI12(0xab)
238 
239 /* Atomic memory access instructions */
240 #define LL_W OPC_2RI14(0x20)
241 #define SC_W OPC_2RI14(0x21)
242 #define LL_D OPC_2RI14(0x22)
243 #define SC_D OPC_2RI14(0x23)
244 
245 /* LoongArch V1.10 Instructions */
246 #define AMCAS_B OPC_3R(0x70B0)
247 #define AMCAS_H OPC_3R(0x70B1)
248 #define AMCAS_W OPC_3R(0x70B2)
249 #define AMCAS_D OPC_3R(0x70B3)
250 
251 /* Other instructions */
252 #define BREAK OPC_3R(0x54)
253 #define DBGCALL OPC_3R(0x55)
254 #define SYSCALL OPC_3R(0x56)
255 
256 /* Basic Floating-Point Instructions */
257 /* Floating-Point Arithmetic Operation Instructions */
258 #define FADD_S  OPC_3R(0x201)
259 #define FADD_D  OPC_3R(0x202)
260 #define FSUB_S  OPC_3R(0x205)
261 #define FSUB_D  OPC_3R(0x206)
262 #define FMUL_S  OPC_3R(0x209)
263 #define FMUL_D  OPC_3R(0x20a)
264 #define FDIV_S  OPC_3R(0x20d)
265 #define FDIV_D  OPC_3R(0x20e)
266 #define FCMP_COND_S  OPC_4R(0xc1)
267 #define FCMP_COND_D  OPC_4R(0xc2)
268 #define FCOPYSIGN_S  OPC_3R(0x225)
269 #define FCOPYSIGN_D  OPC_3R(0x226)
270 #define FSEL  OPC_4R(0xd0)
271 #define FABS_S  OPC_2R(0x4501)
272 #define FABS_D  OPC_2R(0x4502)
273 #define FNEG_S  OPC_2R(0x4505)
274 #define FNEG_D  OPC_2R(0x4506)
277 
278 /* Floating-Point Conversion Instructions */
279 #define FCVT_S_D  OPC_2R(0x4646)
280 #define FCVT_D_S  OPC_2R(0x4649)
281 #define FTINTRZ_W_S  OPC_2R(0x46a1)
282 #define FTINTRZ_W_D  OPC_2R(0x46a2)
283 #define FTINTRZ_L_S  OPC_2R(0x46a9)
284 #define FTINTRZ_L_D  OPC_2R(0x46aa)
285 #define FFINT_S_W  OPC_2R(0x4744)
286 #define FFINT_S_L  OPC_2R(0x4746)
287 #define FFINT_D_W  OPC_2R(0x4748)
288 #define FFINT_D_L  OPC_2R(0x474a)
289 
290 /* Floating-Point Move Instructions */
291 #define FMOV_S  OPC_2R(0x4525)
292 #define FMOV_D  OPC_2R(0x4526)
293 #define MOVGR2FR_W  OPC_2R(0x4529)
294 #define MOVGR2FR_D  OPC_2R(0x452a)
295 #define MOVGR2FRH_W  OPC_2R(0x452b)
296 #define MOVFR2GR_S  OPC_2R(0x452d)
297 #define MOVFR2GR_D  OPC_2R(0x452e)
298 #define MOVFRH2GR_S  OPC_2R(0x452f)
299 #define MOVGR2FCSR  OPC_2R(0x4530)
300 #define MOVFCSR2GR  OPC_2R(0x4532)
301 #define MOVFR2CF  OPC_2R(0x4534)
302 #define MOVCF2FR  OPC_2R(0x4535)
303 #define MOVGR2CF  OPC_2R(0x4536)
304 #define MOVCF2GR  OPC_2R(0x4537)
305 
306 /* Floating-Point Branch Instructions */
307 #define BCEQZ OPC_I26(0x12)
308 #define BCNEZ (OPC_I26(0x12) | (sljit_ins)(0x1 << 8))
309 
310 /* Floating-Point Common Memory Access Instructions */
311 #define FLD_S OPC_2RI12(0xac)
312 #define FLD_D OPC_2RI12(0xae)
313 #define FST_S OPC_2RI12(0xad)
314 #define FST_D OPC_2RI12(0xaf)
315 
316 #define FLDX_S OPC_3R(0x7060)
317 #define FLDX_D OPC_3R(0x7068)
318 #define FSTX_S OPC_3R(0x7070)
319 #define FSTX_D OPC_3R(0x7078)
320 
321 #define I12_MAX (0x7ff)
322 #define I12_MIN (-0x800)
323 #define BRANCH16_MAX (0x7fff << 2)
324 #define BRANCH16_MIN (-(0x8000 << 2))
325 #define BRANCH21_MAX (0xfffff << 2)
326 #define BRANCH21_MIN (-(0x100000 << 2))
327 #define JUMP_MAX (0x1ffffff << 2)
328 #define JUMP_MIN (-(0x2000000 << 2))
329 #define JIRL_MAX (0x7fff << 2)
330 #define JIRL_MIN (-(0x8000 << 2))
331 
332 #define S32_MAX		(0x7fffffffl)
333 #define S32_MIN		(-0x80000000l)
334 #define S52_MAX		(0x7ffffffffffffl)
335 
336 #define INST(inst, type) ((sljit_ins)((type & SLJIT_32) ? inst##_W : inst##_D))
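/* For example INST(ADD, op) selects ADD_W when the operation carries SLJIT_32 and
   ADD_D otherwise; the same pattern is used below for ADDI, SUB, SRLI, ROTR, etc. */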
337 
338 /* LoongArch CPUCFG register for feature detection */
339 #define LOONGARCH_CFG2			0x02
340 #define LOONGARCH_FEATURE_LAMCAS	(1 << 28)
341 
342 static sljit_u32 cpu_feature_list = 0;
343 
344 static SLJIT_INLINE sljit_u32 get_cpu_features(void)
345 {
346 	if (cpu_feature_list == 0)
347 		__asm__ ("cpucfg %0, %1" : "+&r"(cpu_feature_list) : "r"(LOONGARCH_CFG2));
348 	return cpu_feature_list;
349 }
350 
351 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
352 {
353 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
354 	FAIL_IF(!ptr);
355 	*ptr = ins;
356 	compiler->size++;
357 	return SLJIT_SUCCESS;
358 }
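/* Typical usage (a sketch): a fully encoded word is built from the macros above and
   appended to the instruction buffer, e.g.

       FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src) | IMM_I12(imm)));

   push_inst() only buffers the word; jump and label offsets are resolved later in
   sljit_generate_code(). */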
359 
360 static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code, sljit_sw executable_offset)
361 {
362 	sljit_sw diff;
363 	sljit_uw target_addr;
364 	sljit_ins *inst;
365 
366 	inst = (sljit_ins *)jump->addr;
367 
368 	if (jump->flags & SLJIT_REWRITABLE_JUMP)
369 		goto exit;
370 
371 	if (jump->flags & JUMP_ADDR)
372 		target_addr = jump->u.target;
373 	else {
374 		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
375 		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
376 	}
377 
378 	diff = (sljit_sw)target_addr - (sljit_sw)inst - executable_offset;
379 
380 	if (jump->flags & IS_COND) {
381 		inst--;
382 		diff += SSIZE_OF(ins);
383 
384 		if (diff >= BRANCH16_MIN && diff <= BRANCH16_MAX) {
385 			jump->flags |= PATCH_B;
386 			inst[0] = (inst[0] & 0xfc0003ff) ^ 0x4000000;
387 			jump->addr = (sljit_uw)inst;
388 			return inst;
389 		}
390 
391 		inst++;
392 		diff -= SSIZE_OF(ins);
393 	}
394 
395 	if (diff >= JUMP_MIN && diff <= JUMP_MAX) {
396 		if (jump->flags & IS_COND) {
397 			inst[-1] |= (sljit_ins)IMM_I16(2);
398 		}
399 
400 		jump->flags |= PATCH_J;
401 		return inst;
402 	}
403 
404 	if (diff >= S32_MIN && diff <= S32_MAX) {
405 		if (jump->flags & IS_COND)
406 			inst[-1] |= (sljit_ins)IMM_I16(3);
407 
408 		jump->flags |= PATCH_REL32;
409 		inst[1] = inst[0];
410 		return inst + 1;
411 	}
412 
413 	if (target_addr <= (sljit_uw)S32_MAX) {
414 		if (jump->flags & IS_COND)
415 			inst[-1] |= (sljit_ins)IMM_I16(3);
416 
417 		jump->flags |= PATCH_ABS32;
418 		inst[1] = inst[0];
419 		return inst + 1;
420 	}
421 
422 	if (target_addr <= S52_MAX) {
423 		if (jump->flags & IS_COND)
424 			inst[-1] |= (sljit_ins)IMM_I16(4);
425 
426 		jump->flags |= PATCH_ABS52;
427 		inst[2] = inst[0];
428 		return inst + 2;
429 	}
430 
431 exit:
432 	if (jump->flags & IS_COND)
433 		inst[-1] |= (sljit_ins)IMM_I16(5);
434 	inst[3] = inst[0];
435 	return inst + 3;
436 }
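/* Summary of the cases above (a reading aid; distances are relative to the patched
   instruction):
     PATCH_B      within +/-128KB  -> the preceding conditional branch is retargeted
                                      straight at the destination (16-bit offset)
     PATCH_J      within +/-128MB  -> a single b / bl with a 26-bit offset
     PATCH_REL32  32-bit relative  -> pcaddu12i + jirl pair
     PATCH_ABS32 / PATCH_ABS52 / (none) -> the absolute address is built with lu12i.w
                  (+ lu32i.d (+ lu52i.d)) and consumed by jirl; see load_addr_to_reg(). */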
437 
438 static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
439 {
440 	if (max_label <= (sljit_uw)S32_MAX) {
441 		put_label->flags = PATCH_ABS32;
442 		return 1;
443 	}
444 
445 	if (max_label <= S52_MAX) {
446 		put_label->flags = PATCH_ABS52;
447 		return 2;
448 	}
449 
450 	put_label->flags = 0;
451 	return 3;
452 }
453 
454 static SLJIT_INLINE void load_addr_to_reg(void *dst, sljit_u32 reg)
455 {
456 	struct sljit_jump *jump = NULL;
457 	struct sljit_put_label *put_label;
458 	sljit_uw flags;
459 	sljit_ins *inst;
460 	sljit_uw addr;
461 
462 	if (reg != 0) {
463 		jump = (struct sljit_jump*)dst;
464 		flags = jump->flags;
465 		inst = (sljit_ins*)jump->addr;
466 		addr = (flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
467 	} else {
468 		put_label = (struct sljit_put_label*)dst;
469 		flags = put_label->flags;
470 		inst = (sljit_ins*)put_label->addr;
471 		addr = put_label->label->addr;
472 		reg = *inst;
473 	}
474 
475 	if (flags & PATCH_ABS32) {
476 		SLJIT_ASSERT(addr <= S32_MAX);
477 		inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
478 	} else if (flags & PATCH_ABS52) {
479 		inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
480 		inst[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
481 		inst += 1;
482 	} else {
483 		inst[0] = LU12I_W | RD(reg) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
484 		inst[1] = LU32I_D | RD(reg) | (sljit_ins)(((addr >> 32) & 0xfffff) << 5);
485 		inst[2] = LU52I_D | RD(reg) | RJ(reg) | IMM_I12(addr >> 52);
486 		inst += 2;
487 	}
488 
489 	if (jump != NULL) {
490 		SLJIT_ASSERT((inst[1] & OPC_2RI16(0x3f)) == JIRL);
491 		inst[1] = (inst[1] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I16((addr & 0xfff) >> 2);
492 	} else
493 		inst[1] = ORI | RD(reg) | RJ(reg) | IMM_I12(addr);
494 }
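/* The sequences patched in above, roughly (a reading aid):
     PATCH_ABS32:  lu12i.w reg, addr[31:12]
     PATCH_ABS52:  lu12i.w + lu32i.d reg, addr[51:32]
     otherwise:    lu12i.w + lu32i.d + lu52i.d reg, addr[63:52]
   each followed by the final instruction that consumes the low 12 bits: the
   pre-emitted JIRL for jumps and calls, or an ORI for put_label. */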
495 
496 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
497 {
498 	struct sljit_memory_fragment *buf;
499 	sljit_ins *code;
500 	sljit_ins *code_ptr;
501 	sljit_ins *buf_ptr;
502 	sljit_ins *buf_end;
503 	sljit_uw word_count;
504 	sljit_uw next_addr;
505 	sljit_sw executable_offset;
506 	sljit_uw addr;
507 
508 	struct sljit_label *label;
509 	struct sljit_jump *jump;
510 	struct sljit_const *const_;
511 	struct sljit_put_label *put_label;
512 
513 	CHECK_ERROR_PTR();
514 	CHECK_PTR(check_sljit_generate_code(compiler));
515 	reverse_buf(compiler);
516 
517 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data);
518 	PTR_FAIL_WITH_EXEC_IF(code);
519 	buf = compiler->buf;
520 
521 	code_ptr = code;
522 	word_count = 0;
523 	next_addr = 0;
524 	executable_offset = SLJIT_EXEC_OFFSET(code);
525 
526 	label = compiler->labels;
527 	jump = compiler->jumps;
528 	const_ = compiler->consts;
529 	put_label = compiler->put_labels;
530 
531 	do {
532 		buf_ptr = (sljit_ins*)buf->memory;
533 		buf_end = buf_ptr + (buf->used_size >> 2);
534 		do {
535 			*code_ptr = *buf_ptr++;
536 			if (next_addr == word_count) {
537 				SLJIT_ASSERT(!label || label->size >= word_count);
538 				SLJIT_ASSERT(!jump || jump->addr >= word_count);
539 				SLJIT_ASSERT(!const_ || const_->addr >= word_count);
540 				SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
541 
542 				/* These structures are ordered by their address. */
543 				if (label && label->size == word_count) {
544 					label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
545 					label->size = (sljit_uw)(code_ptr - code);
546 					label = label->next;
547 				}
548 				if (jump && jump->addr == word_count) {
549 					word_count += 3;
550 					jump->addr = (sljit_uw)code_ptr;
551 					code_ptr = detect_jump_type(jump, code, executable_offset);
552 					jump = jump->next;
553 				}
554 				if (const_ && const_->addr == word_count) {
555 					const_->addr = (sljit_uw)code_ptr;
556 					const_ = const_->next;
557 				}
558 				if (put_label && put_label->addr == word_count) {
559 					SLJIT_ASSERT(put_label->label);
560 					put_label->addr = (sljit_uw)code_ptr;
561 
562 					code_ptr += put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
563 					word_count += 3;
564 
565 					put_label = put_label->next;
566 				}
567 				next_addr = compute_next_addr(label, jump, const_, put_label);
568 			}
569 			code_ptr++;
570 			word_count++;
571 		} while (buf_ptr < buf_end);
572 
573 		buf = buf->next;
574 	} while (buf);
575 
576 	if (label && label->size == word_count) {
577 		label->addr = (sljit_uw)code_ptr;
578 		label->size = (sljit_uw)(code_ptr - code);
579 		label = label->next;
580 	}
581 
582 	SLJIT_ASSERT(!label);
583 	SLJIT_ASSERT(!jump);
584 	SLJIT_ASSERT(!const_);
585 	SLJIT_ASSERT(!put_label);
586 	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
587 
588 	jump = compiler->jumps;
589 	while (jump) {
590 		do {
591 			if (!(jump->flags & (PATCH_B | PATCH_J | PATCH_REL32))) {
592 				load_addr_to_reg(jump, TMP_REG1);
593 				break;
594 			}
595 
596 			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
597 			buf_ptr = (sljit_ins *)jump->addr;
598 			addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
599 
600 			if (jump->flags & PATCH_B) {
601 				SLJIT_ASSERT((sljit_sw)addr >= BRANCH16_MIN && (sljit_sw)addr <= BRANCH16_MAX);
602 				buf_ptr[0] |= (sljit_ins)IMM_I16(addr >> 2);
603 				break;
604 			}
605 
606 			if (jump->flags & PATCH_REL32) {
607 				SLJIT_ASSERT((sljit_sw)addr >= S32_MIN && (sljit_sw)addr <= S32_MAX);
608 
609 			buf_ptr[0] = PCADDU12I | RD(TMP_REG1) | (sljit_ins)(((addr & 0xffffffff) >> 12) << 5);
610 				SLJIT_ASSERT((buf_ptr[1] & OPC_2RI16(0x3f)) == JIRL);
611 				buf_ptr[1] |= IMM_I16((addr & 0xfff) >> 2);
612 				break;
613 			}
614 
615 			SLJIT_ASSERT((sljit_sw)addr >= JUMP_MIN && (sljit_sw)addr <= JUMP_MAX);
616 			if (jump->flags & IS_CALL)
617 				buf_ptr[0] = BL | (sljit_ins)IMM_I26(addr >> 2);
618 			else
619 				buf_ptr[0] = B | (sljit_ins)IMM_I26(addr >> 2);
620 		} while (0);
621 		jump = jump->next;
622 	}
623 
624 	put_label = compiler->put_labels;
625 	while (put_label) {
626 		load_addr_to_reg(put_label, 0);
627 		put_label = put_label->next;
628 	}
629 
630 	compiler->error = SLJIT_ERR_COMPILED;
631 	compiler->executable_offset = executable_offset;
632 	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
633 
634 	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
635 	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
636 
637 	SLJIT_CACHE_FLUSH(code, code_ptr);
638 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
639 	return code;
640 }
641 
642 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
643 {
644 	switch (feature_type)
645 	{
646 	case SLJIT_HAS_FPU:
647 #ifdef SLJIT_IS_FPU_AVAILABLE
648 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
649 #else
650 		/* Available by default. */
651 		return 1;
652 #endif
653 
654 	case SLJIT_HAS_ATOMIC:
655 		return (LOONGARCH_FEATURE_LAMCAS & get_cpu_features());
656 
657 	case SLJIT_HAS_CLZ:
658 	case SLJIT_HAS_CTZ:
659 	case SLJIT_HAS_REV:
660 	case SLJIT_HAS_ROT:
661 	case SLJIT_HAS_PREFETCH:
662 	case SLJIT_HAS_COPY_F32:
663 	case SLJIT_HAS_COPY_F64:
664 		return 1;
665 
666 	default:
667 		return 0;
668 	}
669 }
670 
671 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
672 {
673 	SLJIT_UNUSED_ARG(type);
674 
675 	return 0;
676 }
677 
678 /* --------------------------------------------------------------------- */
679 /*  Entry, exit                                                          */
680 /* --------------------------------------------------------------------- */
681 
682 /* Creates an index in data_transfer_insts array. */
683 #define LOAD_DATA	0x01
684 #define WORD_DATA	0x00
685 #define BYTE_DATA	0x02
686 #define HALF_DATA	0x04
687 #define INT_DATA	0x06
688 #define SIGNED_DATA	0x08
689 /* Separates integer and floating point registers */
690 #define GPR_REG		0x0f
691 #define DOUBLE_DATA	0x10
692 #define SINGLE_DATA	0x12
693 
694 #define MEM_MASK	0x1f
695 
696 #define ARG_TEST	0x00020
697 #define ALT_KEEP_CACHE	0x00040
698 #define CUMULATIVE_OP	0x00080
699 #define IMM_OP		0x00100
700 #define MOVE_OP		0x00200
701 #define SRC2_IMM	0x00400
702 
703 #define UNUSED_DEST	0x00800
704 #define REG_DEST	0x01000
705 #define REG1_SOURCE	0x02000
706 #define REG2_SOURCE	0x04000
707 #define SLOW_SRC1	0x08000
708 #define SLOW_SRC2	0x10000
709 #define SLOW_DEST	0x20000
710 
711 #define STACK_STORE	ST_D
712 #define STACK_LOAD	LD_D
713 
714 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst_r, sljit_sw imm)
715 {
716 	if (imm <= I12_MAX && imm >= I12_MIN)
717 		return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(TMP_ZERO) | IMM_I12(imm));
718 
719 	if (imm <= 0x7fffffffl && imm >= -0x80000000l) {
720 		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
721 		return push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm));
722 	} else if (imm <= 0x7ffffffffffffl && imm >= -0x8000000000000l) {
723 		FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
724 		FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
725 		return push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5));
726 	}
727 	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst_r) | (sljit_ins)(((imm & 0xffffffff) >> 12) << 5)));
728 	FAIL_IF(push_inst(compiler, ORI | RD(dst_r) | RJ(dst_r) | IMM_I12(imm)));
729 	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst_r) | (sljit_ins)(((imm >> 32) & 0xfffff) << 5)));
730 	return push_inst(compiler, LU52I_D | RD(dst_r) | RJ(dst_r) | IMM_I12(imm >> 52));
731 }
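/* A worked example (sketch): load_immediate(compiler, dst, 0x12345678) emits

       lu12i.w dst, 0x12345       ; dst = 0x12345 << 12 (sign-extended)
       ori     dst, dst, 0x678    ; fill in the low 12 bits

   larger constants add lu32i.d (bits 51:32) and, beyond the signed 52-bit range,
   lu52i.d (bits 63:52), giving the four-instruction worst case above. */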
732 
733 #define STACK_MAX_DISTANCE (-I12_MIN)
734 
735 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw);
736 
737 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
738 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
739 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
740 {
741 	sljit_s32 i, tmp, offset;
742 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
743 
744 	CHECK_ERROR();
745 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
746 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
747 
748 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
749 	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
750 
751 	local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
752 	compiler->local_size = local_size;
753 
754 	if (local_size <= STACK_MAX_DISTANCE) {
755 		/* Frequent case. */
756 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
757 		offset = local_size - SSIZE_OF(sw);
758 		local_size = 0;
759 	} else {
760 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(STACK_MAX_DISTANCE)));
761 		local_size -= STACK_MAX_DISTANCE;
762 
763 		if (local_size > STACK_MAX_DISTANCE)
764 			FAIL_IF(load_immediate(compiler, TMP_REG1, local_size));
765 		offset = STACK_MAX_DISTANCE - SSIZE_OF(sw);
766 	}
767 
768 	FAIL_IF(push_inst(compiler, STACK_STORE | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
769 
770 	tmp = SLJIT_S0 - saveds;
771 	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
772 		offset -= SSIZE_OF(sw);
773 		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
774 	}
775 
776 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
777 		offset -= SSIZE_OF(sw);
778 		FAIL_IF(push_inst(compiler, STACK_STORE | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
779 	}
780 
781 	tmp = SLJIT_FS0 - fsaveds;
782 	for (i = SLJIT_FS0; i > tmp; i--) {
783 		offset -= SSIZE_OF(f64);
784 		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
785 	}
786 
787 	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
788 		offset -= SSIZE_OF(f64);
789 		FAIL_IF(push_inst(compiler, FST_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
790 	}
791 
792 	if (local_size > STACK_MAX_DISTANCE)
793 		FAIL_IF(push_inst(compiler, SUB_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG1)));
794 	else if (local_size > 0)
795 		FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(-local_size)));
796 
797 	if (options & SLJIT_ENTER_REG_ARG)
798 		return SLJIT_SUCCESS;
799 
800 	arg_types >>= SLJIT_ARG_SHIFT;
801 	saved_arg_count = 0;
802 	tmp = SLJIT_R0;
803 
804 	while (arg_types > 0) {
805 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
806 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
807 				FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_S0 - saved_arg_count) | RJ(tmp) | IMM_I12(0)));
808 				saved_arg_count++;
809 			}
810 			tmp++;
811 		}
812 
813 		arg_types >>= SLJIT_ARG_SHIFT;
814 	}
815 
816 	return SLJIT_SUCCESS;
817 }
818 
819 #undef STACK_MAX_DISTANCE
820 
821 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
822 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
823 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
824 {
825 	CHECK_ERROR();
826 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
827 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
828 
829 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
830 	local_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
831 
832 	compiler->local_size = (local_size + SLJIT_LOCALS_OFFSET + 15) & ~0xf;
833 
834 	return SLJIT_SUCCESS;
835 }
836 
837 #define STACK_MAX_DISTANCE (-I12_MIN - 16)
838 
839 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
840 {
841 	sljit_s32 i, tmp, offset;
842 	sljit_s32 local_size = compiler->local_size;
843 
844 	if (local_size > STACK_MAX_DISTANCE) {
845 		local_size -= STACK_MAX_DISTANCE;
846 
847 		if (local_size > STACK_MAX_DISTANCE) {
848 			FAIL_IF(load_immediate(compiler, TMP_REG2, local_size));
849 			FAIL_IF(push_inst(compiler, ADD_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | RK(TMP_REG2)));
850 		} else
851 			FAIL_IF(push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size)));
852 
853 		local_size = STACK_MAX_DISTANCE;
854 	}
855 
856 	SLJIT_ASSERT(local_size > 0);
857 
858 	offset = local_size - SSIZE_OF(sw);
859 	if (!is_return_to)
860 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(RETURN_ADDR_REG) | RJ(SLJIT_SP) | IMM_I12(offset)));
861 
862 	tmp = SLJIT_S0 - compiler->saveds;
863 	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
864 		offset -= SSIZE_OF(sw);
865 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
866 	}
867 
868 	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
869 		offset -= SSIZE_OF(sw);
870 		FAIL_IF(push_inst(compiler, STACK_LOAD | RD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
871 	}
872 
873 	tmp = SLJIT_FS0 - compiler->fsaveds;
874 	for (i = SLJIT_FS0; i > tmp; i--) {
875 		offset -= SSIZE_OF(f64);
876 		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
877 	}
878 
879 	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
880 		offset -= SSIZE_OF(f64);
881 		FAIL_IF(push_inst(compiler, FLD_D | FRD(i) | RJ(SLJIT_SP) | IMM_I12(offset)));
882 	}
883 
884 	return push_inst(compiler, ADDI_D | RD(SLJIT_SP) | RJ(SLJIT_SP) | IMM_I12(local_size));
885 }
886 
887 #undef STACK_MAX_DISTANCE
888 
889 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
890 {
891 	CHECK_ERROR();
892 	CHECK(check_sljit_emit_return_void(compiler));
893 
894 	FAIL_IF(emit_stack_frame_release(compiler, 0));
895 	return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
896 }
897 
898 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
899 	sljit_s32 src, sljit_sw srcw)
900 {
901 	CHECK_ERROR();
902 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
903 
904 	if (src & SLJIT_MEM) {
905 		ADJUST_LOCAL_OFFSET(src, srcw);
906 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
907 		src = TMP_REG1;
908 		srcw = 0;
909 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
910 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
911 		src = TMP_REG1;
912 		srcw = 0;
913 	}
914 
915 	FAIL_IF(emit_stack_frame_release(compiler, 1));
916 
917 	SLJIT_SKIP_CHECKS(compiler);
918 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
919 }
920 
921 /* --------------------------------------------------------------------- */
922 /*  Operators                                                            */
923 /* --------------------------------------------------------------------- */
924 
925 static const sljit_ins data_transfer_insts[16 + 4] = {
926 /* u w s */ ST_D /* st.d */,
927 /* u w l */ LD_D /* ld.d */,
928 /* u b s */ ST_B /* st.b */,
929 /* u b l */ LD_BU /* ld.bu */,
930 /* u h s */ ST_H /* st.h */,
931 /* u h l */ LD_HU /* ld.hu */,
932 /* u i s */ ST_W /* st.w */,
933 /* u i l */ LD_WU /* ld.wu */,
934 
935 /* s w s */ ST_D /* st.d */,
936 /* s w l */ LD_D /* ld.d */,
937 /* s b s */ ST_B /* st.b */,
938 /* s b l */ LD_B /* ld.b */,
939 /* s h s */ ST_H /* st.h */,
940 /* s h l */ LD_H /* ld.h */,
941 /* s i s */ ST_W /* st.w */,
942 /* s i l */ LD_W /* ld.w */,
943 
944 /* d   s */ FST_D /* fst.d */,
945 /* d   l */ FLD_D /* fld.d */,
946 /* s   s */ FST_S /* fst.s */,
947 /* s   l */ FLD_S /* fld.s */,
948 };
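/* Index sketch: the table index is (flags & MEM_MASK), composed from the *_DATA flags
   above; e.g. HALF_DATA | LOAD_DATA | SIGNED_DATA selects ld.h, HALF_DATA | LOAD_DATA
   selects ld.hu, and SINGLE_DATA | LOAD_DATA selects fld.s from the floating-point
   entries at the end. The _x table below holds the matching register-indexed forms. */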
949 
950 static const sljit_ins data_transfer_insts_x[16 + 4] = {
951 /* u w s */ STX_D /* stx.d */,
952 /* u w l */ LDX_D /* ldx.d */,
953 /* u b s */ STX_B /* stx.b */,
954 /* u b l */ LDX_BU /* ldx.bu */,
955 /* u h s */ STX_H /* stx.h */,
956 /* u h l */ LDX_HU /* ldx.hu */,
957 /* u i s */ STX_W /* stx.w */,
958 /* u i l */ LDX_WU /* ldx.wu */,
959 
960 /* s w s */ STX_D /* stx.d */,
961 /* s w l */ LDX_D /* ldx.d */,
962 /* s b s */ STX_B /* stx.b */,
963 /* s b l */ LDX_B /* ldx.b */,
964 /* s h s */ STX_H /* stx.h */,
965 /* s h l */ LDX_H /* ldx.h */,
966 /* s i s */ STX_W /* stx.w */,
967 /* s i l */ LDX_W /* ldx.w */,
968 
969 /* d   s */ FSTX_D /* fstx.d */,
970 /* d   l */ FLDX_D /* fldx.d */,
971 /* s   s */ FSTX_S /* fstx.s */,
972 /* s   l */ FLDX_S /* fldx.s */,
973 };
974 
975 static sljit_s32 push_mem_inst(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
976 {
977 	sljit_ins ins;
978 	sljit_s32 base = arg & REG_MASK;
979 
980 	SLJIT_ASSERT(arg & SLJIT_MEM);
981 
982 	if (arg & OFFS_REG_MASK) {
983 		sljit_s32 offs = OFFS_REG(arg);
984 
985 		SLJIT_ASSERT(!argw);
986 		ins = data_transfer_insts_x[flags & MEM_MASK] |
987 			  ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
988 			  RJ(base) | RK(offs);
989 	} else {
990 		SLJIT_ASSERT(argw <= 0xfff && argw >= I12_MIN);
991 
992 		ins = data_transfer_insts[flags & MEM_MASK] |
993 			  ((flags & MEM_MASK) <= GPR_REG ? RD(reg) : FRD(reg)) |
994 			  RJ(base) | IMM_I12(argw);
995 	}
996 	return push_inst(compiler, ins);
997 }
998 
999 /* Can perform an operation using at most 1 instruction. */
1000 static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1001 {
1002 	SLJIT_ASSERT(arg & SLJIT_MEM);
1003 
1004 	/* argw == 0 (ldx/stx rd, rj, rk) can be used.
1005 	 * argw in [-2048, 2047] (ld/st rd, rj, imm) can be used. */
1006 	if (!argw || (!(arg & OFFS_REG_MASK) && (argw <= I12_MAX && argw >= I12_MIN))) {
1007 		/* Works for both absolute and relative addresses. */
1008 		if (SLJIT_UNLIKELY(flags & ARG_TEST))
1009 			return 1;
1010 
1011 		FAIL_IF(push_mem_inst(compiler, flags, reg, arg, argw));
1012 		return -1;
1013 	}
1014 	return 0;
1015 }
1016 
1017 #define TO_ARGW_HI(argw) (((argw) & ~0xfff) + (((argw) & 0x800) ? 0x1000 : 0))
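/* TO_ARGW_HI rounds argw to the nearest 4KB boundary so that the remaining low part
   always fits the signed 12-bit ld/st offset; e.g. TO_ARGW_HI(0x17ff) == 0x1000
   (remainder 0x7ff) and TO_ARGW_HI(0x1801) == 0x2000 (remainder -0x7ff). */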
1018 
1019 /* See getput_arg below.
1020    Note: can_cache is called only for binary operators. */
1021 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1022 {
1023 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
1024 
1025 	if (arg & OFFS_REG_MASK)
1026 		return 0;
1027 
1028 	if (arg == next_arg) {
1029 		if (((next_argw - argw) <= I12_MAX && (next_argw - argw) >= I12_MIN)
1030 				|| TO_ARGW_HI(argw) == TO_ARGW_HI(next_argw))
1031 			return 1;
1032 		return 0;
1033 	}
1034 
1035 	return 0;
1036 }
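/* Caching example (sketch): for two consecutive accesses such as [S0 + 0x10000] and
   [S0 + 0x10008], getput_arg() below computes base + high offset into TMP_REG3 once,
   and the second access only needs a small immediate relative to the cached value. */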
1037 
1038 /* Emit the necessary instructions. See can_cache above. */
1039 static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
1040 {
1041 	sljit_s32 base = arg & REG_MASK;
1042 	sljit_s32 tmp_r = TMP_REG1;
1043 	sljit_sw offset;
1044 
1045 	SLJIT_ASSERT(arg & SLJIT_MEM);
1046 	if (!(next_arg & SLJIT_MEM)) {
1047 		next_arg = 0;
1048 		next_argw = 0;
1049 	}
1050 
1051 	/* Since tmp can be the same as base or offset registers,
1052 	 * these might be unavailable after modifying tmp. */
1053 	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1054 		tmp_r = reg;
1055 
1056 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1057 		argw &= 0x3;
1058 
1059 		if (SLJIT_UNLIKELY(argw))
1060 			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG3) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1061 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1062 	}
1063 
1064 	if (compiler->cache_arg == arg && argw - compiler->cache_argw <= I12_MAX && argw - compiler->cache_argw >= I12_MIN)
1065 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), argw - compiler->cache_argw);
1066 
1067 	if (compiler->cache_arg == SLJIT_MEM && (argw - compiler->cache_argw <= I12_MAX) && (argw - compiler->cache_argw >= I12_MIN)) {
1068 		offset = argw - compiler->cache_argw;
1069 	} else {
1070 		sljit_sw argw_hi = TO_ARGW_HI(argw);
1071 		compiler->cache_arg = SLJIT_MEM;
1072 
1073 		if (next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN && argw_hi != TO_ARGW_HI(next_argw)) {
1074 			FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
1075 			compiler->cache_argw = argw;
1076 			offset = 0;
1077 		} else {
1078 			FAIL_IF(load_immediate(compiler, TMP_REG3, argw_hi));
1079 			compiler->cache_argw = argw_hi;
1080 			offset = argw & 0xfff;
1081 			argw = argw_hi;
1082 		}
1083 	}
1084 
1085 	if (!base)
1086 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1087 
1088 	if (arg == next_arg && next_argw - argw <= I12_MAX && next_argw - argw >= I12_MIN) {
1089 		compiler->cache_arg = arg;
1090 		FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG3) | RJ(TMP_REG3) | RK(base)));
1091 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(TMP_REG3), offset);
1092 	}
1093 
1094 	if (!offset)
1095 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, TMP_REG3), 0);
1096 
1097 	FAIL_IF(push_inst(compiler, ADD_D | RD(tmp_r) | RJ(TMP_REG3) | RK(base)));
1098 	return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), offset);
1099 }
1100 
1101 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
1102 {
1103 	sljit_s32 base = arg & REG_MASK;
1104 	sljit_s32 tmp_r = TMP_REG1;
1105 
1106 	if (getput_arg_fast(compiler, flags, reg, arg, argw))
1107 		return compiler->error;
1108 
1109 	if ((flags & MEM_MASK) <= GPR_REG && (flags & LOAD_DATA))
1110 		tmp_r = reg;
1111 
1112 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1113 		argw &= 0x3;
1114 
1115 		if (SLJIT_UNLIKELY(argw))
1116 			FAIL_IF(push_inst(compiler, SLLI_D | RD(tmp_r) | RJ(OFFS_REG(arg)) | IMM_I12(argw)));
1117 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1118 	} else {
1119 		FAIL_IF(load_immediate(compiler, tmp_r, argw));
1120 
1121 		if (base != 0)
1122 			return push_mem_inst(compiler, flags, reg, SLJIT_MEM2(base, tmp_r), 0);
1123 		return push_mem_inst(compiler, flags, reg, SLJIT_MEM1(tmp_r), 0);
1124 	}
1125 }
1126 
1127 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1128 {
1129 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1130 		return compiler->error;
1131 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1132 }
1133 
1134 #define IMM_EXTEND(v) (IMM_I12((op & SLJIT_32) ? (v) : (32 + (v))))
1135 
1136 /* andi/ori/xori are zero-extended */
1137 #define EMIT_LOGICAL(op_imm, op_reg) \
1138 	if (flags & SRC2_IMM) { \
1139 		if (op & SLJIT_SET_Z) {\
1140 			FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1141 			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG))); \
1142 		} \
1143 		if (!(flags & UNUSED_DEST)) { \
1144 			if (dst == src1) { \
1145 				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1146 				FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(TMP_REG1))); \
1147 			} else { \
1148 				FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(TMP_ZERO) | IMM_I12(src2))); \
1149 				FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(dst))); \
1150 			} \
1151 		} \
1152 	} \
1153 	else { \
1154 		if (op & SLJIT_SET_Z) \
1155 			FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2))); \
1156 		if (!(flags & UNUSED_DEST)) \
1157 			FAIL_IF(push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2))); \
1158 	} \
1159 	while (0)
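/* Note on the SRC2_IMM path above: andi/ori/xori zero-extend their 12-bit immediate,
   while sljit immediates are signed, so the constant is first materialized into a
   register with addi.d (which sign-extends) and the register form of the operation
   is used instead of the *I form. */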
1160 
1161 #define EMIT_SHIFT(imm, reg) \
1162 	op_imm = (imm); \
1163 	op_reg = (reg)
1164 
1165 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1166 	sljit_s32 dst, sljit_s32 src1, sljit_sw src2)
1167 {
1168 	sljit_s32 is_overflow, is_carry, carry_src_r, is_handled;
1169 	sljit_ins op_imm, op_reg;
1170 	sljit_ins word_size = ((op & SLJIT_32) ? 32 : 64);
1171 
1172 	switch (GET_OPCODE(op)) {
1173 	case SLJIT_MOV:
1174 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1175 		if (dst != src2)
1176 			return push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src2) | IMM_I12(0));
1177 		return SLJIT_SUCCESS;
1178 
1179 	case SLJIT_MOV_U8:
1180 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1181 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1182 			return push_inst(compiler, ANDI | RD(dst) | RJ(src2) | IMM_I12(0xff));
1183 		SLJIT_ASSERT(dst == src2);
1184 		return SLJIT_SUCCESS;
1185 
1186 	case SLJIT_MOV_S8:
1187 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1188 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1189 			return push_inst(compiler, EXT_W_B | RD(dst) | RJ(src2));
1190 		SLJIT_ASSERT(dst == src2);
1191 		return SLJIT_SUCCESS;
1192 
1193 	case SLJIT_MOV_U16:
1194 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1195 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1196 			return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(src2) | (15 << 16));
1197 		SLJIT_ASSERT(dst == src2);
1198 		return SLJIT_SUCCESS;
1199 
1200 	case SLJIT_MOV_S16:
1201 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1202 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1203 			return push_inst(compiler, EXT_W_H | RD(dst) | RJ(src2));
1204 		SLJIT_ASSERT(dst == src2);
1205 		return SLJIT_SUCCESS;
1206 
1207 	case SLJIT_MOV_U32:
1208 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1209 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1210 			return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(src2) | (31 << 16));
1211 		SLJIT_ASSERT(dst == src2);
1212 		return SLJIT_SUCCESS;
1213 
1214 	case SLJIT_MOV_S32:
1215 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1216 		if ((flags & (REG_DEST | REG2_SOURCE)) == (REG_DEST | REG2_SOURCE))
1217 			return push_inst(compiler, SLLI_W | RD(dst) | RJ(src2) | IMM_I12(0));
1218 		SLJIT_ASSERT(dst == src2);
1219 		return SLJIT_SUCCESS;
1220 
1221 	case SLJIT_CLZ:
1222 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1223 		return push_inst(compiler, INST(CLZ, op) | RD(dst) | RJ(src2));
1224 
1225 	case SLJIT_CTZ:
1226 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1227 		return push_inst(compiler, INST(CTZ, op) | RD(dst) | RJ(src2));
1228 
1229 	case SLJIT_REV:
1230 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1231 		return push_inst(compiler, ((op & SLJIT_32) ? REVB_2W : REVB_D) | RD(dst) | RJ(src2));
1232 
1233 	case SLJIT_REV_S16:
1234 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1235 		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1236 		return push_inst(compiler, EXT_W_H | RD(dst) | RJ(dst));
1237 
1238 	case SLJIT_REV_U16:
1239 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM));
1240 		FAIL_IF(push_inst(compiler, REVB_2H | RD(dst) | RJ(src2)));
1241 		return push_inst(compiler, INST(BSTRPICK, op) | RD(dst) | RJ(dst) | (15 << 16));
1242 
1243 	case SLJIT_REV_S32:
1244 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1);
1245 		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1246 		return push_inst(compiler, SLLI_W | RD(dst) | RJ(dst) | IMM_I12(0));
1247 
1248 	case SLJIT_REV_U32:
1249 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & SRC2_IMM) && dst != TMP_REG1);
1250 		FAIL_IF(push_inst(compiler, REVB_2W | RD(dst) | RJ(src2)));
1251 		return push_inst(compiler, BSTRPICK_D | RD(dst) | RJ(dst) | (31 << 16));
1252 
1253 	case SLJIT_ADD:
1254 		/* Overflow computation (both add and sub): overflow = src1_sign ^ src2_sign ^ result_sign ^ carry_flag */
1255 		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1256 		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1257 
1258 		if (flags & SRC2_IMM) {
1259 			if (is_overflow) {
1260 				if (src2 >= 0)
1261 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1262 				else {
1263 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(TMP_ZERO) | IMM_I12(-1)));
1264 					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1265 				}
1266 			}
1267 			else if (op & SLJIT_SET_Z)
1268 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1269 
1270 			/* Only the zero flag is needed. */
1271 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1272 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(src2)));
1273 		}
1274 		else {
1275 			if (is_overflow)
1276 				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1277 			else if (op & SLJIT_SET_Z)
1278 				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1279 
1280 			if (is_overflow || carry_src_r != 0) {
1281 				if (src1 != dst)
1282 					carry_src_r = (sljit_s32)src1;
1283 				else if (src2 != dst)
1284 					carry_src_r = (sljit_s32)src2;
1285 				else {
1286 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(0)));
1287 					carry_src_r = OTHER_FLAG;
1288 				}
1289 			}
1290 
1291 			/* Only the zero flag is needed. */
1292 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1293 				FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(dst) | RJ(src1) | RK(src2)));
1294 		}
1295 
1296 		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1297 		if (is_overflow || carry_src_r != 0) {
1298 			if (flags & SRC2_IMM)
1299 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(src2)));
1300 			else
1301 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(carry_src_r)));
1302 		}
1303 
1304 		if (!is_overflow)
1305 			return SLJIT_SUCCESS;
1306 
1307 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1308 		if (op & SLJIT_SET_Z)
1309 			FAIL_IF(push_inst(compiler, INST(ADD, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1310 		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1311 		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1312 
1313 	case SLJIT_ADDC:
1314 		carry_src_r = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1315 
1316 		if (flags & SRC2_IMM) {
1317 			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst) | RJ(src1) | IMM_I12(src2)));
1318 		} else {
1319 			if (carry_src_r != 0) {
1320 				if (src1 != dst)
1321 					carry_src_r = (sljit_s32)src1;
1322 				else if (src2 != dst)
1323 					carry_src_r = (sljit_s32)src2;
1324 				else {
1325 					FAIL_IF(push_inst(compiler, ADDI_D | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1326 					carry_src_r = EQUAL_FLAG;
1327 				}
1328 			}
1329 
1330 			FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(src1) | RK(src2)));
1331 		}
1332 
1333 		/* Carry is zero if a + b >= a or a + b >= b, otherwise it is 1. */
1334 		if (carry_src_r != 0) {
1335 			if (flags & SRC2_IMM)
1336 				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(src2)));
1337 			else
1338 				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(dst) | RK(carry_src_r)));
1339 		}
1340 
1341 		FAIL_IF(push_inst(compiler, ADD_D | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1342 
1343 		if (carry_src_r == 0)
1344 			return SLJIT_SUCCESS;
1345 
1346 		/* Set OTHER_FLAG to 1 only when (dst == 0) && (carry-in == 1). */
1347 		FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG)));
1348 		/* Set carry flag. */
1349 		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(EQUAL_FLAG));
1350 
1351 	case SLJIT_SUB:
1352 		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1353 			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1354 			src2 = TMP_REG2;
1355 			flags &= ~SRC2_IMM;
1356 		}
1357 
1358 		is_handled = 0;
1359 
1360 		if (flags & SRC2_IMM) {
1361 			if (GET_FLAG_TYPE(op) == SLJIT_LESS) {
1362 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1363 				is_handled = 1;
1364 			}
1365 			else if (GET_FLAG_TYPE(op) == SLJIT_SIG_LESS) {
1366 				FAIL_IF(push_inst(compiler, SLTI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1367 				is_handled = 1;
1368 			}
1369 		}
1370 
1371 		if (!is_handled && GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_SIG_LESS_EQUAL) {
1372 			is_handled = 1;
1373 
1374 			if (flags & SRC2_IMM) {
1375 				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1376 				src2 = TMP_REG2;
1377 				flags &= ~SRC2_IMM;
1378 			}
1379 
1380 			switch (GET_FLAG_TYPE(op)) {
1381 			case SLJIT_LESS:
1382 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1383 				break;
1384 			case SLJIT_GREATER:
1385 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1386 				break;
1387 			case SLJIT_SIG_LESS:
1388 				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1389 				break;
1390 			case SLJIT_SIG_GREATER:
1391 				FAIL_IF(push_inst(compiler, SLT | RD(OTHER_FLAG) | RJ(src2) | RK(src1)));
1392 				break;
1393 			}
1394 		}
1395 
1396 		if (is_handled) {
1397 			if (flags & SRC2_IMM) {
1398 				if (op & SLJIT_SET_Z)
1399 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1400 				if (!(flags & UNUSED_DEST))
1401 					return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2));
1402 			}
1403 			else {
1404 				if (op & SLJIT_SET_Z)
1405 					FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1406 				if (!(flags & UNUSED_DEST))
1407 					return push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2));
1408 			}
1409 			return SLJIT_SUCCESS;
1410 		}
1411 
1412 		is_overflow = GET_FLAG_TYPE(op) == SLJIT_OVERFLOW;
1413 		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1414 
1415 		if (flags & SRC2_IMM) {
1416 			if (is_overflow) {
1417 				if (src2 >= 0)
1418 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(0)));
1419 				else {
1420 					FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-1)));
1421 					FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(EQUAL_FLAG)));
1422 				}
1423 			}
1424 			else if (op & SLJIT_SET_Z)
1425 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(-src2)));
1426 
1427 			if (is_overflow || is_carry)
1428 				FAIL_IF(push_inst(compiler, SLTUI | RD(OTHER_FLAG) | RJ(src1) | IMM_I12(src2)));
1429 
1430 			/* Only the zero flag is needed. */
1431 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1432 				FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1433 		}
1434 		else {
1435 			if (is_overflow)
1436 				FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1437 			else if (op & SLJIT_SET_Z)
1438 				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1439 
1440 			if (is_overflow || is_carry)
1441 				FAIL_IF(push_inst(compiler, SLTU | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1442 
1443 			/* Only the zero flag is needed. */
1444 			if (!(flags & UNUSED_DEST) || (op & VARIABLE_FLAG_MASK))
1445 				FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1446 		}
1447 
1448 		if (!is_overflow)
1449 			return SLJIT_SUCCESS;
1450 
1451 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(dst) | RK(EQUAL_FLAG)));
1452 		if (op & SLJIT_SET_Z)
1453 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(EQUAL_FLAG) | RJ(dst) | IMM_I12(0)));
1454 		FAIL_IF(push_inst(compiler, INST(SRLI, op) | RD(TMP_REG1) | RJ(TMP_REG1) | IMM_EXTEND(31)));
1455 		return push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(TMP_REG1) | RK(OTHER_FLAG));
1456 
1457 	case SLJIT_SUBC:
1458 		if ((flags & SRC2_IMM) && src2 == I12_MIN) {
1459 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | IMM_I12(src2)));
1460 			src2 = TMP_REG2;
1461 			flags &= ~SRC2_IMM;
1462 		}
1463 
1464 		is_carry = GET_FLAG_TYPE(op) == SLJIT_CARRY;
1465 
1466 		if (flags & SRC2_IMM) {
1467 			if (is_carry)
1468 				FAIL_IF(push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1469 
1470 			FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(-src2)));
1471 		}
1472 		else {
1473 			if (is_carry)
1474 				FAIL_IF(push_inst(compiler, SLTU | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1475 
1476 			FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(src1) | RK(src2)));
1477 		}
1478 
1479 		if (is_carry)
1480 			FAIL_IF(push_inst(compiler, SLTU | RD(TMP_REG1) | RJ(dst) | RK(OTHER_FLAG)));
1481 
1482 		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(dst) | RJ(dst) | RK(OTHER_FLAG)));
1483 
1484 		if (!is_carry)
1485 			return SLJIT_SUCCESS;
1486 
1487 		return push_inst(compiler, OR | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(TMP_REG1));
1488 
1489 	case SLJIT_MUL:
1490 		SLJIT_ASSERT(!(flags & SRC2_IMM));
1491 
1492 		if (GET_FLAG_TYPE(op) != SLJIT_OVERFLOW)
1493 			return push_inst(compiler, INST(MUL, op) | RD(dst) | RJ(src1) | RK(src2));
1494 
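		/* Overflow check: the truncated product is compared with the full product
		   (32 bit case), or the high half is compared with the sign extension of the
		   low half (64 bit case); OTHER_FLAG becomes non-zero on overflow. */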
1495 		if (op & SLJIT_32) {
1496 			FAIL_IF(push_inst(compiler, MUL_D | RD(OTHER_FLAG) | RJ(src1) | RK(src2)));
1497 			FAIL_IF(push_inst(compiler, MUL_W | RD(dst) | RJ(src1) | RK(src2)));
1498 			return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(dst) | RK(OTHER_FLAG));
1499 		}
1500 
1501 		FAIL_IF(push_inst(compiler, MULH_D | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1502 		FAIL_IF(push_inst(compiler, MUL_D | RD(dst) | RJ(src1) | RK(src2)));
1503 		FAIL_IF(push_inst(compiler, SRAI_D | RD(OTHER_FLAG) | RJ(dst) | IMM_I12(63)));
1504 		return push_inst(compiler, SUB_D | RD(OTHER_FLAG) | RJ(EQUAL_FLAG) | RK(OTHER_FLAG));
1505 
1506 	case SLJIT_AND:
1507 		EMIT_LOGICAL(ANDI, AND);
1508 		return SLJIT_SUCCESS;
1509 
1510 	case SLJIT_OR:
1511 		EMIT_LOGICAL(ORI, OR);
1512 		return SLJIT_SUCCESS;
1513 
1514 	case SLJIT_XOR:
1515 		EMIT_LOGICAL(XORI, XOR);
1516 		return SLJIT_SUCCESS;
1517 
1518 	case SLJIT_SHL:
1519 	case SLJIT_MSHL:
1520 		if (op & SLJIT_32) {
1521 			EMIT_SHIFT(SLLI_W, SLL_W);
1522 		} else {
1523 			EMIT_SHIFT(SLLI_D, SLL_D);
1524 		}
1525 		break;
1526 
1527 	case SLJIT_LSHR:
1528 	case SLJIT_MLSHR:
1529 		if (op & SLJIT_32) {
1530 			EMIT_SHIFT(SRLI_W, SRL_W);
1531 		} else {
1532 			EMIT_SHIFT(SRLI_D, SRL_D);
1533 		}
1534 		break;
1535 
1536 	case SLJIT_ASHR:
1537 	case SLJIT_MASHR:
1538 		if (op & SLJIT_32) {
1539 			EMIT_SHIFT(SRAI_W, SRA_W);
1540 		} else {
1541 			EMIT_SHIFT(SRAI_D, SRA_D);
1542 		}
1543 		break;
1544 
1545 	case SLJIT_ROTL:
1546 	case SLJIT_ROTR:
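		/* LoongArch only provides rotate right, so a rotate left is emitted as a rotate
		   right by the complementary amount (immediate) or by the negated count. */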
1547 		if (flags & SRC2_IMM) {
1548 			SLJIT_ASSERT(src2 != 0);
1549 
1550 			if (GET_OPCODE(op) == SLJIT_ROTL)
1551 				src2 = word_size - src2;
1552 			return push_inst(compiler, INST(ROTRI, op) | RD(dst) | RJ(src1) | IMM_I12(src2));
1553 
1554 		}
1555 
1556 		if (src2 == TMP_ZERO) {
1557 			if (dst != src1)
1558 				return push_inst(compiler, INST(ADDI, op) | RD(dst) | RJ(src1) | IMM_I12(0));
1559 			return SLJIT_SUCCESS;
1560 		}
1561 
1562 		if (GET_OPCODE(op) == SLJIT_ROTL) {
1563 			FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(OTHER_FLAG) | RJ(TMP_ZERO) | RK(src2)));
1564 			src2 = OTHER_FLAG;
1565 		}
1566 		return push_inst(compiler, INST(ROTR, op) | RD(dst) | RJ(src1) | RK(src2));
1567 
1568 	default:
1569 		SLJIT_UNREACHABLE();
1570 		return SLJIT_SUCCESS;
1571 	}
1572 
1573 	if (flags & SRC2_IMM) {
1574 		if (op & SLJIT_SET_Z)
1575 			FAIL_IF(push_inst(compiler, op_imm | RD(EQUAL_FLAG) | RJ(src1) | IMM_I12(src2)));
1576 
1577 		if (flags & UNUSED_DEST)
1578 			return SLJIT_SUCCESS;
1579 		return push_inst(compiler, op_imm | RD(dst) | RJ(src1) | IMM_I12(src2));
1580 	}
1581 
1582 	if (op & SLJIT_SET_Z)
1583 		FAIL_IF(push_inst(compiler, op_reg | RD(EQUAL_FLAG) | RJ(src1) | RK(src2)));
1584 
1585 	if (flags & UNUSED_DEST)
1586 		return SLJIT_SUCCESS;
1587 	return push_inst(compiler, op_reg | RD(dst) | RJ(src1) | RK(src2));
1588 }
1589 
1590 #undef IMM_EXTEND
1591 
1592 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1593 	sljit_s32 dst, sljit_sw dstw,
1594 	sljit_s32 src1, sljit_sw src1w,
1595 	sljit_s32 src2, sljit_sw src2w)
1596 {
1597 	/* arg1 goes to TMP_REG1 or src reg
1598 	   arg2 goes to TMP_REG2, imm or src reg
1599 	   TMP_REG3 can be used for caching
1600 	   result goes to TMP_REG2, so storing the result can use TMP_REG1 and TMP_REG3. */
1601 	sljit_s32 dst_r = TMP_REG2;
1602 	sljit_s32 src1_r;
1603 	sljit_sw src2_r = 0;
1604 	sljit_s32 sugg_src2_r = TMP_REG2;
1605 
1606 	if (!(flags & ALT_KEEP_CACHE)) {
1607 		compiler->cache_arg = 0;
1608 		compiler->cache_argw = 0;
1609 	}
1610 
1611 	if (dst == 0) {
1612 		SLJIT_ASSERT(HAS_FLAGS(op));
1613 		flags |= UNUSED_DEST;
1614 		dst = TMP_REG2;
1615 	}
1616 	else if (FAST_IS_REG(dst)) {
1617 		dst_r = dst;
1618 		flags |= REG_DEST;
1619 		if (flags & MOVE_OP)
1620 			sugg_src2_r = dst_r;
1621 	}
1622 	else if ((dst & SLJIT_MEM) && !getput_arg_fast(compiler, flags | ARG_TEST, TMP_REG1, dst, dstw))
1623 		flags |= SLOW_DEST;
1624 
1625 	if (flags & IMM_OP) {
1626 		if (src2 == SLJIT_IMM && src2w != 0 && src2w <= I12_MAX && src2w >= I12_MIN) {
1627 			flags |= SRC2_IMM;
1628 			src2_r = src2w;
1629 		}
1630 		else if ((flags & CUMULATIVE_OP) && src1 == SLJIT_IMM && src1w != 0 && src1w <= I12_MAX && src1w >= I12_MIN) {
1631 			flags |= SRC2_IMM;
1632 			src2_r = src1w;
1633 
1634 			/* And swap arguments. */
1635 			src1 = src2;
1636 			src1w = src2w;
1637 			src2 = SLJIT_IMM;
1638 			/* src2w = src2_r unneeded. */
1639 		}
1640 	}
1641 
1642 	/* Source 1. */
1643 	if (FAST_IS_REG(src1)) {
1644 		src1_r = src1;
1645 		flags |= REG1_SOURCE;
1646 	}
1647 	else if (src1 == SLJIT_IMM) {
1648 		if (src1w) {
1649 			FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
1650 			src1_r = TMP_REG1;
1651 		}
1652 		else
1653 			src1_r = TMP_ZERO;
1654 	}
1655 	else {
1656 		if (getput_arg_fast(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w))
1657 			FAIL_IF(compiler->error);
1658 		else
1659 			flags |= SLOW_SRC1;
1660 		src1_r = TMP_REG1;
1661 	}
1662 
1663 	/* Source 2. */
1664 	if (FAST_IS_REG(src2)) {
1665 		src2_r = src2;
1666 		flags |= REG2_SOURCE;
1667 		if ((flags & (REG_DEST | MOVE_OP)) == MOVE_OP)
1668 			dst_r = (sljit_s32)src2_r;
1669 	}
1670 	else if (src2 == SLJIT_IMM) {
1671 		if (!(flags & SRC2_IMM)) {
1672 			if (src2w) {
1673 				FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
1674 				src2_r = sugg_src2_r;
1675 			}
1676 			else {
1677 				src2_r = TMP_ZERO;
1678 				if (flags & MOVE_OP) {
1679 					if (dst & SLJIT_MEM)
1680 						dst_r = 0;
1681 					else
1682 						op = SLJIT_MOV;
1683 				}
1684 			}
1685 		}
1686 	}
1687 	else {
1688 		if (getput_arg_fast(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w))
1689 			FAIL_IF(compiler->error);
1690 		else
1691 			flags |= SLOW_SRC2;
1692 
1693 		src2_r = sugg_src2_r;
1694 	}
1695 
1696 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
1697 		SLJIT_ASSERT(src2_r == TMP_REG2);
1698 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
1699 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
1700 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1701 		}
1702 		else {
1703 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
1704 			FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
1705 		}
1706 	}
1707 	else if (flags & SLOW_SRC1)
1708 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
1709 	else if (flags & SLOW_SRC2)
1710 		FAIL_IF(getput_arg(compiler, flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
1711 
1712 	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));
1713 
1714 	if (dst & SLJIT_MEM) {
1715 		if (!(flags & SLOW_DEST)) {
1716 			getput_arg_fast(compiler, flags, dst_r, dst, dstw);
1717 			return compiler->error;
1718 		}
1719 		return getput_arg(compiler, flags, dst_r, dst, dstw, 0, 0);
1720 	}
1721 
1722 	return SLJIT_SUCCESS;
1723 }
1724 
1725 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1726 {
1727 	CHECK_ERROR();
1728 	CHECK(check_sljit_emit_op0(compiler, op));
1729 
1730 	switch (GET_OPCODE(op)) {
1731 	case SLJIT_BREAKPOINT:
1732 		return push_inst(compiler, BREAK);
1733 	case SLJIT_NOP:
1734 		return push_inst(compiler, ANDI | RD(TMP_ZERO) | RJ(TMP_ZERO) | IMM_I12(0));
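	/* LMUL_UW / LMUL_SW return the 128 bit product in R1:R0; R1 is saved in TMP_REG1
	   first because the high half overwrites it before the low half is computed. */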
1735 	case SLJIT_LMUL_UW:
1736 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1737 		FAIL_IF(push_inst(compiler, MULH_DU | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1738 		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
1739 	case SLJIT_LMUL_SW:
1740 		FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(SLJIT_R1) | IMM_I12(0)));
1741 		FAIL_IF(push_inst(compiler, MULH_D | RD(SLJIT_R1) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1742 		return push_inst(compiler, MUL_D | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(TMP_REG1));
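	/* DIVMOD returns the quotient in R0 and the remainder in R1; the dividend is
	   saved in TMP_REG1 since the division overwrites R0. */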
1743 	case SLJIT_DIVMOD_UW:
1744 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1745 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1746 		return push_inst(compiler, ((op & SLJIT_32)? MOD_WU: MOD_DU) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1747 	case SLJIT_DIVMOD_SW:
1748 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG1) | RJ(SLJIT_R0) | IMM_I12(0)));
1749 		FAIL_IF(push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1)));
1750 		return push_inst(compiler, INST(MOD, op) | RD(SLJIT_R1) | RJ(TMP_REG1) | RK(SLJIT_R1));
1751 	case SLJIT_DIV_UW:
1752 		return push_inst(compiler, ((op & SLJIT_32)? DIV_WU: DIV_DU) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1753 	case SLJIT_DIV_SW:
1754 		return push_inst(compiler, INST(DIV, op) | RD(SLJIT_R0) | RJ(SLJIT_R0) | RK(SLJIT_R1));
1755 	case SLJIT_ENDBR:
1756 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1757 		return SLJIT_SUCCESS;
1758 	}
1759 
1760 	SLJIT_UNREACHABLE();
1761 	return SLJIT_ERR_UNSUPPORTED;
1762 }
1763 
1764 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1765 	sljit_s32 dst, sljit_sw dstw,
1766 	sljit_s32 src, sljit_sw srcw)
1767 {
1768 	sljit_s32 flags = 0;
1769 
1770 	CHECK_ERROR();
1771 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1772 	ADJUST_LOCAL_OFFSET(dst, dstw);
1773 	ADJUST_LOCAL_OFFSET(src, srcw);
1774 
1775 	if (op & SLJIT_32)
1776 		flags = INT_DATA | SIGNED_DATA;
1777 
1778 	switch (GET_OPCODE(op)) {
1779 	case SLJIT_MOV:
1780 	case SLJIT_MOV_P:
1781 		return emit_op(compiler, SLJIT_MOV, WORD_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, srcw);
1782 
1783 	case SLJIT_MOV_U32:
1784 		return emit_op(compiler, SLJIT_MOV_U32, INT_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u32)srcw : srcw);
1785 
1786 	case SLJIT_MOV_S32:
1787 	/* Logical operators have no W variant, so sign-extended input is necessary for them. */
1788 	case SLJIT_MOV32:
1789 		return emit_op(compiler, SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s32)srcw : srcw);
1790 
1791 	case SLJIT_MOV_U8:
1792 		return emit_op(compiler, op, BYTE_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
1793 
1794 	case SLJIT_MOV_S8:
1795 		return emit_op(compiler, op, BYTE_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
1796 
1797 	case SLJIT_MOV_U16:
1798 		return emit_op(compiler, op, HALF_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
1799 
1800 	case SLJIT_MOV_S16:
1801 		return emit_op(compiler, op, HALF_DATA | SIGNED_DATA | MOVE_OP, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
1802 
1803 	case SLJIT_CLZ:
1804 	case SLJIT_CTZ:
1805 	case SLJIT_REV:
1806 		return emit_op(compiler, op, flags, dst, dstw, TMP_REG1, 0, src, srcw);
1807 
1808 	case SLJIT_REV_U16:
1809 	case SLJIT_REV_S16:
1810 		return emit_op(compiler, op, HALF_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1811 
1812 	case SLJIT_REV_U32:
1813 	case SLJIT_REV_S32:
1814 		return emit_op(compiler, op | SLJIT_32, INT_DATA, dst, dstw, TMP_REG1, 0, src, srcw);
1815 	}
1816 
1817 	SLJIT_UNREACHABLE();
1818 	return SLJIT_SUCCESS;
1819 }
1820 
1821 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1822 	sljit_s32 dst, sljit_sw dstw,
1823 	sljit_s32 src1, sljit_sw src1w,
1824 	sljit_s32 src2, sljit_sw src2w)
1825 {
1826 	sljit_s32 flags = 0;
1827 
1828 	CHECK_ERROR();
1829 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1830 	ADJUST_LOCAL_OFFSET(dst, dstw);
1831 	ADJUST_LOCAL_OFFSET(src1, src1w);
1832 	ADJUST_LOCAL_OFFSET(src2, src2w);
1833 
1834 	if (op & SLJIT_32) {
1835 		flags |= INT_DATA | SIGNED_DATA;
1836 		if (src1 == SLJIT_IMM)
1837 			src1w = (sljit_s32)src1w;
1838 		if (src2 == SLJIT_IMM)
1839 			src2w = (sljit_s32)src2w;
1840 	}
1841 
1843 	switch (GET_OPCODE(op)) {
1844 	case SLJIT_ADD:
1845 	case SLJIT_ADDC:
1846 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1847 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1848 
1849 	case SLJIT_SUB:
1850 	case SLJIT_SUBC:
1851 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1852 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1853 
1854 	case SLJIT_MUL:
1855 		compiler->status_flags_state = 0;
1856 		return emit_op(compiler, op, flags | CUMULATIVE_OP, dst, dstw, src1, src1w, src2, src2w);
1857 
1858 	case SLJIT_AND:
1859 	case SLJIT_OR:
1860 	case SLJIT_XOR:
1861 		return emit_op(compiler, op, flags | CUMULATIVE_OP | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1862 
1863 	case SLJIT_SHL:
1864 	case SLJIT_MSHL:
1865 	case SLJIT_LSHR:
1866 	case SLJIT_MLSHR:
1867 	case SLJIT_ASHR:
1868 	case SLJIT_MASHR:
1869 	case SLJIT_ROTL:
1870 	case SLJIT_ROTR:
1871 		if (src2 == SLJIT_IMM) {
1872 			if (op & SLJIT_32)
1873 				src2w &= 0x1f;
1874 			else
1875 				src2w &= 0x3f;
1876 		}
1877 
1878 		return emit_op(compiler, op, flags | IMM_OP, dst, dstw, src1, src1w, src2, src2w);
1879 	}
1880 
1881 	SLJIT_UNREACHABLE();
1882 	return SLJIT_SUCCESS;
1883 }
1884 
1885 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
1886 	sljit_s32 src1, sljit_sw src1w,
1887 	sljit_s32 src2, sljit_sw src2w)
1888 {
1889 	CHECK_ERROR();
1890 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
1891 
1892 	SLJIT_SKIP_CHECKS(compiler);
1893 	return sljit_emit_op2(compiler, op, 0, 0, src1, src1w, src2, src2w);
1894 }
1895 
1896 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
1897 	sljit_s32 dst_reg,
1898 	sljit_s32 src1_reg,
1899 	sljit_s32 src2_reg,
1900 	sljit_s32 src3, sljit_sw src3w)
1901 {
1902 	sljit_s32 is_left;
1903 	sljit_ins ins1, ins2, ins3;
1904 	sljit_s32 inp_flags = ((op & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
1905 	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
1906 
1908 	CHECK_ERROR();
1909 	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
1910 
1911 	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
1912 
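	/* The result is src1_reg shifted by src3 with the vacated bits filled from
	   src2_reg; when both sources are the same register this is simply a rotate. */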
1913 	if (src1_reg == src2_reg) {
1914 		SLJIT_SKIP_CHECKS(compiler);
1915 		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
1916 	}
1917 
1918 	ADJUST_LOCAL_OFFSET(src3, src3w);
1919 
1920 	if (src3 == SLJIT_IMM) {
1921 		src3w &= bit_length - 1;
1922 
1923 		if (src3w == 0)
1924 			return SLJIT_SUCCESS;
1925 
1926 		if (is_left) {
1927 			ins1 = INST(SLLI, op) | IMM_I12(src3w);
1928 			src3w = bit_length - src3w;
1929 			ins2 = INST(SRLI, op) | IMM_I12(src3w);
1930 		} else {
1931 			ins1 = INST(SRLI, op) | IMM_I12(src3w);
1932 			src3w = bit_length - src3w;
1933 			ins2 = INST(SLLI, op) | IMM_I12(src3w);
1934 		}
1935 
1936 		FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg)));
1937 		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg)));
1938 		return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
1939 	}
1940 
1941 	if (src3 & SLJIT_MEM) {
1942 		FAIL_IF(emit_op_mem(compiler, inp_flags, TMP_REG2, src3, src3w));
1943 		src3 = TMP_REG2;
1944 	} else if (dst_reg == src3) {
1945 		FAIL_IF(push_inst(compiler, INST(ADDI, op) | RD(TMP_REG2) | RJ(src3) | IMM_I12(0)));
1946 		src3 = TMP_REG2;
1947 	}
1948 
1949 	if (is_left) {
1950 		ins1 = INST(SLL, op);
1951 		ins2 = INST(SRLI, op);
1952 		ins3 = INST(SRL, op);
1953 	} else {
1954 		ins1 = INST(SRL, op);
1955 		ins2 = INST(SLLI, op);
1956 		ins3 = INST(SLL, op);
1957 	}
1958 
1959 	FAIL_IF(push_inst(compiler, ins1 | RD(dst_reg) | RJ(src1_reg) | RK(src3)));
1960 
1961 	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
1962 		FAIL_IF(push_inst(compiler, ins2 | RD(TMP_REG1) | RJ(src2_reg) | IMM_I12(1)));
1963 		FAIL_IF(push_inst(compiler, XORI | RD(TMP_REG2) | RJ(src3) | IMM_I12((sljit_ins)bit_length - 1)));
1964 		src2_reg = TMP_REG1;
1965 	} else
1966 		FAIL_IF(push_inst(compiler, INST(SUB, op) | RD(TMP_REG2) | RJ(TMP_ZERO) | RK(src3)));
1967 
1968 	FAIL_IF(push_inst(compiler, ins3 | RD(TMP_REG1) | RJ(src2_reg) | RK(TMP_REG2)));
1969 	return push_inst(compiler, OR | RD(dst_reg) | RJ(dst_reg) | RK(TMP_REG1));
1970 }
1971 
1972 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
1973 	sljit_s32 src, sljit_sw srcw)
1974 {
1975 	sljit_s32 base = src & REG_MASK;
1976 
1977 	CHECK_ERROR();
1978 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
1979 	ADJUST_LOCAL_OFFSET(src, srcw);
1980 
1981 	switch (op) {
1982 	case SLJIT_FAST_RETURN:
1983 		if (FAST_IS_REG(src))
1984 			FAIL_IF(push_inst(compiler, ADDI_D | RD(RETURN_ADDR_REG) | RJ(src) | IMM_I12(0)));
1985 		else
1986 			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, RETURN_ADDR_REG, src, srcw));
1987 
1988 		return push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
1989 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
1990 		return SLJIT_SUCCESS;
1991 	case SLJIT_PREFETCH_L1:
1992 	case SLJIT_PREFETCH_L2:
1993 	case SLJIT_PREFETCH_L3:
1994 	case SLJIT_PREFETCH_ONCE:
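		/* Every prefetch variant is emitted as PRELD; the rd field carries the
		   prefetch hint (0 appears to mean prefetch for a load). */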
1995 		if (SLJIT_UNLIKELY(src & OFFS_REG_MASK)) {
1996 			srcw &= 0x3;
1997 			if (SLJIT_UNLIKELY(srcw))
1998 				FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(src)) | IMM_I12(srcw)));
1999 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2000 		} else {
2001 			if (base && srcw <= I12_MAX && srcw >= I12_MIN)
2002 				return push_inst(compiler, PRELD | RJ(base) | IMM_I12(srcw));
2003 
2004 			FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2005 			if (base != 0)
2006 				FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(base) | RK(TMP_REG1)));
2007 		}
2008 		return push_inst(compiler, PRELD | RD(0) | RJ(TMP_REG1));
2009 	}
2010 	return SLJIT_SUCCESS;
2011 }
2012 
2013 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2014 	sljit_s32 dst, sljit_sw dstw)
2015 {
2016 	sljit_s32 dst_r;
2017 
2018 	CHECK_ERROR();
2019 	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2020 	ADJUST_LOCAL_OFFSET(dst, dstw);
2021 
2022 	switch (op) {
2023 	case SLJIT_FAST_ENTER:
2024 		if (FAST_IS_REG(dst))
2025 			return push_inst(compiler, ADDI_D | RD(dst) | RJ(RETURN_ADDR_REG) | IMM_I12(0));
2026 
2027 		SLJIT_ASSERT(RETURN_ADDR_REG == TMP_REG2);
2028 		break;
2029 	case SLJIT_GET_RETURN_ADDRESS:
2030 		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2031 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size - SSIZE_OF(sw)));
2032 		break;
2033 	}
2034 
2035 	if (dst & SLJIT_MEM)
2036 		return emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw);
2037 
2038 	return SLJIT_SUCCESS;
2039 }
2040 
2041 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2042 {
2043 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2044 
2045 	if (type == SLJIT_GP_REGISTER)
2046 		return reg_map[reg];
2047 
2048 	if (type != SLJIT_FLOAT_REGISTER)
2049 		return -1;
2050 
2051 	return freg_map[reg];
2052 }
2053 
2054 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2055 	void *instruction, sljit_u32 size)
2056 {
2057 	SLJIT_UNUSED_ARG(size);
2058 	CHECK_ERROR();
2059 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2060 
2061 	return push_inst(compiler, *(sljit_ins*)instruction);
2062 }
2063 
2064 /* --------------------------------------------------------------------- */
2065 /*  Floating point operators                                             */
2066 /* --------------------------------------------------------------------- */
2067 #define SET_COND(cond) (sljit_ins)(cond << 15)
2068 
2069 #define COND_CUN SET_COND(0x8)	 /* UN */
2070 #define COND_CEQ SET_COND(0x4)	 /* EQ */
2071 #define COND_CUEQ SET_COND(0xc)	 /* UN EQ */
2072 #define COND_CLT SET_COND(0x2)	 /* LT */
2073 #define COND_CULT SET_COND(0xa)	 /* UN LT */
2074 #define COND_CLE SET_COND(0x6)	 /* LT EQ */
2075 #define COND_CULE SET_COND(0xe)	 /* UN LT EQ */
2076 #define COND_CNE SET_COND(0x10)	 /* GT LT */
2077 #define COND_CUNE SET_COND(0x18) /* UN GT LT */
2078 #define COND_COR SET_COND(0x14)	 /* GT LT EQ */
2079 
2080 #define FINST(inst, type) (sljit_ins)((type & SLJIT_32) ? inst##_S : inst##_D)
2081 #define FCD(cd) (sljit_ins)(cd & 0x7)
2082 #define FCJ(cj) (sljit_ins)((cj & 0x7) << 5)
2083 #define FCA(ca) (sljit_ins)((ca & 0x7) << 15)
2084 #define F_OTHER_FLAG 1
2085 
2086 #define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_32) >> 7))
2087 
2088 /* Convert to integer, rounding toward zero. */
2089 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2090 	sljit_s32 dst, sljit_sw dstw,
2091 	sljit_s32 src, sljit_sw srcw)
2092 {
2093 	sljit_ins inst;
2094 	sljit_u32 word_data = 0;
2095 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2096 
2097 	switch (GET_OPCODE(op))
2098 	{
2099 	case SLJIT_CONV_SW_FROM_F64:
2100 		word_data = 1;
2101 		inst = FINST(FTINTRZ_L, op);
2102 		break;
2103 	case SLJIT_CONV_S32_FROM_F64:
2104 		inst = FINST(FTINTRZ_W, op);
2105 		break;
2106 	default:
2107 		inst = BREAK;
2108 		SLJIT_UNREACHABLE();
2109 	}
2110 
2111 	if (src & SLJIT_MEM) {
2112 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
2113 		src = TMP_FREG1;
2114 	}
2115 
2116 	FAIL_IF(push_inst(compiler, inst | FRD(TMP_FREG1) | FRJ(src)));
2117 	FAIL_IF(push_inst(compiler, FINST(MOVFR2GR, word_data) | RD(dst_r) | FRJ(TMP_FREG1)));
2118 
2119 	if (dst & SLJIT_MEM)
2120 		return emit_op_mem2(compiler, word_data ? WORD_DATA : INT_DATA, TMP_REG2, dst, dstw, 0, 0);
2121 	return SLJIT_SUCCESS;
2122 }
2123 
2124 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_s32 op,
2125 	sljit_s32 dst, sljit_sw dstw,
2126 	sljit_s32 src, sljit_sw srcw)
2127 {
2128 	sljit_ins inst;
2129 	sljit_u32 word_data = 0;
2130 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2131 
2132 	switch (GET_OPCODE(op))
2133 	{
2134 	case SLJIT_CONV_F64_FROM_SW:
2135 		word_data = 1;
2136 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2137 		break;
2138 	case SLJIT_CONV_F64_FROM_S32:
2139 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2140 		break;
2141 	default:
2142 		inst = BREAK;
2143 		SLJIT_UNREACHABLE();
2144 	}
2145 
2146 	if (src & SLJIT_MEM) {
2147 		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2148 		src = TMP_REG1;
2149 	} else if (src == SLJIT_IMM) {
2150 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
2151 			srcw = (sljit_s32)srcw;
2152 
2153 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2154 		src = TMP_REG1;
2155 	}
2156 	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2157 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2158 
2159 	if (dst & SLJIT_MEM)
2160 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2161 	return SLJIT_SUCCESS;
2162 }
2163 
2164 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2165 	sljit_s32 dst, sljit_sw dstw,
2166 	sljit_s32 src, sljit_sw srcw)
2167 {
2168 	return sljit_emit_fop1_conv_f64_from_w(compiler, op, dst, dstw, src, srcw);
2169 }
2170 
2171 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2172 	sljit_s32 dst, sljit_sw dstw,
2173 	sljit_s32 src, sljit_sw srcw)
2174 {
2175 	sljit_ins inst;
2176 	sljit_u32 word_data = 0;
2177 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2178 
2179 	switch (GET_OPCODE(op))
2180 	{
2181 	case SLJIT_CONV_F64_FROM_UW:
2182 		word_data = 1;
2183 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_L : FFINT_D_L);
2184 		break;
2185 	case SLJIT_CONV_F64_FROM_U32:
2186 		inst = (sljit_ins)((op & SLJIT_32) ? FFINT_S_W : FFINT_D_W);
2187 		break;
2188 	default:
2189 		inst = BREAK;
2190 		SLJIT_UNREACHABLE();
2191 	}
2192 
2193 	if (src & SLJIT_MEM) {
2194 		FAIL_IF(emit_op_mem2(compiler, (word_data ? WORD_DATA : INT_DATA) | LOAD_DATA, TMP_REG1, src, srcw, dst, dstw));
2195 		src = TMP_REG1;
2196 	} else if (src == SLJIT_IMM) {
2197 		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
2198 			srcw = (sljit_u32)srcw;
2199 
2200 		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2201 		src = TMP_REG1;
2202 	}
2203 
2204 	if (!word_data)
2205 		FAIL_IF(push_inst(compiler, SRLI_W | RD(src) | RJ(src) | IMM_I12(0)));
2206 
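	/* Values with the sign bit set cannot be converted directly by the signed ffint
	   instructions: such values are halved with round-to-odd ((x >> 1) | (x & 1)),
	   converted, and then doubled with an fadd, which still yields the correctly
	   rounded result. */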
2207 	FAIL_IF(push_inst(compiler, BLT | RJ(src) | RD(TMP_ZERO) | IMM_I16(4)));
2208 
2209 	FAIL_IF(push_inst(compiler, (word_data ? MOVGR2FR_D : MOVGR2FR_W) | FRD(dst_r) | RJ(src)));
2210 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2211 	FAIL_IF(push_inst(compiler, B | IMM_I26(7)));
2212 
2213 	FAIL_IF(push_inst(compiler, ANDI | RD(TMP_REG2) | RJ(src) | IMM_I12(1)));
2214 	FAIL_IF(push_inst(compiler, (word_data ? SRLI_D : SRLI_W) | RD(TMP_REG1) | RJ(src) | IMM_I12(1)));
2215 	FAIL_IF(push_inst(compiler, OR | RD(TMP_REG1) | RJ(TMP_REG1) | RK(TMP_REG2)));
2216 	FAIL_IF(push_inst(compiler, INST(MOVGR2FR, (!word_data)) | FRD(dst_r) | RJ(TMP_REG1)));
2217 	FAIL_IF(push_inst(compiler, inst | FRD(dst_r) | FRJ(dst_r)));
2218 	FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(dst_r) | FRK(dst_r)));
2219 
2220 	if (dst & SLJIT_MEM)
2221 		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
2222 	return SLJIT_SUCCESS;
2223 }
2224 
2225 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2226 	sljit_s32 src1, sljit_sw src1w,
2227 	sljit_s32 src2, sljit_sw src2w)
2228 {
2229 	if (src1 & SLJIT_MEM) {
2230 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2231 		src1 = TMP_FREG1;
2232 	}
2233 
2234 	if (src2 & SLJIT_MEM) {
2235 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
2236 		src2 = TMP_FREG2;
2237 	}
2238 
2239 	FAIL_IF(push_inst(compiler, XOR | RD(OTHER_FLAG) | RJ(OTHER_FLAG) | RK(OTHER_FLAG)));
2240 
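	/* The comparison sets condition flag register F_OTHER_FLAG, which is then copied
	   into OTHER_FLAG so the generic integer branch patterns can test the result. */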
2241 	switch (GET_FLAG_TYPE(op)) {
2242 	case SLJIT_F_EQUAL:
2243 	case SLJIT_ORDERED_EQUAL:
2244 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2245 		break;
2246 	case SLJIT_F_LESS:
2247 	case SLJIT_ORDERED_LESS:
2248 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2249 		break;
2250 	case SLJIT_F_GREATER:
2251 	case SLJIT_ORDERED_GREATER:
2252 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CLT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2253 		break;
2254 	case SLJIT_UNORDERED_OR_GREATER:
2255 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src2) | FRK(src1)));
2256 		break;
2257 	case SLJIT_UNORDERED_OR_LESS:
2258 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CULT | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2259 		break;
2260 	case SLJIT_UNORDERED_OR_EQUAL:
2261 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUEQ | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2262 		break;
2263 	default: /* SLJIT_UNORDERED */
2264 		FAIL_IF(push_inst(compiler, FINST(FCMP_COND, op) | COND_CUN | FCD(F_OTHER_FLAG) | FRJ(src1) | FRK(src2)));
2265 	}
2266 	return push_inst(compiler, MOVCF2GR | RD(OTHER_FLAG) | FCJ(F_OTHER_FLAG));
2267 }
2268 
2269 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2270 	sljit_s32 dst, sljit_sw dstw,
2271 	sljit_s32 src, sljit_sw srcw)
2272 {
2273 	sljit_s32 dst_r;
2274 
2275 	CHECK_ERROR();
2276 	compiler->cache_arg = 0;
2277 	compiler->cache_argw = 0;
2278 
2279 	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100) && !(DOUBLE_DATA & 0x2), float_transfer_bit_error);
2280 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2281 
2282 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
2283 		op ^= SLJIT_32;
2284 
2285 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2286 
2287 	if (src & SLJIT_MEM) {
2288 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
2289 		src = dst_r;
2290 	}
2291 
2292 	switch (GET_OPCODE(op)) {
2293 	case SLJIT_MOV_F64:
2294 		if (src != dst_r) {
2295 			if (dst_r != TMP_FREG1)
2296 				FAIL_IF(push_inst(compiler, FINST(FMOV, op) | FRD(dst_r) | FRJ(src)));
2297 			else
2298 				dst_r = src;
2299 		}
2300 		break;
2301 	case SLJIT_NEG_F64:
2302 		FAIL_IF(push_inst(compiler, FINST(FNEG, op) | FRD(dst_r) | FRJ(src)));
2303 		break;
2304 	case SLJIT_ABS_F64:
2305 		FAIL_IF(push_inst(compiler, FINST(FABS, op) | FRD(dst_r) | FRJ(src)));
2306 		break;
2307 	case SLJIT_CONV_F64_FROM_F32:
2308 		/* The SLJIT_32 bit is inverted because sljit_f32 needs to be loaded from memory. */
2309 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? FCVT_D_S : FCVT_S_D) | FRD(dst_r) | FRJ(src)));
2310 		op ^= SLJIT_32;
2311 		break;
2312 	}
2313 
2314 	if (dst & SLJIT_MEM)
2315 		return emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0);
2316 	return SLJIT_SUCCESS;
2317 }
2318 
2319 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2320 	sljit_s32 dst, sljit_sw dstw,
2321 	sljit_s32 src1, sljit_sw src1w,
2322 	sljit_s32 src2, sljit_sw src2w)
2323 {
2324 	sljit_s32 dst_r, flags = 0;
2325 
2326 	CHECK_ERROR();
2327 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2328 	ADJUST_LOCAL_OFFSET(dst, dstw);
2329 	ADJUST_LOCAL_OFFSET(src1, src1w);
2330 	ADJUST_LOCAL_OFFSET(src2, src2w);
2331 
2332 	compiler->cache_arg = 0;
2333 	compiler->cache_argw = 0;
2334 
2335 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;
2336 
2337 	if (src1 & SLJIT_MEM) {
2338 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
2339 			FAIL_IF(compiler->error);
2340 			src1 = TMP_FREG1;
2341 		} else
2342 			flags |= SLOW_SRC1;
2343 	}
2344 
2345 	if (src2 & SLJIT_MEM) {
2346 		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
2347 			FAIL_IF(compiler->error);
2348 			src2 = TMP_FREG2;
2349 		} else
2350 			flags |= SLOW_SRC2;
2351 	}
2352 
2353 	if ((flags & (SLOW_SRC1 | SLOW_SRC2)) == (SLOW_SRC1 | SLOW_SRC2)) {
2354 		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
2355 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
2356 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2357 		}
2358 		else {
2359 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
2360 			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2361 		}
2362 	}
2363 	else if (flags & SLOW_SRC1)
2364 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
2365 	else if (flags & SLOW_SRC2)
2366 		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
2367 
2368 	if (flags & SLOW_SRC1)
2369 		src1 = TMP_FREG1;
2370 	if (flags & SLOW_SRC2)
2371 		src2 = TMP_FREG2;
2372 
2373 	switch (GET_OPCODE(op)) {
2374 	case SLJIT_ADD_F64:
2375 		FAIL_IF(push_inst(compiler, FINST(FADD, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2376 		break;
2377 	case SLJIT_SUB_F64:
2378 		FAIL_IF(push_inst(compiler, FINST(FSUB, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2379 		break;
2380 	case SLJIT_MUL_F64:
2381 		FAIL_IF(push_inst(compiler, FINST(FMUL, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2382 		break;
2383 	case SLJIT_DIV_F64:
2384 		FAIL_IF(push_inst(compiler, FINST(FDIV, op) | FRD(dst_r) | FRJ(src1) | FRK(src2)));
2385 		break;
2386 	}
2387 
2388 	if (dst_r == TMP_FREG2)
2389 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));
2390 	return SLJIT_SUCCESS;
2391 }
2392 
2393 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
2394 	sljit_s32 dst_freg,
2395 	sljit_s32 src1, sljit_sw src1w,
2396 	sljit_s32 src2, sljit_sw src2w)
2397 {
2398 	sljit_s32 reg;
2399 
2400 	CHECK_ERROR();
2401 	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
2402 	ADJUST_LOCAL_OFFSET(src1, src1w);
2403 	ADJUST_LOCAL_OFFSET(src2, src2w);
2404 
2405 	if (src2 & SLJIT_MEM) {
2406 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src2, src2w, 0, 0));
2407 		src2 = TMP_FREG1;
2408 	}
2409 
2410 	if (src1 & SLJIT_MEM) {
2411 		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
2412 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, reg, src1, src1w, 0, 0));
2413 		src1 = reg;
2414 	}
2415 
2416 	return push_inst(compiler, FINST(FCOPYSIGN, op) | FRD(dst_freg) | FRJ(src1) | FRK(src2));
2417 }
2418 
2419 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2420 	sljit_s32 freg, sljit_f32 value)
2421 {
2422 	union {
2423 		sljit_s32 imm;
2424 		sljit_f32 value;
2425 	} u;
2426 
2427 	CHECK_ERROR();
2428 	CHECK(check_sljit_emit_fset32(compiler, freg, value));
2429 
2430 	u.value = value;
2431 
2432 	if (u.imm == 0)
2433 		return push_inst(compiler, MOVGR2FR_W | RJ(TMP_ZERO) | FRD(freg));
2434 
2435 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2436 	return push_inst(compiler, MOVGR2FR_W | RJ(TMP_REG1) | FRD(freg));
2437 }
2438 
2439 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2440 	sljit_s32 freg, sljit_f64 value)
2441 {
2442 	union {
2443 		sljit_sw imm;
2444 		sljit_f64 value;
2445 	} u;
2446 
2447 	CHECK_ERROR();
2448 	CHECK(check_sljit_emit_fset64(compiler, freg, value));
2449 
2450 	u.value = value;
2451 
2452 	if (u.imm == 0)
2453 		return push_inst(compiler, MOVGR2FR_D | RJ(TMP_ZERO) | FRD(freg));
2454 
2455 	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
2456 	return push_inst(compiler, MOVGR2FR_D | RJ(TMP_REG1) | FRD(freg));
2457 }
2458 
2459 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2460 	sljit_s32 freg, sljit_s32 reg)
2461 {
2462 	sljit_ins inst;
2463 
2464 	CHECK_ERROR();
2465 	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2466 
2467 	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2468 		inst = ((op & SLJIT_32) ? MOVGR2FR_W : MOVGR2FR_D) | FRD(freg) | RJ(reg);
2469 	else
2470 		inst = ((op & SLJIT_32) ? MOVFR2GR_S : MOVFR2GR_D) | RD(reg) | FRJ(freg);
2471 	return push_inst(compiler, inst);
2472 }
2473 
2474 /* --------------------------------------------------------------------- */
2475 /*  Conditional instructions                                             */
2476 /* --------------------------------------------------------------------- */
2477 
2478 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2479 {
2480 	struct sljit_label *label;
2481 
2482 	CHECK_ERROR_PTR();
2483 	CHECK_PTR(check_sljit_emit_label(compiler));
2484 
2485 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2486 		return compiler->last_label;
2487 
2488 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2489 	PTR_FAIL_IF(!label);
2490 	set_label(label, compiler);
2491 	return label;
2492 }
2493 
2494 static sljit_ins get_jump_instruction(sljit_s32 type)
2495 {
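	/* The returned branch is taken when the condition is NOT satisfied; it is used to
	   skip over the unconditional jump (or conditional move) that follows it. */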
2496 	switch (type) {
2497 	case SLJIT_EQUAL:
2498 	case SLJIT_ATOMIC_NOT_STORED:
2499 		return BNE | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2500 	case SLJIT_NOT_EQUAL:
2501 	case SLJIT_ATOMIC_STORED:
2502 		return BEQ | RJ(EQUAL_FLAG) | RD(TMP_ZERO);
2503 	case SLJIT_LESS:
2504 	case SLJIT_GREATER:
2505 	case SLJIT_SIG_LESS:
2506 	case SLJIT_SIG_GREATER:
2507 	case SLJIT_OVERFLOW:
2508 	case SLJIT_CARRY:
2509 		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2510 	case SLJIT_GREATER_EQUAL:
2511 	case SLJIT_LESS_EQUAL:
2512 	case SLJIT_SIG_GREATER_EQUAL:
2513 	case SLJIT_SIG_LESS_EQUAL:
2514 	case SLJIT_NOT_OVERFLOW:
2515 	case SLJIT_NOT_CARRY:
2516 		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2517 	case SLJIT_F_EQUAL:
2518 	case SLJIT_ORDERED_EQUAL:
2519 	case SLJIT_F_LESS:
2520 	case SLJIT_ORDERED_LESS:
2521 	case SLJIT_ORDERED_GREATER:
2522 	case SLJIT_UNORDERED_OR_GREATER:
2523 	case SLJIT_F_GREATER:
2524 	case SLJIT_UNORDERED_OR_LESS:
2525 	case SLJIT_UNORDERED_OR_EQUAL:
2526 	case SLJIT_UNORDERED:
2527 		return BEQ | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2528 	case SLJIT_ORDERED_NOT_EQUAL:
2529 	case SLJIT_ORDERED_LESS_EQUAL:
2530 	case SLJIT_ORDERED_GREATER_EQUAL:
2531 	case SLJIT_F_NOT_EQUAL:
2532 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
2533 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2534 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
2535 	case SLJIT_F_LESS_EQUAL:
2536 	case SLJIT_F_GREATER_EQUAL:
2537 	case SLJIT_ORDERED:
2538 		return BNE | RJ(OTHER_FLAG) | RD(TMP_ZERO);
2539 	default:
2540 		/* Not a conditional branch. */
2541 		return 0;
2542 	}
2543 }
2544 
2545 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2546 {
2547 	struct sljit_jump *jump;
2548 	sljit_ins inst;
2549 
2550 	CHECK_ERROR_PTR();
2551 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
2552 
2553 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2554 	PTR_FAIL_IF(!jump);
2555 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2556 	type &= 0xff;
2557 
2558 	inst = get_jump_instruction(type);
2559 
2560 	if (inst != 0) {
2561 		PTR_FAIL_IF(push_inst(compiler, inst));
2562 		jump->flags |= IS_COND;
2563 	}
2564 
2565 	jump->addr = compiler->size;
2566 	inst = JIRL | RJ(TMP_REG1) | IMM_I16(0);
2567 
2568 	if (type >= SLJIT_FAST_CALL) {
2569 		jump->flags |= IS_CALL;
2570 		inst |= RD(RETURN_ADDR_REG);
2571 	}
2572 
2573 	PTR_FAIL_IF(push_inst(compiler, inst));
2574 
2575 	/* Maximum number of instructions required for generating a constant. */
2576 	compiler->size += 3;
2577 
2578 	return jump;
2579 }
2580 
2581 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2582 	sljit_s32 arg_types)
2583 {
2584 	SLJIT_UNUSED_ARG(arg_types);
2585 	CHECK_ERROR_PTR();
2586 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2587 
2588 	if (type & SLJIT_CALL_RETURN) {
2589 		PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2590 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2591 	}
2592 
2593 	SLJIT_SKIP_CHECKS(compiler);
2594 	return sljit_emit_jump(compiler, type);
2595 }
2596 
2597 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_cmp(struct sljit_compiler *compiler, sljit_s32 type,
2598 	sljit_s32 src1, sljit_sw src1w,
2599 	sljit_s32 src2, sljit_sw src2w)
2600 {
2601 	struct sljit_jump *jump;
2602 	sljit_s32 flags;
2603 	sljit_ins inst;
2604 
2605 	CHECK_ERROR_PTR();
2606 	CHECK_PTR(check_sljit_emit_cmp(compiler, type, src1, src1w, src2, src2w));
2607 	ADJUST_LOCAL_OFFSET(src1, src1w);
2608 	ADJUST_LOCAL_OFFSET(src2, src2w);
2609 
2610 	compiler->cache_arg = 0;
2611 	compiler->cache_argw = 0;
2612 
2613 	flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2614 
2615 	if (src1 & SLJIT_MEM) {
2616 		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG1, src1, src1w, src2, src2w));
2617 		src1 = TMP_REG1;
2618 	}
2619 
2620 	if (src2 & SLJIT_MEM) {
2621 		PTR_FAIL_IF(emit_op_mem2(compiler, flags, TMP_REG2, src2, src2w, 0, 0));
2622 		src2 = TMP_REG2;
2623 	}
2624 
2625 	if (src1 == SLJIT_IMM) {
2626 		if (src1w != 0) {
2627 			PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2628 			src1 = TMP_REG1;
2629 		}
2630 		else
2631 			src1 = TMP_ZERO;
2632 	}
2633 
2634 	if (src2 == SLJIT_IMM) {
2635 		if (src2w != 0) {
2636 			PTR_FAIL_IF(load_immediate(compiler, TMP_REG2, src2w));
2637 			src2 = TMP_REG2;
2638 		}
2639 		else
2640 			src2 = TMP_ZERO;
2641 	}
2642 
2643 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2644 	PTR_FAIL_IF(!jump);
2645 	set_jump(jump, compiler, (sljit_u32)((type & SLJIT_REWRITABLE_JUMP) | IS_COND));
2646 	type &= 0xff;
2647 
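	/* As in get_jump_instruction(), the branch below uses the inverted condition so
	   that it skips the JIRL which performs the actual jump. */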
2648 	switch (type) {
2649 	case SLJIT_EQUAL:
2650 		inst = BNE | RJ(src1) | RD(src2);
2651 		break;
2652 	case SLJIT_NOT_EQUAL:
2653 		inst = BEQ | RJ(src1) | RD(src2);
2654 		break;
2655 	case SLJIT_LESS:
2656 		inst = BGEU | RJ(src1) | RD(src2);
2657 		break;
2658 	case SLJIT_GREATER_EQUAL:
2659 		inst = BLTU | RJ(src1) | RD(src2);
2660 		break;
2661 	case SLJIT_GREATER:
2662 		inst = BGEU | RJ(src2) | RD(src1);
2663 		break;
2664 	case SLJIT_LESS_EQUAL:
2665 		inst = BLTU | RJ(src2) | RD(src1);
2666 		break;
2667 	case SLJIT_SIG_LESS:
2668 		inst = BGE | RJ(src1) | RD(src2);
2669 		break;
2670 	case SLJIT_SIG_GREATER_EQUAL:
2671 		inst = BLT | RJ(src1) | RD(src2);
2672 		break;
2673 	case SLJIT_SIG_GREATER:
2674 		inst = BGE | RJ(src2) | RD(src1);
2675 		break;
2676 	case SLJIT_SIG_LESS_EQUAL:
2677 		inst = BLT | RJ(src2) | RD(src1);
2678 		break;
2679 	default:
2680 		inst = BREAK;
2681 		SLJIT_UNREACHABLE();
2682 	}
2683 
2684 	PTR_FAIL_IF(push_inst(compiler, inst));
2685 
2686 	jump->addr = compiler->size;
2687 	PTR_FAIL_IF(push_inst(compiler, JIRL | RD(TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2688 
2689 	/* Maximum number of instructions required for generating a constant. */
2690 	compiler->size += 3;
2691 
2692 	return jump;
2693 }
2694 
2695 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2696 {
2697 	struct sljit_jump *jump;
2698 
2699 	CHECK_ERROR();
2700 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2701 
2702 	if (src != SLJIT_IMM) {
2703 		if (src & SLJIT_MEM) {
2704 			ADJUST_LOCAL_OFFSET(src, srcw);
2705 			FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2706 			src = TMP_REG1;
2707 		}
2708 		return push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(src) | IMM_I12(0));
2709 	}
2710 
2711 	/* These jumps are converted to jump/call instructions when possible. */
2712 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2713 	FAIL_IF(!jump);
2714 	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_CALL : 0));
2715 	jump->u.target = (sljit_uw)srcw;
2716 
2717 	jump->addr = compiler->size;
2718 	FAIL_IF(push_inst(compiler, JIRL | RD((type >= SLJIT_FAST_CALL) ? RETURN_ADDR_REG : TMP_ZERO) | RJ(TMP_REG1) | IMM_I12(0)));
2719 
2720 	/* Maximum number of instructions required for generating a constant. */
2721 	compiler->size += 3;
2722 
2723 	return SLJIT_SUCCESS;
2724 }
2725 
2726 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2727 	sljit_s32 arg_types,
2728 	sljit_s32 src, sljit_sw srcw)
2729 {
2730 	SLJIT_UNUSED_ARG(arg_types);
2731 	CHECK_ERROR();
2732 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2733 
2734 	if (src & SLJIT_MEM) {
2735 		ADJUST_LOCAL_OFFSET(src, srcw);
2736 		FAIL_IF(emit_op_mem(compiler, WORD_DATA | LOAD_DATA, TMP_REG1, src, srcw));
2737 		src = TMP_REG1;
2738 	}
2739 
2740 	if (type & SLJIT_CALL_RETURN) {
2741 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2742 			FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG1) | RJ(src) | IMM_I12(0)));
2743 			src = TMP_REG1;
2744 		}
2745 
2746 		FAIL_IF(emit_stack_frame_release(compiler, 0));
2747 		type = SLJIT_JUMP;
2748 	}
2749 
2750 	SLJIT_SKIP_CHECKS(compiler);
2751 	return sljit_emit_ijump(compiler, type, src, srcw);
2752 }
2753 
2754 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2755 	sljit_s32 dst, sljit_sw dstw,
2756 	sljit_s32 type)
2757 {
2758 	sljit_s32 src_r, dst_r, invert;
2759 	sljit_s32 saved_op = op;
2760 	sljit_s32 mem_type = ((op & SLJIT_32) || op == SLJIT_MOV32) ? (INT_DATA | SIGNED_DATA) : WORD_DATA;
2761 
2762 	CHECK_ERROR();
2763 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2764 	ADJUST_LOCAL_OFFSET(dst, dstw);
2765 
2766 	op = GET_OPCODE(op);
2767 	dst_r = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;
2768 
2769 	compiler->cache_arg = 0;
2770 	compiler->cache_argw = 0;
2771 
2772 	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
2773 		FAIL_IF(emit_op_mem2(compiler, mem_type | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));
2774 
2775 	if (type < SLJIT_F_EQUAL) {
2776 		src_r = OTHER_FLAG;
2777 		invert = type & 0x1;
2778 
2779 		switch (type) {
2780 		case SLJIT_EQUAL:
2781 		case SLJIT_NOT_EQUAL:
2782 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2783 			src_r = dst_r;
2784 			break;
2785 		case SLJIT_ATOMIC_STORED:
2786 		case SLJIT_ATOMIC_NOT_STORED:
2787 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(EQUAL_FLAG) | IMM_I12(1)));
2788 			src_r = dst_r;
2789 			invert ^= 0x1;
2790 			break;
2791 		case SLJIT_OVERFLOW:
2792 		case SLJIT_NOT_OVERFLOW:
2793 			if (compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) {
2794 				src_r = OTHER_FLAG;
2795 				break;
2796 			}
2797 			FAIL_IF(push_inst(compiler, SLTUI | RD(dst_r) | RJ(OTHER_FLAG) | IMM_I12(1)));
2798 			src_r = dst_r;
2799 			invert ^= 0x1;
2800 			break;
2801 		}
2802 	} else {
2803 		invert = 0;
2804 		src_r = OTHER_FLAG;
2805 
2806 		switch (type) {
2807 		case SLJIT_ORDERED_NOT_EQUAL:
2808 		case SLJIT_ORDERED_LESS_EQUAL:
2809 		case SLJIT_ORDERED_GREATER_EQUAL:
2810 		case SLJIT_F_NOT_EQUAL:
2811 		case SLJIT_UNORDERED_OR_NOT_EQUAL:
2812 		case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2813 		case SLJIT_UNORDERED_OR_LESS_EQUAL:
2814 		case SLJIT_F_LESS_EQUAL:
2815 		case SLJIT_F_GREATER_EQUAL:
2816 		case SLJIT_ORDERED:
2817 			invert = 1;
2818 			break;
2819 		}
2820 	}
2821 
2822 	if (invert) {
2823 		FAIL_IF(push_inst(compiler, XORI | RD(dst_r) | RJ(src_r) | IMM_I12(1)));
2824 		src_r = dst_r;
2825 	}
2826 
2827 	if (op < SLJIT_ADD) {
2828 		if (dst & SLJIT_MEM)
2829 			return emit_op_mem(compiler, mem_type, src_r, dst, dstw);
2830 
2831 		if (src_r != dst_r)
2832 			return push_inst(compiler, ADDI_D | RD(dst_r) | RJ(src_r) | IMM_I12(0));
2833 		return SLJIT_SUCCESS;
2834 	}
2835 
2836 	mem_type |= CUMULATIVE_OP | IMM_OP | ALT_KEEP_CACHE;
2837 
2838 	if (dst & SLJIT_MEM)
2839 		return emit_op(compiler, saved_op, mem_type, dst, dstw, TMP_REG1, 0, src_r, 0);
2840 	return emit_op(compiler, saved_op, mem_type, dst, dstw, dst, dstw, src_r, 0);
2841 }
2842 
2843 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2844 	sljit_s32 dst_reg,
2845 	sljit_s32 src1, sljit_sw src1w,
2846 	sljit_s32 src2_reg)
2847 {
2848 	sljit_ins *ptr;
2849 	sljit_uw size;
2850 	sljit_s32 inp_flags = ((type & SLJIT_32) ? INT_DATA : WORD_DATA) | LOAD_DATA;
2851 
2852 	CHECK_ERROR();
2853 	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
2854 	ADJUST_LOCAL_OFFSET(src1, src1w);
2855 
2856 	if (dst_reg != src2_reg) {
2857 		if (dst_reg == src1) {
2858 			src1 = src2_reg;
2859 			src1w = 0;
2860 			type ^= 0x1;
2861 		} else {
2862 			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
2863 				FAIL_IF(push_inst(compiler, ADDI_D | RD(TMP_REG2) | RJ(dst_reg) | IMM_I12(0)));
2864 
2865 				if ((src1 & REG_MASK) == dst_reg)
2866 					src1 = (src1 & ~REG_MASK) | TMP_REG2;
2867 
2868 				if (OFFS_REG(src1) == dst_reg)
2869 					src1 = (src1 & ~OFFS_REG_MASK) | TO_OFFS_REG(TMP_REG2);
2870 			}
2871 
2872 			FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src2_reg) | IMM_I12(0)));
2873 		}
2874 	}
2875 
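	/* One instruction slot is reserved here and patched below with an inverted
	   condition branch (see get_jump_instruction) that jumps over the move of src1
	   when the condition is not met. */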
2876 	size = compiler->size;
2877 
2878 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
2879 	FAIL_IF(!ptr);
2880 	compiler->size++;
2881 
2882 	if (src1 & SLJIT_MEM) {
2883 		FAIL_IF(emit_op_mem(compiler, inp_flags, dst_reg, src1, src1w));
2884 	} else if (src1 == SLJIT_IMM) {
2885 		if (type & SLJIT_32)
2886 			src1w = (sljit_s32)src1w;
2887 		FAIL_IF(load_immediate(compiler, dst_reg, src1w));
2888 	} else
2889 		FAIL_IF(push_inst(compiler, ADDI_D | RD(dst_reg) | RJ(src1) | IMM_I12(0)));
2890 
2891 	*ptr = get_jump_instruction(type & ~SLJIT_32) | IMM_I16(compiler->size - size);
2892 	return SLJIT_SUCCESS;
2893 }
2894 
2895 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
2896 	sljit_s32 dst_freg,
2897 	sljit_s32 src1, sljit_sw src1w,
2898 	sljit_s32 src2_freg)
2899 {
2900 	sljit_s32 invert = 0;
2901 
2902 	CHECK_ERROR();
2903 	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
2904 
2905 	ADJUST_LOCAL_OFFSET(src1, src1w);
2906 
2907 	if ((type & ~SLJIT_32) == SLJIT_EQUAL || (type & ~SLJIT_32) == SLJIT_NOT_EQUAL) {
2908 		if ((type & ~SLJIT_32) == SLJIT_EQUAL)
2909 			invert = 1;
2910 		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(EQUAL_FLAG)));
2911 	}
2912 	else
2913 		FAIL_IF(push_inst(compiler, MOVGR2CF | FCD(F_OTHER_FLAG) | RJ(OTHER_FLAG)));
2914 
2915 	if (src1 & SLJIT_MEM) {
2916 		FAIL_IF(emit_op_mem(compiler, FLOAT_DATA(type) | LOAD_DATA, dst_freg, src1, src1w));
2917 		if (invert)
2918 			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(dst_freg) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
2919 		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(dst_freg) | FCA(F_OTHER_FLAG));
2920 	} else {
2921 		if (invert)
2922 			return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src1) | FRK(src2_freg) | FCA(F_OTHER_FLAG));
2923 		return push_inst(compiler, FSEL | FRD(dst_freg) | FRJ(src2_freg) | FRK(src1) | FCA(F_OTHER_FLAG));
2924 	}
2925 }
2926 
2927 #undef FLOAT_DATA
2928 
2929 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
2930 	sljit_s32 reg,
2931 	sljit_s32 mem, sljit_sw memw)
2932 {
2933 	sljit_s32 flags;
2934 
2935 	CHECK_ERROR();
2936 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
2937 
2938 	if (!(reg & REG_PAIR_MASK))
2939 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
2940 
2941 	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
2942 		memw &= 0x3;
2943 
2944 		if (SLJIT_UNLIKELY(memw != 0)) {
2945 			FAIL_IF(push_inst(compiler, SLLI_D | RD(TMP_REG1) | RJ(OFFS_REG(mem)) | IMM_I12(memw)));
2946 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
2947 		} else
2948 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(mem & REG_MASK) | RK(OFFS_REG(mem))));
2949 
2950 		mem = TMP_REG1;
2951 		memw = 0;
2952 	} else if (memw > I12_MAX - SSIZE_OF(sw) || memw < I12_MIN) {
2953 		if (((memw + 0x800) & 0xfff) <= 0xfff - SSIZE_OF(sw)) {
2954 			FAIL_IF(load_immediate(compiler, TMP_REG1, TO_ARGW_HI(memw)));
2955 			memw &= 0xfff;
2956 		} else {
2957 			FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
2958 			memw = 0;
2959 		}
2960 
2961 		if (mem & REG_MASK)
2962 			FAIL_IF(push_inst(compiler, ADD_D | RD(TMP_REG1) | RJ(TMP_REG1) | RK(mem & REG_MASK)));
2963 
2964 		mem = TMP_REG1;
2965 	} else {
2966 		mem &= REG_MASK;
2967 		memw &= 0xfff;
2968 	}
2969 
2970 	SLJIT_ASSERT((memw >= 0 && memw <= I12_MAX - SSIZE_OF(sw)) || (memw > I12_MAX && memw <= 0xfff));
2971 
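	/* When loading and the base register is also the first register of the pair,
	   the second half is loaded first so the base is not clobbered before the
	   second memory access. */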
2972 	if (!(type & SLJIT_MEM_STORE) && mem == REG_PAIR_FIRST(reg)) {
2973 		FAIL_IF(push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff));
2974 		return push_mem_inst(compiler, WORD_DATA | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
2975 	}
2976 
2977 	flags = WORD_DATA | (!(type & SLJIT_MEM_STORE) ? LOAD_DATA : 0);
2978 
2979 	FAIL_IF(push_mem_inst(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
2980 	return push_mem_inst(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), (memw + SSIZE_OF(sw)) & 0xfff);
2981 }
2982 
2983 #undef TO_ARGW_HI
2984 
2985 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler,
2986 	sljit_s32 op,
2987 	sljit_s32 dst_reg,
2988 	sljit_s32 mem_reg)
2989 {
2990 	sljit_ins ins;
2991 
2992 	CHECK_ERROR();
2993 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
2994 
2995 	if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features()))
2996 		return SLJIT_ERR_UNSUPPORTED;
2997 
2998 	switch(GET_OPCODE(op)) {
2999 	case SLJIT_MOV_U8:
3000 		ins = LD_BU;
3001 		break;
3002 	case SLJIT_MOV_U16:
3003 		ins = LD_HU;
3004 		break;
3005 	case SLJIT_MOV32:
3006 		ins = LD_W;
3007 		break;
3008 	case SLJIT_MOV_U32:
3009 		ins = LD_WU;
3010 		break;
3011 	default:
3012 		ins = LD_D;
3013 		break;
3014 	}
3015 
3016 	return push_inst(compiler, ins | RD(dst_reg) | RJ(mem_reg) | IMM_I12(0));
3017 }
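/* The "atomic" load above is an ordinary load: atomicity comes from pairing it with
   sljit_emit_atomic_store() below, which re-checks the loaded value with AMCAS. Both
   entry points return SLJIT_ERR_UNSUPPORTED when the CPU lacks the LAMCAS extension,
   so callers are expected to provide a fallback path. */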
3018 
3019 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler,
3020 	sljit_s32 op,
3021 	sljit_s32 src_reg,
3022 	sljit_s32 mem_reg,
3023 	sljit_s32 temp_reg)
3024 {
3025 	sljit_ins ins = 0;
3026 	sljit_ins unsign = 0;
3027 	sljit_s32 tmp = temp_reg;
3028 
3029 	CHECK_ERROR();
3030 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
3031 
3032 	if (!(LOONGARCH_FEATURE_LAMCAS & get_cpu_features()))
3033 		return SLJIT_ERR_UNSUPPORTED;
3034 
3035 	switch (GET_OPCODE(op)) {
3036 	case SLJIT_MOV_U8:
3037 		ins = AMCAS_B;
3038 		unsign = BSTRPICK_D | (7 << 16);
3039 		break;
3040 	case SLJIT_MOV_U16:
3041 		ins = AMCAS_H;
3042 		unsign = BSTRPICK_D | (15 << 16);
3043 		break;
3044 	case SLJIT_MOV32:
3045 		ins = AMCAS_W;
3046 		break;
3047 	case SLJIT_MOV_U32:
3048 		ins = AMCAS_W;
3049 		unsign = BSTRPICK_D | (31 << 16);
3050 		break;
3051 	default:
3052 		ins = AMCAS_D;
3053 		break;
3054 	}
3055 
3056 	if (op & SLJIT_SET_ATOMIC_STORED) {
3057 		FAIL_IF(push_inst(compiler, XOR | RD(TMP_REG1) | RJ(temp_reg) | RK(TMP_ZERO)));
3058 		tmp = TMP_REG1;
3059 	}
3060 	FAIL_IF(push_inst(compiler, ins | RD(tmp) | RJ(mem_reg) | RK(src_reg)));
3061 	if (!(op & SLJIT_SET_ATOMIC_STORED))
3062 		return SLJIT_SUCCESS;
3063 
3064 	if (unsign)
3065 		FAIL_IF(push_inst(compiler, unsign | RD(tmp) | RJ(tmp)));
3066 
3067 	FAIL_IF(push_inst(compiler, XOR | RD(EQUAL_FLAG) | RJ(tmp) | RK(temp_reg)));
3068 	return push_inst(compiler, SLTUI | RD(EQUAL_FLAG) | RJ(EQUAL_FLAG) | IMM_I12(1));
3069 }
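/* AMCAS compares the value at [mem_reg] with the expected value in rd and stores
   src_reg on a match; the old memory value is returned in rd in either case. When
   SLJIT_SET_ATOMIC_STORED is requested, temp_reg (the expected value) is first copied
   to TMP_REG1 so it survives the exchange, the sub-word results are zero-extended with
   BSTRPICK.D to match a zero-extended expectation, and the final XOR + SLTUI set
   EQUAL_FLAG to 1 exactly when the observed and expected values match, i.e. when the
   store succeeded. */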
3070 
3071 static SLJIT_INLINE sljit_s32 emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw init_value, sljit_ins last_ins)
3072 {
3073 	SLJIT_UNUSED_ARG(last_ins);
3074 
3075 	FAIL_IF(push_inst(compiler, LU12I_W | RD(dst) | (sljit_ins)(((init_value & 0xffffffff) >> 12) << 5)));
3076 	FAIL_IF(push_inst(compiler, LU32I_D | RD(dst) | (sljit_ins)(((init_value >> 32) & 0xfffff) << 5)));
3077 	FAIL_IF(push_inst(compiler, LU52I_D | RD(dst) | RJ(dst) | (sljit_ins)(IMM_I12(init_value >> 52))));
3078 	return push_inst(compiler, ORI | RD(dst) | RJ(dst) | IMM_I12(init_value));
3079 }
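/* emit_const() above assembles the 64-bit constant from four fixed-width pieces, so the
   sequence can later be patched in place by sljit_set_jump_addr():
	lu12i.w  dst, imm20       - bits 31..12 (bits 11..0 cleared, sign-extended)
	lu32i.d  dst, imm20       - bits 51..32
	lu52i.d  dst, dst, imm12  - bits 63..52
	ori      dst, dst, imm12  - bits 11..0 */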
3080 
3081 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3082 {
3083 	sljit_ins *inst = (sljit_ins*)addr;
3084 	SLJIT_UNUSED_ARG(executable_offset);
3085 
3086 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3087 
3088 	SLJIT_ASSERT((inst[0] & OPC_1RI20(0x7f)) == LU12I_W);
3089 	inst[0] = (inst[0] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target & 0xffffffff) >> 12) << 5);
3090 
3091 	SLJIT_ASSERT((inst[1] & OPC_1RI20(0x7f)) == LU32I_D);
3092 	inst[1] = (inst[1] & (OPC_1RI20(0x7f) | 0x1f)) | (sljit_ins)(((new_target >> 32) & 0xfffff) << 5);
3093 
3094 	SLJIT_ASSERT((inst[2] & OPC_2RI12(0x3ff)) == LU52I_D);
3095 	inst[2] = (inst[2] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target >> 52);
3096 
3097 	SLJIT_ASSERT((inst[3] & OPC_2RI12(0x3ff)) == ORI || (inst[3] & OPC_2RI16(0x3f)) == JIRL);
3098 	if ((inst[3] & OPC_2RI12(0x3ff)) == ORI)
3099 		inst[3] = (inst[3] & (OPC_2RI12(0x3ff) | 0x3ff)) | IMM_I12(new_target);
3100 	else
3101 		inst[3] = (inst[3] & (OPC_2RI16(0x3f) | 0x3ff)) | IMM_I12((new_target & 0xfff) >> 2);
3102 
3103 	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3104 
3105 	inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3106 	SLJIT_CACHE_FLUSH(inst, inst + 4);
3107 }
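/* sljit_set_jump_addr() above patches the four-instruction sequence emitted by
   emit_const() in place: each slot keeps its opcode and register fields and only the
   immediate field receives the corresponding slice of new_target. The last slot is
   either the ORI of a constant/put_label sequence or the JIRL of an indirect jump or
   call, hence the two encodings accepted by the final assertion. */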
3108 
3109 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3110 {
3111 	struct sljit_const *const_;
3112 	sljit_s32 dst_r;
3113 
3114 	CHECK_ERROR_PTR();
3115 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3116 	ADJUST_LOCAL_OFFSET(dst, dstw);
3117 
3118 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3119 	PTR_FAIL_IF(!const_);
3120 	set_const(const_, compiler);
3121 
3122 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3123 	PTR_FAIL_IF(emit_const(compiler, dst_r, init_value, 0));
3124 
3125 	if (dst & SLJIT_MEM)
3126 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3127 
3128 	return const_;
3129 }
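/* Usage sketch (illustrative only, using the public sljit API): emit a patchable
   constant, then rewrite it after code generation:

	struct sljit_const *c = sljit_emit_const(compiler, SLJIT_R0, 0, 0);
	...
	void *code = sljit_generate_code(compiler);
	sljit_set_const(sljit_get_const_addr(c), 42,
		sljit_get_executable_offset(compiler));
*/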
3130 
3131 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3132 {
3133 	struct sljit_put_label *put_label;
3134 	sljit_s32 dst_r;
3135 
3136 	CHECK_ERROR_PTR();
3137 	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3138 	ADJUST_LOCAL_OFFSET(dst, dstw);
3139 
3140 	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3141 	PTR_FAIL_IF(!put_label);
3142 	set_put_label(put_label, compiler, 0);
3143 
3144 	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
3145 	PTR_FAIL_IF(push_inst(compiler, (sljit_ins)dst_r));
3146 
3147 	compiler->size += 3;
3148 
3149 	if (dst & SLJIT_MEM)
3150 		PTR_FAIL_IF(emit_op_mem(compiler, WORD_DATA, TMP_REG2, dst, dstw));
3151 
3152 	return put_label;
3153 }
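/* Only a placeholder word holding the destination register is emitted here; the three
   extra slots reserved through compiler->size are expected to be filled by the code
   generator with the same lu12i.w/lu32i.d/lu52i.d/ori pattern once the label address is
   known, after which the sequence can be repatched through sljit_set_jump_addr(). */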
3154 
3155 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3156 {
3157 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3158 }
3159