1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 #ifdef __SOFTFP__
30 	return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
31 #else
32 	return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
33 #endif
34 }
35 
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Maps abstract sljit registers to ARM core register numbers. The last
   three entries serve TMP_REG1 (r12/ip), TMP_REG2 (r14/lr) and
   TMP_PC (r15/pc); index 0 is unused. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Maps abstract sljit float registers to VFP register numbers. Combined
   with freg_ebit_map below, the single precision register number encoded
   into an instruction is (freg_map << 1) | freg_ebit_map — the upper half
   of the table appears to cover the second (odd) single precision half of
   each register pair; confirm against SLJIT_F64_SECOND usage. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra (low) bit of the encoded VFP register number; placed into the
   M/D/N bit of the instruction by VM4/VD4/VN4 below. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};

/* Extracts 'bits' bits of 'src' starting at bit 'from' and moves them to
   bit position 'to' of the result (all other result bits are zero). */
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to))

/* Two's complement negation performed on the unsigned representation. */
#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm))

/* Thumb16 encodings. */
/* Register field placements for 16 bit instructions:
   RD3 bits 0-2, RN3 bits 3-5, RM3 bits 6-8, RDN3 bits 8-10. */
#define RD3(rd) ((sljit_ins)reg_map[rd])
#define RN3(rn) ((sljit_ins)reg_map[rn] << 3)
#define RM3(rm) ((sljit_ins)reg_map[rm] << 6)
#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8)
#define IMM3(imm) ((sljit_ins)imm << 6)
#define IMM8(imm) ((sljit_ins)imm)

/* Thumb16 helpers. */
/* Splits a 4 bit register number into the split Rd field (bits 0-2 and 7)
   and places Rn into bits 3-6, as used by the high-register forms. */
#define SET_REGS44(rd, rn) \
	(((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4))
/* Low registers (r0-r7) are required by most 16 bit encodings. */
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings. */
/* Register field placements for 32 bit instructions:
   RM4 bits 0-3, RD4 bits 8-11, RT4 bits 12-15, RN4 bits 16-19. */
#define RM4(rm) ((sljit_ins)reg_map[rm])
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)

/* VFP register fields: 4 bit register number plus its extra bit. */
#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))

/* 5 bit immediate split into imm3 (bits 12-14) and imm2 (bits 6-7). */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
/* 12 bit immediate split into i (bit 26), imm3 (bits 12-14) and imm8. */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff))
103 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits).
   Values that fit in 16 bits are Thumb16 opcodes (one halfword, see
   push_inst16); 32 bit values are Thumb32 opcodes emitted high halfword
   first (see push_inst32). The _W suffix marks the wide (32 bit) form
   of an instruction that also has a 16 bit encoding. */
#define ADCI		0xf1400000
#define ADCS		0x4140
#define ADC_W		0xeb400000
#define ADD		0x4400
#define ADDS		0x1800
#define ADDSI3		0x1c00
#define ADDSI8		0x3000
#define ADDWI		0xf2000000
#define ADD_SP		0x4485
#define ADD_SP_I	0xb000
#define ADD_W		0xeb000000
#define ADD_WI		0xf1000000
#define ANDI		0xf0000000
#define ANDS		0x4000
#define AND_W		0xea000000
#define ASRS		0x4100
#define ASRSI		0x1000
#define ASR_W		0xfa40f000
#define ASR_WI		0xea4f0020
#define BCC		0xd000
#define BICI		0xf0200000
#define BKPT		0xbe00
#define BLX		0x4780
#define BX		0x4700
#define CLZ		0xfab0f080
#define CMNI_W		0xf1100f00
#define CMP		0x4280
#define CMPI		0x2800
#define CMPI_W		0xf1b00f00
#define CMP_X		0x4500
#define CMP_W		0xebb00f00
#define EORI		0xf0800000
#define EORS		0x4040
#define EOR_W		0xea800000
#define IT		0xbf00
#define LDR		0xf8d00000
#define LDR_SP		0x9800
#define LDRD		0xe9500000
#define LDREX		0xe8500f00
#define LDREXB		0xe8d00f4f
#define LDREXH		0xe8d00f5f
#define LDRI		0xf8500800
#define LSLS		0x4080
#define LSLSI		0x0000
#define LSL_W		0xfa00f000
#define LSL_WI		0xea4f0000
#define LSRS		0x40c0
#define LSRSI		0x0800
#define LSR_W		0xfa20f000
#define LSR_WI		0xea4f0010
#define MOV		0x4600
#define MOVS		0x0000
#define MOVSI		0x2000
#define MOVT		0xf2c00000
#define MOVW		0xf2400000
#define MOV_W		0xea4f0000
#define MOV_WI		0xf04f0000
#define MUL		0xfb00f000
#define MVNS		0x43c0
#define MVN_W		0xea6f0000
#define MVN_WI		0xf06f0000
#define NOP		0xbf00
#define ORNI		0xf0600000
#define ORRI		0xf0400000
#define ORRS		0x4300
#define ORR_W		0xea400000
#define POP		0xbc00
#define POP_W		0xe8bd0000
#define PUSH		0xb400
#define PUSH_W		0xe92d0000
#define REV		0xba00
#define REV_W		0xfa90f080
#define REV16		0xba40
#define REV16_W		0xfa90f090
#define RBIT		0xfa90f0a0
#define RORS		0x41c0
#define ROR_W		0xfa60f000
#define ROR_WI		0xea4f0030
#define RSB_WI		0xf1c00000
#define RSBSI		0x4240
#define SBCI		0xf1600000
#define SBCS		0x4180
#define SBC_W		0xeb600000
#define SDIV		0xfb90f0f0
#define SMULL		0xfb800000
#define STR_SP		0x9000
#define STRD		0xe9400000
#define STREX		0xe8400000
#define STREXB		0xe8c00f40
#define STREXH		0xe8c00f50
#define SUBS		0x1a00
#define SUBSI3		0x1e00
#define SUBSI8		0x3800
#define SUB_W		0xeba00000
#define SUBWI		0xf2a00000
#define SUB_SP_I	0xb080
#define SUB_WI		0xf1a00000
#define SXTB		0xb240
#define SXTB_W		0xfa4ff080
#define SXTH		0xb200
#define SXTH_W		0xfa0ff080
#define TST		0x4200
#define TSTI		0xf0000f00
#define TST_W		0xea000f00
#define UDIV		0xfbb0f0f0
#define UMULL		0xfba00000
#define UXTB		0xb2c0
#define UXTB_W		0xfa5ff080
#define UXTH		0xb280
#define UXTH_W		0xfa1ff080
/* VFP / NEON floating point and vector forms. */
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VAND		0xef000110
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F32_U32	0xeeb80a40
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VDUP		0xee800b10
#define VDUP_s		0xffb00c00
#define VEOR		0xff000110
#define VLD1		0xf9200000
#define VLD1_r		0xf9a00c00
#define VLD1_s		0xf9a00000
#define VLDR_F32	0xed100a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMOV2		0xec400a10
#define VMOV_i		0xef800010
#define VMOV_s		0xee000b10
#define VMOVN		0xffb20200
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VORR		0xef200110
#define VPOP		0xecbd0b00
#define VPUSH		0xed2d0b00
#define VSHLL		0xef800a10
#define VSHR		0xef800010
#define VSRA		0xef800110
#define VST1		0xf9000000
#define VST1_s		0xf9800000
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40
255 
256 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
257 
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	/* Argument checking helper: non-zero when fr names a float register
	   that is usable under the compiler's current register configuration
	   (scratch, saved or temporary). */
	if (compiler->scratches == -1)
		return 0;

	/* Fold the "second half" alias of a register pair back to its base. */
	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		return 1;
	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		return 1;
	return fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS);
}
270 
271 #endif /* SLJIT_ARGUMENT_CHECKS */
272 
push_inst16(struct sljit_compiler * compiler,sljit_ins inst)273 static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
274 {
275 	sljit_u16 *ptr;
276 	SLJIT_ASSERT(!(inst & 0xffff0000));
277 
278 	ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
279 	FAIL_IF(!ptr);
280 	*ptr = (sljit_u16)(inst);
281 	compiler->size++;
282 	return SLJIT_SUCCESS;
283 }
284 
push_inst32(struct sljit_compiler * compiler,sljit_ins inst)285 static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
286 {
287 	sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));
288 	FAIL_IF(!ptr);
289 	*ptr++ = (sljit_u16)(inst >> 16);
290 	*ptr = (sljit_u16)(inst);
291 	compiler->size += 2;
292 	return SLJIT_SUCCESS;
293 }
294 
emit_imm32_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_uw imm)295 static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
296 {
297 	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
298 		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
299 	return push_inst32(compiler, MOVT | RD4(dst)
300 		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
301 }
302 
/* Rewrites an existing MOVW/MOVT pair (four halfwords at inst) so it
   loads new_imm instead; the destination register field of the old pair
   is preserved. Used for patching constants/addresses after emission. */
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
	/* Destination register field (bits 8-11 of the second halfword). */
	sljit_ins dst = inst[1] & 0x0f00;
	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
	/* MOVW halfwords carry the low 16 bits of new_imm. */
	inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
	inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
	/* MOVT halfwords carry the high 16 bits of new_imm. */
	inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
	inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
}
312 
/* Selects the shortest branch encoding able to reach the jump target and
   records it in jump->flags (PATCH_*). Returns the number of halfwords
   saved compared to the worst case (mov32 based) sequence — the caller
   rolls code_ptr back by this amount; 0 keeps the full, patchable form. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	/* Rewritable jumps must keep the full form so they can be retargeted later. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			return 0;
		/* Distance in halfwords from the end of a 32 bit branch. */
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* For in-code targets executable_offset cancels out, so both
		   sides use writable addresses here. */
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
	}

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		/* Conditional B, encoding T1: 8 bit signed halfword offset. */
		if (diff <= 127 && diff >= -128) {
			jump->flags |= PATCH_TYPE1;
			return 5;
		}
		/* Conditional B, encoding T3: 20 bit signed halfword offset. */
		if (diff <= 524287 && diff >= -524288) {
			jump->flags |= PATCH_TYPE2;
			return 4;
		}
		/* +1 comes from the prefix IT instruction. */
		diff--;
		/* IT prefixed unconditional B, encoding T4: 24 bit signed offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE3;
			return 3;
		}
	}
	else if (jump->flags & IS_BL) {
		/* BL, encoding T1: 24 bit signed halfword offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_BL;
			return 3;
		}
	}
	else {
		/* Unconditional B, encoding T2: 11 bit signed halfword offset. */
		if (diff <= 1023 && diff >= -1024) {
			jump->flags |= PATCH_TYPE4;
			return 4;
		}
		/* Unconditional B, encoding T4: 24 bit signed halfword offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE5;
			return 3;
		}
	}

	return 0;
}
367 
/* Writes the final branch instruction(s) at jump->addr once the target
   address is known. The encoding must match the size and patch type that
   detect_jump_type committed to earlier. */
static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset)
{
	/* Patch type recorded by detect_jump_type (0 = full mov32 form). */
	sljit_s32 type = (jump->flags >> 4) & 0xf;
	sljit_sw diff;
	sljit_u16 *jump_inst;
	sljit_s32 s, j1, j2;

	if (SLJIT_UNLIKELY(type == 0)) {
		/* Full form kept: only the loaded constant is rewritten. */
		modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
		return;
	}

	/* diff is in halfwords, relative to the end of the 32 bit branch.
	   Targets carry executable addresses while jump->addr is writable,
	   so executable_offset is subtracted to compare like with like. */
	if (jump->flags & JUMP_ADDR) {
		SLJIT_ASSERT(jump->u.target & 0x1);
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->u.label->addr & 0x1);
		diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
	}
	jump_inst = (sljit_u16*)jump->addr;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff));
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1));
		jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff));
		return;
	case 3:
		/* Long-range conditional jump: IT prefix, then an unconditional
		   branch (falls through to the common T4 form below). */
		SLJIT_ASSERT(jump->flags & IS_COND);
		*jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8);
		diff--;
		type = 5;
		break;
	case 4:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff));
		return;
	}

	SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);

	/* Really complex instruction form for branches. */
	s = (diff >> 23) & 0x1;
	j1 = (~(diff >> 22) ^ s) & 0x1;
	j2 = (~(diff >> 21) ^ s) & 0x1;
	jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10));
	jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff));

	/* The others have a common form. */
	if (type == 5) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else if (type == 6) /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
	else
		SLJIT_UNREACHABLE();
}
432 
/* Assembles the buffered instruction stream into executable memory.
   The first pass copies the halfwords while resolving label addresses
   and shrinking jumps to their shortest encoding; a second pass patches
   all jumps and put_labels. Returns the code address with the Thumb bit
   (bit 0) set, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_u16 *buf_ptr;
	sljit_u16 *buf_end;
	sljit_uw half_count;	/* Position in the source stream, in halfwords. */
	sljit_uw next_addr;	/* Next half_count where any structure needs processing. */
	sljit_sw executable_offset;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	struct sljit_put_label *put_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Worst case allocation: assume every emitted halfword is kept. */
	code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16), compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	next_addr = 0;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	put_label = compiler->put_labels;

	do {
		buf_ptr = (sljit_u16*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		do {
			*code_ptr = *buf_ptr++;
			if (next_addr == half_count) {
				SLJIT_ASSERT(!label || label->size >= half_count);
				SLJIT_ASSERT(!jump || jump->addr >= half_count);
				SLJIT_ASSERT(!const_ || const_->addr >= half_count);
				SLJIT_ASSERT(!put_label || put_label->addr >= half_count);

				/* These structures are ordered by their address. */
				if (label && label->size == half_count) {
					/* Executable address with the Thumb bit; size becomes
					   the halfword offset within the final code. */
					label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
				}
				if (jump && jump->addr == half_count) {
						/* Step back over the already copied jump sequence
						   (10 bytes with the IT prefix, 8 without — TODO
						   confirm against the emitters), then drop the
						   halfwords a shorter encoding makes unnecessary. */
						jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
						code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
						jump = jump->next;
				}
				if (const_ && const_->addr == half_count) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
				}
				if (put_label && put_label->addr == half_count) {
					SLJIT_ASSERT(put_label->label);
					put_label->addr = (sljit_uw)code_ptr;
					put_label = put_label->next;
				}
				next_addr = compute_next_addr(label, jump, const_, put_label);
			}
			code_ptr++;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may be attached to the very end of the code. */
	if (label && label->size == half_count) {
		label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(!put_label);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: all targets are known now, emit the final branches. */
	jump = compiler->jumps;
	while (jump) {
		set_jump_instruction(jump, executable_offset);
		jump = jump->next;
	}

	put_label = compiler->put_labels;
	while (put_label) {
		modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr);
		put_label = put_label->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);

	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
544 
sljit_has_cpu_feature(sljit_s32 feature_type)545 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
546 {
547 	switch (feature_type) {
548 	case SLJIT_HAS_FPU:
549 	case SLJIT_HAS_F64_AS_F32_PAIR:
550 	case SLJIT_HAS_SIMD:
551 #ifdef SLJIT_IS_FPU_AVAILABLE
552 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
553 #else
554 		/* Available by default. */
555 		return 1;
556 #endif
557 
558 	case SLJIT_SIMD_REGS_ARE_PAIRS:
559 	case SLJIT_HAS_CLZ:
560 	case SLJIT_HAS_CTZ:
561 	case SLJIT_HAS_REV:
562 	case SLJIT_HAS_ROT:
563 	case SLJIT_HAS_CMOV:
564 	case SLJIT_HAS_PREFETCH:
565 	case SLJIT_HAS_COPY_F32:
566 	case SLJIT_HAS_COPY_F64:
567 	case SLJIT_HAS_ATOMIC:
568 		return 1;
569 
570 	default:
571 		return 0;
572 	}
573 }
574 
575 /* --------------------------------------------------------------------- */
576 /*  Core code generator functions.                                       */
577 /* --------------------------------------------------------------------- */
578 
579 #define INVALID_IMM	0x80000000
get_imm(sljit_uw imm)580 static sljit_uw get_imm(sljit_uw imm)
581 {
582 	/* Thumb immediate form. */
583 	sljit_s32 counter;
584 
585 	if (imm <= 0xff)
586 		return imm;
587 
588 	if ((imm & 0xffff) == (imm >> 16)) {
589 		/* Some special cases. */
590 		if (!(imm & 0xff00))
591 			return (1 << 12) | (imm & 0xff);
592 		if (!(imm & 0xff))
593 			return (2 << 12) | ((imm >> 8) & 0xff);
594 		if ((imm & 0xff00) == ((imm & 0xff) << 8))
595 			return (3 << 12) | (imm & 0xff);
596 	}
597 
598 	/* Assembly optimization: count leading zeroes? */
599 	counter = 8;
600 	if (!(imm & 0xffff0000)) {
601 		counter += 16;
602 		imm <<= 16;
603 	}
604 	if (!(imm & 0xff000000)) {
605 		counter += 8;
606 		imm <<= 8;
607 	}
608 	if (!(imm & 0xf0000000)) {
609 		counter += 4;
610 		imm <<= 4;
611 	}
612 	if (!(imm & 0xc0000000)) {
613 		counter += 2;
614 		imm <<= 2;
615 	}
616 	if (!(imm & 0x80000000)) {
617 		counter += 1;
618 		imm <<= 1;
619 	}
620 	/* Since imm >= 128, this must be true. */
621 	SLJIT_ASSERT(counter <= 31);
622 
623 	if (imm & 0x00ffffff)
624 		return INVALID_IMM; /* Cannot be encoded. */
625 
626 	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
627 }
628 
/* Loads an arbitrary 32 bit constant into dst using the shortest
   available sequence. Never touches the status flags. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_uw encoded;

	/* MOVS cannot be used since it destroys the flags. */

	if (imm >= 0x10000) {
		/* Try a single 32 bit instruction first. */
		encoded = get_imm(imm);
		if (encoded != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | encoded);
		encoded = get_imm(~imm);
		if (encoded != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | encoded);
	}

	/* MOVW sets the low 16 bits and clears the high ones. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	if (imm < 0x10000)
		return SLJIT_SUCCESS;

	/* MOVT fills in the high 16 bits. */
	return push_inst32(compiler, MOVT | RD4(dst)
		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
654 
/* Internal flag bits for emit_op_imm: which arguments are immediates. */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	0x0100000
/* The computed value is unused; only the flags are needed (e.g. compare). */
#define UNUSED_RETURN	0x0200000
660 
emit_op_imm(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 dst,sljit_uw arg1,sljit_uw arg2)661 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
662 {
663 	/* dst must be register, TMP_REG1
664 	   arg1 must be register, imm
665 	   arg2 must be register, imm */
666 	sljit_s32 reg;
667 	sljit_uw imm, imm2;
668 
669 	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
670 		/* Both are immediates, no temporaries are used. */
671 		flags &= ~ARG1_IMM;
672 		FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
673 		arg1 = TMP_REG1;
674 	}
675 
676 	if (flags & (ARG1_IMM | ARG2_IMM)) {
677 		reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
678 		imm = (flags & ARG2_IMM) ? arg2 : arg1;
679 
680 		switch (flags & 0xffff) {
681 		case SLJIT_CLZ:
682 		case SLJIT_CTZ:
683 		case SLJIT_REV:
684 		case SLJIT_REV_U16:
685 		case SLJIT_REV_S16:
686 		case SLJIT_REV_U32:
687 		case SLJIT_REV_S32:
688 		case SLJIT_MUL:
689 			/* No form with immediate operand. */
690 			break;
691 		case SLJIT_MOV:
692 			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
693 			return load_immediate(compiler, dst, imm);
694 		case SLJIT_ADD:
695 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
696 			imm2 = NEGATE(imm);
697 			if (IS_2_LO_REGS(reg, dst)) {
698 				if (imm <= 0x7)
699 					return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
700 				if (imm2 <= 0x7)
701 					return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
702 				if (reg == dst) {
703 					if (imm <= 0xff)
704 						return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
705 					if (imm2 <= 0xff)
706 						return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst));
707 				}
708 			}
709 			if (!(flags & SET_FLAGS)) {
710 				if (imm <= 0xfff)
711 					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
712 				if (imm2 <= 0xfff)
713 					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2));
714 			}
715 			imm2 = get_imm(imm);
716 			if (imm2 != INVALID_IMM)
717 				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
718 			imm = get_imm(NEGATE(imm));
719 			if (imm != INVALID_IMM)
720 				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
721 			break;
722 		case SLJIT_ADDC:
723 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
724 			imm2 = get_imm(imm);
725 			if (imm2 != INVALID_IMM)
726 				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
727 			if (flags & ARG2_IMM) {
728 				imm = get_imm(~imm);
729 				if (imm != INVALID_IMM)
730 					return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
731 			}
732 			break;
733 		case SLJIT_SUB:
734 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
735 			if (flags & ARG1_IMM) {
736 				if (imm == 0 && IS_2_LO_REGS(reg, dst))
737 					return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
738 				imm = get_imm(imm);
739 				if (imm != INVALID_IMM)
740 					return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
741 				break;
742 			}
743 			if (flags & UNUSED_RETURN) {
744 				if (imm <= 0xff && reg_map[reg] <= 7)
745 					return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
746 				imm2 = get_imm(imm);
747 				if (imm2 != INVALID_IMM)
748 					return push_inst32(compiler, CMPI_W | RN4(reg) | imm2);
749 				imm = get_imm(NEGATE(imm));
750 				if (imm != INVALID_IMM)
751 					return push_inst32(compiler, CMNI_W | RN4(reg) | imm);
752 				break;
753 			}
754 			imm2 = NEGATE(imm);
755 			if (IS_2_LO_REGS(reg, dst)) {
756 				if (imm <= 0x7)
757 					return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
758 				if (imm2 <= 0x7)
759 					return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
760 				if (reg == dst) {
761 					if (imm <= 0xff)
762 						return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
763 					if (imm2 <= 0xff)
764 						return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst));
765 				}
766 			}
767 			if (!(flags & SET_FLAGS)) {
768 				if (imm <= 0xfff)
769 					return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
770 				if (imm2 <= 0xfff)
771 					return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2));
772 			}
773 			imm2 = get_imm(imm);
774 			if (imm2 != INVALID_IMM)
775 				return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
776 			imm = get_imm(NEGATE(imm));
777 			if (imm != INVALID_IMM)
778 				return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
779 			break;
780 		case SLJIT_SUBC:
781 			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
782 			if (flags & ARG1_IMM)
783 				break;
784 			imm2 = get_imm(imm);
785 			if (imm2 != INVALID_IMM)
786 				return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
787 			imm = get_imm(~imm);
788 			if (imm != INVALID_IMM)
789 				return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
790 			break;
791 		case SLJIT_AND:
792 			imm2 = get_imm(imm);
793 			if (imm2 != INVALID_IMM)
794 				return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
795 			imm = get_imm(~imm);
796 			if (imm != INVALID_IMM)
797 				return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
798 			break;
799 		case SLJIT_OR:
800 			imm2 = get_imm(imm);
801 			if (imm2 != INVALID_IMM)
802 				return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
803 			imm = get_imm(~imm);
804 			if (imm != INVALID_IMM)
805 				return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
806 			break;
807 		case SLJIT_XOR:
808 			if (imm == (sljit_uw)-1) {
809 				if (IS_2_LO_REGS(dst, reg))
810 					return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
811 				return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
812 			}
813 			imm = get_imm(imm);
814 			if (imm != INVALID_IMM)
815 				return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
816 			break;
817 		case SLJIT_SHL:
818 		case SLJIT_MSHL:
819 		case SLJIT_LSHR:
820 		case SLJIT_MLSHR:
821 		case SLJIT_ASHR:
822 		case SLJIT_MASHR:
823 		case SLJIT_ROTL:
824 		case SLJIT_ROTR:
825 			if (flags & ARG1_IMM)
826 				break;
827 			imm &= 0x1f;
828 
829 			if (imm == 0) {
830 				if (!(flags & SET_FLAGS))
831 					return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
832 				if (IS_2_LO_REGS(dst, reg))
833 					return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
834 				return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
835 			}
836 
837 			switch (flags & 0xffff) {
838 			case SLJIT_SHL:
839 			case SLJIT_MSHL:
840 				if (IS_2_LO_REGS(dst, reg))
841 					return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
842 				return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
843 			case SLJIT_LSHR:
844 			case SLJIT_MLSHR:
845 				if (IS_2_LO_REGS(dst, reg))
846 					return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
847 				return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
848 			case SLJIT_ASHR:
849 			case SLJIT_MASHR:
850 				if (IS_2_LO_REGS(dst, reg))
851 					return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
852 				return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
853 			case SLJIT_ROTL:
854 				imm = (imm ^ 0x1f) + 1;
855 				/* fallthrough */
856 			default: /* SLJIT_ROTR */
857 				return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm));
858 			}
859 		default:
860 			SLJIT_UNREACHABLE();
861 			break;
862 		}
863 
864 		if (flags & ARG2_IMM) {
865 			imm = arg2;
866 			arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
867 			FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
868 		} else {
869 			imm = arg1;
870 			arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
871 			FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
872 		}
873 
874 		SLJIT_ASSERT(arg1 != arg2);
875 	}
876 
877 	/* Both arguments are registers. */
878 	switch (flags & 0xffff) {
879 	case SLJIT_MOV:
880 	case SLJIT_MOV_U32:
881 	case SLJIT_MOV_S32:
882 	case SLJIT_MOV32:
883 	case SLJIT_MOV_P:
884 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
885 		if (dst == (sljit_s32)arg2)
886 			return SLJIT_SUCCESS;
887 		return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
888 	case SLJIT_MOV_U8:
889 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
890 		if (IS_2_LO_REGS(dst, arg2))
891 			return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
892 		return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
893 	case SLJIT_MOV_S8:
894 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
895 		if (IS_2_LO_REGS(dst, arg2))
896 			return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
897 		return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
898 	case SLJIT_MOV_U16:
899 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
900 		if (IS_2_LO_REGS(dst, arg2))
901 			return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
902 		return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
903 	case SLJIT_MOV_S16:
904 		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
905 		if (IS_2_LO_REGS(dst, arg2))
906 			return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
907 		return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
908 	case SLJIT_CLZ:
909 		SLJIT_ASSERT(arg1 == TMP_REG2);
910 		return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
911 	case SLJIT_CTZ:
912 		SLJIT_ASSERT(arg1 == TMP_REG2);
913 		FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
914 		return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
915 	case SLJIT_REV:
916 	case SLJIT_REV_U32:
917 	case SLJIT_REV_S32:
918 		SLJIT_ASSERT(arg1 == TMP_REG2);
919 		if (IS_2_LO_REGS(dst, arg2))
920 			return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
921 		return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
922 	case SLJIT_REV_U16:
923 	case SLJIT_REV_S16:
924 		SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2);
925 
926 		flags &= 0xffff;
927 		if (IS_2_LO_REGS(dst, arg2))
928 			FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
929 		else
930 			FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));
931 
932 		if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16))
933 			return SLJIT_SUCCESS;
934 
935 		if (reg_map[dst] <= 7)
936 			return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
937 		return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
938 	case SLJIT_ADD:
939 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
940 		if (IS_3_LO_REGS(dst, arg1, arg2))
941 			return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
942 		if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
943 			return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
944 		return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
945 	case SLJIT_ADDC:
946 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
947 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
948 			return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
949 		return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
950 	case SLJIT_SUB:
951 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
952 		if (flags & UNUSED_RETURN) {
953 			if (IS_2_LO_REGS(arg1, arg2))
954 				return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
955 			return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
956 		}
957 		if (IS_3_LO_REGS(dst, arg1, arg2))
958 			return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
959 		return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
960 	case SLJIT_SUBC:
961 		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
962 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
963 			return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
964 		return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
965 	case SLJIT_MUL:
966 		compiler->status_flags_state = 0;
967 		if (!(flags & SET_FLAGS))
968 			return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
969 		SLJIT_ASSERT(dst != TMP_REG2);
970 		FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
971 		/* cmp TMP_REG2, dst asr #31. */
972 		return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
973 	case SLJIT_AND:
974 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
975 			return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
976 		if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
977 			return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
978 		return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
979 	case SLJIT_OR:
980 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
981 			return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
982 		return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
983 	case SLJIT_XOR:
984 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
985 			return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
986 		return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
987 	case SLJIT_MSHL:
988 		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f));
989 		arg2 = TMP_REG2;
990 		/* fallthrough */
991 	case SLJIT_SHL:
992 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
993 			return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
994 		return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
995 	case SLJIT_MLSHR:
996 		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f));
997 		arg2 = TMP_REG2;
998 		/* fallthrough */
999 	case SLJIT_LSHR:
1000 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1001 			return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
1002 		return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1003 	case SLJIT_MASHR:
1004 		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f));
1005 		arg2 = TMP_REG2;
1006 		/* fallthrough */
1007 	case SLJIT_ASHR:
1008 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1009 			return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
1010 		return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1011 	case SLJIT_ROTL:
1012 		FAIL_IF(push_inst32(compiler, RSB_WI | RD4(TMP_REG2) | RN4(arg2) | 0));
1013 		arg2 = TMP_REG2;
1014 		/* fallthrough */
1015 	case SLJIT_ROTR:
1016 		if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1017 			return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2));
1018 		return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2));
1019 	}
1020 
1021 	SLJIT_UNREACHABLE();
1022 	return SLJIT_SUCCESS;
1023 }
1024 
1025 #define STORE		0x01
1026 #define SIGNED		0x02
1027 
1028 #define WORD_SIZE	0x00
1029 #define BYTE_SIZE	0x04
1030 #define HALF_SIZE	0x08
1031 #define PRELOAD		0x0c
1032 
1033 #define IS_WORD_SIZE(flags)		(!((flags) & (BYTE_SIZE | HALF_SIZE)))
1034 #define ALIGN_CHECK(argw, imm, shift)	(!((argw) & ~((imm) << (shift))))
1035 
1036 /*
1037   1st letter:
1038   w = word
1039   b = byte
1040   h = half
1041 
1042   2nd letter:
1043   s = signed
1044   u = unsigned
1045 
1046   3rd letter:
1047   l = load
1048   s = store
1049 */
1050 
/* 16-bit Thumb load/store opcodes (register-offset form). Indexed directly
   by the memory flags: (WORD_SIZE|BYTE_SIZE|HALF_SIZE) + SIGNED + STORE. */
static const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};
1067 
/* 16-bit Thumb load/store opcodes (5-bit immediate-offset form), same index
   scheme as sljit_mem16. A zero entry means the encoding does not exist
   (there is no 16-bit ldrsb/ldrsh with an immediate offset); callers check
   for a non-zero value before using the table. */
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};
1084 
1085 #define MEM_IMM8	0xc00
1086 #define MEM_IMM12	0x800000
/* 32-bit (wide) load/store opcodes, same index scheme as sljit_mem16.
   The 13th entry is selected by the PRELOAD flag value (0x0c). */
static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,

/* p u l */ 0xf8100000 /* pld */,
};
1105 
1106 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set.
   Returns SLJIT_ERR_UNSUPPORTED when no single add/sub encoding can hold the delta. */
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
{
	sljit_uw encoded;

	if (value >= 0) {
		/* Try the 12-bit plain immediate first, then the rotated form. */
		if (value <= 0xfff)
			return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));

		encoded = get_imm((sljit_uw)value);
		if (encoded != INVALID_IMM)
			return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | encoded);

		return SLJIT_ERR_UNSUPPORTED;
	}

	/* Negative delta: emit a subtract of the magnitude instead. */
	value = -value;

	if (value <= 0xfff)
		return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));

	encoded = get_imm((sljit_uw)value);
	if (encoded != INVALID_IMM)
		return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | encoded);

	return SLJIT_ERR_UNSUPPORTED;
}
1128 
emit_op_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 tmp_reg)1129 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
1130 	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
1131 {
1132 	sljit_s32 other_r;
1133 	sljit_uw imm, tmp;
1134 
1135 	SLJIT_ASSERT(arg & SLJIT_MEM);
1136 	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff));
1137 
1138 	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
1139 		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff);
1140 		if (imm != INVALID_IMM) {
1141 			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm));
1142 			return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
1143 		}
1144 
1145 		FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1146 		if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
1147 			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
1148 		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
1149 	}
1150 
1151 	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1152 		argw &= 0x3;
1153 		other_r = OFFS_REG(arg);
1154 		arg &= REG_MASK;
1155 
1156 		if (!argw && IS_3_LO_REGS(reg, arg, other_r))
1157 			return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
1158 		return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4));
1159 	}
1160 
1161 	arg &= REG_MASK;
1162 
1163 	if (argw > 0xfff) {
1164 		imm = get_imm((sljit_uw)(argw & ~0xfff));
1165 		if (imm != INVALID_IMM) {
1166 			push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm);
1167 			arg = tmp_reg;
1168 			argw = argw & 0xfff;
1169 		}
1170 	}
1171 	else if (argw < -0xff) {
1172 		tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff);
1173 		SLJIT_ASSERT(tmp >= (sljit_uw)-argw);
1174 		imm = get_imm(tmp);
1175 
1176 		if (imm != INVALID_IMM) {
1177 			push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm);
1178 			arg = tmp_reg;
1179 			argw += (sljit_sw)tmp;
1180 
1181 			SLJIT_ASSERT(argw >= 0 && argw <= 0xfff);
1182 		}
1183 	}
1184 
1185 	/* 16 bit instruction forms. */
1186 	if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
1187 		tmp = 3;
1188 		if (IS_WORD_SIZE(flags)) {
1189 			if (ALIGN_CHECK(argw, 0x1f, 2))
1190 				tmp = 2;
1191 		}
1192 		else if (flags & BYTE_SIZE)
1193 		{
1194 			if (ALIGN_CHECK(argw, 0x1f, 0))
1195 				tmp = 0;
1196 		}
1197 		else {
1198 			SLJIT_ASSERT(flags & HALF_SIZE);
1199 			if (ALIGN_CHECK(argw, 0x1f, 1))
1200 				tmp = 1;
1201 		}
1202 
1203 		if (tmp < 3)
1204 			return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp)));
1205 	}
1206 	else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) {
1207 		/* SP based immediate. */
1208 		return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2));
1209 	}
1210 
1211 	if (argw >= 0 && argw <= 0xfff)
1212 		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw);
1213 	else if (argw < 0 && argw >= -0xff)
1214 		return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw);
1215 
1216 	SLJIT_ASSERT(arg != tmp_reg);
1217 
1218 	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1219 	if (IS_3_LO_REGS(reg, arg, tmp_reg))
1220 		return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
1221 	return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
1222 }
1223 
1224 #undef ALIGN_CHECK
1225 #undef IS_WORD_SIZE
1226 
1227 /* --------------------------------------------------------------------- */
1228 /*  Entry, exit                                                          */
1229 /* --------------------------------------------------------------------- */
1230 
/* Emits the function prologue: pushes the saved (and extra scratch)
   registers together with lr, saves the callee-saved float registers,
   allocates the locals area and moves the incoming arguments into their
   SLJIT registers. Argument handling differs between the softfp ABI
   (float args arrive in r0-r3 / on the stack) and the hardfp ABI
   (float args arrive in d0/s0 and up). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size, i, tmp, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_uw offset;
	sljit_uw imm = 0;
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	sljit_u32 old_offset, f32_offset;
	/* Queue of vmov.f32 instructions used to shuffle incoming float
	   arguments into place; emitted in reverse order below. */
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Collect the bitmask of registers to push: saved registers (minus the
	   kept ones) and scratch registers above the first saved register. */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	/* At least two registers must be set for PUSH_W and one for PUSH instruction. */
	FAIL_IF((imm & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | imm)
		: push_inst16(compiler, PUSH | (1 << 8) | imm));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Insert one padding word so the vpush of doubles below keeps the
		   stack 8-byte aligned. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the whole frame to 8 bytes; local_size becomes the extra space
	   beyond the pushed registers. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* offset walks the softfp argument layout: first 4 words in r0-r3,
	   the rest on the caller's stack above the pushed frame. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument layout. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				/* VMOV2 moves the core register pair into the double. */
				FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			/* Non-scratch args go to saved registers; scratch args only
			   need a move when they are not already in the right slot. */
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1)));
			else if (reg_map[tmp] <= 7)
				FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			else
				FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* Hardfp: map incoming d/s argument registers onto consecutive SLJIT
	   float registers. Moves are queued in remap[] and emitted in reverse
	   so later sources are not overwritten by earlier moves. f32 args can
	   reuse the upper half of a previously split double slot. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				*remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	while (remap_ptr > remap)
		FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
#endif

#ifdef _WIN32
	/* Windows requires touching each 4K page when growing the stack so the
	   guard page mechanism can commit memory; probe page by page. */
	if (local_size >= 4096) {
		imm = get_imm(4096);
		SLJIT_ASSERT(imm != INVALID_IMM);

		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));

		if (local_size < 4 * 4096) {
			/* Unrolled probes for up to three additional pages. */
			if (local_size > 2 * 4096) {
				if (local_size > 3 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
				}

				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			}
		} else {
			/* Probe loop: TMP_REG2 counts the remaining pages, the
			   conditional branch jumps back while it is non-zero. */
			FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1));
			FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1));
			FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff)));
		}

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
		local_size &= 0xfff;
	}

	if (local_size >= 256) {
		SLJIT_ASSERT(local_size < 4096);

		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
	} else if (local_size > 0)
		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size));
#else /* !_WIN32 */
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
	}
#endif /* _WIN32 */

	return SLJIT_SUCCESS;
}
1440 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1441 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1442 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1443 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1444 {
1445 	sljit_s32 size;
1446 
1447 	CHECK_ERROR();
1448 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1449 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1450 
1451 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1452 
1453 	/* Doubles are saved, so alignment is unaffected. */
1454 	if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1455 		size += SSIZE_OF(sw);
1456 
1457 	compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1458 	return SLJIT_SUCCESS;
1459 }
1460 
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1461 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1462 {
1463 	sljit_uw imm2;
1464 
1465 	/* The TMP_REG1 register must keep its value. */
1466 	if (imm <= (127u << 2))
1467 		return push_inst16(compiler, ADD_SP_I | (imm >> 2));
1468 
1469 	if (imm <= 0xfff)
1470 		return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm));
1471 
1472 	imm2 = get_imm(imm);
1473 
1474 	if (imm2 != INVALID_IMM)
1475 		return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2);
1476 
1477 	FAIL_IF(load_immediate(compiler, TMP_REG2, imm));
1478 	return push_inst16(compiler, ADD_SP | RN3(TMP_REG2));
1479 }
1480 
/* Emits the function epilogue: restores float and core registers and frees
   the locals area. frame_size selects what happens to the return address:
     < 0  : lr is loaded into TMP_REG2, no return is emitted;
     == 0 : a full return is emitted (pc is popped directly);
     > 0  : lr stays in memory and SP is left frame_size bytes below the
            caller frame (used before a tail jump; see sljit_emit_return_to). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Float registers were pushed after the locals; free the locals
		   first, then vpop in the reverse order of sljit_emit_enter. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
		}

		/* Only the alignment padding word (if any) remains below the
		   saved core registers now. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	/* Rebuild the pushed-register mask; restored_reg remembers one saved
	   register for the single-register fast path below. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Nothing else to restore: treat lr itself as the single register. */
		reg_list |= (sljit_uw)1 << reg_map[TMP_REG2];
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register to restore and no return: use a plain load
		   plus SP adjustment instead of a pop sequence.
		   tmp encodes the chosen strategy: 0 = nothing to restore,
		   1 = already loaded before the SP adjust, 2 = load after the SP
		   adjust, 3 = load with post-increment writeback. */
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				/* 0x800 turns the 16-bit STR_SP encoding into a load. */
				if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc)
					FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2)));
				else
					FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			if (reg_map[restored_reg] <= 7)
				return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2));

			return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308;
		return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) {
		/* All registers are low: the 16-bit POP can be used; bit 8 pops pc. */
		if (lr_dst == TMP_PC)
			reg_list |= 1u << 8;

		/* At least one register must be set for POP instruction. */
		SLJIT_ASSERT(reg_list != 0);

		FAIL_IF(push_inst16(compiler, POP | reg_list));
	} else {
		if (lr_dst != 0)
			reg_list |= (sljit_uw)1 << reg_map[lr_dst];

		/* At least two registers must be set for POP_W instruction. */
		SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0);

		FAIL_IF(push_inst32(compiler, POP_W | reg_list));
	}

	if (frame_size > 0)
		return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD_SP_I | 1);
}
1626 
/* Emits a return with no value: releases the stack frame with
   frame_size == 0, which makes emit_stack_frame_release also emit the
   final return (pc restored from the stack). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
1634 
/* Releases the stack frame and tail-jumps to "src". The jump target must
   survive the register restore, so it is first moved into TMP_REG1 when it
   lives in memory or in one of the saved registers about to be reloaded. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		/* The locals area is freed below, so load the target first. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* src is a register restored by emit_stack_frame_release; copy it
		   out before its value is overwritten. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
		srcw = 0;
	}

	/* frame_size == 1: keep lr in memory and leave SP adjusted for the
	   tail call instead of emitting a return. */
	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1656 
1657 /* --------------------------------------------------------------------- */
1658 /*  Operators                                                            */
1659 /* --------------------------------------------------------------------- */
1660 
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)

/* The target has no integer divide instruction: declare the run-time
   helper functions used by sljit_emit_op0 for the DIV/DIVMOD opcodes. */
#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
/* Windows ARM helpers: note the denominator is the FIRST argument; the
   64 bit return value carries the quotient/remainder pair in r0/r1. */
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
/* ARM EABI helpers: quotient is returned in r0, remainder in r1. */
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
1682 
/* Emits a zero operand operation. Most opcodes are a single Thumb-2
   instruction; when the CPU has no integer divide support, the divide
   opcodes fall back to a run-time helper call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	/* Hardware register numbers (r1-r3) that must survive the
	   helper call, in the order they are spilled. */
	sljit_uw saved_reg_list[3];
	sljit_uw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 32x32 -> 64 bit multiply: low half into R0, high half into R1. */
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Save the dividend, divide, then compute the remainder as
		   R1 = dividend - (R0 / R1) * R1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		/* (op | 0x2) maps a DIVMOD opcode onto its DIV counterpart,
		   which the compile assert below guarantees. */
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* The helper clobbers r0-r3: spill the live scratch registers,
		   plus r1 when only the quotient is requested (DIV_*), so the
		   caller visible R1 is preserved. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* First register is pushed with a pre-decrement store that
			   also reserves the 8 byte aligned spill area (8 or 16). */
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* The Windows helpers take the denominator first: swap r0/r1
		   through TMP_REG1 before the call. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv))));
#elif defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore spilled registers (reverse order) and release the
		   spill area with a post-increment load of the first one. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
1776 
/* Emits a single operand operation (moves with optional size/sign
   extension, and the unary ALU opcodes). Operands may be registers,
   immediates or memory references. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Memory destinations are computed in TMP_REG1 and stored later. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		/* Move family: pick the access size/signedness, and normalize
		   immediates to the destination width up front. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src == SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
		else if (src & SLJIT_MEM) {
			/* The load itself performs the extension. */
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		} else {
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
			/* Register to memory store: no conversion needed, store
			   straight from the source register. */
			dst_r = src;
		}

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	}

	SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
	flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;

	/* 16 bit byte swaps only touch the low half of the word. */
	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
		flags |= HALF_SIZE;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* NOTE(review): return value intentionally unchecked here — a
	   failure is latched in compiler->error and caught by the next
	   CHECK_ERROR / code generation pass. */
	emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}
1860 
/* Emits a two operand operation. Source operands are normalized to
   either an immediate (ARG1_IMM/ARG2_IMM flag) or a register number
   before being handed to emit_op_imm. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, flags, src2_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	/* TMP_REG1 as destination is the sljit_emit_op2u path: the result
	   is discarded, only the status flags matter. */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	/* NOTE(review): emit_op_mem return values below are intentionally
	   unchecked — errors latch in compiler->error and surface at the
	   next checked emission. */
	if (src1 == SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	}
	else
		src1w = src1;

	if (src2 == SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Avoid clobbering src1 when it was just loaded into TMP_REG1. */
		src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
		emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg);
		src2w = src2_reg;
	}
	else
		src2w = src2;

	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
}
1905 
/* Flag-only variant of sljit_emit_op2: the result is discarded by
   routing it into TMP_REG1 (recognized as UNUSED_RETURN there). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
1916 
/* Emits a funnel shift: dst = src1 shifted by src3 with the vacated
   bits filled from src2. When both inputs are the same register this
   degenerates into a rotate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		/* Funnel shift of a register with itself is a rotate. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		/* Shift by zero: dst must equal src1; nothing to emit. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		if (IS_2_LO_REGS(dst_reg, src1_reg))
			FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
		else
			FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));

		/* (src3w ^ 0x1f) + 1 == 32 - src3w: merge in src2 shifted the
		   opposite way; 0x10 selects LSR (vs LSL) in the ORR encoding. */
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Masked variants need the amount reduced mod 32; a copy is also
	   required when dst_reg aliases src3 (dst is written first below). */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
		FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
	else
		FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));

	/* Opposite-direction shift of src2 is split into a constant shift
	   by 1 plus a register shift by (amount ^ 31), totalling
	   32 - amount without ever shifting by 32 (or 0). */
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
	FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
	return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
}
1973 
/* Emits an operation that consumes a single source operand and
   produces no general value (fast return, prefetch hints). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is lr on this target (asserted below): restore the
		   return address there, then branch to it. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No-op on this target. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch levels map to the same preload; TMP_PC as the
		   target register marks the PLD form in emit_op_mem. */
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2002 
/* Emits an operation that produces a single destination value
   (fast-call entry address capture, return address retrieval). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* Capture the fast-call return address from lr (TMP_REG2). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
		/* Memory destination: lr is already in TMP_REG2, fall through
		   to the common store below. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Compute the offset of the saved return address: the GP save
		   area above the locals... */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* ...plus the float save area and its alignment padding.
			   The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		/* Whole frame (locals + saves + pc slot) must stay 8 byte aligned. */
		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2042 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2043 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2044 {
2045 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2046 
2047 	if (type == SLJIT_GP_REGISTER)
2048 		return reg_map[reg];
2049 
2050 	if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2051 		return freg_map[reg];
2052 
2053 	if (type != SLJIT_SIMD_REG_128)
2054 		return freg_map[reg] & ~0x1;
2055 
2056 	return -1;
2057 }
2058 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2059 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2060 	void *instruction, sljit_u32 size)
2061 {
2062 	CHECK_ERROR();
2063 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2064 
2065 	if (size == 2)
2066 		return push_inst16(compiler, *(sljit_u16*)instruction);
2067 	return push_inst32(compiler, *(sljit_ins*)instruction);
2068 }
2069 
2070 /* --------------------------------------------------------------------- */
2071 /*  Floating point operators                                             */
2072 /* --------------------------------------------------------------------- */
2073 
/* Load/store direction bit of VLDR/VSTR (set = load). */
#define FPU_LOAD (1 << 20)

/* Emits a VFP load or store for 'reg' at the memory operand arg/argw.
   VLDR/VSTR only accept base + imm8*4 offsets, so larger or unaligned
   offsets are materialized into TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* base + (index << shift): fold into TMP_REG1 since VFP
		   transfers have no register offset form. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	/* Word aligned offset within +/-1020: directly encodable
	   (0x800000 is the U bit selecting a positive offset). */
	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
	}

	if (arg & REG_MASK) {
		/* Try to form the address as base +/- small delta. */
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
		}

		/* Split the offset: high part added to the base, low 10 bits
		   (word aligned) encoded in the instruction. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}

		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}
	}

	/* Fallback: load the full (absolute or relative) address. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
}
2122 
/* Converts a float/double to a signed word. The VCVT result lands in
   TMP_FREG1 and is then moved to a core register or stored directly. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* The SLJIT_32 bit is inverted relative to the VFP size bit. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));

	/* (1 << 20) flips VMOV into the FPU-to-core direction. */
	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2142 
/* Common helper converting an integer word to float/double: 'ins' is
   the prebuilt VCVT opcode (signed/unsigned, target precision). */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Get the integer operand into TMP_FREG1 first. */
	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2166 
/* Signed word -> float/double conversion. The SLJIT_32 bit is
   inverted into the opcode as the precision selector. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2173 
/* Unsigned word -> float/double conversion; same precision-bit
   handling as the signed variant above. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2180 
/* Emits a floating point compare: VCMP followed by VMRS, which copies
   the FPSCR condition flags into the core APSR. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
	FAIL_IF(push_inst32(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* UNORDERED_OR_EQUAL needs Z set for the unordered case too:
	   IT VS (cond 0x6) guards a CMP of r0 with itself, which forces
	   Z=1 only when the compare was unordered. */
	FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
	return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
}
2206 
/* Emits a single operand floating point operation (move, negate,
   absolute value, precision conversion, and the compare dispatched
   through SELECT_FOP1_OPERATION_WITH_CHECKS). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Invert the precision bit, except for F32->F64 where the source
	   precision (still needed for the load below) differs from the
	   destination precision. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
			else
				/* Memory destination: store straight from src. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		/* Result precision is the opposite of the source precision;
		   flip the bit for the store below. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2253 
/* Emits a two operand floating point operation (add, sub, mul, div,
   copysign). Memory operands are loaded into the FP temporaries. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* SLJIT_32 is inverted relative to the VFP precision bit. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the word containing src2's sign bit into TMP_REG1
		   (for f64 — op already inverted — (1 << 7) selects the upper
		   half), take |src1|, then negate when the sign word is
		   negative using an IT LT (cond 0xb) block. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
		FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
		FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
		return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
2304 
/* Loads a 32 bit float constant into 'freg'. When the bit pattern
   fits the VFP 8 bit immediate form, a single VMOV.F32 #imm is used;
   otherwise the pattern is built in a core register and transferred. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the float through a union to get its bit pattern. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable as imm8: the low 19 fraction bits must be zero and
	   the exponent must be in the narrow range the encoding covers. */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		if (exp == 0x20 || exp == 0x1f) {
			/* imm8 = sign bit | low exponent bit | 4 fraction bits,
			   split across two fields of the instruction. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
}
2336 
/* Loads a 64 bit double constant into 'freg'. Uses the VFP immediate
   form when possible, otherwise transfers both halves from core
   registers with VMOV2 (sharing one register when the halves match). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the double into its two 32 bit halves
	   (imm[0] = low word, imm[1] = high word on this target). */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable as imm8: all fraction bits below bit 48 must be zero
	   and the exponent must be in the supported narrow range. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			/* (1 << 8) selects double precision in the VMOV imm form. */
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* Identical halves: feed the same core register to both slots. */
	if (u.imm[0] == u.imm[1])
		return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
}
2372 
/* Emits a raw bit copy between a float register and one core register
   (or a register pair for a full f64 transfer). The direction is
   chosen by the opcode; no conversion takes place. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_s32 reg2;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		/* Both 32 bit halves move at once through VMOV2. */
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg);
	} else {
		inst = VMOV | VN4(freg) | RT4(reg);

		/* For f64 with a single core register, set bit 7 — selects
		   the other half of the register pair in the VMOV encoding
		   (NOTE(review): presumed; verify against the ARM ARM). */
		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	/* Bit 20 flips the transfer direction to FPU-to-core. */
	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst32(compiler, inst);
}
2399 
2400 /* --------------------------------------------------------------------- */
2401 /*  Conditional instructions                                             */
2402 /* --------------------------------------------------------------------- */
2403 
/* Maps an sljit condition type to the ARM condition code (the 4 bit
   'cond' field). Carry/overflow conditions depend on whether the
   flags were produced by an add or a sub, because ARM subtraction
   sets carry as an inverted borrow. */
static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x0; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x1; /* NE */

	case SLJIT_CARRY:
		/* Addition: carry out means CS. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x2; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x3; /* CC / LO (subtraction borrow) */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x3; /* CC */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x2; /* CS / HS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x8; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x9; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd; /* LE */

	case SLJIT_OVERFLOW:
		/* Without add/sub flags, overflow maps onto Z (used e.g. by
		   multiplication overflow checks). */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x1; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x6; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x0; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x7; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x4; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x5; /* PL */

	default: /* SLJIT_JUMP */
		SLJIT_UNREACHABLE();
		return 0xe; /* AL */
	}
}
2492 
sljit_emit_label(struct sljit_compiler * compiler)2493 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2494 {
2495 	struct sljit_label *label;
2496 
2497 	CHECK_ERROR_PTR();
2498 	CHECK_PTR(check_sljit_emit_label(compiler));
2499 
2500 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2501 		return compiler->last_label;
2502 
2503 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2504 	PTR_FAIL_IF(!label);
2505 	set_label(label, compiler);
2506 	return label;
2507 }
2508 
/* Emits a (possibly conditional) jump or call. The target address is
   left as a zero placeholder in TMP_REG1 and patched later; the jump
   record stores the instruction offset and, for conditional jumps,
   the condition code for the final optimizer/patcher. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Placeholder constant, patched when the target is resolved. */
	PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	if (type < SLJIT_JUMP) {
		/* Conditional: guard the branch with a one instruction IT
		   block and stash the condition in the jump flags. */
		jump->flags |= IS_COND;
		cc = get_cc(compiler, type);
		jump->flags |= cc << 8;
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	/* Remember where the branch instruction itself is. */
	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		/* Calls link the return address (BLX). */
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	return jump;
}
2540 
2541 #ifdef __SOFTFP__
2542 
/* Moves the arguments of a call into their soft-float ABI locations
   (r0-r3 plus the stack). The first pass computes the stack layout of each
   argument; the second pass (in reverse order) moves the values in place,
   taking care not to clobber the register holding the call target (*src).
   extra_space: in - contains SLJIT_CALL_RETURN when this is a tail call;
   out - number of stack bytes reserved for arguments (0 if none). */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	/* Track where the call-target register lives in the r0-r3 layout so it
	   can be rescued into TMP_REG1 before being overwritten. */
	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: assign an offset to every argument while reversing the
	   argument type list into 'types'. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles must be 8 byte aligned. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Arguments beyond the first four words need stack space. */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Round the stack area up to 8 bytes. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2)));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Destination overlaps the call-target register:
				   move the target to TMP_REG1 (ip) first. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			/* Only move when source and destination slots differ. */
			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The call target itself is the argument:
						   track its new location instead of copying. */
						*src = (sljit_s32)(1 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
				} else
					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2667 
/* After a soft-float call: moves a floating point return value from the
   integer return registers into its VFP register. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		return push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		return push_inst32(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
2677 
2678 #else
2679 
/* Compacts the floating point arguments into the consecutive VFP argument
   registers required by the hard-float ABI. 'offset' walks the source
   registers, 'new_offset' the packed destination registers, and
   'f32_offset' remembers a half-used register pair so a later f32 argument
   can fill its upper single-precision slot. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the second single slot of a previously
				   half-used register. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2716 
2717 #endif
2718 
/* Emits a function call with ABI-conforming argument passing. On soft-float
   targets (unless SLJIT_CALL_REG_ARG) the arguments are marshalled through
   integer registers/stack; on hard-float targets the FP arguments are
   packed into VFP registers. Tail calls (SLJIT_CALL_RETURN) release the
   stack frame before the transfer. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no stack arguments degrades to a plain jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Restore the saved return address before the stack
			   area is released, then return through it. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
2775 
/* Emits an indirect jump or fast call through a register, memory operand,
   or immediate address. Immediate targets are recorded as patchable jumps
   so they can be converted to direct branches when they are in range. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
		}

		/* Plain jump: load straight into pc. Calls load TMP_REG1 and
		   use BLX so the return address is recorded. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

	/* Placeholder constant; patched with the real target address. */
	FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	jump->addr = compiler->size;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
2807 
/* Emits an indirect call with ABI-conforming argument passing; the
   indirect counterpart of sljit_emit_call. The call target may need to be
   moved to TMP_REG1 when argument marshalling or frame release would
   clobber it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* A tail call restores the saved registers, so a target kept in a
	   saved register must be moved out of the way first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no stack arguments degrades to a plain jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved return address, release the argument
			   area, and return through TMP_REG2. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN)
				return push_inst16(compiler, BX | RN3(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
2870 
2871 #ifdef __SOFTFP__
2872 
/* Moves a floating point return value to its soft-float ABI location
   (r0 or the r0/r1 pair) before returning. With SLJIT_ENTER_REG_ARG the
   value is kept/moved in FP registers instead. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* f32 fits in r0; f64 needs the r0/r1 register pair. */
		if (op & SLJIT_32)
			return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
		return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Memory operand: load the raw bits directly into r0 (and r1). */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
2895 
2896 #endif /* __SOFTFP__ */
2897 
/* Materializes a condition flag as 0/1 into dst, either as a plain move
   (op < SLJIT_ADD) or combined with a bitwise operation (AND/OR/XOR) on
   the existing destination value. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* IT block with a then/else pair: emit 1 on the condition,
		   0 on its inverse. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			/* The movsi (immediate) instruction does not set flags in IT block. */
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	}

	/* Binary variant: the current destination value is an operand. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	if (op == SLJIT_AND) {
		/* AND needs both branches: and #1 when true, and #0 when false. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 0 is a no-op, so only the true branch is needed. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));

	if (!(flags & SLJIT_SET_Z))
		return SLJIT_SUCCESS;

	/* The condition must always be set, even if the ORR/EORI is not executed above. */
	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
2950 
/* Conditional select: dst_reg = type-condition ? src1 : src2_reg. The
   operands are normalized first so the conditional move (inside an IT
   block) only has to write dst_reg; inverting the condition (type ^= 0x1)
   lets the sources be swapped. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_uw cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* dst_reg already holds src1: swap the operands and invert the
	   condition instead of copying. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2));

		if (src2_reg != dst_reg) {
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (src1 != SLJIT_IMM) {
		/* Single conditional register move. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
	}

	tmp = (sljit_uw)src1w;

	if (tmp < 0x10000) {
		/* set low 16 bits, set hi 16 bits to 0. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOVW | RD4(dst_reg)
			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
	}

	/* Try encoding the immediate (or its complement) in one instruction. */
	tmp = get_imm((sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
	}

	tmp = get_imm(~(sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
	}

	/* Fallback: conditional MOVW/MOVT pair, both inside one IT block. */
	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));

	tmp = (sljit_uw)src1w;
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst_reg)
		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
3020 
/* Floating point conditional select:
   dst_freg = type-condition ? src1 : src2_freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* The VMOV_F32 encoding below expects the inverted SLJIT_32 sense. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap operands and invert the condition instead of
			   overwriting src1. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	/* Conditional register move guarded by a one instruction IT block. */
	FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
	return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
}
3050 
/* Loads or stores a register pair (REG_PAIR_MASK). For unaligned accesses
   two single word transfers are used; otherwise an LDRD/STRD is emitted.
   Most of the function normalizes the address so the remaining immediate
   offset fits the chosen instruction form, using TMP_REG1 as the scratch
   base register when the original offset is out of range. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_uw imm, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
		/* Two word-size transfers; each needs its offset (and offset+4)
		   within [-0xff, 0xfff]. */
		if ((mem & REG_MASK) == 0) {
			/* Absolute address: split into an encodable base constant
			   plus a small residual offset. */
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm == INVALID_IMM) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				memw = 0;
			} else
				FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (mem & OFFS_REG_MASK) {
			/* Base + shifted index: compute the address up front. */
			FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
			memw = 0;
			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw < -0xff) {
			/* Zero value can be included in the first case. */
			if ((-memw & 0xfff) <= SSIZE_OF(sw))
				tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff);
			else
				tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff);

			SLJIT_ASSERT(tmp >= (sljit_uw)-memw);
			imm = get_imm(tmp);

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw += (sljit_sw)tmp;
				SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw >= (0x1000 - SSIZE_OF(sw))) {
			/* Positive offset too large: move the aligned part of the
			   offset into the base register. */
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm != INVALID_IMM) {
				SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff);
				FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}

		flags = WORD_SIZE;

		SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff);

		if (type & SLJIT_MEM_STORE) {
			flags |= STORE;
		} else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* Loading into the base register would corrupt the address
			   of the second load, so load the second word first. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2));
			return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2);
		}

		FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2));
		return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2);
	}

	/* LDRD/STRD path: the offset must be a multiple of four in
	   [-0x3fc, 0x3fc]; 'flags' holds the add/subtract encoding bit. */
	flags = 1 << 23;

	if ((mem & REG_MASK) == 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm == INVALID_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			memw = 0;
		} else {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				/* Base was rounded up: use a negative residual. */
				memw = 0x100 - memw;
				flags = 0;
			}

			SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else if (mem & OFFS_REG_MASK) {
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
		memw = 0;
		mem = SLJIT_MEM1(TMP_REG1);
	} else if (memw < 0) {
		if ((-memw & ~0x3fc) == 0) {
			/* Fits directly as a subtracted offset. */
			flags = 0;
			memw = -memw >> 2;
		} else {
			tmp = (sljit_uw)(-memw & 0x7fc);
			imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw = (-memw & 0x3fc) >> 2;

				if (tmp <= 0x400)
					flags = 0;
				else
					memw = 0x100 - memw;
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}
	} else if ((memw & ~0x3fc) != 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
			memw = 0;
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else
		memw >>= 2;

	SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
	return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw);
}
3225 
sljit_emit_mem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3226 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
3227 	sljit_s32 reg,
3228 	sljit_s32 mem, sljit_sw memw)
3229 {
3230 	sljit_s32 flags;
3231 	sljit_ins inst;
3232 
3233 	CHECK_ERROR();
3234 	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
3235 
3236 	if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
3237 		return SLJIT_ERR_UNSUPPORTED;
3238 
3239 	if (type & SLJIT_MEM_SUPP)
3240 		return SLJIT_SUCCESS;
3241 
3242 	switch (type & 0xff) {
3243 	case SLJIT_MOV:
3244 	case SLJIT_MOV_U32:
3245 	case SLJIT_MOV_S32:
3246 	case SLJIT_MOV32:
3247 	case SLJIT_MOV_P:
3248 		flags = WORD_SIZE;
3249 		break;
3250 	case SLJIT_MOV_U8:
3251 		flags = BYTE_SIZE;
3252 		break;
3253 	case SLJIT_MOV_S8:
3254 		flags = BYTE_SIZE | SIGNED;
3255 		break;
3256 	case SLJIT_MOV_U16:
3257 		flags = HALF_SIZE;
3258 		break;
3259 	case SLJIT_MOV_S16:
3260 		flags = HALF_SIZE | SIGNED;
3261 		break;
3262 	default:
3263 		SLJIT_UNREACHABLE();
3264 		flags = WORD_SIZE;
3265 		break;
3266 	}
3267 
3268 	if (type & SLJIT_MEM_STORE)
3269 		flags |= STORE;
3270 
3271 	inst = sljit_mem32[flags] | 0x900;
3272 
3273 	if (!(type & SLJIT_MEM_POST))
3274 		inst |= 0x400;
3275 
3276 	if (memw >= 0)
3277 		inst |= 0x200;
3278 	else
3279 		memw = -memw;
3280 
3281 	return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw);
3282 }
3283 
/* Normalizes a memory operand so the remaining immediate offset fits in
   [-0xff, max_offset]. On return *mem is the base register to use (the
   original base when the offset already fits, TMP_REG1 otherwise) and
   *memw is the adjusted offset.
   max_offset: largest positive offset supported by the caller's
   instruction form. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm;

	*mem = TMP_REG1;

	/* Base + shifted index: compute the full address up front. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6));
	}

	arg &= REG_MASK;

	if (arg) {
		if (argw <= max_offset && argw >= -0xff) {
			/* Offset already fits; keep the original base register. */
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		/* Try to fold the out-of-range part of the offset into a single
		   add/sub with an encodable immediate.
		   BUG FIX: get_imm() signals failure by returning INVALID_IMM,
		   a non-zero sentinel; the previous 'if (imm)' checks therefore
		   also took the success path on failure and ORed the sentinel
		   into the instruction. Compare against INVALID_IMM, as every
		   other get_imm() call site in this file does. */
		if (argw < 0) {
			imm = get_imm((sljit_uw)(-argw & ~0xff));

			if (imm != INVALID_IMM) {
				*memw = -(-argw & 0xff);
				return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else if ((argw & 0xfff) <= max_offset) {
			imm = get_imm((sljit_uw)(argw & ~0xfff));

			if (imm != INVALID_IMM) {
				*memw = argw & 0xfff;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else {
			/* Round the base up so the residual becomes negative. */
			imm = get_imm((sljit_uw)((argw | 0xfff) + 1));

			if (imm != INVALID_IMM) {
				*memw = (argw & 0xfff) - 0x1000;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		}
	}

	/* Fallback: materialize the aligned part of the offset in TMP_REG1
	   and add the base register (if any). */
	imm = (sljit_uw)(argw & ~0xfff);

	if ((argw & 0xfff) > max_offset) {
		imm += 0x1000;
		*memw = (argw & 0xfff) - 0x1000;
	} else
		*memw = argw & 0xfff;

	FAIL_IF(load_immediate(compiler, TMP_REG1, imm));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg));
}
3344 
/* Loads or stores a floating point register. Aligned accesses use
   VLDR/VSTR directly; unaligned accesses go through the integer registers
   (one word for f32, two words for f64) via VMOV/VMOV2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the (lower) word of the FP register to TMP_REG2. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);

		/* f64: normalize the address once so both word stores fit. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
	}

	/* f64 load: fetch both words, then move the pair into the register. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
}
3381 
/* Resolves a SIMD memory operand into a plain base register with zero
   offset: *mem_ptr receives the register holding the final address
   (TMP_REG1 unless the operand was already offset-free). */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 base = *mem_ptr;
	sljit_uw imm;

	/* Base + shifted index register. */
	if (SLJIT_UNLIKELY(base & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(base & REG_MASK) | RM4(OFFS_REG(base)) | ((sljit_uw)(memw & 0x3) << 6));
	}

	/* Absolute address. */
	if (SLJIT_UNLIKELY(!(base & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	base &= REG_MASK;

	if (memw == 0) {
		/* Already offset-free; use the base register directly. */
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Fold the offset with a single add/sub when it is encodable. */
	imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));
	if (imm != INVALID_IMM)
		return push_inst32(compiler, (memw < 0 ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(base) | imm);

	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, base));
}
3413 
simd_get_quad_reg_index(sljit_s32 freg)3414 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3415 {
3416 	freg += freg & 0x1;
3417 
3418 	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3419 
3420 	if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3421 		freg--;
3422 
3423 	return freg;
3424 }
3425 
3426 #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3427 
/* Moves a whole SIMD register to/from another register or memory.
   reg_size selects D (3) or Q (4) registers; alignment (from ELEM2_SIZE)
   is an optional hint applied only to the memory form. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 8-byte (D) and 16-byte (Q) vectors are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (!(srcdst & SLJIT_MEM)) {
		/* Register-to-register copy: VORR with both sources equal. */
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD4(srcdst) | VN4(freg) | VM4(freg);
		else
			ins = VD4(freg) | VN4(srcdst) | VM4(srcdst);

		/* Q bit: operate on the full quad register. */
		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	/* 64-bit is the largest element size VLD1/VST1 can express. */
	if (elem_size > 3)
		elem_size = 3;

	/* Select the one- or two-register structure form for D vs Q. */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Encode the optional 64/128-bit alignment hint. */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3486 
/* Tries to encode value as a NEON "modified immediate" for VMOV_i.
   elem_size is log2 of the element size in bytes (0, 1, or >= 2).
   Returns the combined cmode/op/imm8 instruction bits on success, or
   ~(sljit_ins)0 when the value has no such encoding. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* A 32-bit value whose two 16-bit halves are equal can be encoded
	   as a replicated 16-bit immediate instead. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* Likewise a 16-bit value made of two equal bytes. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		/* Every 8-bit value is directly encodable. */
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		/* 16-bit elements: imm8 optionally shifted left by 8. The loop
		   runs at most twice: the second pass retries with the value
		   inverted (inverted form flagged by bit 5 in result). */
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* Already on the inverted pass: not encodable. */
			if (result != 0)
				return ~(sljit_ins)0;

			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		/* 32-bit elements: imm8 shifted by 0/8/16/24, or the
		   "ones-extended" forms (0x..ff / 0x..ffff). As above, a second
		   pass retries with the bitwise inverse. */
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			/* Already on the inverted pass: not encodable. */
			if (result != 0)
				return ~(sljit_ins)0;

			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the remaining imm8 into the instruction's split
	   immediate fields (bits 3:0, 18:16 and 28). */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result;
}
3580 
/* Broadcasts src (register, memory, or immediate) into every lane of freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Integer replicate supports 8/16/32-bit lanes; float 32/64-bit. */
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Zero is a single VMOV immediate regardless of lane size. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64-bit lanes: no integer dup; copy the double into each half
		   of the quad register with plain register moves. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1 (single element to all lanes). */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		/* Load into both D registers of the quad. */
		if (reg_size == 4)
			ins |= 1 << 5;

		return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* 32-bit float source register: VDUP from a scalar lane; the
		   E bit selects which half of the D register holds src. */
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate to the lane width, then try the NEON
		   modified-immediate form before falling back to a GP register. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst32(compiler, VMOV_i | imm | VD4(freg));
		}

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from a general-purpose register: size encoded in b/e bits. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src));
}
3680 
/* Moves a single lane of freg to/from srcdst (register, memory, or
   immediate). SLJIT_SIMD_LANE_ZERO additionally clears all other lanes. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			/* 64-bit float lane from a register: copy into the selected
			   half, then zero the other half. */
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				if (lane_index == 1)
					freg += SLJIT_QUAD_OTHER_HALF(freg);

				if (srcdst != freg)
					FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst)));

				freg += SLJIT_QUAD_OTHER_HALF(freg);
				return push_inst32(compiler, VMOV_i | VD4(freg));
			}

			/* Source overlaps the destination: save it to TMP_FREG2
			   before the zeroing VMOV below destroys it. */
			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
				FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Zero the whole destination, then insert the lane below. */
		FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg)));
	}

	/* Lanes in the upper D register of a quad: switch halves and rebase
	   the lane index. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		freg += SLJIT_QUAD_OTHER_HALF(freg);
	}

	if (srcdst & SLJIT_MEM) {
		/* 64-bit lanes go through the VFP load/store path. */
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* VLD1/VST1 (single element to/from one lane). */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			/* Whole-double move between float registers. */
			if (type & SLJIT_SIMD_STORE)
				return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg));
			return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* Even-mapped registers expose each 32-bit half as its own
			   S register, so a plain VMOV.F32 suffices. */
			if (freg_ebit_map[freg] == 0) {
				if (lane_index == 1)
					freg = SLJIT_F64_SECOND(freg);

				return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg));
			}

			/* Otherwise bounce the lane through a GP register. */
			FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)));
			return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
		}

		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
		return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Mask the immediate to the lane width, then load into a GPR. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV (scalar <-> GPR): lane size selector bits. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	/* Split the byte-scaled lane index across the opc fields. */
	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20);

		/* Sub-word stores zero-extend unless a signed read is requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst));
}
3797 
/* Broadcasts one lane of src into every lane of freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		src = simd_get_quad_reg_index(src);

		/* Lane lives in the upper D register: switch halves and rebase. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64-bit lanes: VDUP cannot do this; copy the selected double
		   into both halves with register moves. */
		if (freg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP (scalar): imm4 encodes lane index and element size together. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
}
3846 
/* Widens the elements of src from elem_size to elem2_size into freg:
   integers via repeated VSHLL (shift-left-long by 0, sign/zero extended),
   floats via VCVT.F64.F32 on each half. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float extend only supports f32 -> f64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* Load just the bytes that will be widened; a single doubling
		   extend on a quad consumes a whole D register (VLD1), otherwise
		   a partial-register load suffices. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf));
		else
			FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf));
		src = freg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;

		/* Each VSHLL doubles the element size; repeat until reached. */
		do {
			FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
				| ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		/* D-sized result was built in the temporary; copy it back. */
		if (dst_reg == TMP_FREG2)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (freg == src) {
		/* Convert the high half first so the low source lane survives. */
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20));
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)));
	freg += SLJIT_QUAD_OTHER_HALF(freg);
	return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20);
}
3911 
/* Extracts the sign (top) bit of every lane of freg into a bitmask
   stored in dst: isolate the sign bits with an unsigned VSHR, then fold
   them together with a sequence of accumulating shifts (VSRA). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* imms packs the VSRA shift amounts, one per byte, applied low byte
	   first by the loop below; ins is the initial sign-isolating VSHR. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 28) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 28) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 28) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg)));

	/* Narrow a quad intermediate so the folding runs on a D register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Accumulate neighboring sign bits; each pass halves their spread. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	/* Transfer the collected mask into a general-purpose register. */
	FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));

	if (reg_size == 4 && elem_size == 0) {
		/* 16 one-byte lanes: the upper half's mask sits in the adjacent
		   temp register; merge it in shifted left by 8. */
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
		FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
3988 
/* Emits a lane-independent bitwise operation (AND/OR/XOR) on two SIMD
   registers: dst_freg = src1_freg op src2_freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	/* Bitwise ops ignore the element size; only the opcode matters. */
	switch (SLJIT_SIMD_GET_OPCODE(type)) {
	case SLJIT_SIMD_OP2_AND:
		ins = VAND;
		break;
	case SLJIT_SIMD_OP2_OR:
		ins = VORR;
		break;
	case SLJIT_SIMD_OP2_XOR:
		ins = VEOR;
		break;
	}

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		dst_freg = simd_get_quad_reg_index(dst_freg);
		src1_freg = simd_get_quad_reg_index(src1_freg);
		src2_freg = simd_get_quad_reg_index(src2_freg);
		/* Q bit: operate on the full quad registers. */
		ins |= (sljit_ins)1 << 6;
	}

	return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg));
}
4029 
4030 #undef FPU_LOAD
4031 
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4032 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4033 	sljit_s32 dst_reg,
4034 	sljit_s32 mem_reg)
4035 {
4036 	sljit_ins ins;
4037 
4038 	CHECK_ERROR();
4039 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4040 
4041 	switch (GET_OPCODE(op)) {
4042 	case SLJIT_MOV_U8:
4043 		ins = LDREXB;
4044 		break;
4045 	case SLJIT_MOV_U16:
4046 		ins = LDREXH;
4047 		break;
4048 	default:
4049 		ins = LDREX;
4050 		break;
4051 	}
4052 
4053 	return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
4054 }
4055 
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4056 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4057 	sljit_s32 src_reg,
4058 	sljit_s32 mem_reg,
4059 	sljit_s32 temp_reg)
4060 {
4061 	sljit_ins ins;
4062 
4063 	/* temp_reg == mem_reg is undefined so use another temp register */
4064 	SLJIT_UNUSED_ARG(temp_reg);
4065 
4066 	CHECK_ERROR();
4067 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4068 
4069 	switch (GET_OPCODE(op)) {
4070 	case SLJIT_MOV_U8:
4071 		ins = STREXB | RM4(TMP_REG1);
4072 		break;
4073 	case SLJIT_MOV_U16:
4074 		ins = STREXH | RM4(TMP_REG1);
4075 		break;
4076 	default:
4077 		ins = STREX | RD4(TMP_REG1);
4078 		break;
4079 	}
4080 
4081 	FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
4082 	if (op & SLJIT_SET_ATOMIC_STORED)
4083 		return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));
4084 
4085 	return SLJIT_SUCCESS;
4086 }
4087 
/* Emits a 32-bit constant load whose value can be patched later via
   sljit_set_const, and stores the result to dst. Returns the const
   record identifying the patchable instruction sequence, or NULL on
   allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	/* Record the current code position before emitting the constant,
	   so the record points at the patchable instructions. */
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return const_;
}
4108 
/* Emits a placeholder 32-bit constant load (patched with a label address
   at code generation time) and stores the result to dst. Returns the
   put_label record, or NULL on allocation failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	/* Record the current code position before emitting the placeholder. */
	set_put_label(put_label, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	/* Value 0 is a placeholder; the real address is filled in later. */
	PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0));

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return put_label;
}
4129 
/* Patches the 32-bit immediate-load sequence at addr (four halfwords) to
   produce new_target. WX permissions are toggled around the write and
   the instruction cache is flushed through the executable mapping. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
	modify_imm32_const(inst, new_target);
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
	/* Flush via the executable address of the modified halfwords. */
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
4141 
/* Patches a constant created by sljit_emit_const. Delegates to the jump
   address patcher since both modify the same imm32 load sequence. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
4146