1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 #ifdef __SOFTFP__
30 return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
31 #else
32 return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
33 #endif
34 }
35
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Abstract SLJIT register -> ARM core register number.
   Index 0 is unused; the last entries map TMP_REG1/TMP_REG2/TMP_PC
   to r12, r14 (lr) and r15 (pc). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Abstract float register -> VFP register number. The table is doubled:
   the second half repeats the same numbers and is distinguished only by
   freg_ebit_map below (presumably the SLJIT_F64_SECOND aliases — see
   function_check_is_freg, which folds that range back). */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra encoding bit for each entry of freg_map (merged into the VFP
   register fields by VM4/VD4/VN4); 0 for the first half, 1 for the
   doubled second half. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};

/* Moves the bit field starting at bit 'from' of 'src' to bit 'to',
   keeping 'bits' bits; shifts left or right as needed. */
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to))

/* Two's complement negation done in the unsigned domain (avoids
   signed-overflow UB for the minimum value). */
#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm))

/* Thumb16 encodings. */
#define RD3(rd) ((sljit_ins)reg_map[rd])
#define RN3(rn) ((sljit_ins)reg_map[rn] << 3)
#define RM3(rm) ((sljit_ins)reg_map[rm] << 6)
#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8)
#define IMM3(imm) ((sljit_ins)imm << 6)
#define IMM8(imm) ((sljit_ins)imm)

/* Thumb16 helpers. */
/* Encodes rd (split into 3 low bits + 1 high bit) and rn for the
   16-bit "high register" instruction forms (MOV, ADD, CMP_X, ...). */
#define SET_REGS44(rd, rn) \
	(((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4))
/* Most 16-bit forms can only address r0-r7. */
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings. */
#define RM4(rm) ((sljit_ins)reg_map[rm])
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)

/* VFP register fields: 4-bit register number plus the extra E bit. */
#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))

/* 5-bit shift amount split into imm3:imm2 fields. */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
/* 12-bit immediate split into i:imm3:imm8 fields. */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff))
103
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits).
   Naming conventions visible in this table: a _W suffix marks a wide
   (32-bit Thumb-2) encoding of an instruction that also has a 16-bit
   form; names without _W whose value fits 16 bits are Thumb-1 style
   16-bit encodings. */
#define ADCI 0xf1400000
#define ADCS 0x4140
#define ADC_W 0xeb400000
#define ADD 0x4400
#define ADDS 0x1800
#define ADDSI3 0x1c00
#define ADDSI8 0x3000
#define ADDWI 0xf2000000
#define ADD_SP 0x4485
#define ADD_SP_I 0xb000
#define ADD_W 0xeb000000
#define ADD_WI 0xf1000000
#define ANDI 0xf0000000
#define ANDS 0x4000
#define AND_W 0xea000000
#define ASRS 0x4100
#define ASRSI 0x1000
#define ASR_W 0xfa40f000
#define ASR_WI 0xea4f0020
#define BCC 0xd000
#define BICI 0xf0200000
#define BKPT 0xbe00
#define BLX 0x4780
#define BX 0x4700
#define CLZ 0xfab0f080
#define CMNI_W 0xf1100f00
#define CMP 0x4280
#define CMPI 0x2800
#define CMPI_W 0xf1b00f00
#define CMP_X 0x4500
#define CMP_W 0xebb00f00
#define EORI 0xf0800000
#define EORS 0x4040
#define EOR_W 0xea800000
#define IT 0xbf00
#define LDR 0xf8d00000
#define LDR_SP 0x9800
#define LDRD 0xe9500000
#define LDREX 0xe8500f00
#define LDREXB 0xe8d00f4f
#define LDREXH 0xe8d00f5f
#define LDRI 0xf8500800
#define LSLS 0x4080
#define LSLSI 0x0000
#define LSL_W 0xfa00f000
#define LSL_WI 0xea4f0000
#define LSRS 0x40c0
#define LSRSI 0x0800
#define LSR_W 0xfa20f000
#define LSR_WI 0xea4f0010
#define MLA 0xfb000000
#define MOV 0x4600
#define MOVS 0x0000
#define MOVSI 0x2000
#define MOVT 0xf2c00000
#define MOVW 0xf2400000
#define MOV_W 0xea4f0000
#define MOV_WI 0xf04f0000
#define MUL 0xfb00f000
#define MVNS 0x43c0
#define MVN_W 0xea6f0000
#define MVN_WI 0xf06f0000
#define NOP 0xbf00
#define ORNI 0xf0600000
#define ORRI 0xf0400000
#define ORRS 0x4300
#define ORR_W 0xea400000
#define POP 0xbc00
#define POP_W 0xe8bd0000
#define PUSH 0xb400
#define PUSH_W 0xe92d0000
#define REV 0xba00
#define REV_W 0xfa90f080
#define REV16 0xba40
#define REV16_W 0xfa90f090
#define RBIT 0xfa90f0a0
#define RORS 0x41c0
#define ROR_W 0xfa60f000
#define ROR_WI 0xea4f0030
#define RSB_WI 0xf1c00000
#define RSBSI 0x4240
#define SBCI 0xf1600000
#define SBCS 0x4180
#define SBC_W 0xeb600000
#define SDIV 0xfb90f0f0
#define SMULL 0xfb800000
#define STR_SP 0x9000
#define STRD 0xe9400000
#define STREX 0xe8400000
#define STREXB 0xe8c00f40
#define STREXH 0xe8c00f50
#define SUBS 0x1a00
#define SUBSI3 0x1e00
#define SUBSI8 0x3800
#define SUB_W 0xeba00000
#define SUBWI 0xf2a00000
#define SUB_SP_I 0xb080
#define SUB_WI 0xf1a00000
#define SXTB 0xb240
#define SXTB_W 0xfa4ff080
#define SXTH 0xb200
#define SXTH_W 0xfa0ff080
#define TST 0x4200
#define TSTI 0xf0000f00
#define TST_W 0xea000f00
#define UDIV 0xfbb0f0f0
#define UMULL 0xfba00000
#define UXTB 0xb2c0
#define UXTB_W 0xfa5ff080
#define UXTH 0xb280
#define UXTH_W 0xfa1ff080
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
#define VAND 0xef000110
#define VCMP_F32 0xeeb40a40
#define VCVT_F32_S32 0xeeb80ac0
#define VCVT_F32_U32 0xeeb80a40
#define VCVT_F64_F32 0xeeb70ac0
#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
#define VDUP 0xee800b10
#define VDUP_s 0xffb00c00
#define VEOR 0xff000110
#define VLD1 0xf9200000
#define VLD1_r 0xf9a00c00
#define VLD1_s 0xf9a00000
#define VLDR_F32 0xed100a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
#define VMOV2 0xec400a10
#define VMOV_i 0xef800010
#define VMOV_s 0xee000b10
#define VMOVN 0xffb20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
#define VORR 0xef200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
#define VSHLL 0xef800a10
#define VSHR 0xef800010
#define VSRA 0xef800110
#define VST1 0xf9000000
#define VST1_s 0xf9800000
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40
256
#if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)

/* Returns non-zero when 'fr' denotes a usable float register: a float
   scratch, a float saved register, or a temporary float register.
   When 'is_32' is set, SLJIT_F64_SECOND aliases are folded back to
   their base register before the range checks. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	/* A scratch count of -1 means register checks are disabled. */
	if (compiler->scratches == -1)
		return 0;

	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		return 1;
	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		return 1;
	return fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS);
}

#endif /* SLJIT_ARGUMENT_CHECKS */
273
push_inst16(struct sljit_compiler * compiler,sljit_ins inst)274 static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
275 {
276 sljit_u16 *ptr;
277 SLJIT_ASSERT(!(inst & 0xffff0000));
278
279 ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
280 FAIL_IF(!ptr);
281 *ptr = (sljit_u16)(inst);
282 compiler->size++;
283 return SLJIT_SUCCESS;
284 }
285
push_inst32(struct sljit_compiler * compiler,sljit_ins inst)286 static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
287 {
288 sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));
289 FAIL_IF(!ptr);
290 *ptr++ = (sljit_u16)(inst >> 16);
291 *ptr = (sljit_u16)(inst);
292 compiler->size += 2;
293 return SLJIT_SUCCESS;
294 }
295
/* Materializes the full 32-bit constant 'imm' in 'dst' with a
   MOVW/MOVT pair: MOVW loads the low 16 bits (zeroing the high half),
   MOVT then fills in the high 16 bits. */
static sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	/* The 16-bit immediate is scattered into the imm4:i:imm3:imm8 fields. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
	/* Same field layout, sourced from the upper half of imm. */
	return push_inst32(compiler, MOVT | RD4(dst)
		| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
}
303
/* Dst must be in bits[11-8] */
/* Rewrites an existing MOVW/MOVT pair (four half-words at 'inst') so it
   loads 'new_imm'. 'dst' is the already-shifted destination register
   field placed into the second half-word of each instruction. */
static void set_imm32_const(sljit_u16 *inst, sljit_ins dst, sljit_uw new_imm)
{
	/* First half-words carry the opcode plus the imm4 and i fields. */
	inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
	/* Second half-words carry the register plus the imm3 and imm8 fields. */
	inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
	inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
	inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
}
312
/* Patches a MOVW/MOVT constant-load in place, reusing the destination
   register already encoded in the existing instructions. */
static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
{
	sljit_ins dst = inst[1] & 0x0f00;
	/* Sanity check: the site really is a MOVW followed by a MOVT
	   writing the same register. */
	SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
	set_imm32_const(inst, dst, new_imm);
}
319
/* Chooses the shortest branch encoding for 'jump' based on the distance
   to its target, records the choice in jump->flags (PATCH_TYPE1..5) and
   returns the code_ptr where emission continues. When no short form
   applies (or the jump is rewritable), the full MOVW/MOVT + BX/BLX
   sequence is kept and the surrounding half-words are rearranged. */
static SLJIT_INLINE sljit_u16* detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	/* Rewritable jumps must keep the worst-case sequence so any target
	   can be patched in later. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		goto exit;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			goto exit;
		diff = (sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset;
	} else {
		SLJIT_ASSERT(jump->u.label != NULL);
		/* Label distances are in half-words relative to pc+4. */
		diff = (sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2);
	}

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		/* Size of the prefix IT instruction. */
		diff += SSIZE_OF(u16);
		if (diff <= 0xff && diff >= -0x100) {
			/* 16-bit conditional branch; the IT prefix is dropped. */
			jump->flags |= PATCH_TYPE1;
			jump->addr = (sljit_uw)(code_ptr - 1);
			return code_ptr - 1;
		}
		if (diff <= 0xfffff && diff >= -0x100000) {
			/* 32-bit conditional branch, also replacing the IT prefix. */
			jump->flags |= PATCH_TYPE2;
			jump->addr = (sljit_uw)(code_ptr - 1);
			return code_ptr;
		}
		diff -= SSIZE_OF(u16);
	} else if (jump->flags & IS_BL) {
		/* Branch and link. */
		if (diff <= 0xffffff && diff >= -0x1000000) {
			jump->flags |= PATCH_TYPE5;
			return code_ptr + 1;
		}
		goto exit;
	} else if (diff <= 0x7ff && diff >= -0x800) {
		/* 16-bit unconditional branch. */
		jump->flags |= PATCH_TYPE3;
		return code_ptr;
	}

	if (diff <= 0xffffff && diff >= -0x1000000) {
		/* 32-bit unconditional branch. */
		jump->flags |= PATCH_TYPE4;
		return code_ptr + 1;
	}

exit:
	/* Keep the long form: move the trailing BX/BLX half-word past the
	   constant-load slot, and pull the IT prefix along for conditional
	   jumps. */
	code_ptr[4] = code_ptr[0];

	if (jump->flags & IS_COND) {
		code_ptr[3] = code_ptr[-1];
		jump->addr = (sljit_uw)(code_ptr - 1);
	}

	return code_ptr + 4;
}
379
/* Computes how many extra half-words (beyond the first) a mov_addr
   sequence needs: 1 when the target is close enough for an ADDWI/SUBWI
   pc-relative form (PATCH_TYPE6), otherwise 3 for a full MOVW/MOVT. */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

	/* The pc+4 offset is represented by the 2 * SSIZE_OF(sljit_u16) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	/* Note: ADR with imm8 does not set the last bit (Thumb2 flag). */

	if (diff <= 0xffd + 2 * SSIZE_OF(u16) && diff >= -0xfff + 2 * SSIZE_OF(u16)) {
		jump->flags |= PATCH_TYPE6;
		return 1;
	}

	return 3;
}
403
/* Final patching pass: writes the actual instruction bits for one jump
   or mov_addr, according to the PATCH_TYPE recorded in jump->flags
   (stored in bits 4-7). Type 0 means the full MOVW/MOVT constant form. */
static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_s32 type = (jump->flags >> 4) & 0xf;
	sljit_u16 *jump_inst = (sljit_u16*)jump->addr;
	sljit_sw diff;
	sljit_ins ins;

	diff = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);

	if (SLJIT_UNLIKELY(type == 0)) {
		/* Absolute address via MOVW/MOVT. For mov_addr the destination
		   register field is taken from the existing instruction;
		   plain jumps always load into TMP_REG1. */
		ins = (jump->flags & JUMP_MOV_ADDR) ? *jump_inst : RDN3(TMP_REG1);
		set_imm32_const((sljit_u16*)jump->addr, ins, (sljit_uw)diff);
		return;
	}

	if (SLJIT_UNLIKELY(type == 6)) {
		/* pc-relative ADDWI/SUBWI (ADR-like). The pc value is aligned
		   down to a multiple of 4. */
		SLJIT_ASSERT(jump->flags & JUMP_MOV_ADDR);
		diff -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(jump_inst + 2, executable_offset) & ~(sljit_sw)0x3;

		SLJIT_ASSERT(diff <= 0xfff && diff >= -0xfff);

		ins = ADDWI >> 16;
		if (diff <= 0) {
			diff = -diff;
			ins = SUBWI >> 16;
		}

		/* Second half-word reuses the register field of the original
		   instruction; first half-word gets the opcode with Rn = pc (0xf). */
		jump_inst[1] = (sljit_u16)(jump_inst[0] | COPY_BITS(diff, 8, 12, 3) | (diff & 0xff));
		jump_inst[0] = (sljit_u16)(ins | 0xf | COPY_BITS(diff, 11, 10, 1));
		return;
	}

	/* Remaining types are pc-relative branches; offsets are counted in
	   half-words from pc+4 (the Thumb bit of the target must be set). */
	SLJIT_ASSERT((diff & 0x1) != 0 && !(jump->flags & JUMP_MOV_ADDR));
	diff = (diff - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x7f && diff >= -0x80 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff));
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x7ffff && diff >= -0x80000 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1));
		jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff));
		return;
	case 3:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 0x3ff && diff >= -0x400 && !(jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff));
		return;
	}

	SLJIT_ASSERT(diff <= 0x7fffff && diff >= -0x800000);

	/* Really complex instruction form for branches. Negate with sign bit. */
	diff ^= ((diff >> 2) & 0x600000) ^ 0x600000;

	jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(diff, 11, 0, 10) | COPY_BITS(diff, 23, 10, 1));
	jump_inst[1] = (sljit_u16)((diff & 0x7ff) | COPY_BITS(diff, 22, 13, 1) | COPY_BITS(diff, 21, 11, 1));

	SLJIT_ASSERT(type == 4 || type == 5);

	/* The others have a common form. */
	if (type == 4) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
}
474
/* Pre-pass over labels, jumps and constants (all ordered by address):
   estimates the final size of each jump / mov_addr sequence, stores it
   in jump->flags (above JUMP_SIZE_SHIFT), rebases every recorded
   address by the accumulated saving, and shrinks compiler->size.
   All sizes are in 16-bit half-word units. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			/* Shift the label back by the savings accumulated so far. */
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Worst case unless a shorter branch form is provably in range. */
			total_size = JUMP_MAX_SIZE;

			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;

				if (jump->flags & IS_COND) {
					/* Account for dropping the IT prefix. */
					diff++;

					if (diff <= (0xff / SSIZE_OF(u16)) && diff >= (-0x100 / SSIZE_OF(u16)))
						total_size = 0;
					else if (diff <= (0xfffff / SSIZE_OF(u16)) && diff >= (-0x100000 / SSIZE_OF(u16)))
						total_size = 1;
					diff--;
				} else if (!(jump->flags & IS_BL) && diff <= (0x7ff / SSIZE_OF(u16)) && diff >= (-0x800 / SSIZE_OF(u16)))
					total_size = 1;

				if (total_size == JUMP_MAX_SIZE && diff <= (0xffffff / SSIZE_OF(u16)) && diff >= (-0x1000000 / SSIZE_OF(u16)))
					total_size = 2;
			}

			size_reduce += JUMP_MAX_SIZE - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 3;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;

				/* Short pc-relative ADDWI/SUBWI form (see mov_addr_get_length). */
				if (diff <= (0xffd / SSIZE_OF(u16)) && diff >= (-0xfff / SSIZE_OF(u16)))
					total_size = 1;
			}

			size_reduce += 3 - total_size;
		}

		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
558
/* Generates the final machine code: allocates executable memory, copies
   the recorded half-words from the compiler buffers, resolves labels,
   shortens and patches jumps, and returns the entry address with the
   Thumb bit set. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_memory_fragment *buf;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_u16 *buf_ptr;
	sljit_u16 *buf_end;
	sljit_uw half_count;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_sw addr;
	sljit_sw executable_offset;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));

	/* Must run before allocation: it shrinks compiler->size. */
	reduce_code_size(compiler);

	code = (sljit_u16*)allocate_executable_memory(compiler->size * sizeof(sljit_u16), options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);

	reverse_buf(compiler);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();
	SLJIT_GET_NEXT_MIN();

	do {
		buf_ptr = (sljit_u16*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		do {
			*code_ptr = *buf_ptr++;
			/* half_count is the source position; compare against the
			   next label/jump/const address recorded at compile time. */
			if (next_min_addr == half_count) {
				SLJIT_ASSERT(!label || label->size >= half_count);
				SLJIT_ASSERT(!jump || jump->addr >= half_count);
				SLJIT_ASSERT(!const_ || const_->addr >= half_count);

				/* These structures are ordered by their address. */
				if (next_min_addr == next_label_size) {
					/* Labels get the executable address with the Thumb bit set. */
					label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
					next_label_size = SLJIT_GET_NEXT_SIZE(label);
				}

				if (next_min_addr == next_jump_addr) {
					if (!(jump->flags & JUMP_MOV_ADDR)) {
						/* Skip the reserved worst-case slot in the source;
						   detect_jump_type decides how much of it is kept. */
						half_count = half_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
						jump->addr = (sljit_uw)code_ptr;
						code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
						SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <
							((jump->flags >> JUMP_SIZE_SHIFT) + ((jump->flags & 0xf0) <= PATCH_TYPE2)) * sizeof(sljit_u16));
					} else {
						half_count += jump->flags >> JUMP_SIZE_SHIFT;
						addr = (sljit_sw)code_ptr;
						code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
						jump->addr = (sljit_uw)addr;
					}

					jump = jump->next;
					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
				} else if (next_min_addr == next_const_addr) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
				}

				SLJIT_GET_NEXT_MIN();
			}
			code_ptr++;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may sit exactly at the end of the generated code. */
	if (label && label->size == half_count) {
		label->u.addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* Second pass: all label addresses are known, emit the branch bits. */
	jump = compiler->jumps;
	while (jump) {
		generate_jump_or_mov_addr(jump, executable_offset);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);

	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
673
sljit_has_cpu_feature(sljit_s32 feature_type)674 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
675 {
676 switch (feature_type) {
677 case SLJIT_HAS_FPU:
678 case SLJIT_HAS_F64_AS_F32_PAIR:
679 case SLJIT_HAS_SIMD:
680 #ifdef SLJIT_IS_FPU_AVAILABLE
681 return (SLJIT_IS_FPU_AVAILABLE) != 0;
682 #else
683 /* Available by default. */
684 return 1;
685 #endif
686
687 case SLJIT_SIMD_REGS_ARE_PAIRS:
688 case SLJIT_HAS_CLZ:
689 case SLJIT_HAS_CTZ:
690 case SLJIT_HAS_REV:
691 case SLJIT_HAS_ROT:
692 case SLJIT_HAS_CMOV:
693 case SLJIT_HAS_PREFETCH:
694 case SLJIT_HAS_COPY_F32:
695 case SLJIT_HAS_COPY_F64:
696 case SLJIT_HAS_ATOMIC:
697 return 1;
698
699 default:
700 return 0;
701 }
702 }
703
/* --------------------------------------------------------------------- */
/*  Core code generator functions.                                       */
/* --------------------------------------------------------------------- */

#define INVALID_IMM 0x80000000
/* Attempts to encode 'imm' as a Thumb-2 modified immediate. Returns
   the scattered i:imm3:a:imm7 field bits ready to OR into a 32-bit
   instruction, or INVALID_IMM when the value is not representable. */
static sljit_uw get_imm(sljit_uw imm)
{
	/* Thumb immediate form. */
	sljit_s32 counter;

	/* Plain 8-bit value. */
	if (imm <= 0xff)
		return imm;

	if ((imm & 0xffff) == (imm >> 16)) {
		/* Some special cases. */
		/* 0x00XY00XY */
		if (!(imm & 0xff00))
			return (1 << 12) | (imm & 0xff);
		/* 0xXY00XY00 */
		if (!(imm & 0xff))
			return (2 << 12) | ((imm >> 8) & 0xff);
		/* 0xXYXYXYXY */
		if ((imm & 0xff00) == ((imm & 0xff) << 8))
			return (3 << 12) | (imm & 0xff);
	}

	/* Rotated 8-bit form: count leading zeroes by successive shifts. */
	/* Assembly optimization: count leading zeroes? */
	counter = 8;
	if (!(imm & 0xffff0000)) {
		counter += 16;
		imm <<= 16;
	}
	if (!(imm & 0xff000000)) {
		counter += 8;
		imm <<= 8;
	}
	if (!(imm & 0xf0000000)) {
		counter += 4;
		imm <<= 4;
	}
	if (!(imm & 0xc0000000)) {
		counter += 2;
		imm <<= 2;
	}
	if (!(imm & 0x80000000)) {
		counter += 1;
		imm <<= 1;
	}
	/* Since imm >= 128, this must be true. */
	SLJIT_ASSERT(counter <= 31);

	/* Only 8 significant bits are allowed after the leading one. */
	if (imm & 0x00ffffff)
		return INVALID_IMM; /* Cannot be encoded. */

	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
}
757
/* Loads 'imm' into 'dst' without touching the status flags. Prefers a
   single MOV_WI (or MVN_WI of the complement) when the value fits a
   Thumb-2 modified immediate; otherwise emits MOVW, plus MOVT when the
   upper 16 bits are non-zero. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_uw tmp;

	/* MOVS cannot be used since it destroys flags. */

	if (imm >= 0x10000) {
		tmp = get_imm(imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
		/* The bitwise complement may be encodable even when imm is not. */
		tmp = get_imm(~imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
	}

	/* set low 16 bits, set hi 16 bits to 0. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	/* set hi 16 bit if needed. */
	if (imm >= 0x10000)
		return push_inst32(compiler, MOVT | RD4(dst)
			| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
	return SLJIT_SUCCESS;
}
783
/* Operand-description flags for emit_op_imm (combined with the SLJIT
   opcode in the low 16 bits of 'flags'). */
#define ARG1_IMM 0x0010000
#define ARG2_IMM 0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS 0x0100000
#define UNUSED_RETURN 0x0200000
#define REGISTER_OP 0x0400000
790
emit_op_imm(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 dst,sljit_uw arg1,sljit_uw arg2)791 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
792 {
793 /* dst must be register
794 arg1 must be register, imm
795 arg2 must be register, imm */
796 sljit_s32 reg;
797 sljit_uw imm, imm2;
798
799 if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
800 /* Both are immediates, no temporaries are used. */
801 flags &= ~ARG1_IMM;
802 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
803 arg1 = TMP_REG1;
804 }
805
806 if (flags & (ARG1_IMM | ARG2_IMM)) {
807 reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
808 imm = (flags & ARG2_IMM) ? arg2 : arg1;
809
810 switch (flags & 0xffff) {
811 case SLJIT_CLZ:
812 case SLJIT_CTZ:
813 case SLJIT_REV:
814 case SLJIT_REV_U16:
815 case SLJIT_REV_S16:
816 case SLJIT_REV_U32:
817 case SLJIT_REV_S32:
818 case SLJIT_MUL:
819 case SLJIT_MULADD:
820 /* No form with immediate operand. */
821 break;
822 case SLJIT_MOV:
823 SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
824 return load_immediate(compiler, dst, imm);
825 case SLJIT_ADD:
826 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
827 imm2 = NEGATE(imm);
828 if (IS_2_LO_REGS(reg, dst)) {
829 if (imm <= 0x7)
830 return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
831 if (imm2 <= 0x7)
832 return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
833 if (reg == dst) {
834 if (imm <= 0xff)
835 return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
836 if (imm2 <= 0xff)
837 return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst));
838 }
839 }
840 if (!(flags & SET_FLAGS)) {
841 if (imm <= 0xfff)
842 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
843 if (imm2 <= 0xfff)
844 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2));
845 }
846 imm2 = get_imm(imm);
847 if (imm2 != INVALID_IMM)
848 return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
849 imm = get_imm(NEGATE(imm));
850 if (imm != INVALID_IMM)
851 return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
852 break;
853 case SLJIT_ADDC:
854 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
855 imm2 = get_imm(imm);
856 if (imm2 != INVALID_IMM)
857 return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
858 if (flags & ARG2_IMM) {
859 imm = get_imm(~imm);
860 if (imm != INVALID_IMM)
861 return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
862 }
863 break;
864 case SLJIT_SUB:
865 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
866 if (flags & ARG1_IMM) {
867 if (imm == 0 && IS_2_LO_REGS(reg, dst))
868 return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
869 imm = get_imm(imm);
870 if (imm != INVALID_IMM)
871 return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
872 break;
873 }
874 if (flags & UNUSED_RETURN) {
875 if (imm <= 0xff && reg_map[reg] <= 7)
876 return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
877 imm2 = get_imm(imm);
878 if (imm2 != INVALID_IMM)
879 return push_inst32(compiler, CMPI_W | RN4(reg) | imm2);
880 imm = get_imm(NEGATE(imm));
881 if (imm != INVALID_IMM)
882 return push_inst32(compiler, CMNI_W | RN4(reg) | imm);
883 break;
884 }
885 imm2 = NEGATE(imm);
886 if (IS_2_LO_REGS(reg, dst)) {
887 if (imm <= 0x7)
888 return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
889 if (imm2 <= 0x7)
890 return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
891 if (reg == dst) {
892 if (imm <= 0xff)
893 return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
894 if (imm2 <= 0xff)
895 return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst));
896 }
897 }
898 if (!(flags & SET_FLAGS)) {
899 if (imm <= 0xfff)
900 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
901 if (imm2 <= 0xfff)
902 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2));
903 }
904 imm2 = get_imm(imm);
905 if (imm2 != INVALID_IMM)
906 return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
907 imm = get_imm(NEGATE(imm));
908 if (imm != INVALID_IMM)
909 return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
910 break;
911 case SLJIT_SUBC:
912 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
913 if (flags & ARG1_IMM)
914 break;
915 imm2 = get_imm(imm);
916 if (imm2 != INVALID_IMM)
917 return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
918 imm = get_imm(~imm);
919 if (imm != INVALID_IMM)
920 return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
921 break;
922 case SLJIT_AND:
923 imm2 = get_imm(imm);
924 if (imm2 != INVALID_IMM)
925 return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
926 imm = get_imm(~imm);
927 if (imm != INVALID_IMM)
928 return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
929 break;
930 case SLJIT_OR:
931 imm2 = get_imm(imm);
932 if (imm2 != INVALID_IMM)
933 return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
934 imm = get_imm(~imm);
935 if (imm != INVALID_IMM)
936 return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
937 break;
938 case SLJIT_XOR:
939 if (imm == (sljit_uw)-1) {
940 if (IS_2_LO_REGS(dst, reg))
941 return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
942 return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
943 }
944 imm = get_imm(imm);
945 if (imm != INVALID_IMM)
946 return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
947 break;
948 case SLJIT_SHL:
949 case SLJIT_MSHL:
950 case SLJIT_LSHR:
951 case SLJIT_MLSHR:
952 case SLJIT_ASHR:
953 case SLJIT_MASHR:
954 case SLJIT_ROTL:
955 case SLJIT_ROTR:
956 if (flags & ARG1_IMM)
957 break;
958 imm &= 0x1f;
959
960 if (imm == 0) {
961 if (!(flags & SET_FLAGS))
962 return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
963 if (IS_2_LO_REGS(dst, reg))
964 return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
965 return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
966 }
967
968 switch (flags & 0xffff) {
969 case SLJIT_SHL:
970 case SLJIT_MSHL:
971 if (IS_2_LO_REGS(dst, reg))
972 return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
973 return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
974 case SLJIT_LSHR:
975 case SLJIT_MLSHR:
976 if (IS_2_LO_REGS(dst, reg))
977 return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
978 return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
979 case SLJIT_ASHR:
980 case SLJIT_MASHR:
981 if (IS_2_LO_REGS(dst, reg))
982 return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
983 return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
984 case SLJIT_ROTL:
985 imm = (imm ^ 0x1f) + 1;
986 /* fallthrough */
987 default: /* SLJIT_ROTR */
988 return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm));
989 }
990 default:
991 SLJIT_UNREACHABLE();
992 break;
993 }
994
995 if (flags & ARG2_IMM) {
996 imm = arg2;
997 arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
998 FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
999 } else {
1000 imm = arg1;
1001 arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
1002 FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
1003 }
1004
1005 SLJIT_ASSERT(arg1 != arg2);
1006 }
1007
1008 /* Both arguments are registers. */
1009 switch (flags & 0xffff) {
1010 case SLJIT_MOV:
1011 case SLJIT_MOV_U32:
1012 case SLJIT_MOV_S32:
1013 case SLJIT_MOV32:
1014 case SLJIT_MOV_P:
1015 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1016 if (dst == (sljit_s32)arg2)
1017 return SLJIT_SUCCESS;
1018 return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
1019 case SLJIT_MOV_U8:
1020 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1021 if (IS_2_LO_REGS(dst, arg2))
1022 return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
1023 return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
1024 case SLJIT_MOV_S8:
1025 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1026 if (IS_2_LO_REGS(dst, arg2))
1027 return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
1028 return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
1029 case SLJIT_MOV_U16:
1030 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1031 if (IS_2_LO_REGS(dst, arg2))
1032 return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
1033 return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
1034 case SLJIT_MOV_S16:
1035 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
1036 if (IS_2_LO_REGS(dst, arg2))
1037 return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
1038 return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
1039 case SLJIT_CLZ:
1040 SLJIT_ASSERT(arg1 == TMP_REG2);
1041 return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
1042 case SLJIT_CTZ:
1043 SLJIT_ASSERT(arg1 == TMP_REG2);
1044 FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
1045 return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
1046 case SLJIT_REV:
1047 case SLJIT_REV_U32:
1048 case SLJIT_REV_S32:
1049 SLJIT_ASSERT(arg1 == TMP_REG2);
1050 if (IS_2_LO_REGS(dst, arg2))
1051 return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
1052 return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
1053 case SLJIT_REV_U16:
1054 case SLJIT_REV_S16:
1055 SLJIT_ASSERT(arg1 == TMP_REG2);
1056
1057 if (IS_2_LO_REGS(dst, arg2))
1058 FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
1059 else
1060 FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));
1061
1062 if (!(flags & REGISTER_OP))
1063 return SLJIT_SUCCESS;
1064
1065 flags &= 0xffff;
1066 if (reg_map[dst] <= 7)
1067 return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
1068 return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
1069 case SLJIT_ADD:
1070 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1071 if (IS_3_LO_REGS(dst, arg1, arg2))
1072 return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
1073 if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
1074 return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
1075 return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1076 case SLJIT_ADDC:
1077 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1078 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1079 return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
1080 return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1081 case SLJIT_SUB:
1082 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1083 if (flags & UNUSED_RETURN) {
1084 if (IS_2_LO_REGS(arg1, arg2))
1085 return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
1086 return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
1087 }
1088 if (IS_3_LO_REGS(dst, arg1, arg2))
1089 return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
1090 return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1091 case SLJIT_SUBC:
1092 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1093 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1094 return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
1095 return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1096 case SLJIT_MUL:
1097 compiler->status_flags_state = 0;
1098 if (!(flags & SET_FLAGS))
1099 return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
1100 reg = (dst == TMP_REG2) ? TMP_REG1 : TMP_REG2;
1101 FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(reg) | RN4(arg1) | RM4(arg2)));
1102 /* cmp TMP_REG2, dst asr #31. */
1103 return push_inst32(compiler, CMP_W | RN4(reg) | 0x70e0 | RM4(dst));
1104 case SLJIT_AND:
1105 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1106 return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
1107 if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
1108 return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
1109 return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1110 case SLJIT_OR:
1111 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1112 return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
1113 return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1114 case SLJIT_XOR:
1115 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1116 return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
1117 return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1118 case SLJIT_MSHL:
1119 reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1120 FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
1121 arg2 = (sljit_uw)reg;
1122 /* fallthrough */
1123 case SLJIT_SHL:
1124 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1125 return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
1126 return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1127 case SLJIT_MLSHR:
1128 reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1129 FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
1130 arg2 = (sljit_uw)reg;
1131 /* fallthrough */
1132 case SLJIT_LSHR:
1133 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1134 return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
1135 return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1136 case SLJIT_MASHR:
1137 reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1138 FAIL_IF(push_inst32(compiler, ANDI | RD4(reg) | RN4(arg2) | 0x1f));
1139 arg2 = (sljit_uw)reg;
1140 /* fallthrough */
1141 case SLJIT_ASHR:
1142 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1143 return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
1144 return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1145 case SLJIT_ROTL:
1146 reg = (arg2 == TMP_REG1) ? TMP_REG1 : TMP_REG2;
1147 FAIL_IF(push_inst32(compiler, RSB_WI | RD4(reg) | RN4(arg2) | 0));
1148 arg2 = (sljit_uw)reg;
1149 /* fallthrough */
1150 case SLJIT_ROTR:
1151 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1152 return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2));
1153 return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2));
1154 case SLJIT_MULADD:
1155 compiler->status_flags_state = 0;
1156 return push_inst32(compiler, MLA | RD4(dst) | RN4(arg1) | RM4(arg2) | RT4(dst));
1157 }
1158
1159 SLJIT_UNREACHABLE();
1160 return SLJIT_SUCCESS;
1161 }
1162
1163 #define STORE 0x01
1164 #define SIGNED 0x02
1165
1166 #define WORD_SIZE 0x00
1167 #define BYTE_SIZE 0x04
1168 #define HALF_SIZE 0x08
1169 #define PRELOAD 0x0c
1170
1171 #define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE)))
1172 #define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift))))
1173
1174 /*
1175 1st letter:
1176 w = word
1177 b = byte
1178 h = half
1179
1180 2nd letter:
1181 s = signed
1182 u = unsigned
1183
1184 3rd letter:
1185 l = load
1186 s = store
1187 */
1188
/* 16 bit Thumb register-offset load/store opcodes. Indexed by the
   STORE/SIGNED/size bits of the memory access flags (see the letter
   legend above). */
static const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};
1205
/* 16 bit Thumb immediate-offset (imm5) load/store opcodes, same indexing
   as sljit_mem16. A 0x0000 entry means no 16 bit immediate encoding
   exists for that access type (signed loads); callers test for zero
   before using the table. */
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};
1222
/* Modifier bits OR-ed into a sljit_mem32 opcode to select the
   8 bit (negative offset) or 12 bit (positive offset) immediate form. */
#define MEM_IMM8	0xc00
#define MEM_IMM12	0x800000
/* 32 bit Thumb-2 load/store opcodes, same flag indexing as sljit_mem16
   with one extra PRELOAD slot at the end. */
static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,

/* p u l */ 0xf8100000 /* pld */,
};
1243
1244 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
emit_set_delta(struct sljit_compiler * compiler,sljit_s32 dst,sljit_s32 reg,sljit_sw value)1245 static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
1246 {
1247 sljit_uw imm;
1248
1249 if (value >= 0) {
1250 if (value <= 0xfff)
1251 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(value));
1252 imm = get_imm((sljit_uw)value);
1253 if (imm != INVALID_IMM)
1254 return push_inst32(compiler, ADD_WI | RD4(dst) | RN4(reg) | imm);
1255 }
1256 else {
1257 value = -value;
1258 if (value <= 0xfff)
1259 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(value));
1260 imm = get_imm((sljit_uw)value);
1261 if (imm != INVALID_IMM)
1262 return push_inst32(compiler, SUB_WI | RD4(dst) | RN4(reg) | imm);
1263 }
1264 return SLJIT_ERR_UNSUPPORTED;
1265 }
1266
emit_op_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 tmp_reg)1267 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
1268 sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
1269 {
1270 sljit_s32 other_r;
1271 sljit_uw imm, tmp;
1272
1273 SLJIT_ASSERT(arg & SLJIT_MEM);
1274 SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff));
1275
1276 if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
1277 imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff);
1278 if (imm != INVALID_IMM) {
1279 FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm));
1280 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
1281 }
1282
1283 FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1284 if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
1285 return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
1286 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
1287 }
1288
1289 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1290 argw &= 0x3;
1291 other_r = OFFS_REG(arg);
1292 arg &= REG_MASK;
1293
1294 if (!argw && IS_3_LO_REGS(reg, arg, other_r))
1295 return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
1296 return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4));
1297 }
1298
1299 arg &= REG_MASK;
1300
1301 if (argw > 0xfff) {
1302 imm = get_imm((sljit_uw)(argw & ~0xfff));
1303 if (imm != INVALID_IMM) {
1304 push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm);
1305 arg = tmp_reg;
1306 argw = argw & 0xfff;
1307 }
1308 }
1309 else if (argw < -0xff) {
1310 tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff);
1311 SLJIT_ASSERT(tmp >= (sljit_uw)-argw);
1312 imm = get_imm(tmp);
1313
1314 if (imm != INVALID_IMM) {
1315 push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm);
1316 arg = tmp_reg;
1317 argw += (sljit_sw)tmp;
1318
1319 SLJIT_ASSERT(argw >= 0 && argw <= 0xfff);
1320 }
1321 }
1322
1323 /* 16 bit instruction forms. */
1324 if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
1325 tmp = 3;
1326 if (IS_WORD_SIZE(flags)) {
1327 if (ALIGN_CHECK(argw, 0x1f, 2))
1328 tmp = 2;
1329 }
1330 else if (flags & BYTE_SIZE)
1331 {
1332 if (ALIGN_CHECK(argw, 0x1f, 0))
1333 tmp = 0;
1334 }
1335 else {
1336 SLJIT_ASSERT(flags & HALF_SIZE);
1337 if (ALIGN_CHECK(argw, 0x1f, 1))
1338 tmp = 1;
1339 }
1340
1341 if (tmp < 3)
1342 return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp)));
1343 }
1344 else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) {
1345 /* SP based immediate. */
1346 return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2));
1347 }
1348
1349 if (argw >= 0 && argw <= 0xfff)
1350 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw);
1351 else if (argw < 0 && argw >= -0xff)
1352 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw);
1353
1354 SLJIT_ASSERT(arg != tmp_reg);
1355
1356 FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1357 if (IS_3_LO_REGS(reg, arg, tmp_reg))
1358 return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
1359 return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
1360 }
1361
1362 #undef ALIGN_CHECK
1363 #undef IS_WORD_SIZE
1364
1365 /* --------------------------------------------------------------------- */
1366 /* Entry, exit */
1367 /* --------------------------------------------------------------------- */
1368
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1369 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1370 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1371 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1372 {
1373 sljit_s32 size, i, tmp, word_arg_count;
1374 sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1375 sljit_uw offset;
1376 sljit_uw imm = 0;
1377 #ifdef __SOFTFP__
1378 sljit_u32 float_arg_count;
1379 #else
1380 sljit_u32 old_offset, f32_offset;
1381 sljit_u32 remap[3];
1382 sljit_u32 *remap_ptr = remap;
1383 #endif
1384
1385 CHECK_ERROR();
1386 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1387 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1388
1389 tmp = SLJIT_S0 - saveds;
1390 for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
1391 imm |= (sljit_uw)1 << reg_map[i];
1392
1393 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
1394 imm |= (sljit_uw)1 << reg_map[i];
1395
1396 /* At least two registers must be set for PUSH_W and one for PUSH instruction. */
1397 FAIL_IF((imm & 0xff00)
1398 ? push_inst32(compiler, PUSH_W | (1 << 14) | imm)
1399 : push_inst16(compiler, PUSH | (1 << 8) | imm));
1400
1401 /* Stack must be aligned to 8 bytes: (LR, R4) */
1402 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
1403
1404 if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
1405 if ((size & SSIZE_OF(sw)) != 0) {
1406 FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2)));
1407 size += SSIZE_OF(sw);
1408 }
1409
1410 if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
1411 FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
1412 } else {
1413 if (fsaveds > 0)
1414 FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
1415 if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
1416 FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
1417 }
1418 }
1419
1420 local_size = ((size + local_size + 0x7) & ~0x7) - size;
1421 compiler->local_size = local_size;
1422
1423 if (options & SLJIT_ENTER_REG_ARG)
1424 arg_types = 0;
1425
1426 arg_types >>= SLJIT_ARG_SHIFT;
1427 word_arg_count = 0;
1428 saved_arg_count = 0;
1429 #ifdef __SOFTFP__
1430 SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
1431
1432 offset = 0;
1433 float_arg_count = 0;
1434
1435 while (arg_types) {
1436 switch (arg_types & SLJIT_ARG_MASK) {
1437 case SLJIT_ARG_TYPE_F64:
1438 if (offset & 0x7)
1439 offset += sizeof(sljit_sw);
1440
1441 if (offset < 4 * sizeof(sljit_sw))
1442 FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
1443 else
1444 FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
1445 | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
1446 float_arg_count++;
1447 offset += sizeof(sljit_f64) - sizeof(sljit_sw);
1448 break;
1449 case SLJIT_ARG_TYPE_F32:
1450 if (offset < 4 * sizeof(sljit_sw))
1451 FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
1452 else
1453 FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
1454 | (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
1455 float_arg_count++;
1456 break;
1457 default:
1458 word_arg_count++;
1459
1460 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1461 tmp = SLJIT_S0 - saved_arg_count;
1462 saved_arg_count++;
1463 } else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
1464 tmp = word_arg_count;
1465 else
1466 break;
1467
1468 if (offset < 4 * sizeof(sljit_sw))
1469 FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1)));
1470 else if (reg_map[tmp] <= 7)
1471 FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
1472 | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
1473 else
1474 FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP)
1475 | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))));
1476 break;
1477 }
1478
1479 offset += sizeof(sljit_sw);
1480 arg_types >>= SLJIT_ARG_SHIFT;
1481 }
1482
1483 compiler->args_size = offset;
1484 #else
1485 offset = SLJIT_FR0;
1486 old_offset = SLJIT_FR0;
1487 f32_offset = 0;
1488
1489 while (arg_types) {
1490 switch (arg_types & SLJIT_ARG_MASK) {
1491 case SLJIT_ARG_TYPE_F64:
1492 if (offset != old_offset)
1493 *remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
1494 old_offset++;
1495 offset++;
1496 break;
1497 case SLJIT_ARG_TYPE_F32:
1498 if (f32_offset != 0) {
1499 *remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
1500 f32_offset = 0;
1501 } else {
1502 if (offset != old_offset)
1503 *remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
1504 f32_offset = old_offset;
1505 old_offset++;
1506 }
1507 offset++;
1508 break;
1509 default:
1510 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1511 FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
1512 saved_arg_count++;
1513 }
1514
1515 word_arg_count++;
1516 break;
1517 }
1518 arg_types >>= SLJIT_ARG_SHIFT;
1519 }
1520
1521 SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));
1522
1523 while (remap_ptr > remap)
1524 FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
1525 #endif
1526
1527 #ifdef _WIN32
1528 if (local_size >= 4096) {
1529 imm = get_imm(4096);
1530 SLJIT_ASSERT(imm != INVALID_IMM);
1531
1532 FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1533
1534 if (local_size < 4 * 4096) {
1535 if (local_size > 2 * 4096) {
1536 if (local_size > 3 * 4096) {
1537 FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1538 FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1539 }
1540
1541 FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1542 FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1543 }
1544 } else {
1545 FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1));
1546 FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1547 FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
1548 FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1));
1549 FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff)));
1550 }
1551
1552 FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1553 local_size &= 0xfff;
1554 }
1555
1556 if (local_size >= 256) {
1557 SLJIT_ASSERT(local_size < 4096);
1558
1559 if (local_size <= (127 << 2))
1560 FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
1561 else
1562 FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
1563
1564 FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
1565 } else if (local_size > 0)
1566 FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size));
1567 #else /* !_WIN32 */
1568 if (local_size > 0) {
1569 if (local_size <= (127 << 2))
1570 FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
1571 else
1572 FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
1573 }
1574 #endif /* _WIN32 */
1575
1576 return SLJIT_SUCCESS;
1577 }
1578
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1579 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1580 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1581 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1582 {
1583 sljit_s32 size;
1584
1585 CHECK_ERROR();
1586 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1587 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1588
1589 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1590
1591 /* Doubles are saved, so alignment is unaffected. */
1592 if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1593 size += SSIZE_OF(sw);
1594
1595 compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1596 return SLJIT_SUCCESS;
1597 }
1598
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1599 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1600 {
1601 sljit_uw imm2;
1602
1603 /* The TMP_REG1 register must keep its value. */
1604 if (imm <= (127u << 2))
1605 return push_inst16(compiler, ADD_SP_I | (imm >> 2));
1606
1607 if (imm <= 0xfff)
1608 return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm));
1609
1610 imm2 = get_imm(imm);
1611
1612 if (imm2 != INVALID_IMM)
1613 return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2);
1614
1615 FAIL_IF(load_immediate(compiler, TMP_REG2, imm));
1616 return push_inst16(compiler, ADD_SP | RN3(TMP_REG2));
1617 }
1618
/* Emits the epilogue: restores float registers, releases the locals and
   restores the saved general registers. frame_size selects the variant:
     < 0  : load LR into TMP_REG2 and keep nothing on the stack
            (used before a tail jump through a register),
     == 0 : full return — pop the saved registers with PC in the list,
     > 0  : leave frame_size bytes (rounded down to 8) still allocated and
            do not load LR (used by return_to; frame_size == 1 is allowed
            as a "round to zero" marker, see the assert below). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Float registers were pushed after the locals, so the locals
		   must be released first; VPOP in reverse order of the VPUSHes. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
		}

		/* Only the alignment padding word (if any) remains below the
		   saved general registers now. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	/* Collect the registers to restore; restored_reg remembers one of
	   them for the single-register fast path below. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Nothing else to restore: treat LR itself as the single
		   restored register. */
		reg_list |= (sljit_uw)1 << reg_map[TMP_REG2];
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register to restore and LR stays on the stack:
		   use a plain load plus SP adjustment instead of POP.
		   The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					/* NOTE(review): 0x308 / 0x100 select post/pre-indexed
					   LDR immediate forms here — verify against the
					   Thumb-2 LDR (immediate) T4 encoding. */
					if (frame_size == 0)
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc)
					FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2)));
				else
					FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		/* tmp: 0/1 = done, 2 = register still to load, 3 = load with
		   post-increment below. */
		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			if (reg_map[restored_reg] <= 7)
				return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2));

			return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308;
		return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp);
	}

	/* General path: release the locals, then POP the register list. */
	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) {
		/* All registers are low: 16 bit POP; bit 8 pops into PC. */
		if (lr_dst == TMP_PC)
			reg_list |= 1u << 8;

		/* At least one register must be set for POP instruction. */
		SLJIT_ASSERT(reg_list != 0);

		FAIL_IF(push_inst16(compiler, POP | reg_list));
	} else {
		if (lr_dst != 0)
			reg_list |= (sljit_uw)1 << reg_map[lr_dst];

		/* At least two registers must be set for POP_W instruction. */
		SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0);

		FAIL_IF(push_inst32(compiler, POP_W | reg_list));
	}

	/* Re-reserve the requested residual frame (minus the LR slot the
	   POP consumed), or skip the LR slot when LR was not loaded. */
	if (frame_size > 0)
		return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD_SP_I | 1);
}
1764
/* Emits a void return: the full epilogue with frame_size 0, which pops
   the saved registers and returns by popping into PC. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
1772
/* Releases the current stack frame and jumps to 'src' instead of returning
   to the caller (tail-call style exit). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		/* Load the target while the frame (and its locals) is still valid. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* The target lives in a saved register which the epilogue is about
		   to restore; copy it to a temporary so the address survives. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
		srcw = 0;
	}

	/* frame_size == 1: release the frame but keep lr usable for the jump. */
	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1794
1795 /* --------------------------------------------------------------------- */
1796 /* Operators */
1797 /* --------------------------------------------------------------------- */
1798
/* When the target has no hardware integer divide, division is performed by
   calling the platform runtime helpers declared below. */
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
/* Windows ARM runtime helpers take the denominator first. */
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
/* ARM EABI helpers: return quotient in r0, remainder in r1. */
extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
1820
/* Emits a zero-operand operation. Multiplication/division opcodes operate on
   the fixed register pair R0/R1; when the CPU lacks the integer divide
   extension, a runtime helper is called and any live scratch registers that
   the call would clobber are spilled around it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	/* Machine register numbers (not sljit indices) to spill around the call. */
	sljit_uw saved_reg_list[3];
	sljit_uw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64-bit product: low half to R0, high half to R1. */
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Keep the original dividend; the remainder is computed as
		   R1 = dividend - (quotient * divisor). */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Spill r3/r2 if they are in use as scratches; r1 only matters for
		   plain DIV where the helper's remainder output would clobber it. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* Pre-decrement store keeps sp 8-byte aligned (8 or 16 bytes). */
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* __rt_udiv/__rt_sdiv take the denominator first: swap R0 and R1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv))));
#elif defined(__GNUC__)
		/* (op | 0x2) folds DIVMOD_UW/DIV_UW together to pick signedness. */
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the spilled registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
1914
/* Emits a single-operand operation (moves with optional width conversion,
   plus the unary ALU opcodes). Memory operands are staged through the
   temporary registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Compute into dst directly when it is a register, otherwise into a
	   temporary that is stored afterwards. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		/* Select the access width; immediates are truncated/sign-extended
		   here so the load below can treat them uniformly. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src == SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
		else if (src & SLJIT_MEM)
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		else if (FAST_IS_REG(dst))
			/* Register-to-register move/extension. */
			return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
		else
			/* Register source, memory destination: store src directly. */
			dst_r = src;

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
	}

	SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
	flags = WORD_SIZE;

	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
		/* REGISTER_OP selects the register form of the 16-bit byte swap. */
		if (!(dst & SLJIT_MEM) && (!(src & SLJIT_MEM) || op == SLJIT_REV_S16))
			op |= REGISTER_OP;
		flags |= HALF_SIZE;
	}

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* NOTE(review): the return value is intentionally not checked here;
	   emit_op_imm records failures in compiler->error — confirm. */
	emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG1);
	return SLJIT_SUCCESS;
}
1999
/* Emits a two-operand operation. Immediate operands are flagged so
   emit_op_imm can try immediate instruction forms; memory operands are
   loaded into temporaries first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, src2_tmp_reg, flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	/* TMP_REG1 as destination marks a flag-only operation (see op2u). */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	if (src2 == SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Avoid clobbering src1 when it already occupies a register. */
		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
		/* NOTE(review): the return value is not checked; errors appear to be
		   recorded in compiler->error — confirm. */
		emit_op_mem(compiler, WORD_SIZE, src2_tmp_reg, src2, src2w, TMP_REG1);
		src2w = src2_tmp_reg;
	} else
		src2w = src2;

	if (src1 == SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	} else
		src1w = src1;

	/* src1w/src2w now hold either a register number or an immediate,
	   selected by the ARG1_IMM/ARG2_IMM flags. */
	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
}
2042
/* Two-operand operation whose result is discarded; only the status flags
   are produced. Implemented by targeting TMP_REG1, which sljit_emit_op2
   interprets as UNUSED_RETURN. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
2053
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2054 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2055 sljit_s32 dst_reg,
2056 sljit_s32 src1, sljit_sw src1w,
2057 sljit_s32 src2, sljit_sw src2w)
2058 {
2059 CHECK_ERROR();
2060 CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2061
2062 switch (GET_OPCODE(op)) {
2063 case SLJIT_MULADD:
2064 SLJIT_SKIP_CHECKS(compiler);
2065 return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
2066 }
2067
2068 return SLJIT_SUCCESS;
2069 }
2070
/* Shifts src1_reg left/right and fills the vacated bits from src2_reg
   (a funnel shift), writing the result to dst_reg. src3 supplies the shift
   amount as an immediate, register or memory operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	/* Same register in both halves degenerates into a rotate. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		/* A zero shift would leave src1_reg unchanged. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		if (IS_2_LO_REGS(dst_reg, src1_reg))
			FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
		else
			FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));

		/* Complementary shift amount: 32 - src3w. */
		src3w = (src3w ^ 0x1f) + 1;
		/* OR in the bits of src2_reg shifted the opposite direction
		   (shifted-register operand, 0x10 selects the shift type). */
		return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Masked variants (and the dst == src3 overlap case) need the shift
	   amount reduced mod 32 in a temporary. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
		FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
	else
		FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));

	/* Shift src2_reg by 1 first, then by (31 ^ amount): together this is a
	   (32 - amount) shift without relying on a shift-by-32. */
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
	FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
	return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
}
2127
/* Emits an operation that only consumes a source operand: fast-call return
   and memory prefetch hints. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is mapped to lr; move the return address there and
		   branch to it. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No-op on this architecture. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch hints map to the same preload encoding here. */
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2156
/* Emits an operation that only produces a destination operand: fast-call
   entry (capture lr) and reading the caller's return address from the frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is mapped to lr, which holds the fast-call return address. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
		/* Memory destination: lr is stored by the common code below. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Recompute the prologue's register-save area size to locate the
		   saved return address relative to sp. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		/* The frame must stay 8-byte aligned. */
		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2196
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2197 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2198 {
2199 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2200
2201 if (type == SLJIT_GP_REGISTER)
2202 return reg_map[reg];
2203
2204 if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2205 return freg_map[reg];
2206
2207 if (type != SLJIT_SIMD_REG_128)
2208 return freg_map[reg] & ~0x1;
2209
2210 return -1;
2211 }
2212
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2213 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2214 void *instruction, sljit_u32 size)
2215 {
2216 CHECK_ERROR();
2217 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2218
2219 if (size == 2)
2220 return push_inst16(compiler, *(sljit_u16*)instruction);
2221 return push_inst32(compiler, *(sljit_ins*)instruction);
2222 }
2223
2224 /* --------------------------------------------------------------------- */
2225 /* Floating point operators */
2226 /* --------------------------------------------------------------------- */
2227
2228 #define FPU_LOAD (1 << 20)
2229
emit_fop_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)2230 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
2231 {
2232 sljit_uw imm;
2233 sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));
2234
2235 SLJIT_ASSERT(arg & SLJIT_MEM);
2236
2237 /* Fast loads and stores. */
2238 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
2239 FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6)));
2240 arg = SLJIT_MEM | TMP_REG1;
2241 argw = 0;
2242 }
2243
2244 if ((arg & REG_MASK) && (argw & 0x3) == 0) {
2245 if (!(argw & ~0x3fc))
2246 return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
2247 if (!(-argw & ~0x3fc))
2248 return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
2249 }
2250
2251 if (arg & REG_MASK) {
2252 if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
2253 FAIL_IF(compiler->error);
2254 return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
2255 }
2256
2257 imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
2258 if (imm != INVALID_IMM) {
2259 FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
2260 return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
2261 }
2262
2263 imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
2264 if (imm != INVALID_IMM) {
2265 argw = -argw;
2266 FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
2267 return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
2268 }
2269 }
2270
2271 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
2272 if (arg & REG_MASK)
2273 FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
2274 return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
2275 }
2276
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2277 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2278 sljit_s32 dst, sljit_sw dstw,
2279 sljit_s32 src, sljit_sw srcw)
2280 {
2281 op ^= SLJIT_32;
2282
2283 if (src & SLJIT_MEM) {
2284 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
2285 src = TMP_FREG1;
2286 }
2287
2288 FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));
2289
2290 if (FAST_IS_REG(dst))
2291 return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));
2292
2293 /* Store the integer value from a VFP register. */
2294 return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
2295 }
2296
sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler * compiler,sljit_ins ins,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2297 static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
2298 sljit_s32 dst, sljit_sw dstw,
2299 sljit_s32 src, sljit_sw srcw)
2300 {
2301 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2302
2303 if (FAST_IS_REG(src))
2304 FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
2305 else if (src & SLJIT_MEM) {
2306 /* Load the integer value into a VFP register. */
2307 FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
2308 }
2309 else {
2310 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
2311 FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
2312 }
2313
2314 FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1)));
2315
2316 if (dst & SLJIT_MEM)
2317 return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
2318 return SLJIT_SUCCESS;
2319 }
2320
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2321 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2322 sljit_s32 dst, sljit_sw dstw,
2323 sljit_s32 src, sljit_sw srcw)
2324 {
2325 return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2326 }
2327
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2328 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2329 sljit_s32 dst, sljit_sw dstw,
2330 sljit_s32 src, sljit_sw srcw)
2331 {
2332 return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2333 }
2334
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2335 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2336 sljit_s32 src1, sljit_sw src1w,
2337 sljit_s32 src2, sljit_sw src2w)
2338 {
2339 op ^= SLJIT_32;
2340
2341 if (src1 & SLJIT_MEM) {
2342 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
2343 src1 = TMP_FREG1;
2344 }
2345
2346 if (src2 & SLJIT_MEM) {
2347 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
2348 src2 = TMP_FREG2;
2349 }
2350
2351 FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
2352 FAIL_IF(push_inst32(compiler, VMRS));
2353
2354 if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
2355 return SLJIT_SUCCESS;
2356
2357 FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
2358 return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
2359 }
2360
/* Emits a single-operand floating point operation (move, negate, absolute
   value, and f32<->f64 conversion). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Invert SLJIT_32 so that a set bit selects the f64 instruction form.
	   For F64_FROM_F32 the source precision drives the conversion opcode,
	   so the bit is left alone here and inverted after the VCVT below. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
			else
				/* Memory destination: store the source directly. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		/* Flip the bit so the store below uses the destination precision. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2407
/* Emits a two-operand floating point operation (add, sub, mul, div and
   copysign). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* The SLJIT_32 bit is inverted: set selects the f64 instruction form. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the word of src2 holding its sign bit into TMP_REG1, compute
		   |src1|, then negate the result if src2 was negative (IT MI). */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
		FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
		FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
		return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
2458
/* Loads a 32-bit float constant into a VFP register. When the value fits the
   8-bit VMOV.F32 immediate encoding (NEON/VFPv3), a single instruction is
   emitted; otherwise the bit pattern goes through a core register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the float to its IEEE-754 bit pattern via a union. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only when the low 19 mantissa bits are zero and the
	   exponent is in the small range the immediate form can express. */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		if (exp == 0x20 || exp == 0x1f) {
			/* Pack sign + low exponent bits + 4 mantissa bits into the
			   split 8-bit immediate field of VMOV.F32. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: build the pattern in TMP_REG1 and move it into the FPU. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
}
2490
/* Loads a 64-bit float constant into a VFP register. Uses the single
   instruction VMOV.F64 immediate form when possible, otherwise transfers the
   two 32-bit halves from core registers with VMOV2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the double to its IEEE-754 bit pattern via a union;
	   imm[0] is the low word, imm[1] the high word on this target. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only when every mantissa bit below bit 48 is zero and the
	   exponent fits the immediate form's range. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			/* Pack sign + low exponent bits + 4 mantissa bits into the
			   split 8-bit immediate field; (1 << 8) selects the f64 form. */
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* Both halves equal: reuse TMP_REG1 for both transfer registers. */
	if (u.imm[0] == u.imm[1])
		return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
}
2526
/* Transfers raw bits between core register(s) and a float register without
   conversion. A register pair moves both words of an f64; the direction is
   selected by the opcode (COPY_FROM_F64 sets the to-core bit). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_s32 reg2;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		/* Two core registers <-> one double register (VMOV2). */
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg);
	} else {
		/* Single word transfer; bit 7 selects the upper half for f64. */
		inst = VMOV | VN4(freg) | RT4(reg);

		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	/* (1 << 20) flips the transfer direction to FP-to-core. */
	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst32(compiler, inst);
}
2553
2554 /* --------------------------------------------------------------------- */
2555 /* Conditional instructions */
2556 /* --------------------------------------------------------------------- */
2557
get_cc(struct sljit_compiler * compiler,sljit_s32 type)2558 static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
2559 {
2560 switch (type) {
2561 case SLJIT_EQUAL:
2562 case SLJIT_ATOMIC_STORED:
2563 case SLJIT_F_EQUAL:
2564 case SLJIT_ORDERED_EQUAL:
2565 case SLJIT_UNORDERED_OR_EQUAL:
2566 return 0x0;
2567
2568 case SLJIT_NOT_EQUAL:
2569 case SLJIT_ATOMIC_NOT_STORED:
2570 case SLJIT_F_NOT_EQUAL:
2571 case SLJIT_UNORDERED_OR_NOT_EQUAL:
2572 case SLJIT_ORDERED_NOT_EQUAL:
2573 return 0x1;
2574
2575 case SLJIT_CARRY:
2576 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2577 return 0x2;
2578 /* fallthrough */
2579
2580 case SLJIT_LESS:
2581 return 0x3;
2582
2583 case SLJIT_NOT_CARRY:
2584 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2585 return 0x3;
2586 /* fallthrough */
2587
2588 case SLJIT_GREATER_EQUAL:
2589 return 0x2;
2590
2591 case SLJIT_GREATER:
2592 case SLJIT_UNORDERED_OR_GREATER:
2593 return 0x8;
2594
2595 case SLJIT_LESS_EQUAL:
2596 case SLJIT_F_LESS_EQUAL:
2597 case SLJIT_ORDERED_LESS_EQUAL:
2598 return 0x9;
2599
2600 case SLJIT_SIG_LESS:
2601 case SLJIT_UNORDERED_OR_LESS:
2602 return 0xb;
2603
2604 case SLJIT_SIG_GREATER_EQUAL:
2605 case SLJIT_F_GREATER_EQUAL:
2606 case SLJIT_ORDERED_GREATER_EQUAL:
2607 return 0xa;
2608
2609 case SLJIT_SIG_GREATER:
2610 case SLJIT_F_GREATER:
2611 case SLJIT_ORDERED_GREATER:
2612 return 0xc;
2613
2614 case SLJIT_SIG_LESS_EQUAL:
2615 case SLJIT_UNORDERED_OR_LESS_EQUAL:
2616 return 0xd;
2617
2618 case SLJIT_OVERFLOW:
2619 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2620 return 0x1;
2621 /* fallthrough */
2622
2623 case SLJIT_UNORDERED:
2624 return 0x6;
2625
2626 case SLJIT_NOT_OVERFLOW:
2627 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2628 return 0x0;
2629 /* fallthrough */
2630
2631 case SLJIT_ORDERED:
2632 return 0x7;
2633
2634 case SLJIT_F_LESS:
2635 case SLJIT_ORDERED_LESS:
2636 return 0x4;
2637
2638 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2639 return 0x5;
2640
2641 default: /* SLJIT_JUMP */
2642 SLJIT_UNREACHABLE();
2643 return 0xe;
2644 }
2645 }
2646
sljit_emit_label(struct sljit_compiler * compiler)2647 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2648 {
2649 struct sljit_label *label;
2650
2651 CHECK_ERROR_PTR();
2652 CHECK_PTR(check_sljit_emit_label(compiler));
2653
2654 if (compiler->last_label && compiler->last_label->size == compiler->size)
2655 return compiler->last_label;
2656
2657 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2658 PTR_FAIL_IF(!label);
2659 set_label(label, compiler);
2660 return label;
2661 }
2662
/* Emits a (possibly conditional) jump or call whose target is resolved
   later; JUMP_MAX_SIZE - 1 extra words are reserved so the target constant
   can always be materialized when the code is generated. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	if (type < SLJIT_JUMP) {
		/* Conditional jump: guard the branch with an IT block and keep
		   the condition code in flags (bits 8+) for later passes. */
		jump->flags |= IS_COND;
		cc = get_cc(compiler, type);
		jump->flags |= cc << 8;
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return jump;
}
2695
2696 #ifdef __SOFTFP__
2697
/* Moves the call arguments into the locations required by the soft-float
   calling convention: the first 16 bytes go to r0-r3, the rest to stack
   slots. A first pass records an offset per argument (offsets[]); the moves
   are then emitted in reversed order so no argument register is overwritten
   before it is read. *src (the indirect call target, if any) is redirected
   when its register is needed for an argument. On return *extra_space holds
   the stack space reserved for arguments (a multiple of 8), or 0. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	/* Track where the call target register would land in the layout. */
	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	/* Skip the return type. */
	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: assign offsets. 'types' collects the argument types in
	   reversed order for the second pass. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8 byte aligned. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Arguments spill beyond r0-r3: reserve (8 byte aligned) stack space. */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2)));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* The target register pair is about to be overwritten:
				   preserve the call target in TMP_REG1 (r12) first. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			/* Word arguments are only moved when their source register
			   differs from the destination slot. */
			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The call target moves with its register. */
						*src = (sljit_s32)(1 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
				} else
					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2822
/* After a soft-float call: transfers a floating point return value between
   the core return registers and VFP register 0 (VMOV2 for f64, VMOV for f32). */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		return push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		return push_inst32(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
2832
2833 #else
2834
/* Compacts the floating point arguments into the consecutive VFP registers
   required by the hard-float calling convention. A single precision value
   can share the slot left by a previous one (pending_f32). */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 src = SLJIT_FR0;	/* Next incoming register. */
	sljit_u32 dst = SLJIT_FR0;	/* Next ABI register. */
	sljit_u32 pending_f32 = 0;	/* Half filled f32 slot, or 0. */

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	for (; arg_types != 0; arg_types >>= SLJIT_ARG_SHIFT) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (src != dst)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(dst) | VM4(src)));

			dst++;
			src++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (pending_f32 != 0) {
				/* Back-fill the earlier slot. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(pending_f32) | VM4(src)));
				pending_f32 = 0;
			} else {
				if (src != dst)
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(dst) | VM4(src)));
				pending_f32 = dst;
				dst++;
			}
			src++;
			break;
		}
	}

	return SLJIT_SUCCESS;
}
2871
2872 #endif
2873
/* Emits a function call: marshals the arguments for the target ABI, then
   emits the jump. Tail calls (SLJIT_CALL_RETURN) release the stack frame
   and become plain jumps when possible. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		/* Move the arguments into r0-r3 / stack slots. */
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* A tail call needing no argument stack space is a plain jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Tail call: reload the saved return address before the
			   argument area is released, then return through it. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		/* Transfer a floating point return value to/from VFP register 0. */
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
2930
/* Emits an indirect jump or fast call through a register, memory operand
   or immediate target address. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		/* Register target: a single BX/BLX is enough. */
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
		}

		/* Memory target: plain jumps load straight into pc, calls load
		   into TMP_REG1 and BLX through it.
		   NOTE(review): for type <= SLJIT_JUMP the pc load already
		   performs the jump, yet control continues into the constant
		   jump path below - confirm this fallthrough is intentional. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

	jump->addr = compiler->size;
	/* Maximum number of instructions required for generating a constant. */
	compiler->size += JUMP_MAX_SIZE - 1;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
2963
/* Emits an indirect call: marshals the arguments for the target ABI and
   jumps through sljit_emit_ijump. Tail calls release the frame first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* Tail call: the saved registers are restored below, so a target held
	   in a saved register must be moved to TMP_REG1 first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		/* Marshal the arguments; src may be redirected to TMP_REG1. */
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Tail call: reload the saved return address before the
			   argument area is released, then return through it. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN)
				return push_inst16(compiler, BX | RN3(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3026
3027 #ifdef __SOFTFP__
3028
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3029 static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
3030 {
3031 if (compiler->options & SLJIT_ENTER_REG_ARG) {
3032 if (src == SLJIT_FR0)
3033 return SLJIT_SUCCESS;
3034
3035 SLJIT_SKIP_CHECKS(compiler);
3036 return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
3037 }
3038
3039 if (FAST_IS_REG(src)) {
3040 if (op & SLJIT_32)
3041 return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
3042 return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
3043 }
3044
3045 SLJIT_SKIP_CHECKS(compiler);
3046
3047 if (op & SLJIT_32)
3048 return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
3049 return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
3050 }
3051
3052 #endif /* __SOFTFP__ */
3053
/* Materializes the current condition (type) as 0/1 in dst, either as a
   plain move or combined with AND/OR/XOR into the existing value. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Plain move: ITE block, the second move runs on the inverted
		   condition. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			/* High registers need the 32 bit encoding. */
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			/* The movsi (immediate) instruction does not set flags in IT block. */
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	}

	/* Combine with the current destination value. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	if (op == SLJIT_AND) {
		/* AND needs both branches (and with 1 / and with 0). */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 0 is a no-op: only the true branch is emitted. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));

	if (!(flags & SLJIT_SET_Z))
		return SLJIT_SUCCESS;

	/* The condition must always be set, even if the ORR/EORI is not executed above. */
	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
3106
/* Conditional select: dst_reg = type ? src1 : src2_reg. The sources are
   first canonicalized so that src2_reg already occupies dst_reg, then a
   single conditional move (or conditional constant load) is emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_uw cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* src1 already lives in dst_reg: swap the operands and invert the
	   condition instead of moving it. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			/* The loaded value went into dst_reg: select src2_reg on
			   the inverted condition instead. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (src1 != SLJIT_IMM) {
		/* Single conditional register move in an IT block. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
	}

	tmp = (sljit_uw)src1w;

	if (tmp < 0x10000) {
		/* set low 16 bits, set hi 16 bits to 0. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOVW | RD4(dst_reg)
			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
	}

	/* Try a single instruction constant (MOV or MVN with the inverted
	   value) before falling back to the MOVW/MOVT pair. */
	tmp = get_imm((sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
	}

	tmp = get_imm(~(sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
	}

	/* Two instruction constant: ITT block covering both MOVW and MOVT. */
	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));

	tmp = (sljit_uw)src1w;
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst_reg)
		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
3176
/* Conditional floating point select: dst_freg = type ? src1 : src2_freg,
   implemented with a conditional VFP register move in an IT block. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* NOTE(review): the precision flag is flipped before being merged
	   into VMOV_F32 below - presumably the instruction's size bit is
	   inverted relative to SLJIT_32; confirm against the encoding. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap the operands and invert the condition instead of
			   clobbering src1. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	/* Conditional VFP move guarded by a one instruction IT block. */
	FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
	return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
}
3206
/* Loads or stores a register pair. Unaligned accesses are split into two
   word sized operations; aligned pairs use a single LDRD/STRD. In both
   cases the address is first normalized so the offset fits the encoding. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_uw imm, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	/* Single registers are handled by the generic unaligned path. */
	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
		/* Two word sized accesses: bring the offset into the
		   [-0xff, 0xfff - sizeof(sw)] range accepted by emit_op_mem. */
		if ((mem & REG_MASK) == 0) {
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm == INVALID_IMM) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				memw = 0;
			} else
				FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (mem & OFFS_REG_MASK) {
			/* Fold base + (index << shift) into TMP_REG1. */
			FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
			memw = 0;
			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw < -0xff) {
			/* Zero value can be included in the first case. */
			if ((-memw & 0xfff) <= SSIZE_OF(sw))
				tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff);
			else
				tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff);

			SLJIT_ASSERT(tmp >= (sljit_uw)-memw);
			imm = get_imm(tmp);

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw += (sljit_sw)tmp;
				SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw >= (0x1000 - SSIZE_OF(sw))) {
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm != INVALID_IMM) {
				SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff);
				FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}

		flags = WORD_SIZE;

		SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff);

		if (type & SLJIT_MEM_STORE) {
			flags |= STORE;
		} else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* The base register is also the first destination: load the
			   second word first so the base is not clobbered. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2));
			return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2);
		}

		FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2));
		return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2);
	}

	/* Aligned pair: LDRD/STRD. Bit 23 is the add (U) bit; the offset is
	   encoded in words and must fit into 8 bits, so the address is
	   adjusted (and the U bit cleared for subtracted offsets) below. */
	flags = 1 << 23;

	if ((mem & REG_MASK) == 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm == INVALID_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			memw = 0;
		} else {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}

			SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else if (mem & OFFS_REG_MASK) {
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
		memw = 0;
		mem = SLJIT_MEM1(TMP_REG1);
	} else if (memw < 0) {
		if ((-memw & ~0x3fc) == 0) {
			/* The negated offset fits the subtract form directly. */
			flags = 0;
			memw = -memw >> 2;
		} else {
			tmp = (sljit_uw)(-memw & 0x7fc);
			imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw = (-memw & 0x3fc) >> 2;

				if (tmp <= 0x400)
					flags = 0;
				else
					memw = 0x100 - memw;
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}
	} else if ((memw & ~0x3fc) != 0) {
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
			memw = 0;
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else
		memw >>= 2;

	SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
	return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw);
}
3381
sljit_emit_mem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3382 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
3383 sljit_s32 reg,
3384 sljit_s32 mem, sljit_sw memw)
3385 {
3386 sljit_s32 flags;
3387 sljit_ins inst;
3388
3389 CHECK_ERROR();
3390 CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
3391
3392 if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
3393 return SLJIT_ERR_UNSUPPORTED;
3394
3395 if (type & SLJIT_MEM_SUPP)
3396 return SLJIT_SUCCESS;
3397
3398 switch (type & 0xff) {
3399 case SLJIT_MOV:
3400 case SLJIT_MOV_U32:
3401 case SLJIT_MOV_S32:
3402 case SLJIT_MOV32:
3403 case SLJIT_MOV_P:
3404 flags = WORD_SIZE;
3405 break;
3406 case SLJIT_MOV_U8:
3407 flags = BYTE_SIZE;
3408 break;
3409 case SLJIT_MOV_S8:
3410 flags = BYTE_SIZE | SIGNED;
3411 break;
3412 case SLJIT_MOV_U16:
3413 flags = HALF_SIZE;
3414 break;
3415 case SLJIT_MOV_S16:
3416 flags = HALF_SIZE | SIGNED;
3417 break;
3418 default:
3419 SLJIT_UNREACHABLE();
3420 flags = WORD_SIZE;
3421 break;
3422 }
3423
3424 if (type & SLJIT_MEM_STORE)
3425 flags |= STORE;
3426
3427 inst = sljit_mem32[flags] | 0x900;
3428
3429 if (!(type & SLJIT_MEM_POST))
3430 inst |= 0x400;
3431
3432 if (memw >= 0)
3433 inst |= 0x200;
3434 else
3435 memw = -memw;
3436
3437 return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw);
3438 }
3439
/* Normalizes a memory operand: on return *mem is a base register (the
   original one when the offset already fits, otherwise TMP_REG1) and
   *memw is an offset in the range [-0xff, max_offset].
   Fix: get_imm() reports an unencodable constant by returning the nonzero
   sentinel INVALID_IMM (see the other call sites in this file), so the
   results below must be compared against INVALID_IMM; a plain truth test
   would accept the sentinel and emit an ADD/SUB with a corrupt immediate. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm;

	*mem = TMP_REG1;

	/* Fold base + (index << shift) into TMP_REG1 first. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6));
	}

	arg &= REG_MASK;

	if (arg) {
		/* The offset already fits: keep the original base register. */
		if (argw <= max_offset && argw >= -0xff) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		/* Try to absorb the out-of-range part of the offset with a
		   single ADD/SUB immediate. */
		if (argw < 0) {
			imm = get_imm((sljit_uw)(-argw & ~0xff));

			if (imm != INVALID_IMM) {
				*memw = -(-argw & 0xff);
				return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else if ((argw & 0xfff) <= max_offset) {
			imm = get_imm((sljit_uw)(argw & ~0xfff));

			if (imm != INVALID_IMM) {
				*memw = argw & 0xfff;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else {
			imm = get_imm((sljit_uw)((argw | 0xfff) + 1));

			if (imm != INVALID_IMM) {
				*memw = (argw & 0xfff) - 0x1000;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		}
	}

	/* Fall back: materialize the aligned address in TMP_REG1. */
	imm = (sljit_uw)(argw & ~0xfff);

	if ((argw & 0xfff) > max_offset) {
		imm += 0x1000;
		*memw = (argw & 0xfff) - 0x1000;
	} else
		*memw = argw & 0xfff;

	FAIL_IF(load_immediate(compiler, TMP_REG1, imm));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg));
}
3500
/* Loads or stores a floating point value; unaligned accesses go through
   the core registers with word sized memory operations. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	/* Aligned: the VFP load/store instructions can be used directly. */
	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Unaligned store: move the value to a core register, then store
		   it word by word. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);

		/* Ensure memw and memw + 4 both fit the addressing mode. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
		/* Transfer the other half of the double (0x80 presumably selects
		   the high register of the pair - TODO confirm VMOV encoding). */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	/* Unaligned load: word sized core loads, then move into freg. */
	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
}
3537
/* Resolves a SIMD memory operand into a single base register: *mem_ptr is
   set to a register that holds the full effective address (TMP_REG1 unless
   the plain base can be used directly). */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 base = *mem_ptr;
	sljit_uw imm;

	/* Indexed form: compute base + (index << shift) into TMP_REG1. */
	if (SLJIT_UNLIKELY(base & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(base & REG_MASK) | RM4(OFFS_REG(base)) | ((sljit_uw)(memw & 0x3) << 6));
	}

	/* Absolute address: load it into TMP_REG1. */
	if (SLJIT_UNLIKELY(!(base & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	base &= REG_MASK;

	/* No offset: the base register is usable as-is. */
	if (memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Prefer a single ADD/SUB immediate; otherwise build the offset. */
	imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));
	if (imm != INVALID_IMM)
		return push_inst32(compiler, (memw < 0 ? SUB_WI : ADD_WI) | RD4(TMP_REG1) | RN4(base) | imm);

	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, base));
}
3569
simd_get_quad_reg_index(sljit_s32 freg)3570 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3571 {
3572 freg += freg & 0x1;
3573
3574 SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3575
3576 if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3577 freg--;
3578
3579 return freg;
3580 }
3581
3582 #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3583
/* Moves a whole SIMD register to/from another register or memory.
   reg_size: 3 = 64 bit (D register), 4 = 128 bit (Q register pair). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 64 and 128 bit vectors are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (!(srcdst & SLJIT_MEM)) {
		/* Register-to-register move: VORR with both sources equal. */
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD4(srcdst) | VN4(freg) | VM4(freg);
		else
			ins = VD4(freg) | VN4(srcdst) | VM4(srcdst);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6; /* Q (quad) bit. */

		return push_inst32(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	/* VST1/VLD1 register-list field: 0x7 = one D register,
	   0xa = two D registers (a quad). */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Encode the alignment hint (64 bit / 128+ bit). */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3642
/* Tries to encode value as a NEON modified immediate (the cmode/op
   fields of the VMOV immediate forms). Returns the encoded instruction
   bits, or ~(sljit_ins)0 when the value cannot be encoded. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* If both 16 bit halves are equal, the value reduces to a
	   replicated 16 bit immediate. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* Likewise, equal bytes reduce it to an 8 bit immediate. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				/* Byte in the low half of each lane. */
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				/* Byte shifted into the high half. */
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* Second pass (inverted value) also failed: give up. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the inverted value (VMVN-style form). */
			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			/* A single byte in each possible position. */
			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			/* Byte followed by a run of ones (0x..ff patterns). */
			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			/* Second pass (inverted value) also failed: give up. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the inverted value (VMVN-style form). */
			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the remaining 8 bit payload into its split fields. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result;
}
3736
/* Replicates a scalar (register, immediate or memory operand) into
   every lane of a SIMD register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Integer lanes are at most 32 bit wide on this target. */
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Zero is directly encodable as a VMOV immediate. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64 bit (double) lanes: copy the value into both halves
		   of the quad register with VORR register moves. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* Load and replicate in one step (VLD1 to all lanes). */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= 1 << 5; /* Fill both D registers of the quad. */

		return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		SLJIT_ASSERT(elem_size == 2);
		/* Select the 32 bit source lane inside the D register. */
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate to the lane width. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst32(compiler, VMOV_i | imm | VD4(freg));
		}

		/* Not encodable: materialize it in a core register first. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from a core register; lane size selector bits. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src));
}
3836
/* Moves a single lane between a SIMD register and a general operand
   (register, immediate or memory). SLJIT_SIMD_LANE_ZERO clears the
   remaining lanes first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				/* Double lane from a register: copy it into the
				   selected half, then zero the other half. */
				if (lane_index == 1)
					freg += SLJIT_QUAD_OTHER_HALF(freg);

				if (srcdst != freg)
					FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst)));

				freg += SLJIT_QUAD_OTHER_HALF(freg);
				return push_inst32(compiler, VMOV_i | VD4(freg));
			}

			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
				/* Source overlaps the destination: save it first. */
				FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Zero the whole destination register. */
		FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg)));
	}

	/* Lanes in the upper half of a quad register are reached
	   through the other D register of the pair. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		freg += SLJIT_QUAD_OTHER_HALF(freg);
	}

	if (srcdst & SLJIT_MEM) {
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* Single-lane load/store (VLD1/VST1, one element). */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg));
			return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			if (freg_ebit_map[freg] == 0) {
				/* The lane is addressable as an S register. */
				if (lane_index == 1)
					freg = SLJIT_F64_SECOND(freg);

				return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg));
			}

			/* Otherwise route the value through a core register. */
			FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)));
			return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
		}

		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
		return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Mask the immediate to the lane width. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* Lane size selector for VMOV (scalar <-> core register). */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20);

		/* Zero extend narrow lanes unless a signed load is requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst));
}
3953
/* Broadcasts a single lane of src into every lane of freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half are reached through the other
		   D register of the quad pair. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64 bit lanes: VDUP cannot be used; copy both halves
		   with VORR register moves. */
		if (freg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	/* Scalar-VDUP index field: the lane index above a trailing one
	   bit whose position encodes the lane size. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
}
4002
/* Widens the low lanes of src into freg. Integer lanes are sign or
   zero extended with VSHLL; 32 bit floats are converted to 64 bit
   doubles with VCVT. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* The only supported float widening is f32 -> f64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* Load only as many source bytes as the widening consumes. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf));
		else
			FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf));
		src = freg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;

		/* Double the lane width one VSHLL step at a time until the
		   requested element size is reached. */
		do {
			FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
				| ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (freg == src) {
		/* Destination overlaps the source: convert the high pair
		   first so the source lanes survive. */
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20));
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src));
	}

	FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)));
	freg += SLJIT_QUAD_OTHER_HALF(freg);
	return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20);
}
4067
/* Collects the sign (most significant) bit of every lane into an
   integer bitmask stored in dst, using an initial VSHR followed by a
   series of shift-and-accumulate (VSRA) folding steps. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* imms packs the per-step shift amounts of the VSRA folds (one
	   byte each, consumed low to high); ins is the initial unsigned
	   VSHR that moves each sign bit to the bottom of its lane. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 28) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 28) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 28) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg)));

	/* Narrow the lanes so a quad result fits into one D register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Fold neighbouring bits together, one shift amount per step. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));

	/* Transfer the accumulated mask into a core register. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));

	if (reg_size == 4 && elem_size == 0) {
		/* 16 byte lanes: the upper half of the mask is in the
		   adjacent register (TMP_FREG1); merge it into the result. */
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
		FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4144
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4145 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4146 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4147 {
4148 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4149 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4150 sljit_ins ins = 0;
4151
4152 CHECK_ERROR();
4153 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4154
4155 if (reg_size != 3 && reg_size != 4)
4156 return SLJIT_ERR_UNSUPPORTED;
4157
4158 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4159 return SLJIT_ERR_UNSUPPORTED;
4160
4161 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4162 case SLJIT_SIMD_OP2_AND:
4163 ins = VAND;
4164 break;
4165 case SLJIT_SIMD_OP2_OR:
4166 ins = VORR;
4167 break;
4168 case SLJIT_SIMD_OP2_XOR:
4169 ins = VEOR;
4170 break;
4171 }
4172
4173 if (type & SLJIT_SIMD_TEST)
4174 return SLJIT_SUCCESS;
4175
4176 if (reg_size == 4) {
4177 dst_freg = simd_get_quad_reg_index(dst_freg);
4178 src1_freg = simd_get_quad_reg_index(src1_freg);
4179 src2_freg = simd_get_quad_reg_index(src2_freg);
4180 ins |= (sljit_ins)1 << 6;
4181 }
4182
4183 return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg));
4184 }
4185
4186 #undef FPU_LOAD
4187
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4188 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4189 sljit_s32 dst_reg,
4190 sljit_s32 mem_reg)
4191 {
4192 sljit_ins ins;
4193
4194 CHECK_ERROR();
4195 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4196
4197 switch (GET_OPCODE(op)) {
4198 case SLJIT_MOV_U8:
4199 ins = LDREXB;
4200 break;
4201 case SLJIT_MOV_U16:
4202 ins = LDREXH;
4203 break;
4204 default:
4205 ins = LDREX;
4206 break;
4207 }
4208
4209 return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
4210 }
4211
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4212 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4213 sljit_s32 src_reg,
4214 sljit_s32 mem_reg,
4215 sljit_s32 temp_reg)
4216 {
4217 sljit_ins ins;
4218
4219 /* temp_reg == mem_reg is undefined so use another temp register */
4220 SLJIT_UNUSED_ARG(temp_reg);
4221
4222 CHECK_ERROR();
4223 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4224
4225 switch (GET_OPCODE(op)) {
4226 case SLJIT_MOV_U8:
4227 ins = STREXB | RM4(TMP_REG1);
4228 break;
4229 case SLJIT_MOV_U16:
4230 ins = STREXH | RM4(TMP_REG1);
4231 break;
4232 default:
4233 ins = STREX | RD4(TMP_REG1);
4234 break;
4235 }
4236
4237 FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
4238 if (op & SLJIT_SET_ATOMIC_STORED)
4239 return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));
4240
4241 return SLJIT_SUCCESS;
4242 }
4243
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)4244 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4245 {
4246 struct sljit_const *const_;
4247 sljit_s32 dst_r;
4248
4249 CHECK_ERROR_PTR();
4250 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4251 ADJUST_LOCAL_OFFSET(dst, dstw);
4252
4253 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4254 PTR_FAIL_IF(!const_);
4255 set_const(const_, compiler);
4256
4257 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4258 PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));
4259
4260 if (dst & SLJIT_MEM)
4261 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
4262 return const_;
4263 }
4264
/* Emits a patchable "load address" sequence whose target is resolved
   when the code is generated, and records it as a mov_addr jump. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	/* Emit a placeholder carrying the destination register and reserve
	   three more code units for the rest of the address-loading
	   sequence (presumably filled in by the code generator - the
	   emitter is not visible here). */
	PTR_FAIL_IF(push_inst16(compiler, RDN3(dst_r)));
	compiler->size += 3;

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
	return jump;
}
4286
/* Patches the 32 bit constant embedded at addr (four 16-bit half-words,
   rewritten by modify_imm32_const) so it produces new_target, then
   flushes the instruction cache for the modified range. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	sljit_u16 *inst = (sljit_u16*)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	/* Make the range writable, patch it, then restore execute-only;
	   the cache flush uses the executable view of the address. */
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
	modify_imm32_const(inst, new_target);
	SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
	inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
	SLJIT_CACHE_FLUSH(inst, inst + 4);
}
4298
/* Constants are emitted as full 32 bit immediates, so patching one
   is identical to patching a jump target. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
4303