1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 #ifdef __SOFTFP__
30 return "ARM-Thumb2" SLJIT_CPUINFO " ABI:softfp";
31 #else
32 return "ARM-Thumb2" SLJIT_CPUINFO " ABI:hardfp";
33 #endif
34 }
35
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Maps SLJIT register indices to ARM core register numbers
   (the last entries are sp=13, r12, lr=14 and pc=15 for TMP_PC). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Maps SLJIT float register indices to VFP register numbers. The second
   half of the table repeats the first; presumably it covers the
   SLJIT_F64_SECOND aliases (see function_check_is_freg) — the distinction
   between the halves is carried by freg_ebit_map below. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra encoding bit for each float register (merged into the VFP
   instruction encodings by the VM4/VD4/VN4 macros); 0 for the first half
   of the table, 1 for the second. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};
67
/* Extracts 'bits' bits of 'src' starting at bit 'from' and places them
   at bit position 'to' of the result (all other bits are cleared). */
#define COPY_BITS(src, from, to, bits) \
	((from >= to ? ((sljit_ins)(src) >> (from - to)) : ((sljit_ins)(src) << (to - from))) & (((1 << bits) - 1) << to))

/* Two's complement negation without signed overflow. */
#define NEGATE(uimm) ((sljit_uw)-(sljit_sw)(uimm))

/* Thumb16 encodings: register field placement within a 16 bit opcode. */
#define RD3(rd) ((sljit_ins)reg_map[rd])
#define RN3(rn) ((sljit_ins)reg_map[rn] << 3)
#define RM3(rm) ((sljit_ins)reg_map[rm] << 6)
#define RDN3(rdn) ((sljit_ins)reg_map[rdn] << 8)
#define IMM3(imm) ((sljit_ins)imm << 6)
#define IMM8(imm) ((sljit_ins)imm)

/* Thumb16 helpers. */
/* Encodes rd (split into a 3+1 bit field) and rn for the 16 bit
   "high register" instruction forms (MOV/ADD/CMP with any register). */
#define SET_REGS44(rd, rn) \
	(((sljit_ins)reg_map[rn] << 3) | ((sljit_ins)reg_map[rd] & 0x7) | (((sljit_ins)reg_map[rd] & 0x8) << 4))
/* Low registers (r0-r7) allow the short 16 bit instruction forms. */
#define IS_2_LO_REGS(reg1, reg2) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7)
#define IS_3_LO_REGS(reg1, reg2, reg3) \
	(reg_map[reg1] <= 7 && reg_map[reg2] <= 7 && reg_map[reg3] <= 7)

/* Thumb32 encodings: register field placement within a 32 bit opcode. */
#define RM4(rm) ((sljit_ins)reg_map[rm])
#define RD4(rd) ((sljit_ins)reg_map[rd] << 8)
#define RT4(rt) ((sljit_ins)reg_map[rt] << 12)
#define RN4(rn) ((sljit_ins)reg_map[rn] << 16)

/* VFP register fields: the main 4 bit number plus the extra encoding bit. */
#define VM4(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD4(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN4(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))

/* 5 bit immediate scattered into the Thumb32 shift encoding. */
#define IMM5(imm) \
	(COPY_BITS(imm, 2, 12, 3) | (((sljit_ins)imm & 0x3) << 6))
/* 12 bit immediate scattered into the Thumb32 i:imm3:imm8 fields. */
#define IMM12(imm) \
	(COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | ((sljit_ins)imm & 0xff))
103
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                     */
/* --------------------------------------------------------------------- */

/* dot '.' changed to _
   I immediate form (possibly followed by number of immediate bits).
   Four hex digit values are 16 bit (Thumb16) opcodes, eight hex digit
   values are 32 bit (Thumb32) opcodes. */
#define ADCI		0xf1400000
#define ADCS		0x4140
#define ADC_W		0xeb400000
#define ADD		0x4400
#define ADDS		0x1800
#define ADDSI3		0x1c00
#define ADDSI8		0x3000
#define ADDWI		0xf2000000
#define ADD_SP		0x4485
#define ADD_SP_I	0xb000
#define ADD_W		0xeb000000
#define ADD_WI		0xf1000000
#define ANDI		0xf0000000
#define ANDS		0x4000
#define AND_W		0xea000000
#define ASRS		0x4100
#define ASRSI		0x1000
#define ASR_W		0xfa40f000
#define ASR_WI		0xea4f0020
#define BCC		0xd000
#define BICI		0xf0200000
#define BKPT		0xbe00
#define BLX		0x4780
#define BX		0x4700
#define CLZ		0xfab0f080
#define CMNI_W		0xf1100f00
#define CMP		0x4280
#define CMPI		0x2800
#define CMPI_W		0xf1b00f00
#define CMP_X		0x4500
#define CMP_W		0xebb00f00
#define EORI		0xf0800000
#define EORS		0x4040
#define EOR_W		0xea800000
#define IT		0xbf00
#define LDR		0xf8d00000
#define LDR_SP		0x9800
#define LDRD		0xe9500000
#define LDREX		0xe8500f00
#define LDREXB		0xe8d00f4f
#define LDREXH		0xe8d00f5f
#define LDRI		0xf8500800
#define LSLS		0x4080
#define LSLSI		0x0000
#define LSL_W		0xfa00f000
#define LSL_WI		0xea4f0000
#define LSRS		0x40c0
#define LSRSI		0x0800
#define LSR_W		0xfa20f000
#define LSR_WI		0xea4f0010
#define MOV		0x4600
#define MOVS		0x0000
#define MOVSI		0x2000
#define MOVT		0xf2c00000
#define MOVW		0xf2400000
#define MOV_W		0xea4f0000
#define MOV_WI		0xf04f0000
#define MUL		0xfb00f000
#define MVNS		0x43c0
#define MVN_W		0xea6f0000
#define MVN_WI		0xf06f0000
#define NOP		0xbf00
#define ORNI		0xf0600000
#define ORRI		0xf0400000
#define ORRS		0x4300
#define ORR_W		0xea400000
#define POP		0xbc00
#define POP_W		0xe8bd0000
#define PUSH		0xb400
#define PUSH_W		0xe92d0000
#define REV		0xba00
#define REV_W		0xfa90f080
#define REV16		0xba40
#define REV16_W		0xfa90f090
#define RBIT		0xfa90f0a0
#define RORS		0x41c0
#define ROR_W		0xfa60f000
#define ROR_WI		0xea4f0030
#define RSB_WI		0xf1c00000
#define RSBSI		0x4240
#define SBCI		0xf1600000
#define SBCS		0x4180
#define SBC_W		0xeb600000
#define SDIV		0xfb90f0f0
#define SMULL		0xfb800000
#define STR_SP		0x9000
#define STRD		0xe9400000
#define STREX		0xe8400000
#define STREXB		0xe8c00f40
#define STREXH		0xe8c00f50
#define SUBS		0x1a00
#define SUBSI3		0x1e00
#define SUBSI8		0x3800
#define SUB_W		0xeba00000
#define SUBWI		0xf2a00000
#define SUB_SP_I	0xb080
#define SUB_WI		0xf1a00000
#define SXTB		0xb240
#define SXTB_W		0xfa4ff080
#define SXTH		0xb200
#define SXTH_W		0xfa0ff080
#define TST		0x4200
#define TSTI		0xf0000f00
#define TST_W		0xea000f00
#define UDIV		0xfbb0f0f0
#define UMULL		0xfba00000
#define UXTB		0xb2c0
#define UXTB_W		0xfa5ff080
#define UXTH		0xb280
#define UXTH_W		0xfa1ff080
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VAND		0xef000110
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F32_U32	0xeeb80a40
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VDUP		0xee800b10
#define VDUP_s		0xffb00c00
#define VEOR		0xff000110
#define VLD1		0xf9200000
#define VLD1_r		0xf9a00c00
#define VLD1_s		0xf9a00000
#define VLDR_F32	0xed100a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMOV2		0xec400a10
#define VMOV_i		0xef800010
#define VMOV_s		0xee000b10
#define VMOVN		0xffb20200
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VORR		0xef200110
#define VPOP		0xecbd0b00
#define VPUSH		0xed2d0b00
#define VSHLL		0xef800a10
#define VSHR		0xef800010
#define VSRA		0xef800110
#define VST1		0xf9000000
#define VST1_s		0xf9800000
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40
255
256 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
257
/* Argument checking helper: returns non-zero when 'fr' names a valid
   float scratch, saved or temporary register. For 32 bit operations the
   F64 second-half aliases are folded back to their base register first. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		return 1;
	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		return 1;
	return (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
270
271 #endif /* SLJIT_ARGUMENT_CHECKS */
272
push_inst16(struct sljit_compiler * compiler,sljit_ins inst)273 static sljit_s32 push_inst16(struct sljit_compiler *compiler, sljit_ins inst)
274 {
275 sljit_u16 *ptr;
276 SLJIT_ASSERT(!(inst & 0xffff0000));
277
278 ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_u16));
279 FAIL_IF(!ptr);
280 *ptr = (sljit_u16)(inst);
281 compiler->size++;
282 return SLJIT_SUCCESS;
283 }
284
push_inst32(struct sljit_compiler * compiler,sljit_ins inst)285 static sljit_s32 push_inst32(struct sljit_compiler *compiler, sljit_ins inst)
286 {
287 sljit_u16 *ptr = (sljit_u16*)ensure_buf(compiler, sizeof(sljit_ins));
288 FAIL_IF(!ptr);
289 *ptr++ = (sljit_u16)(inst >> 16);
290 *ptr = (sljit_u16)(inst);
291 compiler->size += 2;
292 return SLJIT_SUCCESS;
293 }
294
emit_imm32_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_uw imm)295 static SLJIT_INLINE sljit_s32 emit_imm32_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
296 {
297 FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
298 | COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));
299 return push_inst32(compiler, MOVT | RD4(dst)
300 | COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
301 }
302
modify_imm32_const(sljit_u16 * inst,sljit_uw new_imm)303 static SLJIT_INLINE void modify_imm32_const(sljit_u16 *inst, sljit_uw new_imm)
304 {
305 sljit_ins dst = inst[1] & 0x0f00;
306 SLJIT_ASSERT(((inst[0] & 0xfbf0) == (MOVW >> 16)) && ((inst[2] & 0xfbf0) == (MOVT >> 16)) && dst == (inst[3] & 0x0f00));
307 inst[0] = (sljit_u16)((MOVW >> 16) | COPY_BITS(new_imm, 12, 0, 4) | COPY_BITS(new_imm, 11, 10, 1));
308 inst[1] = (sljit_u16)(dst | COPY_BITS(new_imm, 8, 12, 3) | (new_imm & 0xff));
309 inst[2] = (sljit_u16)((MOVT >> 16) | COPY_BITS(new_imm, 12 + 16, 0, 4) | COPY_BITS(new_imm, 11 + 16, 10, 1));
310 inst[3] = (sljit_u16)(dst | COPY_BITS(new_imm, 8 + 16, 12, 3) | ((new_imm & 0xff0000) >> 16));
311 }
312
/* Decides which branch form can reach the target of 'jump' and records
   the chosen form (PATCH_TYPE1..5 / PATCH_BL) in jump->flags. Returns the
   number of 16 bit half words saved compared to the worst-case sequence;
   0 keeps the full, rewritable (mov32 based) form. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_u16 *code_ptr, sljit_u16 *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

	if (jump->flags & JUMP_ADDR) {
		/* Branch to ARM code is not optimized yet. */
		if (!(jump->u.target & 0x1))
			return 0;
		/* Distances are measured in half words, relative to code_ptr + 2. */
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		/* Both addresses are buffer relative here, so the executable
		   offset cancels out. */
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2)) >> 1;
	}

	if (jump->flags & IS_COND) {
		SLJIT_ASSERT(!(jump->flags & IS_BL));
		/* Conditional branch, encoding T1: 8 bit signed offset. */
		if (diff <= 127 && diff >= -128) {
			jump->flags |= PATCH_TYPE1;
			return 5;
		}
		/* Conditional branch, encoding T3: 20 bit signed offset. */
		if (diff <= 524287 && diff >= -524288) {
			jump->flags |= PATCH_TYPE2;
			return 4;
		}
		/* +1 comes from the prefix IT instruction. */
		diff--;
		/* IT prefixed unconditional branch: 24 bit signed offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE3;
			return 3;
		}
	}
	else if (jump->flags & IS_BL) {
		/* BL, encoding T1: 24 bit signed offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_BL;
			return 3;
		}
	}
	else {
		/* Unconditional branch, encoding T2: 11 bit signed offset. */
		if (diff <= 1023 && diff >= -1024) {
			jump->flags |= PATCH_TYPE4;
			return 4;
		}
		/* Unconditional branch, encoding T4: 24 bit signed offset. */
		if (diff <= 8388607 && diff >= -8388608) {
			jump->flags |= PATCH_TYPE5;
			return 3;
		}
	}

	return 0;
}
367
/* Writes the final branch instruction(s) for 'jump', using the patch type
   chosen earlier by detect_jump_type (stored in bits 4-7 of jump->flags).
   Type 0 is the rewritable form: only the mov32 immediate is patched. */
static SLJIT_INLINE void set_jump_instruction(struct sljit_jump *jump, sljit_sw executable_offset)
{
	sljit_s32 type = (jump->flags >> 4) & 0xf;
	sljit_sw diff;
	sljit_u16 *jump_inst;
	sljit_s32 s, j1, j2;

	if (SLJIT_UNLIKELY(type == 0)) {
		modify_imm32_const((sljit_u16*)jump->addr, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
		return;
	}

	/* Offset in half words, relative to the end of the 32 bit branch. */
	if (jump->flags & JUMP_ADDR) {
		SLJIT_ASSERT(jump->u.target & 0x1);
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
	}
	else {
		SLJIT_ASSERT(jump->u.label->addr & 0x1);
		diff = ((sljit_sw)(jump->u.label->addr) - (sljit_sw)(jump->addr + sizeof(sljit_u32)) - executable_offset) >> 1;
	}
	jump_inst = (sljit_u16*)jump->addr;

	switch (type) {
	case 1:
		/* Encoding T1 of 'B' instruction */
		SLJIT_ASSERT(diff <= 127 && diff >= -128 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xd000 | (jump->flags & 0xf00) | ((sljit_ins)diff & 0xff));
		return;
	case 2:
		/* Encoding T3 of 'B' instruction */
		SLJIT_ASSERT(diff <= 524287 && diff >= -524288 && (jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xf000 | COPY_BITS(jump->flags, 8, 6, 4) | COPY_BITS(diff, 11, 0, 6) | COPY_BITS(diff, 19, 10, 1));
		jump_inst[1] = (sljit_u16)(0x8000 | COPY_BITS(diff, 17, 13, 1) | COPY_BITS(diff, 18, 11, 1) | ((sljit_ins)diff & 0x7ff));
		return;
	case 3:
		/* An IT prefix makes the following unconditional branch
		   conditional; diff shrinks by one half word accordingly. */
		SLJIT_ASSERT(jump->flags & IS_COND);
		*jump_inst++ = (sljit_u16)(IT | ((jump->flags >> 4) & 0xf0) | 0x8);
		diff--;
		type = 5;
		break;
	case 4:
		/* Encoding T2 of 'B' instruction */
		SLJIT_ASSERT(diff <= 1023 && diff >= -1024 && !(jump->flags & IS_COND));
		jump_inst[0] = (sljit_u16)(0xe000 | (diff & 0x7ff));
		return;
	}

	SLJIT_ASSERT(diff <= 8388607 && diff >= -8388608);

	/* Really complex instruction form for branches: the sign bit and the
	   two J bits are stored in a folded (S xor J) representation. */
	s = (diff >> 23) & 0x1;
	j1 = (~(diff >> 22) ^ s) & 0x1;
	j2 = (~(diff >> 21) ^ s) & 0x1;
	jump_inst[0] = (sljit_u16)(0xf000 | ((sljit_ins)s << 10) | COPY_BITS(diff, 11, 0, 10));
	jump_inst[1] = (sljit_u16)((j1 << 13) | (j2 << 11) | (diff & 0x7ff));

	/* The others have a common form. */
	if (type == 5) /* Encoding T4 of 'B' instruction */
		jump_inst[1] |= 0x9000;
	else if (type == 6) /* Encoding T1 of 'BL' instruction */
		jump_inst[1] |= 0xd000;
	else
		SLJIT_UNREACHABLE();
}
432
/* Final code generation pass: copies the recorded instruction fragments
   into executable memory, resolves label addresses, shortens jumps where
   a smaller branch encoding reaches the target, then patches all branch
   instructions and put_label constants. Returns the code address with the
   Thumb mode bit set. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_u16 *buf_ptr;
	sljit_u16 *buf_end;
	sljit_uw half_count;
	sljit_uw next_addr;
	sljit_sw executable_offset;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	struct sljit_put_label *put_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* compiler->size counts 16 bit half words. */
	code = (sljit_u16*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_u16), compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	half_count = 0;
	next_addr = 0;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	put_label = compiler->put_labels;

	do {
		buf_ptr = (sljit_u16*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 1);
		do {
			*code_ptr = *buf_ptr++;
			/* next_addr caches the smallest pending address, so the
			   per-half-word checks below run only when needed. */
			if (next_addr == half_count) {
				SLJIT_ASSERT(!label || label->size >= half_count);
				SLJIT_ASSERT(!jump || jump->addr >= half_count);
				SLJIT_ASSERT(!const_ || const_->addr >= half_count);
				SLJIT_ASSERT(!put_label || put_label->addr >= half_count);

				/* These structures are ordered by their address. */
				if (label && label->size == half_count) {
					/* Executable label addresses carry the Thumb bit. */
					label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
					label->size = (sljit_uw)(code_ptr - code);
					label = label->next;
				}
				if (jump && jump->addr == half_count) {
					/* Rewind to the start of the jump sequence
					   (conditional ones are 2 bytes longer). */
					jump->addr = (sljit_uw)code_ptr - ((jump->flags & IS_COND) ? 10 : 8);
					/* Reclaim the half words saved by a shorter form. */
					code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
					jump = jump->next;
				}
				if (const_ && const_->addr == half_count) {
					const_->addr = (sljit_uw)code_ptr;
					const_ = const_->next;
				}
				if (put_label && put_label->addr == half_count) {
					SLJIT_ASSERT(put_label->label);
					put_label->addr = (sljit_uw)code_ptr;
					put_label = put_label->next;
				}
				next_addr = compute_next_addr(label, jump, const_, put_label);
			}
			code_ptr++;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may also be attached to the very end of the code. */
	if (label && label->size == half_count) {
		label->addr = ((sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset)) | 0x1;
		label->size = (sljit_uw)(code_ptr - code);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(!put_label);
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);

	/* All addresses are known now: emit the final branch instructions. */
	jump = compiler->jumps;
	while (jump) {
		set_jump_instruction(jump, executable_offset);
		jump = jump->next;
	}

	put_label = compiler->put_labels;
	while (put_label) {
		modify_imm32_const((sljit_u16 *)put_label->addr, put_label->label->addr);
		put_label = put_label->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_u16);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);

	/* Set thumb mode flag. */
	return (void*)((sljit_uw)code | 0x1);
}
544
sljit_has_cpu_feature(sljit_s32 feature_type)545 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
546 {
547 switch (feature_type) {
548 case SLJIT_HAS_FPU:
549 case SLJIT_HAS_F64_AS_F32_PAIR:
550 case SLJIT_HAS_SIMD:
551 #ifdef SLJIT_IS_FPU_AVAILABLE
552 return (SLJIT_IS_FPU_AVAILABLE) != 0;
553 #else
554 /* Available by default. */
555 return 1;
556 #endif
557
558 case SLJIT_SIMD_REGS_ARE_PAIRS:
559 case SLJIT_HAS_CLZ:
560 case SLJIT_HAS_CTZ:
561 case SLJIT_HAS_REV:
562 case SLJIT_HAS_ROT:
563 case SLJIT_HAS_CMOV:
564 case SLJIT_HAS_PREFETCH:
565 case SLJIT_HAS_COPY_F32:
566 case SLJIT_HAS_COPY_F64:
567 case SLJIT_HAS_ATOMIC:
568 return 1;
569
570 default:
571 return 0;
572 }
573 }
574
575 /* --------------------------------------------------------------------- */
576 /* Core code generator functions. */
577 /* --------------------------------------------------------------------- */
578
#define INVALID_IMM	0x80000000
/* Encodes 'imm' as a Thumb-2 modified immediate constant (the 12 bit
   i:imm3:a:bcdefgh form used by data processing instructions). Returns
   INVALID_IMM when the value cannot be represented. */
static sljit_uw get_imm(sljit_uw imm)
{
	/* Thumb immediate form. */
	sljit_s32 counter;

	/* Plain 8 bit constant. */
	if (imm <= 0xff)
		return imm;

	if ((imm & 0xffff) == (imm >> 16)) {
		/* Some special cases: the same byte replicated into
		   two or four positions of the word. */
		if (!(imm & 0xff00))
			return (1 << 12) | (imm & 0xff);	/* 0x00XY00XY */
		if (!(imm & 0xff))
			return (2 << 12) | ((imm >> 8) & 0xff);	/* 0xXY00XY00 */
		if ((imm & 0xff00) == ((imm & 0xff) << 8))
			return (3 << 12) | (imm & 0xff);	/* 0xXYXYXYXY */
	}

	/* Rotated 8 bit constant: normalize so the highest set bit
	   lands at bit 31 and track the rotation in 'counter'. */
	/* Assembly optimization: count leading zeroes? */
	counter = 8;
	if (!(imm & 0xffff0000)) {
		counter += 16;
		imm <<= 16;
	}
	if (!(imm & 0xff000000)) {
		counter += 8;
		imm <<= 8;
	}
	if (!(imm & 0xf0000000)) {
		counter += 4;
		imm <<= 4;
	}
	if (!(imm & 0xc0000000)) {
		counter += 2;
		imm <<= 2;
	}
	if (!(imm & 0x80000000)) {
		counter += 1;
		imm <<= 1;
	}
	/* Since imm >= 128, this must be true. */
	SLJIT_ASSERT(counter <= 31);

	/* All significant bits must fit into the top 8 bits now. */
	if (imm & 0x00ffffff)
		return INVALID_IMM; /* Cannot be encoded. */

	/* Seven payload bits plus the 5 bit rotation amount (counter)
	   scattered into the instruction encoding. */
	return ((imm >> 24) & 0x7f) | COPY_BITS(counter, 4, 26, 1) | COPY_BITS(counter, 1, 12, 3) | COPY_BITS(counter, 0, 7, 1);
}
628
/* Loads a 32 bit constant into 'dst' without affecting the status flags,
   using the shortest available instruction sequence. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
{
	sljit_uw tmp;

	/* MOVS cannot be used since it destroys flags. */

	if (imm >= 0x10000) {
		/* A single instruction when the constant (or its bitwise
		   inverse) is a valid modified immediate. */
		tmp = get_imm(imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MOV_WI | RD4(dst) | tmp);
		tmp = get_imm(~imm);
		if (tmp != INVALID_IMM)
			return push_inst32(compiler, MVN_WI | RD4(dst) | tmp);
	}

	/* set low 16 bits, set hi 16 bits to 0. */
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst)
		| COPY_BITS(imm, 12, 16, 4) | COPY_BITS(imm, 11, 26, 1) | COPY_BITS(imm, 8, 12, 3) | (imm & 0xff)));

	/* set hi 16 bit if needed. */
	if (imm >= 0x10000)
		return push_inst32(compiler, MOVT | RD4(dst)
			| COPY_BITS(imm, 12 + 16, 16, 4) | COPY_BITS(imm, 11 + 16, 26, 1) | COPY_BITS(imm, 8 + 16, 12, 3) | ((imm & 0xff0000) >> 16));
	return SLJIT_SUCCESS;
}
654
/* Flag bits for emit_op_imm: which operand is an immediate. */
#define ARG1_IMM	0x0010000
#define ARG2_IMM	0x0020000
/* SET_FLAGS must be 0x100000 as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	0x0100000
/* The result of the operation is discarded (compare/test forms). */
#define UNUSED_RETURN	0x0200000
660
emit_op_imm(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 dst,sljit_uw arg1,sljit_uw arg2)661 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_uw arg1, sljit_uw arg2)
662 {
663 /* dst must be register, TMP_REG1
664 arg1 must be register, imm
665 arg2 must be register, imm */
666 sljit_s32 reg;
667 sljit_uw imm, imm2;
668
669 if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
670 /* Both are immediates, no temporaries are used. */
671 flags &= ~ARG1_IMM;
672 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
673 arg1 = TMP_REG1;
674 }
675
676 if (flags & (ARG1_IMM | ARG2_IMM)) {
677 reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
678 imm = (flags & ARG2_IMM) ? arg2 : arg1;
679
680 switch (flags & 0xffff) {
681 case SLJIT_CLZ:
682 case SLJIT_CTZ:
683 case SLJIT_REV:
684 case SLJIT_REV_U16:
685 case SLJIT_REV_S16:
686 case SLJIT_REV_U32:
687 case SLJIT_REV_S32:
688 case SLJIT_MUL:
689 /* No form with immediate operand. */
690 break;
691 case SLJIT_MOV:
692 SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG2);
693 return load_immediate(compiler, dst, imm);
694 case SLJIT_ADD:
695 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
696 imm2 = NEGATE(imm);
697 if (IS_2_LO_REGS(reg, dst)) {
698 if (imm <= 0x7)
699 return push_inst16(compiler, ADDSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
700 if (imm2 <= 0x7)
701 return push_inst16(compiler, SUBSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
702 if (reg == dst) {
703 if (imm <= 0xff)
704 return push_inst16(compiler, ADDSI8 | IMM8(imm) | RDN3(dst));
705 if (imm2 <= 0xff)
706 return push_inst16(compiler, SUBSI8 | IMM8(imm2) | RDN3(dst));
707 }
708 }
709 if (!(flags & SET_FLAGS)) {
710 if (imm <= 0xfff)
711 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm));
712 if (imm2 <= 0xfff)
713 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm2));
714 }
715 imm2 = get_imm(imm);
716 if (imm2 != INVALID_IMM)
717 return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
718 imm = get_imm(NEGATE(imm));
719 if (imm != INVALID_IMM)
720 return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
721 break;
722 case SLJIT_ADDC:
723 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
724 imm2 = get_imm(imm);
725 if (imm2 != INVALID_IMM)
726 return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
727 if (flags & ARG2_IMM) {
728 imm = get_imm(~imm);
729 if (imm != INVALID_IMM)
730 return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
731 }
732 break;
733 case SLJIT_SUB:
734 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
735 if (flags & ARG1_IMM) {
736 if (imm == 0 && IS_2_LO_REGS(reg, dst))
737 return push_inst16(compiler, RSBSI | RD3(dst) | RN3(reg));
738 imm = get_imm(imm);
739 if (imm != INVALID_IMM)
740 return push_inst32(compiler, RSB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
741 break;
742 }
743 if (flags & UNUSED_RETURN) {
744 if (imm <= 0xff && reg_map[reg] <= 7)
745 return push_inst16(compiler, CMPI | IMM8(imm) | RDN3(reg));
746 imm2 = get_imm(imm);
747 if (imm2 != INVALID_IMM)
748 return push_inst32(compiler, CMPI_W | RN4(reg) | imm2);
749 imm = get_imm(NEGATE(imm));
750 if (imm != INVALID_IMM)
751 return push_inst32(compiler, CMNI_W | RN4(reg) | imm);
752 break;
753 }
754 imm2 = NEGATE(imm);
755 if (IS_2_LO_REGS(reg, dst)) {
756 if (imm <= 0x7)
757 return push_inst16(compiler, SUBSI3 | IMM3(imm) | RD3(dst) | RN3(reg));
758 if (imm2 <= 0x7)
759 return push_inst16(compiler, ADDSI3 | IMM3(imm2) | RD3(dst) | RN3(reg));
760 if (reg == dst) {
761 if (imm <= 0xff)
762 return push_inst16(compiler, SUBSI8 | IMM8(imm) | RDN3(dst));
763 if (imm2 <= 0xff)
764 return push_inst16(compiler, ADDSI8 | IMM8(imm2) | RDN3(dst));
765 }
766 }
767 if (!(flags & SET_FLAGS)) {
768 if (imm <= 0xfff)
769 return push_inst32(compiler, SUBWI | RD4(dst) | RN4(reg) | IMM12(imm));
770 if (imm2 <= 0xfff)
771 return push_inst32(compiler, ADDWI | RD4(dst) | RN4(reg) | IMM12(imm2));
772 }
773 imm2 = get_imm(imm);
774 if (imm2 != INVALID_IMM)
775 return push_inst32(compiler, SUB_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
776 imm = get_imm(NEGATE(imm));
777 if (imm != INVALID_IMM)
778 return push_inst32(compiler, ADD_WI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
779 break;
780 case SLJIT_SUBC:
781 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
782 if (flags & ARG1_IMM)
783 break;
784 imm2 = get_imm(imm);
785 if (imm2 != INVALID_IMM)
786 return push_inst32(compiler, SBCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
787 imm = get_imm(~imm);
788 if (imm != INVALID_IMM)
789 return push_inst32(compiler, ADCI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
790 break;
791 case SLJIT_AND:
792 imm2 = get_imm(imm);
793 if (imm2 != INVALID_IMM)
794 return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TSTI : ANDI) | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
795 imm = get_imm(~imm);
796 if (imm != INVALID_IMM)
797 return push_inst32(compiler, BICI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
798 break;
799 case SLJIT_OR:
800 imm2 = get_imm(imm);
801 if (imm2 != INVALID_IMM)
802 return push_inst32(compiler, ORRI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm2);
803 imm = get_imm(~imm);
804 if (imm != INVALID_IMM)
805 return push_inst32(compiler, ORNI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
806 break;
807 case SLJIT_XOR:
808 if (imm == (sljit_uw)-1) {
809 if (IS_2_LO_REGS(dst, reg))
810 return push_inst16(compiler, MVNS | RD3(dst) | RN3(reg));
811 return push_inst32(compiler, MVN_W | (flags & SET_FLAGS) | RD4(dst) | RM4(reg));
812 }
813 imm = get_imm(imm);
814 if (imm != INVALID_IMM)
815 return push_inst32(compiler, EORI | (flags & SET_FLAGS) | RD4(dst) | RN4(reg) | imm);
816 break;
817 case SLJIT_SHL:
818 case SLJIT_MSHL:
819 case SLJIT_LSHR:
820 case SLJIT_MLSHR:
821 case SLJIT_ASHR:
822 case SLJIT_MASHR:
823 case SLJIT_ROTL:
824 case SLJIT_ROTR:
825 if (flags & ARG1_IMM)
826 break;
827 imm &= 0x1f;
828
829 if (imm == 0) {
830 if (!(flags & SET_FLAGS))
831 return push_inst16(compiler, MOV | SET_REGS44(dst, reg));
832 if (IS_2_LO_REGS(dst, reg))
833 return push_inst16(compiler, MOVS | RD3(dst) | RN3(reg));
834 return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(dst) | RM4(reg));
835 }
836
837 switch (flags & 0xffff) {
838 case SLJIT_SHL:
839 case SLJIT_MSHL:
840 if (IS_2_LO_REGS(dst, reg))
841 return push_inst16(compiler, LSLSI | RD3(dst) | RN3(reg) | (imm << 6));
842 return push_inst32(compiler, LSL_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
843 case SLJIT_LSHR:
844 case SLJIT_MLSHR:
845 if (IS_2_LO_REGS(dst, reg))
846 return push_inst16(compiler, LSRSI | RD3(dst) | RN3(reg) | (imm << 6));
847 return push_inst32(compiler, LSR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
848 case SLJIT_ASHR:
849 case SLJIT_MASHR:
850 if (IS_2_LO_REGS(dst, reg))
851 return push_inst16(compiler, ASRSI | RD3(dst) | RN3(reg) | (imm << 6));
852 return push_inst32(compiler, ASR_WI | (flags & SET_FLAGS) | RD4(dst) | RM4(reg) | IMM5(imm));
853 case SLJIT_ROTL:
854 imm = (imm ^ 0x1f) + 1;
855 /* fallthrough */
856 default: /* SLJIT_ROTR */
857 return push_inst32(compiler, ROR_WI | RD4(dst) | RM4(reg) | IMM5(imm));
858 }
859 default:
860 SLJIT_UNREACHABLE();
861 break;
862 }
863
864 if (flags & ARG2_IMM) {
865 imm = arg2;
866 arg2 = (arg1 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
867 FAIL_IF(load_immediate(compiler, (sljit_s32)arg2, imm));
868 } else {
869 imm = arg1;
870 arg1 = (arg2 == TMP_REG1) ? TMP_REG2 : TMP_REG1;
871 FAIL_IF(load_immediate(compiler, (sljit_s32)arg1, imm));
872 }
873
874 SLJIT_ASSERT(arg1 != arg2);
875 }
876
877 /* Both arguments are registers. */
878 switch (flags & 0xffff) {
879 case SLJIT_MOV:
880 case SLJIT_MOV_U32:
881 case SLJIT_MOV_S32:
882 case SLJIT_MOV32:
883 case SLJIT_MOV_P:
884 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
885 if (dst == (sljit_s32)arg2)
886 return SLJIT_SUCCESS;
887 return push_inst16(compiler, MOV | SET_REGS44(dst, arg2));
888 case SLJIT_MOV_U8:
889 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
890 if (IS_2_LO_REGS(dst, arg2))
891 return push_inst16(compiler, UXTB | RD3(dst) | RN3(arg2));
892 return push_inst32(compiler, UXTB_W | RD4(dst) | RM4(arg2));
893 case SLJIT_MOV_S8:
894 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
895 if (IS_2_LO_REGS(dst, arg2))
896 return push_inst16(compiler, SXTB | RD3(dst) | RN3(arg2));
897 return push_inst32(compiler, SXTB_W | RD4(dst) | RM4(arg2));
898 case SLJIT_MOV_U16:
899 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
900 if (IS_2_LO_REGS(dst, arg2))
901 return push_inst16(compiler, UXTH | RD3(dst) | RN3(arg2));
902 return push_inst32(compiler, UXTH_W | RD4(dst) | RM4(arg2));
903 case SLJIT_MOV_S16:
904 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG2);
905 if (IS_2_LO_REGS(dst, arg2))
906 return push_inst16(compiler, SXTH | RD3(dst) | RN3(arg2));
907 return push_inst32(compiler, SXTH_W | RD4(dst) | RM4(arg2));
908 case SLJIT_CLZ:
909 SLJIT_ASSERT(arg1 == TMP_REG2);
910 return push_inst32(compiler, CLZ | RN4(arg2) | RD4(dst) | RM4(arg2));
911 case SLJIT_CTZ:
912 SLJIT_ASSERT(arg1 == TMP_REG2);
913 FAIL_IF(push_inst32(compiler, RBIT | RN4(arg2) | RD4(dst) | RM4(arg2)));
914 return push_inst32(compiler, CLZ | RN4(dst) | RD4(dst) | RM4(dst));
915 case SLJIT_REV:
916 case SLJIT_REV_U32:
917 case SLJIT_REV_S32:
918 SLJIT_ASSERT(arg1 == TMP_REG2);
919 if (IS_2_LO_REGS(dst, arg2))
920 return push_inst16(compiler, REV | RD3(dst) | RN3(arg2));
921 return push_inst32(compiler, REV_W | RN4(arg2) | RD4(dst) | RM4(arg2));
922 case SLJIT_REV_U16:
923 case SLJIT_REV_S16:
924 SLJIT_ASSERT(arg1 == TMP_REG2 && dst != TMP_REG2);
925
926 flags &= 0xffff;
927 if (IS_2_LO_REGS(dst, arg2))
928 FAIL_IF(push_inst16(compiler, REV16 | RD3(dst) | RN3(arg2)));
929 else
930 FAIL_IF(push_inst32(compiler, REV16_W | RN4(arg2) | RD4(dst) | RM4(arg2)));
931
932 if (dst == TMP_REG1 || (arg2 == TMP_REG1 && flags == SLJIT_REV_U16))
933 return SLJIT_SUCCESS;
934
935 if (reg_map[dst] <= 7)
936 return push_inst16(compiler, (flags == SLJIT_REV_U16 ? UXTH : SXTH) | RD3(dst) | RN3(dst));
937 return push_inst32(compiler, (flags == SLJIT_REV_U16 ? UXTH_W : SXTH_W) | RD4(dst) | RM4(dst));
938 case SLJIT_ADD:
939 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
940 if (IS_3_LO_REGS(dst, arg1, arg2))
941 return push_inst16(compiler, ADDS | RD3(dst) | RN3(arg1) | RM3(arg2));
942 if (dst == (sljit_s32)arg1 && !(flags & SET_FLAGS))
943 return push_inst16(compiler, ADD | SET_REGS44(dst, arg2));
944 return push_inst32(compiler, ADD_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
945 case SLJIT_ADDC:
946 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
947 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
948 return push_inst16(compiler, ADCS | RD3(dst) | RN3(arg2));
949 return push_inst32(compiler, ADC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
950 case SLJIT_SUB:
951 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
952 if (flags & UNUSED_RETURN) {
953 if (IS_2_LO_REGS(arg1, arg2))
954 return push_inst16(compiler, CMP | RD3(arg1) | RN3(arg2));
955 return push_inst16(compiler, CMP_X | SET_REGS44(arg1, arg2));
956 }
957 if (IS_3_LO_REGS(dst, arg1, arg2))
958 return push_inst16(compiler, SUBS | RD3(dst) | RN3(arg1) | RM3(arg2));
959 return push_inst32(compiler, SUB_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
960 case SLJIT_SUBC:
961 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
962 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
963 return push_inst16(compiler, SBCS | RD3(dst) | RN3(arg2));
964 return push_inst32(compiler, SBC_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
965 case SLJIT_MUL:
966 compiler->status_flags_state = 0;
967 if (!(flags & SET_FLAGS))
968 return push_inst32(compiler, MUL | RD4(dst) | RN4(arg1) | RM4(arg2));
969 SLJIT_ASSERT(dst != TMP_REG2);
970 FAIL_IF(push_inst32(compiler, SMULL | RT4(dst) | RD4(TMP_REG2) | RN4(arg1) | RM4(arg2)));
971 /* cmp TMP_REG2, dst asr #31. */
972 return push_inst32(compiler, CMP_W | RN4(TMP_REG2) | 0x70e0 | RM4(dst));
973 case SLJIT_AND:
974 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
975 return push_inst16(compiler, ANDS | RD3(dst) | RN3(arg2));
976 if ((flags & UNUSED_RETURN) && IS_2_LO_REGS(arg1, arg2))
977 return push_inst16(compiler, TST | RD3(arg1) | RN3(arg2));
978 return push_inst32(compiler, ((flags & UNUSED_RETURN) ? TST_W : AND_W) | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
979 case SLJIT_OR:
980 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
981 return push_inst16(compiler, ORRS | RD3(dst) | RN3(arg2));
982 return push_inst32(compiler, ORR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
983 case SLJIT_XOR:
984 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
985 return push_inst16(compiler, EORS | RD3(dst) | RN3(arg2));
986 return push_inst32(compiler, EOR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
987 case SLJIT_MSHL:
988 FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f));
989 arg2 = TMP_REG2;
990 /* fallthrough */
991 case SLJIT_SHL:
992 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
993 return push_inst16(compiler, LSLS | RD3(dst) | RN3(arg2));
994 return push_inst32(compiler, LSL_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
995 case SLJIT_MLSHR:
996 FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f));
997 arg2 = TMP_REG2;
998 /* fallthrough */
999 case SLJIT_LSHR:
1000 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1001 return push_inst16(compiler, LSRS | RD3(dst) | RN3(arg2));
1002 return push_inst32(compiler, LSR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1003 case SLJIT_MASHR:
1004 FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(arg2) | 0x1f));
1005 arg2 = TMP_REG2;
1006 /* fallthrough */
1007 case SLJIT_ASHR:
1008 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1009 return push_inst16(compiler, ASRS | RD3(dst) | RN3(arg2));
1010 return push_inst32(compiler, ASR_W | (flags & SET_FLAGS) | RD4(dst) | RN4(arg1) | RM4(arg2));
1011 case SLJIT_ROTL:
1012 FAIL_IF(push_inst32(compiler, RSB_WI | RD4(TMP_REG2) | RN4(arg2) | 0));
1013 arg2 = TMP_REG2;
1014 /* fallthrough */
1015 case SLJIT_ROTR:
1016 if (dst == (sljit_s32)arg1 && IS_2_LO_REGS(dst, arg2))
1017 return push_inst16(compiler, RORS | RD3(dst) | RN3(arg2));
1018 return push_inst32(compiler, ROR_W | RD4(dst) | RN4(arg1) | RM4(arg2));
1019 }
1020
1021 SLJIT_UNREACHABLE();
1022 return SLJIT_SUCCESS;
1023 }
1024
/* Flag bits for emit_op_mem: direction and signedness. */
#define STORE 0x01
#define SIGNED 0x02

/* Operand size selector; PRELOAD picks the pld entry (load-only). */
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x04
#define HALF_SIZE 0x08
#define PRELOAD 0x0c

#define IS_WORD_SIZE(flags) (!((flags) & (BYTE_SIZE | HALF_SIZE)))
/* True when argw is non-negative, fits in 'imm' after scaling, and is
   aligned to 2^shift. */
#define ALIGN_CHECK(argw, imm, shift) (!((argw) & ~((imm) << (shift))))
1035
1036 /*
1037 1st letter:
1038 w = word
1039 b = byte
1040 h = half
1041
1042 2nd letter:
1043 s = signed
1044 u = unsigned
1045
1046 3rd letter:
1047 l = load
1048 s = store
1049 */
1050
/* 16 bit load/store opcodes with register offset addressing, indexed by
   the size/signed/store flag bits (see the legend above). */
static const sljit_ins sljit_mem16[12] = {
/* w u l */ 0x5800 /* ldr */,
/* w u s */ 0x5000 /* str */,
/* w s l */ 0x5800 /* ldr */,
/* w s s */ 0x5000 /* str */,

/* b u l */ 0x5c00 /* ldrb */,
/* b u s */ 0x5400 /* strb */,
/* b s l */ 0x5600 /* ldrsb */,
/* b s s */ 0x5400 /* strb */,

/* h u l */ 0x5a00 /* ldrh */,
/* h u s */ 0x5200 /* strh */,
/* h s l */ 0x5e00 /* ldrsh */,
/* h s s */ 0x5200 /* strh */,
};
1067
/* 16 bit load/store opcodes with a 5 bit scaled immediate offset.
   A zero entry means the access has no 16 bit immediate encoding. */
static const sljit_ins sljit_mem16_imm5[12] = {
/* w u l */ 0x6800 /* ldr imm5 */,
/* w u s */ 0x6000 /* str imm5 */,
/* w s l */ 0x6800 /* ldr imm5 */,
/* w s s */ 0x6000 /* str imm5 */,

/* b u l */ 0x7800 /* ldrb imm5 */,
/* b u s */ 0x7000 /* strb imm5 */,
/* b s l */ 0x0000 /* not allowed */,
/* b s s */ 0x7000 /* strb imm5 */,

/* h u l */ 0x8800 /* ldrh imm5 */,
/* h u s */ 0x8000 /* strh imm5 */,
/* h s l */ 0x0000 /* not allowed */,
/* h s s */ 0x8000 /* strh imm5 */,
};
1084
/* Addressing-mode selector bits for the 32 bit forms below. */
#define MEM_IMM8 0xc00
#define MEM_IMM12 0x800000
/* 32 bit (wide) load/store opcodes; the 13th entry is the preload form. */
static const sljit_ins sljit_mem32[13] = {
/* w u l */ 0xf8500000 /* ldr.w */,
/* w u s */ 0xf8400000 /* str.w */,
/* w s l */ 0xf8500000 /* ldr.w */,
/* w s s */ 0xf8400000 /* str.w */,

/* b u l */ 0xf8100000 /* ldrb.w */,
/* b u s */ 0xf8000000 /* strb.w */,
/* b s l */ 0xf9100000 /* ldrsb.w */,
/* b s s */ 0xf8000000 /* strb.w */,

/* h u l */ 0xf8300000 /* ldrh.w */,
/* h u s */ 0xf8200000 /* strh.w */,
/* h s l */ 0xf9300000 /* ldrsh.w */,
/* h s s */ 0xf8200000 /* strh.w */,

/* p u l */ 0xf8100000 /* pld */,
};
1105
1106 /* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
/* Helper function. Dst should be reg + value, using at most 1 instruction, flags does not set. */
static sljit_s32 emit_set_delta(struct sljit_compiler *compiler, sljit_s32 dst, sljit_s32 reg, sljit_sw value)
{
	sljit_ins narrow_op;
	sljit_ins wide_op;
	sljit_uw encoded;

	/* Select add or subtract forms based on the sign of the delta. */
	if (value >= 0) {
		narrow_op = ADDWI;
		wide_op = ADD_WI;
	} else {
		value = -value;
		narrow_op = SUBWI;
		wide_op = SUB_WI;
	}

	/* Plain 12 bit immediate (addw / subw). */
	if (value <= 0xfff)
		return push_inst32(compiler, narrow_op | RD4(dst) | RN4(reg) | IMM12(value));

	/* Thumb-2 modified immediate encoding. */
	encoded = get_imm((sljit_uw)value);
	if (encoded != INVALID_IMM)
		return push_inst32(compiler, wide_op | RD4(dst) | RN4(reg) | encoded);

	/* No single-instruction encoding exists for this delta. */
	return SLJIT_ERR_UNSUPPORTED;
}
1128
emit_op_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 tmp_reg)1129 static SLJIT_INLINE sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
1130 sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
1131 {
1132 sljit_s32 other_r;
1133 sljit_uw imm, tmp;
1134
1135 SLJIT_ASSERT(arg & SLJIT_MEM);
1136 SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -0xff && argw <= 0xfff));
1137
1138 if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
1139 imm = get_imm((sljit_uw)argw & ~(sljit_uw)0xfff);
1140 if (imm != INVALID_IMM) {
1141 FAIL_IF(push_inst32(compiler, MOV_WI | RD4(tmp_reg) | imm));
1142 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg) | (argw & 0xfff));
1143 }
1144
1145 FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1146 if (IS_2_LO_REGS(reg, tmp_reg) && sljit_mem16_imm5[flags])
1147 return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(tmp_reg));
1148 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(tmp_reg));
1149 }
1150
1151 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
1152 argw &= 0x3;
1153 other_r = OFFS_REG(arg);
1154 arg &= REG_MASK;
1155
1156 if (!argw && IS_3_LO_REGS(reg, arg, other_r))
1157 return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(other_r));
1158 return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(other_r) | ((sljit_ins)argw << 4));
1159 }
1160
1161 arg &= REG_MASK;
1162
1163 if (argw > 0xfff) {
1164 imm = get_imm((sljit_uw)(argw & ~0xfff));
1165 if (imm != INVALID_IMM) {
1166 push_inst32(compiler, ADD_WI | RD4(tmp_reg) | RN4(arg) | imm);
1167 arg = tmp_reg;
1168 argw = argw & 0xfff;
1169 }
1170 }
1171 else if (argw < -0xff) {
1172 tmp = (sljit_uw)((-argw + 0xfff) & ~0xfff);
1173 SLJIT_ASSERT(tmp >= (sljit_uw)-argw);
1174 imm = get_imm(tmp);
1175
1176 if (imm != INVALID_IMM) {
1177 push_inst32(compiler, SUB_WI | RD4(tmp_reg) | RN4(arg) | imm);
1178 arg = tmp_reg;
1179 argw += (sljit_sw)tmp;
1180
1181 SLJIT_ASSERT(argw >= 0 && argw <= 0xfff);
1182 }
1183 }
1184
1185 /* 16 bit instruction forms. */
1186 if (IS_2_LO_REGS(reg, arg) && sljit_mem16_imm5[flags]) {
1187 tmp = 3;
1188 if (IS_WORD_SIZE(flags)) {
1189 if (ALIGN_CHECK(argw, 0x1f, 2))
1190 tmp = 2;
1191 }
1192 else if (flags & BYTE_SIZE)
1193 {
1194 if (ALIGN_CHECK(argw, 0x1f, 0))
1195 tmp = 0;
1196 }
1197 else {
1198 SLJIT_ASSERT(flags & HALF_SIZE);
1199 if (ALIGN_CHECK(argw, 0x1f, 1))
1200 tmp = 1;
1201 }
1202
1203 if (tmp < 3)
1204 return push_inst16(compiler, sljit_mem16_imm5[flags] | RD3(reg) | RN3(arg) | ((sljit_ins)argw << (6 - tmp)));
1205 }
1206 else if (SLJIT_UNLIKELY(arg == SLJIT_SP) && IS_WORD_SIZE(flags) && ALIGN_CHECK(argw, 0xff, 2) && reg_map[reg] <= 7) {
1207 /* SP based immediate. */
1208 return push_inst16(compiler, STR_SP | (sljit_ins)((flags & STORE) ? 0 : 0x800) | RDN3(reg) | ((sljit_ins)argw >> 2));
1209 }
1210
1211 if (argw >= 0 && argw <= 0xfff)
1212 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM12 | RT4(reg) | RN4(arg) | (sljit_ins)argw);
1213 else if (argw < 0 && argw >= -0xff)
1214 return push_inst32(compiler, sljit_mem32[flags] | MEM_IMM8 | RT4(reg) | RN4(arg) | (sljit_ins)-argw);
1215
1216 SLJIT_ASSERT(arg != tmp_reg);
1217
1218 FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1219 if (IS_3_LO_REGS(reg, arg, tmp_reg))
1220 return push_inst16(compiler, sljit_mem16[flags] | RD3(reg) | RN3(arg) | RM3(tmp_reg));
1221 return push_inst32(compiler, sljit_mem32[flags] | RT4(reg) | RN4(arg) | RM4(tmp_reg));
1222 }
1223
1224 #undef ALIGN_CHECK
1225 #undef IS_WORD_SIZE
1226
1227 /* --------------------------------------------------------------------- */
1228 /* Entry, exit */
1229 /* --------------------------------------------------------------------- */
1230
/* Emits the function prologue: pushes the saved integer registers (and lr),
   saves the required float registers, moves the incoming arguments into
   their sljit registers (ABI-dependent: softfp passes floats in core
   registers / on the stack, hardfp in VFP registers), and allocates the
   local area while keeping the stack 8 byte aligned. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 size, i, tmp, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_uw offset;
	sljit_uw imm = 0;
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Deferred VFP register shuffle; at most 3 moves are ever queued. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Collect the register mask to push: non-kept saved registers ... */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	/* ... plus the callee-saved scratch registers. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	/* At least two registers must be set for PUSH_W and one for PUSH instruction. */
	FAIL_IF((imm & 0xff00)
		? push_inst32(compiler, PUSH_W | (1 << 14) | imm)
		: push_inst16(compiler, PUSH | (1 << 8) | imm));

	/* Stack must be aligned to 8 bytes: (LR, R4) */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Pad to 8 bytes before pushing doubles. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (sizeof(sljit_sw) >> 2)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPUSH | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the total frame up to 8 bytes; local_size becomes the extra
	   space below the saved area. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* softfp: arguments arrive in r0-r3, the rest on the stack; 'offset'
	   walks the caller's argument layout. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8 byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst32(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst32(compiler, VLDR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Argument already sits in the right register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst16(compiler, MOV | ((sljit_ins)reg_map[tmp] & 0x7) | (((sljit_ins)reg_map[tmp] & 0x8) << 4) | (offset << 1)));
			else if (reg_map[tmp] <= 7)
				FAIL_IF(push_inst16(compiler, LDR_SP | RDN3(tmp)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			else
				FAIL_IF(push_inst32(compiler, LDR | RT4(tmp) | RN4(SLJIT_SP)
					| ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* hardfp: floats arrive in VFP registers; record the moves needed to
	   compact them into consecutive sljit float registers, then emit the
	   recorded moves in reverse to avoid overwriting sources. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = VMOV_F32 | SLJIT_32 | VD4(offset) | VM4(old_offset);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			/* f32_offset remembers a half-used register pair. */
			if (f32_offset != 0) {
				*remap_ptr++ = VMOV_F32 | 0x20 | VD4(offset) | VM4(f32_offset);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = VMOV_F32 | VD4(offset) | VM4(old_offset);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_S0 - saved_arg_count, SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	while (remap_ptr > remap)
		FAIL_IF(push_inst32(compiler, *(--remap_ptr)));
#endif

#ifdef _WIN32
	/* Windows: the stack must be probed one page (4096 bytes) at a time
	   while allocating the local area. */
	if (local_size >= 4096) {
		imm = get_imm(4096);
		SLJIT_ASSERT(imm != INVALID_IMM);

		FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));

		if (local_size < 4 * 4096) {
			/* Unrolled probes for up to three extra pages. */
			if (local_size > 2 * 4096) {
				if (local_size > 3 * 4096) {
					FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
					FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
				}

				FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			}
		} else {
			/* Probe loop: TMP_REG2 counts remaining pages. */
			FAIL_IF(load_immediate(compiler, TMP_REG2, ((sljit_uw)local_size >> 12) - 1));
			FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm));
			FAIL_IF(push_inst32(compiler, SUB_WI | SET_FLAGS | RD4(TMP_REG2) | RN4(TMP_REG2) | 1));
			FAIL_IF(push_inst16(compiler, BCC | (0x1 << 8) /* not-equal */ | (-8 & 0xff)));
		}

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
		local_size &= 0xfff;
	}

	if (local_size >= 256) {
		SLJIT_ASSERT(local_size < 4096);

		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));

		FAIL_IF(push_inst32(compiler, LDRI | 0x400 | RT4(TMP_REG1) | RN4(SLJIT_SP)));
	} else if (local_size > 0)
		FAIL_IF(push_inst32(compiler, LDRI | 0x500 | RT4(TMP_REG1) | RN4(SLJIT_SP) | (sljit_uw)local_size));
#else /* !_WIN32 */
	/* Plain allocation; no stack probing required. */
	if (local_size > 0) {
		if (local_size <= (127 << 2))
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_uw)local_size >> 2)));
		else
			FAIL_IF(emit_op_imm(compiler, SLJIT_SUB | ARG2_IMM, SLJIT_SP, SLJIT_SP, (sljit_uw)local_size));
	}
#endif /* _WIN32 */

	return SLJIT_SUCCESS;
}
1440
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1441 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1442 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1443 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1444 {
1445 sljit_s32 size;
1446
1447 CHECK_ERROR();
1448 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1449 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1450
1451 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1452
1453 /* Doubles are saved, so alignment is unaffected. */
1454 if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1455 size += SSIZE_OF(sw);
1456
1457 compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1458 return SLJIT_SUCCESS;
1459 }
1460
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1461 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1462 {
1463 sljit_uw imm2;
1464
1465 /* The TMP_REG1 register must keep its value. */
1466 if (imm <= (127u << 2))
1467 return push_inst16(compiler, ADD_SP_I | (imm >> 2));
1468
1469 if (imm <= 0xfff)
1470 return push_inst32(compiler, ADDWI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | IMM12(imm));
1471
1472 imm2 = get_imm(imm);
1473
1474 if (imm2 != INVALID_IMM)
1475 return push_inst32(compiler, ADD_WI | RD4(SLJIT_SP) | RN4(SLJIT_SP) | imm2);
1476
1477 FAIL_IF(load_immediate(compiler, TMP_REG2, imm));
1478 return push_inst16(compiler, ADD_SP | RN3(TMP_REG2));
1479 }
1480
/* Emits the epilogue: restores float and integer saved registers, frees the
   local area and handles lr according to frame_size:
     frame_size <  0 : load lr into TMP_REG2 and keep the frame released;
     frame_size == 0 : return directly (lr is popped into pc);
     frame_size >  0 : leave lr on the stack; re-reserve 'frame_size & ~7'
                       bytes (frame_size == 1 means reserve nothing).
   Single-register restores avoid pop by using plain loads where possible. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Float registers are popped in reverse order of the prologue. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst32(compiler, VPOP | VD4(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
		}

		/* Only the alignment padding remains below the integer saves. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	/* Build the mask of integer registers to restore; restored_reg tracks
	   the single register for the optimized one-register path below. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Nothing else to restore: treat lr itself as the single register. */
		reg_list |= (sljit_uw)1 << reg_map[TMP_REG2];
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register to restore and lr stays on the stack:
		   use plain loads plus SP adjustment instead of pop.
		   tmp encodes the chosen strategy (0/1 done, 2 load after SP
		   adjustment, 3 post-increment load). */
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x308);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | 0x100 | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				if (reg_map[restored_reg] <= 7 && local_size <= 0x3fc)
					FAIL_IF(push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(local_size >> 2)));
				else
					FAIL_IF(push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst16(compiler, SUB_SP_I | ((sljit_ins)(frame_size - local_size) >> 2)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			/* Register not loaded yet: load it relative to the new SP. */
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			if (reg_map[restored_reg] <= 7)
				return push_inst16(compiler, STR_SP | 0x800 | RDN3(restored_reg) | (sljit_ins)(frame_size >> 2));

			return push_inst32(compiler, LDR | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)frame_size);
		}

		/* tmp == 3: post-increment load releases the slot(s) as well. */
		tmp = (restored_reg == TMP_REG2) ? 0x304 : 0x308;
		return push_inst32(compiler, LDRI | RT4(restored_reg) | RN4(SLJIT_SP) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	if (!(reg_list & 0xff00) && lr_dst != TMP_REG2) {
		/* All registers are low: the 16 bit pop can be used (bit 8 pops pc). */
		if (lr_dst == TMP_PC)
			reg_list |= 1u << 8;

		/* At least one register must be set for POP instruction. */
		SLJIT_ASSERT(reg_list != 0);

		FAIL_IF(push_inst16(compiler, POP | reg_list));
	} else {
		if (lr_dst != 0)
			reg_list |= (sljit_uw)1 << reg_map[lr_dst];

		/* At least two registers must be set for POP_W instruction. */
		SLJIT_ASSERT((reg_list & (reg_list - 1)) != 0);

		FAIL_IF(push_inst32(compiler, POP_W | reg_list));
	}

	if (frame_size > 0)
		return push_inst16(compiler, SUB_SP_I | (((sljit_ins)frame_size - sizeof(sljit_sw)) >> 2));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* lr was left on the stack (lr_dst == 0): skip over its slot. */
	return push_inst16(compiler, ADD_SP_I | 1);
}
1626
/* Emits a return with no return value: releases the whole stack frame and
   returns (frame_size == 0 pops the saved lr directly into pc). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
1634
/* Releases the stack frame and jumps to 'src' (tail-call style return). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	/* The target must survive the frame release: move it out of memory
	   (the local area is freed) and out of any saved register that the
	   epilogue below restores. */
	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
		srcw = 0;
	}

	/* frame_size == 1: release everything without reloading lr into a
	   register (see emit_stack_frame_release). */
	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1656
1657 /* --------------------------------------------------------------------- */
1658 /* Operators */
1659 /* --------------------------------------------------------------------- */
1660
/* Declarations of the runtime software division helpers used by
   sljit_emit_op0 when the CPU has no integer divide instruction.
   Note the operand order differs: the Windows __rt_* helpers take the
   denominator first, the GCC __aeabi_* helpers take the numerator first. */
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)

#ifdef __cplusplus
extern "C" {
#endif

#ifdef _WIN32
extern unsigned long long __rt_udiv(unsigned int denominator, unsigned int numerator);
extern long long __rt_sdiv(int denominator, int numerator);
#elif defined(__GNUC__)
extern unsigned int __aeabi_uidivmod(unsigned int numerator, int unsigned denominator);
extern int __aeabi_idivmod(int numerator, int denominator);
#else
#error "Software divmod functions are needed"
#endif

#ifdef __cplusplus
}
#endif

#endif /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
1682
/* Emits a zero-operand operation: breakpoint, nop, 64 bit multiply, or
   division/remainder. Division uses the hardware udiv/sdiv when available,
   otherwise it calls the platform's software divmod helper, preserving the
   live scratch registers around the call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if !(defined __ARM_FEATURE_IDIV) && !(defined __ARM_ARCH_EXT_IDIV__)
	sljit_uw saved_reg_list[3];
	sljit_uw saved_reg_count;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		return push_inst16(compiler, BKPT);
	case SLJIT_NOP:
		return push_inst16(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* R0:R1 = R0 * R1 (64 bit result). */
		return push_inst32(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RD4(SLJIT_R1) | RT4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#if (defined __ARM_FEATURE_IDIV) || (defined __ARM_ARCH_EXT_IDIV__)
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Hardware divide; remainder computed as R1 = R0_old - R0 * R1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst32(compiler, (op == SLJIT_DIVMOD_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		FAIL_IF(push_inst32(compiler, MUL | RD4(SLJIT_R1) | RN4(SLJIT_R0) | RM4(SLJIT_R1)));
		return push_inst32(compiler, SUB_W | RD4(SLJIT_R1) | RN4(TMP_REG1) | RM4(SLJIT_R1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		return push_inst32(compiler, (op == SLJIT_DIV_UW ? UDIV : SDIV) | RD4(SLJIT_R0) | RN4(SLJIT_R0) | RM4(SLJIT_R1));
#else /* !__ARM_FEATURE_IDIV && !__ARM_ARCH_EXT_IDIV__ */
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Spill the caller-visible scratch registers the helper call may
		   clobber; R1 only needs saving for the DIV (not DIVMOD) forms. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst32(compiler, 0xf84d0d00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9001 | (saved_reg_list[1] << 8) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9002 | (saved_reg_list[2] << 8) /* str rX, [sp, #8] */));
			}
		}

#ifdef _WIN32
		/* __rt_udiv/__rt_sdiv take the denominator first: swap R0 and R1. */
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, SLJIT_R0)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R0, SLJIT_R1)));
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(SLJIT_R1, TMP_REG1)));
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__rt_udiv) : SLJIT_FUNC_ADDR(__rt_sdiv))));
#elif defined(__GNUC__)
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the spilled registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst16(compiler, 0x9802 | (saved_reg_list[2] << 8) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst16(compiler, 0x9801 | (saved_reg_list[1] << 8) /* ldr rX, [sp, #4] */));
			}
			return push_inst32(compiler, 0xf85d0b00 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_IDIV || __ARM_ARCH_EXT_IDIV__ */
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
1776
/* Emits a single-operand operation: the MOV family (with size/sign
   selection) or a unary ALU operation (CLZ, CTZ, REV variants, NOT). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r, flags;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Compute into the destination register directly when possible. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	op = GET_OPCODE(op);
	if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
		/* Select the memory access size; immediate sources are
		   pre-truncated so a plain word move suffices afterwards. */
		switch (op) {
		case SLJIT_MOV:
		case SLJIT_MOV_U32:
		case SLJIT_MOV_S32:
		case SLJIT_MOV32:
		case SLJIT_MOV_P:
			flags = WORD_SIZE;
			break;
		case SLJIT_MOV_U8:
			flags = BYTE_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u8)srcw;
			break;
		case SLJIT_MOV_S8:
			flags = BYTE_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s8)srcw;
			break;
		case SLJIT_MOV_U16:
			flags = HALF_SIZE;
			if (src == SLJIT_IMM)
				srcw = (sljit_u16)srcw;
			break;
		case SLJIT_MOV_S16:
			flags = HALF_SIZE | SIGNED;
			if (src == SLJIT_IMM)
				srcw = (sljit_s16)srcw;
			break;
		default:
			SLJIT_UNREACHABLE();
			flags = 0;
			break;
		}

		if (src == SLJIT_IMM)
			FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG2, (sljit_uw)srcw));
		else if (src & SLJIT_MEM) {
			FAIL_IF(emit_op_mem(compiler, flags, dst_r, src, srcw, TMP_REG1));
		} else {
			if (dst_r != TMP_REG1)
				return emit_op_imm(compiler, op, dst_r, TMP_REG2, (sljit_uw)src);
			/* Register source, memory destination: store src itself. */
			dst_r = src;
		}

		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;

		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	}

	SLJIT_COMPILE_ASSERT(WORD_SIZE == 0, word_size_must_be_0);
	flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;

	if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16)
		flags |= HALF_SIZE;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, flags, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* NOTE(review): return value intentionally not checked here;
	   presumably failures are latched in compiler->error and surfaced
	   by the following emit calls - confirm against push_inst. */
	emit_op_imm(compiler, flags | op, dst_r, TMP_REG2, (sljit_uw)src);

	if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
		return emit_op_mem(compiler, flags | STORE, dst_r, dst, dstw, TMP_REG2);
	return SLJIT_SUCCESS;
}
1860
/* Emits a two-operand operation. Immediate operands are only flagged
   (ARG1_IMM/ARG2_IMM) so emit_op_imm can choose the best encoding;
   memory operands are loaded into the temporary registers first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_reg, flags, src2_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
	flags = HAS_FLAGS(op) ? SET_FLAGS : 0;

	/* sljit_emit_op2u passes TMP_REG1 as dst: only flags are needed. */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	if (src1 == SLJIT_IMM)
		flags |= ARG1_IMM;
	else if (src1 & SLJIT_MEM) {
		emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG1);
		src1w = TMP_REG1;
	}
	else
		src1w = src1;

	if (src2 == SLJIT_IMM)
		flags |= ARG2_IMM;
	else if (src2 & SLJIT_MEM) {
		/* Avoid clobbering src1 when it was loaded into TMP_REG1. */
		src2_reg = (!(flags & ARG1_IMM) && (src1w == TMP_REG1)) ? TMP_REG2 : TMP_REG1;
		emit_op_mem(compiler, WORD_SIZE, src2_reg, src2, src2w, src2_reg);
		src2w = src2_reg;
	}
	else
		src2w = src2;

	/* NOTE(review): emit_op_mem/emit_op_imm results ignored above and
	   here; presumably errors stay sticky in compiler->error - confirm. */
	emit_op_imm(compiler, flags | GET_OPCODE(op), dst_reg, (sljit_uw)src1w, (sljit_uw)src2w);

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG2);
}
1905
/* Emits a two-operand operation whose result is discarded; only the
   status flags are of interest. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	/* Passing TMP_REG1 as dst makes sljit_emit_op2 set UNUSED_RETURN. */
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
1916
/* Emits a funnel shift: dst = src1 shifted by src3, with the vacated
   bits filled from src2 (SHL/MSHL shift left, LSHR/MLSHR shift right). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	/* Shifting a value into itself is simply a rotate. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Shift src1 by the constant amount. */
		if (IS_2_LO_REGS(dst_reg, src1_reg))
			FAIL_IF(push_inst16(compiler, (is_left ? LSLSI : LSRSI) | RD3(dst_reg) | RN3(src1_reg) | ((sljit_ins)src3w << 6)));
		else
			FAIL_IF(push_inst32(compiler, (is_left ? LSL_WI : LSR_WI) | RD4(dst_reg) | RM4(src1_reg) | IMM5(src3w)));

		/* Merge src2 shifted the opposite way by (32 - src3w). */
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(src2_reg) | (is_left ? 0x10 : 0x0) | IMM5(src3w));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Masked variants (and dst aliasing src3) need the amount mod 32. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst32(compiler, ANDI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	if (dst_reg == src1_reg && IS_2_LO_REGS(dst_reg, src3))
		FAIL_IF(push_inst16(compiler, (is_left ? LSLS : LSRS) | RD3(dst_reg) | RN3(src3)));
	else
		FAIL_IF(push_inst32(compiler, (is_left ? LSL_W : LSR_W) | RD4(dst_reg) | RN4(src1_reg) | RM4(src3)));

	/* src2 shifted by (32 - amount): pre-shift by 1, then by
	   (amount ^ 31), avoiding an undefined shift of 32. */
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_WI : LSL_WI) | RD4(TMP_REG1) | RM4(src2_reg) | (1 << 6)));
	FAIL_IF(push_inst32(compiler, EORI | RD4(TMP_REG2) | RN4(src3) | 0x1f));
	FAIL_IF(push_inst32(compiler, (is_left ? LSR_W : LSL_W) | RD4(TMP_REG1) | RN4(TMP_REG1) | RM4(TMP_REG2)));
	return push_inst32(compiler, ORR_W | RD4(dst_reg) | RN4(dst_reg) | RM4(TMP_REG1));
}
1973
/* Emits an operation with a single source operand and no destination. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* Return through lr (TMP_REG2 maps to r14). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG2, src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));

		return push_inst16(compiler, BX | RN3(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* Nothing to do on this target. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch hints map to the same preload encoding. */
		return emit_op_mem(compiler, PRELOAD, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2002
/* Emits an operation producing a single destination operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* The return address arrives in lr (TMP_REG2 maps to r14). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst16(compiler, MOV | SET_REGS44(dst, TMP_REG2));
		/* Memory destination: handled by the store below. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Locate the saved return address within the current frame. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2042
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2043 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2044 {
2045 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2046
2047 if (type == SLJIT_GP_REGISTER)
2048 return reg_map[reg];
2049
2050 if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2051 return freg_map[reg];
2052
2053 if (type != SLJIT_SIMD_REG_128)
2054 return freg_map[reg] & ~0x1;
2055
2056 return -1;
2057 }
2058
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2059 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2060 void *instruction, sljit_u32 size)
2061 {
2062 CHECK_ERROR();
2063 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2064
2065 if (size == 2)
2066 return push_inst16(compiler, *(sljit_u16*)instruction);
2067 return push_inst32(compiler, *(sljit_ins*)instruction);
2068 }
2069
2070 /* --------------------------------------------------------------------- */
2071 /* Floating point operators */
2072 /* --------------------------------------------------------------------- */
2073
2074 #define FPU_LOAD (1 << 20)
2075
/* Emits a VFP load (FPU_LOAD set in flags) or store between float
   register 'reg' and the memory operand arg/argw. VLDR/VSTR only take
   word-aligned immediate offsets up to 0x3fc (1020), so larger or
   unaligned displacements are materialized into TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Fast loads and stores. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* base + (index << shift) has no VFP form: fold into TMP_REG1. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 6)));
		arg = SLJIT_MEM | TMP_REG1;
		argw = 0;
	}

	/* Word-aligned offset in range: use the immediate form directly.
	   0x800000 is the U bit (add direction); its absence subtracts. */
	if ((arg & REG_MASK) && (argw & 0x3) == 0) {
		if (!(argw & ~0x3fc))
			return push_inst32(compiler, inst | 0x800000 | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst32(compiler, inst | RN4(arg & REG_MASK) | VD4(reg) | ((sljit_uw)-argw >> 2));
	}

	if (arg & REG_MASK) {
		/* Try computing base + argw with a single add/sub immediate. */
		if (emit_set_delta(compiler, TMP_REG1, arg & REG_MASK, argw) != SLJIT_ERR_UNSUPPORTED) {
			FAIL_IF(compiler->error);
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
		}

		/* Fold the high part of the offset into the base register,
		   keeping the low bits (& 0x3fc) as the VFP immediate. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}

		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm != INVALID_IMM) {
			argw = -argw;
			FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg & REG_MASK) | imm));
			return push_inst32(compiler, inst | RN4(TMP_REG1) | VD4(reg) | (((sljit_uw)argw & 0x3fc) >> 2));
		}
	}

	/* Slow path: build the full address in TMP_REG1. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
	if (arg & REG_MASK)
		FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, (arg & REG_MASK))));
	return push_inst32(compiler, inst | 0x800000 | RN4(TMP_REG1) | VD4(reg));
}
2122
/* Converts a float/double to a signed word. op ^= SLJIT_32 flips the
   bit so that it can be OR-ed directly into the F32/F64 encodings. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* The integer result is produced in TMP_FREG1. */
	FAIL_IF(push_inst32(compiler, VCVT_S32_F32 | (op & SLJIT_32) | VD4(TMP_FREG1) | VM4(src)));

	if (FAST_IS_REG(dst))
		return push_inst32(compiler, VMOV | (1 << 20) | RT4(dst) | VN4(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2142
/* Shared helper: moves an integer value (register, memory or immediate)
   into TMP_FREG1 and converts it with the supplied VCVT instruction. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst32(compiler, VMOV | RT4(src) | VN4(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate: build it in a core register first. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst32(compiler, VMOV | RT4(TMP_REG1) | VN4(TMP_FREG1)));
	}

	FAIL_IF(push_inst32(compiler, ins | VD4(dst_r) | VM4(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2166
/* Converts a signed word to float/double; (~op & SLJIT_32) flips the
   bit so it matches the VCVT precision encoding. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2173
/* Converts an unsigned word to float/double; see the signed variant. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2180
/* Compares src1 and src2 and transfers the VFP status flags to the
   CPSR via VMRS. For SLJIT_UNORDERED_OR_EQUAL an extra conditional
   compare maps the unordered case onto the Z flag as well, so the
   condition can later be tested as plain EQ. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst32(compiler, VCMP_F32 | (op & SLJIT_32) | VD4(src1) | VM4(src2)));
	FAIL_IF(push_inst32(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* IT VS (0x6); CMP r0, r0 sets Z when the compare was unordered. */
	FAIL_IF(push_inst16(compiler, IT | (0x6 << 4) | 0x8));
	return push_inst16(compiler, CMP /* Rm, Rn = r0 */);
}
2206
/* Emits a single-operand floating point operation (move, negate, abs,
   precision conversion). SLJIT_32 is toggled so the resulting bit can
   be OR-ed directly into the F32/F64 instruction encodings. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* For F64_FROM_F32 the source precision is the opposite of op's bit. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
			else
				/* Memory destination: store directly from src. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src)));
		/* The result has the other precision: flip for the store below. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2253
/* Emits a two-operand floating point operation (add, sub, mul, div,
   copysign). SLJIT_32 is toggled so the bit matches the encodings. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}
	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst32(compiler, VADD_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst32(compiler, VSUB_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst32(compiler, VMUL_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst32(compiler, VDIV_F32 | (op & SLJIT_32) | VD4(dst_r) | VN4(src1) | VM4(src2)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the sign-carrying word of src2 into TMP_REG1, compute
		   |src1|, then negate the result only if that word is negative
		   (IT LT, condition 0xb). */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(src2) | RT4(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst32(compiler, VABS_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(src1)));
		FAIL_IF(push_inst32(compiler, CMPI_W | RN4(TMP_REG1) | 0));
		FAIL_IF(push_inst16(compiler, IT | (0xb << 4) | 0x8));
		return push_inst32(compiler, VNEG_F32 | (op & SLJIT_32) | VD4(dst_r) | VM4(dst_r));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;
	/* dst is a memory operand here, so dst_r == TMP_FREG1. */
	return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
2304
/* Loads a 32 bit float constant into freg. With NEON, constants that
   fit the 8 bit VMOV.F32 immediate form are emitted as one
   instruction; otherwise the bit pattern is built in TMP_REG1 and
   transferred with VMOV. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union reinterprets the float bits without aliasing violations. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Representable when only the top 13 bits (sign, exponent window,
	   4 fraction bits) are used... */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		/* ...and the exponent fits the restricted range. */
		if (exp == 0x20 || exp == 0x1f) {
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG1));
}
2336
/* Loads a 64 bit float constant into freg. Single-instruction VMOV.F64
   immediates are used when possible (NEON); otherwise both words are
   built in core registers and moved in one VMOV2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union reinterprets the double bits without aliasing violations;
	   imm[0] is the low word, imm[1] the high (sign/exponent) word. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Representable when only sign, exponent window and the top 4
	   fraction bits of the high word are used. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			return push_inst32(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD4(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* Identical halves: a single core register feeds both words. */
	if (u.imm[0] == u.imm[1])
		return push_inst32(compiler, VMOV2 | RN4(TMP_REG1) | RT4(TMP_REG1) | VM4(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst32(compiler, VMOV2 | RN4(TMP_REG2) | RT4(TMP_REG1) | VM4(freg));
}
2372
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2373 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2374 sljit_s32 freg, sljit_s32 reg)
2375 {
2376 sljit_s32 reg2;
2377 sljit_ins inst;
2378
2379 CHECK_ERROR();
2380 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2381
2382 if (reg & REG_PAIR_MASK) {
2383 reg2 = REG_PAIR_SECOND(reg);
2384 reg = REG_PAIR_FIRST(reg);
2385
2386 inst = VMOV2 | RN4(reg) | RT4(reg2) | VM4(freg);
2387 } else {
2388 inst = VMOV | VN4(freg) | RT4(reg);
2389
2390 if (!(op & SLJIT_32))
2391 inst |= 1 << 7;
2392 }
2393
2394 if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
2395 inst |= 1 << 20;
2396
2397 return push_inst32(compiler, inst);
2398 }
2399
2400 /* --------------------------------------------------------------------- */
2401 /* Conditional instructions */
2402 /* --------------------------------------------------------------------- */
2403
/* Maps an sljit condition type to the 4 bit ARM condition code used by
   IT blocks and conditional branches. Integer and float conditions
   share encodings; the meaning of CARRY/OVERFLOW depends on whether an
   ADD or a SUB produced the flags (compiler->status_flags_state). */
static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x0;

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x1;

	case SLJIT_CARRY:
		/* After ADD the carry flag itself is the result (CS)... */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x2;
		/* fallthrough */

	case SLJIT_LESS:
		/* ...after SUB the borrow is the inverted carry (CC). */
		return 0x3;

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x3;
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x2;

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x8;

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x9;

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb;

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa;

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc;

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd;

	case SLJIT_OVERFLOW:
		/* Without add/sub flags, overflow was computed as not-equal. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x1;
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x6;

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x0;
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x7;

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x4;

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x5;

	default: /* SLJIT_JUMP */
		SLJIT_UNREACHABLE();
		return 0xe;
	}
}
2492
sljit_emit_label(struct sljit_compiler * compiler)2493 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2494 {
2495 struct sljit_label *label;
2496
2497 CHECK_ERROR_PTR();
2498 CHECK_PTR(check_sljit_emit_label(compiler));
2499
2500 if (compiler->last_label && compiler->last_label->size == compiler->size)
2501 return compiler->last_label;
2502
2503 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2504 PTR_FAIL_IF(!label);
2505 set_label(label, compiler);
2506 return label;
2507 }
2508
/* Emits a (possibly conditional) jump or call with a yet-unknown
   target: space for loading a 32 bit constant into TMP_REG1 is
   reserved and patched later, optionally guarded by an IT block, and
   jump->addr records the position of the final BX/BLX. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins cc;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* Placeholder constant; the real target is patched in later. */
	PTR_FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	if (type < SLJIT_JUMP) {
		jump->flags |= IS_COND;
		cc = get_cc(compiler, type);
		/* Keep the condition code for the patching phase. */
		jump->flags |= cc << 8;
		PTR_FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
	}

	jump->addr = compiler->size;
	if (type <= SLJIT_JUMP)
		PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG1)));
	else {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(push_inst16(compiler, BLX | RN3(TMP_REG1)));
	}

	return jump;
}
2540
2541 #ifdef __SOFTFP__
2542
/* Moves outgoing call arguments into the locations required by the
   soft-float calling convention: r0-r3 first, overflow on the stack.
   Two passes: the first assigns an offset to every argument (and
   collects the reversed type list in 'types'); the second performs the
   moves in reverse order so a move never overwrites a source that is
   still needed. If the call target register *src would be clobbered,
   it is redirected to TMP_REG1 or rotated into a freed slot.
   *extra_space: in - SLJIT_CALL_RETURN flag for tail calls;
   out - bytes of stack reserved for arguments (0 if none). */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: compute argument offsets (doubles are 8-aligned). */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Arguments beyond r0-r3 need stack space, 8 byte aligned. */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst16(compiler, SUB_SP_I | (offset >> 2)));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	SLJIT_ASSERT(reg_map[TMP_REG1] == 12);

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Target pair would clobber *src: save it first. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800100 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst32(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst32(compiler, VSTR_F32 | 0x800000 | RN4(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst16(compiler, MOV | (src_offset << 1) | 4 | (1 << 7)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The source slot becomes free: track *src there. */
						*src = (sljit_s32)(1 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst16(compiler, MOV | (offset >> 2) | (word_arg_offset << 1)));
				} else
					FAIL_IF(push_inst16(compiler, STR_SP | (word_arg_offset << 6) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2667
/* After a softfp call returns: move a floating point return value from the
   integer return registers back into the FPU return register (d0 for f64,
   s0 for f32). The lowest slot of arg_types holds the return type. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		/* vmov d0, r0, r1 */
		return push_inst32(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		/* vmov s0, r0 */
		return push_inst32(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
2677
2678 #else
2679
/* Repacks floating point arguments into the register layout expected by the
   hardfp calling convention. SLJIT assigns each float argument its own
   virtual register; the ABI packs two f32 values into the halves of one
   register slot, so values are shifted down when slots are saved. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;	/* Where SLJIT placed the next argument. */
	sljit_u32 new_offset = SLJIT_FR0;	/* Where the ABI wants the next argument. */
	sljit_u32 f32_offset = 0;	/* Slot with a free f32 half, 0 if none. */

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(new_offset) | VM4(offset)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the other half of a previously half-used slot.
				   NOTE(review): 0x400000 presumably selects the odd
				   single register of the pair — confirm encoding. */
				FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(f32_offset) | VM4(offset)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst32(compiler, VMOV_F32 | 0x400000 | VD4(new_offset) | VM4(offset)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2716
2717 #endif
2718
/* Emits a (possibly tail) call whose target is patched later. On softfp,
 * softfloat_call_with_args has already moved the arguments into core
 * registers / stack; if extra stack was allocated for them, a tail call
 * (SLJIT_CALL_RETURN) must be emitted as a normal call followed by a
 * manual stack fixup and return. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* No stack fixup needed afterwards: a real tail call is possible. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Reload the saved return address before freeing the area. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst16(compiler, BX | RN3(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
2775
/* Emits an indirect jump or fast call to src (register, memory or
 * immediate). Immediate targets emit a patchable constant load that is
 * recorded as a jump so it can be rewritten to a direct branch later. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(src));
		}

		/* Plain jumps can load straight into pc; calls must go through
		   TMP_REG1 so BLX can set lr. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, src, srcw, TMP_REG1));
		if (type >= SLJIT_FAST_CALL)
			return push_inst16(compiler, BLX | RN3(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

	/* Placeholder constant; the real target is patched in afterwards. */
	FAIL_IF(emit_imm32_const(compiler, TMP_REG1, 0));
	jump->addr = compiler->size;
	return push_inst16(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RN3(TMP_REG1));
}
2807
/* Emits an indirect call with the given argument types. Mirrors
 * sljit_emit_call, but the target comes from a register or memory
 * operand instead of a patchable constant. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* For tail calls the frame release restores the saved registers, so a
	   target kept in one of them must be copied out of the way first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(TMP_REG1, src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* No stack fixup needed afterwards: a real tail call is possible. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved return address before freeing the area. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst32(compiler, LDR | RT4(TMP_REG2)
					| RN4(SLJIT_SP) | (extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst16(compiler, ADD_SP_I | (extra_space >> 2)));

			if (type & SLJIT_CALL_RETURN)
				return push_inst16(compiler, BX | RN3(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* ldmia sp!, {..., lr} */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
2870
2871 #ifdef __SOFTFP__
2872
/* Softfp only: moves a floating point return value into the integer return
 * register(s) (r0 for f32, r0/r1 for f64) before the function returns.
 * When the function was entered with SLJIT_ENTER_REG_ARG, the value stays
 * in FP registers and is only normalized to SLJIT_RETURN_FREG. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* vmov r0, s / vmov r0, r1, d (bit 20 sets the to-core direction). */
		if (op & SLJIT_32)
			return push_inst32(compiler, VMOV | (1 << 20) | VN4(src) | RT4(SLJIT_R0));
		return push_inst32(compiler, VMOV2 | (1 << 20) | VM4(src) | RT4(SLJIT_R0) | RN4(SLJIT_R1));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Value in memory: load it straight into r0 (and r1 for f64). */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
2895
2896 #endif /* __SOFTFP__ */
2897
/* Materializes the flag condition `type` as 0/1 into dst (when op is a
 * move), or combines it into dst with AND/OR/XOR. Thumb-2 IT blocks select
 * which of the constant-producing instructions actually executes. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_r, flags = GET_ALL_FLAGS(op);
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* ITE: exactly one of the two moves below (1 or 0) executes. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		if (reg_map[dst_r] > 7) {
			/* High registers need the 32-bit immediate move. */
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 1));
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(dst_r) | 0));
		} else {
			/* The movsi (immediate) instruction does not set flags in IT block. */
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 1));
			FAIL_IF(push_inst16(compiler, MOVSI | RDN3(dst_r) | 0));
		}
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2);
	}

	/* Binary op: fetch the current dst value first when it is in memory. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	if (op == SLJIT_AND) {
		/* AND needs both branches: and #1 when true, and #0 when false. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | (((cc & 0x1) ^ 0x1) << 3) | 0x4));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 1));
		FAIL_IF(push_inst32(compiler, ANDI | RN4(dst_r) | RD4(dst_r) | 0));
	}
	else {
		/* OR/XOR with 0 is a no-op, so only the true case is emitted. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		FAIL_IF(push_inst32(compiler, ((op == SLJIT_OR) ? ORRI : EORI) | RN4(dst_r) | RD4(dst_r) | 1));
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG1, dst, dstw, TMP_REG2));

	if (!(flags & SLJIT_SET_Z))
		return SLJIT_SUCCESS;

	/* The condition must always be set, even if the ORR/EORI is not executed above. */
	return push_inst32(compiler, MOV_W | SET_FLAGS | RD4(TMP_REG1) | RM4(dst_r));
}
2950
/* Conditional select: dst_reg = condition(type) ? src1 : src2_reg.
 * src2 is placed in dst_reg unconditionally first; then src1 is moved in
 * under an IT block when the condition holds. Flipping bit 0 of `type`
 * inverts the condition, which lets the operands be swapped. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_uw cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* src1 already lives in dst_reg: swap sources, invert the condition. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2));

		if (src2_reg != dst_reg) {
			/* src1 was loaded straight into dst_reg; now conditionally
			   overwrite it with src2 instead. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst16(compiler, MOV | SET_REGS44(dst_reg, src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (src1 != SLJIT_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst16(compiler, MOV | SET_REGS44(dst_reg, src1));
	}

	tmp = (sljit_uw)src1w;

	if (tmp < 0x10000) {
		/* set low 16 bits, set hi 16 bits to 0. */
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOVW | RD4(dst_reg)
			| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff));
	}

	/* Try the Thumb-2 modified immediate forms: direct, then inverted. */
	tmp = get_imm((sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MOV_WI | RD4(dst_reg) | tmp);
	}

	tmp = get_imm(~(sljit_uw)src1w);
	if (tmp != INVALID_IMM) {
		FAIL_IF(push_inst16(compiler, IT | (cc << 4) | 0x8));
		return push_inst32(compiler, MVN_WI | RD4(dst_reg) | tmp);
	}

	/* Worst case: MOVW + MOVT pair inside a two-instruction IT block. */
	FAIL_IF(push_inst16(compiler, IT | (cc << 4) | ((cc & 0x1) << 3) | 0x4));

	tmp = (sljit_uw)src1w;
	FAIL_IF(push_inst32(compiler, MOVW | RD4(dst_reg)
		| COPY_BITS(tmp, 12, 16, 4) | COPY_BITS(tmp, 11, 26, 1) | COPY_BITS(tmp, 8, 12, 3) | (tmp & 0xff)));
	return push_inst32(compiler, MOVT | RD4(dst_reg)
		| COPY_BITS(tmp, 12 + 16, 16, 4) | COPY_BITS(tmp, 11 + 16, 26, 1) | COPY_BITS(tmp, 8 + 16, 12, 3) | ((tmp & 0xff0000) >> 16));
}
3020
/* Conditional select between float registers:
 * dst_freg = condition(type) ? src1 : src2_freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* NOTE(review): the SLJIT_32 bit is inverted so (type & SLJIT_32)
	   directly yields the double-precision form of VMOV_F32 below, as in
	   the F64 path of hardfloat_call_with_args — confirm. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap sources and invert the condition instead of copying. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src2_freg)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	/* Single conditional register move under an IT block. */
	FAIL_IF(push_inst16(compiler, IT | (get_cc(compiler, type & ~SLJIT_32) << 4) | 0x8));
	return push_inst32(compiler, VMOV_F32 | (type & SLJIT_32) | VD4(dst_freg) | VM4(src1));
}
3050
/* Loads or stores a register pair. Single registers take the generic
 * unaligned path. For pairs, the unaligned variants use two word-sized
 * LDR/STR instructions, while the aligned variant uses LDRD/STRD, whose
 * immediate is limited to a word-aligned 8-bit offset (0..1020, add or
 * subtract). Out-of-range addresses are rebased into TMP_REG1 first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_uw imm, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_ALIGNED_16 | SLJIT_MEM_ALIGNED_32)) {
		/* Pair via two LDR/STR: both memw and memw + sizeof(sw) must fit
		   the -0xff..0xfff immediate range after rebasing. */
		if ((mem & REG_MASK) == 0) {
			/* Absolute address: materialize a nearby base in TMP_REG1. */
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm == INVALID_IMM) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				memw = 0;
			} else
				FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (mem & OFFS_REG_MASK) {
			/* Fold base + (index << shift) into TMP_REG1. */
			FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
			memw = 0;
			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw < -0xff) {
			/* Zero value can be included in the first case. */
			if ((-memw & 0xfff) <= SSIZE_OF(sw))
				tmp = (sljit_uw)((-memw + 0x7ff) & ~0x7ff);
			else
				tmp = (sljit_uw)((-memw + 0xfff) & ~0xfff);

			SLJIT_ASSERT(tmp >= (sljit_uw)-memw);
			imm = get_imm(tmp);

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw += (sljit_sw)tmp;
				SLJIT_ASSERT(memw >= 0 && memw <= 0xfff - SSIZE_OF(sw));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		} else if (memw >= (0x1000 - SSIZE_OF(sw))) {
			/* Large positive offset: move a 4K-aligned chunk into the base. */
			if ((memw & 0xfff) >= (0x1000 - SSIZE_OF(sw))) {
				imm = get_imm((sljit_uw)((memw + 0x1000) & ~0xfff));

				if (imm != INVALID_IMM)
					memw = (memw & 0xfff) - 0x1000;
			} else {
				imm = get_imm((sljit_uw)(memw & ~0xfff));

				if (imm != INVALID_IMM)
					memw &= 0xfff;
			}

			if (imm != INVALID_IMM) {
				SLJIT_ASSERT(memw >= -0xff && memw <= 0xfff);
				FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}

		flags = WORD_SIZE;

		SLJIT_ASSERT(memw <= 0xfff - SSIZE_OF(sw) && memw >= -0xff);

		if (type & SLJIT_MEM_STORE) {
			flags |= STORE;
		} else if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* Load the second word first so the base is not clobbered
			   before it is used again. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2));
			return emit_op_mem(compiler, WORD_SIZE, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2);
		}

		FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), mem, memw, TMP_REG2));
		return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), mem, memw + SSIZE_OF(sw), TMP_REG2);
	}

	/* LDRD/STRD path. 1 << 23 appears to be the add/subtract (U) bit:
	   when cleared the encoded offset is subtracted. */
	flags = 1 << 23;

	if ((mem & REG_MASK) == 0) {
		/* Absolute address: keep at most 0x3fc in the offset field. */
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm == INVALID_IMM) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			memw = 0;
		} else {
			FAIL_IF(push_inst32(compiler, MOV_WI | RD4(TMP_REG1) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				/* Base was rounded up: use a subtracted offset. */
				memw = 0x100 - memw;
				flags = 0;
			}

			SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else if (mem & OFFS_REG_MASK) {
		/* Fold base + (index << shift) into TMP_REG1. */
		FAIL_IF(push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(mem & REG_MASK) | RM4(OFFS_REG(mem)) | ((sljit_uw)(memw & 0x3) << 6)));
		memw = 0;
		mem = SLJIT_MEM1(TMP_REG1);
	} else if (memw < 0) {
		if ((-memw & ~0x3fc) == 0) {
			/* Small negative offset: encodable directly as subtract. */
			flags = 0;
			memw = -memw >> 2;
		} else {
			tmp = (sljit_uw)(-memw & 0x7fc);
			imm = get_imm((sljit_uw)((-memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

			if (imm != INVALID_IMM) {
				FAIL_IF(push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
				memw = (-memw & 0x3fc) >> 2;

				if (tmp <= 0x400)
					flags = 0;
				else
					memw = 0x100 - memw;
			} else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
				FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
				memw = 0;
			}

			mem = SLJIT_MEM1(TMP_REG1);
		}
	} else if ((memw & ~0x3fc) != 0) {
		/* Positive offset too large for the immediate field: rebase. */
		tmp = (sljit_uw)(memw & 0x7fc);
		imm = get_imm((sljit_uw)((memw + (tmp <= 0x400 ? 0 : 0x400)) & ~0x3fc));

		if (imm != INVALID_IMM) {
			FAIL_IF(push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(mem & REG_MASK) | imm));
			memw = (memw & 0x3fc) >> 2;

			if (tmp > 0x400) {
				memw = 0x100 - memw;
				flags = 0;
			}
		} else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
			FAIL_IF(push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, mem & REG_MASK)));
			memw = 0;
		}

		mem = SLJIT_MEM1(TMP_REG1);
	} else
		memw >>= 2;

	SLJIT_ASSERT(memw >= 0 && memw <= 0xff);
	return push_inst32(compiler, ((type & SLJIT_MEM_STORE) ? STRD : LDRD) | (sljit_ins)flags | RN4(mem & REG_MASK) | RT4(REG_PAIR_FIRST(reg)) | RD4(REG_PAIR_SECOND(reg)) | (sljit_ins)memw);
}
3225
sljit_emit_mem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3226 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
3227 sljit_s32 reg,
3228 sljit_s32 mem, sljit_sw memw)
3229 {
3230 sljit_s32 flags;
3231 sljit_ins inst;
3232
3233 CHECK_ERROR();
3234 CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
3235
3236 if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -255))
3237 return SLJIT_ERR_UNSUPPORTED;
3238
3239 if (type & SLJIT_MEM_SUPP)
3240 return SLJIT_SUCCESS;
3241
3242 switch (type & 0xff) {
3243 case SLJIT_MOV:
3244 case SLJIT_MOV_U32:
3245 case SLJIT_MOV_S32:
3246 case SLJIT_MOV32:
3247 case SLJIT_MOV_P:
3248 flags = WORD_SIZE;
3249 break;
3250 case SLJIT_MOV_U8:
3251 flags = BYTE_SIZE;
3252 break;
3253 case SLJIT_MOV_S8:
3254 flags = BYTE_SIZE | SIGNED;
3255 break;
3256 case SLJIT_MOV_U16:
3257 flags = HALF_SIZE;
3258 break;
3259 case SLJIT_MOV_S16:
3260 flags = HALF_SIZE | SIGNED;
3261 break;
3262 default:
3263 SLJIT_UNREACHABLE();
3264 flags = WORD_SIZE;
3265 break;
3266 }
3267
3268 if (type & SLJIT_MEM_STORE)
3269 flags |= STORE;
3270
3271 inst = sljit_mem32[flags] | 0x900;
3272
3273 if (!(type & SLJIT_MEM_POST))
3274 inst |= 0x400;
3275
3276 if (memw >= 0)
3277 inst |= 0x200;
3278 else
3279 memw = -memw;
3280
3281 return push_inst32(compiler, inst | RT4(reg) | RN4(mem & REG_MASK) | (sljit_ins)memw);
3282 }
3283
/* Rebases the address *mem + *memw so that the remaining offset fits the
 * caller's addressing mode: on return *mem is a plain register (TMP_REG1
 * unless the original base could be kept) and -0xff <= *memw <= max_offset.
 * max_offset is the largest positive immediate the caller can encode.
 *
 * Fix: get_imm() signals failure with INVALID_IMM, which is non-zero, so
 * the previous `if (imm)` checks accepted failed encodings and OR-ed the
 * INVALID_IMM bits into the emitted instruction. Test with the explicit
 * `imm != INVALID_IMM` comparison used everywhere else in this file. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm;

	*mem = TMP_REG1;

	/* Base + (index << shift): fold into TMP_REG1, offset becomes 0. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(arg & REG_MASK) | RM4(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 6));
	}

	arg &= REG_MASK;

	if (arg) {
		if (argw <= max_offset && argw >= -0xff) {
			/* Offset already fits; keep the original base register. */
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw < 0) {
			/* Subtract the aligned part, keep a small negative offset. */
			imm = get_imm((sljit_uw)(-argw & ~0xff));

			if (imm != INVALID_IMM) {
				*memw = -(-argw & 0xff);
				return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else if ((argw & 0xfff) <= max_offset) {
			/* Add the 4K-aligned part, keep the low 12 bits. */
			imm = get_imm((sljit_uw)(argw & ~0xfff));

			if (imm != INVALID_IMM) {
				*memw = argw & 0xfff;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		} else {
			/* Round the base up to the next 4K boundary so the
			   remaining offset becomes small and negative. */
			imm = get_imm((sljit_uw)((argw | 0xfff) + 1));

			if (imm != INVALID_IMM) {
				*memw = (argw & 0xfff) - 0x1000;
				return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(arg) | imm);
			}
		}
	}

	/* Fallback: materialize the aligned address in TMP_REG1. */
	imm = (sljit_uw)(argw & ~0xfff);

	if ((argw & 0xfff) > max_offset) {
		imm += 0x1000;
		*memw = (argw & 0xfff) - 0x1000;
	} else
		*memw = argw & 0xfff;

	FAIL_IF(load_immediate(compiler, TMP_REG1, imm));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, arg));
}
3344
/* Loads or stores a float register with optional unaligned address
 * support. 4 byte aligned accesses map directly to VLDR/VSTR; the other
 * cases go through core registers with word loads/stores. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the (low) word into a core register and store it. */
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | RT4(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1);

		/* f64: make both word offsets encodable, then store the second
		   word (0x80 presumably selects the upper half — confirm). */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw, TMP_REG1));
		FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(freg) | 0x80 | RT4(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE | STORE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst32(compiler, VMOV | VN4(freg) | RT4(TMP_REG2));
	}

	/* f64 load: fetch both words into core registers, then VMOV2 them in. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst32(compiler, VMOV2 | VM4(freg) | RT4(TMP_REG2) | RN4(TMP_REG1));
}
3381
/* Reduces a SIMD memory operand to a bare base register: on return
 * *mem_ptr is a register that already includes the offset (the original
 * base is kept only when the offset is zero). */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 base = *mem_ptr;
	sljit_uw imm;

	/* Base + (index << shift): fold both into TMP_REG1. */
	if (SLJIT_UNLIKELY(base & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst32(compiler, ADD_W | RD4(TMP_REG1) | RN4(base & REG_MASK) | RM4(OFFS_REG(base)) | ((sljit_uw)(memw & 0x3) << 6));
	}

	/* Absolute address: load it as an immediate. */
	if (SLJIT_UNLIKELY(!(base & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	base &= REG_MASK;

	if (memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Single add/sub when the offset magnitude is encodable. */
	if (memw < 0) {
		imm = get_imm((sljit_uw)-memw);

		if (imm != INVALID_IMM)
			return push_inst32(compiler, SUB_WI | RD4(TMP_REG1) | RN4(base) | imm);
	} else {
		imm = get_imm((sljit_uw)memw);

		if (imm != INVALID_IMM)
			return push_inst32(compiler, ADD_WI | RD4(TMP_REG1) | RN4(base) | imm);
	}

	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst16(compiler, ADD | SET_REGS44(TMP_REG1, base));
}
3413
simd_get_quad_reg_index(sljit_s32 freg)3414 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3415 {
3416 freg += freg & 0x1;
3417
3418 SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3419
3420 if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3421 freg--;
3422
3423 return freg;
3424 }
3425
/* Offset (+1 for odd freg, -1 for even) to the virtual register backing
   the other half of the same quad register pair. */
#define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3427
/* Moves a whole SIMD register to/from srcdst. Register-to-register copies
 * use VORR with identical sources (vmov alias); memory transfers use
 * VST1/VLD1 with the strongest guaranteed alignment hint. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 8 byte (D) and 16 byte (Q) vectors are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (!(srcdst & SLJIT_MEM)) {
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD4(srcdst) | VN4(freg) | VM4(freg);
		else
			ins = VD4(freg) | VN4(srcdst) | VM4(srcdst);

		/* Bit 6 widens the VORR to a quad register operation. */
		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	/* 0x7 / 0xa select the one- vs two-register VLD1/VST1 variant. */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD4(freg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Encode the available alignment hint (64 or 128+ bit). */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 4)
		ins |= 0x20;

	return push_inst32(compiler, ins | RN4(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3486
/* Tries to encode `value` (a lane of elem_size bytes-log2) as a NEON
 * VMOV/VMVN modified immediate. Returns the cmode/op/imm8 bit pattern to
 * be or-ed into the instruction, or ~(sljit_ins)0 when the value has no
 * such encoding. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* Shrink the element size while the value is a replicated pattern;
	   smaller elements have more encodable forms. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		/* Runs at most twice: the second pass retries the bit-inverted
		   value (VMVN form, marked by the (1 << 5) op bit). */
		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		/* Same two-pass scheme for 32-bit lanes: try each byte position
		   and the 0x..ff / 0x..ffff "ones-extended" forms, then retry
		   with the inverted value. */
		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			if (result != 0)
				return ~(sljit_ins)0;

			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter imm8 into the a:bcd:efgh fields of the instruction. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 21) | result;
}
3580
/* Broadcasts src (a register, a memory operand or an immediate) into
 * every lane of the freg vector. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Only 8 byte (D) and 16 byte (Q) vectors are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Zero is directly encodable as a VMOV immediate. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst32(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD4(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* f64 lanes: fill each half of the quad register by copying. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1 single-element load replicated to all lanes. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= 1 << 5;

		return push_inst32(compiler, VLD1_r | ins | VD4(freg) | RN4(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		SLJIT_ASSERT(elem_size == 2);
		/* VDUP (scalar): replicate one 32-bit lane of the source. */
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst32(compiler, VDUP_s | ins | VD4(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Truncate to the lane width, then try the VMOV immediate forms. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst32(compiler, VMOV_i | imm | VD4(freg));
		}

		/* Not encodable: fall back to VDUP from a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP (core register); these bits select the lane size. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst32(compiler, VDUP | ins | VN4(freg) | RT4(src));
}
3680
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)3681 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
3682 sljit_s32 freg, sljit_s32 lane_index,
3683 sljit_s32 srcdst, sljit_sw srcdstw)
3684 {
3685 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3686 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3687 sljit_ins ins;
3688
3689 CHECK_ERROR();
3690 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
3691
3692 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3693
3694 if (reg_size != 3 && reg_size != 4)
3695 return SLJIT_ERR_UNSUPPORTED;
3696
3697 if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
3698 return SLJIT_ERR_UNSUPPORTED;
3699
3700 if (type & SLJIT_SIMD_TEST)
3701 return SLJIT_SUCCESS;
3702
3703 if (reg_size == 4)
3704 freg = simd_get_quad_reg_index(freg);
3705
3706 if (type & SLJIT_SIMD_LANE_ZERO) {
3707 ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);
3708
3709 if (type & SLJIT_SIMD_FLOAT) {
3710 if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
3711 if (lane_index == 1)
3712 freg += SLJIT_QUAD_OTHER_HALF(freg);
3713
3714 if (srcdst != freg)
3715 FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(srcdst) | VM4(srcdst)));
3716
3717 freg += SLJIT_QUAD_OTHER_HALF(freg);
3718 return push_inst32(compiler, VMOV_i | VD4(freg));
3719 }
3720
3721 if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
3722 FAIL_IF(push_inst32(compiler, VORR | ins | VD4(TMP_FREG2) | VN4(freg) | VM4(freg)));
3723 srcdst = TMP_FREG2;
3724 srcdstw = 0;
3725 }
3726 }
3727
3728 FAIL_IF(push_inst32(compiler, VMOV_i | ins | VD4(freg)));
3729 }
3730
3731 if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
3732 lane_index -= (0x8 >> elem_size);
3733 freg += SLJIT_QUAD_OTHER_HALF(freg);
3734 }
3735
3736 if (srcdst & SLJIT_MEM) {
3737 if (elem_size == 3)
3738 return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);
3739
3740 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3741
3742 lane_index = lane_index << elem_size;
3743 ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
3744 return push_inst32(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD4(freg) | RN4(srcdst) | 0xf);
3745 }
3746
3747 if (type & SLJIT_SIMD_FLOAT) {
3748 if (elem_size == 3) {
3749 if (type & SLJIT_SIMD_STORE)
3750 return push_inst32(compiler, VORR | VD4(srcdst) | VN4(freg) | VM4(freg));
3751 return push_inst32(compiler, VMOV_F32 | SLJIT_32 | VD4(freg) | VM4(srcdst));
3752 }
3753
3754 if (type & SLJIT_SIMD_STORE) {
3755 if (freg_ebit_map[freg] == 0) {
3756 if (lane_index == 1)
3757 freg = SLJIT_F64_SECOND(freg);
3758
3759 return push_inst32(compiler, VMOV_F32 | VD4(srcdst) | VM4(freg));
3760 }
3761
3762 FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1)));
3763 return push_inst32(compiler, VMOV | VN4(srcdst) | RT4(TMP_REG1));
3764 }
3765
3766 FAIL_IF(push_inst32(compiler, VMOV | (1 << 20) | VN4(srcdst) | RT4(TMP_REG1)));
3767 return push_inst32(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN4(freg) | RT4(TMP_REG1));
3768 }
3769
3770 if (srcdst == SLJIT_IMM) {
3771 if (elem_size < 2)
3772 srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
3773
3774 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
3775 srcdst = TMP_REG1;
3776 }
3777
3778 if (elem_size == 0)
3779 ins = 0x400000;
3780 else if (elem_size == 1)
3781 ins = 0x20;
3782 else
3783 ins = 0;
3784
3785 lane_index = lane_index << elem_size;
3786 ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));
3787
3788 if (type & SLJIT_SIMD_STORE) {
3789 ins |= (1 << 20);
3790
3791 if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
3792 ins |= (1 << 23);
3793 }
3794
3795 return push_inst32(compiler, VMOV_s | ins | VN4(freg) | RT4(srcdst));
3796 }
3797
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)3798 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3799 sljit_s32 freg,
3800 sljit_s32 src, sljit_s32 src_lane_index)
3801 {
3802 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3803 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3804 sljit_ins ins;
3805
3806 CHECK_ERROR();
3807 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3808
3809 if (reg_size != 3 && reg_size != 4)
3810 return SLJIT_ERR_UNSUPPORTED;
3811
3812 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3813 return SLJIT_ERR_UNSUPPORTED;
3814
3815 if (type & SLJIT_SIMD_TEST)
3816 return SLJIT_SUCCESS;
3817
3818 if (reg_size == 4) {
3819 freg = simd_get_quad_reg_index(freg);
3820 src = simd_get_quad_reg_index(src);
3821
3822 if (src_lane_index >= (0x8 >> elem_size)) {
3823 src_lane_index -= (0x8 >> elem_size);
3824 src += SLJIT_QUAD_OTHER_HALF(src);
3825 }
3826 }
3827
3828 if (elem_size == 3) {
3829 if (freg != src)
3830 FAIL_IF(push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src)));
3831
3832 freg += SLJIT_QUAD_OTHER_HALF(freg);
3833
3834 if (freg != src)
3835 return push_inst32(compiler, VORR | VD4(freg) | VN4(src) | VM4(src));
3836 return SLJIT_SUCCESS;
3837 }
3838
3839 ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));
3840
3841 if (reg_size == 4)
3842 ins |= (sljit_ins)1 << 6;
3843
3844 return push_inst32(compiler, VDUP_s | ins | VD4(freg) | VM4(src));
3845 }
3846
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3847 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
3848 sljit_s32 freg,
3849 sljit_s32 src, sljit_sw srcw)
3850 {
3851 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3852 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3853 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3854 sljit_s32 dst_reg;
3855
3856 CHECK_ERROR();
3857 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
3858
3859 ADJUST_LOCAL_OFFSET(src, srcw);
3860
3861 if (reg_size != 3 && reg_size != 4)
3862 return SLJIT_ERR_UNSUPPORTED;
3863
3864 if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
3865 return SLJIT_ERR_UNSUPPORTED;
3866
3867 if (type & SLJIT_SIMD_TEST)
3868 return SLJIT_SUCCESS;
3869
3870 if (reg_size == 4)
3871 freg = simd_get_quad_reg_index(freg);
3872
3873 if (src & SLJIT_MEM) {
3874 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3875 if (reg_size == 4 && elem2_size - elem_size == 1)
3876 FAIL_IF(push_inst32(compiler, VLD1 | (0x7 << 8) | VD4(freg) | RN4(src) | 0xf));
3877 else
3878 FAIL_IF(push_inst32(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD4(freg) | RN4(src) | 0xf));
3879 src = freg;
3880 } else if (reg_size == 4)
3881 src = simd_get_quad_reg_index(src);
3882
3883 if (!(type & SLJIT_SIMD_FLOAT)) {
3884 dst_reg = (reg_size == 4) ? freg : TMP_FREG2;
3885
3886 do {
3887 FAIL_IF(push_inst32(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 28))
3888 | ((sljit_ins)1 << (19 + elem_size)) | VD4(dst_reg) | VM4(src)));
3889 src = dst_reg;
3890 } while (++elem_size < elem2_size);
3891
3892 if (dst_reg == TMP_FREG2)
3893 return push_inst32(compiler, VORR | VD4(freg) | VN4(TMP_FREG2) | VM4(TMP_FREG2));
3894 return SLJIT_SUCCESS;
3895 }
3896
3897 /* No SIMD variant, must use VFP instead. */
3898 SLJIT_ASSERT(reg_size == 4);
3899
3900 if (freg == src) {
3901 freg += SLJIT_QUAD_OTHER_HALF(freg);
3902 FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20));
3903 freg += SLJIT_QUAD_OTHER_HALF(freg);
3904 return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src));
3905 }
3906
3907 FAIL_IF(push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src)));
3908 freg += SLJIT_QUAD_OTHER_HALF(freg);
3909 return push_inst32(compiler, VCVT_F64_F32 | VD4(freg) | VM4(src) | 0x20);
3910 }
3911
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)3912 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
3913 sljit_s32 freg,
3914 sljit_s32 dst, sljit_sw dstw)
3915 {
3916 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3917 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3918 sljit_ins ins, imms;
3919 sljit_s32 dst_r;
3920
3921 CHECK_ERROR();
3922 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
3923
3924 ADJUST_LOCAL_OFFSET(dst, dstw);
3925
3926 if (reg_size != 3 && reg_size != 4)
3927 return SLJIT_ERR_UNSUPPORTED;
3928
3929 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3930 return SLJIT_ERR_UNSUPPORTED;
3931
3932 if (type & SLJIT_SIMD_TEST)
3933 return SLJIT_SUCCESS;
3934
3935 switch (elem_size) {
3936 case 0:
3937 imms = 0x243219;
3938 ins = VSHR | (1 << 28) | (0x9 << 16);
3939 break;
3940 case 1:
3941 imms = (reg_size == 4) ? 0x243219 : 0x2231;
3942 ins = VSHR | (1 << 28) | (0x11 << 16);
3943 break;
3944 case 2:
3945 imms = (reg_size == 4) ? 0x2231 : 0x21;
3946 ins = VSHR | (1 << 28) | (0x21 << 16);
3947 break;
3948 default:
3949 imms = 0x21;
3950 ins = VSHR | (1 << 28) | (0x1 << 16) | (1 << 7);
3951 break;
3952 }
3953
3954 if (reg_size == 4) {
3955 freg = simd_get_quad_reg_index(freg);
3956 ins |= (sljit_ins)1 << 6;
3957 }
3958
3959 SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
3960 FAIL_IF(push_inst32(compiler, ins | VD4(TMP_FREG2) | VM4(freg)));
3961
3962 if (reg_size == 4 && elem_size > 0)
3963 FAIL_IF(push_inst32(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
3964
3965 ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;
3966
3967 while (imms >= 0x100) {
3968 FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | ((imms & 0xff) << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
3969 imms >>= 8;
3970 }
3971
3972 FAIL_IF(push_inst32(compiler, VSRA | (1 << 28) | ins | (1 << 7) | (imms << 16) | VD4(TMP_FREG2) | VM4(TMP_FREG2)));
3973
3974 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3975 FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(dst_r) | VN4(TMP_FREG2)));
3976
3977 if (reg_size == 4 && elem_size == 0) {
3978 SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
3979 FAIL_IF(push_inst32(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RT4(TMP_REG2)| VN4(TMP_FREG1)));
3980 FAIL_IF(push_inst32(compiler, ORR_W | RD4(dst_r) | RN4(dst_r) | RM4(TMP_REG2) | (0x2 << 12)));
3981 }
3982
3983 if (dst_r == TMP_REG1)
3984 return emit_op_mem(compiler, STORE | WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
3985
3986 return SLJIT_SUCCESS;
3987 }
3988
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)3989 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3990 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3991 {
3992 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3993 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3994 sljit_ins ins = 0;
3995
3996 CHECK_ERROR();
3997 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3998
3999 if (reg_size != 3 && reg_size != 4)
4000 return SLJIT_ERR_UNSUPPORTED;
4001
4002 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4003 return SLJIT_ERR_UNSUPPORTED;
4004
4005 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4006 case SLJIT_SIMD_OP2_AND:
4007 ins = VAND;
4008 break;
4009 case SLJIT_SIMD_OP2_OR:
4010 ins = VORR;
4011 break;
4012 case SLJIT_SIMD_OP2_XOR:
4013 ins = VEOR;
4014 break;
4015 }
4016
4017 if (type & SLJIT_SIMD_TEST)
4018 return SLJIT_SUCCESS;
4019
4020 if (reg_size == 4) {
4021 dst_freg = simd_get_quad_reg_index(dst_freg);
4022 src1_freg = simd_get_quad_reg_index(src1_freg);
4023 src2_freg = simd_get_quad_reg_index(src2_freg);
4024 ins |= (sljit_ins)1 << 6;
4025 }
4026
4027 return push_inst32(compiler, ins | VD4(dst_freg) | VN4(src1_freg) | VM4(src2_freg));
4028 }
4029
4030 #undef FPU_LOAD
4031
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4032 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4033 sljit_s32 dst_reg,
4034 sljit_s32 mem_reg)
4035 {
4036 sljit_ins ins;
4037
4038 CHECK_ERROR();
4039 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4040
4041 switch (GET_OPCODE(op)) {
4042 case SLJIT_MOV_U8:
4043 ins = LDREXB;
4044 break;
4045 case SLJIT_MOV_U16:
4046 ins = LDREXH;
4047 break;
4048 default:
4049 ins = LDREX;
4050 break;
4051 }
4052
4053 return push_inst32(compiler, ins | RN4(mem_reg) | RT4(dst_reg));
4054 }
4055
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4056 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4057 sljit_s32 src_reg,
4058 sljit_s32 mem_reg,
4059 sljit_s32 temp_reg)
4060 {
4061 sljit_ins ins;
4062
4063 /* temp_reg == mem_reg is undefined so use another temp register */
4064 SLJIT_UNUSED_ARG(temp_reg);
4065
4066 CHECK_ERROR();
4067 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4068
4069 switch (GET_OPCODE(op)) {
4070 case SLJIT_MOV_U8:
4071 ins = STREXB | RM4(TMP_REG1);
4072 break;
4073 case SLJIT_MOV_U16:
4074 ins = STREXH | RM4(TMP_REG1);
4075 break;
4076 default:
4077 ins = STREX | RD4(TMP_REG1);
4078 break;
4079 }
4080
4081 FAIL_IF(push_inst32(compiler, ins | RN4(mem_reg) | RT4(src_reg)));
4082 if (op & SLJIT_SET_ATOMIC_STORED)
4083 return push_inst32(compiler, CMPI_W | RN4(TMP_REG1));
4084
4085 return SLJIT_SUCCESS;
4086 }
4087
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)4088 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4089 {
4090 struct sljit_const *const_;
4091 sljit_s32 dst_r;
4092
4093 CHECK_ERROR_PTR();
4094 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4095 ADJUST_LOCAL_OFFSET(dst, dstw);
4096
4097 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
4098 PTR_FAIL_IF(!const_);
4099 set_const(const_, compiler);
4100
4101 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4102 PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, (sljit_uw)init_value));
4103
4104 if (dst & SLJIT_MEM)
4105 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
4106 return const_;
4107 }
4108
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)4109 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4110 {
4111 struct sljit_put_label *put_label;
4112 sljit_s32 dst_r;
4113
4114 CHECK_ERROR_PTR();
4115 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
4116 ADJUST_LOCAL_OFFSET(dst, dstw);
4117
4118 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
4119 PTR_FAIL_IF(!put_label);
4120 set_put_label(put_label, compiler, 0);
4121
4122 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
4123 PTR_FAIL_IF(emit_imm32_const(compiler, dst_r, 0));
4124
4125 if (dst & SLJIT_MEM)
4126 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
4127 return put_label;
4128 }
4129
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)4130 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4131 {
4132 sljit_u16 *inst = (sljit_u16*)addr;
4133 SLJIT_UNUSED_ARG(executable_offset);
4134
4135 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
4136 modify_imm32_const(inst, new_target);
4137 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
4138 inst = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
4139 SLJIT_CACHE_FLUSH(inst, inst + 4);
4140 }
4141
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)4142 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4143 {
4144 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
4145 }
4146