1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
/* Report the floating point ABI the code was compiled for
   (appended to the platform name string below). */
#ifdef __SOFTFP__
#define ARM_ABI_INFO " ABI:softfp"
#else
#define ARM_ABI_INFO " ABI:hardfp"
#endif
32
/* Returns a human readable name of the target architecture,
   CPU feature info and float ABI. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
43
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. Virtual register numbers for the temporary
   registers used internally by the code generator. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT 8
#define CONST_POOL_EMPTY 0xffffffff

/* Rounds ptr up to the next CONST_POOL_ALIGNMENT instruction boundary. */
#define ALIGN_INSTRUCTION(ptr) \
	(sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
/* Converts a byte distance into the maximum allowed instruction word
   distance, leaving room for the pool alignment padding. */
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))

/* Maps virtual integer registers to machine register numbers.
   See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Maps virtual float registers to VFP register numbers; the second half
   of the table repeats the mapping for the F64 second-half aliases
   (see the matching freg_ebit_map below). */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra encoding bit for each entry of freg_map: 0 for the first half of
   the table, 1 for the F64 second-half aliases. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};

/* Integer operand encodings: Rm at bit 0, Rm at bit 8, Rd at bit 12, Rn at bit 16. */
#define RM(rm) ((sljit_ins)reg_map[rm])
#define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
#define RN(rn) ((sljit_ins)reg_map[rn] << 16)

/* Float operand encodings: register number plus its extra bit. */
#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
94
95 /* --------------------------------------------------------------------- */
/* Instruction forms */
97 /* --------------------------------------------------------------------- */
98
/* Base opcodes of the A32 instructions used by this backend.
   The instruction includes the AL condition.
   INST_NAME - CONDITIONAL remove this flag. */
#define COND_MASK 0xf0000000
#define CONDITIONAL 0xe0000000
/* Marker word tag used in the intermediate buffer to request a constant
   pool flush (see push_cpool and the second generation pass). */
#define PUSH_POOL 0xff000000

#define ADC 0xe0a00000
#define ADD 0xe0800000
#define AND 0xe0000000
#define B 0xea000000
#define BIC 0xe1c00000
#define BKPT 0xe1200070
#define BL 0xeb000000
#define BLX 0xe12fff30
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
#define CMN 0xe1600000
#define CMP 0xe1400000
#define EOR 0xe0200000
#define LDR 0xe5100000
#define LDR_POST 0xe4100000
#define LDREX 0xe1900f9f
#define LDREXB 0xe1d00f9f
#define LDREXH 0xe1f00f9f
#define MOV 0xe1a00000
#define MUL 0xe0000090
#define MVN 0xe1e00000
#define NOP 0xe1a00000
#define ORR 0xe1800000
#define PUSH 0xe92d0000
#define POP 0xe8bd0000
#define REV 0xe6bf0f30
#define REV16 0xe6bf0fb0
#define RSB 0xe0600000
#define RSC 0xe0e00000
#define SBC 0xe0c00000
#define SMULL 0xe0c00090
#define STR 0xe5000000
#define STREX 0xe1800f90
#define STREXB 0xe1c00f90
#define STREXH 0xe1e00f90
#define SUB 0xe0400000
#define SXTB 0xe6af0070
#define SXTH 0xe6bf0070
#define TST 0xe1000000
#define UMULL 0xe0800090
#define UXTB 0xe6ef0070
#define UXTH 0xe6ff0070
/* VFP / NEON opcodes. */
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
#define VAND 0xf2000110
#define VCMP_F32 0xeeb40a40
#define VCVT_F32_S32 0xeeb80ac0
#define VCVT_F32_U32 0xeeb80a40
#define VCVT_F64_F32 0xeeb70ac0
#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
#define VDUP 0xee800b10
#define VDUP_s 0xf3b00c00
#define VEOR 0xf3000110
#define VLD1 0xf4200000
#define VLD1_r 0xf4a00c00
#define VLD1_s 0xf4a00000
#define VLDR_F32 0xed100a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
#define VMOV2 0xec400a10
#define VMOV_i 0xf2800010
#define VMOV_s 0xee000b10
#define VMOVN 0xf3b20200
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
#define VORR 0xf2200110
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
#define VSHLL 0xf2800a10
#define VSHR 0xf2800010
#define VSRA 0xf2800110
#define VST1 0xf4000000
#define VST1_s 0xf4800000
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
#define MOVT 0xe3400000
#define MOVW 0xe3000000
#define RBIT 0xe6ff0f30
#endif
189
190 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
191
/* Argument checking helper: returns non-zero when fr denotes a usable
   float register (scratch, saved or temporary) for this compiler. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	/* Map an F64 second-half alias back to its base register. */
	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < SLJIT_FR0 + compiler->fscratches)
		return 1;
	if (fr > SLJIT_FS0 - compiler->fsaveds && fr <= SLJIT_FS0)
		return 1;
	return fr >= SLJIT_TMP_FREGISTER_BASE
		&& fr < SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS;
}
204
205 #endif /* SLJIT_ARGUMENT_CHECKS */
206
207 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
208
/* Flushes the pending literals into the instruction stream: emits a
   PUSH_POOL marker word, alignment padding, then the literal words.
   The second generation pass recognizes the marker and turns this
   region into a real, aligned constant pool. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_ins* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point to the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	/* Marker word: PUSH_POOL tag in the top byte, literal count below. */
	inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Reserve words so the pool can be aligned to CONST_POOL_ALIGNMENT. */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy the collected literal words after the marker. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	/* Start collecting a fresh pool. */
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
246
push_inst(struct sljit_compiler * compiler,sljit_ins inst)247 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
248 {
249 sljit_ins* ptr;
250
251 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
252 FAIL_IF(push_cpool(compiler));
253
254 ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
255 FAIL_IF(!ptr);
256 compiler->size++;
257 *ptr = inst;
258 return SLJIT_SUCCESS;
259 }
260
/* Emits a PC relative load and records its literal in the constant pool.
   Identical non-unique literals are shared: the instruction reuses the
   pool index of a matching entry when one exists. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	/* Flush first if the oldest pending literal would fall out of the
	   4092 byte LDR offset range. */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Look for an existing, non-unique entry with the same value. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			/* Pool is full: flush it and start a fresh one. */
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	/* The low 12 bits temporarily hold the pool index; the second pass
	   (patch_pc_relative_loads) rewrites them into a real offset. */
	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	/* Remember where the oldest pending literal was referenced. */
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
310
push_inst_with_unique_literal(struct sljit_compiler * compiler,sljit_ins inst,sljit_uw literal)311 static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
312 {
313 sljit_ins* ptr;
314
315 if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
316 FAIL_IF(push_cpool(compiler));
317
318 SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
319 ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
320 FAIL_IF(!ptr);
321 compiler->size++;
322 *ptr = inst | compiler->cpool_fill;
323
324 compiler->cpool[compiler->cpool_fill] = literal;
325 compiler->cpool_unique[compiler->cpool_fill] = 1;
326 compiler->cpool_fill++;
327 if (compiler->cpool_diff == CONST_POOL_EMPTY)
328 compiler->cpool_diff = compiler->size;
329 return SLJIT_SUCCESS;
330 }
331
prepare_blx(struct sljit_compiler * compiler)332 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
333 {
334 /* Place for at least two instruction (doesn't matter whether the first has a literal). */
335 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
336 return push_cpool(compiler);
337 return SLJIT_SUCCESS;
338 }
339
/* Emits a BLX to the address held in TMP_REG1. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	/* TMP_REG1 must not be the link register, which BLX overwrites. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
348
/* Second pass helper: rewrites the pool indices stored in the low 12 bits
   of PC relative loads into real offsets, compacting the pool by dropping
   entries that are never referenced. Returns the number of literals kept. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) {
			/* Distance from the load to the start of the pool, in words. */
			diff = (sljit_uw)(const_pool - last_pc_patch);
			/* The low 12 bits currently hold the pool index. */
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			if ((sljit_s32)const_pool[ind] < 0) {
				/* First reference: assign the next compacted slot. */
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			/* The - 2 accounts for the pc being two words ahead of the load. */
			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Entry is just behind the pc: clear the U bit and use offset 4. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
393
/* In some rare occasions we may need future patches. The probability is close to 0 in practice.
   A record saves a pool word that resolve_const_pool_index would otherwise
   overwrite before it is needed. */
struct future_patch {
	struct future_patch* next; /* singly linked list */
	sljit_s32 index; /* compacted pool slot the saved word belongs to */
	sljit_s32 value; /* the saved pool word */
};
400
/* Second pass helper: writes the literal *buf_ptr into its compacted pool
   slot, saving any still-needed word it would overwrite into a
   future_patch record. Returns SLJIT_ERR_ALLOC_FAILED on OOM. */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* Check whether this slot's value was saved earlier as a patch. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				/* Found: take the saved value and unlink the record. */
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* A negative value marks an unused literal (set by patch_pc_relative_loads). */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* The target slot has not been processed yet: remember its
			   current content so it can be placed later. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* OOM: release the whole patch list before failing. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
453
454 #else
455
push_inst(struct sljit_compiler * compiler,sljit_ins inst)456 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
457 {
458 sljit_ins* ptr;
459
460 ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
461 FAIL_IF(!ptr);
462 compiler->size++;
463 *ptr = inst;
464 return SLJIT_SUCCESS;
465 }
466
emit_imm(struct sljit_compiler * compiler,sljit_s32 reg,sljit_sw imm)467 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
468 {
469 FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff)));
470 return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff));
471 }
472
473 #endif
474
/* Decides whether a jump can be shortened to a direct B/BL instruction and
   patches the emitted words accordingly. Returns 1 when the jump was
   shortened (the caller reclaims the freed words), 0 otherwise. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	/* Rewritable jumps must keep their long form so they can be retargeted. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* On ARMv6 a call sequence has one extra word before code_ptr. */
	if (jump->flags & IS_BL)
		code_ptr--;

	/* Byte distance from the branch; + 2 words accounts for the pc offset. */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (jump->flags & IS_BL) {
		/* +-32 MB range of the 24 bit branch offset. */
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Keep the original condition bits of the following word. */
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		/* Replace the MOVW/MOVT pair with a single direct branch. */
		code_ptr -= 2;
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
	return 0;
}
531
/* Retargets a rewritable jump in generated code. On ARMv6, jump_ptr names a
   two word record: the address of the patchable instruction and the original
   mov pc / load word; the jump is converted to a direct B/BL when the new
   target is in range, otherwise the literal pool entry is updated. On ARMv7,
   jump_ptr points to the MOVW/MOVT pair itself. flush_cache selects whether
   WX permissions are toggled and the icache is flushed (0 during the initial
   code generation, non-zero for later runtime patching). */
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)jump_ptr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_ins mov_pc = ptr[1];
	/* bl: the recorded instruction does not write to pc, i.e. this is a
	   two instruction call sequence. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	/* Word distance for the 24 bit branch offset (pc is 2 words ahead). */
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			/* Replace the two word call sequence with BL + NOP. */
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the original instruction(s) if a previous retarget
		   converted them to a direct branch. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		/* Update the literal pool entry holding the target address. */
		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Rewrite the immediate fields, preserving the destination register. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
626
627 static sljit_uw get_imm(sljit_uw imm);
628 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
629 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
630
/* Rewrites the value produced by a const instruction in generated code.
   On ARMv6, addr names a two word record (instruction address, original
   load word); the code first tries to encode the new value as a MOV or MVN
   immediate, and falls back to updating the literal pool entry. On ARMv7
   the MOVW/MOVT pair is rewritten in place. flush_cache as in
   inline_set_jump_addr. */
static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)addr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	/* Try a single MOV rd, #imm (0xe3a00000), keeping the rd field. */
	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Otherwise try MVN rd, #imm (0xe3e00000) with the inverted value. */
	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Get the position of the constant in the literal pool. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the pc relative load if a previous update replaced it
	   with a MOV/MVN immediate. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Rewrite the immediate fields, preserving the destination register. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
722
sljit_generate_code(struct sljit_compiler * compiler)723 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
724 {
725 struct sljit_memory_fragment *buf;
726 sljit_ins *code;
727 sljit_ins *code_ptr;
728 sljit_ins *buf_ptr;
729 sljit_ins *buf_end;
730 sljit_uw size;
731 sljit_uw word_count;
732 sljit_uw next_addr;
733 sljit_sw executable_offset;
734 sljit_uw addr;
735 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
736 sljit_uw cpool_size;
737 sljit_uw cpool_skip_alignment;
738 sljit_uw cpool_current_index;
739 sljit_ins *cpool_start_address;
740 sljit_ins *last_pc_patch;
741 struct future_patch *first_patch;
742 #endif
743
744 struct sljit_label *label;
745 struct sljit_jump *jump;
746 struct sljit_const *const_;
747 struct sljit_put_label *put_label;
748
749 CHECK_ERROR_PTR();
750 CHECK_PTR(check_sljit_generate_code(compiler));
751 reverse_buf(compiler);
752
753 /* Second code generation pass. */
754 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
755 size = compiler->size + (compiler->patches << 1);
756 if (compiler->cpool_fill > 0)
757 size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
758 #else /* !SLJIT_CONFIG_ARM_V6 */
759 size = compiler->size;
760 #endif /* SLJIT_CONFIG_ARM_V6 */
761 code = (sljit_ins*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_ins), compiler->exec_allocator_data);
762 PTR_FAIL_WITH_EXEC_IF(code);
763 buf = compiler->buf;
764
765 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
766 cpool_size = 0;
767 cpool_skip_alignment = 0;
768 cpool_current_index = 0;
769 cpool_start_address = NULL;
770 first_patch = NULL;
771 last_pc_patch = code;
772 #endif /* SLJIT_CONFIG_ARM_V6 */
773
774 code_ptr = code;
775 word_count = 0;
776 next_addr = 1;
777 executable_offset = SLJIT_EXEC_OFFSET(code);
778
779 label = compiler->labels;
780 jump = compiler->jumps;
781 const_ = compiler->consts;
782 put_label = compiler->put_labels;
783
784 if (label && label->size == 0) {
785 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
786 label = label->next;
787 }
788
789 do {
790 buf_ptr = (sljit_ins*)buf->memory;
791 buf_end = buf_ptr + (buf->used_size >> 2);
792 do {
793 word_count++;
794 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
795 if (cpool_size > 0) {
796 if (cpool_skip_alignment > 0) {
797 buf_ptr++;
798 cpool_skip_alignment--;
799 }
800 else {
801 if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
802 SLJIT_FREE_EXEC(code, compiler->exec_allocator_data);
803 compiler->error = SLJIT_ERR_ALLOC_FAILED;
804 return NULL;
805 }
806 buf_ptr++;
807 if (++cpool_current_index >= cpool_size) {
808 SLJIT_ASSERT(!first_patch);
809 cpool_size = 0;
810 if (label && label->size == word_count) {
811 /* Points after the current instruction. */
812 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
813 label->size = (sljit_uw)(code_ptr - code);
814 label = label->next;
815
816 next_addr = compute_next_addr(label, jump, const_, put_label);
817 }
818 }
819 }
820 }
821 else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
822 #endif /* SLJIT_CONFIG_ARM_V6 */
823 *code_ptr = *buf_ptr++;
824 if (next_addr == word_count) {
825 SLJIT_ASSERT(!label || label->size >= word_count);
826 SLJIT_ASSERT(!jump || jump->addr >= word_count);
827 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
828 SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
829
830 /* These structures are ordered by their address. */
831 if (jump && jump->addr == word_count) {
832 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
833 if (detect_jump_type(jump, code_ptr, code, executable_offset))
834 code_ptr--;
835 jump->addr = (sljit_uw)code_ptr;
836 #else /* !SLJIT_CONFIG_ARM_V6 */
837 jump->addr = (sljit_uw)(code_ptr - 2);
838 if (detect_jump_type(jump, code_ptr, code, executable_offset))
839 code_ptr -= 2;
840 #endif /* SLJIT_CONFIG_ARM_V6 */
841 jump = jump->next;
842 }
843 if (label && label->size == word_count) {
844 /* code_ptr can be affected above. */
845 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
846 label->size = (sljit_uw)((code_ptr + 1) - code);
847 label = label->next;
848 }
849 if (const_ && const_->addr == word_count) {
850 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
851 const_->addr = (sljit_uw)code_ptr;
852 #else /* !SLJIT_CONFIG_ARM_V6 */
853 const_->addr = (sljit_uw)(code_ptr - 1);
854 #endif /* SLJIT_CONFIG_ARM_V6 */
855 const_ = const_->next;
856 }
857 if (put_label && put_label->addr == word_count) {
858 SLJIT_ASSERT(put_label->label);
859 put_label->addr = (sljit_uw)code_ptr;
860 put_label = put_label->next;
861 }
862 next_addr = compute_next_addr(label, jump, const_, put_label);
863 }
864 code_ptr++;
865 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
866 } else {
867 /* Fortunately, no need to shift. */
868 cpool_size = *buf_ptr++ & ~PUSH_POOL;
869 SLJIT_ASSERT(cpool_size > 0);
870 cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
871 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
872 if (cpool_current_index > 0) {
873 /* Unconditional branch. */
874 *code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
875 code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
876 }
877 cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
878 cpool_current_index = 0;
879 last_pc_patch = code_ptr;
880 }
881 #endif /* SLJIT_CONFIG_ARM_V6 */
882 } while (buf_ptr < buf_end);
883 buf = buf->next;
884 } while (buf);
885
886 SLJIT_ASSERT(!label);
887 SLJIT_ASSERT(!jump);
888 SLJIT_ASSERT(!const_);
889 SLJIT_ASSERT(!put_label);
890
891 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
892 SLJIT_ASSERT(cpool_size == 0);
893 if (compiler->cpool_fill > 0) {
894 cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
895 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
896 if (cpool_current_index > 0)
897 code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
898
899 buf_ptr = compiler->cpool;
900 buf_end = buf_ptr + compiler->cpool_fill;
901 cpool_current_index = 0;
902 while (buf_ptr < buf_end) {
903 if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
904 SLJIT_FREE_EXEC(code, compiler->exec_allocator_data);
905 compiler->error = SLJIT_ERR_ALLOC_FAILED;
906 return NULL;
907 }
908 buf_ptr++;
909 cpool_current_index++;
910 }
911 SLJIT_ASSERT(!first_patch);
912 }
913 #endif
914
915 jump = compiler->jumps;
916 while (jump) {
917 buf_ptr = (sljit_ins*)jump->addr;
918
919 if (jump->flags & PATCH_B) {
920 addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
921 if (!(jump->flags & JUMP_ADDR)) {
922 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
923 SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000);
924 *buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff;
925 }
926 else {
927 SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000);
928 *buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff;
929 }
930 }
931 else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
932 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
933 jump->addr = (sljit_uw)code_ptr;
934 code_ptr[0] = (sljit_ins)buf_ptr;
935 code_ptr[1] = *buf_ptr;
936 inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
937 code_ptr += 2;
938 #else /* !SLJIT_CONFIG_ARM_V6 */
939 inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
940 #endif /* SLJIT_CONFIG_ARM_V6 */
941 } else {
942 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
943 if (jump->flags & IS_BL)
944 buf_ptr--;
945 if (*buf_ptr & (1 << 23))
946 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
947 else
948 buf_ptr += 1;
949 *buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
950 #else /* !SLJIT_CONFIG_ARM_V6 */
951 inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
952 #endif /* SLJIT_CONFIG_ARM_V6 */
953 }
954 jump = jump->next;
955 }
956
957 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
958 const_ = compiler->consts;
959 while (const_) {
960 buf_ptr = (sljit_ins*)const_->addr;
961 const_->addr = (sljit_uw)code_ptr;
962
963 code_ptr[0] = (sljit_ins)buf_ptr;
964 code_ptr[1] = *buf_ptr;
965 if (*buf_ptr & (1 << 23))
966 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
967 else
968 buf_ptr += 1;
969 /* Set the value again (can be a simple constant). */
970 inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
971 code_ptr += 2;
972
973 const_ = const_->next;
974 }
975 #endif /* SLJIT_CONFIG_ARM_V6 */
976
977 put_label = compiler->put_labels;
978 while (put_label) {
979 addr = put_label->label->addr;
980 buf_ptr = (sljit_ins*)put_label->addr;
981
982 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
983 SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
984 buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
985 #else /* !SLJIT_CONFIG_ARM_V6 */
986 SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
987 buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
988 buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
989 #endif /* SLJIT_CONFIG_ARM_V6 */
990 put_label = put_label->next;
991 }
992
993 SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
994
995 compiler->error = SLJIT_ERR_COMPILED;
996 compiler->executable_offset = executable_offset;
997 compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
998
999 code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1000 code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1001
1002 SLJIT_CACHE_FLUSH(code, code_ptr);
1003 SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1004 return code;
1005 }
1006
sljit_has_cpu_feature(sljit_s32 feature_type)1007 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1008 {
1009 switch (feature_type) {
1010 case SLJIT_HAS_FPU:
1011 case SLJIT_HAS_F64_AS_F32_PAIR:
1012 #ifdef SLJIT_IS_FPU_AVAILABLE
1013 return (SLJIT_IS_FPU_AVAILABLE) != 0;
1014 #else
1015 /* Available by default. */
1016 return 1;
1017 #endif /* SLJIT_IS_FPU_AVAILABLE */
1018 case SLJIT_HAS_SIMD:
1019 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1020 return 0;
1021 #else
1022 #ifdef SLJIT_IS_FPU_AVAILABLE
1023 return (SLJIT_IS_FPU_AVAILABLE) != 0;
1024 #else
1025 /* Available by default. */
1026 return 1;
1027 #endif /* SLJIT_IS_FPU_AVAILABLE */
1028 #endif /* SLJIT_CONFIG_ARM_V6 */
1029
1030 case SLJIT_SIMD_REGS_ARE_PAIRS:
1031 case SLJIT_HAS_CLZ:
1032 case SLJIT_HAS_ROT:
1033 case SLJIT_HAS_CMOV:
1034 case SLJIT_HAS_REV:
1035 case SLJIT_HAS_PREFETCH:
1036 case SLJIT_HAS_COPY_F32:
1037 case SLJIT_HAS_COPY_F64:
1038 case SLJIT_HAS_ATOMIC:
1039 return 1;
1040
1041 case SLJIT_HAS_CTZ:
1042 #if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
1043 return 2;
1044 #else
1045 return 1;
1046 #endif /* SLJIT_CONFIG_ARM_V6 */
1047
1048 default:
1049 return 0;
1050 }
1051 }
1052
1053 /* --------------------------------------------------------------------- */
1054 /* Entry, exit */
1055 /* --------------------------------------------------------------------- */
1056
/* Creates an index in data_transfer_insts array.
   The low two bits select the transfer size, SIGNED selects sign
   extension and LOAD_DATA selects a load instead of a store. */
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x01
#define HALF_SIZE 0x02
#define PRELOAD 0x03
#define SIGNED 0x04
#define LOAD_DATA 0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM 0x10
#define ALLOW_INV_IMM 0x20
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM 0x40
#define ALLOW_DOUBLE_IMM 0x80

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations. */

static const sljit_ins data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* Assembles one load/store: picks the base opcode from
   data_transfer_insts, sets the U (add/subtract offset) bit at bit 23
   and merges the register fields and the addressing-mode argument. */
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Type2 immediates split the 8 bit offset into two nibbles;
   bit 22 selects the immediate form. */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

/* NOTE(review): src1 is encoded into the VM field and src2 into VN;
   callers rely on this operand ordering. */
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))

/* Flags for emit_op: */
/* Arguments are swapped. */
#define ARGS_SWAPPED 0x01
/* Inverted immediate. */
#define INV_IMM 0x02
/* Source and destination is register. */
#define MOVE_REG_CONV 0x04
/* Unused return value. */
#define UNUSED_RETURN 0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS (1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM (1 << 25)

/* Generic operation emitter (defined later in this file); declared here
   because the prologue/epilogue emitters below already use it. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
1131
/* Emits the function prologue: pushes the core saved/scratch registers
   and lr, saves float registers, records the local area size, then moves
   the incoming arguments into their assigned registers. The argument
   moving code is split: __SOFTFP__ receives floats in core registers /
   on the stack, hardfp receives them in VFP registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw imm, offset;
	sljit_s32 i, tmp, size, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Register-to-register VFP argument moves are collected in remap and
	   emitted afterwards in reverse order (avoids overwriting sources). */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Build the register mask of saved registers (minus the kept ones)
	   and the callee-saved scratch registers. */
	imm = 0;

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	/* Push saved and temporary registers
	   multiple registers: stmdb sp!, {..., lr}
	   single register: str reg, [sp, #-4]! */
	if (imm != 0)
		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
	else
		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Insert a padding word so the doubles below stay 8-byte aligned. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the frame (saved area + locals) up to 8 bytes. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	/* SLJIT_ENTER_REG_ARG: arguments are already in place. */
	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* offset is the byte position of the next argument; the first
	   4 * sizeof(sljit_sw) bytes arrive in r0-r3, the rest on the stack. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are aligned to an even register / 8 byte slot. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				/* Move the core register pair into the VFP register. */
				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Argument already resides in the right register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
			else
				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* Hardfp: floats arrive in d0/s0 upwards; compact them into the
	   sljit float register numbering, reusing the free half of a double
	   register (f32_offset) for the next single. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			/* Copy a word argument into its saved register. */
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	/* Emit the collected moves in reverse order. */
	while (remap_ptr > remap)
		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
#endif

	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	return SLJIT_SUCCESS;
}
1297
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1298 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1299 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1300 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1301 {
1302 sljit_s32 size;
1303
1304 CHECK_ERROR();
1305 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1306 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1307
1308 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1309
1310 /* Doubles are saved, so alignment is unaffected. */
1311 if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1312 size += SSIZE_OF(sw);
1313
1314 compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1315 return SLJIT_SUCCESS;
1316 }
1317
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1318 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1319 {
1320 sljit_uw imm2 = get_imm(imm);
1321
1322 if (imm2 == 0)
1323 return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);
1324
1325 return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
1326 }
1327
/* Emits the function epilogue: restores float registers, releases the
   local area and restores the core saved registers.

   frame_size < 0 : load lr into TMP_REG2 (no return).
   frame_size == 0: full release, return by popping lr into pc.
   frame_size > 0 : keep frame_size bytes (rounded down to 8) of the
                    saved area on the stack and do not load lr at all
                    (frame_size == 1 keeps nothing but still skips lr). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		/* Reverse of the vpush sequence in sljit_emit_enter. */
		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
		}

		/* Only the alignment padding (if any) is left above the core
		   register save area. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	if (lr_dst != 0)
		reg_list |= (sljit_uw)1 << reg_map[lr_dst];

	/* Collect the saved registers (minus the kept ones) ... */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	/* ... and the callee-saved scratch registers. */
	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	/* lr alone can be treated like a single restored register. */
	if (lr_dst == TMP_REG2 && reg_list == 0) {
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register to restore and lr stays in memory.
		   tmp acts as a small state code:
		     0 - nothing left to load after adjusting sp
		     1 - the restored register was already loaded
		     2 - load restored_reg with an offset after adjusting sp
		     3 - load restored_reg with a post-indexed load */
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	/* Pop saved and temporary registers
	   multiple registers: ldmia sp!, {...}
	   single register: ldr reg, [sp], #4 */
	if ((reg_list & (reg_list - 1)) == 0) {
		SLJIT_ASSERT(lr_dst != 0);
		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);

		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
	}

	FAIL_IF(push_inst(compiler, POP | reg_list));

	/* Re-reserve the kept part of the frame (lr word already consumed). */
	if (frame_size > 0)
		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* frame_size == 1 case: skip the slot reserved for lr. */
	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
}
1463
/* Emits a plain return: releases the whole frame and returns by loading
   lr into pc (frame_size 0 in emit_stack_frame_release). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
1471
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1472 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1473 sljit_s32 src, sljit_sw srcw)
1474 {
1475 CHECK_ERROR();
1476 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1477
1478 if (src & SLJIT_MEM) {
1479 FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
1480 src = TMP_REG1;
1481 srcw = 0;
1482 } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1483 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
1484 src = TMP_REG1;
1485 srcw = 0;
1486 }
1487
1488 FAIL_IF(emit_stack_frame_release(compiler, 1));
1489
1490 SLJIT_SKIP_CHECKS(compiler);
1491 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1492 }
1493
1494 /* --------------------------------------------------------------------- */
1495 /* Operators */
1496 /* --------------------------------------------------------------------- */
1497
/* Emits the machine instruction(s) implementing op once the operands are
   already in registers or SRC2_IMM encoded immediates. flags carries
   ARGS_SWAPPED / INV_IMM / MOVE_REG_CONV / UNUSED_RETURN and the
   SET_FLAGS bit as prepared by emit_op. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_uw dst, sljit_uw src1, sljit_uw src2)
{
	sljit_s32 is_masked;
	sljit_uw shift_type;

	switch (op) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (dst != src2) {
			if (src2 & SRC2_IMM) {
				/* INV_IMM: the immediate is stored inverted, use MVN. */
				return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
			}
			return push_inst(compiler, MOV | RD(dst) | RM(src2));
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		/* MOVE_REG_CONV: register source needs zero/sign extension. */
		if (flags & MOVE_REG_CONV)
			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));

		if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & MOVE_REG_CONV)
			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));

		if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_CLZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
		return SLJIT_SUCCESS;

	case SLJIT_CTZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
		/* No RBIT on ARMv6: isolate the lowest set bit (x & -x), take its
		   CLZ, then conditionally convert 31-clz when the input was not 0
		   (the EOR has its condition flipped to NE via the ^ 0xf0000000). */
		FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
		return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
#else /* !SLJIT_CONFIG_ARM_V6 */
		/* ctz(x) == clz(rbit(x)) */
		FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
		return push_inst(compiler, CLZ | RD(dst) | RM(dst));
#endif /* SLJIT_CONFIG_ARM_V6 */

	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		return push_inst(compiler, REV | RD(dst) | RM(src2));

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1);
		/* REV16 swaps bytes within each halfword; extend the low half. */
		FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
		if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16))
			return SLJIT_SUCCESS;
		return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
	case SLJIT_ADD:
		SLJIT_ASSERT(!(flags & INV_IMM));

		/* Result unused: CMN computes the same flags without a dest. */
		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_ADDC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUB:
		SLJIT_ASSERT(!(flags & INV_IMM));

		/* Result unused: CMP computes the same flags without a dest. */
		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

		/* ARGS_SWAPPED: compute src2 - src1 via RSB. */
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUBC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		compiler->status_flags_state = 0;

		/* MUL/SMULL encode the destination in the Rn bit position. */
		if (!(flags & SET_FLAGS))
			return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));

		/* Overflow check: the high word must equal the sign extension
		   of the low word. */
		FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1)));

		/* cmp TMP_REG1, dst asr #31. */
		return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0);

	case SLJIT_AND:
		/* Result unused: TST sets the flags without a dest; inverted
		   immediates use BIC instead of AND. */
		if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
			return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_OR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_XOR:
		if (flags & INV_IMM) {
			/* xor with all-ones immediate is a MVN of src1. */
			SLJIT_ASSERT(src2 == SRC2_IMM);
			return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
		}
		return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SHL:
	case SLJIT_MSHL:
		shift_type = 0;
		is_masked = op == SLJIT_MSHL;
		break;

	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		shift_type = 1;
		is_masked = op == SLJIT_MLSHR;
		break;

	case SLJIT_ASHR:
	case SLJIT_MASHR:
		shift_type = 2;
		is_masked = op == SLJIT_MASHR;
		break;

	case SLJIT_ROTL:
		/* Rotate left is rotate right by the negated amount;
		   shift_imm == 0x20 marks a register shift amount. */
		if (compiler->shift_imm == 0x20) {
			FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
			src2 = TMP_REG2;
		} else
			compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
		/* fallthrough */

	case SLJIT_ROTR:
		shift_type = 3;
		is_masked = 0;
		break;

	default:
		SLJIT_UNREACHABLE();
		return SLJIT_SUCCESS;
	}

	/* Shared shift/rotate emission for the cases that broke out above. */
	SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));

	if (compiler->shift_imm != 0x20) {
		/* Immediate shift amount: the value to shift is in src2. */
		SLJIT_ASSERT(src1 == TMP_REG1);

		if (compiler->shift_imm != 0)
			return push_inst(compiler, MOV | (flags & SET_FLAGS) |
				RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
		return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
	}

	/* Register shift amount: src1 is shifted by src2. */
	SLJIT_ASSERT(src1 != TMP_REG2);

	if (is_masked) {
		/* M-variants mask the shift amount to the 0..31 range. */
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
		src2 = TMP_REG2;
	}

	return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
		| RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
}
1683
1684 #undef EMIT_SHIFT_INS_AND_RETURN
1685
/* Tests whether the immediate can be stored in the 12 bit imm field of a
   data processing instruction, i.e. whether it can be expressed as an
   8 bit constant rotated right by an even amount. Returns the SRC2_IMM
   encoded second operand, or 0 if not possible. */
static sljit_uw get_imm(sljit_uw imm)
{
	sljit_u32 rol;

	/* Values up to 0xff need no rotation. */
	if (imm <= 0xff)
		return SRC2_IMM | imm;

	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol = 8;
	} else {
		/* Rotate right by 8 so a byte that wraps around bit 0 becomes
		   contiguous; rol tracks the rotation in 2 bit units. */
		imm = (imm << 24) | (imm >> 8);
		rol = 0;
	}

	/* Shift the most significant set bit into the top two bits. */
	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol += 4;
	}

	if (!(imm & 0xf0000000)) {
		imm <<= 4;
		rol += 2;
	}

	if (!(imm & 0xc0000000)) {
		imm <<= 2;
		rol += 1;
	}

	/* Encodable only if all set bits now fit in the top byte. */
	if (!(imm & 0x00ffffff))
		return SRC2_IMM | (imm >> 24) | (rol << 8);
	return 0;
}
1722
/* Tries to represent imm as two rotated 8 bit constants so it can be
   materialized by a MOV/ORR (or MVN/BIC) instruction pair. On success
   returns the first SRC2_IMM encoded immediate and stores the second in
   *imm2; returns 0 when no such split exists. */
static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continous zero bit). */
	mask = 0xff000000;
	rol = 8;
	while (1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}

		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* rol by 8. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					/* No aligned zero byte anywhere: cannot split. */
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		/* Both bytes already byte aligned. */
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		*imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	} else if (imm & 0xc0000000) {
		/* First constant starts at the top; normalize the remainder for
		   the second constant the same way get_imm does. */
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	} else {
		/* Align the first constant's most significant bit first. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	return imm1;
}
1828
/* Loads the 32 bit constant imm into reg using as few instructions as
   possible: a single MOV/MVN (or MOVW on ARMv7+), a two instruction
   MOV+ORR / MVN+BIC pair (ARMv6), a MOVW+MOVT pair (ARMv7+), or a
   literal pool load as the last resort (ARMv6 only). */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_uw imm1, imm2;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* 16 bit constants fit a single MOVW. */
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Create imm by 2 inst. */
	imm1 = compute_imm(imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
		return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
	}

	imm1 = compute_imm(~imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
		return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
	}

	/* Load integer. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* MOVW low half, then MOVT high half when needed. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */
}
1871
emit_op_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw,sljit_s32 tmp_reg)1872 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
1873 sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
1874 {
1875 sljit_uw imm, offset_reg, tmp;
1876 sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
1877 sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;
1878
1879 SLJIT_ASSERT(arg & SLJIT_MEM);
1880 SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));
1881
1882 if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
1883 tmp = (sljit_uw)(argw & (sign | mask));
1884 tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
1885
1886 FAIL_IF(load_immediate(compiler, tmp_reg, tmp));
1887
1888 argw -= (sljit_sw)tmp;
1889 tmp = 1;
1890
1891 if (argw < 0) {
1892 argw = -argw;
1893 tmp = 0;
1894 }
1895
1896 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
1897 (mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
1898 }
1899
1900 if (arg & OFFS_REG_MASK) {
1901 offset_reg = OFFS_REG(arg);
1902 arg &= REG_MASK;
1903 argw &= 0x3;
1904
1905 if (argw != 0 && (mask == 0xff)) {
1906 FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
1907 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
1908 }
1909
1910 /* Bit 25: RM is offset. */
1911 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
1912 RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
1913 }
1914
1915 arg &= REG_MASK;
1916
1917 if (argw > mask) {
1918 tmp = (sljit_uw)(argw & (sign | mask));
1919 tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
1920 imm = get_imm(tmp);
1921
1922 if (imm) {
1923 FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
1924 argw -= (sljit_sw)tmp;
1925 arg = tmp_reg;
1926
1927 SLJIT_ASSERT(argw >= -mask && argw <= mask);
1928 }
1929 } else if (argw < -mask) {
1930 tmp = (sljit_uw)(-argw & (sign | mask));
1931 tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
1932 imm = get_imm(tmp);
1933
1934 if (imm) {
1935 FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
1936 argw += (sljit_sw)tmp;
1937 arg = tmp_reg;
1938
1939 SLJIT_ASSERT(argw >= -mask && argw <= mask);
1940 }
1941 }
1942
1943 if (argw <= mask && argw >= -mask) {
1944 if (argw >= 0) {
1945 if (mask == 0xff)
1946 argw = TYPE2_TRANSFER_IMM(argw);
1947 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
1948 }
1949
1950 argw = -argw;
1951
1952 if (mask == 0xff)
1953 argw = TYPE2_TRANSFER_IMM(argw);
1954
1955 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
1956 }
1957
1958 FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
1959 return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
1960 RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
1961 }
1962
emit_op(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 inp_flags,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1963 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
1964 sljit_s32 dst, sljit_sw dstw,
1965 sljit_s32 src1, sljit_sw src1w,
1966 sljit_s32 src2, sljit_sw src2w)
1967 {
1968 /* src1 is reg or TMP_REG1
1969 src2 is reg, TMP_REG2, or imm
1970 result goes to TMP_REG2, so put result can use TMP_REG1. */
1971
1972 /* We prefers register and simple consts. */
1973 sljit_s32 dst_reg;
1974 sljit_s32 src1_reg = 0;
1975 sljit_s32 src2_reg = 0;
1976 sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
1977 sljit_s32 neg_op = 0;
1978 sljit_u32 imm2;
1979
1980 op = GET_OPCODE(op);
1981
1982 if (flags & SET_FLAGS)
1983 inp_flags &= ~ALLOW_DOUBLE_IMM;
1984
1985 if (dst == TMP_REG2)
1986 flags |= UNUSED_RETURN;
1987
1988 SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));
1989
1990 if (inp_flags & ALLOW_NEG_IMM) {
1991 switch (op) {
1992 case SLJIT_ADD:
1993 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1994 neg_op = SLJIT_SUB;
1995 break;
1996 case SLJIT_ADDC:
1997 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1998 neg_op = SLJIT_SUBC;
1999 break;
2000 case SLJIT_SUB:
2001 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
2002 neg_op = SLJIT_ADD;
2003 break;
2004 case SLJIT_SUBC:
2005 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
2006 neg_op = SLJIT_ADDC;
2007 break;
2008 }
2009 }
2010
2011 do {
2012 if (!(inp_flags & ALLOW_IMM))
2013 break;
2014
2015 if (src2 == SLJIT_IMM) {
2016 src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
2017 if (src2_reg)
2018 break;
2019
2020 if (inp_flags & ALLOW_INV_IMM) {
2021 src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
2022 if (src2_reg) {
2023 flags |= INV_IMM;
2024 break;
2025 }
2026 }
2027
2028 if (neg_op != 0) {
2029 src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
2030 if (src2_reg) {
2031 op = neg_op | GET_ALL_FLAGS(op);
2032 break;
2033 }
2034 }
2035 }
2036
2037 if (src1 == SLJIT_IMM) {
2038 src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
2039 if (src2_reg) {
2040 flags |= ARGS_SWAPPED;
2041 src1 = src2;
2042 src1w = src2w;
2043 break;
2044 }
2045
2046 if (inp_flags & ALLOW_INV_IMM) {
2047 src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
2048 if (src2_reg) {
2049 flags |= ARGS_SWAPPED | INV_IMM;
2050 src1 = src2;
2051 src1w = src2w;
2052 break;
2053 }
2054 }
2055
2056 if (neg_op >= SLJIT_SUB) {
2057 /* Note: additive operation (commutative). */
2058 SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);
2059
2060 src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
2061 if (src2_reg) {
2062 src1 = src2;
2063 src1w = src2w;
2064 op = neg_op | GET_ALL_FLAGS(op);
2065 break;
2066 }
2067 }
2068 }
2069 } while(0);
2070
2071 /* Source 1. */
2072 if (FAST_IS_REG(src1))
2073 src1_reg = src1;
2074 else if (src1 & SLJIT_MEM) {
2075 FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
2076 src1_reg = TMP_REG1;
2077 } else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
2078 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
2079 src1_reg = TMP_REG1;
2080 }
2081
2082 /* Destination. */
2083 dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
2084
2085 if (op <= SLJIT_MOV_P) {
2086 if (dst & SLJIT_MEM) {
2087 if (inp_flags & BYTE_SIZE)
2088 inp_flags &= ~SIGNED;
2089
2090 if (FAST_IS_REG(src2))
2091 return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
2092 }
2093
2094 if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
2095 flags |= MOVE_REG_CONV;
2096 }
2097
2098 /* Source 2. */
2099 if (src2_reg == 0) {
2100 src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2;
2101
2102 if (FAST_IS_REG(src2))
2103 src2_reg = src2;
2104 else if (src2 & SLJIT_MEM)
2105 FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
2106 else if (!(inp_flags & ALLOW_DOUBLE_IMM))
2107 FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
2108 else {
2109 SLJIT_ASSERT(!(flags & SET_FLAGS));
2110
2111 if (src1_reg == 0) {
2112 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
2113 src1_reg = TMP_REG1;
2114 }
2115
2116 src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);
2117
2118 if (src2_reg == 0 && neg_op != 0) {
2119 src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
2120 if (src2_reg != 0)
2121 op = neg_op;
2122 }
2123
2124 if (src2_reg == 0) {
2125 FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)src2w));
2126 src2_reg = TMP_REG2;
2127 } else {
2128 FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
2129 src1_reg = dst_reg;
2130 src2_reg = (sljit_s32)imm2;
2131
2132 if (op == SLJIT_ADDC)
2133 op = SLJIT_ADD;
2134 else if (op == SLJIT_SUBC)
2135 op = SLJIT_SUB;
2136 }
2137 }
2138 }
2139
2140 if (src1_reg == 0) {
2141 SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));
2142
2143 src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);
2144
2145 if (src1_reg == 0 && neg_op != 0) {
2146 src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
2147 if (src1_reg != 0)
2148 op = neg_op;
2149 }
2150
2151 if (src1_reg == 0) {
2152 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
2153 src1_reg = TMP_REG1;
2154 } else {
2155 FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
2156 src1_reg = dst_reg;
2157 src2_reg = (sljit_s32)imm2;
2158
2159 if (op == SLJIT_ADDC)
2160 op = SLJIT_ADD;
2161 }
2162 }
2163
2164 FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
2165
2166 if (!(dst & SLJIT_MEM))
2167 return SLJIT_SUCCESS;
2168
2169 return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
2170 }
2171
2172 #ifdef __cplusplus
2173 extern "C" {
2174 #endif
2175
2176 #if defined(__GNUC__)
2177 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2178 extern int __aeabi_idivmod(int numerator, int denominator);
2179 #else
2180 #error "Software divmod functions are needed"
2181 #endif
2182
2183 #ifdef __cplusplus
2184 }
2185 #endif
2186
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)2187 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
2188 {
2189 sljit_uw saved_reg_list[3];
2190 sljit_sw saved_reg_count;
2191
2192 CHECK_ERROR();
2193 CHECK(check_sljit_emit_op0(compiler, op));
2194
2195 op = GET_OPCODE(op);
2196 switch (op) {
2197 case SLJIT_BREAKPOINT:
2198 FAIL_IF(push_inst(compiler, BKPT));
2199 break;
2200 case SLJIT_NOP:
2201 FAIL_IF(push_inst(compiler, NOP));
2202 break;
2203 case SLJIT_LMUL_UW:
2204 case SLJIT_LMUL_SW:
2205 return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
2206 | RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
2207 case SLJIT_DIVMOD_UW:
2208 case SLJIT_DIVMOD_SW:
2209 case SLJIT_DIV_UW:
2210 case SLJIT_DIV_SW:
2211 SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
2212 SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);
2213
2214 saved_reg_count = 0;
2215 if (compiler->scratches >= 4)
2216 saved_reg_list[saved_reg_count++] = 3;
2217 if (compiler->scratches >= 3)
2218 saved_reg_list[saved_reg_count++] = 2;
2219 if (op >= SLJIT_DIV_UW)
2220 saved_reg_list[saved_reg_count++] = 1;
2221
2222 if (saved_reg_count > 0) {
2223 FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
2224 | (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
2225 if (saved_reg_count >= 2) {
2226 SLJIT_ASSERT(saved_reg_list[1] < 8);
2227 FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
2228 }
2229 if (saved_reg_count >= 3) {
2230 SLJIT_ASSERT(saved_reg_list[2] < 8);
2231 FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
2232 }
2233 }
2234
2235 #if defined(__GNUC__)
2236 FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
2237 ((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
2238 #else
2239 #error "Software divmod functions are needed"
2240 #endif
2241
2242 if (saved_reg_count > 0) {
2243 if (saved_reg_count >= 3) {
2244 SLJIT_ASSERT(saved_reg_list[2] < 8);
2245 FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
2246 }
2247 if (saved_reg_count >= 2) {
2248 SLJIT_ASSERT(saved_reg_list[1] < 8);
2249 FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
2250 }
2251 return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
2252 | (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
2253 }
2254 return SLJIT_SUCCESS;
2255 case SLJIT_ENDBR:
2256 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
2257 return SLJIT_SUCCESS;
2258 }
2259
2260 return SLJIT_SUCCESS;
2261 }
2262
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2263 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2264 sljit_s32 dst, sljit_sw dstw,
2265 sljit_s32 src, sljit_sw srcw)
2266 {
2267 CHECK_ERROR();
2268 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2269 ADJUST_LOCAL_OFFSET(dst, dstw);
2270 ADJUST_LOCAL_OFFSET(src, srcw);
2271
2272 switch (GET_OPCODE(op)) {
2273 case SLJIT_MOV:
2274 case SLJIT_MOV_U32:
2275 case SLJIT_MOV_S32:
2276 case SLJIT_MOV32:
2277 case SLJIT_MOV_P:
2278 return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
2279
2280 case SLJIT_MOV_U8:
2281 return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
2282
2283 case SLJIT_MOV_S8:
2284 return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
2285
2286 case SLJIT_MOV_U16:
2287 return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
2288
2289 case SLJIT_MOV_S16:
2290 return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
2291
2292 case SLJIT_CLZ:
2293 case SLJIT_CTZ:
2294 case SLJIT_REV:
2295 case SLJIT_REV_U32:
2296 case SLJIT_REV_S32:
2297 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2298
2299 case SLJIT_REV_U16:
2300 case SLJIT_REV_S16:
2301 return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
2302 }
2303
2304 return SLJIT_SUCCESS;
2305 }
2306
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2307 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2308 sljit_s32 dst, sljit_sw dstw,
2309 sljit_s32 src1, sljit_sw src1w,
2310 sljit_s32 src2, sljit_sw src2w)
2311 {
2312 sljit_s32 inp_flags;
2313
2314 CHECK_ERROR();
2315 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2316 ADJUST_LOCAL_OFFSET(dst, dstw);
2317 ADJUST_LOCAL_OFFSET(src1, src1w);
2318 ADJUST_LOCAL_OFFSET(src2, src2w);
2319
2320 switch (GET_OPCODE(op)) {
2321 case SLJIT_ADD:
2322 case SLJIT_ADDC:
2323 case SLJIT_SUB:
2324 case SLJIT_SUBC:
2325 return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2326
2327 case SLJIT_OR:
2328 return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2329
2330 case SLJIT_XOR:
2331 inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
2332 if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
2333 inp_flags |= ALLOW_INV_IMM;
2334 }
2335 return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);
2336
2337 case SLJIT_MUL:
2338 return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2339
2340 case SLJIT_AND:
2341 return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
2342
2343 case SLJIT_SHL:
2344 case SLJIT_MSHL:
2345 case SLJIT_LSHR:
2346 case SLJIT_MLSHR:
2347 case SLJIT_ASHR:
2348 case SLJIT_MASHR:
2349 case SLJIT_ROTL:
2350 case SLJIT_ROTR:
2351 if (src2 == SLJIT_IMM) {
2352 compiler->shift_imm = src2w & 0x1f;
2353 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
2354 } else {
2355 compiler->shift_imm = 0x20;
2356 return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2357 }
2358 }
2359
2360 return SLJIT_SUCCESS;
2361 }
2362
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2363 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2364 sljit_s32 src1, sljit_sw src1w,
2365 sljit_s32 src2, sljit_sw src2w)
2366 {
2367 CHECK_ERROR();
2368 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2369
2370 SLJIT_SKIP_CHECKS(compiler);
2371 return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
2372 }
2373
sljit_emit_shift_into(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1_reg,sljit_s32 src2_reg,sljit_s32 src3,sljit_sw src3w)2374 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2375 sljit_s32 dst_reg,
2376 sljit_s32 src1_reg,
2377 sljit_s32 src2_reg,
2378 sljit_s32 src3, sljit_sw src3w)
2379 {
2380 sljit_s32 is_left;
2381
2382 CHECK_ERROR();
2383 CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
2384
2385 op = GET_OPCODE(op);
2386 is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);
2387
2388 if (src1_reg == src2_reg) {
2389 SLJIT_SKIP_CHECKS(compiler);
2390 return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
2391 }
2392
2393 ADJUST_LOCAL_OFFSET(src3, src3w);
2394
2395 /* Shift type of ROR is 3. */
2396 if (src3 == SLJIT_IMM) {
2397 src3w &= 0x1f;
2398
2399 if (src3w == 0)
2400 return SLJIT_SUCCESS;
2401
2402 FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7)));
2403 src3w = (src3w ^ 0x1f) + 1;
2404 return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7));
2405 }
2406
2407 if (src3 & SLJIT_MEM) {
2408 FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2));
2409 src3 = TMP_REG2;
2410 }
2411
2412 if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
2413 FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
2414 src3 = TMP_REG2;
2415 }
2416
2417 FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg)));
2418 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7)));
2419 FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
2420 return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1));
2421 }
2422
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2423 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
2424 sljit_s32 src, sljit_sw srcw)
2425 {
2426 CHECK_ERROR();
2427 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2428 ADJUST_LOCAL_OFFSET(src, srcw);
2429
2430 switch (op) {
2431 case SLJIT_FAST_RETURN:
2432 SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
2433
2434 if (FAST_IS_REG(src))
2435 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
2436 else
2437 FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));
2438
2439 return push_inst(compiler, BX | RM(TMP_REG2));
2440 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2441 return SLJIT_SUCCESS;
2442 case SLJIT_PREFETCH_L1:
2443 case SLJIT_PREFETCH_L2:
2444 case SLJIT_PREFETCH_L3:
2445 case SLJIT_PREFETCH_ONCE:
2446 SLJIT_ASSERT(src & SLJIT_MEM);
2447 return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
2448 }
2449
2450 return SLJIT_SUCCESS;
2451 }
2452
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)2453 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
2454 sljit_s32 dst, sljit_sw dstw)
2455 {
2456 sljit_s32 size, dst_r;
2457
2458 CHECK_ERROR();
2459 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
2460 ADJUST_LOCAL_OFFSET(dst, dstw);
2461
2462 switch (op) {
2463 case SLJIT_FAST_ENTER:
2464 SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
2465
2466 if (FAST_IS_REG(dst))
2467 return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
2468 break;
2469 case SLJIT_GET_RETURN_ADDRESS:
2470 size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);
2471
2472 if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
2473 /* The size of pc is not added above. */
2474 if ((size & SSIZE_OF(sw)) == 0)
2475 size += SSIZE_OF(sw);
2476
2477 size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
2478 }
2479
2480 SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);
2481
2482 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
2483 FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
2484 break;
2485 }
2486
2487 if (dst & SLJIT_MEM)
2488 return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
2489
2490 return SLJIT_SUCCESS;
2491 }
2492
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2493 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2494 {
2495 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2496
2497 if (type == SLJIT_GP_REGISTER)
2498 return reg_map[reg];
2499
2500 if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2501 return freg_map[reg];
2502
2503 if (type != SLJIT_SIMD_REG_128)
2504 return freg_map[reg] & ~0x1;
2505
2506 return -1;
2507 }
2508
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2509 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2510 void *instruction, sljit_u32 size)
2511 {
2512 SLJIT_UNUSED_ARG(size);
2513 CHECK_ERROR();
2514 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2515
2516 return push_inst(compiler, *(sljit_ins*)instruction);
2517 }
2518
2519 /* --------------------------------------------------------------------- */
2520 /* Floating point operators */
2521 /* --------------------------------------------------------------------- */
2522
/* Bit 20 turns the VSTR base encoding into the load (VLDR) form. */
#define FPU_LOAD (1 << 20)
/* Builds a VLDR/VSTR: 'add' selects adding (1) or subtracting (0) the
   offset from the base ('add' lands in bit 23 — presumably the U bit of
   the encoding); 'offs' is the offset in words (callers pass argw >> 2). */
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
	((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
2526
emit_fop_mem(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg,sljit_sw argw)2527 static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
2528 {
2529 sljit_uw imm;
2530 sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));
2531
2532 SLJIT_ASSERT(arg & SLJIT_MEM);
2533 arg &= ~SLJIT_MEM;
2534
2535 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
2536 FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
2537 arg = TMP_REG2;
2538 argw = 0;
2539 }
2540
2541 /* Fast loads and stores. */
2542 if (arg) {
2543 if (!(argw & ~0x3fc))
2544 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
2545 if (!(-argw & ~0x3fc))
2546 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));
2547
2548 imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
2549 if (imm) {
2550 FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
2551 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
2552 }
2553 imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
2554 if (imm) {
2555 argw = -argw;
2556 FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
2557 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
2558 }
2559 }
2560
2561 if (arg) {
2562 FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw));
2563 FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2)));
2564 }
2565 else
2566 FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw));
2567
2568 return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
2569 }
2570
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2571 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
2572 sljit_s32 dst, sljit_sw dstw,
2573 sljit_s32 src, sljit_sw srcw)
2574 {
2575 op ^= SLJIT_32;
2576
2577 if (src & SLJIT_MEM) {
2578 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
2579 src = TMP_FREG1;
2580 }
2581
2582 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));
2583
2584 if (FAST_IS_REG(dst))
2585 return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));
2586
2587 /* Store the integer value from a VFP register. */
2588 return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
2589 }
2590
sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler * compiler,sljit_ins ins,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2591 static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
2592 sljit_s32 dst, sljit_sw dstw,
2593 sljit_s32 src, sljit_sw srcw)
2594 {
2595 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2596
2597 if (FAST_IS_REG(src))
2598 FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
2599 else if (src & SLJIT_MEM) {
2600 /* Load the integer value into a VFP register. */
2601 FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
2602 }
2603 else {
2604 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
2605 FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
2606 }
2607
2608 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));
2609
2610 if (dst & SLJIT_MEM)
2611 return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
2612 return SLJIT_SUCCESS;
2613 }
2614
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2615 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2616 sljit_s32 dst, sljit_sw dstw,
2617 sljit_s32 src, sljit_sw srcw)
2618 {
2619 return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2620 }
2621
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2622 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2623 sljit_s32 dst, sljit_sw dstw,
2624 sljit_s32 src, sljit_sw srcw)
2625 {
2626 return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2627 }
2628
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2629 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2630 sljit_s32 src1, sljit_sw src1w,
2631 sljit_s32 src2, sljit_sw src2w)
2632 {
2633 op ^= SLJIT_32;
2634
2635 if (src1 & SLJIT_MEM) {
2636 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
2637 src1 = TMP_FREG1;
2638 }
2639
2640 if (src2 & SLJIT_MEM) {
2641 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
2642 src2 = TMP_FREG2;
2643 }
2644
2645 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
2646 FAIL_IF(push_inst(compiler, VMRS));
2647
2648 if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
2649 return SLJIT_SUCCESS;
2650
2651 return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
2652 }
2653
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2654 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2655 sljit_s32 dst, sljit_sw dstw,
2656 sljit_s32 src, sljit_sw srcw)
2657 {
2658 sljit_s32 dst_r;
2659
2660 CHECK_ERROR();
2661
2662 SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
2663 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2664
2665 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2666
2667 if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
2668 op ^= SLJIT_32;
2669
2670 if (src & SLJIT_MEM) {
2671 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
2672 src = dst_r;
2673 }
2674
2675 switch (GET_OPCODE(op)) {
2676 case SLJIT_MOV_F64:
2677 if (src != dst_r) {
2678 if (dst_r != TMP_FREG1)
2679 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
2680 else
2681 dst_r = src;
2682 }
2683 break;
2684 case SLJIT_NEG_F64:
2685 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
2686 break;
2687 case SLJIT_ABS_F64:
2688 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
2689 break;
2690 case SLJIT_CONV_F64_FROM_F32:
2691 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
2692 op ^= SLJIT_32;
2693 break;
2694 }
2695
2696 if (dst & SLJIT_MEM)
2697 return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
2698 return SLJIT_SUCCESS;
2699 }
2700
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2701 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2702 sljit_s32 dst, sljit_sw dstw,
2703 sljit_s32 src1, sljit_sw src1w,
2704 sljit_s32 src2, sljit_sw src2w)
2705 {
2706 sljit_s32 dst_r;
2707
2708 CHECK_ERROR();
2709 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2710 ADJUST_LOCAL_OFFSET(dst, dstw);
2711 ADJUST_LOCAL_OFFSET(src1, src1w);
2712 ADJUST_LOCAL_OFFSET(src2, src2w);
2713
2714 op ^= SLJIT_32;
2715
2716 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2717
2718 if (src2 & SLJIT_MEM) {
2719 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
2720 src2 = TMP_FREG2;
2721 }
2722
2723 if (src1 & SLJIT_MEM) {
2724 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
2725 src1 = TMP_FREG1;
2726 }
2727
2728 switch (GET_OPCODE(op)) {
2729 case SLJIT_ADD_F64:
2730 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
2731 break;
2732 case SLJIT_SUB_F64:
2733 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
2734 break;
2735 case SLJIT_MUL_F64:
2736 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
2737 break;
2738 case SLJIT_DIV_F64:
2739 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
2740 break;
2741 case SLJIT_COPYSIGN_F64:
2742 FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
2743 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
2744 FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
2745 return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
2746 }
2747
2748 if (dst_r == TMP_FREG1)
2749 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));
2750
2751 return SLJIT_SUCCESS;
2752 }
2753
2754 #undef EMIT_FPU_DATA_TRANSFER
2755
/* Loads the 32 bit float constant 'value' into 'freg'. When the bit
   pattern fits the VFP 8 bit immediate form, a single VMOV immediate is
   emitted; otherwise the raw bits are built in TMP_REG1 and transferred. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the float to its IEEE-754 bits (union punning is well
	   defined in C, unlike a pointer cast). */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only if the low 19 mantissa bits are zero... */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		/* ...and the upper exponent bits are all-equal-but-one
		   (0x20 or 0x1f), the range the 8 bit immediate can express. */
		if (exp == 0x20 || exp == 0x1f) {
			/* Pack sign + 7 significant bits into the abcdefgh immediate,
			   split across the instruction's two 4 bit fields. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: materialize the bits in a core register, then VMOV s<freg>, r. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
}
2787
/* Loads the 64 bit float constant 'value' into 'freg'. Prefers the VFP
   immediate form; otherwise transfers the two 32 bit halves from core
   registers with VMOV2 (reusing one register when both halves match). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* View the double as two 32 bit words; imm[1] holds the sign/exponent
	   word on this (little-endian word order) layout -- see the checks below. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only if the low 48 mantissa bits are zero... */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		/* ...and the exponent fits the 8 bit immediate range. */
		if (exp == 0x100 || exp == 0xff) {
			/* Sign bit + 7 significant bits, split into the two fields;
			   (1 << 8) switches the encoding to double precision. */
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* If both words are identical, one core register feeds both halves. */
	if (u.imm[0] == u.imm[1])
		return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
}
2823
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2824 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2825 sljit_s32 freg, sljit_s32 reg)
2826 {
2827 sljit_s32 reg2;
2828 sljit_ins inst;
2829
2830 CHECK_ERROR();
2831 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2832
2833 if (reg & REG_PAIR_MASK) {
2834 reg2 = REG_PAIR_SECOND(reg);
2835 reg = REG_PAIR_FIRST(reg);
2836
2837 inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg);
2838 } else {
2839 inst = VMOV | VN(freg) | RD(reg);
2840
2841 if (!(op & SLJIT_32))
2842 inst |= 1 << 7;
2843 }
2844
2845 if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
2846 inst |= 1 << 20;
2847
2848 return push_inst(compiler, inst);
2849 }
2850
2851 /* --------------------------------------------------------------------- */
2852 /* Conditional instructions */
2853 /* --------------------------------------------------------------------- */
2854
/* Maps an sljit condition 'type' to the ARM condition code field
   (bits 31:28 of an instruction). Integer and floating point conditions
   that encode identically share a case. The CARRY/OVERFLOW variants
   depend on whether the last flag-setting operation was an ADD or SUB
   (recorded in compiler->status_flags_state). */
static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x10000000; /* NE */

	case SLJIT_CARRY:
		/* After an ADD the carry flag is set directly (CS); after a SUB
		   the ARM carry is the inverted borrow, so fall through to LESS
		   which yields CC. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x20000000; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x30000000; /* CC / unsigned lower */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x30000000; /* CC */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x20000000; /* CS / unsigned higher or same */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x80000000; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x90000000; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb0000000; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa0000000; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc0000000; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd0000000; /* LE */

	case SLJIT_OVERFLOW:
		/* When the flags do not come from an ADD/SUB, the emitter
		   presumably materialized the overflow state so that NE tests
		   it -- TODO confirm against the op_flags producers. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x10000000; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x60000000; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x00000000; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x70000000; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x40000000; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x50000000; /* PL */

	default:
		/* Unconditional jump/call types map to AL. */
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
		return 0xe0000000; /* AL */
	}
}
2943
sljit_emit_label(struct sljit_compiler * compiler)2944 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2945 {
2946 struct sljit_label *label;
2947
2948 CHECK_ERROR_PTR();
2949 CHECK_PTR(check_sljit_emit_label(compiler));
2950
2951 if (compiler->last_label && compiler->last_label->size == compiler->size)
2952 return compiler->last_label;
2953
2954 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2955 PTR_FAIL_IF(!label);
2956 set_label(label, compiler);
2957 return label;
2958 }
2959
/* Emits a (possibly conditional) jump or call whose target is patched
   later. On ARMv6 the target comes from a literal pool load; on ARMv7
   a movw/movt placeholder pair is emitted and later rewritten. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* Strip flags; only the condition/type byte remains. */
	type &= 0xff;

	/* TMP_REG1 must not alias lr, which calls clobber. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Calls need the blx helper sequence around the literal load. */
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));
	/* Load the target from the constant pool: straight into pc for plain
	   jumps, into TMP_REG1 for calls (branched to by emit_blx below).
	   The jump's condition is merged into the load instruction. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
		/* Rewritable jumps record the address before the blx sequence. */
		jump->addr = compiler->size;
		compiler->patches++;
	}

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(emit_blx(compiler));
	}

	if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
		jump->addr = compiler->size;
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	/* Placeholder movw/movt pair, patched with the real target later. */
	PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	jump->addr = compiler->size;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return jump;
}
3001
3002 #ifdef __SOFTFP__
3003
/* Moves call arguments into the softfp AAPCS locations (core registers
   r0-r3, then the stack) before a call. Works in two passes: the first
   pass computes the final byte offset of every argument (doubles are
   8-byte aligned) into 'offsets'; the second pass walks the arguments in
   reverse and emits the moves. 'src' (non-NULL for icall) names the
   register holding the call target; when a move would clobber it, the
   target is relocated to TMP_REG1 and *src is updated. On input
   *extra_space carries SLJIT_CALL_RETURN; on output it holds the number
   of stack bytes reserved (0 if none). */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	/* Byte "offset" of the call-target register; defaults past r0-r3 so it
	   never matches when src is not a register. */
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	/* At most 4 arguments -- presumably guaranteed by the argument checker. */
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* Pass 1: assign final offsets, and build 'types' in reversed order so
	   pass 2 can process the arguments back to front. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles must start on an 8-byte boundary. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Reserve stack space for arguments that do not fit in r0-r3. */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Stack portion only, rounded up to 8-byte alignment. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* The double lands in two core registers; save the call
				   target first if either of them holds it. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				/* vmov r(offset/4), r(offset/4+1), d<float_arg_count>;
				   (offset << 10) == ((offset >> 2) << 12), the Rd field. */
				FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				/* vstr.64 d<float_arg_count>, [sp, #offset-16] */
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				/* vmov r(offset/4), s<float_arg_count> */
				FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				/* vstr.32 s<float_arg_count>, [sp, #offset-16] */
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			/* Word arguments can only move to higher offsets, so the
			   reversed walk never overwrites a not-yet-moved source. */
			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The call target is being moved anyway; track it
						   at its new location instead of copying it away. */
						*src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
						src_offset = offset;
					}
					/* mov r(offset/4), r(word_arg_offset/4) */
					FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
				} else
					/* str r(word_arg_offset/4), [sp, #offset-16] */
					FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3126
/* After a softfp call, moves a floating point return value from the core
   registers (r0 or r0:r1) into the VFP return register (s0 / d0). The
   low bits of arg_types hold the return type. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		/* vmov d0, r0, r1 */
		return push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		/* vmov s0, r0 */
		return push_inst(compiler, VMOV | (0 << 16) | (0 << 12));
	default:
		return SLJIT_SUCCESS;
	}
}
3136
3137 #else /* !__SOFTFP__ */
3138
/* Packs floating point arguments into consecutive VFP argument registers
   as required by the hardfp AAPCS. 'offset' walks sljit's own register
   assignment while 'new_offset' is the next ABI slot; a later f32 can
   back-fill the free half of a partially used double register, tracked
   by 'f32_offset'. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Shift the double down only when slots have diverged. */
			if (offset != new_offset)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					SLJIT_32, new_offset, offset, 0)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Back-fill the free upper single of the half-used double
				   register; 0x400000 presumably selects that high slot in
				   the encoding -- TODO confirm via EMIT_FPU_OPERATION. */
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					0x400000, f32_offset, offset, 0)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
						0, new_offset, offset, 0)));
				/* Remember this double: its other half is still free. */
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3178
3179 #endif /* __SOFTFP__ */
3180
/* Emits a function call with sljit argument marshalling. Under softfp,
   arguments are shuffled into core registers / stack first and the
   reserved stack space is released after the call; tail calls
   (SLJIT_CALL_RETURN) additionally release the current frame. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	/* Carries SLJIT_CALL_RETURN in, receives reserved stack bytes out. */
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	/* SLJIT_CALL_REG_ARG bypasses ABI marshalling entirely. */
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* A tail call with no extra stack degenerates into a plain jump
		   (the frame was already released by softfloat_call_with_args). */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Reload the saved return address before freeing the space. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		/* Move any FP return value from core to VFP registers. */
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	/* Tail call: release the frame and fall through as a jump. */
	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3236
/* Emits an indirect jump or call through a register, memory operand, or
   absolute immediate address. Immediate targets are recorded as
   patchable jumps so they can be converted to direct branches later. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* TMP_REG1 must not alias lr, which BLX clobbers. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		/* Memory operand: load the target, then branch through TMP_REG1. */
		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: load the target from the literal pool (into pc for jumps,
	   into TMP_REG1 plus a blx helper for calls). */
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(emit_blx(compiler));
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: movw/movt placeholder pair, patched with the real target. */
	FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
#endif /* SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	return SLJIT_SUCCESS;
}
3277
/* Indirect call with argument marshalling -- the register/memory/immediate
   counterpart of sljit_emit_call. The call target may need to be moved to
   TMP_REG1 when marshalling or frame release would clobber it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	/* Carries SLJIT_CALL_RETURN in, receives reserved stack bytes out. */
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* For a tail call the frame release below restores the saved
	   registers, so a target held in one of them must be copied out first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		/* Marshalling may relocate src; it is passed by address. */
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Frame already released and no extra stack: plain indirect jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved return address before freeing the space. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3339
3340 #ifdef __SOFTFP__
3341
/* Places a floating point return value where the softfp caller expects
   it: r0 (f32) or r0:r1 (f64). With SLJIT_ENTER_REG_ARG the value stays
   in the FP return register instead. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		/* Register-argument convention: result stays in FR0. */
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* vmov r0, s<src> for f32; vmov r0, r1, d<src> for f64. */
		if (op & SLJIT_32)
			return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
		return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* Memory source: load the raw words straight into the core registers. */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3364
3365 #endif /* __SOFTFP__ */
3366
/* Materializes the condition 'type' as 0/1 in dst (op < SLJIT_ADD), or
   combines it into dst with AND/OR/XOR using conditional execution. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Plain set: dst = 0, then conditionally dst = 1. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	/* Combining op: fetch the current dst value first if it is in memory. */
	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	/* Conditionally apply the operation with immediate 1. */
	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* AND also needs the false branch (and with 0 clears the register);
	   cc ^ 0x10000000 flips the condition's low bit, i.e. inverts it. */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* Reproduce the zero flag from the final value when requested. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
3407
/* Conditional select: dst_reg = cond(type) ? src1 : src2_reg. Implemented
   by ensuring src2 is already in dst_reg and then conditionally moving
   src1 over it; 'type ^= 0x1' inverts the condition when operands are
   swapped to avoid an extra copy. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* src1 already occupies dst_reg: swap roles and invert the condition. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		/* Load the memory operand into dst_reg when that is free,
		   otherwise into TMP_REG1. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2));

		if (src2_reg != dst_reg) {
			/* Loaded value sits in dst_reg; select src2 conditionally. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		/* Unconditional part: dst = src2. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
		/* Try a conditional MOV/MVN with an encodable immediate first. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		/* Conditional movw (+ movt for the high half when needed). */
		tmp = (sljit_ins)src1w;
		FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
		if (tmp <= 0xffff)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
#else /* !SLJIT_CONFIG_ARM_V7 */
		/* ARMv6: build the constant unconditionally, then move it under cc. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
#endif /* SLJIT_CONFIG_ARM_V7 */
	}

	/* Conditional part: dst = src1 when the condition holds. */
	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
}
3466
/* Floating point conditional select: dst_freg = cond ? src1 : src2_freg.
   Same strategy as sljit_emit_select: get src2 into dst_freg, then emit a
   conditional VMOV of src1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* Invert the single/double bit for the F32-based encodings below. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* src1 already in place: select src2 with inverted condition. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			/* Unconditional part: dst = src2. */
			FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	cc = get_cc(compiler, type & ~SLJIT_32);
	/* Conditional part: dst = src1 when the condition holds. */
	return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
3498
3499 #undef EMIT_FPU_OPERATION
3500
/* Rewrites *mem/*memw so the remaining displacement fits the range
   [-0xfff, max_offset] of the target load/store form, emitting the
   necessary address arithmetic into TMP_REG1. The split tries to keep
   the added/subtracted part encodable as an ARM rotated immediate. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm, tmp;
	/* 12 bit displacement field and its sign-extension boundary. */
	sljit_sw mask = 0xfff;
	sljit_sw sign = 0x1000;

	SLJIT_ASSERT(max_offset >= 0xf00);

	*mem = TMP_REG1;

	/* Base + shifted index: fold it into TMP_REG1, displacement becomes 0. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
	}

	arg &= REG_MASK;

	if (arg) {
		/* Displacement already in range: keep the original base. */
		if (argw <= max_offset && argw >= -mask) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw >= 0) {
			/* Split argw into an encodable chunk (tmp) plus an in-range
			   remainder; round up by 'sign' when the low part overflows
			   max_offset. */
			tmp = (sljit_uw)(argw & (sign | mask));
			tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw - (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
			}
		} else {
			/* Same idea for negative displacements, subtracting the chunk. */
			tmp = (sljit_uw)(-argw & (sign | mask));
			tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw + (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
			}
		}
	}

	/* Fallback: load the (rounded) absolute address into TMP_REG1 and add
	   the base register, leaving an in-range remainder in *memw. */
	tmp = (sljit_uw)(argw & (sign | mask));
	tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
	*memw = argw - (sljit_sw)tmp;

	FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3562
/* Loads or stores a register pair (two consecutive words); single
   registers are delegated to the unaligned path. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	/* Reserve 4 bytes of range headroom for the second word's +4 offset. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));

	flags = WORD_SIZE;

	if (!(type & SLJIT_MEM_STORE)) {
		/* Loading into the base register would clobber the address, so
		   load the second (non-base) register first in that case. */
		if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
			return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
		}

		flags = WORD_SIZE | LOAD_DATA;
	}

	FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
	return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
}
3593
/* Load/store with base register update (pre- or post-indexed addressing).
   Returns SLJIT_ERR_UNSUPPORTED when the offset does not fit the chosen
   transfer encoding; SLJIT_MEM_SUPP only queries support. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Type 1 = word/byte transfer (12 bit offset); type 2 = halfword/
	   signed transfer (8 bit offset, split fields). */
	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Signed byte loads use the type 2 (LDRSB) encoding. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	/* Validate the offset range for the chosen encoding. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Register offset: type 2 has no shifted-register form. */
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Only the shift amount (0-3) of the index register survives. */
		memw &= 0x3;

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));

		/* Bit 25 selects the register-offset form of type 1 transfers. */
		if (is_type1_transfer)
			inst |= (1 << 25);

		/* Bit 24: pre-indexed (cleared for post); bit 21: writeback. */
		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24);
		else
			inst |= (1 << 21);

		return push_inst(compiler, inst);
	}

	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24);
	else
		inst |= (1 << 21);

	if (is_type1_transfer) {
		/* Bit 23: add (up) the offset; otherwise use its magnitude down. */
		if (memw >= 0)
			inst |= (1 << 23);
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_ins)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23);
	else
		memw = -memw;

	/* Type 2 immediates are split into two 4 bit fields. */
	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
3694
/* Emits an unaligned-capable float load/store. Aligned accesses use the
   VFP load/store directly; unaligned ones are routed through a core
   register (TMP_REG2) word by word. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	/* 32 bit aligned data can be transferred by the FPU directly. */
	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the (low) 32 bits of freg into TMP_REG2; bit 20 sets the
		   VMOV direction to "to core register". */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		/* A double is stored as two word transfers; make sure both
		   memw and memw + 4 fit into the addressing mode. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		/* 0x80 selects the other half of the double register. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		/* Load: word into TMP_REG2, then VMOV into the float register. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	/* Load both words, then move the pair into the double register. */
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
}
3731
/* Materializes the address described by *mem_ptr and memw into a single
   register. On return *mem_ptr holds the register index to be used as
   the address: the original base when no adjustment was needed,
   otherwise TMP_REG1. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 base = *mem_ptr;
	sljit_uw offset_imm;

	if (SLJIT_UNLIKELY(base & OFFS_REG_MASK)) {
		/* Base register plus shifted index register form. */
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(base & REG_MASK) | RM(OFFS_REG(base)) | ((sljit_ins)(memw & 0x3) << 7));
	}

	if (SLJIT_UNLIKELY(!(base & REG_MASK))) {
		/* No base register: absolute address. */
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	base &= REG_MASK;

	if (memw == 0) {
		/* The base register can be used unchanged. */
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* A single ADD/SUB is enough when the offset is an encodable immediate. */
	offset_imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));
	if (offset_imm != 0)
		return push_inst(compiler, ((memw < 0) ? SUB : ADD) | RD(TMP_REG1) | RN(base) | offset_imm);

	/* Otherwise materialize the offset first, then add the base. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(base));
}
3763
simd_get_quad_reg_index(sljit_s32 freg)3764 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3765 {
3766 freg += freg & 0x1;
3767
3768 SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3769
3770 if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3771 freg--;
3772
3773 return freg;
3774 }
3775
3776 #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3777
/* Moves a whole SIMD register to/from a register or memory location. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 64 bit (D) and 128 bit (Q) vector registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (!(srcdst & SLJIT_MEM)) {
		/* Register to register move: VORR with identical source operands. */
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD(srcdst) | VN(freg) | VM(freg);
		else
			ins = VD(freg) | VN(srcdst) | VM(srcdst);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6; /* Q bit: operate on quad registers. */

		return push_inst(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	/* VLD1/VST1 register list: 0x7 transfers one D register,
	   0xa transfers two consecutive D registers (one Q register). */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Alignment hint field: 0x10 for 64 bit, 0x20 for 128 bit or
	   stronger alignment (the else branch only runs for alignment >= 4). */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 3)
		ins |= 0x20;

	/* Rm == 0xf: no address writeback. */
	return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3836
/* Computes the Advanced SIMD modified-immediate encoding (cmode/op and
   the scattered 8 bit immediate) for replicating 'value' across vector
   lanes of (1 << elem_size) bytes. Returns ~(sljit_ins)0 when the value
   cannot be encoded. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* If both 16 bit halves match, narrow to a 16 bit element. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* If both bytes match, narrow to an 8 bit element. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		SLJIT_ASSERT(value <= 0xff);
		/* cmode = 0xe: 8 bit value replicated into every byte. */
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				/* cmode = 0x8: byte in bits 0-7 of each 16 bit lane. */
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				/* cmode = 0xa: byte in bits 8-15 of each 16 bit lane. */
				result |= 0xa00;
				break;
			}

			/* The inverted form has already been tried as well. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise inverse (op bit set, VMVN form). */
			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				/* cmode = 0x0: byte in bits 0-7. */
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				/* cmode = 0x2: byte in bits 8-15. */
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				/* cmode = 0x4: byte in bits 16-23. */
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				/* cmode = 0x6: byte in bits 24-31. */
				result |= 0x600;
				break;
			}

			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				/* cmode = 0xc: byte in bits 8-15, low byte all ones. */
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				/* cmode = 0xd: byte in bits 16-23, low 16 bits all ones. */
				result |= 0xd00;
				break;
			}

			/* The inverted form has already been tried as well. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise inverse (op bit set, VMVN form). */
			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the 8 bit immediate into the instruction fields:
	   bits 0-3 stay in place, bits 4-6 go to bits 16-18, bit 7 to bit 24. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
}
3930
/* Broadcasts a register, immediate or memory operand into every lane of
   a SIMD register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Only 64 bit (D) and 128 bit (Q) vector registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Replicating zero: a single VMOV immediate clears the register. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64 bit (double) elements: copy the value into both D register
		   halves of the quad register. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* Load and replicate in one step (VLD1, single element to all lanes). */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 5; /* Fill both D registers. */

		return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* Replicate a 32 bit float lane with VDUP (scalar). */
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate to the element width. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		/* Prefer the single instruction VMOV-immediate form when possible. */
		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst(compiler, VMOV_i | imm | VD(freg));
		}

		/* Otherwise go through a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from a core register; these bits select the element size. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst(compiler, VDUP | ins | VN(freg) | RD(src));
}
4030
/* Moves a single lane of a SIMD register to/from a register, immediate
   or memory operand; optionally zeroes the other lanes first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 64 bit (D) and 128 bit (Q) vector registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		/* Zero the whole register before inserting the lane. */
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				/* Double lane from a register: copy into the requested
				   half, clear the other half. */
				if (lane_index == 1)
					freg += SLJIT_QUAD_OTHER_HALF(freg);

				if (srcdst != freg)
					FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst)));

				freg += SLJIT_QUAD_OTHER_HALF(freg);
				return push_inst(compiler, VMOV_i | VD(freg));
			}

			/* The source overlaps the destination: save it to
			   TMP_FREG2 before zeroing. */
			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
				FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg)));
	}

	/* Lanes in the upper half of a quad register are accessed through
	   the other D register of the pair. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		freg += SLJIT_QUAD_OTHER_HALF(freg);
	}

	if (srcdst & SLJIT_MEM) {
		/* Doubles go through the VFP load/store path. */
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* VLD1/VST1 single element: size in bits 10-11, lane index in
		   the index field; Rm == 0xf means no writeback. */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			/* Whole double: plain register to register copy. */
			if (type & SLJIT_SIMD_STORE)
				return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg));
			return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			if (freg_ebit_map[freg] == 0) {
				/* The lane maps onto an addressable S register. */
				if (lane_index == 1)
					freg = SLJIT_F64_SECOND(freg);

				return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg));
			}

			/* Otherwise route the value through a core register. */
			FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)));
			return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
		}

		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
		return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Mask the immediate to the element width. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV (scalar <-> core register): element size selector bits. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20); /* Direction: to core register. */

		/* Zero extend sub-word lanes unless a signed read is requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst));
}
4147
/* Broadcasts one lane of 'src' into every lane of 'freg'. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

	/* Only 64 bit (D) and 128 bit (Q) vector registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of a quad register are accessed
		   through the other D register of the pair. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64 bit elements: copy the source D register into both halves. */
		if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP (scalar): the imm4 field encodes both the element size and
	   the lane index. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6; /* Q bit. */

	return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
}
4196
/* Widens packed elements from elem_size to elem2_size (integer sign/zero
   extension, or f32 -> f64 conversion). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Only 64 bit (D) and 128 bit (Q) vector registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Floats can only be widened from f32 to f64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		if (reg_size == 4 && elem2_size - elem_size == 1)
			/* A single widening step consumes a whole D register. */
			FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf));
		else
			/* Load only the part that will be widened. */
			FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf));
		src = freg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;

		/* One VSHLL per doubling of the element size; bit 24 selects
		   the unsigned (zero extending) form. */
		do {
			FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
				| ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		/* D sized results were built in TMP_FREG2; copy them back. */
		if (dst_reg == TMP_FREG2)
			return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (freg == src) {
		/* Convert the upper half first so the input is not clobbered;
		   0x20 selects the upper source S register. */
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20));
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src));
	}

	FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)));
	freg += SLJIT_QUAD_OTHER_HALF(freg);
	return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20);
}
4261
/* Extracts the sign bit (most significant bit) of every element into a
   bit mask stored in a core register or memory location. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Only 64 bit (D) and 128 bit (Q) vector registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* 'ins' is a VSHR that moves each sign bit to the bottom of its
	   element; 'imms' packs the VSRA shift amounts (one byte each, applied
	   low byte first) that funnel the per-element bits together. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 24) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 24) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 24) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		ins |= (sljit_ins)1 << 6; /* Q bit. */
	}

	/* Isolate the sign bit of every element into TMP_FREG2. */
	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg)));

	/* Narrow quad results so they fit into a single D register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Shift-accumulate the bits next to each other. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	/* Move the collected mask into a core register. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));

	if (reg_size == 4 && elem_size == 0) {
		/* 16 lanes of bytes: merge the upper 8 mask bits (shifted left
		   by 8) from the adjacent temporary register. */
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
		FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4338
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4339 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4340 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4341 {
4342 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4343 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4344 sljit_ins ins = 0;
4345
4346 CHECK_ERROR();
4347 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4348
4349 if (reg_size != 3 && reg_size != 4)
4350 return SLJIT_ERR_UNSUPPORTED;
4351
4352 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4353 return SLJIT_ERR_UNSUPPORTED;
4354
4355 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4356 case SLJIT_SIMD_OP2_AND:
4357 ins = VAND;
4358 break;
4359 case SLJIT_SIMD_OP2_OR:
4360 ins = VORR;
4361 break;
4362 case SLJIT_SIMD_OP2_XOR:
4363 ins = VEOR;
4364 break;
4365 }
4366
4367 if (type & SLJIT_SIMD_TEST)
4368 return SLJIT_SUCCESS;
4369
4370 if (reg_size == 4) {
4371 dst_freg = simd_get_quad_reg_index(dst_freg);
4372 src1_freg = simd_get_quad_reg_index(src1_freg);
4373 src2_freg = simd_get_quad_reg_index(src2_freg);
4374 ins |= (sljit_ins)1 << 6;
4375 }
4376
4377 return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
4378 }
4379
4380 #undef FPU_LOAD
4381
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4382 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4383 sljit_s32 dst_reg,
4384 sljit_s32 mem_reg)
4385 {
4386 sljit_u32 ins;
4387
4388 CHECK_ERROR();
4389 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4390
4391 switch (GET_OPCODE(op)) {
4392 case SLJIT_MOV_U8:
4393 ins = LDREXB;
4394 break;
4395 case SLJIT_MOV_U16:
4396 ins = LDREXH;
4397 break;
4398 default:
4399 ins = LDREX;
4400 break;
4401 }
4402
4403 return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg));
4404 }
4405
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4406 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4407 sljit_s32 src_reg,
4408 sljit_s32 mem_reg,
4409 sljit_s32 temp_reg)
4410 {
4411 sljit_u32 ins;
4412
4413 /* temp_reg == mem_reg is undefined so use another temp register */
4414 SLJIT_UNUSED_ARG(temp_reg);
4415
4416 CHECK_ERROR();
4417 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4418
4419 switch (GET_OPCODE(op)) {
4420 case SLJIT_MOV_U8:
4421 ins = STREXB;
4422 break;
4423 case SLJIT_MOV_U16:
4424 ins = STREXH;
4425 break;
4426 default:
4427 ins = STREX;
4428 break;
4429 }
4430
4431 FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
4432 if (op & SLJIT_SET_ATOMIC_STORED)
4433 return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
4434
4435 return SLJIT_SUCCESS;
4436 }
4437
/* Emits a constant that can be patched later with sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC relative load from a literal pool entry, which can be
	   patched in place later. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
		EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: encode the value directly in the instruction stream
	   (see emit_imm); the encoding is also patchable. */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif /* SLJIT_CONFIG_ARM_V6 */

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	/* Spill to memory when the destination is not a register. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
4465
/* Emits a placeholder whose value is resolved to a label address when
   the code is generated. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC relative load from a literal pool entry (initially 0),
	   patched when the label address is known. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: emit a patchable immediate sequence with a 0 placeholder. */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
#endif /* SLJIT_CONFIG_ARM_V6 */

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	/* Spill to memory when the destination is not a register. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return put_label;
}
4492
/* Patches the jump at 'addr' to point to 'new_target' (with icache flush). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	inline_set_jump_addr(addr, executable_offset, new_target, 1);
}
4497
/* Patches the constant emitted at 'addr' to 'new_constant' (with icache flush). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1);
}
4502