1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #ifdef __SOFTFP__
28 #define ARM_ABI_INFO " ABI:softfp"
29 #else
30 #define ARM_ABI_INFO " ABI:hardfp"
31 #endif
32
/* Returns a human-readable description of the target: architecture
   variant (ARMv6/ARMv7), CPU info string, and the float ABI that was
   detected at compile time (softfp vs hardfp, see ARM_ABI_INFO). */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
43
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT 8
#define CONST_POOL_EMPTY 0xffffffff

/* Rounds ptr up to the next CONST_POOL_ALIGNMENT-instruction boundary. */
#define ALIGN_INSTRUCTION(ptr) \
	(sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
/* Converts a byte-range limit into an instruction count, reserving
   headroom for the constant pool alignment padding. */
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))
64
/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* SLJIT register index -> ARM core register number. Index 0 is unused;
   the trailing entries map TMP_REG1, TMP_REG2 and TMP_PC (r15). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* SLJIT float register index -> VFP register number. The table is
   doubled: the second half appears to serve the SLJIT_F64_SECOND
   aliases of the same registers (see function_check_is_freg), with
   the extension bit supplied by freg_ebit_map below. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Companion table: the extension ("top") bit of the VFP register
   number, inserted into the instruction's D/N/M bit positions by the
   VD/VN/VM macros. Set only for the second-half aliases. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};
85
/* Core register field encoders: Rm at bits 0-3 (RM) or 8-11 (RM8),
   Rd at bits 12-15, Rn at bits 16-19. */
#define RM(rm) ((sljit_ins)reg_map[rm])
#define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
#define RN(rn) ((sljit_ins)reg_map[rn] << 16)

/* VFP/NEON register field encoders: the 4-bit register number plus
   the extension bit from freg_ebit_map at the M (bit 5), D (bit 22)
   and N (bit 7) positions respectively. */
#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
94
95 /* --------------------------------------------------------------------- */
96 /* Instrucion forms */
97 /* --------------------------------------------------------------------- */
98
99 /* The instruction includes the AL condition.
100 INST_NAME - CONDITIONAL remove this flag. */
101 #define COND_MASK 0xf0000000
102 #define CONDITIONAL 0xe0000000
103 #define PUSH_POOL 0xff000000
104
105 #define ADC 0xe0a00000
106 #define ADD 0xe0800000
107 #define AND 0xe0000000
108 #define B 0xea000000
109 #define BIC 0xe1c00000
110 #define BKPT 0xe1200070
111 #define BL 0xeb000000
112 #define BLX 0xe12fff30
113 #define BX 0xe12fff10
114 #define CLZ 0xe16f0f10
115 #define CMN 0xe1600000
116 #define CMP 0xe1400000
117 #define EOR 0xe0200000
118 #define LDR 0xe5100000
119 #define LDR_POST 0xe4100000
120 #define LDREX 0xe1900f9f
121 #define LDREXB 0xe1d00f9f
122 #define LDREXH 0xe1f00f9f
123 #define MLA 0xe0200090
124 #define MOV 0xe1a00000
125 #define MUL 0xe0000090
126 #define MVN 0xe1e00000
127 #define NOP 0xe1a00000
128 #define ORR 0xe1800000
129 #define PUSH 0xe92d0000
130 #define POP 0xe8bd0000
131 #define REV 0xe6bf0f30
132 #define REV16 0xe6bf0fb0
133 #define RSB 0xe0600000
134 #define RSC 0xe0e00000
135 #define SBC 0xe0c00000
136 #define SMULL 0xe0c00090
137 #define STR 0xe5000000
138 #define STREX 0xe1800f90
139 #define STREXB 0xe1c00f90
140 #define STREXH 0xe1e00f90
141 #define SUB 0xe0400000
142 #define SXTB 0xe6af0070
143 #define SXTH 0xe6bf0070
144 #define TST 0xe1000000
145 #define UMULL 0xe0800090
146 #define UXTB 0xe6ef0070
147 #define UXTH 0xe6ff0070
148 #define VABS_F32 0xeeb00ac0
149 #define VADD_F32 0xee300a00
150 #define VAND 0xf2000110
151 #define VCMP_F32 0xeeb40a40
152 #define VCVT_F32_S32 0xeeb80ac0
153 #define VCVT_F32_U32 0xeeb80a40
154 #define VCVT_F64_F32 0xeeb70ac0
155 #define VCVT_S32_F32 0xeebd0ac0
156 #define VDIV_F32 0xee800a00
157 #define VDUP 0xee800b10
158 #define VDUP_s 0xf3b00c00
159 #define VEOR 0xf3000110
160 #define VLD1 0xf4200000
161 #define VLD1_r 0xf4a00c00
162 #define VLD1_s 0xf4a00000
163 #define VLDR_F32 0xed100a00
164 #define VMOV_F32 0xeeb00a40
165 #define VMOV 0xee000a10
166 #define VMOV2 0xec400a10
167 #define VMOV_i 0xf2800010
168 #define VMOV_s 0xee000b10
169 #define VMOVN 0xf3b20200
170 #define VMRS 0xeef1fa10
171 #define VMUL_F32 0xee200a00
172 #define VNEG_F32 0xeeb10a40
173 #define VORR 0xf2200110
174 #define VPOP 0xecbd0b00
175 #define VPUSH 0xed2d0b00
176 #define VSHLL 0xf2800a10
177 #define VSHR 0xf2800010
178 #define VSRA 0xf2800110
179 #define VST1 0xf4000000
180 #define VST1_s 0xf4800000
181 #define VSTR_F32 0xed000a00
182 #define VSUB_F32 0xee300a40
183
184 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
185 /* Arm v7 specific instructions. */
186 #define MOVT 0xe3400000
187 #define MOVW 0xe3000000
188 #define RBIT 0xe6ff0f30
189 #endif
190
191 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
192
/* Argument-check helper: returns nonzero when fr denotes a usable
   float register (a scratch, a saved register, or a temporary float
   register). For 32-bit operations, a SLJIT_F64_SECOND alias is first
   mapped back to its base register. A compiler with scratches == -1
   accepts nothing. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	/* Fold second-half (high part) aliases onto their base register. */
	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		return 1;

	if (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		return 1;

	return (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
205
206 #endif /* SLJIT_ARGUMENT_CHECKS */
207
208 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
209
/* Flushes the pending constant pool into the instruction stream:
   emits a PUSH_POOL marker word carrying the entry count, alignment
   padding words, then the literal values themselves. The second code
   generation pass (sljit_generate_code) expands these words. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_ins* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	/* Marker word: PUSH_POOL tag (0xff000000) | number of entries. */
	inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Reserve padding words so the pool can be aligned later. */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy the literal values. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	/* The pool is empty again. */
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
247
/* Appends one instruction word to the code buffer. The constant pool
   is flushed first when the oldest pending literal would otherwise
   fall out of the 4092-byte pc-relative load range. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_ins* ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));

	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst;
	return SLJIT_SUCCESS;
}
261
/* Emits an instruction that references a (possibly shared) literal.
   The low 12 bits of 'inst' must be zero; they temporarily hold the
   constant pool index until patch_pc_relative_loads rewrites it into
   a real offset. Identical non-unique literals share one pool entry. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;
	/* CPOOL_SIZE acts as "no entry found yet". */
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	/* Flush if the oldest pending literal would go out of the
	   pc-relative load range. */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Try to reuse an existing, non-unique entry with the same value. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			/* Pool is full: flush it and start a fresh one. */
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	/* Store the pool index in the offset field for now. */
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	/* Remember where the oldest unflushed literal was referenced. */
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
311
/* Like push_inst_with_literal, but the literal always gets its own
   pool entry (marked unique so later loads never share it) --
   needed for values that may be patched individually at run time. */
static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;

	/* Flush when out of load range or when the pool is full. */
	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	/* The low 12 bits temporarily hold the pool index. */
	*ptr = inst | compiler->cpool_fill;

	compiler->cpool[compiler->cpool_fill] = literal;
	compiler->cpool_unique[compiler->cpool_fill] = 1;
	compiler->cpool_fill++;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
332
prepare_blx(struct sljit_compiler * compiler)333 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
334 {
335 /* Place for at least two instruction (doesn't matter whether the first has a literal). */
336 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
337 return push_cpool(compiler);
338 return SLJIT_SUCCESS;
339 }
340
/* Emits "blx TMP_REG1". Callers pair this with the instruction that
   loads the target into TMP_REG1; prepare_blx has already ensured no
   pool flush can separate the two, so set_jump_addr can later rewrite
   the pair as a single BL when the target is in range. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	/* TMP_REG1 must not be lr: blx clobbers lr with the return address. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
349
/* Rewrites the temporary pool indexes stored in pc-relative loads
   (emitted between last_pc_patch and code_ptr) into real byte
   offsets, compacting the pool by dropping entries that are never
   referenced. Returns the number of literals actually kept. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) {
			/* Word distance from this load to the pool start. */
			diff = (sljit_uw)(const_pool - last_pc_patch);
			/* Temporary pool index placed by push_inst_with_literal. */
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			if ((sljit_s32)const_pool[ind] < 0) {
				/* First reference: assign the next compacted slot and
				   record the mapping in the pool itself. */
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				/* Forward offset; the -2 compensates the pc+8 bias. */
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Literal is 4 bytes behind pc: clear the U (add
				   offset) bit and use offset 4 instead. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
394
/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
/* Singly linked list node used by resolve_const_pool_index when a
   compacted pool slot must be overwritten before its original
   content has been consumed. */
struct future_patch {
	struct future_patch* next;
	/* Pool slot this patch belongs to. */
	sljit_s32 index;
	/* Original word saved from that slot before it was overwritten. */
	sljit_s32 value;
};
401
/* Moves one literal word (*buf_ptr) into its final, compacted pool
   slot, following the index map that patch_pc_relative_loads left in
   cpool_start_address. Slots marked (sljit_uw)-1 belong to unused
   literals and are discarded. Returns SLJIT_ERR_ALLOC_FAILED when a
   future-patch record cannot be allocated (the whole patch list is
   freed in that case). */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* A pending patch may hold the saved original content of
		   this slot; search the list and unlink the match. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* Negative (-1) marks an unused literal: nothing to store. */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* The destination slot still holds an unprocessed source
			   word; save it in a future patch before overwriting. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* Allocation failed: release the entire patch list. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
454
455 #else
456
push_inst(struct sljit_compiler * compiler,sljit_ins inst)457 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
458 {
459 sljit_ins* ptr;
460
461 ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
462 FAIL_IF(!ptr);
463 compiler->size++;
464 *ptr = inst;
465 return SLJIT_SUCCESS;
466 }
467
emit_imm(struct sljit_compiler * compiler,sljit_s32 reg,sljit_sw imm)468 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
469 {
470 FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff)));
471 return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff));
472 }
473
474 #endif
475
/* Tries to convert a jump into a single direct B/BL instruction.
   Returns 1 when an instruction word could be removed from the
   stream (caller then drops one word), 0 otherwise. Rewritable
   jumps must keep their long form and are never touched. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* A call is a literal-load + blx pair; step back to the load. */
	if (jump->flags & IS_BL)
		code_ptr--;
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Byte distance relative to pc (the instruction address + 8,
	   hence code_ptr + 2 words). */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
	else {
		SLJIT_ASSERT(jump->u.label != NULL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (jump->flags & IS_BL) {
		/* Signed 26-bit byte range of B/BL (+-32MB). */
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Fuse the pair into one BL, keeping the blx condition. */
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* In-place rewrite to B: no word is removed here. */
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
	return 0;
}
522
/* Retargets a rewritable jump at run time.
   ARMv6: jump_ptr addresses a two-word descriptor -- word 0 is the
   address of the patchable instruction(s), word 1 the original
   "mov pc"/load instruction word. The site becomes a direct B/BL
   when the new target is in range, otherwise the original literal
   load is restored and the pool entry updated.
   ARMv7: jump_ptr addresses a MOVW/MOVT pair whose imm16 fields are
   rewritten. flush_cache requests WX flag toggling and an icache
   flush around the patch. */
static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)jump_ptr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_ins mov_pc = ptr[1];
	/* Call site: destination register is not pc. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	/* Word distance from pc (instruction address + 8). */
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	/* Signed 24-bit word range of B/BL. */
	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			/* Single B, preserving the original condition code. */
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			/* BL plus a NOP replacing the former load/blx pair. */
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Out of branch range: go through the pool literal. */
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the original instruction(s) if a previous call
		   replaced them with a direct branch. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		/* Store the new target in the constant pool. */
		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Keep the Rd field (0xf000); rewrite both imm4:imm12 halves. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
617
618 static sljit_uw get_imm(sljit_uw imm);
619 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
620 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
621
/* Rewrites the constant produced by an sljit_const at run time.
   ARMv6: addr points to a two-word descriptor (instruction address,
   original ldr-literal word). The load is replaced by a MOV or MVN
   immediate when the value can be encoded that way; otherwise the
   original load is restored and the pool literal updated.
   ARMv7: addr points to a MOVW/MOVT pair whose imm16 halves are
   rewritten. flush_cache works as in set_jump_addr. */
static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)addr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	/* Try to encode the value as a MOV immediate. */
	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* 0xe3a00000 = MOV (immediate); keep the original Rd field. */
		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Try the inverted value as an MVN immediate. */
	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* 0xe3e00000 = MVN (immediate); keep the original Rd field. */
		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Not encodable: fall back to the constant pool literal.
	   Locate it from the load's offset field and U bit. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the original load if it was replaced by MOV/MVN. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Keep the Rd field (0xf000); rewrite both imm4:imm12 halves. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
713
/* Computes how many extra instruction words a "mov address" needs
   beyond the first, and flags PATCH_B when the target is close
   enough for a single pc-relative instruction. On ARMv6 the long
   form also takes a single word (literal load), so the result is
   always 0; on ARMv7 the long form is a MOVW/MOVT pair (1 extra). */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

	/* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	/* Word-aligned and within +-0x3fc of pc: one instruction suffices. */
	if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) {
		jump->flags |= PATCH_B;
		return 0;
	}

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return 0;
#else /* !SLJIT_CONFIG_ARM_V6 */
	return 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
}
739
740 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
741
/* First (sizing) pass for ARMv7: determines the exact number of
   instruction words each jump and mov_addr needs, shifts label
   sizes, const addresses and jump addresses down by the words saved
   so far, and stores each jump's remaining worst-case extra word
   count in its flags above JUMP_SIZE_SHIFT. Finally shrinks
   compiler->size by the total saving. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	/* Visit labels, consts and jumps in address order. */
	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* total_size: extra instruction words beyond the first. */
			total_size = JUMP_MAX_SIZE - 1;

			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. The -2 compensates the
				   pc+8 read-ahead of the branch offset. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;

				/* Fits in a single direct B/BL. */
				if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins)))
					total_size = 1 - 1;
			}

			size_reduce += JUMP_MAX_SIZE - 1 - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 1;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				/* Close target: one pc-relative instruction suffices
				   (see mov_addr_get_length). */
				if (diff <= 0xff + 2 && diff >= -0xff + 2)
					total_size = 0;
			}

			size_reduce += 1 - total_size;
		}

		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
813
814 #endif /* SLJIT_CONFIG_ARM_V7 */
815
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)816 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
817 {
818 struct sljit_memory_fragment *buf;
819 sljit_ins *code;
820 sljit_ins *code_ptr;
821 sljit_ins *buf_ptr;
822 sljit_ins *buf_end;
823 sljit_uw word_count;
824 SLJIT_NEXT_DEFINE_TYPES;
825 sljit_sw executable_offset;
826 sljit_uw addr;
827 sljit_sw diff;
828 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
829 sljit_uw cpool_size;
830 sljit_uw cpool_skip_alignment;
831 sljit_uw cpool_current_index;
832 sljit_ins *cpool_start_address;
833 sljit_ins *last_pc_patch;
834 struct future_patch *first_patch;
835 #endif
836
837 struct sljit_label *label;
838 struct sljit_jump *jump;
839 struct sljit_const *const_;
840
841 CHECK_ERROR_PTR();
842 CHECK_PTR(check_sljit_generate_code(compiler));
843
844 /* Second code generation pass. */
845 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
846 compiler->size += (compiler->patches << 1);
847 if (compiler->cpool_fill > 0)
848 compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
849 #else /* !SLJIT_CONFIG_ARM_V6 */
850 reduce_code_size(compiler);
851 #endif /* SLJIT_CONFIG_ARM_V6 */
852 code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
853 PTR_FAIL_WITH_EXEC_IF(code);
854
855 reverse_buf(compiler);
856 buf = compiler->buf;
857
858 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
859 cpool_size = 0;
860 cpool_skip_alignment = 0;
861 cpool_current_index = 0;
862 cpool_start_address = NULL;
863 first_patch = NULL;
864 last_pc_patch = code;
865 #endif /* SLJIT_CONFIG_ARM_V6 */
866
867 code_ptr = code;
868 word_count = 0;
869 label = compiler->labels;
870 jump = compiler->jumps;
871 const_ = compiler->consts;
872 SLJIT_NEXT_INIT_TYPES();
873 SLJIT_GET_NEXT_MIN();
874
875 do {
876 buf_ptr = (sljit_ins*)buf->memory;
877 buf_end = buf_ptr + (buf->used_size >> 2);
878 do {
879 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
880 if (cpool_size > 0) {
881 if (cpool_skip_alignment > 0) {
882 buf_ptr++;
883 cpool_skip_alignment--;
884 } else {
885 if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
886 SLJIT_FREE_EXEC(code, exec_allocator_data);
887 compiler->error = SLJIT_ERR_ALLOC_FAILED;
888 return NULL;
889 }
890 buf_ptr++;
891 if (++cpool_current_index >= cpool_size) {
892 SLJIT_ASSERT(!first_patch);
893 cpool_size = 0;
894 }
895 }
896 } else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
897 #endif /* SLJIT_CONFIG_ARM_V6 */
898 *code_ptr = *buf_ptr++;
899 if (next_min_addr == word_count) {
900 SLJIT_ASSERT(!label || label->size >= word_count);
901 SLJIT_ASSERT(!jump || jump->addr >= word_count);
902 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
903
904 if (next_min_addr == next_label_size) {
905 label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
906 label->size = (sljit_uw)(code_ptr - code);
907 label = label->next;
908 next_label_size = SLJIT_GET_NEXT_SIZE(label);
909 }
910
911 /* These structures are ordered by their address. */
912 if (next_min_addr == next_jump_addr) {
913 if (!(jump->flags & JUMP_MOV_ADDR)) {
914 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
915 if (detect_jump_type(jump, code_ptr, code, executable_offset))
916 code_ptr--;
917 jump->addr = (sljit_uw)code_ptr;
918 #else /* !SLJIT_CONFIG_ARM_V6 */
919 word_count += jump->flags >> JUMP_SIZE_SHIFT;
920 jump->addr = (sljit_uw)code_ptr;
921 if (!detect_jump_type(jump, code_ptr, code, executable_offset)) {
922 code_ptr[2] = code_ptr[0];
923 addr = ((code_ptr[0] & 0xf) << 12);
924 code_ptr[0] = MOVW | addr;
925 code_ptr[1] = MOVT | addr;
926 code_ptr += 2;
927 }
928 SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins));
929 #endif /* SLJIT_CONFIG_ARM_V6 */
930 } else {
931 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
932 word_count += jump->flags >> JUMP_SIZE_SHIFT;
933 #endif /* SLJIT_CONFIG_ARM_V7 */
934 addr = (sljit_uw)code_ptr;
935 code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
936 jump->addr = addr;
937 }
938 jump = jump->next;
939 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
940 } else if (next_min_addr == next_const_addr) {
941 const_->addr = (sljit_uw)code_ptr;
942 const_ = const_->next;
943 next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
944 }
945
946 SLJIT_GET_NEXT_MIN();
947 }
948 code_ptr++;
949 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
950 } else {
951 /* Fortunately, no need to shift. */
952 cpool_size = *buf_ptr++ & ~PUSH_POOL;
953 SLJIT_ASSERT(cpool_size > 0);
954 cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
955 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
956 if (cpool_current_index > 0) {
957 /* Unconditional branch. */
958 *code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
959 code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
960 }
961 cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
962 cpool_current_index = 0;
963 last_pc_patch = code_ptr;
964 }
965 #endif /* SLJIT_CONFIG_ARM_V6 */
966 word_count++;
967 } while (buf_ptr < buf_end);
968 buf = buf->next;
969 } while (buf);
970
971 if (label && label->size == word_count) {
972 label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
973 label->size = (sljit_uw)(code_ptr - code);
974 label = label->next;
975 }
976
977 SLJIT_ASSERT(!label);
978 SLJIT_ASSERT(!jump);
979 SLJIT_ASSERT(!const_);
980
981 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
982 SLJIT_ASSERT(cpool_size == 0);
983 if (compiler->cpool_fill > 0) {
984 cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
985 cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
986 if (cpool_current_index > 0)
987 code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
988
989 buf_ptr = compiler->cpool;
990 buf_end = buf_ptr + compiler->cpool_fill;
991 cpool_current_index = 0;
992 while (buf_ptr < buf_end) {
993 if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
994 SLJIT_FREE_EXEC(code, exec_allocator_data);
995 compiler->error = SLJIT_ERR_ALLOC_FAILED;
996 return NULL;
997 }
998 buf_ptr++;
999 cpool_current_index++;
1000 }
1001 SLJIT_ASSERT(!first_patch);
1002 }
1003 #endif
1004
1005 jump = compiler->jumps;
1006 while (jump) {
1007 addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
1008 buf_ptr = (sljit_ins*)jump->addr;
1009
1010 if (jump->flags & JUMP_MOV_ADDR) {
1011 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1012 SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000);
1013 #else /* !SLJIT_CONFIG_ARM_V6 */
1014 SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0);
1015 #endif /* SLJIT_CONFIG_ARM_V6 */
1016
1017 if (jump->flags & PATCH_B) {
1018 SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0);
1019 diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2;
1020
1021 SLJIT_ASSERT(diff <= 0xff && diff >= -0xff);
1022
1023 addr = ADD;
1024 if (diff < 0) {
1025 diff = -diff;
1026 addr = SUB;
1027 }
1028
1029 buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff);
1030 } else {
1031 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1032 buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
1033 #else /* !SLJIT_CONFIG_ARM_V6 */
1034 buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
1035 buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff);
1036 #endif /* SLJIT_CONFIG_ARM_V6 */
1037 }
1038 } else if (jump->flags & PATCH_B) {
1039 diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
1040 SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000);
1041 *buf_ptr |= (diff >> 2) & 0x00ffffff;
1042 } else {
1043 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1044 if (jump->flags & IS_BL)
1045 buf_ptr--;
1046
1047 if (jump->flags & SLJIT_REWRITABLE_JUMP) {
1048 jump->addr = (sljit_uw)code_ptr;
1049 code_ptr[0] = (sljit_ins)buf_ptr;
1050 code_ptr[1] = *buf_ptr;
1051 set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0);
1052 code_ptr += 2;
1053 } else {
1054 if (*buf_ptr & (1 << 23))
1055 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1056 else
1057 buf_ptr += 1;
1058 *buf_ptr = addr;
1059 }
1060 #else /* !SLJIT_CONFIG_ARM_V6 */
1061 set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0);
1062 #endif /* SLJIT_CONFIG_ARM_V6 */
1063 }
1064
1065 jump = jump->next;
1066 }
1067
1068 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1069 const_ = compiler->consts;
1070 while (const_) {
1071 buf_ptr = (sljit_ins*)const_->addr;
1072 const_->addr = (sljit_uw)code_ptr;
1073
1074 code_ptr[0] = (sljit_ins)buf_ptr;
1075 code_ptr[1] = *buf_ptr;
1076 if (*buf_ptr & (1 << 23))
1077 buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1078 else
1079 buf_ptr += 1;
1080 /* Set the value again (can be a simple constant). */
1081 set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
1082 code_ptr += 2;
1083
1084 const_ = const_->next;
1085 }
1086 #endif /* SLJIT_CONFIG_ARM_V6 */
1087
1088 SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
1089
1090 compiler->error = SLJIT_ERR_COMPILED;
1091 compiler->executable_offset = executable_offset;
1092 compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
1093
1094 code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1095 code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1096
1097 SLJIT_CACHE_FLUSH(code, code_ptr);
1098 SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1099 return code;
1100 }
1101
sljit_has_cpu_feature(sljit_s32 feature_type)1102 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1103 {
1104 switch (feature_type) {
1105 case SLJIT_HAS_FPU:
1106 case SLJIT_HAS_F64_AS_F32_PAIR:
1107 #ifdef SLJIT_IS_FPU_AVAILABLE
1108 return (SLJIT_IS_FPU_AVAILABLE) != 0;
1109 #else
1110 /* Available by default. */
1111 return 1;
1112 #endif /* SLJIT_IS_FPU_AVAILABLE */
1113 case SLJIT_HAS_SIMD:
1114 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1115 return 0;
1116 #else
1117 #ifdef SLJIT_IS_FPU_AVAILABLE
1118 return (SLJIT_IS_FPU_AVAILABLE) != 0;
1119 #else
1120 /* Available by default. */
1121 return 1;
1122 #endif /* SLJIT_IS_FPU_AVAILABLE */
1123 #endif /* SLJIT_CONFIG_ARM_V6 */
1124
1125 case SLJIT_SIMD_REGS_ARE_PAIRS:
1126 case SLJIT_HAS_CLZ:
1127 case SLJIT_HAS_ROT:
1128 case SLJIT_HAS_CMOV:
1129 case SLJIT_HAS_REV:
1130 case SLJIT_HAS_PREFETCH:
1131 case SLJIT_HAS_COPY_F32:
1132 case SLJIT_HAS_COPY_F64:
1133 case SLJIT_HAS_ATOMIC:
1134 return 1;
1135
1136 case SLJIT_HAS_CTZ:
1137 #if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
1138 return 2;
1139 #else
1140 return 1;
1141 #endif /* SLJIT_CONFIG_ARM_V6 */
1142
1143 default:
1144 return 0;
1145 }
1146 }
1147
1148 /* --------------------------------------------------------------------- */
1149 /* Entry, exit */
1150 /* --------------------------------------------------------------------- */
1151
/* Creates an index in data_transfer_insts array. */
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x01
#define HALF_SIZE 0x02
#define PRELOAD 0x03
#define SIGNED 0x04
#define LOAD_DATA 0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM 0x10
#define ALLOW_INV_IMM 0x20
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM 0x40
#define ALLOW_DOUBLE_IMM 0x80

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations. */

static const sljit_ins data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* The 'add' argument (bit 23) selects whether the offset is
   added (1) to or subtracted (0) from the base register. */
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Type2 transfers split an 8 bit immediate into two nibbles;
   bit 22 marks the immediate form. */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))

/* Flags for emit_op: */
/* Arguments are swapped. */
#define ARGS_SWAPPED 0x01
/* Inverted immediate. */
#define INV_IMM 0x02
/* Source and destination is register. */
#define REGISTER_OP 0x04
/* Unused return value. */
#define UNUSED_RETURN 0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS (1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM (1 << 25)

/* Generic operand emitter; defined later in this file. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
1226
/* Emits the function prologue: pushes the used saved/temporary registers,
   saves the needed VFP registers, allocates the (8 byte aligned) local
   area, and moves the incoming arguments to their assigned registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw imm, offset;
	sljit_s32 i, tmp, size, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Hardfp: VMOV_F32 moves to shuffle incoming float arguments
	   are collected first, then emitted in reverse order. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Build the bit mask of registers to be pushed. */
	imm = 0;

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	/* Push saved and temporary registers
	   multiple registers: stmdb sp!, {..., lr}
	   single register: str reg, [sp, #-4]! */
	if (imm != 0)
		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
	else
		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Insert a padding word so the VFP save area stays 8 byte aligned. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the frame up to 8 bytes; local_size becomes the extra amount
	   to subtract from sp beyond the register save area. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* Softfp: arguments arrive in r0-r3 and on the stack;
	   'offset' tracks the argument position in bytes. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are aligned to an even register pair / 8 bytes. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Already in the right register: nothing to emit. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
			else
				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Reuse the second half of a previously split register. */
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* NOTE(review): compares an element count against sizeof in bytes;
	   the bound is loose but safe — SLJIT_ARRAY_SIZE would be tighter. */
	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	/* Emit the collected moves in reverse order to avoid overwriting
	   a source register before it is read. */
	while (remap_ptr > remap)
		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
#endif

	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	return SLJIT_SUCCESS;
}
1392
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1393 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1394 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1395 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1396 {
1397 sljit_s32 size;
1398
1399 CHECK_ERROR();
1400 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1401 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1402
1403 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1404
1405 /* Doubles are saved, so alignment is unaffected. */
1406 if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1407 size += SSIZE_OF(sw);
1408
1409 compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1410 return SLJIT_SUCCESS;
1411 }
1412
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1413 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1414 {
1415 sljit_uw imm2 = get_imm(imm);
1416
1417 if (imm2 == 0)
1418 return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);
1419
1420 return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
1421 }
1422
/* Emits the function epilogue: restores VFP registers, releases the
   local area, and pops the saved registers. frame_size < 0 keeps lr in
   TMP_REG2; frame_size == 0 returns (lr popped into pc); frame_size > 0
   leaves that many bytes of the frame in place for a tail transfer. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		/* Pop in the reverse order of the VPUSHes in sljit_emit_enter. */
		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
		}

		/* Only the alignment padding word (if any) remains above the
		   saved registers now. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	if (lr_dst != 0)
		reg_list |= (sljit_uw)1 << reg_map[lr_dst];

	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	/* Special case: at most one register (plus possibly lr kept in place)
	   has to be restored, so a single ldr can replace the pop. */
	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	/* Pop saved and temporary registers
	   multiple registers: ldmia sp!, {...}
	   single register: ldr reg, [sp], #4 */
	if ((reg_list & (reg_list - 1)) == 0) {
		SLJIT_ASSERT(lr_dst != 0);
		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);

		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
	}

	FAIL_IF(push_inst(compiler, POP | reg_list));

	if (frame_size > 0)
		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
}
1558
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	/* frame_size == 0: the release sequence also performs the return,
	   since lr_dst defaults to TMP_PC in emit_stack_frame_release. */
	return emit_stack_frame_release(compiler, 0);
}
1566
/* Releases the current frame and jumps to 'src' (tail transfer). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		/* Load the target before the frame is released: the address may
		   reference a register that is about to be restored. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* 'src' is one of the registers restored below; keep a copy. */
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, 1));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1588
1589 /* --------------------------------------------------------------------- */
1590 /* Operators */
1591 /* --------------------------------------------------------------------- */
1592
/* Emits the instruction (or short sequence) implementing one sljit
   operation; operands were already resolved to registers/immediates by
   emit_op. The shift operations break out of the switch and share the
   common tail below. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_uw dst, sljit_uw src1, sljit_uw src2)
{
	sljit_s32 reg, is_masked;
	sljit_uw shift_type;

	switch (op) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (dst != src2) {
			if (src2 & SRC2_IMM) {
				return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
			}
			return push_inst(compiler, MOV | RD(dst) | RM(src2));
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		/* REGISTER_OP: a real extension is needed; otherwise the value
		   is an immediate (or already extended). */
		if (flags & REGISTER_OP)
			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));

		if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & REGISTER_OP)
			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));

		if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_CLZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
		return SLJIT_SUCCESS;

	case SLJIT_CTZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED));
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
		/* No rbit on ARMv6: isolate the lowest set bit (x & -x), take
		   clz, then map 31-n back to n; zero input yields 32 (the CMP
		   skips the conditional EOR correction in that case). */
		FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2)));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
		return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
#else /* !SLJIT_CONFIG_ARM_V6 */
		FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
		return push_inst(compiler, CLZ | RD(dst) | RM(dst));
#endif /* SLJIT_CONFIG_ARM_V6 */

	case SLJIT_REV:
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		return push_inst(compiler, REV | RD(dst) | RM(src2));

	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
		if (!(flags & REGISTER_OP))
			return SLJIT_SUCCESS;
		return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
	case SLJIT_ADD:
		SLJIT_ASSERT(!(flags & INV_IMM));

		/* Result unused: a flag-setting compare-negated is enough. */
		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_ADDC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUB:
		SLJIT_ASSERT(!(flags & INV_IMM));

		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUBC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		compiler->status_flags_state = 0;

		if (!(flags & SET_FLAGS))
			return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));

		/* Overflow check: the high word of the 64-bit product must equal
		   the sign extension of the low word. */
		reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1;
		FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1)));

		/* cmp TMP_REG1, dst asr #31. */
		return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0);

	case SLJIT_AND:
		if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
			return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_OR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_XOR:
		if (flags & INV_IMM) {
			/* xor with all-ones immediate is a bitwise not. */
			SLJIT_ASSERT(src2 == SRC2_IMM);
			return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
		}
		return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SHL:
	case SLJIT_MSHL:
		shift_type = 0;
		is_masked = op == SLJIT_MSHL;
		break;

	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		shift_type = 1;
		is_masked = op == SLJIT_MLSHR;
		break;

	case SLJIT_ASHR:
	case SLJIT_MASHR:
		shift_type = 2;
		is_masked = op == SLJIT_MASHR;
		break;

	case SLJIT_ROTL:
		/* Rotate left by n == rotate right by (32 - n) mod 32. */
		if (compiler->shift_imm == 0x20) {
			FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
			src2 = TMP_REG2;
		} else
			compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
		/* fallthrough */

	case SLJIT_ROTR:
		shift_type = 3;
		is_masked = 0;
		break;

	case SLJIT_MULADD:
		return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1));

	default:
		SLJIT_UNREACHABLE();
		return SLJIT_SUCCESS;
	}

	/* Common tail for the shift/rotate operations. */
	SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));

	/* shift_imm != 0x20: shift by an immediate amount. */
	if (compiler->shift_imm != 0x20) {
		SLJIT_ASSERT(src1 == TMP_REG1);

		if (compiler->shift_imm != 0)
			return push_inst(compiler, MOV | (flags & SET_FLAGS) |
				RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
		return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
	}

	SLJIT_ASSERT(src1 != TMP_REG2);

	/* Masked variants only use the low five bits of the shift amount. */
	if (is_masked) {
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
		src2 = TMP_REG2;
	}

	return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
		| RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
}
1782
1783 #undef EMIT_SHIFT_INS_AND_RETURN
1784
1785 /* Tests whether the immediate can be stored in the 12 bit imm field.
1786 Returns with 0 if not possible. */
static sljit_uw get_imm(sljit_uw imm)
{
	sljit_u32 rot;

	/* A plain byte needs no rotation at all. */
	if (imm <= 0xff)
		return SRC2_IMM | imm;

	/* Normalize: move the significant bits towards the top byte while
	   tracking the rotation amount needed to undo the movement. */
	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rot = 8;
	} else {
		imm = (imm << 24) | (imm >> 8);
		rot = 0;
	}

	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rot += 4;
	}

	if (!(imm & 0xf0000000)) {
		imm <<= 4;
		rot += 2;
	}

	if (!(imm & 0xc0000000)) {
		imm <<= 2;
		rot += 1;
	}

	/* Encodable only when every set bit fits into the top byte. */
	if (!(imm & 0x00ffffff))
		return SRC2_IMM | (imm >> 24) | (rot << 8);
	return 0;
}
1821
/* Attempts to split 'imm' into two rotated-byte immediates which can be
   combined by a two instruction sequence. Returns the first encoded
   immediate (0 on failure) and stores the second one in *imm2. */
static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continuous zero bits). */
	mask = 0xff000000;
	rol = 8;
	while (1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}

		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* No aligned zero byte found yet: rol by 8 and retry with
			   the byte-rotated value. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					/* The set bits cannot be covered by two bytes. */
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		/* Both chunks are already byte aligned. */
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		*imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	} else if (imm & 0xc0000000) {
		/* First chunk is the top byte; normalize the remainder and
		   check that it fits into a single rotated byte. */
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	} else {
		/* Top bits are clear: align the first chunk before extracting it. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	return imm1;
}
1927
/* Loads the 32 bit constant imm into reg using the shortest available
   sequence. On ARMv7 this is MOVW (and MOVT for the high half); on ARMv6
   it tries one MOV/MVN, then a two-instruction compose, and finally falls
   back to a literal-pool load. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_uw imm1, imm2;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* 16 bit constants fit into a single MOVW. */
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	/* Try the bitwise complement: MVN covers another set of constants. */
	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Create imm by 2 inst. */
	imm1 = compute_imm(imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
		return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
	}

	/* Same split on the complement: MVN first half, BIC second half. */
	imm1 = compute_imm(~imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
		return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
	}

	/* Load integer from a PC-relative literal pool entry. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* MOVW loads the low 16 bits; MOVT fills in the high 16 bits. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */
}
1970
/* Emits a single load/store described by flags for the memory operand
   (arg, argw), using tmp_reg as scratch when an address must be built.
   Type1 transfers (word/byte LDR/STR) take a 12 bit offset; type2
   transfers (halfword/signed forms) only an 8 bit one, so the limits
   below depend on IS_TYPE1_TRANSFER. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw imm, offset_reg, tmp;
	sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
	sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	/* tmp_reg may equal the base only when no address computation is needed. */
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: round it so the residual offset fits in
		   [-mask, mask], load the rounded base, then use the residue. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);

		FAIL_IF(load_immediate(compiler, tmp_reg, tmp));

		argw -= (sljit_sw)tmp;
		tmp = 1; /* 1 = add offset, 0 = subtract offset (U bit). */

		if (argw < 0) {
			argw = -argw;
			tmp = 0;
		}

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
			(mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
	}

	if (arg & OFFS_REG_MASK) {
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3; /* Only a 0-3 bit shift of the index register is kept. */

		if (argw != 0 && (mask == 0xff)) {
			/* Type2 transfers have no shifted-register form: precompute
			   base + (index << argw) into tmp_reg first. */
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
			RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
	}

	arg &= REG_MASK;

	if (argw > mask) {
		/* Offset too large: fold the rounded excess into the base with a
		   single ADD when it is an encodable immediate. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
			argw -= (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	} else if (argw < -mask) {
		/* Mirror image of the case above with SUB. */
		tmp = (sljit_uw)(-argw & (sign | mask));
		tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
			argw += (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	}

	if (argw <= mask && argw >= -mask) {
		/* Offset fits directly in the instruction's immediate field. */
		if (argw >= 0) {
			if (mask == 0xff)
				argw = TYPE2_TRANSFER_IMM(argw);
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
		}

		argw = -argw;

		if (mask == 0xff)
			argw = TYPE2_TRANSFER_IMM(argw);

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
	}

	/* Last resort: materialize the full offset and use register addressing. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
		RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
}
2061
/* Central operand-marshalling routine: resolves dst/src1/src2 (register,
   memory or immediate) into registers or encodable immediates according
   to inp_flags, then delegates the actual encoding to emit_single_op.
   May rewrite op to its negated counterpart (ADD<->SUB, ADDC<->SUBC)
   when the negated immediate encodes better. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefer registers and simple consts. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg = 0;
	sljit_s32 src2_reg = 0;
	sljit_s32 src2_tmp_reg = 0;
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	sljit_s32 neg_op = 0;
	sljit_u32 imm2;

	op = GET_OPCODE(op);

	/* Two-immediate sequences would set flags after the partial step. */
	if (flags & SET_FLAGS)
		inp_flags &= ~ALLOW_DOUBLE_IMM;

	/* TMP_REG1 as destination marks a flag-only operation (see op2u). */
	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	if (inp_flags & ALLOW_NEG_IMM) {
		/* Record the opposite opcode so -imm can be tried below. */
		switch (op) {
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUB;
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUBC;
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADD;
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADDC;
			break;
		}
	}

	/* Try to turn an immediate operand into an encoded src2 field:
	   plain, inverted (INV_IMM) or negated (switching to neg_op). */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
			if (src2_reg)
				break;

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}

			if (neg_op != 0) {
				/* ADD/SUB negate arithmetically, ADDC/SUBC via ~imm
				   (carry semantics). */
				src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
				if (src2_reg) {
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 == SLJIT_IMM) {
			/* Same idea with the operands swapped (ARGS_SWAPPED tells
			   emit_single_op about the reversed order). */
			src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}

			if (neg_op >= SLJIT_SUB) {
				/* Note: additive operation (commutative). */
				SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);

				src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
				if (src2_reg) {
					src1 = src2;
					src1w = src2w;
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Destination. */
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOV_P) {
		/* Move family: a register source stored to memory needs no ALU op. */
		if (dst & SLJIT_MEM) {
			if (inp_flags & BYTE_SIZE)
				inp_flags &= ~SIGNED;

			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1);
		}

		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= REGISTER_OP;

		src2_tmp_reg = dst_reg;
	} else {
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
			if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16))
				flags |= REGISTER_OP;
		}

		/* Pick a scratch for src2 that does not collide with src1. */
		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
	}

	if (src2_reg == 0 && (src2 & SLJIT_MEM)) {
		src2_reg = src2_tmp_reg;
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1));
	}

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	} else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
		/* Immediate src1 must be materialized unless the double-immediate
		   path below can absorb it (not possible for SUB/SUBC ordering). */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1_reg = TMP_REG1;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = src2_tmp_reg;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (!(inp_flags & ALLOW_DOUBLE_IMM))
			FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
		else {
			/* Double-immediate path: split src2w into two encodable
			   immediates and issue the operation in two steps. */
			SLJIT_ASSERT(!(flags & SET_FLAGS));

			if (src1_reg == 0) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
				src1_reg = TMP_REG1;
			}

			src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);

			if (src2_reg == 0 && neg_op != 0) {
				src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
				if (src2_reg != 0)
					op = neg_op;
			}

			if (src2_reg == 0) {
				FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w));
				src2_reg = src2_tmp_reg;
			} else {
				/* First step done here; the carry variant degrades to the
				   plain op for the second half. */
				FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
				src1_reg = dst_reg;
				src2_reg = (sljit_s32)imm2;

				if (op == SLJIT_ADDC)
					op = SLJIT_ADD;
				else if (op == SLJIT_SUBC)
					op = SLJIT_SUB;
			}
		}
	}

	if (src1_reg == 0) {
		/* Double-immediate path for src1 (commutative ops only reach here). */
		SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));

		src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);

		if (src1_reg == 0 && neg_op != 0) {
			src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
			if (src1_reg != 0)
				op = neg_op;
		}

		if (src1_reg == 0) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
			src1_reg = TMP_REG1;
		} else {
			FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
			src1_reg = dst_reg;
			src2_reg = (sljit_s32)imm2;

			if (op == SLJIT_ADDC)
				op = SLJIT_ADD;
		}
	}

	FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
2283
2284 #ifdef __cplusplus
2285 extern "C" {
2286 #endif
2287
2288 #if defined(__GNUC__)
2289 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2290 extern int __aeabi_idivmod(int numerator, int denominator);
2291 #else
2292 #error "Software divmod functions are needed"
2293 #endif
2294
2295 #ifdef __cplusplus
2296 }
2297 #endif
2298
/* Emits a zero-operand operation. LMUL maps to UMULL/SMULL; the DIV and
   DIVMOD variants call the EABI software divide helpers, saving the
   caller-clobbered argument registers around the call as needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_uw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64 bit product: low word to R0, high word to R1. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Collect the machine registers (r1-r3) that are live scratch
		   registers and would be clobbered by the helper call. For the
		   DIV-only forms r1 (the remainder) must be preserved too. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* Pre-indexed store with writeback reserves 8 or 16 bytes
			   (kept 8-byte aligned) and spills the first register. */
			FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		/* EABI helpers return quotient in r0 and remainder in r1. */
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if (saved_reg_count > 0) {
			/* Restore in reverse order; the last load post-increments sp
			   to release the reserved stack area. */
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2374
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2375 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2376 sljit_s32 dst, sljit_sw dstw,
2377 sljit_s32 src, sljit_sw srcw)
2378 {
2379 CHECK_ERROR();
2380 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2381 ADJUST_LOCAL_OFFSET(dst, dstw);
2382 ADJUST_LOCAL_OFFSET(src, srcw);
2383
2384 switch (GET_OPCODE(op)) {
2385 case SLJIT_MOV:
2386 case SLJIT_MOV_U32:
2387 case SLJIT_MOV_S32:
2388 case SLJIT_MOV32:
2389 case SLJIT_MOV_P:
2390 return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
2391
2392 case SLJIT_MOV_U8:
2393 return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
2394
2395 case SLJIT_MOV_S8:
2396 return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
2397
2398 case SLJIT_MOV_U16:
2399 return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
2400
2401 case SLJIT_MOV_S16:
2402 return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
2403
2404 case SLJIT_CLZ:
2405 case SLJIT_CTZ:
2406 case SLJIT_REV:
2407 case SLJIT_REV_U32:
2408 case SLJIT_REV_S32:
2409 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2410
2411 case SLJIT_REV_U16:
2412 case SLJIT_REV_S16:
2413 return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
2414 }
2415
2416 return SLJIT_SUCCESS;
2417 }
2418
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2419 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2420 sljit_s32 dst, sljit_sw dstw,
2421 sljit_s32 src1, sljit_sw src1w,
2422 sljit_s32 src2, sljit_sw src2w)
2423 {
2424 sljit_s32 inp_flags;
2425
2426 CHECK_ERROR();
2427 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2428 ADJUST_LOCAL_OFFSET(dst, dstw);
2429 ADJUST_LOCAL_OFFSET(src1, src1w);
2430 ADJUST_LOCAL_OFFSET(src2, src2w);
2431
2432 switch (GET_OPCODE(op)) {
2433 case SLJIT_ADD:
2434 case SLJIT_ADDC:
2435 case SLJIT_SUB:
2436 case SLJIT_SUBC:
2437 return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2438
2439 case SLJIT_OR:
2440 return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2441
2442 case SLJIT_XOR:
2443 inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
2444 if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
2445 inp_flags |= ALLOW_INV_IMM;
2446 }
2447 return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);
2448
2449 case SLJIT_MUL:
2450 return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2451
2452 case SLJIT_AND:
2453 return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
2454
2455 case SLJIT_SHL:
2456 case SLJIT_MSHL:
2457 case SLJIT_LSHR:
2458 case SLJIT_MLSHR:
2459 case SLJIT_ASHR:
2460 case SLJIT_MASHR:
2461 case SLJIT_ROTL:
2462 case SLJIT_ROTR:
2463 if (src2 == SLJIT_IMM) {
2464 compiler->shift_imm = src2w & 0x1f;
2465 return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
2466 } else {
2467 compiler->shift_imm = 0x20;
2468 return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2469 }
2470 }
2471
2472 return SLJIT_SUCCESS;
2473 }
2474
/* Flag-only variant of sljit_emit_op2: the numeric result is discarded.
   Passing TMP_REG1 as the destination makes emit_op set UNUSED_RETURN. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
2485
/* Two-operand operation whose destination must be a register (used for
   fused operations such as multiply-add). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_MULADD:
		/* dst_reg += src1 * src2, encoded as MLA by emit_single_op. */
		return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w);
	}

	return SLJIT_SUCCESS;
}
2503
/* Emits a funnel shift: dst = src1 shifted by src3 with the vacated bits
   filled from src2. Implemented as a pair of opposite shifts combined
   with ORR; equal sources degenerate to a rotate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		/* Shifting a value into itself is simply a rotation. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Shift type of ROR is 3. */
	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		/* Zero shift would need an invalid 32-bit complement shift;
		   dst already equals src1 semantically, so nothing to emit. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* dst = src1 shifted by src3w, then OR in src2 shifted the
		   opposite way by (32 - src3w). */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7)));
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	/* Masked variants (and a clobber hazard with dst) force the shift
	   amount to be reduced modulo 32 explicitly. */
	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	/* Register-amount case: dst = (src1 shift src3)
	   | ((src2 shift-opposite 1) shift-opposite (src3 ^ 0x1f)),
	   which equals the opposite shift by (32 - src3) without ever using
	   an amount of 32. */
	FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg)));
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7)));
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
	return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1));
}
2552
/* Emits a source-only operation: fast return (branch to the saved return
   address) or a cache prefetch hint. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is the link register (lr). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));

		return push_inst(compiler, BX | RM(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No frame adjustment is needed on this target. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch levels map to the same PLD-style transfer;
		   TMP_PC as the "register" selects the preload encoding. */
		SLJIT_ASSERT(src & SLJIT_MEM);
		return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2582
/* Emits a destination-only operation: fast-call entry (capture lr) or
   reading the return address back from the stack frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is the link register (lr) holding the return address. */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
		/* Memory destination handled by the common store below. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Reconstruct the offset of the saved return address: saved GP
		   registers, optional alignment word, then saved FP registers. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2622
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2623 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2624 {
2625 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2626
2627 if (type == SLJIT_GP_REGISTER)
2628 return reg_map[reg];
2629
2630 if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2631 return freg_map[reg];
2632
2633 if (type != SLJIT_SIMD_REG_128)
2634 return freg_map[reg] & ~0x1;
2635
2636 return -1;
2637 }
2638
/* Emits a caller-provided raw machine instruction word unchanged.
   The size argument is validated by the checker but unused here, since
   every ARM instruction is one sljit_ins wide. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	SLJIT_UNUSED_ARG(size);
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
2648
2649 /* --------------------------------------------------------------------- */
2650 /* Floating point operators */
2651 /* --------------------------------------------------------------------- */
2652
/* Setting bit 20 (the L bit) turns a VSTR base encoding into VLDR. */
#define FPU_LOAD (1 << 20)
/* Builds a VFP load/store: add selects the U bit (add vs. subtract the
   offset from base); offs is the immediate offset in words (offset/4). */
#define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
	((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
2656
/* Emits a VFP load/store for the memory operand (arg, argw). VLDR/VSTR
   only accept word-aligned immediate offsets up to 0x3fc (8 bit value
   scaled by 4), so larger offsets are folded into TMP_REG1 first. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* VFP transfers have no register-offset form: compute the
		   address base + (index << shift) into TMP_REG1 first. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
		arg = TMP_REG1;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		/* Offset fits (positive or negative) in the scaled 8 bit field. */
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Fold the part of the offset above 0x3fc into the base with a
		   single ADD/SUB when it is an encodable immediate. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* Fallback: materialize the full offset (plus base, if any). */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1)));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
}
2700
/* Converts a float/double to a signed word: VCVT into TMP_FREG1, then
   move the integer bit pattern to a GP register or store it directly. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* SLJIT_32 set means single precision in the public API; the VFP
	   encodings below use the inverted sense, hence the flip. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));

	if (FAST_IS_REG(dst))
		/* VMOV with bit 20 set transfers from the VFP register to ARM. */
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2720
/* Shared helper for int-to-float conversions: gets the integer bit
   pattern into TMP_FREG1, applies the VCVT variant given in ins, and
   stores the result when dst is a memory operand. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		/* Move the integer from the ARM register into the VFP register. */
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate source: build it in TMP_REG1 and transfer it over. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2744
/* Signed word to float/double conversion (VCVT.F32/F64 from S32). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* ~op flips SLJIT_32 to match the inverted precision sense of the
	   VFP encodings (see sljit_emit_fop1_conv_sw_from_f64). */
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2751
/* Unsigned word to float/double conversion (VCVT.F32/F64 from U32). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* ~op flips SLJIT_32 to match the inverted precision sense of the
	   VFP encodings. */
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2758
/* Compares two floating point values: VCMP followed by VMRS to copy the
   FP status flags into the ARM flags. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* The VFP encodings use the inverted sense of SLJIT_32. */
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
	FAIL_IF(push_inst(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* UNORDERED_OR_EQUAL needs an extra conditional compare (executed
	   only on VS, i.e. when the comparison was unordered) to merge the
	   unordered case into the equal condition. */
	return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
}
2783
/* Single-operand floating point operations: move, negate, absolute
   value and f32<->f64 conversion. Memory sources are loaded into the
   destination register first, and memory destinations are stored at
   the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	/* The code below relies on SLJIT_32 being exactly this bit. */
	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Flip the precision bit (internal convention: SLJIT_32 set means
	   double). For F64_FROM_F32 the source precision differs from the
	   destination, so the bit is flipped after the conversion instead. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
			else
				/* Storing to memory: store directly from src. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
		/* Now the value has the destination precision. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2830
/* Two-operand floating point operations: add, sub, mul, div and
   copysign. Memory operands are loaded into the temporary FP
   registers first; a memory destination is stored at the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Flip the precision bit (internal convention: SLJIT_32 set means
	   double precision for the FPU helpers in this file). */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the word of src2 that holds the sign bit into TMP_REG1
		   (bit 20 selects the "to core register" transfer direction). */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		/* dst_r = |src1|, then negate it if src2's sign was set. */
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
		/* 0xb0000000 is the LT condition: taken when the sign bit made
		   the compare negative. */
		return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
	}

	if (dst_r != dst)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
2883
2884 #undef EMIT_FPU_DATA_TRANSFER
2885
/* Set a single precision register to an immediate value. When NEON is
   available and the bit pattern fits the 8-bit VMOV (immediate)
   encoding, a single instruction is emitted; otherwise the bits are
   loaded into a core register and transferred. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-pun: reinterpret the float's bits as an integer. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only if the low 19 mantissa bits are zero... */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		/* ...and the exponent is in the narrow range the immediate
		   form can express. */
		if (exp == 0x20 || exp == 0x1f) {
			/* Pack sign + 7 payload bits into the split imm8 field. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: materialize the raw bits and move them to the VFP reg. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
}
2917
/* Set a double precision register to an immediate value. Uses the
   VMOV (immediate) encoding when possible (NEON); otherwise the two
   32-bit halves are loaded into core registers and moved with VMOV2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-pun: low word in imm[0], high word in imm[1]
	   (little-endian layout assumed throughout). */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only if all mantissa bits below bit 48 are zero... */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		/* ...and the exponent fits the immediate form's range. */
		if (exp == 0x100 || exp == 0xff) {
			/* Pack sign + 7 payload bits; (1 << 8) selects the
			   double precision variant. */
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* If both halves are identical, one core register suffices. */
	if (u.imm[0] == u.imm[1])
		return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
}
2953
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2954 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2955 sljit_s32 freg, sljit_s32 reg)
2956 {
2957 sljit_s32 reg2;
2958 sljit_ins inst;
2959
2960 CHECK_ERROR();
2961 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2962
2963 if (reg & REG_PAIR_MASK) {
2964 reg2 = REG_PAIR_SECOND(reg);
2965 reg = REG_PAIR_FIRST(reg);
2966
2967 inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg);
2968 } else {
2969 inst = VMOV | VN(freg) | RD(reg);
2970
2971 if (!(op & SLJIT_32))
2972 inst |= 1 << 7;
2973 }
2974
2975 if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
2976 inst |= 1 << 20;
2977
2978 return push_inst(compiler, inst);
2979 }
2980
2981 /* --------------------------------------------------------------------- */
2982 /* Conditional instructions */
2983 /* --------------------------------------------------------------------- */
2984
/* Map an sljit condition type to the ARM condition code field
   (bits 31..28 of the instruction). SLJIT_CARRY/SLJIT_OVERFLOW and
   their negations depend on whether the last flag-setting operation
   was an addition or subtraction, recorded in status_flags_state. */
static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x10000000; /* NE */

	case SLJIT_CARRY:
		/* After an addition, carry set means carry-out. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x20000000; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x30000000; /* CC */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x30000000; /* CC */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x20000000; /* CS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x80000000; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x90000000; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb0000000; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa0000000; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc0000000; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd0000000; /* LE */

	case SLJIT_OVERFLOW:
		/* Without a preceding add/sub, overflow is tracked as NE. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x10000000; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x60000000; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x00000000; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x70000000; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x40000000; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x50000000; /* PL */

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
		return 0xe0000000; /* AL (always) */
	}
}
3073
sljit_emit_label(struct sljit_compiler * compiler)3074 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3075 {
3076 struct sljit_label *label;
3077
3078 CHECK_ERROR_PTR();
3079 CHECK_PTR(check_sljit_emit_label(compiler));
3080
3081 if (compiler->last_label && compiler->last_label->size == compiler->size)
3082 return compiler->last_label;
3083
3084 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3085 PTR_FAIL_IF(!label);
3086 set_label(label, compiler);
3087 return label;
3088 }
3089
/* Emit a (possibly conditional) jump or call whose target is patched
   later. On ARMv6 the target goes through a literal pool load; on
   newer cores space is reserved (JUMP_MAX_SIZE) and the final
   instruction sequence is produced at generation time. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* Strip flag bits; only the condition/jump kind remains. */
	type &= 0xff;

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));

	jump->addr = compiler->size;
	/* Conditional PC-relative load of the target: directly into PC for
	   plain jumps, into TMP_REG1 for calls (followed by blx). */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		compiler->patches++;

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		jump->addr = compiler->size;
		PTR_FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	/* Placeholder BX/BLX through TMP_REG1; extra words are reserved so
	   the final form (up to JUMP_MAX_SIZE words) fits when patched. */
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return jump;
}
3129
3130 #ifdef __SOFTFP__
3131
softfloat_call_with_args(struct sljit_compiler * compiler,sljit_s32 arg_types,sljit_s32 * src,sljit_u32 * extra_space)3132 static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
3133 {
3134 sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
3135 sljit_u32 offset = 0;
3136 sljit_u32 word_arg_offset = 0;
3137 sljit_u32 src_offset = 4 * sizeof(sljit_sw);
3138 sljit_u32 float_arg_count = 0;
3139 sljit_s32 types = 0;
3140 sljit_u8 offsets[4];
3141 sljit_u8 *offset_ptr = offsets;
3142
3143 if (src && FAST_IS_REG(*src))
3144 src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);
3145
3146 arg_types >>= SLJIT_ARG_SHIFT;
3147
3148 while (arg_types) {
3149 types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);
3150
3151 switch (arg_types & SLJIT_ARG_MASK) {
3152 case SLJIT_ARG_TYPE_F64:
3153 if (offset & 0x7)
3154 offset += sizeof(sljit_sw);
3155 *offset_ptr++ = (sljit_u8)offset;
3156 offset += sizeof(sljit_f64);
3157 float_arg_count++;
3158 break;
3159 case SLJIT_ARG_TYPE_F32:
3160 *offset_ptr++ = (sljit_u8)offset;
3161 offset += sizeof(sljit_f32);
3162 float_arg_count++;
3163 break;
3164 default:
3165 *offset_ptr++ = (sljit_u8)offset;
3166 offset += sizeof(sljit_sw);
3167 word_arg_offset += sizeof(sljit_sw);
3168 break;
3169 }
3170
3171 arg_types >>= SLJIT_ARG_SHIFT;
3172 }
3173
3174 if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
3175 /* Keep lr register on the stack. */
3176 if (is_tail_call)
3177 offset += sizeof(sljit_sw);
3178
3179 offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;
3180
3181 *extra_space = offset;
3182
3183 if (is_tail_call)
3184 FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
3185 else
3186 FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
3187 } else {
3188 if (is_tail_call)
3189 FAIL_IF(emit_stack_frame_release(compiler, -1));
3190 *extra_space = 0;
3191 }
3192
3193 /* Process arguments in reversed direction. */
3194 while (types) {
3195 switch (types & SLJIT_ARG_MASK) {
3196 case SLJIT_ARG_TYPE_F64:
3197 float_arg_count--;
3198 offset = *(--offset_ptr);
3199
3200 SLJIT_ASSERT((offset & 0x7) == 0);
3201
3202 if (offset < 4 * sizeof(sljit_sw)) {
3203 if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
3204 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
3205 *src = TMP_REG1;
3206 }
3207 FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
3208 } else
3209 FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
3210 | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
3211 break;
3212 case SLJIT_ARG_TYPE_F32:
3213 float_arg_count--;
3214 offset = *(--offset_ptr);
3215
3216 if (offset < 4 * sizeof(sljit_sw)) {
3217 if (src_offset == offset) {
3218 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
3219 *src = TMP_REG1;
3220 }
3221 FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
3222 } else
3223 FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
3224 | (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
3225 break;
3226 default:
3227 word_arg_offset -= sizeof(sljit_sw);
3228 offset = *(--offset_ptr);
3229
3230 SLJIT_ASSERT(offset >= word_arg_offset);
3231
3232 if (offset != word_arg_offset) {
3233 if (offset < 4 * sizeof(sljit_sw)) {
3234 if (src_offset == offset) {
3235 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
3236 *src = TMP_REG1;
3237 }
3238 else if (src_offset == word_arg_offset) {
3239 *src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
3240 src_offset = offset;
3241 }
3242 FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
3243 } else
3244 FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
3245 }
3246 break;
3247 }
3248
3249 types >>= SLJIT_ARG_SHIFT;
3250 }
3251
3252 return SLJIT_SUCCESS;
3253 }
3254
softfloat_post_call_with_args(struct sljit_compiler * compiler,sljit_s32 arg_types)3255 static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
3256 {
3257 if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
3258 FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
3259 if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32)
3260 FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12)));
3261
3262 return SLJIT_SUCCESS;
3263 }
3264
3265 #else /* !__SOFTFP__ */
3266
hardfloat_call_with_args(struct sljit_compiler * compiler,sljit_s32 arg_types)3267 static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
3268 {
3269 sljit_u32 offset = SLJIT_FR0;
3270 sljit_u32 new_offset = SLJIT_FR0;
3271 sljit_u32 f32_offset = 0;
3272
3273 /* Remove return value. */
3274 arg_types >>= SLJIT_ARG_SHIFT;
3275
3276 while (arg_types) {
3277 switch (arg_types & SLJIT_ARG_MASK) {
3278 case SLJIT_ARG_TYPE_F64:
3279 if (offset != new_offset)
3280 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
3281 SLJIT_32, new_offset, offset, 0)));
3282
3283 new_offset++;
3284 offset++;
3285 break;
3286 case SLJIT_ARG_TYPE_F32:
3287 if (f32_offset != 0) {
3288 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
3289 0x400000, f32_offset, offset, 0)));
3290 f32_offset = 0;
3291 } else {
3292 if (offset != new_offset)
3293 FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
3294 0, new_offset, offset, 0)));
3295 f32_offset = new_offset;
3296 new_offset++;
3297 }
3298 offset++;
3299 break;
3300 }
3301 arg_types >>= SLJIT_ARG_SHIFT;
3302 }
3303
3304 return SLJIT_SUCCESS;
3305 }
3306
3307 #endif /* __SOFTFP__ */
3308
/* Emit a call to a patched-later target, handling argument marshalling
   for the active float ABI and the SLJIT_CALL_RETURN (tail call)
   variant. For soft-float with stack-passed arguments a tail call must
   still return here to release the stack, so the return address is
   kept and restored manually. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no extra stack: plain jump suffices. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Reload the saved return address before releasing the
			   argument area, then return through it. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	/* Tail call: release this function's frame and emit a jump. */
	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3364
/* Indirect jump or call. Register and memory targets become an
   immediate BX/BLX; immediate (absolute address) targets create a
   jump record so the address can be encoded/patched at generation
   time, with the same ARMv6 literal-pool handling as sljit_emit_jump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		/* Memory target: load the address, then jump through it. */
		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	jump->addr = compiler->size;
	/* PC-relative load from the literal pool: straight into PC for
	   jumps, into TMP_REG1 (followed by blx) for calls. */
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL) {
		jump->addr = compiler->size;
		FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	/* Placeholder; extra words reserved for the final patched form. */
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return SLJIT_SUCCESS;
}
3408
/* Indirect call: like sljit_emit_call but the target comes from a
   register, memory operand, or immediate. The target is moved out of
   any register that argument marshalling or frame release would
   clobber before the jump is emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* Tail call with the target in a saved register: the frame release
	   below restores saved registers, so copy the target out first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no extra stack: plain indirect jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved return address, release the argument
			   area, and return through TMP_REG2 for tail calls. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3470
3471 #ifdef __SOFTFP__
3472
emit_fmov_before_return(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3473 static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
3474 {
3475 if (compiler->options & SLJIT_ENTER_REG_ARG) {
3476 if (src == SLJIT_FR0)
3477 return SLJIT_SUCCESS;
3478
3479 SLJIT_SKIP_CHECKS(compiler);
3480 return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
3481 }
3482
3483 if (FAST_IS_REG(src)) {
3484 if (op & SLJIT_32)
3485 return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
3486 return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
3487 }
3488
3489 SLJIT_SKIP_CHECKS(compiler);
3490
3491 if (op & SLJIT_32)
3492 return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
3493 return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
3494 }
3495
3496 #endif /* __SOFTFP__ */
3497
/* Materialize a condition flag as 0/1 into dst (op < SLJIT_ADD), or
   combine it into dst with AND/OR/XOR, using ARM conditional
   execution instead of branches. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* dst = 0; then conditionally dst = 1. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	/* Conditionally apply the operation with immediate 1. */
	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* AND also needs the opposite case (cc ^ 0x10000000 inverts the
	   condition): clear dst when the condition does not hold. */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* Re-derive Z from the final value when requested. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
3538
/* Conditional select: dst_reg = condition(type) ? src1 : src2_reg,
   implemented with a conditional MOV/MVN/MOVW-MOVT. The operands are
   first normalized so src2 ends up in dst_reg unconditionally and the
   condition is inverted (type ^ 0x1) whenever they are swapped. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* src1 already occupies dst_reg: swap the operands and invert
	   the condition instead of copying. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			/* Loaded into dst_reg: conditionally overwrite with src2. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
		/* Try the rotated-immediate forms of MOV and MVN first. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		/* Conditional MOVW (and MOVT for the high half if needed). */
		tmp = (sljit_ins)src1w;
		FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
		if (tmp <= 0xffff)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
#else /* !SLJIT_CONFIG_ARM_V7 */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
#endif /* SLJIT_CONFIG_ARM_V7 */
	}

	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
}
3597
/* Floating point conditional select: dst_freg = condition(type) ? src1
   : src2_freg, implemented with a conditional VMOV. src2 is first
   placed in dst_freg (or the operands are swapped with the condition
   inverted), then src1 is conditionally moved over it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* Flip the precision bit (internal convention, see fop1/fop2). */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap operands and invert the condition. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	cc = get_cc(compiler, type & ~SLJIT_32);
	return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
3629
3630 #undef EMIT_FPU_OPERATION
3631
update_mem_addr(struct sljit_compiler * compiler,sljit_s32 * mem,sljit_sw * memw,sljit_s32 max_offset)3632 static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
3633 {
3634 sljit_s32 arg = *mem;
3635 sljit_sw argw = *memw;
3636 sljit_uw imm, tmp;
3637 sljit_sw mask = 0xfff;
3638 sljit_sw sign = 0x1000;
3639
3640 SLJIT_ASSERT(max_offset >= 0xf00);
3641
3642 *mem = TMP_REG1;
3643
3644 if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
3645 *memw = 0;
3646 return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
3647 }
3648
3649 arg &= REG_MASK;
3650
3651 if (arg) {
3652 if (argw <= max_offset && argw >= -mask) {
3653 *mem = arg;
3654 return SLJIT_SUCCESS;
3655 }
3656
3657 if (argw >= 0) {
3658 tmp = (sljit_uw)(argw & (sign | mask));
3659 tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
3660 imm = get_imm(tmp);
3661
3662 if (imm) {
3663 *memw = argw - (sljit_sw)tmp;
3664 SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);
3665
3666 return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
3667 }
3668 } else {
3669 tmp = (sljit_uw)(-argw & (sign | mask));
3670 tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
3671 imm = get_imm(tmp);
3672
3673 if (imm) {
3674 *memw = argw + (sljit_sw)tmp;
3675 SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);
3676
3677 return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
3678 }
3679 }
3680 }
3681
3682 tmp = (sljit_uw)(argw & (sign | mask));
3683 tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
3684 *memw = argw - (sljit_sw)tmp;
3685
3686 FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));
3687
3688 if (arg == 0)
3689 return SLJIT_SUCCESS;
3690
3691 return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
3692 }
3693
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3694 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3695 sljit_s32 reg,
3696 sljit_s32 mem, sljit_sw memw)
3697 {
3698 sljit_s32 flags;
3699
3700 CHECK_ERROR();
3701 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3702
3703 if (!(reg & REG_PAIR_MASK))
3704 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3705
3706 ADJUST_LOCAL_OFFSET(mem, memw);
3707
3708 FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
3709
3710 flags = WORD_SIZE;
3711
3712 if (!(type & SLJIT_MEM_STORE)) {
3713 if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
3714 FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
3715 return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
3716 }
3717
3718 flags = WORD_SIZE | LOAD_DATA;
3719 }
3720
3721 FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
3722 return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
3723 }
3724
/* Emits a memory access that also updates the base register (pre- or
   post-indexed addressing). Returns SLJIT_ERR_UNSUPPORTED when the
   offset cannot be encoded in the selected transfer form; when
   SLJIT_MEM_SUPP is set only this support check is performed and no
   instruction is emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Type1 transfers (word / unsigned byte) take a 12 bit immediate or
	   a shifted register offset; type2 transfers (halfword and signed
	   loads) only allow an 8 bit immediate or an unshifted register. */
	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Signed byte stores are plain byte stores; only the load
		   needs the sign extending (type2) form. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	/* Reject operands that do not fit the chosen encoding. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Type2 transfers cannot shift the index register. */
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		memw &= 0x3;

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));

		/* Bit 25 selects the register offset form of type1 transfers. */
		if (is_type1_transfer)
			inst |= (1 << 25);

		/* Post-index: clear the P bit (24); pre-index: set the
		   write-back W bit (21). */
		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24);
		else
			inst |= (1 << 21);

		return push_inst(compiler, inst);
	}

	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	/* Same P/W bit handling for the immediate offset forms. */
	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24);
	else
		inst |= (1 << 21);

	if (is_type1_transfer) {
		/* Bit 23 (U): add the offset; otherwise negate and subtract. */
		if (memw >= 0)
			inst |= (1 << 23);
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_ins)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23);
	else
		memw = -memw;

	/* Type2 immediates are split into two 4 bit halves. */
	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
3825
/* Emits a floating point load/store that may be unaligned. Aligned
   accesses (SLJIT_MEM_ALIGNED_32) go straight through the FPU; the
   unaligned path transfers the value through core registers using
   word sized integer accesses. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* VMOV with bit 20 set copies from the VFP register to the
		   core register TMP_REG2. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		/* Doubles need two word stores; keep room for the +4 offset. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		/* The 0x80 bit selects the upper half of the double register. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	/* Load both words, then VMOV2 moves the two core registers into
	   the double register in one instruction. */
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
}
3862
/* Reduces a SIMD memory operand to a single base register: on return
   *mem_ptr is a register holding the full address (TMP_REG1 when any
   address arithmetic had to be emitted). */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 base = *mem_ptr;
	sljit_uw offset_imm;

	/* Base + shifted index: compute the address into TMP_REG1. */
	if (SLJIT_UNLIKELY(base & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(base & REG_MASK) | RM(OFFS_REG(base)) | ((sljit_ins)(memw & 0x3) << 7));
	}

	/* Absolute address: materialize it as a constant. */
	if (SLJIT_UNLIKELY(!(base & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	base &= REG_MASK;

	/* Zero offset: the base register can be used as-is. */
	if (memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Prefer a single add/subtract when the offset is encodable. */
	offset_imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));
	if (offset_imm != 0)
		return push_inst(compiler, ((memw < 0) ? SUB : ADD) | RD(TMP_REG1) | RN(base) | offset_imm);

	/* Fallback: load the offset, then add the base register. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(base));
}
3894
simd_get_quad_reg_index(sljit_s32 freg)3895 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3896 {
3897 freg += freg & 0x1;
3898
3899 SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3900
3901 if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3902 freg--;
3903
3904 return freg;
3905 }
3906
3907 #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3908
/* Moves a whole SIMD register to/from another register or memory.
   reg_size: 3 = 8 byte (D register), 4 = 16 byte (Q register). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (!(srcdst & SLJIT_MEM)) {
		/* Register to register: VORR with identical sources acts as a
		   move; bit 6 is the Q (quad) bit. */
		if (reg_size == 4)
			srcdst = simd_get_quad_reg_index(srcdst);

		if (type & SLJIT_SIMD_STORE)
			ins = VD(srcdst) | VN(freg) | VM(freg);
		else
			ins = VD(freg) | VN(srcdst) | VM(srcdst);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VORR | ins);
	}

	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

	if (elem_size > 3)
		elem_size = 3;

	/* VLD1/VST1 register list: 0x7 transfers one D register, 0xa
	   transfers two consecutive D registers (a full Q register). */
	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg)
		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));

	SLJIT_ASSERT(reg_size >= alignment);

	/* Alignment hint bits; the second branch fires for alignment >= 4
	   since alignment == 3 is taken by the first. */
	if (alignment == 3)
		ins |= 0x10;
	else if (alignment >= 3)
		ins |= 0x20;

	/* 0xf in the Rm field: no base register write-back. */
	return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
}
3967
/* Tries to encode 'value' (replicated into every lane of size
   elem_size) as a NEON modified immediate. Returns the combined
   cmode/op and abcdefgh instruction fields for VMOV_i, or ~0 when the
   value cannot be encoded. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* Narrow the element size while the value is the same pattern
	   repeated in each half; smaller elements encode more values. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		/* 8 bit elements: any byte value is encodable. */
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		/* 16 bit elements: one byte of the halfword must be zero,
		   possibly after inverting the value (bit 5 selects the
		   inverted/VMVN form). */
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* Already tried the inverted value: give up. */
			if (result != 0)
				return ~(sljit_ins)0;

			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		/* 32 bit elements: a single non-zero byte at any position, or
		   the "shifted ones" forms (0x0000XXff / 0x00XXffff), again
		   optionally on the inverted value. */
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			/* Already tried the inverted value: give up. */
			if (result != 0)
				return ~(sljit_ins)0;

			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the 8 immediate bits into the a:bcd:efgh fields. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
}
4061
/* Broadcasts an immediate, register, or memory value into every lane
   of a SIMD register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Zero is always encodable as a VMOV immediate (bit 6 = Q bit). */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64 bit lanes: copy the double into both halves of the quad
		   register with VORR register moves. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1 (single element to all lanes); bit 5 fills both D
		   registers of a quad, 0xf in Rm means no write-back. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 5;

		return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* VDUP (scalar): replicate one 32 bit lane of src; the imm4
		   field holds the lane index and a size marker bit. */
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Truncate the immediate to the element size, then try the
		   NEON modified immediate encodings before falling back to a
		   core register broadcast. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst(compiler, VMOV_i | imm | VD(freg));
		}

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP (core register): the b/e bits encode the element size. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	/* Bit 21: fill a whole quad register. */
	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst(compiler, VDUP | ins | VN(freg) | RD(src));
}
4161
/* Moves a single lane of a SIMD register to/from a register, memory,
   or an immediate. SLJIT_SIMD_LANE_ZERO clears all other lanes first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		/* Q bit when clearing a whole quad register. */
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				/* Double lane from a register: move it into the
				   requested half, zero the other half. */
				if (lane_index == 1)
					freg += SLJIT_QUAD_OTHER_HALF(freg);

				if (srcdst != freg)
					FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst)));

				freg += SLJIT_QUAD_OTHER_HALF(freg);
				return push_inst(compiler, VMOV_i | VD(freg));
			}

			/* Save the source before clearing when source and
			   destination registers overlap. */
			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
				FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Clear every lane (VMOV immediate zero). */
		FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg)));
	}

	/* Lanes in the upper half of a quad live in the other D register. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		freg += SLJIT_QUAD_OTHER_HALF(freg);
	}

	if (srcdst & SLJIT_MEM) {
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* VLD1/VST1 (single element): size and index_align fields;
		   0xf in Rm means no base write-back. */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg));
			return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* When the lane is directly addressable as an S register,
			   a plain VFP move is enough. */
			if (freg_ebit_map[freg] == 0) {
				if (lane_index == 1)
					freg = SLJIT_F64_SECOND(freg);

				return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg));
			}

			/* Otherwise route the value through a core register. */
			FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)));
			return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
		}

		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
		return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Truncate the immediate to the element size. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV (scalar <-> core register): element size selector bits. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	/* Split the byte-scaled lane index into the opc1/opc2 fields. */
	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		/* Bit 20: transfer to the core register; bit 23 selects zero
		   (rather than sign) extension for sub-word lanes. */
		ins |= (1 << 20);

		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst));
}
4278
/* Broadcasts one lane of src into every lane of freg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of the quad live in the other
		   D register of the pair. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64 bit lanes have no VDUP form: copy the selected double
		   into both halves with VORR register moves. */
		if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP (scalar) imm4 field: lane index followed by a set bit that
	   marks the element size; bit 6 is the Q bit. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6;

	return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
}
4327
/* Widens the low lanes of src (element size elem_size) into freg with
   element size elem2_size: integer lanes via VSHLL (zero or sign
   extension), float lanes via VCVT.F64.F32. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* Load only as many source bytes as the extension consumes:
		   a whole D register for a single widening step on a quad,
		   otherwise a partial (single element) load. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf));
		else
			FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf));
		src = freg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;

		/* One VSHLL per doubling step; bit 24 selects the unsigned
		   (zero extending) form. */
		do {
			FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
				| ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (freg == src) {
		/* Convert the upper lane first so the lower source lane is
		   not overwritten (0x20 selects the odd source S register). */
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20));
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src));
	}

	FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)));
	freg += SLJIT_QUAD_OTHER_HALF(freg);
	return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20);
}
4392
/* Collects the sign (most significant) bit of every lane into an
   integer bit mask stored in dst. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* 'ins' is an initial unsigned VSHR that moves each sign bit to
	   the bottom of its lane; 'imms' packs the VSRA shift-accumulate
	   amounts that fold the lane bits together, one byte per step,
	   executed low byte first. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 24) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 24) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 24) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		ins |= (sljit_ins)1 << 6;
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg)));

	/* Narrow a quad intermediate back into a D register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Apply the packed shift-accumulate sequence. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	/* Extract the accumulated mask into a core register. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));

	if (reg_size == 4 && elem_size == 0) {
		/* 16 byte lanes: merge the upper half's mask (shifted left by
		   8, via the shifted ORR operand) into the result. */
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
		FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4469
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4470 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4471 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4472 {
4473 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4474 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4475 sljit_ins ins = 0;
4476
4477 CHECK_ERROR();
4478 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4479
4480 if (reg_size != 3 && reg_size != 4)
4481 return SLJIT_ERR_UNSUPPORTED;
4482
4483 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4484 return SLJIT_ERR_UNSUPPORTED;
4485
4486 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4487 case SLJIT_SIMD_OP2_AND:
4488 ins = VAND;
4489 break;
4490 case SLJIT_SIMD_OP2_OR:
4491 ins = VORR;
4492 break;
4493 case SLJIT_SIMD_OP2_XOR:
4494 ins = VEOR;
4495 break;
4496 }
4497
4498 if (type & SLJIT_SIMD_TEST)
4499 return SLJIT_SUCCESS;
4500
4501 if (reg_size == 4) {
4502 dst_freg = simd_get_quad_reg_index(dst_freg);
4503 src1_freg = simd_get_quad_reg_index(src1_freg);
4504 src2_freg = simd_get_quad_reg_index(src2_freg);
4505 ins |= (sljit_ins)1 << 6;
4506 }
4507
4508 return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
4509 }
4510
4511 #undef FPU_LOAD
4512
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4513 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4514 sljit_s32 dst_reg,
4515 sljit_s32 mem_reg)
4516 {
4517 sljit_u32 ins;
4518
4519 CHECK_ERROR();
4520 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4521
4522 switch (GET_OPCODE(op)) {
4523 case SLJIT_MOV_U8:
4524 ins = LDREXB;
4525 break;
4526 case SLJIT_MOV_U16:
4527 ins = LDREXH;
4528 break;
4529 default:
4530 ins = LDREX;
4531 break;
4532 }
4533
4534 return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg));
4535 }
4536
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4537 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4538 sljit_s32 src_reg,
4539 sljit_s32 mem_reg,
4540 sljit_s32 temp_reg)
4541 {
4542 sljit_u32 ins;
4543
4544 /* temp_reg == mem_reg is undefined so use another temp register */
4545 SLJIT_UNUSED_ARG(temp_reg);
4546
4547 CHECK_ERROR();
4548 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4549
4550 switch (GET_OPCODE(op)) {
4551 case SLJIT_MOV_U8:
4552 ins = STREXB;
4553 break;
4554 case SLJIT_MOV_U16:
4555 ins = STREXH;
4556 break;
4557 default:
4558 ins = STREX;
4559 break;
4560 }
4561
4562 FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
4563 if (op & SLJIT_SET_ATOMIC_STORED)
4564 return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
4565
4566 return SLJIT_SUCCESS;
4567 }
4568
/* Emits a patchable constant load of init_value into dst. On ARMv6 the
   value goes into a literal pool and is loaded PC relative; on ARMv7 it
   is materialized inline (emit_imm). The returned descriptor allows the
   value to be changed later via sljit_set_const. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* PC relative load from the literal pool; the pool entry is
	   patched later, so count it as a patch site. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
		EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif /* SLJIT_CONFIG_ARM_V6 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
4596
/* Emits a patchable sequence that loads the address of a label/target
   into dst once addresses are resolved at code generation time. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* PC relative load from the literal pool (value patched later). */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* NOTE(review): placeholder word carrying only the destination
	   register; appears to be rewritten to a MOVW/MOVT pair when the
	   target is resolved — the extra word is reserved below. Confirm
	   against the generate_code path. */
	PTR_FAIL_IF(push_inst(compiler, RD(dst_r)));
#endif /* SLJIT_CONFIG_ARM_V6 */

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 1);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	/* Reserve space for the second instruction of the pair. */
	compiler->size += 1;
#endif /* SLJIT_CONFIG_ARM_V7 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return jump;
}
4627
/* Patches the target of an already generated jump at run time. The
   trailing argument presumably requests an instruction cache flush —
   verify against set_jump_addr's declaration. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	set_jump_addr(addr, executable_offset, new_target, 1);
}
4632
/* Rewrites the value of a constant created by sljit_emit_const. The
   trailing argument presumably requests an instruction cache flush —
   verify against set_const_value's declaration. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1);
}
4637