1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
/* Report which floating point ABI this binary was built for. */
#ifdef __SOFTFP__
#define ARM_ABI_INFO " ABI:softfp"
#else
#define ARM_ABI_INFO " ABI:hardfp"
#endif
32
sljit_get_platform_name(void)33 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
34 {
35 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
36 return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
37 #elif (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
38 return "ARMv5" SLJIT_CPUINFO ARM_ABI_INFO;
39 #else
40 #error "Internal error: Unknown ARM architecture"
41 #endif
42 }
43
/* Last register + 1. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC (SLJIT_NUMBER_OF_REGISTERS + 4)	/* Last reg_map entry: r15 (pc). */

#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT 8
#define CONST_POOL_EMPTY 0xffffffff

/* Rounds ptr up to the next CONST_POOL_ALIGNMENT word boundary. */
#define ALIGN_INSTRUCTION(ptr) \
	(sljit_uw*)(((sljit_uw)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_uw)) - 1))
/* Converts a byte distance limit into an instruction word count,
   leaving room for the worst case alignment padding. */
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_uw)) - (CONST_POOL_ALIGNMENT - 1))

/* Maps abstract sljit register numbers to physical ARM registers.
   See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Same mapping for the VFP (single precision) registers. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
	0, 0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8, 6, 7
};

/* Place a mapped register number into the usual instruction fields. */
#define RM(rm) ((sljit_uw)reg_map[rm])
#define RM8(rm) ((sljit_uw)reg_map[rm] << 8)
#define RD(rd) ((sljit_uw)reg_map[rd] << 12)
#define RN(rn) ((sljit_uw)reg_map[rn] << 16)

#define VM(rm) ((sljit_uw)freg_map[rm])
#define VD(rd) ((sljit_uw)freg_map[rd] << 12)
#define VN(rn) ((sljit_uw)freg_map[rn] << 16)

/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* The instruction includes the AL condition.
   INST_NAME - CONDITIONAL remove this flag. */
#define COND_MASK 0xf0000000
#define CONDITIONAL 0xe0000000
#define PUSH_POOL 0xff000000	/* Internal marker word, not a real opcode. */

/* ARM opcodes, pre-encoded with the AL (always) condition. */
#define ADC 0xe0a00000
#define ADD 0xe0800000
#define AND 0xe0000000
#define B 0xea000000
#define BIC 0xe1c00000
#define BL 0xeb000000
#define BLX 0xe12fff30
#define BX 0xe12fff10
#define CLZ 0xe16f0f10
#define CMN 0xe1600000
#define CMP 0xe1400000
#define BKPT 0xe1200070
#define EOR 0xe0200000
#define LDR 0xe5100000
#define LDR_POST 0xe4100000
#define MOV 0xe1a00000
#define MUL 0xe0000090
#define MVN 0xe1e00000
#define NOP 0xe1a00000
#define ORR 0xe1800000
#define PUSH 0xe92d0000
#define POP 0xe8bd0000
#define RBIT 0xe6ff0f30
#define RSB 0xe0600000
#define RSC 0xe0e00000
#define SBC 0xe0c00000
#define SMULL 0xe0c00090
#define STR 0xe5000000
#define SUB 0xe0400000
#define TST 0xe1000000
#define UMULL 0xe0800090
/* VFP single precision opcodes. */
#define VABS_F32 0xeeb00ac0
#define VADD_F32 0xee300a00
#define VCMP_F32 0xeeb40a40
#define VCVT_F32_S32 0xeeb80ac0
#define VCVT_F64_F32 0xeeb70ac0
#define VCVT_S32_F32 0xeebd0ac0
#define VDIV_F32 0xee800a00
#define VLDR_F32 0xed100a00
#define VMOV_F32 0xeeb00a40
#define VMOV 0xee000a10
#define VMOV2 0xec400a10
#define VMRS 0xeef1fa10
#define VMUL_F32 0xee200a00
#define VNEG_F32 0xeeb10a40
#define VPOP 0xecbd0b00
#define VPUSH 0xed2d0b00
#define VSTR_F32 0xed000a00
#define VSUB_F32 0xee300a40

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
#define MOVW 0xe3000000
#define MOVT 0xe3400000
#define SXTB 0xe6af0070
#define SXTH 0xe6bf0070
#define UXTB 0xe6ef0070
#define UXTH 0xe6ff0070
#endif
149
150 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
151
/* Flushes the accumulated literal pool into the instruction stream:
   a PUSH_POOL marker word, alignment filler, then the literals.
   The second code generation pass expands this marker into real data.
   Returns SLJIT_SUCCESS or an allocation error. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_uw* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	/* Marker word: the PUSH_POOL tag combined with the literal count. */
	inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Filler words reserved for the CONST_POOL_ALIGNMENT padding. */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy the collected literals after the marker. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	/* The pool is empty again. */
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
189
push_inst(struct sljit_compiler * compiler,sljit_uw inst)190 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
191 {
192 sljit_uw* ptr;
193
194 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
195 FAIL_IF(push_cpool(compiler));
196
197 ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
198 FAIL_IF(!ptr);
199 compiler->size++;
200 *ptr = inst;
201 return SLJIT_SUCCESS;
202 }
203
/* Emits a pc relative load whose literal goes into the constant pool.
   Identical literals are shared: if the value is already present (and
   not marked unique) its pool index is reused. The low 12 bits of the
   emitted word temporarily hold the pool index; the second pass
   rewrites them into a real pc relative offset. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
	sljit_uw* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Search for an identical, shareable literal. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			/* Pool is full: flush it and start a fresh one. */
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	/* The immediate field of the instruction must still be free. */
	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	/* Remember where the currently open pool started. */
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
253
/* Like push_inst_with_literal, but the literal always gets its own
   pool slot, never shared with an equal value (the unique flag blocks
   the sharing search above) - presumably because the slot may be
   patched independently later. */
static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_uw inst, sljit_uw literal)
{
	sljit_uw* ptr;

	/* Flush the pool if it is full or about to go out of range. */
	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
	ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | compiler->cpool_fill;

	compiler->cpool[compiler->cpool_fill] = literal;
	compiler->cpool_unique[compiler->cpool_fill] = 1;
	compiler->cpool_fill++;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
273
prepare_blx(struct sljit_compiler * compiler)274 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
275 {
276 /* Place for at least two instruction (doesn't matter whether the first has a literal). */
277 if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
278 return push_cpool(compiler);
279 return SLJIT_SUCCESS;
280 }
281
/* Emits a BLX through TMP_REG1. The asserts guarantee that no pool
   flush can separate it from the instruction emitted after
   prepare_blx, and that TMP_REG1 is not the link register. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
290
/* Rewrites the pc relative loads between last_pc_patch and code_ptr so
   their 12 bit offsets point into the constant pool at const_pool.
   Literals that are never referenced are dropped, compacting the pool.
   Returns the number of literals still in use. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0c0f0000) == 0x040f0000) {
			diff = (sljit_uw)(const_pool - last_pc_patch);
			/* The low 12 bits still hold the pool index (see
			   push_inst_with_literal). */
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			if ((sljit_s32)const_pool[ind] < 0) {
				/* First reference: assign the next compacted slot. */
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			/* The -2 accounts for pc reading 8 bytes (2 words) ahead
			   of the load instruction on ARM. */
			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Offset would be negative: clear the U (add) bit and
				   use a 4 byte down displacement instead. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
335
/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
struct future_patch {
	struct future_patch* next;	/* Singly linked list of pending patches. */
	sljit_s32 index;		/* Pool slot whose original value was displaced. */
	sljit_s32 value;		/* The displaced value, to be restored later. */
};
342
/* Writes the literal *buf_ptr into its (possibly remapped) slot of the
   constant pool being emitted. patch_pc_relative_loads left the
   compacted slot numbers in cpool_start_address; when a literal must be
   stored into a slot whose mapping has not been consumed yet, that
   mapping is saved in a future_patch record and looked up later.
   Returns SLJIT_ERR_ALLOC_FAILED if a patch record cannot be
   allocated. */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* A mapping for this index may have been saved earlier. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				/* Found: consume and unlink the patch record. */
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* A negative value marks an unused literal: simply dropped. */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* The target slot still holds an unconsumed mapping;
			   save it before overwriting. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* Out of memory: release all pending patch records. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
395
396 #else
397
push_inst(struct sljit_compiler * compiler,sljit_uw inst)398 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_uw inst)
399 {
400 sljit_uw* ptr;
401
402 ptr = (sljit_uw*)ensure_buf(compiler, sizeof(sljit_uw));
403 FAIL_IF(!ptr);
404 compiler->size++;
405 *ptr = inst;
406 return SLJIT_SUCCESS;
407 }
408
emit_imm(struct sljit_compiler * compiler,sljit_s32 reg,sljit_sw imm)409 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
410 {
411 FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff)));
412 return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff));
413 }
414
415 #endif
416
/* Tries to shorten a jump into a direct B/BL instruction when its
   target lies within +/-32 MB and the jump is not rewritable.
   Returns non-zero when code_ptr can be decreased by the caller
   (the constant-load word(s) become unnecessary). */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	/* Rewritable jumps must keep their long form. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* For calls, step back to the instruction before the BLX. */
	if (jump->flags & IS_BL)
		code_ptr--;

	/* diff is relative to pc (two words ahead of the branch). */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (jump->flags & IS_BL) {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Preserve the condition of the original instruction. */
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else
	/* ARMv7: the target was loaded by a MOVW/MOVT pair which a single
	   branch can replace. */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		/* Overwrite the first word of the pair with the branch. */
		code_ptr -= 2;
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif
	return 0;
}
473
/* Redirects a rewritable jump to new_addr.
   On ARMv5 jump_ptr points to a two word descriptor: word 0 holds the
   address of the patchable instruction, word 1 the original
   mov/ldr-pc style instruction. The jump is converted to a direct B/BL
   when the target is in range; otherwise the original instruction is
   restored and its constant pool entry updated. On ARMv7 a MOVW/MOVT
   pair is rewritten in place. flush_cache selects whether WX flag
   updates and instruction cache maintenance are performed (it is 0
   while sljit_generate_code is still building the code). */
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw *ptr = (sljit_uw *)jump_ptr;
	sljit_uw *inst = (sljit_uw *)ptr[0];
	sljit_uw mov_pc = ptr[1];
	/* A call form writes to a register other than pc. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	/* Word distance from pc (two words ahead of inst) to the target. */
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			/* BL replaces the load; the following word becomes a NOP. */
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the original instruction if it was branchified. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		/* Store the new target in the constant pool slot. */
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else
	sljit_uw *inst = (sljit_uw*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Rewrite the MOVW/MOVT pair, keeping the destination register. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif
}
568
/* Forward declarations. */
static sljit_uw get_imm(sljit_uw imm);
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
572
/* Rewrites a constant created by sljit_emit_const to new_constant.
   On ARMv5 addr points to a two word descriptor (instruction address,
   original ldr-literal instruction). When the value fits an ARM
   immediate form, the load is turned into a MOV or MVN; otherwise the
   original load is restored and the pool entry updated. On ARMv7 the
   MOVW/MOVT pair is rewritten. flush_cache as in inline_set_jump_addr. */
static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw *ptr = (sljit_uw*)addr;
	sljit_uw *inst = (sljit_uw*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	/* Try to encode the value directly as a MOV immediate. */
	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* 0xe3a00000 is MOV (immediate); keep the Rd field. */
		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Try the inverted value as a MVN immediate. */
	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		/* 0xe3e00000 is MVN (immediate); keep the Rd field. */
		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Fall back to the constant pool entry. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the pc relative load if it was replaced by MOV/MVN. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else
	sljit_uw *inst = (sljit_uw*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Rewrite the MOVW/MOVT pair, keeping the destination register. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif
}
664
/* Second pass of code generation: copies the buffered instruction
   words into executable memory, expanding PUSH_POOL markers into real
   constant pools (ARMv5), shortening jumps where possible, and
   resolving labels, jumps, constants and put_labels. Finally flushes
   the instruction cache. Returns the executable code, or NULL with
   compiler->error set on failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_uw *code;
	sljit_uw *code_ptr;
	sljit_uw *buf_ptr;
	sljit_uw *buf_end;
	sljit_uw size;
	sljit_uw word_count;
	sljit_uw next_addr;
	sljit_sw executable_offset;
	sljit_uw addr;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_uw cpool_size;		/* Literals remaining in the pool being expanded. */
	sljit_uw cpool_skip_alignment;	/* Alignment filler words still to skip. */
	sljit_uw cpool_current_index;
	sljit_uw *cpool_start_address;
	sljit_uw *last_pc_patch;	/* First instruction not yet patched for pc loads. */
	struct future_patch *first_patch;
#endif

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	struct sljit_put_label *put_label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Second code generation pass. */
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Worst case size: every patch may add two words, plus the final
	   constant pool with its alignment padding. */
	size = compiler->size + (compiler->patches << 1);
	if (compiler->cpool_fill > 0)
		size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
#else
	size = compiler->size;
#endif
	code = (sljit_uw*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_uw), compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	cpool_size = 0;
	cpool_skip_alignment = 0;
	cpool_current_index = 0;
	cpool_start_address = NULL;
	first_patch = NULL;
	last_pc_patch = code;
#endif

	code_ptr = code;
	word_count = 0;
	next_addr = 1;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	put_label = compiler->put_labels;

	/* A label at offset 0 resolves immediately. */
	if (label && label->size == 0) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
		label = label->next;
	}

	/* Walk every buffered word across all memory fragments. */
	do {
		buf_ptr = (sljit_uw*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			word_count++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			if (cpool_size > 0) {
				/* Currently inside an expanding constant pool. */
				if (cpool_skip_alignment > 0) {
					buf_ptr++;
					cpool_skip_alignment--;
				}
				else {
					if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
						SLJIT_FREE_EXEC(code, compiler->exec_allocator_data);
						compiler->error = SLJIT_ERR_ALLOC_FAILED;
						return NULL;
					}
					buf_ptr++;
					if (++cpool_current_index >= cpool_size) {
						SLJIT_ASSERT(!first_patch);
						cpool_size = 0;
						if (label && label->size == word_count) {
							/* Points after the current instruction. */
							label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
							label->size = (sljit_uw)(code_ptr - code);
							label = label->next;

							next_addr = compute_next_addr(label, jump, const_, put_label);
						}
					}
				}
			}
			else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
#endif
				/* Ordinary instruction word: copy it. */
				*code_ptr = *buf_ptr++;
				if (next_addr == word_count) {
					SLJIT_ASSERT(!label || label->size >= word_count);
					SLJIT_ASSERT(!jump || jump->addr >= word_count);
					SLJIT_ASSERT(!const_ || const_->addr >= word_count);
					SLJIT_ASSERT(!put_label || put_label->addr >= word_count);

					/* These structures are ordered by their address. */
					if (jump && jump->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
						if (detect_jump_type(jump, code_ptr, code, executable_offset))
							code_ptr--;
						jump->addr = (sljit_uw)code_ptr;
#else
						jump->addr = (sljit_uw)(code_ptr - 2);
						if (detect_jump_type(jump, code_ptr, code, executable_offset))
							code_ptr -= 2;
#endif
						jump = jump->next;
					}
					if (label && label->size == word_count) {
						/* code_ptr can be affected above. */
						label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
						label->size = (sljit_uw)((code_ptr + 1) - code);
						label = label->next;
					}
					if (const_ && const_->addr == word_count) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
						const_->addr = (sljit_uw)code_ptr;
#else
						const_->addr = (sljit_uw)(code_ptr - 1);
#endif
						const_ = const_->next;
					}
					if (put_label && put_label->addr == word_count) {
						SLJIT_ASSERT(put_label->label);
						put_label->addr = (sljit_uw)code_ptr;
						put_label = put_label->next;
					}
					next_addr = compute_next_addr(label, jump, const_, put_label);
				}
				code_ptr++;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			}
			else {
				/* PUSH_POOL marker: expand the constant pool here. */
				/* Fortunately, no need to shift. */
				cpool_size = *buf_ptr++ & ~PUSH_POOL;
				SLJIT_ASSERT(cpool_size > 0);
				cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
				cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
				if (cpool_current_index > 0) {
					/* Unconditional branch. */
					*code_ptr = B | (((sljit_uw)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
					code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index);
				}
				cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
				cpool_current_index = 0;
				last_pc_patch = code_ptr;
			}
#endif
		} while (buf_ptr < buf_end);
		buf = buf->next;
	} while (buf);

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(!put_label);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Emit the final constant pool at the end of the code. */
	SLJIT_ASSERT(cpool_size == 0);
	if (compiler->cpool_fill > 0) {
		cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
		cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
		if (cpool_current_index > 0)
			code_ptr = (sljit_uw*)(cpool_start_address + cpool_current_index);

		buf_ptr = compiler->cpool;
		buf_end = buf_ptr + compiler->cpool_fill;
		cpool_current_index = 0;
		while (buf_ptr < buf_end) {
			if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
				SLJIT_FREE_EXEC(code, compiler->exec_allocator_data);
				compiler->error = SLJIT_ERR_ALLOC_FAILED;
				return NULL;
			}
			buf_ptr++;
			cpool_current_index++;
		}
		SLJIT_ASSERT(!first_patch);
	}
#endif

	/* Resolve all jumps now that every address is known. */
	jump = compiler->jumps;
	while (jump) {
		buf_ptr = (sljit_uw *)jump->addr;

		if (jump->flags & PATCH_B) {
			/* Direct branch: fill in the 24 bit signed word offset. */
			addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
			if (!(jump->flags & JUMP_ADDR)) {
				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000);
				*buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff;
			}
			else {
				SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000);
				*buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff;
			}
		}
		else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* Build the two word descriptor used by inline_set_jump_addr. */
			jump->addr = (sljit_uw)code_ptr;
			code_ptr[0] = (sljit_uw)buf_ptr;
			code_ptr[1] = *buf_ptr;
			inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
			code_ptr += 2;
#else
			inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
		}
		else {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* Write the target into the jump's constant pool slot. */
			if (jump->flags & IS_BL)
				buf_ptr--;
			if (*buf_ptr & (1 << 23))
				buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
			else
				buf_ptr += 1;
			*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
#else
			inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
#endif
		}
		jump = jump->next;
	}

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Build the descriptors for rewritable constants. */
	const_ = compiler->consts;
	while (const_) {
		buf_ptr = (sljit_uw*)const_->addr;
		const_->addr = (sljit_uw)code_ptr;

		code_ptr[0] = (sljit_uw)buf_ptr;
		code_ptr[1] = *buf_ptr;
		if (*buf_ptr & (1 << 23))
			buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
		else
			buf_ptr += 1;
		/* Set the value again (can be a simple constant). */
		inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
		code_ptr += 2;

		const_ = const_->next;
	}
#endif

	/* Write the resolved label addresses for put_labels. */
	put_label = compiler->put_labels;
	while (put_label) {
		addr = put_label->label->addr;
		buf_ptr = (sljit_uw*)put_label->addr;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
		SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
		buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
#else
		SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
		buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
		buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
#endif
		put_label = put_label->next;
	}

	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);

	code = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_uw *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
950
sljit_has_cpu_feature(sljit_s32 feature_type)951 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
952 {
953 switch (feature_type) {
954 case SLJIT_HAS_FPU:
955 #ifdef SLJIT_IS_FPU_AVAILABLE
956 return SLJIT_IS_FPU_AVAILABLE;
957 #else
958 /* Available by default. */
959 return 1;
960 #endif
961
962 case SLJIT_HAS_CLZ:
963 case SLJIT_HAS_ROT:
964 case SLJIT_HAS_CMOV:
965 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
966 case SLJIT_HAS_CTZ:
967 case SLJIT_HAS_PREFETCH:
968 #endif
969 return 1;
970
971 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
972 case SLJIT_HAS_CTZ:
973 return 2;
974 #endif
975
976 default:
977 return 0;
978 }
979 }
980
981 /* --------------------------------------------------------------------- */
982 /* Entry, exit */
983 /* --------------------------------------------------------------------- */
984
/* Creates an index in data_transfer_insts array. */
#define WORD_SIZE 0x00
#define BYTE_SIZE 0x01
#define HALF_SIZE 0x02
#define PRELOAD 0x03
#define SIGNED 0x04
#define LOAD_DATA 0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM 0x10
#define ALLOW_INV_IMM 0x20
#define ALLOW_ANY_IMM (ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM 0x40

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations. */

static const sljit_uw data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* Composes a load/store instruction: 'add' goes into bit 23 (the ARM
   U bit: 1 - base + offset, 0 - base - offset); 'arg' is the already
   encoded offset field. */
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_uw)(arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Splits an 8 bit immediate into the low/high nibble fields of a
   type2 transfer and sets bit 22 (immediate offset form). */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((sljit_uw)(opcode) | (sljit_uw)(mode) | VD(dst) | VM(src1) | VN(src2))

/* Flags for emit_op: */
/* Arguments are swapped. */
#define ARGS_SWAPPED 0x01
/* Inverted immediate. */
#define INV_IMM 0x02
/* Source and destination is register. */
#define MOVE_REG_CONV 0x04
/* Unused return value. */
#define UNUSED_RETURN 0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS (1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM (1 << 25)
1053
1054 static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
1055 sljit_s32 dst, sljit_sw dstw,
1056 sljit_s32 src1, sljit_sw src1w,
1057 sljit_s32 src2, sljit_sw src2w);
1058
/* Emits the function prologue: pushes the required saved/scratch core
   registers together with lr (TMP_REG2 maps to r14), saves the needed
   VFP registers, records the 8 byte aligned local area size and moves
   the incoming arguments into their assigned registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw imm, offset;
	sljit_s32 i, tmp, size, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Pending fp-register moves; collected first, emitted afterwards. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Build the bit mask of core registers which must be pushed. */
	imm = 0;

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	/* Push saved and temporary registers
	   multiple registers: stmdb sp!, {..., lr}
	   single register: str reg, [sp, #-4]! */
	if (imm != 0)
		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
	else
		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Padding word keeps the VFP save area 8 byte aligned. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* Round the whole frame up to 8 bytes; only the local part is kept. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* Softfp ABI: arguments arrive in r0-r3 and on the stack; copy
	   float arguments into VFP registers and word arguments into their
	   target core registers. 'offset' walks the argument area. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8 byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset + (sljit_uw)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Argument is already in the right register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
			else
				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_uw)size - 4 * sizeof(sljit_sw))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	/* Hardfp ABI: float arguments already arrive in VFP registers;
	   compact them into consecutive registers starting at SLJIT_FR0. */
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Reuse the second single slot of an earlier pair. */
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	/* Emit the collected moves in reverse order, so a source register
	   is read before a later move overwrites it. */
	while (remap_ptr > remap)
		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
#endif

	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	return SLJIT_SUCCESS;
}
1224
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1225 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1226 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1227 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1228 {
1229 sljit_s32 size;
1230
1231 CHECK_ERROR();
1232 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1233 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1234
1235 size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1236
1237 if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1238 size += SSIZE_OF(sw);
1239
1240 compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1241 return SLJIT_SUCCESS;
1242 }
1243
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1244 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1245 {
1246 sljit_uw imm2 = get_imm(imm);
1247
1248 if (imm2 == 0) {
1249 imm2 = (imm & ~(sljit_uw)0x3ff) >> 10;
1250 imm = (imm & 0x3ff) >> 2;
1251
1252 FAIL_IF(push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xb00 | imm2));
1253 return push_inst(compiler, ADD | SRC2_IMM | RD(SLJIT_SP) | RN(SLJIT_SP) | 0xf00 | (imm & 0xff));
1254 }
1255
1256 return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
1257 }
1258
/* Emits the function epilogue: restores VFP registers, releases the
   local area and restores the saved core registers. frame_size controls
   the lr handling:
     < 0 : the saved lr is loaded into TMP_REG2 instead of pc;
     0   : lr is popped into pc (a direct return);
     > 0 : lr is not loaded and frame_size bytes (rounded down to a
           multiple of 8; the special value 1 rounds to 0) of the
           register save area are kept allocated on the stack. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Drop the locals first, then vpop in the reverse order of the
		   vpush sequence in sljit_emit_enter. */
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_uw)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_uw)fsaveds << 1)));
		}

		/* Only the alignment padding word may remain to be released. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	if (lr_dst != 0)
		reg_list |= (sljit_uw)1 << reg_map[lr_dst];

	/* Collect the saved registers which were pushed by the prologue. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Only lr has to be loaded; treat it as the single register. */
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* At most one register to restore and no pc load needed: use
		   single ldr forms instead of pop. 'tmp' tracks the remaining
		   work: 0/1 - nothing left after the sp adjustment, 2 - load
		   restored_reg with an offset ldr afterwards, 3 - load it with
		   a post-indexed ldr which also releases the stack. */
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)(frame_size - (2 * SSIZE_OF(sw))));
				}

				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_uw)(frame_size - local_size)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_uw)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	/* Pop saved and temporary registers
	   multiple registers: ldmia sp!, {...}
	   single register: ldr reg, [sp], #4 */
	if ((reg_list & (reg_list - 1)) == 0) {
		SLJIT_ASSERT(lr_dst != 0);
		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);

		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
	}

	FAIL_IF(push_inst(compiler, POP | reg_list));

	/* Re-reserve the kept part of the frame (lr slot stays popped). */
	if (frame_size > 0)
		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_uw)frame_size - sizeof(sljit_sw)));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* lr was not in reg_list: skip its slot without loading it. */
	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
}
1394
/* Emits a return with no return value: releases the whole frame and
   returns (frame_size 0 pops the saved lr into pc). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
1402
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1403 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1404 sljit_s32 src, sljit_sw srcw)
1405 {
1406 CHECK_ERROR();
1407 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1408
1409 if (src & SLJIT_MEM) {
1410 FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
1411 src = TMP_REG1;
1412 srcw = 0;
1413 } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1414 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
1415 src = TMP_REG1;
1416 srcw = 0;
1417 }
1418
1419 FAIL_IF(emit_stack_frame_release(compiler, 1));
1420
1421 SLJIT_SKIP_CHECKS(compiler);
1422 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1423 }
1424
1425 /* --------------------------------------------------------------------- */
1426 /* Operators */
1427 /* --------------------------------------------------------------------- */
1428
/* Emits the native instruction(s) of one already preprocessed
   operation. dst/src1 are registers; src2 is a register or a SRC2_IMM
   encoded immediate. flags carries ARGS_SWAPPED, INV_IMM,
   MOVE_REG_CONV, UNUSED_RETURN and SET_FLAGS as prepared by emit_op. */
static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
	sljit_uw dst, sljit_uw src1, sljit_uw src2)
{
	sljit_s32 is_masked;
	sljit_uw shift_type;

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (dst != src2) {
			if (src2 & SRC2_IMM) {
				return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
			}
			return push_inst(compiler, MOV | RD(dst) | RM(src2));
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* No uxtb/sxtb on ARMv5: either mask with 0xff, or shift
			   left by 24 then back (0x20 selects lsr, 0x40 asr). */
			if (op == SLJIT_MOV_U8)
				return push_inst(compiler, AND | RD(dst) | RN(src2) | SRC2_IMM | 0xff);
			FAIL_IF(push_inst(compiler, MOV | RD(dst) | (24 << 7) | RM(src2)));
			return push_inst(compiler, MOV | RD(dst) | (24 << 7) | (op == SLJIT_MOV_U8 ? 0x20 : 0x40) | RM(dst));
#else
			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
#endif
		}
		else if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_MOV_U16:
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
		if (flags & MOVE_REG_CONV) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
			/* Same shift-up / shift-down trick with a 16 bit amount. */
			FAIL_IF(push_inst(compiler, MOV | RD(dst) | (16 << 7) | RM(src2)));
			return push_inst(compiler, MOV | RD(dst) | (16 << 7) | (op == SLJIT_MOV_U16 ? 0x20 : 0x40) | RM(dst));
#else
			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
#endif
		}
		else if (dst != src2) {
			SLJIT_ASSERT(src2 & SRC2_IMM);
			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
		}
		return SLJIT_SUCCESS;

	case SLJIT_NOT:
		if (src2 & SRC2_IMM)
			return push_inst(compiler, ((flags & INV_IMM) ? MOV : MVN) | (flags & SET_FLAGS) | RD(dst) | src2);

		return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src2));

	case SLJIT_CLZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
		return SLJIT_SUCCESS;

	case SLJIT_CTZ:
		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
		/* No rbit on ARMv5: isolate the lowest set bit (src2 & -src2),
		   take clz of it and remap via eor 0x1f; the xor with
		   0xf0000000 presumably rewrites the condition field so the eor
		   is skipped when the cmp found 32 (zero input) — confirm
		   against the EOR opcode value. */
		FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
		return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
#else /* !SLJIT_CONFIG_ARM_V5 */
		FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
		return push_inst(compiler, CLZ | RD(dst) | RM(dst));
#endif /* SLJIT_CONFIG_ARM_V5 */

	case SLJIT_ADD:
		SLJIT_ASSERT(!(flags & INV_IMM));

		/* When only the flags are needed, cmn avoids writing dst. */
		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_ADDC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUB:
		SLJIT_ASSERT(!(flags & INV_IMM));

		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
			return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

		/* rsb handles the swapped-operand form. */
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_SUBC:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_MUL:
		SLJIT_ASSERT(!(flags & INV_IMM));
		SLJIT_ASSERT(!(src2 & SRC2_IMM));
		compiler->status_flags_state = 0;

		if (!HAS_FLAGS(op))
			return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));

		/* Overflow check: the upper 32 bits of the full product must
		   equal the sign extension of the lower 32 bits. */
		FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1)));

		/* cmp TMP_REG1, dst asr #31. */
		return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0);

	case SLJIT_AND:
		if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
			return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
		/* bic implements "and with inverted immediate". */
		return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_OR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	case SLJIT_XOR:
		SLJIT_ASSERT(!(flags & INV_IMM));
		return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));

	/* Shift/rotate opcodes fall through to the shared emitter below;
	   shift_type is the 2 bit shift field (lsl/lsr/asr/ror). */
	case SLJIT_SHL:
	case SLJIT_MSHL:
		shift_type = 0;
		is_masked = GET_OPCODE(op) == SLJIT_MSHL;
		break;

	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		shift_type = 1;
		is_masked = GET_OPCODE(op) == SLJIT_MLSHR;
		break;

	case SLJIT_ASHR:
	case SLJIT_MASHR:
		shift_type = 2;
		is_masked = GET_OPCODE(op) == SLJIT_MASHR;
		break;

	case SLJIT_ROTL:
		/* Rotate left is rotate right by the negated amount. */
		if (compiler->shift_imm == 0x20) {
			FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
			src2 = TMP_REG2;
		} else
			compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
		/* fallthrough */

	case SLJIT_ROTR:
		shift_type = 3;
		is_masked = 0;
		break;

	default:
		SLJIT_UNREACHABLE();
		return SLJIT_SUCCESS;
	}

	SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));

	/* shift_imm != 0x20: the amount is an immediate (0x20 marks a
	   register amount in src2). */
	if (compiler->shift_imm != 0x20) {
		SLJIT_ASSERT(src1 == TMP_REG1);

		if (compiler->shift_imm != 0)
			return push_inst(compiler, MOV | (flags & SET_FLAGS) |
				RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
		return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
	}

	SLJIT_ASSERT(src1 != TMP_REG2);

	/* Masked (M*) variants reduce the register amount modulo 32. */
	if (is_masked) {
		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
		src2 = TMP_REG2;
	}

	return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
		| RM8(src2) | (sljit_uw)(shift_type << 5) | 0x10 | RM(src1));
}
1616
1617 #undef EMIT_SHIFT_INS_AND_RETURN
1618
1619 /* Tests whether the immediate can be stored in the 12 bit imm field.
1620 Returns with 0 if not possible. */
get_imm(sljit_uw imm)1621 static sljit_uw get_imm(sljit_uw imm)
1622 {
1623 sljit_u32 rol;
1624
1625 if (imm <= 0xff)
1626 return SRC2_IMM | imm;
1627
1628 if (!(imm & 0xff000000)) {
1629 imm <<= 8;
1630 rol = 8;
1631 }
1632 else {
1633 imm = (imm << 24) | (imm >> 8);
1634 rol = 0;
1635 }
1636
1637 if (!(imm & 0xff000000)) {
1638 imm <<= 8;
1639 rol += 4;
1640 }
1641
1642 if (!(imm & 0xf0000000)) {
1643 imm <<= 4;
1644 rol += 2;
1645 }
1646
1647 if (!(imm & 0xc0000000)) {
1648 imm <<= 2;
1649 rol += 1;
1650 }
1651
1652 if (!(imm & 0x00ffffff))
1653 return SRC2_IMM | (imm >> 24) | (rol << 8);
1654 else
1655 return 0;
1656 }
1657
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
/* Tries to build imm with two instructions: mov+orr when positive is
   non-zero, mvn+bic otherwise (the caller passes the inverted value in
   that case). Returns 0 when imm has no such two-instruction form;
   otherwise emits both instructions and returns non-zero. */
static sljit_s32 generate_int(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm, sljit_s32 positive)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw imm2;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continuous zero bit). */
	mask = 0xff000000;
	rol = 8;
	while(1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}
		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* rol by 8. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					/* No byte-aligned zero window: not encodable. */
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	/* Step2: split the rotated value into two rotated 8 bit halves. */
	if (!(imm & 0xff000000)) {
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	}
	else if (imm & 0xc0000000) {
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}
	else {
		/* Align the first byte to the top before extracting it. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	FAIL_IF(push_inst(compiler, (positive ? MOV : MVN) | RD(reg) | imm1));
	FAIL_IF(push_inst(compiler, (positive ? ORR : BIC) | RD(reg) | RN(reg) | imm2));
	return 1;
}
#endif
1769
/* Loads an arbitrary 32 bit constant into reg using the shortest
   available sequence: a single mov/mvn (or movw on ARMv7), two data
   processing instructions, and finally a literal pool load (ARMv5) or
   a movw+movt pair (ARMv7). */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Create imm by 2 inst. NOTE(review): generate_int returns non-zero
	   once it has emitted the constant, which presumably makes FAIL_IF
	   return from here (propagating compiler->error, i.e. success when
	   no emission error occurred) — confirm against FAIL_IF's
	   definition. */
	FAIL_IF(generate_int(compiler, reg, imm, 1));
	FAIL_IF(generate_int(compiler, reg, ~imm, 0));

	/* Load integer. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else
	/* movw/movt pair; the early return is defensive since imm <= 0xffff
	   is already handled above. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif
}
1802
/* Emits one memory access (load or store selected by flags) of reg at
   [arg, argw], using tmp_reg for address computation when the offset
   cannot be encoded directly. Type1 transfers (ldr/str/ldrb/strb) take
   a 12 bit immediate offset, type2 transfers (ldrh/ldrsh/ldrsb) an
   8 bit one; 'sign' is the rounding threshold used when splitting a
   large offset into base + displacement. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw imm, offset_reg, tmp;
	sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
	sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: load the nearest aligned base into tmp_reg
		   and use the remainder as an (up or down) displacement. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);

		FAIL_IF(load_immediate(compiler, tmp_reg, tmp));

		argw -= (sljit_sw)tmp;
		tmp = 1;

		if (argw < 0) {
			argw = -argw;
			tmp = 0;
		}

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
			(mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
	}

	if (arg & OFFS_REG_MASK) {
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3;

		if (argw != 0 && (mask == 0xff)) {
			/* Type2 transfers have no shifted register offset form:
			   compute the address separately. */
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_uw)argw << 7)));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
			RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_uw)argw << 7)));
	}

	arg &= REG_MASK;

	if (argw > mask) {
		/* Add the encodable upper part of the offset to the base. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
			argw -= (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	} else if (argw < -mask) {
		tmp = (sljit_uw)(-argw & (sign | mask));
		tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
			argw += (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	}

	if (argw <= mask && argw >= -mask) {
		if (argw >= 0) {
			if (mask == 0xff)
				argw = TYPE2_TRANSFER_IMM(argw);
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
		}

		/* Negative displacement: clear the U bit and negate. */
		argw = -argw;

		if (mask == 0xff)
			argw = TYPE2_TRANSFER_IMM(argw);

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
	}

	/* Fallback: materialize the whole offset and use a register form. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
		RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
}
1893
/* Generic two-operand (or move) code generator. Resolves the operands into
   registers or encodable immediates, emits the operation through
   emit_single_op, and stores the result back to memory when needed.
   inp_flags controls which immediate encodings are acceptable
   (ALLOW_IMM / ALLOW_INV_IMM / ALLOW_NEG_IMM) and the transfer size. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefer registers and simple constants. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg;
	sljit_s32 src2_reg = 0; /* Non-zero: src2 resolved to an encoded immediate or register. */
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	sljit_s32 neg_op = 0;

	if (dst == TMP_REG2)
		flags |= UNUSED_RETURN;

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	if (inp_flags & ALLOW_NEG_IMM) {
		/* ADD/SUB (and carry variants) can absorb a negated immediate
		   by switching to the opposite operation. */
		switch (GET_OPCODE(op)) {
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUB;
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUBC;
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADD;
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADDC;
			break;
		}
	}

	/* Try to encode an immediate operand; break out as soon as one fits. */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 & SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
			if (src2_reg)
				break;
			if (inp_flags & ALLOW_INV_IMM) {
				/* The inverted value may be encodable (e.g. BIC instead of AND). */
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}
			if (neg_op != 0) {
				/* The negated value may be encodable with the opposite op. */
				src2_reg = (sljit_s32)get_imm((sljit_uw)-src2w);
				if (src2_reg) {
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 & SLJIT_IMM) {
			/* Same attempts for src1, swapping the arguments. */
			src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}
			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}
			if (neg_op >= SLJIT_SUB) {
				/* Note: additive operation (commutative). */
				src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
				if (src2_reg) {
					src1 = src2;
					src1w = src2w;
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1_reg = TMP_REG1;
	}

	/* Destination. */
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOV_P) {
		/* Plain moves: a register source can be stored directly. */
		if (dst & SLJIT_MEM) {
			if (inp_flags & BYTE_SIZE)
				inp_flags &= ~SIGNED;

			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
		}

		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= MOVE_REG_CONV;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (src2 & SLJIT_MEM)
			FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
		else
			FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
	}

	FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Store the computed result to the memory destination. */
	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
2037
2038 #ifdef __cplusplus
2039 extern "C" {
2040 #endif
2041
2042 #if defined(__GNUC__)
2043 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2044 extern int __aeabi_idivmod(int numerator, int denominator);
2045 #else
2046 #error "Software divmod functions are needed"
2047 #endif
2048
2049 #ifdef __cplusplus
2050 }
2051 #endif
2052
/* Emits a zero-operand operation (breakpoint, nop, long multiply, or
   divide/modulo). Division is implemented by calling the EABI software
   divmod helpers; caller-visible scratch registers that the helper would
   clobber are spilled to the stack around the call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_uw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64-bit result: low word in R0, high word in R1. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Collect live scratch registers (machine regs 1-3) that the
		   helper call would clobber. For plain DIV, r1 (the remainder)
		   must be preserved as well. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
					| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		/* Call the EABI software divide helper (quotient in r0, remainder in r1). */
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the spilled registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_uw)(saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2128
/* Emits a one-operand operation (moves with optional size/sign conversion,
   NOT, CLZ, CTZ) by dispatching to emit_op with the appropriate immediate
   allowances and transfer-size flags. Immediate move sources are truncated
   to the target width up front. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		/* All word-sized moves are identical on a 32 bit target. */
		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_MOV_U8:
		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u8)srcw : srcw);

	case SLJIT_MOV_S8:
		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s8)srcw : srcw);

	case SLJIT_MOV_U16:
		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_u16)srcw : srcw);

	case SLJIT_MOV_S16:
		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? (sljit_s16)srcw : srcw);

	case SLJIT_NOT:
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_CLZ:
	case SLJIT_CTZ:
		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
	}

	return SLJIT_SUCCESS;
}
2168
/* Emits a two-operand operation. Each opcode group is dispatched to emit_op
   with the immediate allowances the underlying ARM instruction supports;
   shifts and rotates pass the shift amount via compiler->shift_imm
   (0x20 means "amount is in a register"). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD:
	case SLJIT_ADDC:
	case SLJIT_SUB:
	case SLJIT_SUBC:
		/* Negated immediates can be folded into the opposite operation. */
		return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_OR:
	case SLJIT_XOR:
		return emit_op(compiler, op, ALLOW_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_MUL:
		/* MUL has no immediate form. */
		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_AND:
		/* Inverted immediates are allowed (BIC). */
		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);

	case SLJIT_SHL:
	case SLJIT_MSHL:
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
	case SLJIT_ASHR:
	case SLJIT_MASHR:
	case SLJIT_ROTL:
	case SLJIT_ROTR:
		if (src2 & SLJIT_IMM) {
			compiler->shift_imm = src2w & 0x1f;
			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
		} else {
			compiler->shift_imm = 0x20;
			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
		}
	}

	return SLJIT_SUCCESS;
}
2216
/* Same as sljit_emit_op2, but the result is discarded: only the status
   flags are of interest. TMP_REG2 as destination marks the unused return. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
}
2227
/* Shifts src_dst left/right while shifting bits from src1 into the vacated
   positions (a funnel shift). When both operands are the same register this
   degenerates to a rotate. Otherwise it is composed from two single shifts
   combined with ORR; for register shift amounts the complementary amount is
   computed into TMP_REG2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src_dst == src1) {
		/* Shifting a register into itself is a rotate. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, src_dst, 0, src_dst, 0, src2, src2w);
	}

	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Shift type of ROR is 3. */
	if (src2 & SLJIT_IMM) {
		src2w &= 0x1f;

		if (src2w == 0)
			return SLJIT_SUCCESS;
	} else if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src2, src2w, TMP_REG2));
		src2 = TMP_REG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1 = TMP_REG1;
	} else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
	}

	if (src2 & SLJIT_IMM) {
		/* Constant amount: shift src_dst by src2w, then OR in src1
		   shifted the opposite way by (32 - src2w). */
		FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM(src_dst) | ((sljit_uw)(is_left ? 0 : 1) << 5) | ((sljit_uw)src2w << 7)));
		src2w = (src2w ^ 0x1f) + 1; /* Equals 32 - src2w for src2w in 1..31. */
		return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | ((sljit_uw)src2w << 7));
	}

	if (op == SLJIT_MSHL || op == SLJIT_MLSHR) {
		/* Masked variants: reduce the amount modulo 32 first. */
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
		src2 = TMP_REG2;
	}

	/* Register amount: shift src_dst, pre-shift src1 by one, compute the
	   complementary amount (31 - src2) and OR the pieces together. */
	FAIL_IF(push_inst(compiler, MOV | RD(src_dst) | RM8(src2) | ((sljit_uw)(is_left ? 0 : 1) << 5) | 0x10 | RM(src_dst)));
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | (1 << 7)));
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0x1f));
	return push_inst(compiler, ORR | RD(src_dst) | RN(src_dst) | RM(TMP_REG1) | ((sljit_uw)(is_left ? 1 : 0) << 5) | 0x10 | RM8(TMP_REG2));
}
2284
/* Emits a source-only operation: fast (lr-based) return, or a prefetch hint.
   Prefetch is only emitted on ARMv7; on older cores it is a no-op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		/* Move the saved return address into lr (TMP_REG2), then branch. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));

		return push_inst(compiler, BX | RM(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		SLJIT_ASSERT(src & SLJIT_MEM);
		/* PLD is encoded as a load with TMP_PC as the target. */
		return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
#else /* !SLJIT_CONFIG_ARM_V7 */
		return SLJIT_SUCCESS;
#endif /* SLJIT_CONFIG_ARM_V7 */
	}

	return SLJIT_SUCCESS;
}
2318
/* Returns the machine register number backing an abstract SLJIT register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
2324
/* Returns the machine float register number backing an abstract SLJIT float
   register. The mapped value is doubled because freg_map holds double
   (64 bit) register indices while the returned index counts single slots. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return (freg_map[reg] << 1);
}
2330
/* Emits one raw, caller-supplied machine instruction word verbatim.
   'size' is unused here: every ARM instruction is one word. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	SLJIT_UNUSED_ARG(size);
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_uw*)instruction);
}
2340
2341 /* --------------------------------------------------------------------- */
2342 /* Floating point operators */
2343 /* --------------------------------------------------------------------- */
2344
2345 #define FPU_LOAD (1 << 20)
2346 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
2347 ((inst) | (sljit_uw)((add) << 23) | RN(base) | VD(freg) | (sljit_uw)(offs))
2348
/* Emits a VFP load/store (VLDR/VSTR) for the memory operand [arg + argw].
   VLDR/VSTR offsets are word multiples in the range 0..0x3fc, so larger or
   unaligned offsets are folded into TMP_REG2 first. flags carries SLJIT_32
   (precision) and FPU_LOAD (direction). */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_uw inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* VFP transfers have no register-offset form: compute the
		   base + (index << shift) address into TMP_REG2. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_uw)argw & 0x3) << 7)));
		arg = TMP_REG2;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Try to absorb the out-of-range part with one ADD/SUB immediate. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* Fallback: materialize the full (absolute or relative) address. */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw));
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2)));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
}
2392
/* Converts a float/double to a signed word: VCVT into TMP_FREG1, then move
   the integer result to a core register or store it to memory. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* Toggle the precision bit: its meaning is inverted internally. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));

	if (FAST_IS_REG(dst))
		/* Bit 20 selects the VFP-to-core transfer direction of VMOV. */
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2412
/* Converts a signed word to a float/double: get the integer into TMP_FREG1
   (from a core register, memory, or an immediate), VCVT into the
   destination register, then store if the destination is memory. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Toggle the precision bit: its meaning is inverted internally. */
	op ^= SLJIT_32;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F32_S32, op & SLJIT_32, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2438
/* Compares two floating point operands with VCMP and copies the VFP status
   flags into the core CPSR via VMRS, so the result can drive conditional
   instructions. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* Toggle the precision bit: its meaning is inverted internally. */
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
	return push_inst(compiler, VMRS);
}
2458
/* Emits a one-operand floating point operation (move, negate, absolute
   value, or single<->double conversion), loading/storing memory operands
   through TMP_FREG1 as needed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Toggle the precision bit, except for F64<-F32 where the source
	   precision (not the destination's) selects the instruction form. */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
			else
				/* Destination is memory: store directly from src below. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
		/* Flip precision for the store below: the result has the other width. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2505
/* Emits a two-operand floating point operation (add, sub, mul, div).
   Memory operands are loaded into TMP_FREG1/TMP_FREG2; a memory destination
   receives the result from TMP_FREG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Toggle the precision bit: its meaning is inverted internally. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	}

	/* Result landed in the temporary register: store it to memory. */
	if (dst_r == TMP_FREG1)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
2556
2557 #undef EMIT_FPU_DATA_TRANSFER
2558
2559 /* --------------------------------------------------------------------- */
2560 /* Other instructions */
2561 /* --------------------------------------------------------------------- */
2562
/* Saves the return address of a fast (SLJIT_FAST_CALL) entry: copies lr
   (mapped to TMP_REG2) into the destination register or memory location. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	if (FAST_IS_REG(dst))
		return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));

	/* Memory. */
	return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);
}
2577
2578 /* --------------------------------------------------------------------- */
2579 /* Conditional instructions */
2580 /* --------------------------------------------------------------------- */
2581
get_cc(struct sljit_compiler * compiler,sljit_s32 type)2582 static sljit_uw get_cc(struct sljit_compiler *compiler, sljit_s32 type)
2583 {
2584 switch (type) {
2585 case SLJIT_EQUAL:
2586 case SLJIT_F_EQUAL:
2587 case SLJIT_ORDERED_EQUAL:
2588 case SLJIT_UNORDERED_OR_EQUAL: /* Not supported. */
2589 return 0x00000000;
2590
2591 case SLJIT_NOT_EQUAL:
2592 case SLJIT_F_NOT_EQUAL:
2593 case SLJIT_UNORDERED_OR_NOT_EQUAL:
2594 case SLJIT_ORDERED_NOT_EQUAL: /* Not supported. */
2595 return 0x10000000;
2596
2597 case SLJIT_CARRY:
2598 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2599 return 0x20000000;
2600 /* fallthrough */
2601
2602 case SLJIT_LESS:
2603 return 0x30000000;
2604
2605 case SLJIT_NOT_CARRY:
2606 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2607 return 0x30000000;
2608 /* fallthrough */
2609
2610 case SLJIT_GREATER_EQUAL:
2611 return 0x20000000;
2612
2613 case SLJIT_GREATER:
2614 case SLJIT_UNORDERED_OR_GREATER:
2615 return 0x80000000;
2616
2617 case SLJIT_LESS_EQUAL:
2618 case SLJIT_F_LESS_EQUAL:
2619 case SLJIT_ORDERED_LESS_EQUAL:
2620 return 0x90000000;
2621
2622 case SLJIT_SIG_LESS:
2623 case SLJIT_UNORDERED_OR_LESS:
2624 return 0xb0000000;
2625
2626 case SLJIT_SIG_GREATER_EQUAL:
2627 case SLJIT_F_GREATER_EQUAL:
2628 case SLJIT_ORDERED_GREATER_EQUAL:
2629 return 0xa0000000;
2630
2631 case SLJIT_SIG_GREATER:
2632 case SLJIT_F_GREATER:
2633 case SLJIT_ORDERED_GREATER:
2634 return 0xc0000000;
2635
2636 case SLJIT_SIG_LESS_EQUAL:
2637 case SLJIT_UNORDERED_OR_LESS_EQUAL:
2638 return 0xd0000000;
2639
2640 case SLJIT_OVERFLOW:
2641 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2642 return 0x10000000;
2643 /* fallthrough */
2644
2645 case SLJIT_UNORDERED:
2646 return 0x60000000;
2647
2648 case SLJIT_NOT_OVERFLOW:
2649 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2650 return 0x00000000;
2651 /* fallthrough */
2652
2653 case SLJIT_ORDERED:
2654 return 0x70000000;
2655
2656 case SLJIT_F_LESS:
2657 case SLJIT_ORDERED_LESS:
2658 return 0x40000000;
2659
2660 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2661 return 0x50000000;
2662
2663 default:
2664 SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
2665 return 0xe0000000;
2666 }
2667 }
2668
sljit_emit_label(struct sljit_compiler * compiler)2669 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2670 {
2671 struct sljit_label *label;
2672
2673 CHECK_ERROR_PTR();
2674 CHECK_PTR(check_sljit_emit_label(compiler));
2675
2676 if (compiler->last_label && compiler->last_label->size == compiler->size)
2677 return compiler->last_label;
2678
2679 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2680 PTR_FAIL_IF(!label);
2681 set_label(label, compiler);
2682 return label;
2683 }
2684
/* Emits a (possibly conditional, possibly rewritable) jump or fast call.
   On ARMv5 the target is loaded from a literal pool entry; on ARMv7 it is
   materialized with a movw/movt pair (emit_imm) followed by BX/BLX. The
   recorded jump->addr is patched later when the target becomes known. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* Conditional PC load from the literal pool; fast calls go through
	   TMP_REG1 and a blx sequence instead of loading PC directly. */
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
		jump->addr = compiler->size;
		compiler->patches++;
	}

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(emit_blx(compiler));
	}

	if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
		jump->addr = compiler->size;
#else
	/* ARMv7: load the (to be patched) target into TMP_REG1, then branch. */
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	jump->addr = compiler->size;
#endif
	return jump;
}
2726
2727 #ifdef __SOFTFP__
2728
/* Moves the arguments of a call into their AAPCS softfp locations: the first
   four machine words go into r0-r3, the remainder onto the stack. Floating
   point arguments travel in core registers / stack slots (softfp ABI).
   On entry *extra_space carries the call flags (SLJIT_CALL_RETURN marks a
   tail call); on exit it holds the number of extra stack bytes allocated
   (0 if none). When src is non-NULL and *src is a register that an argument
   move would clobber, the register is saved and *src is redirected. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	/* Byte offset of the register *src within r0-r3; initialized past the
	   register area so it matches nothing when *src is not a register. */
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_uw)reg_map[*src] * sizeof(sljit_sw);

	/* Skip the return type. */
	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: assign a byte offset to every argument, and reverse
	   arg_types into types so the second pass can walk right-to-left. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles require 8 byte alignment. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			/* Tracks where this word argument currently lives
			   (packed order in r0-r3 before the shuffle). */
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* Arguments overflow r0-r3: stack space is needed (unless a tail call
	   can reuse the caller's own argument area). */
	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Stack area above the four register words, rounded up to 8. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_uw)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Destination is a register pair; rescue the call
				   target first if it would be overwritten. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				/* Transfer the double into two core registers. */
				FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				/* Store the double to its stack slot (offset relative to SP). */
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
					| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			/* Right-to-left processing guarantees the source was not
			   overwritten yet. */
			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The moved argument register itself holds the
						   call target; track it at its new location. */
						*src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
				} else
					FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2851
/* After a softfp call: transfers a floating point return value from the
   core registers back into the first floating point register. A no-op for
   integer/void return types. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
		break;
	case SLJIT_ARG_TYPE_F32:
		FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12)));
		break;
	}

	return SLJIT_SUCCESS;
}
2861
2862 #else /* !__SOFTFP__ */
2863
/* Moves floating point arguments into their AAPCS hardfp (VFP) locations.
   SLJIT keeps f32/f64 arguments packed in consecutive double registers;
   the VFP calling convention allows two f32 values to share one double
   register, so this pass compacts them (f32_offset remembers a half-used
   slot awaiting its second single). Integer arguments are already placed. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;       /* current source register */
	sljit_u32 new_offset = SLJIT_FR0;   /* next free destination register */
	sljit_u32 f32_offset = 0;           /* destination with a free f32 half, or 0 */

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					SLJIT_32, new_offset, offset, 0)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the upper half of a previously half-used slot. */
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					0x400000, f32_offset, offset, 0)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
						0, new_offset, offset, 0)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
2903
2904 #endif /* __SOFTFP__ */
2905
2906 #undef EMIT_FPU_OPERATION
2907
/* Emits a function call to a patchable target address. Marshals arguments
   according to the active float ABI, releases the stack frame for tail
   calls (SLJIT_CALL_RETURN), and returns the jump record whose target is
   filled in later. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	/* Passes the call flags in, receives the allocated stack size back. */
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no extra stack: frame is already released,
		   so a plain jump is enough. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Reload the saved return address before freeing the area. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		/* Move a float return value from core registers into FR0. */
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: release the frame, then emit a plain jump. */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
2963
/* Emits an indirect jump or fast call to src (register, memory operand,
   or immediate address). Immediate targets create a patchable jump record
   so they can be converted to direct branches at code generation time. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* BX/BLX through lr (r14) would be unpredictable for BLX. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (!(src & SLJIT_IMM)) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* ARMv5: load the target from a literal pool entry; jumps load it
	   straight into pc, calls go through TMP_REG1 + BLX emulation. */
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(emit_blx(compiler));
#else
	/* ARMv7: materialize the address (patched later), then branch. */
	FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
#endif
	jump->addr = compiler->size;
	return SLJIT_SUCCESS;
}
3004
/* Emits an indirect call: like sljit_emit_call but the target comes from
   src/srcw. Mirrors the ABI handling of sljit_emit_call, additionally
   protecting the target register from being clobbered by argument moves
   or by the frame release of a tail call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	/* Passes the call flags in, receives the allocated stack size back. */
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* A tail call restores saved registers before jumping; copy the target
	   out of any saved register that the frame release would overwrite. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no extra stack: frame already released. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Reload the saved return address before freeing the area. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: release the frame, then emit a plain jump. */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3066
3067 #ifdef __SOFTFP__
3068
/* Softfp only: moves a floating point return value into the location the
   caller expects before returning. With SLJIT_ENTER_REG_ARG the value stays
   in a floating point register (SLJIT_RETURN_FREG); otherwise it must end
   up in r0 (f32) or r0/r1 (f64). */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* Transfer the FPU register into core register(s). */
		if (op & SLJIT_32)
			return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
		return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
	}

	SLJIT_SKIP_CHECKS(compiler);

	/* src is a memory operand: load the raw bits directly. */
	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3091
3092 #endif /* __SOFTFP__ */
3093
/* Materializes the current condition flags as a 0/1 value in dst, or
   combines them into dst with AND/OR/XOR, using conditionally executed
   instructions instead of branches. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_uw cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Plain move: MOV 0, then a conditional MOV 1. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	/* Conditionally combine with 1 when the condition holds ... */
	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* ... and for AND, clear with 0 on the inverted condition
	   (cc ^ 0x10000000 flips the ARM condition code). */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* Re-derive the zero flag from the result if requested. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
3134
/* Conditional move: dst_reg = src if the condition encoded in type holds,
   implemented with conditionally executed MOV/MVN/MOVW/MOVT instructions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_uw cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (SLJIT_UNLIKELY(src & SLJIT_IMM)) {
		/* Try to encode the constant as an ARM rotated immediate ... */
		tmp = get_imm((sljit_uw)srcw);
		if (tmp)
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

		/* ... or its bitwise complement (conditional MVN). */
		tmp = get_imm(~(sljit_uw)srcw);
		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		/* ARMv7: conditional MOVW (low 16 bits), then MOVT for the
		   high 16 bits only when they are non-zero. */
		tmp = (sljit_uw)srcw;
		FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
		if (tmp <= 0xffff)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
#else
		/* ARMv5: load the constant unconditionally, then move it
		   conditionally below. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
#endif
	}

	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src)) & ~COND_MASK) | cc);
}
3169
/* Rewrites a memory operand so that the remaining immediate displacement
   fits the target addressing mode: on return *mem is a base register
   (possibly TMP_REG1 holding a computed address) and *memw lies in
   [-mask, max_offset]. max_offset selects between the 12-bit (>= 0xf00)
   and 8-bit immediate forms on ARMv5; ARMv7 callers always use 12-bit. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm, tmp;
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_sw mask = max_offset >= 0xf00 ? 0xfff : 0xff;
	sljit_sw sign = max_offset >= 0xf00 ? 0x1000 : 0x100;
#else /* !SLJIT_CONFIG_ARM_V5 */
	sljit_sw mask = 0xfff;
	sljit_sw sign = 0x1000;

	SLJIT_ASSERT(max_offset >= 0xf00);
#endif /* SLJIT_CONFIG_ARM_V5 */

	*mem = TMP_REG1;

	/* base + (index << shift): fold into TMP_REG1, no displacement left. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_uw)(argw & 0x3) << 7));
	}

	arg &= REG_MASK;

	if (arg) {
		/* Displacement already fits: keep the original base. */
		if (argw <= max_offset && argw >= -mask) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw >= 0) {
			/* Split argw into a part encodable as an ADD immediate
			   and a remainder within the addressing range. */
			tmp = (sljit_uw)(argw & (sign | mask));
			tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw - (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
			}
		} else {
			/* Same idea for negative displacements, using SUB. */
			tmp = (sljit_uw)(-argw & (sign | mask));
			tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw + (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
			}
		}
	}

	/* Fallback: load the rounded address as a full constant and add the
	   base register (if any); the remainder stays in *memw. */
	tmp = (sljit_uw)(argw & (sign | mask));
	tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
	*memw = argw - (sljit_sw)tmp;

	FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3236
3237 #if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
3238
/* ARMv5 only: emulates unaligned loads/stores with byte (or halfword, for
   SLJIT_MEM_UNALIGNED_16) accesses, recombining the pieces with shifts and
   ORR. Byte accesses never fault on alignment, halfword accesses require
   2-byte alignment. memw must be pre-constrained by update_mem_addr so
   every partial access stays encodable. */
static sljit_s32 sljit_emit_mem_unaligned(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags, steps, tmp_reg;
	sljit_uw add, shift;

	switch (type & 0xff) {
	case SLJIT_MOV_U8:
	case SLJIT_MOV_S8:
		/* Single byte accesses are always aligned. */
		flags = BYTE_SIZE;
		if (!(type & SLJIT_MEM_STORE))
			flags |= LOAD_DATA;
		if ((type & 0xff) == SLJIT_MOV_S8)
			flags |= SIGNED;

		return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1);

	case SLJIT_MOV_U16:
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 1));
		flags = BYTE_SIZE;
		steps = 1;          /* one extra byte after the first */
		break;

	case SLJIT_MOV_S16:
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 1));
		flags = BYTE_SIZE | SIGNED;
		steps = 1;
		break;

	default:
		if (type & SLJIT_MEM_UNALIGNED_32) {
			/* 4-byte aligned: a normal word access works. */
			flags = WORD_SIZE;
			if (!(type & SLJIT_MEM_STORE))
				flags |= LOAD_DATA;

			return emit_op_mem(compiler, flags, reg, mem, memw, TMP_REG1);
		}

		if (!(type & SLJIT_MEM_UNALIGNED_16)) {
			/* Fully unaligned word: four byte accesses. */
			FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 3));
			flags = BYTE_SIZE;
			steps = 3;
			break;
		}

		/* 2-byte aligned word: two halfword accesses. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xff - 2));

		/* Halfword transfers take an unsigned immediate; track the
		   sign of memw in 'add' (1 = add offset, 0 = subtract). */
		add = 1;
		if (memw < 0) {
			add = 0;
			memw = -memw;
		}

		tmp_reg = reg;

		if (type & SLJIT_MEM_STORE) {
			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, reg, mem, TYPE2_TRANSFER_IMM(memw))));
			/* TMP_REG2 = reg >> 16 (LSR by 16). */
			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (16 << 7) | (2 << 4)));
		} else {
			/* Loading into the base register would destroy the
			   address; assemble in TMP_REG1 instead. */
			if (reg == mem) {
				SLJIT_ASSERT(reg != TMP_REG1);
				tmp_reg = TMP_REG1;
			}

			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, tmp_reg, mem, TYPE2_TRANSFER_IMM(memw))));
		}

		/* Advance to the upper halfword, flipping direction if the
		   negative offset crosses zero. */
		if (!add) {
			memw -= 2;
			if (memw <= 0) {
				memw = -memw;
				add = 1;
			}
		} else
			memw += 2;

		if (type & SLJIT_MEM_STORE)
			return push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw)));

		/* reg = tmp_reg | (upper halfword << 16). */
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(HALF_SIZE | LOAD_DATA, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))));
		return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (16 << 7));
	}

	/* Byte-by-byte path: first byte plus 'steps' further bytes. */
	SLJIT_ASSERT(steps > 0);

	add = 1;
	if (memw < 0) {
		add = 0;
		memw = -memw;
	}

	if (type & SLJIT_MEM_STORE) {
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, reg, mem, memw)));
		/* TMP_REG2 = reg >> 8 (LSR by 8): next byte to store. */
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(reg) | (8 << 7) | (2 << 4)));

		while (1) {
			if (!add) {
				memw -= 1;
				if (memw == 0)
					add = 1;
			} else
				memw += 1;

			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE, add, TMP_REG2, mem, memw)));

			if (--steps == 0)
				return SLJIT_SUCCESS;

			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(TMP_REG2) | (8 << 7) | (2 << 4)));
		}
	}

	tmp_reg = reg;

	/* See the halfword case: avoid clobbering the base register. */
	if (reg == mem) {
		SLJIT_ASSERT(reg != TMP_REG1);
		tmp_reg = TMP_REG1;
	}

	shift = 8;
	FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, tmp_reg, mem, memw)));

	/* OR each subsequent byte into place at increasing shifts; the last
	   byte is loaded after the loop so it can carry the SIGNED flag. */
	do {
		if (!add) {
			memw -= 1;
			if (memw == 0)
				add = 1;
		} else
			memw += 1;

		if (steps > 1) {
			FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(BYTE_SIZE | LOAD_DATA, add, TMP_REG2, mem, memw)));
			FAIL_IF(push_inst(compiler, ORR | RD(tmp_reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7)));
			shift += 8;
		}
	} while (--steps != 0);

	flags |= LOAD_DATA;

	/* Signed top byte uses the type 2 (sign-extending) transfer form. */
	if (flags & SIGNED)
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, TYPE2_TRANSFER_IMM(memw))));
	else
		FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(flags, add, TMP_REG2, mem, memw)));

	return push_inst(compiler, ORR | RD(reg) | RN(tmp_reg) | RM(TMP_REG2) | (shift << 7));
}
3386
3387 #endif /* SLJIT_CONFIG_ARM_V5 */
3388
/* Loads/stores a register or a register pair from/to memory. Register pair
   transfers are split into two word accesses; when loading and the first
   register of the pair is also the base register, the order is swapped so
   the address is not destroyed before the second load. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK)) {
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
		ADJUST_LOCAL_OFFSET(mem, memw);
#endif /* SLJIT_CONFIG_ARM_V5 */

		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
	}

	ADJUST_LOCAL_OFFSET(mem, memw);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	/* ARMv5 unaligned pair: emulate each word separately. */
	if (type & (SLJIT_MEM_UNALIGNED | SLJIT_MEM_UNALIGNED_16)) {
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, (type & SLJIT_MEM_UNALIGNED_16) ? 0xfff - 6 : 0xfff - 7));

		if (!(type & SLJIT_MEM_STORE) && REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* Load the second word first to keep the base intact. */
			FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw)));
			return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw);
		}

		FAIL_IF(sljit_emit_mem_unaligned(compiler, type, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw));
		return sljit_emit_mem_unaligned(compiler, type, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw));
	}
#endif /* SLJIT_CONFIG_ARM_V5 */

	/* Two word slots needed: constrain the offset accordingly. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));

	flags = WORD_SIZE;

	if (!(type & SLJIT_MEM_STORE)) {
		if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			/* Load the second word first to keep the base intact. */
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
			return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
		}

		flags = WORD_SIZE | LOAD_DATA;
	}

	FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
	return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
}
3438
/* Load/store with base register update (pre- or post-indexed addressing).
   Returns SLJIT_ERR_UNSUPPORTED when the offset cannot be encoded in the
   required transfer type; with SLJIT_MEM_SUPP it only reports support. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_uw is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Type 1 transfers (word/unsigned byte) take a 12-bit immediate;
	   type 2 (halfword / signed byte load) only an 8-bit immediate. */
	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Sign-extending byte loads use the type 2 form. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	/* Reject offsets the chosen transfer type cannot encode. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Type 2 register-offset form has no shift field. */
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		memw &= 0x3;

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_uw)memw << 7));

		/* Bit 25: register-offset form for type 1 transfers. */
		if (is_type1_transfer)
			inst |= (1 << 25);

		/* Bit 24 clear = post-index; bit 21 = writeback (pre-index). */
		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24);
		else
			inst |= (1 << 21);

		return push_inst(compiler, inst);
	}

	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24);
	else
		inst |= (1 << 21);

	if (is_type1_transfer) {
		/* Bit 23: add (U bit); negative offsets are stored as
		   magnitude with U clear. */
		if (memw >= 0)
			inst |= (1 << 23);
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_uw)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23);
	else
		memw = -memw;

	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_uw)memw));
}
3539
/* Loads/stores a floating point register from/to (possibly unaligned)
   memory. 4-byte aligned accesses use VLDR/VSTR directly; otherwise the
   raw bits are shuttled through core registers and transferred with the
   integer (un)aligned access helpers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	sljit_s32 max_offset;
	sljit_s32 dst;
#endif /* SLJIT_CONFIG_ARM_V5 */

	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	/* 4-byte aligned: direct FPU transfer. */
	if (type & SLJIT_MEM_UNALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	if (type & SLJIT_MEM_STORE) {
		/* Lower word of freg -> TMP_REG2. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw);

		/* Doubles need room for two word stores. */
		max_offset = 0xfff - 7;
		if (type & SLJIT_MEM_UNALIGNED_16)
			max_offset++;

		FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset));
		mem |= SLJIT_MEM;

		FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw));

		/* Upper word of freg (0x80 selects the high half) -> TMP_REG2. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return sljit_emit_mem_unaligned(compiler, SLJIT_MOV | SLJIT_MEM_STORE | (type & SLJIT_MEM_UNALIGNED_16), TMP_REG2, mem, memw + 4);
	}

	max_offset = (type & SLJIT_32) ? 0xfff - 3 : 0xfff - 7;
	if (type & SLJIT_MEM_UNALIGNED_16)
		max_offset++;

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, max_offset));

	dst = TMP_REG1;

	/* Stack offset adjustment is not needed because dst
	   is not stored on the stack when mem is SLJIT_SP. */

	/* The address occupies TMP_REG1: use r3 as the data register,
	   spilling it to the stack if it is a live scratch. */
	if (mem == TMP_REG1) {
		dst = SLJIT_R3;

		if (compiler->scratches >= 4)
			FAIL_IF(push_inst(compiler, STR | (1 << 21) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8));
	}

	mem |= SLJIT_MEM;

	FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw));
	FAIL_IF(push_inst(compiler, VMOV | VN(freg) | RD(dst)));

	if (!(type & SLJIT_32)) {
		FAIL_IF(sljit_emit_mem_unaligned(compiler, SLJIT_MOV | (type & SLJIT_MEM_UNALIGNED_16), dst, mem, memw + 4));
		FAIL_IF(push_inst(compiler, VMOV | VN(freg) | 0x80 | RD(dst)));
	}

	/* Restore the spilled r3. */
	if (dst == SLJIT_R3 && compiler->scratches >= 4)
		FAIL_IF(push_inst(compiler, (LDR ^ (0x1 << 24)) | (0x1 << 23) | RN(SLJIT_SP) | RD(SLJIT_R3) | 8));
	return SLJIT_SUCCESS;
#else /* !SLJIT_CONFIG_ARM_V5 */
	/* ARMv6+: unaligned word accesses are supported by the core, so the
	   bits only need to pass through core registers. */
	if (type & SLJIT_MEM_STORE) {
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
#endif /* SLJIT_CONFIG_ARM_V5 */
}
3634
3635 #undef FPU_LOAD
3636
/* Emits code that loads init_value into dst and returns a record through
   which the constant can later be patched (sljit_set_const). ARMv5 loads
   it from a literal pool entry; ARMv7 uses a MOVW/MOVT pair. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
		EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_uw)init_value));
	/* Literal entries need relocation when the code is copied. */
	compiler->patches++;
#else
	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
3664
/* Emits code that loads the address of a future label into dst; the actual
   address is filled in during code generation. Encoding strategy matches
   sljit_emit_const (literal pool on ARMv5, MOVW/MOVT on ARMv7). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V5 && SLJIT_CONFIG_ARM_V5)
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
	/* Literal entries need relocation when the code is copied. */
	compiler->patches++;
#else
	PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
#endif

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return put_label;
}
3691
/* Patches the target of a previously emitted rewritable jump/call at addr
   to new_target (flushing the instruction cache). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	inline_set_jump_addr(addr, executable_offset, new_target, 1);
}
3696
/* Patches the value of a constant emitted by sljit_emit_const at addr
   to new_constant (flushing the instruction cache). */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1);
}
3701