1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
/* Report which float argument passing convention this build was compiled for. */
#ifdef __SOFTFP__
#define ARM_ABI_INFO " ABI:softfp"
#else
#define ARM_ABI_INFO " ABI:hardfp"
#endif
32 
/* Returns a human readable name of the target architecture, CPU info and ABI. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
43 
/* Length of an instruction word. */
typedef sljit_u32 sljit_ins;

/* Last register + 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 4)

#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* In ARM instruction words.
   Cache lines are usually 32 byte aligned. */
#define CONST_POOL_ALIGNMENT	8
#define CONST_POOL_EMPTY	0xffffffff

/* Rounds ptr up to a CONST_POOL_ALIGNMENT instruction-word boundary. */
#define ALIGN_INSTRUCTION(ptr) \
	(sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
/* Maximum distance (in instruction words) before the pool must be
   flushed; leaves room for the alignment padding words. */
#define MAX_DIFFERENCE(max_diff) \
	(((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))

/* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
/* Maps sljit integer register numbers to ARM register numbers; index 0
   is unused and the last three entries are TMP_REG1/TMP_REG2/TMP_PC. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
};

/* Maps sljit float register numbers to VFP register numbers. The second
   half of the table repeats the mapping; together with freg_ebit_map
   below it presumably encodes the SLJIT_F64_SECOND register aliases
   (see function_check_is_freg) — confirm against the sljit core. */
static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6,
	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
	7, 6
};

/* Extra encoding bit for each float register: 0 for the first half of
   the table, 1 for the second half. The VM/VD/VN macros below place
   this bit at opcode bit positions 5/22/7 respectively. */
static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
	0,
	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
	0, 0,
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	1, 1
};

/* Encode an integer register into the Rm/Rd/Rn fields of an opcode. */
#define RM(rm) ((sljit_ins)reg_map[rm])
#define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
#define RD(rd) ((sljit_ins)reg_map[rd] << 12)
#define RN(rn) ((sljit_ins)reg_map[rn] << 16)

/* Encode a float register (number plus extra bit) into the Vm/Vd/Vn fields. */
#define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
#define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
#define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
94 
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* The instruction includes the AL condition.
   INST_NAME - CONDITIONAL remove this flag. */
#define COND_MASK	0xf0000000
#define CONDITIONAL	0xe0000000
#define PUSH_POOL	0xff000000

/* Core ARM opcodes (data processing, memory, branches). */
#define ADC		0xe0a00000
#define ADD		0xe0800000
#define AND		0xe0000000
#define B		0xea000000
#define BIC		0xe1c00000
#define BKPT		0xe1200070
#define BL		0xeb000000
#define BLX		0xe12fff30
#define BX		0xe12fff10
#define CLZ		0xe16f0f10
#define CMN		0xe1600000
#define CMP		0xe1400000
#define EOR		0xe0200000
#define LDR		0xe5100000
#define LDR_POST	0xe4100000
#define LDREX		0xe1900f9f
#define LDREXB		0xe1d00f9f
#define LDREXH		0xe1f00f9f
#define MOV		0xe1a00000
#define MUL		0xe0000090
#define MVN		0xe1e00000
#define NOP		0xe1a00000
#define ORR		0xe1800000
#define PUSH		0xe92d0000
#define POP		0xe8bd0000
#define REV		0xe6bf0f30
#define REV16		0xe6bf0fb0
#define RSB		0xe0600000
#define RSC		0xe0e00000
#define SBC		0xe0c00000
#define SMULL		0xe0c00090
#define STR		0xe5000000
#define STREX		0xe1800f90
#define STREXB		0xe1c00f90
#define STREXH		0xe1e00f90
#define SUB		0xe0400000
#define SXTB		0xe6af0070
#define SXTH		0xe6bf0070
#define TST		0xe1000000
#define UMULL		0xe0800090
#define UXTB		0xe6ef0070
#define UXTH		0xe6ff0070
/* VFP / NEON opcodes. */
#define VABS_F32	0xeeb00ac0
#define VADD_F32	0xee300a00
#define VAND		0xf2000110
#define VCMP_F32	0xeeb40a40
#define VCVT_F32_S32	0xeeb80ac0
#define VCVT_F32_U32	0xeeb80a40
#define VCVT_F64_F32	0xeeb70ac0
#define VCVT_S32_F32	0xeebd0ac0
#define VDIV_F32	0xee800a00
#define VDUP		0xee800b10
#define VDUP_s		0xf3b00c00
#define VEOR		0xf3000110
#define VLD1		0xf4200000
#define VLD1_r		0xf4a00c00
#define VLD1_s		0xf4a00000
#define VLDR_F32	0xed100a00
#define VMOV_F32	0xeeb00a40
#define VMOV		0xee000a10
#define VMOV2		0xec400a10
#define VMOV_i		0xf2800010
#define VMOV_s		0xee000b10
#define VMOVN		0xf3b20200
#define VMRS		0xeef1fa10
#define VMUL_F32	0xee200a00
#define VNEG_F32	0xeeb10a40
#define VORR		0xf2200110
#define VPOP		0xecbd0b00
#define VPUSH		0xed2d0b00
#define VSHLL		0xf2800a10
#define VSHR		0xf2800010
#define VSRA		0xf2800110
#define VST1		0xf4000000
#define VST1_s		0xf4800000
#define VSTR_F32	0xed000a00
#define VSUB_F32	0xee300a40

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
/* Arm v7 specific instructions. */
#define MOVT		0xe3400000
#define MOVW		0xe3000000
#define RBIT		0xe6ff0f30
#endif
189 
190 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
191 
/* Argument checking helper: returns non-zero when fr is a usable float
   register (scratch, saved or temporary) for the current compiler state. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	/* An uninitialized register configuration cannot be validated. */
	if (compiler->scratches == -1)
		return 0;

	/* Map a "second half of a float pair" alias back to its base register. */
	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	if (fr >= SLJIT_FR0 && fr < SLJIT_FR0 + compiler->fscratches)
		return 1;
	if (fr <= SLJIT_FS0 && fr > SLJIT_FS0 - compiler->fsaveds)
		return 1;
	return fr >= SLJIT_TMP_FREGISTER_BASE
		&& fr < SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS;
}
204 
205 #endif /* SLJIT_ARGUMENT_CHECKS */
206 
207 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
208 
push_cpool(struct sljit_compiler * compiler)209 static sljit_s32 push_cpool(struct sljit_compiler *compiler)
210 {
211 	/* Pushing the constant pool into the instruction stream. */
212 	sljit_ins* inst;
213 	sljit_uw* cpool_ptr;
214 	sljit_uw* cpool_end;
215 	sljit_s32 i;
216 
217 	/* The label could point the address after the constant pool. */
218 	if (compiler->last_label && compiler->last_label->size == compiler->size)
219 		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;
220 
221 	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
222 	inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
223 	FAIL_IF(!inst);
224 	compiler->size++;
225 	*inst = 0xff000000 | compiler->cpool_fill;
226 
227 	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
228 		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
229 		FAIL_IF(!inst);
230 		compiler->size++;
231 		*inst = 0;
232 	}
233 
234 	cpool_ptr = compiler->cpool;
235 	cpool_end = cpool_ptr + compiler->cpool_fill;
236 	while (cpool_ptr < cpool_end) {
237 		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
238 		FAIL_IF(!inst);
239 		compiler->size++;
240 		*inst = *cpool_ptr++;
241 	}
242 	compiler->cpool_diff = CONST_POOL_EMPTY;
243 	compiler->cpool_fill = 0;
244 	return SLJIT_SUCCESS;
245 }
246 
push_inst(struct sljit_compiler * compiler,sljit_ins inst)247 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
248 {
249 	sljit_ins* ptr;
250 
251 	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
252 		FAIL_IF(push_cpool(compiler));
253 
254 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
255 	FAIL_IF(!ptr);
256 	compiler->size++;
257 	*ptr = inst;
258 	return SLJIT_SUCCESS;
259 }
260 
/* Emits a pc-relative load whose 12 bit offset field temporarily holds
   a constant pool index; the real byte offset is filled in later by
   patch_pc_relative_loads. Identical shareable literals are folded
   into a single pool entry. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	/* Flush first if the oldest pc-relative load would go out of range. */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Search for an existing non-unique entry with the same value. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			/* Pool is full: flush it and start a fresh one. */
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	/* The offset field must be free to carry the pool index. */
	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	/* Remember where the first unflushed literal reference lives. */
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
310 
push_inst_with_unique_literal(struct sljit_compiler * compiler,sljit_ins inst,sljit_uw literal)311 static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
312 {
313 	sljit_ins* ptr;
314 
315 	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
316 		FAIL_IF(push_cpool(compiler));
317 
318 	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
319 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
320 	FAIL_IF(!ptr);
321 	compiler->size++;
322 	*ptr = inst | compiler->cpool_fill;
323 
324 	compiler->cpool[compiler->cpool_fill] = literal;
325 	compiler->cpool_unique[compiler->cpool_fill] = 1;
326 	compiler->cpool_fill++;
327 	if (compiler->cpool_diff == CONST_POOL_EMPTY)
328 		compiler->cpool_diff = compiler->size;
329 	return SLJIT_SUCCESS;
330 }
331 
prepare_blx(struct sljit_compiler * compiler)332 static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
333 {
334 	/* Place for at least two instruction (doesn't matter whether the first has a literal). */
335 	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
336 		return push_cpool(compiler);
337 	return SLJIT_SUCCESS;
338 }
339 
/* Emits a blx through TMP_REG1. Must be emitted directly after the
   instruction prepared by prepare_blx so the pair can later be
   converted to a single bl when the target turns out to be in range. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	/* TMP_REG1 must not be mapped to lr (r14), which blx overwrites. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
348 
/* Rewrites the offset fields of the pc-relative loads between
   last_pc_patch and code_ptr: while emitting, the 12 bit field held a
   constant pool index; here it becomes a real byte offset into the
   pool at const_pool. Literals never referenced by a load (their
   users were converted to B/BL branches) are dropped, compacting the
   pool. Returns the number of literals kept. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) {
			diff = (sljit_uw)(const_pool - last_pc_patch);
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			/* First reference assigns the literal its compacted slot number. */
			if ((sljit_s32)const_pool[ind] < 0) {
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			/* diff is measured in words; pc reads two words ahead of the load. */
			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Entry sits one word behind pc: clear the U (add) bit
				   and use a subtracted offset of 4 instead. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
393 
/* In some rare occasions we may need future patches. The probability is close to 0 in practice. */
struct future_patch {
	/* Singly linked list of pending patches. */
	struct future_patch* next;
	/* Constant pool index whose original content was displaced. */
	sljit_s32 index;
	/* The displaced content, to be consumed when that index is processed. */
	sljit_s32 value;
};
400 
/* Stores the literal *buf_ptr into its compacted pool slot, using the
   slot numbers left behind by patch_pc_relative_loads. When the target
   slot still holds a slot number that has not been processed yet, that
   number is saved in a future_patch record before being overwritten.
   Returns SLJIT_ERR_ALLOC_FAILED if a patch record cannot be allocated
   (the whole patch list is freed in that case). */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* Check whether this index was displaced earlier; if so,
		   take the saved value and remove the patch record. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* A negative value marks an unreferenced (discarded) literal. */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* The destination slot has not been processed yet:
			   save its content before overwriting it. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* Allocation failed: release the entire patch list. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
453 
454 #else
455 
push_inst(struct sljit_compiler * compiler,sljit_ins inst)456 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
457 {
458 	sljit_ins* ptr;
459 
460 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
461 	FAIL_IF(!ptr);
462 	compiler->size++;
463 	*ptr = inst;
464 	return SLJIT_SUCCESS;
465 }
466 
emit_imm(struct sljit_compiler * compiler,sljit_s32 reg,sljit_sw imm)467 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
468 {
469 	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff)));
470 	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff));
471 }
472 
473 #endif
474 
/* Checks whether a jump can be shortened to a direct B/BL instruction
   (target within the +/-32MB branch range). Returns non-zero when the
   stream was rewritten in place and the caller may drop the now unused
   instruction word(s); returns 0 when the long form must be kept. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	/* Rewritable jumps must keep their long form so they can be retargeted later. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* A call occupies an extra word; step back to its first word. */
	if (jump->flags & IS_BL)
		code_ptr--;

	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (jump->flags & IS_BL) {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Keep the original condition bits of the following word. */
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* No word is removed here, so no value of 1 is returned. */
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)code_ptr - executable_offset);
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)code_ptr);
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		/* Replace the movw/movt pair with a single branch. */
		code_ptr -= 2;
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (code_ptr[2] & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
	return 0;
}
531 
/* Retargets a patchable jump in already generated code. On ARMv6,
   jump_ptr points to a (load address, mov_pc instruction) descriptor
   pair: when the new target fits in branch range the sequence is
   rewritten to B/BL, otherwise the constant pool entry is updated.
   On ARMv7, the movw/movt pair at jump_ptr is rewritten directly.
   flush_cache additionally toggles W^X protection and flushes the
   instruction cache around each modified range. */
static SLJIT_INLINE void inline_set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)jump_ptr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_ins mov_pc = ptr[1];
	/* A destination other than pc indicates a call sequence (mov + blx). */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			/* The blx of the call sequence becomes a nop. */
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the original load (and blx) if a branch was patched in. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		/* Update the constant pool entry; no icache flush is needed
		   because the pool is only read as data. */
		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Rewrite the immediate fields, keeping the destination register bits. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
626 
/* Forward declarations; the definitions appear later in this file. */
static sljit_uw get_imm(sljit_uw imm);
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
630 
/* Updates a patchable constant in already generated code. On ARMv6,
   addr points to a (load address, ldr literal instruction) descriptor
   pair: when the new constant fits in a single mov/mvn immediate the
   load is replaced, otherwise the original ldr is restored and its
   pool entry is updated. On ARMv7, the movw/movt pair is rewritten.
   flush_cache toggles W^X protection and flushes the icache. */
static SLJIT_INLINE void inline_set_const(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)addr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	/* Try a single mov with an encodable immediate (0xe3a00000 = mov imm). */
	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Try mvn with the inverted constant (0xe3e00000 = mvn imm). */
	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Fall back to the constant pool: locate the pool entry. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the original ldr literal if a mov/mvn was patched in earlier. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	/* Pool entries are read as data, so no icache flush is needed here. */
	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Rewrite the immediate fields, keeping the destination register bits. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
722 
sljit_generate_code(struct sljit_compiler * compiler)723 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
724 {
725 	struct sljit_memory_fragment *buf;
726 	sljit_ins *code;
727 	sljit_ins *code_ptr;
728 	sljit_ins *buf_ptr;
729 	sljit_ins *buf_end;
730 	sljit_uw size;
731 	sljit_uw word_count;
732 	sljit_uw next_addr;
733 	sljit_sw executable_offset;
734 	sljit_uw addr;
735 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
736 	sljit_uw cpool_size;
737 	sljit_uw cpool_skip_alignment;
738 	sljit_uw cpool_current_index;
739 	sljit_ins *cpool_start_address;
740 	sljit_ins *last_pc_patch;
741 	struct future_patch *first_patch;
742 #endif
743 
744 	struct sljit_label *label;
745 	struct sljit_jump *jump;
746 	struct sljit_const *const_;
747 	struct sljit_put_label *put_label;
748 
749 	CHECK_ERROR_PTR();
750 	CHECK_PTR(check_sljit_generate_code(compiler));
751 	reverse_buf(compiler);
752 
753 	/* Second code generation pass. */
754 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
755 	size = compiler->size + (compiler->patches << 1);
756 	if (compiler->cpool_fill > 0)
757 		size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
758 #else /* !SLJIT_CONFIG_ARM_V6 */
759 	size = compiler->size;
760 #endif /* SLJIT_CONFIG_ARM_V6 */
761 	code = (sljit_ins*)SLJIT_MALLOC_EXEC(size * sizeof(sljit_ins), compiler->exec_allocator_data);
762 	PTR_FAIL_WITH_EXEC_IF(code);
763 	buf = compiler->buf;
764 
765 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
766 	cpool_size = 0;
767 	cpool_skip_alignment = 0;
768 	cpool_current_index = 0;
769 	cpool_start_address = NULL;
770 	first_patch = NULL;
771 	last_pc_patch = code;
772 #endif /* SLJIT_CONFIG_ARM_V6 */
773 
774 	code_ptr = code;
775 	word_count = 0;
776 	next_addr = 1;
777 	executable_offset = SLJIT_EXEC_OFFSET(code);
778 
779 	label = compiler->labels;
780 	jump = compiler->jumps;
781 	const_ = compiler->consts;
782 	put_label = compiler->put_labels;
783 
784 	if (label && label->size == 0) {
785 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
786 		label = label->next;
787 	}
788 
789 	do {
790 		buf_ptr = (sljit_ins*)buf->memory;
791 		buf_end = buf_ptr + (buf->used_size >> 2);
792 		do {
793 			word_count++;
794 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
795 			if (cpool_size > 0) {
796 				if (cpool_skip_alignment > 0) {
797 					buf_ptr++;
798 					cpool_skip_alignment--;
799 				}
800 				else {
801 					if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
802 						SLJIT_FREE_EXEC(code, compiler->exec_allocator_data);
803 						compiler->error = SLJIT_ERR_ALLOC_FAILED;
804 						return NULL;
805 					}
806 					buf_ptr++;
807 					if (++cpool_current_index >= cpool_size) {
808 						SLJIT_ASSERT(!first_patch);
809 						cpool_size = 0;
810 						if (label && label->size == word_count) {
811 							/* Points after the current instruction. */
812 							label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
813 							label->size = (sljit_uw)(code_ptr - code);
814 							label = label->next;
815 
816 							next_addr = compute_next_addr(label, jump, const_, put_label);
817 						}
818 					}
819 				}
820 			}
821 			else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
822 #endif /* SLJIT_CONFIG_ARM_V6 */
823 				*code_ptr = *buf_ptr++;
824 				if (next_addr == word_count) {
825 					SLJIT_ASSERT(!label || label->size >= word_count);
826 					SLJIT_ASSERT(!jump || jump->addr >= word_count);
827 					SLJIT_ASSERT(!const_ || const_->addr >= word_count);
828 					SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
829 
830 				/* These structures are ordered by their address. */
831 					if (jump && jump->addr == word_count) {
832 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
833 						if (detect_jump_type(jump, code_ptr, code, executable_offset))
834 							code_ptr--;
835 						jump->addr = (sljit_uw)code_ptr;
836 #else /* !SLJIT_CONFIG_ARM_V6 */
837 						jump->addr = (sljit_uw)(code_ptr - 2);
838 						if (detect_jump_type(jump, code_ptr, code, executable_offset))
839 							code_ptr -= 2;
840 #endif /* SLJIT_CONFIG_ARM_V6 */
841 						jump = jump->next;
842 					}
843 					if (label && label->size == word_count) {
844 						/* code_ptr can be affected above. */
845 						label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr + 1, executable_offset);
846 						label->size = (sljit_uw)((code_ptr + 1) - code);
847 						label = label->next;
848 					}
849 					if (const_ && const_->addr == word_count) {
850 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
851 						const_->addr = (sljit_uw)code_ptr;
852 #else /* !SLJIT_CONFIG_ARM_V6 */
853 						const_->addr = (sljit_uw)(code_ptr - 1);
854 #endif /* SLJIT_CONFIG_ARM_V6 */
855 						const_ = const_->next;
856 					}
857 					if (put_label && put_label->addr == word_count) {
858 						SLJIT_ASSERT(put_label->label);
859 						put_label->addr = (sljit_uw)code_ptr;
860 						put_label = put_label->next;
861 					}
862 					next_addr = compute_next_addr(label, jump, const_, put_label);
863 				}
864 				code_ptr++;
865 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
866 			} else {
867 				/* Fortunately, no need to shift. */
868 				cpool_size = *buf_ptr++ & ~PUSH_POOL;
869 				SLJIT_ASSERT(cpool_size > 0);
870 				cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
871 				cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
872 				if (cpool_current_index > 0) {
873 					/* Unconditional branch. */
874 					*code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
875 					code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
876 				}
877 				cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
878 				cpool_current_index = 0;
879 				last_pc_patch = code_ptr;
880 			}
881 #endif /* SLJIT_CONFIG_ARM_V6 */
882 		} while (buf_ptr < buf_end);
883 		buf = buf->next;
884 	} while (buf);
885 
886 	SLJIT_ASSERT(!label);
887 	SLJIT_ASSERT(!jump);
888 	SLJIT_ASSERT(!const_);
889 	SLJIT_ASSERT(!put_label);
890 
891 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
892 	SLJIT_ASSERT(cpool_size == 0);
893 	if (compiler->cpool_fill > 0) {
894 		cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
895 		cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
896 		if (cpool_current_index > 0)
897 			code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
898 
899 		buf_ptr = compiler->cpool;
900 		buf_end = buf_ptr + compiler->cpool_fill;
901 		cpool_current_index = 0;
902 		while (buf_ptr < buf_end) {
903 			if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
904 				SLJIT_FREE_EXEC(code, compiler->exec_allocator_data);
905 				compiler->error = SLJIT_ERR_ALLOC_FAILED;
906 				return NULL;
907 			}
908 			buf_ptr++;
909 			cpool_current_index++;
910 		}
911 		SLJIT_ASSERT(!first_patch);
912 	}
913 #endif
914 
915 	jump = compiler->jumps;
916 	while (jump) {
917 		buf_ptr = (sljit_ins*)jump->addr;
918 
919 		if (jump->flags & PATCH_B) {
920 			addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
921 			if (!(jump->flags & JUMP_ADDR)) {
922 				SLJIT_ASSERT(jump->flags & JUMP_LABEL);
923 				SLJIT_ASSERT((sljit_sw)(jump->u.label->addr - addr) <= 0x01ffffff && (sljit_sw)(jump->u.label->addr - addr) >= -0x02000000);
924 				*buf_ptr |= ((jump->u.label->addr - addr) >> 2) & 0x00ffffff;
925 			}
926 			else {
927 				SLJIT_ASSERT((sljit_sw)(jump->u.target - addr) <= 0x01ffffff && (sljit_sw)(jump->u.target - addr) >= -0x02000000);
928 				*buf_ptr |= ((jump->u.target - addr) >> 2) & 0x00ffffff;
929 			}
930 		}
931 		else if (jump->flags & SLJIT_REWRITABLE_JUMP) {
932 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
933 			jump->addr = (sljit_uw)code_ptr;
934 			code_ptr[0] = (sljit_ins)buf_ptr;
935 			code_ptr[1] = *buf_ptr;
936 			inline_set_jump_addr((sljit_uw)code_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
937 			code_ptr += 2;
938 #else /* !SLJIT_CONFIG_ARM_V6 */
939 			inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
940 #endif /* SLJIT_CONFIG_ARM_V6 */
941 		} else {
942 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
943 			if (jump->flags & IS_BL)
944 				buf_ptr--;
945 			if (*buf_ptr & (1 << 23))
946 				buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
947 			else
948 				buf_ptr += 1;
949 			*buf_ptr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
950 #else /* !SLJIT_CONFIG_ARM_V6 */
951 			inline_set_jump_addr((sljit_uw)buf_ptr, executable_offset, (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target, 0);
952 #endif /* SLJIT_CONFIG_ARM_V6 */
953 		}
954 		jump = jump->next;
955 	}
956 
957 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
958 	const_ = compiler->consts;
959 	while (const_) {
960 		buf_ptr = (sljit_ins*)const_->addr;
961 		const_->addr = (sljit_uw)code_ptr;
962 
963 		code_ptr[0] = (sljit_ins)buf_ptr;
964 		code_ptr[1] = *buf_ptr;
965 		if (*buf_ptr & (1 << 23))
966 			buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
967 		else
968 			buf_ptr += 1;
969 		/* Set the value again (can be a simple constant). */
970 		inline_set_const((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
971 		code_ptr += 2;
972 
973 		const_ = const_->next;
974 	}
975 #endif /* SLJIT_CONFIG_ARM_V6 */
976 
977 	put_label = compiler->put_labels;
978 	while (put_label) {
979 		addr = put_label->label->addr;
980 		buf_ptr = (sljit_ins*)put_label->addr;
981 
982 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
983 		SLJIT_ASSERT((buf_ptr[0] & 0xffff0000) == 0xe59f0000);
984 		buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
985 #else /* !SLJIT_CONFIG_ARM_V6 */
986 		SLJIT_ASSERT((buf_ptr[-1] & 0xfff00000) == MOVW && (buf_ptr[0] & 0xfff00000) == MOVT);
987 		buf_ptr[-1] |= ((addr << 4) & 0xf0000) | (addr & 0xfff);
988 		buf_ptr[0] |= ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
989 #endif /* SLJIT_CONFIG_ARM_V6 */
990 		put_label = put_label->next;
991 	}
992 
993 	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)size);
994 
995 	compiler->error = SLJIT_ERR_COMPILED;
996 	compiler->executable_offset = executable_offset;
997 	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
998 
999 	code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1000 	code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1001 
1002 	SLJIT_CACHE_FLUSH(code, code_ptr);
1003 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1004 	return code;
1005 }
1006 
sljit_has_cpu_feature(sljit_s32 feature_type)1007 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1008 {
1009 	switch (feature_type) {
1010 	case SLJIT_HAS_FPU:
1011 	case SLJIT_HAS_F64_AS_F32_PAIR:
1012 #ifdef SLJIT_IS_FPU_AVAILABLE
1013 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
1014 #else
1015 		/* Available by default. */
1016 		return 1;
1017 #endif /* SLJIT_IS_FPU_AVAILABLE */
1018 	case SLJIT_HAS_SIMD:
1019 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1020 		return 0;
1021 #else
1022 #ifdef SLJIT_IS_FPU_AVAILABLE
1023 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
1024 #else
1025 		/* Available by default. */
1026 		return 1;
1027 #endif /* SLJIT_IS_FPU_AVAILABLE */
1028 #endif /* SLJIT_CONFIG_ARM_V6 */
1029 
1030 	case SLJIT_SIMD_REGS_ARE_PAIRS:
1031 	case SLJIT_HAS_CLZ:
1032 	case SLJIT_HAS_ROT:
1033 	case SLJIT_HAS_CMOV:
1034 	case SLJIT_HAS_REV:
1035 	case SLJIT_HAS_PREFETCH:
1036 	case SLJIT_HAS_COPY_F32:
1037 	case SLJIT_HAS_COPY_F64:
1038 	case SLJIT_HAS_ATOMIC:
1039 		return 1;
1040 
1041 	case SLJIT_HAS_CTZ:
1042 #if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
1043 		return 2;
1044 #else
1045 		return 1;
1046 #endif /* SLJIT_CONFIG_ARM_V6 */
1047 
1048 	default:
1049 		return 0;
1050 	}
1051 }
1052 
1053 /* --------------------------------------------------------------------- */
1054 /*  Entry, exit                                                          */
1055 /* --------------------------------------------------------------------- */
1056 
/* Creates an index in data_transfer_insts array. */
#define WORD_SIZE	0x00
#define BYTE_SIZE	0x01
#define HALF_SIZE	0x02
#define PRELOAD		0x03
#define SIGNED		0x04
#define LOAD_DATA	0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM		0x10
#define ALLOW_INV_IMM		0x20
#define ALLOW_ANY_IMM		(ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM		0x40
#define ALLOW_DOUBLE_IMM	0x80

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations. */

static const sljit_ins data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* Assembles a load/store: base opcode from the table above, the
   add/subtract-offset direction bit (bit 23), the register fields
   and the offset/index encoding passed in arg. */
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Splits an 8 bit offset into the two 4 bit halves used by the type2
   transfers and sets the immediate-form bit (bit 22). */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

/* Note: src1 fills the M field, src2 the N field. */
#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))

/* Flags for emit_op: */
  /* Arguments are swapped. */
#define ARGS_SWAPPED	0x01
  /* Inverted immediate. */
#define INV_IMM		0x02
  /* Source and destination is register. */
#define MOVE_REG_CONV	0x04
  /* Unused return value. */
#define UNUSED_RETURN	0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	(1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM	(1 << 25)

/* Generic operation emitter; forward declared here because
   sljit_emit_enter uses it for large stack adjustments. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
1131 
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1132 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1133 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1134 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1135 {
1136 	sljit_uw imm, offset;
1137 	sljit_s32 i, tmp, size, word_arg_count;
1138 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1139 #ifdef __SOFTFP__
1140 	sljit_u32 float_arg_count;
1141 #else
1142 	sljit_u32 old_offset, f32_offset;
1143 	sljit_u32 remap[3];
1144 	sljit_u32 *remap_ptr = remap;
1145 #endif
1146 
1147 	CHECK_ERROR();
1148 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1149 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1150 
1151 	imm = 0;
1152 
1153 	tmp = SLJIT_S0 - saveds;
1154 	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
1155 		imm |= (sljit_uw)1 << reg_map[i];
1156 
1157 	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
1158 		imm |= (sljit_uw)1 << reg_map[i];
1159 
1160 	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);
1161 
1162 	/* Push saved and temporary registers
1163 	   multiple registers: stmdb sp!, {..., lr}
1164 	   single register: str reg, [sp, #-4]! */
1165 	if (imm != 0)
1166 		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
1167 	else
1168 		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));
1169 
1170 	/* Stack must be aligned to 8 bytes: */
1171 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);
1172 
1173 	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
1174 		if ((size & SSIZE_OF(sw)) != 0) {
1175 			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
1176 			size += SSIZE_OF(sw);
1177 		}
1178 
1179 		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
1180 			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
1181 		} else {
1182 			if (fsaveds > 0)
1183 				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
1184 			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
1185 				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
1186 		}
1187 	}
1188 
1189 	local_size = ((size + local_size + 0x7) & ~0x7) - size;
1190 	compiler->local_size = local_size;
1191 
1192 	if (options & SLJIT_ENTER_REG_ARG)
1193 		arg_types = 0;
1194 
1195 	arg_types >>= SLJIT_ARG_SHIFT;
1196 	word_arg_count = 0;
1197 	saved_arg_count = 0;
1198 #ifdef __SOFTFP__
1199 	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);
1200 
1201 	offset = 0;
1202 	float_arg_count = 0;
1203 
1204 	while (arg_types) {
1205 		switch (arg_types & SLJIT_ARG_MASK) {
1206 		case SLJIT_ARG_TYPE_F64:
1207 			if (offset & 0x7)
1208 				offset += sizeof(sljit_sw);
1209 
1210 			if (offset < 4 * sizeof(sljit_sw))
1211 				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
1212 			else
1213 				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
1214 						| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
1215 			float_arg_count++;
1216 			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
1217 			break;
1218 		case SLJIT_ARG_TYPE_F32:
1219 			if (offset < 4 * sizeof(sljit_sw))
1220 				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
1221 			else
1222 				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
1223 						| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
1224 			float_arg_count++;
1225 			break;
1226 		default:
1227 			word_arg_count++;
1228 
1229 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1230 				tmp = SLJIT_S0 - saved_arg_count;
1231 				saved_arg_count++;
1232 			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
1233 				tmp = word_arg_count;
1234 			else
1235 				break;
1236 
1237 			if (offset < 4 * sizeof(sljit_sw))
1238 				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
1239 			else
1240 				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
1241 			break;
1242 		}
1243 
1244 		offset += sizeof(sljit_sw);
1245 		arg_types >>= SLJIT_ARG_SHIFT;
1246 	}
1247 
1248 	compiler->args_size = offset;
1249 #else
1250 	offset = SLJIT_FR0;
1251 	old_offset = SLJIT_FR0;
1252 	f32_offset = 0;
1253 
1254 	while (arg_types) {
1255 		switch (arg_types & SLJIT_ARG_MASK) {
1256 		case SLJIT_ARG_TYPE_F64:
1257 			if (offset != old_offset)
1258 				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
1259 			old_offset++;
1260 			offset++;
1261 			break;
1262 		case SLJIT_ARG_TYPE_F32:
1263 			if (f32_offset != 0) {
1264 				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
1265 				f32_offset = 0;
1266 			} else {
1267 				if (offset != old_offset)
1268 					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
1269 				f32_offset = old_offset;
1270 				old_offset++;
1271 			}
1272 			offset++;
1273 			break;
1274 		default:
1275 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1276 				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
1277 				saved_arg_count++;
1278 			}
1279 
1280 			word_arg_count++;
1281 			break;
1282 		}
1283 		arg_types >>= SLJIT_ARG_SHIFT;
1284 	}
1285 
1286 	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));
1287 
1288 	while (remap_ptr > remap)
1289 		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
1290 #endif
1291 
1292 	if (local_size > 0)
1293 		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));
1294 
1295 	return SLJIT_SUCCESS;
1296 }
1297 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1298 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1299 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1300 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1301 {
1302 	sljit_s32 size;
1303 
1304 	CHECK_ERROR();
1305 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1306 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1307 
1308 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1309 
1310 	/* Doubles are saved, so alignment is unaffected. */
1311 	if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1312 		size += SSIZE_OF(sw);
1313 
1314 	compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1315 	return SLJIT_SUCCESS;
1316 }
1317 
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1318 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1319 {
1320 	sljit_uw imm2 = get_imm(imm);
1321 
1322 	if (imm2 == 0)
1323 		return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);
1324 
1325 	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
1326 }
1327 
emit_stack_frame_release(struct sljit_compiler * compiler,sljit_s32 frame_size)1328 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
1329 {
1330 	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
1331 	sljit_s32 restored_reg = 0;
1332 	sljit_s32 lr_dst = TMP_PC;
1333 	sljit_uw reg_list = 0;
1334 
1335 	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);
1336 
1337 	local_size = compiler->local_size;
1338 	fscratches = compiler->fscratches;
1339 	fsaveds = compiler->fsaveds;
1340 
1341 	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
1342 		if (local_size > 0)
1343 			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));
1344 
1345 		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
1346 			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
1347 		} else {
1348 			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
1349 				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
1350 			if (fsaveds > 0)
1351 				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
1352 		}
1353 
1354 		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
1355 	}
1356 
1357 	if (frame_size < 0) {
1358 		lr_dst = TMP_REG2;
1359 		frame_size = 0;
1360 	} else if (frame_size > 0) {
1361 		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
1362 		lr_dst = 0;
1363 		frame_size &= ~0x7;
1364 	}
1365 
1366 	if (lr_dst != 0)
1367 		reg_list |= (sljit_uw)1 << reg_map[lr_dst];
1368 
1369 	tmp = SLJIT_S0 - compiler->saveds;
1370 	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1371 	if (tmp < i) {
1372 		restored_reg = i;
1373 		do {
1374 			reg_list |= (sljit_uw)1 << reg_map[i];
1375 		} while (--i > tmp);
1376 	}
1377 
1378 	i = compiler->scratches;
1379 	if (i >= SLJIT_FIRST_SAVED_REG) {
1380 		restored_reg = i;
1381 		do {
1382 			reg_list |= (sljit_uw)1 << reg_map[i];
1383 		} while (--i >= SLJIT_FIRST_SAVED_REG);
1384 	}
1385 
1386 	if (lr_dst == TMP_REG2 && reg_list == 0) {
1387 		restored_reg = TMP_REG2;
1388 		lr_dst = 0;
1389 	}
1390 
1391 	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
1392 		/* The local_size does not include the saved registers. */
1393 		tmp = 0;
1394 		if (reg_list != 0) {
1395 			tmp = 2;
1396 			if (local_size <= 0xfff) {
1397 				if (local_size == 0) {
1398 					SLJIT_ASSERT(restored_reg != TMP_REG2);
1399 					if (frame_size == 0)
1400 						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
1401 					if (frame_size > 2 * SSIZE_OF(sw))
1402 						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
1403 				}
1404 
1405 				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
1406 				tmp = 1;
1407 			} else if (frame_size == 0) {
1408 				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
1409 				tmp = 3;
1410 			}
1411 
1412 			/* Place for the saved register. */
1413 			if (restored_reg != TMP_REG2)
1414 				local_size += SSIZE_OF(sw);
1415 		}
1416 
1417 		/* Place for the lr register. */
1418 		local_size += SSIZE_OF(sw);
1419 
1420 		if (frame_size > local_size)
1421 			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
1422 		else if (frame_size < local_size)
1423 			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));
1424 
1425 		if (tmp <= 1)
1426 			return SLJIT_SUCCESS;
1427 
1428 		if (tmp == 2) {
1429 			frame_size -= SSIZE_OF(sw);
1430 			if (restored_reg != TMP_REG2)
1431 				frame_size -= SSIZE_OF(sw);
1432 
1433 			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
1434 		}
1435 
1436 		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
1437 		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
1438 	}
1439 
1440 	if (local_size > 0)
1441 		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));
1442 
1443 	/* Pop saved and temporary registers
1444 	   multiple registers: ldmia sp!, {...}
1445 	   single register: ldr reg, [sp], #4 */
1446 	if ((reg_list & (reg_list - 1)) == 0) {
1447 		SLJIT_ASSERT(lr_dst != 0);
1448 		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);
1449 
1450 		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
1451 	}
1452 
1453 	FAIL_IF(push_inst(compiler, POP | reg_list));
1454 
1455 	if (frame_size > 0)
1456 		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));
1457 
1458 	if (lr_dst != 0)
1459 		return SLJIT_SUCCESS;
1460 
1461 	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
1462 }
1463 
sljit_emit_return_void(struct sljit_compiler * compiler)1464 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1465 {
1466 	CHECK_ERROR();
1467 	CHECK(check_sljit_emit_return_void(compiler));
1468 
1469 	return emit_stack_frame_release(compiler, 0);
1470 }
1471 
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1472 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1473 	sljit_s32 src, sljit_sw srcw)
1474 {
1475 	CHECK_ERROR();
1476 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1477 
1478 	if (src & SLJIT_MEM) {
1479 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
1480 		src = TMP_REG1;
1481 		srcw = 0;
1482 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1483 		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
1484 		src = TMP_REG1;
1485 		srcw = 0;
1486 	}
1487 
1488 	FAIL_IF(emit_stack_frame_release(compiler, 1));
1489 
1490 	SLJIT_SKIP_CHECKS(compiler);
1491 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1492 }
1493 
1494 /* --------------------------------------------------------------------- */
1495 /*  Operators                                                            */
1496 /* --------------------------------------------------------------------- */
1497 
emit_single_op(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 flags,sljit_uw dst,sljit_uw src1,sljit_uw src2)1498 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1499 	sljit_uw dst, sljit_uw src1, sljit_uw src2)
1500 {
1501 	sljit_s32 is_masked;
1502 	sljit_uw shift_type;
1503 
1504 	switch (op) {
1505 	case SLJIT_MOV:
1506 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1507 		if (dst != src2) {
1508 			if (src2 & SRC2_IMM) {
1509 				return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1510 			}
1511 			return push_inst(compiler, MOV | RD(dst) | RM(src2));
1512 		}
1513 		return SLJIT_SUCCESS;
1514 
1515 	case SLJIT_MOV_U8:
1516 	case SLJIT_MOV_S8:
1517 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1518 		if (flags & MOVE_REG_CONV)
1519 			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
1520 
1521 		if (dst != src2) {
1522 			SLJIT_ASSERT(src2 & SRC2_IMM);
1523 			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1524 		}
1525 		return SLJIT_SUCCESS;
1526 
1527 	case SLJIT_MOV_U16:
1528 	case SLJIT_MOV_S16:
1529 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1530 		if (flags & MOVE_REG_CONV)
1531 			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
1532 
1533 		if (dst != src2) {
1534 			SLJIT_ASSERT(src2 & SRC2_IMM);
1535 			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1536 		}
1537 		return SLJIT_SUCCESS;
1538 
1539 	case SLJIT_CLZ:
1540 		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1541 		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
1542 		return SLJIT_SUCCESS;
1543 
1544 	case SLJIT_CTZ:
1545 		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1546 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1547 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1548 		FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG1) | RN(src2) | 0));
1549 		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | RM(TMP_REG1)));
1550 		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG2)));
1551 		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
1552 		return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
1553 #else /* !SLJIT_CONFIG_ARM_V6 */
1554 		FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
1555 		return push_inst(compiler, CLZ | RD(dst) | RM(dst));
1556 #endif /* SLJIT_CONFIG_ARM_V6 */
1557 
1558 	case SLJIT_REV:
1559 	case SLJIT_REV_U32:
1560 	case SLJIT_REV_S32:
1561 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1562 		return push_inst(compiler, REV | RD(dst) | RM(src2));
1563 
1564 	case SLJIT_REV_U16:
1565 	case SLJIT_REV_S16:
1566 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED) && src2 != TMP_REG1 && dst != TMP_REG1);
1567 		FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
1568 		if (dst == TMP_REG2 || (src2 == TMP_REG2 && op == SLJIT_REV_U16))
1569 			return SLJIT_SUCCESS;
1570 		return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
1571 	case SLJIT_ADD:
1572 		SLJIT_ASSERT(!(flags & INV_IMM));
1573 
1574 		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
1575 			return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1576 		return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1577 
1578 	case SLJIT_ADDC:
1579 		SLJIT_ASSERT(!(flags & INV_IMM));
1580 		return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1581 
1582 	case SLJIT_SUB:
1583 		SLJIT_ASSERT(!(flags & INV_IMM));
1584 
1585 		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
1586 			return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1587 
1588 		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
1589 			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1590 
1591 	case SLJIT_SUBC:
1592 		SLJIT_ASSERT(!(flags & INV_IMM));
1593 		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
1594 			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1595 
1596 	case SLJIT_MUL:
1597 		SLJIT_ASSERT(!(flags & INV_IMM));
1598 		SLJIT_ASSERT(!(src2 & SRC2_IMM));
1599 		compiler->status_flags_state = 0;
1600 
1601 		if (!(flags & SET_FLAGS))
1602 			return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));
1603 
1604 		FAIL_IF(push_inst(compiler, SMULL | RN(TMP_REG1) | RD(dst) | RM8(src2) | RM(src1)));
1605 
1606 		/* cmp TMP_REG1, dst asr #31. */
1607 		return push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | RM(dst) | 0xfc0);
1608 
1609 	case SLJIT_AND:
1610 		if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
1611 			return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1612 		return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
1613 			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1614 
1615 	case SLJIT_OR:
1616 		SLJIT_ASSERT(!(flags & INV_IMM));
1617 		return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1618 
1619 	case SLJIT_XOR:
1620 		if (flags & INV_IMM) {
1621 			SLJIT_ASSERT(src2 == SRC2_IMM);
1622 			return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
1623 		}
1624 		return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1625 
1626 	case SLJIT_SHL:
1627 	case SLJIT_MSHL:
1628 		shift_type = 0;
1629 		is_masked = op == SLJIT_MSHL;
1630 		break;
1631 
1632 	case SLJIT_LSHR:
1633 	case SLJIT_MLSHR:
1634 		shift_type = 1;
1635 		is_masked = op == SLJIT_MLSHR;
1636 		break;
1637 
1638 	case SLJIT_ASHR:
1639 	case SLJIT_MASHR:
1640 		shift_type = 2;
1641 		is_masked = op == SLJIT_MASHR;
1642 		break;
1643 
1644 	case SLJIT_ROTL:
1645 		if (compiler->shift_imm == 0x20) {
1646 			FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
1647 			src2 = TMP_REG2;
1648 		} else
1649 			compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
1650 		/* fallthrough */
1651 
1652 	case SLJIT_ROTR:
1653 		shift_type = 3;
1654 		is_masked = 0;
1655 		break;
1656 
1657 	default:
1658 		SLJIT_UNREACHABLE();
1659 		return SLJIT_SUCCESS;
1660 	}
1661 
1662 	SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));
1663 
1664 	if (compiler->shift_imm != 0x20) {
1665 		SLJIT_ASSERT(src1 == TMP_REG1);
1666 
1667 		if (compiler->shift_imm != 0)
1668 			return push_inst(compiler, MOV | (flags & SET_FLAGS) |
1669 				RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
1670 		return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
1671 	}
1672 
1673 	SLJIT_ASSERT(src1 != TMP_REG2);
1674 
1675 	if (is_masked) {
1676 		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
1677 		src2 = TMP_REG2;
1678 	}
1679 
1680 	return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
1681 		| RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
1682 }
1683 
1684 #undef EMIT_SHIFT_INS_AND_RETURN
1685 
1686 /* Tests whether the immediate can be stored in the 12 bit imm field.
1687    Returns with 0 if not possible. */
get_imm(sljit_uw imm)1688 static sljit_uw get_imm(sljit_uw imm)
1689 {
1690 	sljit_u32 rol;
1691 
1692 	if (imm <= 0xff)
1693 		return SRC2_IMM | imm;
1694 
1695 	if (!(imm & 0xff000000)) {
1696 		imm <<= 8;
1697 		rol = 8;
1698 	} else {
1699 		imm = (imm << 24) | (imm >> 8);
1700 		rol = 0;
1701 	}
1702 
1703 	if (!(imm & 0xff000000)) {
1704 		imm <<= 8;
1705 		rol += 4;
1706 	}
1707 
1708 	if (!(imm & 0xf0000000)) {
1709 		imm <<= 4;
1710 		rol += 2;
1711 	}
1712 
1713 	if (!(imm & 0xc0000000)) {
1714 		imm <<= 2;
1715 		rol += 1;
1716 	}
1717 
1718 	if (!(imm & 0x00ffffff))
1719 		return SRC2_IMM | (imm >> 24) | (rol << 8);
1720 	return 0;
1721 }
1722 
/* Tries to split 'imm' into a pair of ARM data processing immediates
   that can be combined by two instructions (e.g. MOV + ORR, or
   MVN + BIC for an inverted value). Returns the first immediate, or 0
   when no such pair exists; the second one is stored into '*imm2'. */
static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continous zero bit). */
	mask = 0xff000000;
	rol = 8;
	while (1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}

		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* No aligned zero byte found yet: rol by 8 and retry the
			   scan on the rotated value. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				/* No zero byte anywhere: not representable. */
				if (mask & 0x3)
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		/* Both remaining bytes sit in the middle: split directly. */
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		*imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	} else if (imm & 0xc0000000) {
		/* The first byte is already at the top: extract it, then
		   normalize the remainder for the second immediate. */
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		/* The remainder must fit into a single rotated byte. */
		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	} else {
		/* Align the first byte to the top before extracting it. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		/* The remainder must fit into a single rotated byte. */
		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	return imm1;
}
1828 
/* Loads the constant 'imm' into 'reg' using as few instructions as
   possible: a single MOV/MVN when the value (or its complement) is an
   encodable immediate, a two instruction sequence or a literal pool
   load on ARMv6, and a MOVW/MOVT pair otherwise on ARMv7. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_uw imm1, imm2;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* MOVW materializes any 16 bit constant in one instruction. */
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	/* The bitwise complement may be encodable even when imm is not. */
	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Create imm by 2 inst. */
	imm1 = compute_imm(imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
		return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
	}

	imm1 = compute_imm(~imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
		return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
	}

	/* Load integer. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* MOVW sets the low 16 bits, MOVT the high 16 bits. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */
}
1871 
/* Emits a single load or store ('flags' selects direction and data
   size) between register 'reg' and the memory operand 'arg'/'argw'.
   'tmp_reg' may be clobbered for address computations. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw imm, offset_reg, tmp;
	/* Type1 (word/byte) transfers encode a 12 bit offset, type2
	   (halfword/signed) transfers only an 8 bit offset. */
	sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
	sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: load a nearby rounded base into tmp_reg so
		   the remaining delta fits into the offset field. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);

		FAIL_IF(load_immediate(compiler, tmp_reg, tmp));

		argw -= (sljit_sw)tmp;
		tmp = 1;

		/* Negative deltas are encoded as positive with the
		   up/down bit ('tmp') cleared. */
		if (argw < 0) {
			argw = -argw;
			tmp = 0;
		}

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
			(mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
	}

	if (arg & OFFS_REG_MASK) {
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3;

		if (argw != 0 && (mask == 0xff)) {
			/* Type2 transfers cannot shift the index register:
			   compute the address with a separate shifted ADD. */
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
			RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
	}

	arg &= REG_MASK;

	if (argw > mask) {
		/* Move the base closer with a single ADD when the rounded
		   offset is an encodable immediate. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
			argw -= (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	} else if (argw < -mask) {
		/* Same adjustment in the negative direction using SUB. */
		tmp = (sljit_uw)(-argw & (sign | mask));
		tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
			argw += (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	}

	if (argw <= mask && argw >= -mask) {
		if (argw >= 0) {
			if (mask == 0xff)
				argw = TYPE2_TRANSFER_IMM(argw);
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
		}

		/* Negative offsets use a positive value with the up/down
		   bit cleared. */
		argw = -argw;

		if (mask == 0xff)
			argw = TYPE2_TRANSFER_IMM(argw);

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
	}

	/* Fallback: materialize the full offset in tmp_reg and use a
	   register-offset transfer. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
		RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
}
1962 
/* Emits code for a unary or binary operation. 'inp_flags' selects the
   memory access size and which immediate operand forms may be used
   (plain, inverted, negated, or two-instruction immediates). */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefer registers and simple constants. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg = 0;
	sljit_s32 src2_reg = 0;
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	/* Inverse operation (e.g. SUB for ADD): lets a negated constant
	   be used as the immediate operand. */
	sljit_s32 neg_op = 0;
	sljit_u32 imm2;

	op = GET_OPCODE(op);

	/* Two-instruction immediate sequences are not used when the status
	   flags must be set. */
	if (flags & SET_FLAGS)
		inp_flags &= ~ALLOW_DOUBLE_IMM;

	if (dst == TMP_REG2)
		flags |= UNUSED_RETURN;

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	if (inp_flags & ALLOW_NEG_IMM) {
		switch (op) {
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUB;
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUBC;
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADD;
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADDC;
			break;
		}
	}

	/* Try to encode one of the source operands as an instruction
	   immediate (possibly inverted, or negated via 'neg_op'). */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
			if (src2_reg)
				break;

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}

			if (neg_op != 0) {
				/* Negate for ADD/SUB, invert for the carry variants. */
				src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
				if (src2_reg) {
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}

			if (neg_op >= SLJIT_SUB) {
				/* Note: additive operation (commutative). */
				SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);

				src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
				if (src2_reg) {
					src1 = src2;
					src1w = src2w;
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	} else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
		/* An immediate src1 is loaded into a register unless the
		   double-immediate path further below may handle it. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1_reg = TMP_REG1;
	}

	/* Destination. */
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOV_P) {
		if (dst & SLJIT_MEM) {
			if (inp_flags & BYTE_SIZE)
				inp_flags &= ~SIGNED;

			/* Register to memory move needs no intermediate step. */
			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG2);
		}

		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= MOVE_REG_CONV;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = (op <= SLJIT_MOV_P) ? dst_reg : TMP_REG2;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (src2 & SLJIT_MEM)
			FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG2));
		else if (!(inp_flags & ALLOW_DOUBLE_IMM))
			FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
		else {
			SLJIT_ASSERT(!(flags & SET_FLAGS));

			if (src1_reg == 0) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
				src1_reg = TMP_REG1;
			}

			/* Split src2w into two immediates applied by two
			   instructions; the second half is stored in imm2. */
			src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);

			if (src2_reg == 0 && neg_op != 0) {
				src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
				if (src2_reg != 0)
					op = neg_op;
			}

			if (src2_reg == 0) {
				FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)src2w));
				src2_reg = TMP_REG2;
			} else {
				/* Emit the first half here; the common emit at the
				   end of the function applies the second half. */
				FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
				src1_reg = dst_reg;
				src2_reg = (sljit_s32)imm2;

				/* The carry is consumed by the first instruction. */
				if (op == SLJIT_ADDC)
					op = SLJIT_ADD;
				else if (op == SLJIT_SUBC)
					op = SLJIT_SUB;
			}
		}
	}

	if (src1_reg == 0) {
		SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));

		/* Same double-immediate handling for src1. */
		src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);

		if (src1_reg == 0 && neg_op != 0) {
			src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
			if (src1_reg != 0)
				op = neg_op;
		}

		if (src1_reg == 0) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
			src1_reg = TMP_REG1;
		} else {
			FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
			src1_reg = dst_reg;
			src2_reg = (sljit_s32)imm2;

			if (op == SLJIT_ADDC)
				op = SLJIT_ADD;
		}
	}

	FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Store the result from dst_reg to the memory destination. */
	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
2171 
2172 #ifdef __cplusplus
2173 extern "C" {
2174 #endif
2175 
2176 #if defined(__GNUC__)
2177 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2178 extern int __aeabi_idivmod(int numerator, int denominator);
2179 #else
2180 #error "Software divmod functions are needed"
2181 #endif
2182 
2183 #ifdef __cplusplus
2184 }
2185 #endif
2186 
/* Emits a zero-operand operation. The divide opcodes call the EABI
   software divide helpers, so the caller-saved registers r1-r3 that
   may hold live scratch values are spilled around the call. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_uw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 32x32 -> 64 bit multiply; the result pair goes to R1:R0. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Collect the hardware registers (r1-r3) clobbered by the
		   helper call that hold live scratch values. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			/* Plain division keeps r1 as well. */
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		/* (op | 0x2) maps the DIVMOD opcodes onto their DIV
		   counterparts when selecting the signedness of the helper. */
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		/* Restore the spilled registers in reverse order. */
		if (saved_reg_count > 0) {
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No code is emitted for these hints on this target. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2262 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2263 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2264 	sljit_s32 dst, sljit_sw dstw,
2265 	sljit_s32 src, sljit_sw srcw)
2266 {
2267 	CHECK_ERROR();
2268 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2269 	ADJUST_LOCAL_OFFSET(dst, dstw);
2270 	ADJUST_LOCAL_OFFSET(src, srcw);
2271 
2272 	switch (GET_OPCODE(op)) {
2273 	case SLJIT_MOV:
2274 	case SLJIT_MOV_U32:
2275 	case SLJIT_MOV_S32:
2276 	case SLJIT_MOV32:
2277 	case SLJIT_MOV_P:
2278 		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
2279 
2280 	case SLJIT_MOV_U8:
2281 		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
2282 
2283 	case SLJIT_MOV_S8:
2284 		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
2285 
2286 	case SLJIT_MOV_U16:
2287 		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
2288 
2289 	case SLJIT_MOV_S16:
2290 		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
2291 
2292 	case SLJIT_CLZ:
2293 	case SLJIT_CTZ:
2294 	case SLJIT_REV:
2295 	case SLJIT_REV_U32:
2296 	case SLJIT_REV_S32:
2297 		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2298 
2299 	case SLJIT_REV_U16:
2300 	case SLJIT_REV_S16:
2301 		return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
2302 	}
2303 
2304 	return SLJIT_SUCCESS;
2305 }
2306 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2307 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2308 	sljit_s32 dst, sljit_sw dstw,
2309 	sljit_s32 src1, sljit_sw src1w,
2310 	sljit_s32 src2, sljit_sw src2w)
2311 {
2312 	sljit_s32 inp_flags;
2313 
2314 	CHECK_ERROR();
2315 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2316 	ADJUST_LOCAL_OFFSET(dst, dstw);
2317 	ADJUST_LOCAL_OFFSET(src1, src1w);
2318 	ADJUST_LOCAL_OFFSET(src2, src2w);
2319 
2320 	switch (GET_OPCODE(op)) {
2321 	case SLJIT_ADD:
2322 	case SLJIT_ADDC:
2323 	case SLJIT_SUB:
2324 	case SLJIT_SUBC:
2325 		return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2326 
2327 	case SLJIT_OR:
2328 		return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2329 
2330 	case SLJIT_XOR:
2331 		inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
2332 		if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
2333 			inp_flags |= ALLOW_INV_IMM;
2334 		}
2335 		return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);
2336 
2337 	case SLJIT_MUL:
2338 		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2339 
2340 	case SLJIT_AND:
2341 		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
2342 
2343 	case SLJIT_SHL:
2344 	case SLJIT_MSHL:
2345 	case SLJIT_LSHR:
2346 	case SLJIT_MLSHR:
2347 	case SLJIT_ASHR:
2348 	case SLJIT_MASHR:
2349 	case SLJIT_ROTL:
2350 	case SLJIT_ROTR:
2351 		if (src2 == SLJIT_IMM) {
2352 			compiler->shift_imm = src2w & 0x1f;
2353 			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
2354 		} else {
2355 			compiler->shift_imm = 0x20;
2356 			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2357 		}
2358 	}
2359 
2360 	return SLJIT_SUCCESS;
2361 }
2362 
/* Same as sljit_emit_op2, but the arithmetic result is discarded and
   only the status flags are produced. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	/* Using TMP_REG2 as destination marks the result as unused. */
	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
}
2373 
/* Emits a funnel shift: dst_reg receives src1_reg shifted by
   src3/src3w, with the vacated bit positions filled from src2_reg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		/* Shifting a register into itself is a rotation. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Shift type of ROR is 3. */
	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Shift src1 into place, then OR in src2 shifted by the
		   complementary amount (32 - src3w) in the other direction. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7)));
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		/* Mask the shift amount into the 0-31 range. */
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	/* dst = src1 shifted by src3; then src2 is pre-shifted by one and
	   shifted again by (src3 ^ 31), computed with the EOR below,
	   before being merged into dst. */
	FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg)));
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7)));
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
	return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1));
}
2422 
/* Emits an operation with a single source operand and no destination
   (fast return and prefetch hints). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		/* TMP_REG2 is mapped to the link register (r14). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));

		return push_inst(compiler, BX | RM(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* No code is emitted for this hint on this target. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch variants map to the same preload hint. */
		SLJIT_ASSERT(src & SLJIT_MEM);
		return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2452 
/* Emits an operation producing a single destination operand:
   SLJIT_FAST_ENTER stores the return address held in TMP_REG2 (lr),
   SLJIT_GET_RETURN_ADDRESS reloads the return address saved on the
   stack by the function prologue. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* TMP_REG2 is mapped to the link register (r14). */
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		if (FAST_IS_REG(dst))
			return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
		/* Memory destinations fall through to the common store. */
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Compute the stack offset of the saved return address above
		   the locals: saved general registers first. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2492 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2493 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2494 {
2495 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2496 
2497 	if (type == SLJIT_GP_REGISTER)
2498 		return reg_map[reg];
2499 
2500 	if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2501 		return freg_map[reg];
2502 
2503 	if (type != SLJIT_SIMD_REG_128)
2504 		return freg_map[reg] & ~0x1;
2505 
2506 	return -1;
2507 }
2508 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)2509 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
2510 	void *instruction, sljit_u32 size)
2511 {
2512 	SLJIT_UNUSED_ARG(size);
2513 	CHECK_ERROR();
2514 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
2515 
2516 	return push_inst(compiler, *(sljit_ins*)instruction);
2517 }
2518 
2519 /* --------------------------------------------------------------------- */
2520 /*  Floating point operators                                             */
2521 /* --------------------------------------------------------------------- */
2522 
2523 #define FPU_LOAD (1 << 20)
2524 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
2525 	((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
2526 
/* Emits a VFP load or store between the float register 'reg' and the
   memory operand 'arg'/'argw'. The VFP offset field only holds an
   8 bit word offset, so other offsets need address arithmetic
   through TMP_REG2. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* VFP transfers have no register offset form: compute the
		   address with a shifted ADD first. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
		arg = TMP_REG2;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		/* Word aligned offsets within +/-0x3fc are encodable. */
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Split the offset: adjust the base by the non-encodable part
		   when it forms a valid data processing immediate. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG2) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG2, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* Fallback: materialize the full address in TMP_REG2. */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw));
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(arg & REG_MASK) | RM(TMP_REG2)));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG2, (sljit_uw)argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG2, reg, 0));
}
2570 
/* Converts a floating point value to a signed word and stores the
   result into dst/dstw. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	/* The precision bit is stored inverted in 'op' for this helper. */
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* Convert into TMP_FREG1, which then holds the integer result. */
	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));

	if (FAST_IS_REG(dst))
		/* Transfer the integer result to a core register. */
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2590 
/* Shared helper converting an integer operand to a floating point
   value; 'ins' selects the signedness and precision variant. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		/* Transfer the integer into a VFP register first. */
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediates go through a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2614 
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2615 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2616 	sljit_s32 dst, sljit_sw dstw,
2617 	sljit_s32 src, sljit_sw srcw)
2618 {
2619 	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2620 }
2621 
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2622 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2623 	sljit_s32 dst, sljit_sw dstw,
2624 	sljit_s32 src, sljit_sw srcw)
2625 {
2626 	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
2627 }
2628 
/* Compares two floating point values and transfers the FPU status flags
   into the CPU flags (VCMP + VMRS). After the XOR below a set SLJIT_32
   bit in 'op' selects the single precision (F32) comparison. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
	FAIL_IF(push_inst(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* Unordered-or-equal needs an extra step: a CMP of TMP_REG1 against
	   itself, executed only when VS (unordered) holds, forces Z to 1.
	   Afterwards EQ is true for both the equal and the unordered case. */
	return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
}
2653 
/* Single operand floating point operations (move, negate, absolute
   value and float <-> float conversions). Conversions involving
   integers are dispatched by SELECT_FOP1_OPERATION_WITH_CHECKS to the
   helpers above. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Invert SLJIT_32 so that a set bit selects single precision; for
	   F64_FROM_F32 the source precision differs from the op size, so the
	   bit is left as-is here (and flipped after the conversion below). */
	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
			else
				/* Memory destination: store directly from src below. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
		/* The result precision is the opposite of the source. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2700 
/* Two operand floating point operations (add, sub, mul, div, copysign).
   After the XOR below a set SLJIT_32 bit in 'op' selects the single
   precision (F32) form. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Read the word containing src2's sign bit into TMP_REG1 (bit 20
		   selects the VFP -> core direction); then take |src1| and
		   conditionally negate it when that word is negative. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
		/* 0xb0000000 is the LT condition: VNEG runs only when the sign
		   word compared less than zero, i.e. src2 was negative. */
		return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
	}

	if (dst_r == TMP_FREG1)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
2753 
2754 #undef EMIT_FPU_DATA_TRANSFER
2755 
/* Loads a single precision constant into an FPU register. When the
   value fits the VMOV immediate encoding (8-bit packed form: sign,
   limited exponent, 4-bit mantissa) a single instruction is emitted;
   otherwise the bits go through a core register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the float through a union to get its bit pattern. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only when the low 19 mantissa bits are zero ... */
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		/* ... and the exponent is in the narrow range the immediate
		   form can represent. */
		if (exp == 0x20 || exp == 0x1f) {
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: build the bit pattern in TMP_REG1, then move it over. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
}
2787 
/* Loads a double precision constant into an FPU register. Uses the
   VMOV immediate encoding when possible, otherwise transfers one or
   two core registers into the register pair. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Type-pun the double into two 32-bit words. NOTE(review): imm[1]
	   is treated as the high (sign/exponent) word, i.e. this assumes a
	   little-endian layout — consistent with the rest of this backend. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only when the low 48 mantissa bits are zero and the
	   exponent fits the immediate form's narrow range. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			/* Bit 8 selects the double precision variant. */
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	/* If both words are the same, a single core register suffices. */
	if (u.imm[0] == u.imm[1])
		return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
}
2823 
/* Transfers raw bits between core register(s) and an FPU register,
   in either direction depending on the opcode. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_s32 reg2;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		/* Two core registers <-> one double register (VMOV2). */
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg);
	} else {
		inst = VMOV | VN(freg) | RD(reg);

		/* Bit 7 adjusts the encoding for the 64-bit (non-SLJIT_32)
		   single-register transfer variant. */
		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	/* Bit 20 flips the transfer direction to FPU -> core. */
	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst(compiler, inst);
}
2850 
2851 /* --------------------------------------------------------------------- */
2852 /*  Conditional instructions                                             */
2853 /* --------------------------------------------------------------------- */
2854 
/* Maps an SLJIT condition type to an ARM condition code placed in the
   top four bits (bits 31-28) of the returned instruction word. For
   carry/overflow the mapping depends on whether the flags were produced
   by an ADD or a SUB (recorded in compiler->status_flags_state). */
static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x10000000; /* NE */

	case SLJIT_CARRY:
		/* After ADD the C flag is a true carry (CS); after SUB it is
		   the inverted borrow, so carry maps to LO instead. */
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x20000000; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x30000000; /* CC / LO */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x30000000; /* CC / LO */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x20000000; /* CS / HS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x80000000; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x90000000; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb0000000; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa0000000; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc0000000; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd0000000; /* LE */

	case SLJIT_OVERFLOW:
		/* Without add/sub flags, overflow is tracked via Z (NE). */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x10000000; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x60000000; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x00000000; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x70000000; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x40000000; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x50000000; /* PL */

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
		return 0xe0000000; /* AL (unconditional) */
	}
}
2943 
sljit_emit_label(struct sljit_compiler * compiler)2944 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2945 {
2946 	struct sljit_label *label;
2947 
2948 	CHECK_ERROR_PTR();
2949 	CHECK_PTR(check_sljit_emit_label(compiler));
2950 
2951 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2952 		return compiler->last_label;
2953 
2954 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2955 	PTR_FAIL_IF(!label);
2956 	set_label(label, compiler);
2957 	return label;
2958 }
2959 
/* Emits a (possibly conditional) jump or call whose target is patched
   later. On ARMv6 the target is loaded from a literal pool into the PC
   (or into TMP_REG1 for calls, followed by a BLX); on ARMv7 the target
   is materialized with a movw/movt pair (emit_imm) and a BX/BLX. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* Drop the modifier bits; only the condition type remains. */
	type &= 0xff;

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));
	/* Conditional PC (or TMP_REG1) load from a unique literal. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP) {
		/* Rewritable jumps record the literal's position for patching. */
		jump->addr = compiler->size;
		compiler->patches++;
	}

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		PTR_FAIL_IF(emit_blx(compiler));
	}

	if (!(jump->flags & SLJIT_REWRITABLE_JUMP))
		jump->addr = compiler->size;
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	/* Placeholder immediate; the real target is patched in later. */
	PTR_FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	jump->addr = compiler->size;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return jump;
}
3001 
3002 #ifdef __SOFTFP__
3003 
/* Moves call arguments into place for the softfp calling convention:
   the first four argument words travel in r0-r3, the rest on the stack.
   A first pass computes each argument's word offset (into offsets[]),
   a second pass moves the values in reverse order so nothing is
   clobbered before it is read. When 'src' (an indirect call target held
   in a register) would be overwritten, it is relocated to TMP_REG1.
   On return, *extra_space holds the stack space that the caller must
   release after the call (0 if none). */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	/* Default: "not in r0-r3", so no argument move can clobber it. */
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: assign offsets, and reverse the type list into
	   'types' so the second pass can walk it backwards. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Stack space beyond r0-r3, rounded up to 8 bytes. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Destination pair overlaps the call target: save it. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				/* Double register -> two core registers. */
				FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			/* Only move when the argument is not already in place. */
			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The target just moved into another r0-r3 slot. */
						*src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
						src_offset = offset;
					}
					FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
				} else
					FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3126 
/* After a softfp call: copies a floating point return value from the
   core registers (r0, and r1 for doubles) back into the first FPU
   register. No-op for non-float return types. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	switch (arg_types & SLJIT_ARG_MASK) {
	case SLJIT_ARG_TYPE_F64:
		return push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0);
	case SLJIT_ARG_TYPE_F32:
		return push_inst(compiler, VMOV | (0 << 16) | (0 << 12));
	}

	return SLJIT_SUCCESS;
}
3136 
3137 #else /* !__SOFTFP__ */
3138 
/* Packs floating point arguments into consecutive VFP registers for the
   hardfp calling convention. 'offset' walks the SLJIT argument
   registers, 'new_offset' the next free ABI slot; 'f32_offset'
   remembers a half-used double slot so a later F32 can fill its upper
   single register. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					SLJIT_32, new_offset, offset, 0)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the upper half of a previously split slot. */
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					0x400000, f32_offset, offset, 0)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
						0, new_offset, offset, 0)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3178 
3179 #endif /* __SOFTFP__ */
3180 
/* Emits a call with argument marshalling. With softfp (unless the
   SLJIT_CALL_REG_ARG convention is used) arguments are moved into core
   registers / onto the stack, and any extra stack space is released
   after the call; with hardfp they are packed into VFP registers.
   SLJIT_CALL_RETURN turns the call into a tail call. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* A tail call with no extra stack becomes a plain jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* For a tail call, recover the saved return address
			   before releasing the stack space. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the current frame first. */
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3236 
/* Emits an indirect jump or call. Register and memory targets become an
   immediate BX/BLX; immediate targets create a patchable jump record
   (literal pool on ARMv6, movw/movt on ARMv7). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	/* Load the target from a literal into PC (jump) or TMP_REG1 (call). */
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(emit_blx(compiler));
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* Placeholder immediate; patched later with the real target. */
	FAIL_IF(emit_imm(compiler, TMP_REG1, 0));
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
#endif /* SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	return SLJIT_SUCCESS;
}
3277 
/* Emits an indirect call with argument marshalling; mirrors
   sljit_emit_call but for register/memory/immediate targets. The
   target may be relocated to TMP_REG1 when argument moves or a frame
   release would clobber it. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* A tail call restores saved registers, so a target held in one of
	   them must be moved out of the way first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* A tail call with no extra stack becomes a plain jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Recover the saved return address before releasing the
			   extra stack space. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		/* Tail call: tear down the current frame first. */
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3339 
3340 #ifdef __SOFTFP__
3341 
/* Moves a floating point return value into the location the caller
   expects: for the register-argument convention, into the FPU return
   register; otherwise (softfp) into r0 (F32) or the r0/r1 pair (F64). */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* Bit 20 selects the VFP -> core transfer direction. */
		if (op & SLJIT_32)
			return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
		return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
	}

	/* Memory source: load the raw words straight into r0 (and r1). */
	SLJIT_SKIP_CHECKS(compiler);

	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3364 
3365 #endif /* __SOFTFP__ */
3366 
/* Materializes a condition flag as 0/1 in 'dst', either as a plain move
   (op < SLJIT_ADD) or combined with AND/OR/XOR into the existing value
   of 'dst'. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	if (op < SLJIT_ADD) {
		/* Unconditional MOV #0, then a conditional MOV #1. */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	/* Conditionally combine with 1 when the condition holds ... */
	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* ... and for AND, clear with 0 on the inverted condition
	   (XOR-ing the cc with 0x10000000 inverts an ARM condition). */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* Re-derive the Z flag from the result when requested. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
3407 
/* Conditional select: dst_reg = condition(type) ? src1 : src2_reg,
   implemented with conditional MOV/MVN/MOVW+MOVT instructions.
   Flipping the low bit of 'type' inverts the condition, which lets the
   operands be swapped to avoid clobbering dst_reg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* If src1 already occupies dst_reg, swap the operands and invert
	   the condition instead of moving it aside. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG2));

		if (src2_reg != dst_reg) {
			/* The loaded value sits in dst_reg; select src2_reg
			   conditionally with the inverted condition. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
		/* Prefer a single conditional MOV/MVN when the immediate (or
		   its complement) fits the rotated-immediate encoding. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		/* Conditional MOVW, plus MOVT when the value needs 32 bits. */
		tmp = (sljit_ins)src1w;
		FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
		if (tmp <= 0xffff)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
#else /* !SLJIT_CONFIG_ARM_V7 */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
#endif /* SLJIT_CONFIG_ARM_V7 */
	}

	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
}
3466 
/* Conditional floating point select: dst_freg = condition(type) ? src1
   : src2_freg, implemented with an unconditional VMOV plus a
   conditional VMOV. After the XOR below a set SLJIT_32 bit selects the
   single precision form. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			/* Swap operands and invert the condition (low bit flip)
			   instead of moving src1 out of the way. */
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	cc = get_cc(compiler, type & ~SLJIT_32);
	return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
3498 
3499 #undef EMIT_FPU_OPERATION
3500 
/* Rewrite the memory operand (*mem, *memw) so that the remaining offset
   fits into [-0xfff, max_offset]. When the base address needs adjustment,
   the partial address is computed into TMP_REG1 and *mem is redirected
   to it. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm, tmp;
	sljit_sw mask = 0xfff;
	sljit_sw sign = 0x1000;

	SLJIT_ASSERT(max_offset >= 0xf00);

	*mem = TMP_REG1;

	/* Base + shifted index form: fold both registers into TMP_REG1,
	   leaving a zero immediate offset. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
	}

	arg &= REG_MASK;

	if (arg) {
		/* Offset already encodable: keep the original base register. */
		if (argw <= max_offset && argw >= -mask) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw >= 0) {
			/* Round argw down to a 0x1000 boundary (stepping one
			   extra 0x1000 when the remainder would exceed
			   max_offset) and try to add that part with a single
			   encodable immediate. */
			tmp = (sljit_uw)(argw & (sign | mask));
			tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw - (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
			}
		} else {
			/* Same rounding idea for negative offsets, subtracting
			   the rounded part instead of adding it. */
			tmp = (sljit_uw)(-argw & (sign | mask));
			tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw + (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
			}
		}
	}

	/* Fallback: materialize the rounded part as a full constant in
	   TMP_REG1, then add the base register when there is one. */
	tmp = (sljit_uw)(argw & (sign | mask));
	tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
	*memw = argw - (sljit_sw)tmp;

	FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3562 
/* Emit a memory access. Register pairs are split into two word sized
   transfers (second word at offset + sizeof(sljit_sw)); everything else
   is forwarded to the unaligned access helper. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	/* Reserve room in the offset range for the second word (memw + 4). */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));

	flags = WORD_SIZE;

	if (!(type & SLJIT_MEM_STORE)) {
		/* When the first pair register is also the base register,
		   load the second word first so the base is not clobbered
		   before it is used again. */
		if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
			return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
		}

		flags = WORD_SIZE | LOAD_DATA;
	}

	FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
	return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
}
3593 
/* Emit a load/store that also updates the base register (pre- or
   post-indexed addressing). Returns SLJIT_ERR_UNSUPPORTED when the
   offset cannot be encoded in the required transfer format. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Type 1 transfers (word / unsigned byte LDR/STR) take 12 bit
	   immediates; the halfword / signed forms (type 2) only take 8 bit
	   immediates and no shifted register offset. */
	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Signed byte loads need the type 2 (LDRSB) form. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	/* Validate the addressing form before emitting anything. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Type 2 register offsets cannot carry a shift amount. */
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			return SLJIT_ERR_UNSUPPORTED;
	}

	/* SLJIT_MEM_SUPP only queries whether the form is supported. */
	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		memw &= 0x3; /* shift amount applied to the index register */

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));

		if (is_type1_transfer)
			inst |= (1 << 25); /* register offset form */

		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24); /* clear the P bit: post-indexed */
		else
			inst |= (1 << 21); /* set the W bit: pre-indexed writeback */

		return push_inst(compiler, inst);
	}

	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24); /* clear the P bit: post-indexed */
	else
		inst |= (1 << 21); /* set the W bit: pre-indexed writeback */

	if (is_type1_transfer) {
		if (memw >= 0)
			inst |= (1 << 23); /* U bit: add the offset */
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_ins)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23); /* U bit: add the offset */
	else
		memw = -memw;

	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
3694 
/* Emit a floating point load/store that may be unaligned. 32 bit aligned
   accesses go straight through the FPU; otherwise the value is shuttled
   between FPU and core registers with VMOV and transferred one word
   (f32) or two words (f64) at a time. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the (low) word of freg into TMP_REG2 (bit 20 selects
		   the FPU-to-core direction). */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		/* f64: make sure memw + 4 is still encodable. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		/* NOTE(review): 0x80 presumably selects the other word of the
		   double register - confirm against the VMOV scalar encoding. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	/* Load both words into core registers, then move them into the
	   double register with a single VMOV2. */
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
}
3731 
/* Reduce a SIMD memory operand to a plain base register: computes the
   effective address into TMP_REG1 when it cannot be expressed as a bare
   register, and stores the resulting register back into *mem_ptr. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 base = *mem_ptr;
	sljit_uw abs_imm;
	sljit_ins op;

	/* Base + shifted index register: fold both into TMP_REG1. */
	if (SLJIT_UNLIKELY(base & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(base & REG_MASK) | RM(OFFS_REG(base)) | ((sljit_ins)(memw & 0x3) << 7));
	}

	/* No base register: materialize the absolute address. */
	if (SLJIT_UNLIKELY(!(base & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	base &= REG_MASK;

	/* A register with no displacement is usable as-is. */
	if (memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Prefer a single ADD/SUB when the displacement is an encodable
	   immediate. */
	abs_imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));

	if (abs_imm != 0) {
		op = (memw < 0) ? SUB : ADD;
		return push_inst(compiler, op | RD(TMP_REG1) | RN(base) | abs_imm);
	}

	/* Otherwise load the displacement as a constant and add the base. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(base));
}
3763 
simd_get_quad_reg_index(sljit_s32 freg)3764 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3765 {
3766 	freg += freg & 0x1;
3767 
3768 	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3769 
3770 	if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3771 		freg--;
3772 
3773 	return freg;
3774 }
3775 
3776 #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3777 
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3778 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3779 	sljit_s32 freg,
3780 	sljit_s32 srcdst, sljit_sw srcdstw)
3781 {
3782 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3783 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3784 	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3785 	sljit_ins ins;
3786 
3787 	CHECK_ERROR();
3788 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3789 
3790 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3791 
3792 	if (reg_size != 3 && reg_size != 4)
3793 		return SLJIT_ERR_UNSUPPORTED;
3794 
3795 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3796 		return SLJIT_ERR_UNSUPPORTED;
3797 
3798 	if (type & SLJIT_SIMD_TEST)
3799 		return SLJIT_SUCCESS;
3800 
3801 	if (reg_size == 4)
3802 		freg = simd_get_quad_reg_index(freg);
3803 
3804 	if (!(srcdst & SLJIT_MEM)) {
3805 		if (reg_size == 4)
3806 			srcdst = simd_get_quad_reg_index(srcdst);
3807 
3808 		if (type & SLJIT_SIMD_STORE)
3809 			ins = VD(srcdst) | VN(freg) | VM(freg);
3810 		else
3811 			ins = VD(freg) | VN(srcdst) | VM(srcdst);
3812 
3813 		if (reg_size == 4)
3814 			ins |= (sljit_ins)1 << 6;
3815 
3816 		return push_inst(compiler, VORR | ins);
3817 	}
3818 
3819 	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3820 
3821 	if (elem_size > 3)
3822 		elem_size = 3;
3823 
3824 	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg)
3825 		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
3826 
3827 	SLJIT_ASSERT(reg_size >= alignment);
3828 
3829 	if (alignment == 3)
3830 		ins |= 0x10;
3831 	else if (alignment >= 3)
3832 		ins |= 0x20;
3833 
3834 	return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
3835 }
3836 
/* Try to encode 'value' as a NEON modified immediate (VMOV_i style) for
   the given element size. Returns the instruction bits carrying the
   cmode/op selection plus the split 8 bit immediate, or ~0 when the
   value is not encodable. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* Narrow the element size when the value is a broadcast of a
	   smaller pattern: 32 bit -> 16 bit ... */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* ... and 16 bit -> 8 bit. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		/* Any byte value is directly encodable. */
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			/* Byte in the low half. */
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			/* Byte shifted into the high half. */
			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* The inverted form (second pass) failed as well. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise inverted value (MVN form,
			   flagged by bit 5). */
			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			/* Byte in position 0. */
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			/* Byte shifted left by 8. */
			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			/* Byte shifted left by 16. */
			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			/* Byte shifted left by 24. */
			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			/* Byte shifted left by 8 with ones below. */
			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			/* Byte shifted left by 16 with ones below. */
			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			/* The inverted form (second pass) failed as well. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise inverted value (MVN form). */
			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Split the 8 bit immediate across the instruction's immediate
	   fields: low nibble, middle three bits and top bit. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
}
3930 
/* Broadcast a scalar (register, immediate or memory operand) into every
   lane of freg. Uses VMOV_i for encodable immediates, VLD1_r for memory
   sources and VDUP/VDUP_s for register sources. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Integer lanes up to 32 bit; float lanes must be f32 or f64. */
	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Zero is always encodable as a VMOV immediate. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		/* f64: copy the double into both halves of the quad with
		   VORR (register copy). */
		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* Load-and-replicate form (VLD1 to all lanes). */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 5; /* replicate into both D registers */

		return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		SLJIT_ASSERT(elem_size == 2);
		/* Duplicate one f32 lane of the source register (VDUP scalar);
		   the lane index comes from the source's odd/even position. */
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Mask the immediate to the element width, then try the NEON
		   modified immediate encoding. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst(compiler, VMOV_i | imm | VD(freg));
		}

		/* Not encodable: go through a core register. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from core register; size selection bits depend on elem_size. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst(compiler, VDUP | ins | VN(freg) | RD(src));
}
4030 
/* Move a single lane between a SIMD register and a register, immediate
   or memory operand; optionally zeroing the other lanes first
   (SLJIT_SIMD_LANE_ZERO). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			/* f64 register source: move it into the wanted half,
			   then zero the other half of the quad. */
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				if (lane_index == 1)
					freg += SLJIT_QUAD_OTHER_HALF(freg);

				if (srcdst != freg)
					FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst)));

				freg += SLJIT_QUAD_OTHER_HALF(freg);
				return push_inst(compiler, VMOV_i | VD(freg));
			}

			/* Source aliases the destination: preserve it in
			   TMP_FREG2 before zeroing. */
			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
				FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Zero the whole destination register. */
		FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg)));
	}

	/* Lanes in the upper half of a quad live in the other D register. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		freg += SLJIT_QUAD_OTHER_HALF(freg);
	}

	if (srcdst & SLJIT_MEM) {
		/* f64 lanes are plain double loads/stores. */
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* Single lane VLD1/VST1; the byte offset of the lane is
		   encoded in the index field. */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg));
			return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* An even-numbered f32 register can be copied directly;
			   otherwise route through a core register. */
			if (freg_ebit_map[freg] == 0) {
				if (lane_index == 1)
					freg = SLJIT_F64_SECOND(freg);

				return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg));
			}

			FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)));
			return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
		}

		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
		return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Mask the immediate to the element width and materialize it. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV_s size selection: byte, halfword or word lanes. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	/* Lane index split across the instruction's opc fields. */
	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20); /* SIMD-to-core direction */

		/* Zero extend unless a signed lane read was requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst));
}
4147 
/* Broadcast one lane of src into every lane of freg (VDUP scalar form;
   f64 lanes are handled with plain register copies). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of the quad live in the other
		   D register of the pair. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* f64: duplicate by copying the source double into both
		   halves of the destination quad. */
		if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP scalar: lane index and element size share the imm4 field. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6; /* Q bit */

	return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
}
4196 
/* Widen the lanes of src from elem_size to elem2_size into freg.
   Integer lanes use one VSHLL (shift-left-long by zero, i.e. widen)
   per doubling step; f32 -> f64 uses VCVT through the VFP unit. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* The only supported float widening is f32 -> f64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* Load only as many source lanes as the widened result needs. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf));
		else
			FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf));
		src = freg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;

		/* Widen one power of two per VSHLL step; bit 24 selects the
		   unsigned variant. */
		do {
			FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
				| ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (freg == src) {
		/* Destination aliases the source: convert the upper half
		   first so the source lanes are not overwritten too early
		   (0x20 selects the odd source register). */
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20));
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src));
	}

	FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)));
	freg += SLJIT_QUAD_OTHER_HALF(freg);
	return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20);
}
4261 
/* Collect the sign bit of every lane of freg into an integer bitmask
   stored in dst. A VSHR moves each sign bit to the lane bottom, then a
   chain of VSRA shift-accumulate steps (shift amounts packed into the
   'imms' byte string) compresses the bits, and the result is read out
   through a core register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* Per element size: initial unsigned VSHR amount (sign bit to lane
	   bottom) and the packed VSRA shift amounts for the compression
	   chain (one byte per step, consumed low byte first). */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 24) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 24) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 24) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		ins |= (sljit_ins)1 << 6; /* Q bit */
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg)));

	/* Narrow a quad intermediate back to a double register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Shift-accumulate chain: each step ORs lane bits together. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	/* Read the accumulated mask into a core register. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));

	if (reg_size == 4 && elem_size == 0) {
		/* 16 byte lanes: the second half of the mask is in TMP_FREG1;
		   merge it in shifted left by 8 (ORR with lsl #8). */
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
		FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4338 
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4339 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4340 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4341 {
4342 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4343 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4344 	sljit_ins ins = 0;
4345 
4346 	CHECK_ERROR();
4347 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4348 
4349 	if (reg_size != 3 && reg_size != 4)
4350 		return SLJIT_ERR_UNSUPPORTED;
4351 
4352 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4353 		return SLJIT_ERR_UNSUPPORTED;
4354 
4355 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
4356 	case SLJIT_SIMD_OP2_AND:
4357 		ins = VAND;
4358 		break;
4359 	case SLJIT_SIMD_OP2_OR:
4360 		ins = VORR;
4361 		break;
4362 	case SLJIT_SIMD_OP2_XOR:
4363 		ins = VEOR;
4364 		break;
4365 	}
4366 
4367 	if (type & SLJIT_SIMD_TEST)
4368 		return SLJIT_SUCCESS;
4369 
4370 	if (reg_size == 4) {
4371 		dst_freg = simd_get_quad_reg_index(dst_freg);
4372 		src1_freg = simd_get_quad_reg_index(src1_freg);
4373 		src2_freg = simd_get_quad_reg_index(src2_freg);
4374 		ins |= (sljit_ins)1 << 6;
4375 	}
4376 
4377 	return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
4378 }
4379 
4380 #undef FPU_LOAD
4381 
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4382 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4383 	sljit_s32 dst_reg,
4384 	sljit_s32 mem_reg)
4385 {
4386 	sljit_u32 ins;
4387 
4388 	CHECK_ERROR();
4389 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4390 
4391 	switch (GET_OPCODE(op)) {
4392 	case SLJIT_MOV_U8:
4393 		ins = LDREXB;
4394 		break;
4395 	case SLJIT_MOV_U16:
4396 		ins = LDREXH;
4397 		break;
4398 	default:
4399 		ins = LDREX;
4400 		break;
4401 	}
4402 
4403 	return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg));
4404 }
4405 
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4406 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4407 	sljit_s32 src_reg,
4408 	sljit_s32 mem_reg,
4409 	sljit_s32 temp_reg)
4410 {
4411 	sljit_u32 ins;
4412 
4413 	/* temp_reg == mem_reg is undefined so use another temp register */
4414 	SLJIT_UNUSED_ARG(temp_reg);
4415 
4416 	CHECK_ERROR();
4417 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4418 
4419 	switch (GET_OPCODE(op)) {
4420 	case SLJIT_MOV_U8:
4421 		ins = STREXB;
4422 		break;
4423 	case SLJIT_MOV_U16:
4424 		ins = STREXH;
4425 		break;
4426 	default:
4427 		ins = STREX;
4428 		break;
4429 	}
4430 
4431 	FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
4432 	if (op & SLJIT_SET_ATOMIC_STORED)
4433 		return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
4434 
4435 	return SLJIT_SUCCESS;
4436 }
4437 
/* Emit a patchable constant load (see sljit_set_const). ARMv6 keeps the
   value in a literal pool entry; later cores materialize it with
   emit_imm (presumably a movw/movt pair - the patcher must match). */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* PC relative literal load; the literal itself is patched later. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
		EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif /* SLJIT_CONFIG_ARM_V6 */

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	/* Spill to memory when the destination is not a register. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
4465 
/* Emits code that loads a label address into dst; the actual address is not
   known yet, so a zero placeholder is emitted and resolved at generation
   time via the returned sljit_put_label handle. Returns NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Load directly into dst when it is a register; otherwise stage the
	   address in TMP_REG2 and store it to memory afterwards. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC-relative load from a literal-pool entry (placeholder 0);
	   counted in compiler->patches so it can be patched with the label
	   address later. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: emit an immediate-load sequence with placeholder value 0. */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, 0));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* NOTE(review): set_put_label records the position of the emitted
	   placeholder, so this must stay after the emit above. */
	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	/* Spill the staged address from TMP_REG2 when dst is a memory operand. */
	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return put_label;
}
4492 
/* Patches the jump instruction at addr (a code address adjusted by
   executable_offset) to branch to new_target. The trailing 1 presumably
   requests a cache flush of the modified code — see inline_set_jump_addr. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	inline_set_jump_addr(addr, executable_offset, new_target, 1);
}
4497 
/* Rewrites the constant emitted by sljit_emit_const at addr with
   new_constant. The trailing 1 presumably requests a cache flush of the
   modified code — see inline_set_const. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	inline_set_const(addr, executable_offset, (sljit_uw)new_constant, 1);
}
4502