1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #ifdef __SOFTFP__
28 #define ARM_ABI_INFO " ABI:softfp"
29 #else
30 #define ARM_ABI_INFO " ABI:hardfp"
31 #endif
32 
/* Returns a static, human-readable description of the compilation target:
   the ARM architecture revision selected at build time plus the CPU info
   string and the float ABI suffix (softfp/hardfp from ARM_ABI_INFO). */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	return "ARMv7" SLJIT_CPUINFO ARM_ABI_INFO;
#elif (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return "ARMv6" SLJIT_CPUINFO ARM_ABI_INFO;
#else
#error "Internal error: Unknown ARM architecture"
#endif
}
43 
44 /* Length of an instruction word. */
45 typedef sljit_u32 sljit_ins;
46 
47 /* Last register + 1. */
48 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
49 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
50 #define TMP_PC		(SLJIT_NUMBER_OF_REGISTERS + 4)
51 
52 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
53 #define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)
54 
55 /* In ARM instruction words.
56    Cache lines are usually 32 byte aligned. */
57 #define CONST_POOL_ALIGNMENT	8
58 #define CONST_POOL_EMPTY	0xffffffff
59 
60 #define ALIGN_INSTRUCTION(ptr) \
61 	(sljit_ins*)(((sljit_ins)(ptr) + (CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1) & ~((CONST_POOL_ALIGNMENT * sizeof(sljit_ins)) - 1))
62 #define MAX_DIFFERENCE(max_diff) \
63 	(((max_diff) / (sljit_s32)sizeof(sljit_ins)) - (CONST_POOL_ALIGNMENT - 1))
64 
65 /* See sljit_emit_enter and sljit_emit_op0 if you want to change them. */
66 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
67 	0, 0, 1, 2, 3, 11, 10, 9, 8, 7, 6, 5, 4, 13, 12, 14, 15
68 };
69 
70 static const sljit_u8 freg_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
71 	0,
72 	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
73 	7, 6,
74 	0, 1, 2, 3, 4, 5, 15, 14, 13, 12, 11, 10, 9, 8,
75 	7, 6
76 };
77 
78 static const sljit_u8 freg_ebit_map[((SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2) << 1) + 1] = {
79 	0,
80 	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
81 	0, 0,
82 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
83 	1, 1
84 };
85 
86 #define RM(rm) ((sljit_ins)reg_map[rm])
87 #define RM8(rm) ((sljit_ins)reg_map[rm] << 8)
88 #define RD(rd) ((sljit_ins)reg_map[rd] << 12)
89 #define RN(rn) ((sljit_ins)reg_map[rn] << 16)
90 
91 #define VM(vm) (((sljit_ins)freg_map[vm]) | ((sljit_ins)freg_ebit_map[vm] << 5))
92 #define VD(vd) (((sljit_ins)freg_map[vd] << 12) | ((sljit_ins)freg_ebit_map[vd] << 22))
93 #define VN(vn) (((sljit_ins)freg_map[vn] << 16) | ((sljit_ins)freg_ebit_map[vn] << 7))
94 
95 /* --------------------------------------------------------------------- */
96 /*  Instrucion forms                                                     */
97 /* --------------------------------------------------------------------- */
98 
99 /* The instruction includes the AL condition.
100    INST_NAME - CONDITIONAL remove this flag. */
101 #define COND_MASK	0xf0000000
102 #define CONDITIONAL	0xe0000000
103 #define PUSH_POOL	0xff000000
104 
105 #define ADC		0xe0a00000
106 #define ADD		0xe0800000
107 #define AND		0xe0000000
108 #define B		0xea000000
109 #define BIC		0xe1c00000
110 #define BKPT		0xe1200070
111 #define BL		0xeb000000
112 #define BLX		0xe12fff30
113 #define BX		0xe12fff10
114 #define CLZ		0xe16f0f10
115 #define CMN		0xe1600000
116 #define CMP		0xe1400000
117 #define EOR		0xe0200000
118 #define LDR		0xe5100000
119 #define LDR_POST	0xe4100000
120 #define LDREX		0xe1900f9f
121 #define LDREXB		0xe1d00f9f
122 #define LDREXH		0xe1f00f9f
123 #define MLA		0xe0200090
124 #define MOV		0xe1a00000
125 #define MUL		0xe0000090
126 #define MVN		0xe1e00000
127 #define NOP		0xe1a00000
128 #define ORR		0xe1800000
129 #define PUSH		0xe92d0000
130 #define POP		0xe8bd0000
131 #define REV		0xe6bf0f30
132 #define REV16		0xe6bf0fb0
133 #define RSB		0xe0600000
134 #define RSC		0xe0e00000
135 #define SBC		0xe0c00000
136 #define SMULL		0xe0c00090
137 #define STR		0xe5000000
138 #define STREX		0xe1800f90
139 #define STREXB		0xe1c00f90
140 #define STREXH		0xe1e00f90
141 #define SUB		0xe0400000
142 #define SXTB		0xe6af0070
143 #define SXTH		0xe6bf0070
144 #define TST		0xe1000000
145 #define UMULL		0xe0800090
146 #define UXTB		0xe6ef0070
147 #define UXTH		0xe6ff0070
148 #define VABS_F32	0xeeb00ac0
149 #define VADD_F32	0xee300a00
150 #define VAND		0xf2000110
151 #define VCMP_F32	0xeeb40a40
152 #define VCVT_F32_S32	0xeeb80ac0
153 #define VCVT_F32_U32	0xeeb80a40
154 #define VCVT_F64_F32	0xeeb70ac0
155 #define VCVT_S32_F32	0xeebd0ac0
156 #define VDIV_F32	0xee800a00
157 #define VDUP		0xee800b10
158 #define VDUP_s		0xf3b00c00
159 #define VEOR		0xf3000110
160 #define VLD1		0xf4200000
161 #define VLD1_r		0xf4a00c00
162 #define VLD1_s		0xf4a00000
163 #define VLDR_F32	0xed100a00
164 #define VMOV_F32	0xeeb00a40
165 #define VMOV		0xee000a10
166 #define VMOV2		0xec400a10
167 #define VMOV_i		0xf2800010
168 #define VMOV_s		0xee000b10
169 #define VMOVN		0xf3b20200
170 #define VMRS		0xeef1fa10
171 #define VMUL_F32	0xee200a00
172 #define VNEG_F32	0xeeb10a40
173 #define VORR		0xf2200110
174 #define VPOP		0xecbd0b00
175 #define VPUSH		0xed2d0b00
176 #define VSHLL		0xf2800a10
177 #define VSHR		0xf2800010
178 #define VSRA		0xf2800110
179 #define VST1		0xf4000000
180 #define VST1_s		0xf4800000
181 #define VSTR_F32	0xed000a00
182 #define VSUB_F32	0xee300a40
183 
184 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
185 /* Arm v7 specific instructions. */
186 #define MOVT		0xe3400000
187 #define MOVW		0xe3000000
188 #define RBIT		0xe6ff0f30
189 #endif
190 
191 #if (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
192 
/* Argument-checking helper: returns non-zero when 'fr' denotes a float
   register that is usable in the current compiler context (a declared
   scratch, a declared saved register, or a temporary float register).
   When 'is_32' is set, a reference to the second half of a float64
   register pair is normalized back to its base register first.
   A scratches value of -1 means checking is disabled; reject. */
static sljit_s32 function_check_is_freg(struct sljit_compiler *compiler, sljit_s32 fr, sljit_s32 is_32)
{
	if (compiler->scratches == -1)
		return 0;

	/* Map the high half of a float64 pair to its base register. */
	if (is_32 && fr >= SLJIT_F64_SECOND(SLJIT_FR0))
		fr -= SLJIT_F64_SECOND(0);

	return (fr >= SLJIT_FR0 && fr < (SLJIT_FR0 + compiler->fscratches))
		|| (fr > (SLJIT_FS0 - compiler->fsaveds) && fr <= SLJIT_FS0)
		|| (fr >= SLJIT_TMP_FREGISTER_BASE && fr < (SLJIT_TMP_FREGISTER_BASE + SLJIT_NUMBER_OF_TEMPORARY_FLOAT_REGISTERS));
}
205 
206 #endif /* SLJIT_ARGUMENT_CHECKS */
207 
208 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
209 
/* Flushes the pending constant pool into the instruction stream (ARMv6
   path).  Emits a pool marker word (0xff000000 | fill count, the
   PUSH_POOL pattern recognized by the second pass), alignment filler
   words, then the literal values themselves, and finally resets the
   pool state.  Returns SLJIT_SUCCESS or an allocation error. */
static sljit_s32 push_cpool(struct sljit_compiler *compiler)
{
	/* Pushing the constant pool into the instruction stream. */
	sljit_ins* inst;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_s32 i;

	/* The label could point the address after the constant pool. */
	if (compiler->last_label && compiler->last_label->size == compiler->size)
		compiler->last_label->size += compiler->cpool_fill + (CONST_POOL_ALIGNMENT - 1) + 1;

	SLJIT_ASSERT(compiler->cpool_fill > 0 && compiler->cpool_fill <= CPOOL_SIZE);
	/* Pool marker: high byte 0xff, low bits carry the literal count. */
	inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!inst);
	compiler->size++;
	*inst = 0xff000000 | compiler->cpool_fill;

	/* Reserve alignment filler words (zeroed). */
	for (i = 0; i < CONST_POOL_ALIGNMENT - 1; i++) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = 0;
	}

	/* Copy the collected literals into the stream. */
	cpool_ptr = compiler->cpool;
	cpool_end = cpool_ptr + compiler->cpool_fill;
	while (cpool_ptr < cpool_end) {
		inst = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
		FAIL_IF(!inst);
		compiler->size++;
		*inst = *cpool_ptr++;
	}
	compiler->cpool_diff = CONST_POOL_EMPTY;
	compiler->cpool_fill = 0;
	return SLJIT_SUCCESS;
}
247 
/* Appends one instruction word to the code buffer (ARMv6 path).
   If the pending constant pool would drift out of the 4092 byte
   pc-relative load range, it is flushed into the stream first.
   Returns SLJIT_SUCCESS or an allocation error. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
{
	sljit_ins* ptr;

	/* Keep every pooled literal reachable by an LDR [pc, #imm12]. */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));

	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst;
	return SLJIT_SUCCESS;
}
261 
/* Appends an instruction that references a constant-pool literal
   (ARMv6 path).  The literal is de-duplicated: if an equal, non-unique
   entry already exists in the pool, its index is reused; otherwise a
   new slot is allocated (flushing the pool first if it is full or
   about to go out of pc-relative range).  The pool index is stored in
   the low 12 bits of the instruction and later rewritten to the real
   offset by patch_pc_relative_loads. */
static sljit_s32 push_inst_with_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;
	sljit_uw cpool_index = CPOOL_SIZE;
	sljit_uw* cpool_ptr;
	sljit_uw* cpool_end;
	sljit_u8* cpool_unique_ptr;

	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)))
		FAIL_IF(push_cpool(compiler));
	else if (compiler->cpool_fill > 0) {
		/* Search the pool for a matching, shareable literal. */
		cpool_ptr = compiler->cpool;
		cpool_end = cpool_ptr + compiler->cpool_fill;
		cpool_unique_ptr = compiler->cpool_unique;
		do {
			if ((*cpool_ptr == literal) && !(*cpool_unique_ptr)) {
				cpool_index = (sljit_uw)(cpool_ptr - compiler->cpool);
				break;
			}
			cpool_ptr++;
			cpool_unique_ptr++;
		} while (cpool_ptr < cpool_end);
	}

	if (cpool_index == CPOOL_SIZE) {
		/* Must allocate a new entry in the literal pool. */
		if (compiler->cpool_fill < CPOOL_SIZE) {
			cpool_index = compiler->cpool_fill;
			compiler->cpool_fill++;
		}
		else {
			FAIL_IF(push_cpool(compiler));
			cpool_index = 0;
			compiler->cpool_fill = 1;
		}
	}

	/* The low 12 bits must be free to carry the pool index. */
	SLJIT_ASSERT((inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	*ptr = inst | cpool_index;

	compiler->cpool[cpool_index] = literal;
	compiler->cpool_unique[cpool_index] = 0;
	/* Remember where the oldest outstanding pool reference lives. */
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
311 
/* Like push_inst_with_literal, but the literal is marked unique: it
   always gets its own pool slot and is never shared with equal values
   (used for literals that may be rewritten later, e.g. by consts).
   Flushes the pool first if it is full or out of range. */
static sljit_s32 push_inst_with_unique_literal(struct sljit_compiler *compiler, sljit_ins inst, sljit_uw literal)
{
	sljit_ins* ptr;

	if (SLJIT_UNLIKELY((compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4092)) || compiler->cpool_fill >= CPOOL_SIZE))
		FAIL_IF(push_cpool(compiler));

	SLJIT_ASSERT(compiler->cpool_fill < CPOOL_SIZE && (inst & 0xfff) == 0);
	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ptr);
	compiler->size++;
	/* Low 12 bits temporarily hold the pool index (patched later). */
	*ptr = inst | compiler->cpool_fill;

	compiler->cpool[compiler->cpool_fill] = literal;
	compiler->cpool_unique[compiler->cpool_fill] = 1;
	compiler->cpool_fill++;
	if (compiler->cpool_diff == CONST_POOL_EMPTY)
		compiler->cpool_diff = compiler->size;
	return SLJIT_SUCCESS;
}
332 
/* Guarantees that the next two instructions can be emitted without a
   constant-pool flush landing between them (required before the
   load + BLX pair emitted by emit_blx). */
static SLJIT_INLINE sljit_s32 prepare_blx(struct sljit_compiler *compiler)
{
	/* Place for at least two instruction (doesn't matter whether the first has a literal). */
	if (SLJIT_UNLIKELY(compiler->cpool_diff != CONST_POOL_EMPTY && compiler->size - compiler->cpool_diff >= MAX_DIFFERENCE(4088)))
		return push_cpool(compiler);
	return SLJIT_SUCCESS;
}
340 
/* Emits a BLX through TMP_REG1.  Must be called right after the
   instruction that loads the target into TMP_REG1 (prepare_blx ensures
   no pool flush splits the pair), so the pair can later be converted
   into a single BL by the patcher. */
static SLJIT_INLINE sljit_s32 emit_blx(struct sljit_compiler *compiler)
{
	/* Must follow tightly the previous instruction (to be able to convert it to bl instruction). */
	SLJIT_ASSERT(compiler->cpool_diff == CONST_POOL_EMPTY || compiler->size - compiler->cpool_diff < MAX_DIFFERENCE(4092));
	/* BLX clobbers lr (r14); TMP_REG1 must not alias it. */
	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	return push_inst(compiler, BLX | RM(TMP_REG1));
}
349 
/* Second-pass fixup of pc-relative loads between last_pc_patch and
   code_ptr.  Each load's low 12 bits still hold a pool index; this
   rewrites them to real byte offsets into the (compressed) pool at
   const_pool.  Literals never referenced are dropped, so the returned
   value is the number of pool entries actually kept. */
static sljit_uw patch_pc_relative_loads(sljit_uw *last_pc_patch, sljit_uw *code_ptr, sljit_uw* const_pool, sljit_uw cpool_size)
{
	sljit_uw diff;
	sljit_uw ind;
	sljit_uw counter = 0;
	sljit_uw* clear_const_pool = const_pool;
	sljit_uw* clear_const_pool_end = const_pool + cpool_size;

	SLJIT_ASSERT(const_pool - code_ptr <= CONST_POOL_ALIGNMENT);
	/* Set unused flag for all literals in the constant pool.
	   I.e.: unused literals can belong to branches, which can be encoded as B or BL.
	   We can "compress" the constant pool by discarding these literals. */
	while (clear_const_pool < clear_const_pool_end)
		*clear_const_pool++ = (sljit_uw)(-1);

	while (last_pc_patch < code_ptr) {
		/* Data transfer instruction with Rn == r15. */
		if ((*last_pc_patch & 0x0e0f0000) == 0x040f0000) {
			/* Distance (in words) from this load to the pool start. */
			diff = (sljit_uw)(const_pool - last_pc_patch);
			ind = (*last_pc_patch) & 0xfff;

			/* Must be a load instruction with immediate offset. */
			SLJIT_ASSERT(ind < cpool_size && !(*last_pc_patch & (1 << 25)) && (*last_pc_patch & (1 << 20)));
			/* First reference: assign the next compressed slot. */
			if ((sljit_s32)const_pool[ind] < 0) {
				const_pool[ind] = counter;
				ind = counter;
				counter++;
			}
			else
				ind = const_pool[ind];

			SLJIT_ASSERT(diff >= 1);
			if (diff >= 2 || ind > 0) {
				/* Positive offset; -2 accounts for the pc+8 pipeline bias. */
				diff = (diff + (sljit_uw)ind - 2) << 2;
				SLJIT_ASSERT(diff <= 0xfff);
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)0xfff) | diff;
			}
			else
				/* Literal sits just before pc+8: clear the U bit (1 << 23)
				   and use a small negative offset instead. */
				*last_pc_patch = (*last_pc_patch & ~(sljit_uw)(0xfff | (1 << 23))) | 0x004;
		}
		last_pc_patch++;
	}
	return counter;
}
394 
/* In some rare occasions we may need future patches. The probability is close to 0 in practice.
   A pending pool-slot relocation: entry 'index' of the compressed pool
   must eventually receive 'value' (see resolve_const_pool_index). */
struct future_patch {
	struct future_patch* next;
	sljit_s32 index;
	sljit_s32 value;
};
401 
/* Places the literal word at *buf_ptr into its final slot of the
   compressed constant pool.  Uses the remapping indices written by
   patch_pc_relative_loads; when a slot's original value has not been
   consumed yet, it is saved on the first_patch list and restored
   later.  On allocation failure the patch list is freed and
   SLJIT_ERR_ALLOC_FAILED is returned. */
static sljit_s32 resolve_const_pool_index(struct sljit_compiler *compiler, struct future_patch **first_patch, sljit_uw cpool_current_index, sljit_uw *cpool_start_address, sljit_uw *buf_ptr)
{
	sljit_u32 value;
	struct future_patch *curr_patch, *prev_patch;

	SLJIT_UNUSED_ARG(compiler);

	/* Using the values generated by patch_pc_relative_loads. */
	if (!*first_patch)
		value = cpool_start_address[cpool_current_index];
	else {
		/* A pending patch may override the slot value; pop it if found. */
		curr_patch = *first_patch;
		prev_patch = NULL;
		while (1) {
			if (!curr_patch) {
				value = cpool_start_address[cpool_current_index];
				break;
			}
			if ((sljit_uw)curr_patch->index == cpool_current_index) {
				value = (sljit_uw)curr_patch->value;
				if (prev_patch)
					prev_patch->next = curr_patch->next;
				else
					*first_patch = curr_patch->next;
				SLJIT_FREE(curr_patch, compiler->allocator_data);
				break;
			}
			prev_patch = curr_patch;
			curr_patch = curr_patch->next;
		}
	}

	/* Negative value == unused slot; nothing to store. */
	if ((sljit_sw)value >= 0) {
		if (value > cpool_current_index) {
			/* Target slot still holds an unprocessed original value:
			   remember it on the patch list before overwriting. */
			curr_patch = (struct future_patch*)SLJIT_MALLOC(sizeof(struct future_patch), compiler->allocator_data);
			if (!curr_patch) {
				/* Out of memory: release the whole patch list. */
				while (*first_patch) {
					curr_patch = *first_patch;
					*first_patch = (*first_patch)->next;
					SLJIT_FREE(curr_patch, compiler->allocator_data);
				}
				return SLJIT_ERR_ALLOC_FAILED;
			}
			curr_patch->next = *first_patch;
			curr_patch->index = (sljit_sw)value;
			curr_patch->value = (sljit_sw)cpool_start_address[value];
			*first_patch = curr_patch;
		}
		cpool_start_address[value] = *buf_ptr;
	}
	return SLJIT_SUCCESS;
}
454 
455 #else
456 
push_inst(struct sljit_compiler * compiler,sljit_ins inst)457 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins inst)
458 {
459 	sljit_ins* ptr;
460 
461 	ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
462 	FAIL_IF(!ptr);
463 	compiler->size++;
464 	*ptr = inst;
465 	return SLJIT_SUCCESS;
466 }
467 
emit_imm(struct sljit_compiler * compiler,sljit_s32 reg,sljit_sw imm)468 static SLJIT_INLINE sljit_s32 emit_imm(struct sljit_compiler *compiler, sljit_s32 reg, sljit_sw imm)
469 {
470 	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | ((sljit_u32)imm & 0xfff)));
471 	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | (((sljit_u32)imm >> 16) & 0xfff));
472 }
473 
474 #endif
475 
/* Tries to shorten a jump to a single direct B/BL instruction when the
   +/-32 MB pc-relative range allows it, setting PATCH_B on success.
   Returns non-zero when an instruction word has been saved and the
   caller must step code_ptr back; rewritable jumps are never
   shortened.  On ARMv6 the BL case removes the constant load that
   preceded the branch; the plain branch case patches in place. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_uw *code_ptr, sljit_uw *code, sljit_sw executable_offset)
{
	sljit_sw diff;

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* For BL the literal-load word sits before the branch. */
	if (jump->flags & IS_BL)
		code_ptr--;
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* code_ptr + 2 accounts for the pc+8 pipeline offset. */
	if (jump->flags & JUMP_ADDR)
		diff = ((sljit_sw)jump->u.target - (sljit_sw)(code_ptr + 2) - executable_offset);
	else {
		SLJIT_ASSERT(jump->u.label != NULL);
		diff = ((sljit_sw)(code + jump->u.label->size) - (sljit_sw)(code_ptr + 2));
	}

	/* Branch to Thumb code has not been optimized yet. */
	if (diff & 0x3)
		return 0;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (jump->flags & IS_BL) {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Fold the condition of the following instruction into BL. */
			*code_ptr = (BL - CONDITIONAL) | (*(code_ptr + 1) & COND_MASK);
			jump->flags |= PATCH_B;
			return 1;
		}
	}
	else {
		if (diff <= 0x01ffffff && diff >= -0x02000000) {
			/* Patch in place; no word is removed for a plain branch. */
			*code_ptr = (B - CONDITIONAL) | (*code_ptr & COND_MASK);
			jump->flags |= PATCH_B;
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		*code_ptr = ((jump->flags & IS_BL) ? (BL - CONDITIONAL) : (B - CONDITIONAL)) | (*code_ptr & COND_MASK);
		jump->flags |= PATCH_B;
		return 1;
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
	return 0;
}
522 
/* Redirects an already-generated (rewritable) jump to new_addr.
   On ARMv6, jump_ptr refers to a two-word descriptor: the address of
   the patchable instruction(s) and the original mov-pc/load opcode.
   If the target is within branch range the code is turned into a
   direct B/BL, otherwise the original literal-load form is restored
   and the pool constant is updated.  On ARMv7 the MOVW/MOVT pair is
   rewritten in place.  When flush_cache is set, the W^X flags are
   toggled around the writes and the icache is flushed. */
static void set_jump_addr(sljit_uw jump_ptr, sljit_sw executable_offset, sljit_uw new_addr, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)jump_ptr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_ins mov_pc = ptr[1];
	/* BL form if the original destination register is not pc. */
	sljit_s32 bl = (mov_pc & 0x0000f000) != RD(TMP_PC);
	/* Word distance, biased by pc+8. */
	sljit_sw diff = (sljit_sw)(((sljit_sw)new_addr - (sljit_sw)(inst + 2) - executable_offset) >> 2);

	SLJIT_UNUSED_ARG(executable_offset);

	if (diff <= 0x7fffff && diff >= -0x800000) {
		/* Turn to branch. */
		if (!bl) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (B - CONDITIONAL) | (diff & 0xffffff);
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 1);
			}
		} else {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
			}
			inst[0] = (mov_pc & COND_MASK) | (BL - CONDITIONAL) | (diff & 0xffffff);
			/* The second word of the load/BLX pair becomes dead. */
			inst[1] = NOP;
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
				inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
				SLJIT_CACHE_FLUSH(inst, inst + 2);
			}
		}
	} else {
		/* Get the position of the constant. */
		if (mov_pc & (1 << 23))
			ptr = inst + ((mov_pc & 0xfff) >> 2) + 2;
		else
			ptr = inst + 1;

		/* Restore the literal-load form if it was branch-patched. */
		if (*inst != mov_pc) {
			if (flush_cache) {
				SLJIT_UPDATE_WX_FLAGS(inst, inst + (!bl ? 1 : 2), 0);
			}
			inst[0] = mov_pc;
			if (!bl) {
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 1);
				}
			} else {
				inst[1] = BLX | RM(TMP_REG1);
				if (flush_cache) {
					SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
					inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
					SLJIT_CACHE_FLUSH(inst, inst + 2);
				}
			}
		}

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
		}

		/* Update the pool constant; no icache flush needed for data. */
		*ptr = new_addr;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
		}
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)jump_ptr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Re-encode the destination address, keeping the Rd field. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_addr << 4) & 0xf0000) | (new_addr & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_addr >> 12) & 0xf0000) | ((new_addr >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
617 
618 static sljit_uw get_imm(sljit_uw imm);
619 static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm);
620 static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg);
621 
/* Rewrites a previously emitted constant to new_constant.
   On ARMv6, 'addr' refers to a two-word descriptor (instruction
   address + original literal-load opcode).  If the new value fits an
   immediate, the load is replaced with MOV (0xe3a00000) or MVN
   (0xe3e00000); otherwise the literal-load form is restored and the
   pool word is updated.  On ARMv7 the MOVW/MOVT pair is re-encoded.
   flush_cache controls W^X toggling and icache flushing, as in
   set_jump_addr. */
static void set_const_value(sljit_uw addr, sljit_sw executable_offset, sljit_uw new_constant, sljit_s32 flush_cache)
{
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_ins *ptr = (sljit_ins*)addr;
	sljit_ins *inst = (sljit_ins*)ptr[0];
	sljit_uw ldr_literal = ptr[1];
	sljit_uw src2;

	SLJIT_UNUSED_ARG(executable_offset);

	/* Try MOV with an encodable immediate first. */
	src2 = get_imm(new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = 0xe3a00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Then MVN with the complemented value. */
	src2 = get_imm(~new_constant);
	if (src2) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = 0xe3e00000 | (ldr_literal & 0xf000) | src2;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
		return;
	}

	/* Locate the pool word addressed by the original load. */
	if (ldr_literal & (1 << 23))
		ptr = inst + ((ldr_literal & 0xfff) >> 2) + 2;
	else
		ptr = inst + 1;

	/* Restore the literal load if a MOV/MVN was patched in earlier. */
	if (*inst != ldr_literal) {
		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 0);
		}

		*inst = ldr_literal;

		if (flush_cache) {
			SLJIT_UPDATE_WX_FLAGS(inst, inst + 1, 1);
			inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
			SLJIT_CACHE_FLUSH(inst, inst + 1);
		}
	}

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	}

	/* Data write; no icache flush required. */
	*ptr = new_constant;

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	sljit_ins *inst = (sljit_ins*)addr;

	SLJIT_UNUSED_ARG(executable_offset);

	SLJIT_ASSERT((inst[0] & 0xfff00000) == MOVW && (inst[1] & 0xfff00000) == MOVT);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 0);
	}

	/* Re-encode the constant, keeping the Rd field of each insn. */
	inst[0] = MOVW | (inst[0] & 0xf000) | ((new_constant << 4) & 0xf0000) | (new_constant & 0xfff);
	inst[1] = MOVT | (inst[1] & 0xf000) | ((new_constant >> 12) & 0xf0000) | ((new_constant >> 16) & 0xfff);

	if (flush_cache) {
		SLJIT_UPDATE_WX_FLAGS(inst, inst + 2, 1);
		inst = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
		SLJIT_CACHE_FLUSH(inst, inst + 2);
	}
#endif /* SLJIT_CONFIG_ARM_V6 */
}
713 
/* Computes how many extra instruction words (beyond the first) a
   mov-address sequence needs for this jump.  If the target is within
   the short pc-relative range, PATCH_B is set and 0 is returned (a
   single pc-relative instruction suffices).  Otherwise ARMv6 still
   uses one word (literal load, hence 0 extra) while ARMv7 needs a
   second word for the MOVW/MOVT pair. */
static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_uw addr;
	sljit_sw diff;
	SLJIT_UNUSED_ARG(executable_offset);

	if (jump->flags & JUMP_ADDR)
		addr = jump->u.target;
	else
		addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);

	/* The pc+8 offset is represented by the 2 * SSIZE_OF(ins) below. */
	diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	/* +/-0x3fc is the imm12>>2 range of a pc-relative add/ldr. */
	if ((diff & 0x3) == 0 && diff <= (0x3fc + 2 * SSIZE_OF(ins)) && diff >= (-0x3fc + 2 * SSIZE_OF(ins))) {
		jump->flags |= PATCH_B;
		return 0;
	}

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	return 0;
#else /* !SLJIT_CONFIG_ARM_V6 */
	return 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
}
739 
740 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
741 
/* First-pass size reduction (ARMv7 only).  Walks labels, jumps and
   consts in address order, estimating the real size of each jump /
   mov-address sequence and accumulating the total number of words
   saved versus the worst-case reservation.  Label sizes, const and
   jump addresses are shifted accordingly, each jump's actual extra
   word count is stored in its flags (JUMP_SIZE_SHIFT), and
   compiler->size is shrunk to the new upper bound. */
static void reduce_code_size(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	SLJIT_NEXT_DEFINE_TYPES;
	sljit_uw total_size;
	sljit_uw size_reduce = 0;
	sljit_sw diff;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();

	while (1) {
		SLJIT_GET_NEXT_MIN();

		if (next_min_addr == SLJIT_MAX_ADDRESS)
			break;

		if (next_min_addr == next_label_size) {
			/* Shift the label by the words saved so far. */
			label->size -= size_reduce;

			label = label->next;
			next_label_size = SLJIT_GET_NEXT_SIZE(label);
		}

		if (next_min_addr == next_const_addr) {
			const_->addr -= size_reduce;
			const_ = const_->next;
			next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
			continue;
		}

		if (next_min_addr != next_jump_addr)
			continue;

		jump->addr -= size_reduce;
		if (!(jump->flags & JUMP_MOV_ADDR)) {
			/* Worst case: extra words beyond the branch itself. */
			total_size = JUMP_MAX_SIZE - 1;

			if (!(jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR))) {
				/* Unit size: instruction. */
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr - 2;

				/* Fits a direct B/BL: single instruction, no extras. */
				if (diff <= (0x01ffffff / SSIZE_OF(ins)) && diff >= (-0x02000000 / SSIZE_OF(ins)))
					total_size = 1 - 1;
			}

			size_reduce += JUMP_MAX_SIZE - 1 - total_size;
		} else {
			/* Real size minus 1. Unit size: instruction. */
			total_size = 1;

			if (!(jump->flags & JUMP_ADDR)) {
				diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
				/* Short pc-relative form fits in one instruction. */
				if (diff <= 0xff + 2 && diff >= -0xff + 2)
					total_size = 0;
			}

			size_reduce += 1 - total_size;
		}

		/* Record the decided size for the second pass. */
		jump->flags |= total_size << JUMP_SIZE_SHIFT;
		jump = jump->next;
		next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
	}

	compiler->size -= size_reduce;
}
813 
814 #endif /* SLJIT_CONFIG_ARM_V7 */
815 
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)816 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
817 {
818 	struct sljit_memory_fragment *buf;
819 	sljit_ins *code;
820 	sljit_ins *code_ptr;
821 	sljit_ins *buf_ptr;
822 	sljit_ins *buf_end;
823 	sljit_uw word_count;
824 	SLJIT_NEXT_DEFINE_TYPES;
825 	sljit_sw executable_offset;
826 	sljit_uw addr;
827 	sljit_sw diff;
828 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
829 	sljit_uw cpool_size;
830 	sljit_uw cpool_skip_alignment;
831 	sljit_uw cpool_current_index;
832 	sljit_ins *cpool_start_address;
833 	sljit_ins *last_pc_patch;
834 	struct future_patch *first_patch;
835 #endif
836 
837 	struct sljit_label *label;
838 	struct sljit_jump *jump;
839 	struct sljit_const *const_;
840 
841 	CHECK_ERROR_PTR();
842 	CHECK_PTR(check_sljit_generate_code(compiler));
843 
844 	/* Second code generation pass. */
845 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
846 	compiler->size += (compiler->patches << 1);
847 	if (compiler->cpool_fill > 0)
848 		compiler->size += compiler->cpool_fill + CONST_POOL_ALIGNMENT - 1;
849 #else /* !SLJIT_CONFIG_ARM_V6 */
850 	reduce_code_size(compiler);
851 #endif /* SLJIT_CONFIG_ARM_V6 */
852 	code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
853 	PTR_FAIL_WITH_EXEC_IF(code);
854 
855 	reverse_buf(compiler);
856 	buf = compiler->buf;
857 
858 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
859 	cpool_size = 0;
860 	cpool_skip_alignment = 0;
861 	cpool_current_index = 0;
862 	cpool_start_address = NULL;
863 	first_patch = NULL;
864 	last_pc_patch = code;
865 #endif /* SLJIT_CONFIG_ARM_V6 */
866 
867 	code_ptr = code;
868 	word_count = 0;
869 	label = compiler->labels;
870 	jump = compiler->jumps;
871 	const_ = compiler->consts;
872 	SLJIT_NEXT_INIT_TYPES();
873 	SLJIT_GET_NEXT_MIN();
874 
875 	do {
876 		buf_ptr = (sljit_ins*)buf->memory;
877 		buf_end = buf_ptr + (buf->used_size >> 2);
878 		do {
879 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
880 			if (cpool_size > 0) {
881 				if (cpool_skip_alignment > 0) {
882 					buf_ptr++;
883 					cpool_skip_alignment--;
884 				} else {
885 					if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
886 						SLJIT_FREE_EXEC(code, exec_allocator_data);
887 						compiler->error = SLJIT_ERR_ALLOC_FAILED;
888 						return NULL;
889 					}
890 					buf_ptr++;
891 					if (++cpool_current_index >= cpool_size) {
892 						SLJIT_ASSERT(!first_patch);
893 						cpool_size = 0;
894 					}
895 				}
896 			} else if ((*buf_ptr & 0xff000000) != PUSH_POOL) {
897 #endif /* SLJIT_CONFIG_ARM_V6 */
898 				*code_ptr = *buf_ptr++;
899 				if (next_min_addr == word_count) {
900 					SLJIT_ASSERT(!label || label->size >= word_count);
901 					SLJIT_ASSERT(!jump || jump->addr >= word_count);
902 					SLJIT_ASSERT(!const_ || const_->addr >= word_count);
903 
904 					if (next_min_addr == next_label_size) {
905 						label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
906 						label->size = (sljit_uw)(code_ptr - code);
907 						label = label->next;
908 						next_label_size = SLJIT_GET_NEXT_SIZE(label);
909 					}
910 
911 					/* These structures are ordered by their address. */
912 					if (next_min_addr == next_jump_addr) {
913 						if (!(jump->flags & JUMP_MOV_ADDR)) {
914 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
915 							if (detect_jump_type(jump, code_ptr, code, executable_offset))
916 								code_ptr--;
917 							jump->addr = (sljit_uw)code_ptr;
918 #else /* !SLJIT_CONFIG_ARM_V6 */
919 							word_count += jump->flags >> JUMP_SIZE_SHIFT;
920 							jump->addr = (sljit_uw)code_ptr;
921 							if (!detect_jump_type(jump, code_ptr, code, executable_offset)) {
922 								code_ptr[2] = code_ptr[0];
923 								addr = ((code_ptr[0] & 0xf) << 12);
924 								code_ptr[0] = MOVW | addr;
925 								code_ptr[1] = MOVT | addr;
926 								code_ptr += 2;
927 							}
928 							SLJIT_ASSERT((sljit_uw)code_ptr - jump->addr <= (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins));
929 #endif /* SLJIT_CONFIG_ARM_V6 */
930 						} else {
931 #if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
932 							word_count += jump->flags >> JUMP_SIZE_SHIFT;
933 #endif /* SLJIT_CONFIG_ARM_V7 */
934 							addr = (sljit_uw)code_ptr;
935 							code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
936 							jump->addr = addr;
937 						}
938 						jump = jump->next;
939 						next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
940 					} else if (next_min_addr == next_const_addr) {
941 						const_->addr = (sljit_uw)code_ptr;
942 						const_ = const_->next;
943 						next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
944 					}
945 
946 					SLJIT_GET_NEXT_MIN();
947 				}
948 				code_ptr++;
949 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
950 			} else {
951 				/* Fortunately, no need to shift. */
952 				cpool_size = *buf_ptr++ & ~PUSH_POOL;
953 				SLJIT_ASSERT(cpool_size > 0);
954 				cpool_start_address = ALIGN_INSTRUCTION(code_ptr + 1);
955 				cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, cpool_size);
956 				if (cpool_current_index > 0) {
957 					/* Unconditional branch. */
958 					*code_ptr = B | (((sljit_ins)(cpool_start_address - code_ptr) + cpool_current_index - 2) & ~PUSH_POOL);
959 					code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
960 				}
961 				cpool_skip_alignment = CONST_POOL_ALIGNMENT - 1;
962 				cpool_current_index = 0;
963 				last_pc_patch = code_ptr;
964 			}
965 #endif /* SLJIT_CONFIG_ARM_V6 */
966 			word_count++;
967 		} while (buf_ptr < buf_end);
968 		buf = buf->next;
969 	} while (buf);
970 
971 	if (label && label->size == word_count) {
972 		label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
973 		label->size = (sljit_uw)(code_ptr - code);
974 		label = label->next;
975 	}
976 
977 	SLJIT_ASSERT(!label);
978 	SLJIT_ASSERT(!jump);
979 	SLJIT_ASSERT(!const_);
980 
981 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
982 	SLJIT_ASSERT(cpool_size == 0);
983 	if (compiler->cpool_fill > 0) {
984 		cpool_start_address = ALIGN_INSTRUCTION(code_ptr);
985 		cpool_current_index = patch_pc_relative_loads(last_pc_patch, code_ptr, cpool_start_address, compiler->cpool_fill);
986 		if (cpool_current_index > 0)
987 			code_ptr = (sljit_ins*)(cpool_start_address + cpool_current_index);
988 
989 		buf_ptr = compiler->cpool;
990 		buf_end = buf_ptr + compiler->cpool_fill;
991 		cpool_current_index = 0;
992 		while (buf_ptr < buf_end) {
993 			if (SLJIT_UNLIKELY(resolve_const_pool_index(compiler, &first_patch, cpool_current_index, cpool_start_address, buf_ptr))) {
994 				SLJIT_FREE_EXEC(code, exec_allocator_data);
995 				compiler->error = SLJIT_ERR_ALLOC_FAILED;
996 				return NULL;
997 			}
998 			buf_ptr++;
999 			cpool_current_index++;
1000 		}
1001 		SLJIT_ASSERT(!first_patch);
1002 	}
1003 #endif
1004 
1005 	jump = compiler->jumps;
1006 	while (jump) {
1007 		addr = (jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr;
1008 		buf_ptr = (sljit_ins*)jump->addr;
1009 
1010 		if (jump->flags & JUMP_MOV_ADDR) {
1011 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1012 			SLJIT_ASSERT((buf_ptr[0] & (sljit_ins)0xffff0000) == 0xe59f0000);
1013 #else /* !SLJIT_CONFIG_ARM_V6 */
1014 			SLJIT_ASSERT((buf_ptr[0] & ~(sljit_ins)0xf000) == 0);
1015 #endif /* SLJIT_CONFIG_ARM_V6 */
1016 
1017 			if (jump->flags & PATCH_B) {
1018 				SLJIT_ASSERT((((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) & 0x3) == 0);
1019 				diff = ((sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset)) >> 2;
1020 
1021 				SLJIT_ASSERT(diff <= 0xff && diff >= -0xff);
1022 
1023 				addr = ADD;
1024 				if (diff < 0) {
1025 					diff = -diff;
1026 					addr = SUB;
1027 				}
1028 
1029 				buf_ptr[0] = addr | (buf_ptr[0] & 0xf000) | RN(TMP_PC) | (1 << 25) | (0xf << 8) | (sljit_ins)(diff & 0xff);
1030 			} else {
1031 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1032 				buf_ptr[((buf_ptr[0] & 0xfff) >> 2) + 2] = addr;
1033 #else /* !SLJIT_CONFIG_ARM_V6 */
1034 				buf_ptr[1] = MOVT | buf_ptr[0] | ((addr >> 12) & 0xf0000) | ((addr >> 16) & 0xfff);
1035 				buf_ptr[0] = MOVW | buf_ptr[0] | ((addr << 4) & 0xf0000) | (addr & 0xfff);
1036 #endif /* SLJIT_CONFIG_ARM_V6 */
1037 			}
1038 		} else if (jump->flags & PATCH_B) {
1039 			diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr + 2, executable_offset);
1040 			SLJIT_ASSERT(diff <= 0x01ffffff && diff >= -0x02000000);
1041 			*buf_ptr |= (diff >> 2) & 0x00ffffff;
1042 		} else {
1043 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1044 			if (jump->flags & IS_BL)
1045 				buf_ptr--;
1046 
1047 			if (jump->flags & SLJIT_REWRITABLE_JUMP) {
1048 				jump->addr = (sljit_uw)code_ptr;
1049 				code_ptr[0] = (sljit_ins)buf_ptr;
1050 				code_ptr[1] = *buf_ptr;
1051 				set_jump_addr((sljit_uw)code_ptr, executable_offset, addr, 0);
1052 				code_ptr += 2;
1053 			} else {
1054 				if (*buf_ptr & (1 << 23))
1055 					buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1056 				else
1057 					buf_ptr += 1;
1058 				*buf_ptr = addr;
1059 			}
1060 #else /* !SLJIT_CONFIG_ARM_V6 */
1061 			set_jump_addr((sljit_uw)buf_ptr, executable_offset, addr, 0);
1062 #endif /* SLJIT_CONFIG_ARM_V6 */
1063 		}
1064 
1065 		jump = jump->next;
1066 	}
1067 
1068 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1069 	const_ = compiler->consts;
1070 	while (const_) {
1071 		buf_ptr = (sljit_ins*)const_->addr;
1072 		const_->addr = (sljit_uw)code_ptr;
1073 
1074 		code_ptr[0] = (sljit_ins)buf_ptr;
1075 		code_ptr[1] = *buf_ptr;
1076 		if (*buf_ptr & (1 << 23))
1077 			buf_ptr += ((*buf_ptr & 0xfff) >> 2) + 2;
1078 		else
1079 			buf_ptr += 1;
1080 		/* Set the value again (can be a simple constant). */
1081 		set_const_value((sljit_uw)code_ptr, executable_offset, *buf_ptr, 0);
1082 		code_ptr += 2;
1083 
1084 		const_ = const_->next;
1085 	}
1086 #endif /* SLJIT_CONFIG_ARM_V6 */
1087 
1088 	SLJIT_ASSERT(code_ptr - code <= (sljit_s32)compiler->size);
1089 
1090 	compiler->error = SLJIT_ERR_COMPILED;
1091 	compiler->executable_offset = executable_offset;
1092 	compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_uw);
1093 
1094 	code = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1095 	code_ptr = (sljit_ins*)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1096 
1097 	SLJIT_CACHE_FLUSH(code, code_ptr);
1098 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1099 	return code;
1100 }
1101 
sljit_has_cpu_feature(sljit_s32 feature_type)1102 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1103 {
1104 	switch (feature_type) {
1105 	case SLJIT_HAS_FPU:
1106 	case SLJIT_HAS_F64_AS_F32_PAIR:
1107 #ifdef SLJIT_IS_FPU_AVAILABLE
1108 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
1109 #else
1110 		/* Available by default. */
1111 		return 1;
1112 #endif /* SLJIT_IS_FPU_AVAILABLE */
1113 	case SLJIT_HAS_SIMD:
1114 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1115 		return 0;
1116 #else
1117 #ifdef SLJIT_IS_FPU_AVAILABLE
1118 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
1119 #else
1120 		/* Available by default. */
1121 		return 1;
1122 #endif /* SLJIT_IS_FPU_AVAILABLE */
1123 #endif /* SLJIT_CONFIG_ARM_V6 */
1124 
1125 	case SLJIT_SIMD_REGS_ARE_PAIRS:
1126 	case SLJIT_HAS_CLZ:
1127 	case SLJIT_HAS_ROT:
1128 	case SLJIT_HAS_CMOV:
1129 	case SLJIT_HAS_REV:
1130 	case SLJIT_HAS_PREFETCH:
1131 	case SLJIT_HAS_COPY_F32:
1132 	case SLJIT_HAS_COPY_F64:
1133 	case SLJIT_HAS_ATOMIC:
1134 		return 1;
1135 
1136 	case SLJIT_HAS_CTZ:
1137 #if defined(SLJIT_CONFIG_ARM_V6) && SLJIT_CONFIG_ARM_V6
1138 		return 2;
1139 #else
1140 		return 1;
1141 #endif /* SLJIT_CONFIG_ARM_V6 */
1142 
1143 	default:
1144 		return 0;
1145 	}
1146 }
1147 
1148 /* --------------------------------------------------------------------- */
1149 /*  Entry, exit                                                          */
1150 /* --------------------------------------------------------------------- */
1151 
/* Creates an index in data_transfer_insts array. */
#define WORD_SIZE	0x00
#define BYTE_SIZE	0x01
#define HALF_SIZE	0x02
#define PRELOAD		0x03
#define SIGNED		0x04
#define LOAD_DATA	0x08

/* Flag bits for emit_op. */
#define ALLOW_IMM		0x10
#define ALLOW_INV_IMM		0x20
#define ALLOW_ANY_IMM		(ALLOW_IMM | ALLOW_INV_IMM)
#define ALLOW_NEG_IMM		0x40
#define ALLOW_DOUBLE_IMM	0x80

/* s/l - store/load (1 bit)
   u/s - signed/unsigned (1 bit)
   w/b/h/N - word/byte/half/NOT allowed (2 bit)
   Storing signed and unsigned values are the same operations. */

/* Base opcodes indexed by (LOAD_DATA | SIGNED | size); the address
   mode bits are OR-ed in by EMIT_DATA_TRANSFER below. */
static const sljit_ins data_transfer_insts[16] = {
/* s u w */ 0xe5000000 /* str */,
/* s u b */ 0xe5400000 /* strb */,
/* s u h */ 0xe10000b0 /* strh */,
/* s u N */ 0x00000000 /* not allowed */,
/* s s w */ 0xe5000000 /* str */,
/* s s b */ 0xe5400000 /* strb */,
/* s s h */ 0xe10000b0 /* strh */,
/* s s N */ 0x00000000 /* not allowed */,

/* l u w */ 0xe5100000 /* ldr */,
/* l u b */ 0xe5500000 /* ldrb */,
/* l u h */ 0xe11000b0 /* ldrh */,
/* l u p */ 0xf5500000 /* preload */,
/* l s w */ 0xe5100000 /* ldr */,
/* l s b */ 0xe11000d0 /* ldrsb */,
/* l s h */ 0xe11000f0 /* ldrsh */,
/* l s N */ 0x00000000 /* not allowed */,
};

/* Combines a transfer opcode with the U (add/subtract offset, bit 23)
   flag, destination/base registers and the encoded offset argument. */
#define EMIT_DATA_TRANSFER(type, add, target_reg, base_reg, arg) \
	(data_transfer_insts[(type) & 0xf] | ((add) << 23) | RD(target_reg) | RN(base_reg) | (sljit_ins)(arg))

/* Normal ldr/str instruction.
   Type2: ldrsb, ldrh, ldrsh */
#define IS_TYPE1_TRANSFER(type) \
	(data_transfer_insts[(type) & 0xf] & 0x04000000)
/* Type2 transfers split the 8-bit immediate into two nibbles
   (bits 0-3 and 8-11); bit 22 selects the immediate form. */
#define TYPE2_TRANSFER_IMM(imm) \
	(((imm) & 0xf) | (((imm) & 0xf0) << 4) | (1 << 22))

#define EMIT_FPU_OPERATION(opcode, mode, dst, src1, src2) \
	((sljit_ins)(opcode) | (sljit_ins)(mode) | VD(dst) | VM(src1) | VN(src2))

/* Flags for emit_op: */
  /* Arguments are swapped. */
#define ARGS_SWAPPED	0x01
  /* Inverted immediate. */
#define INV_IMM		0x02
  /* Source and destination is register. */
#define REGISTER_OP	0x04
  /* Unused return value. */
#define UNUSED_RETURN	0x08
/* SET_FLAGS must be (1 << 20) as it is also the value of S bit (can be used for optimization). */
#define SET_FLAGS	(1 << 20)
/* dst: reg
   src1: reg
   src2: reg or imm (if allowed)
   SRC2_IMM must be (1 << 25) as it is also the value of I bit (can be used for optimization). */
#define SRC2_IMM	(1 << 25)

/* Forward declaration: generic two-operand code generator used below. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w);
1226 
/* Emits the function prologue: pushes the callee-saved core registers
   (always including lr), saves FPU registers when required, moves the
   incoming arguments into their assigned sljit registers and finally
   allocates the local stack area. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_uw imm, offset;
	sljit_s32 i, tmp, size, word_arg_count;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
#ifdef __SOFTFP__
	sljit_u32 float_arg_count;
#else
	/* Hardfp: float arguments may need to be compacted into consecutive
	   registers; the moves are collected in remap[] and emitted last. */
	sljit_u32 old_offset, f32_offset;
	sljit_u32 remap[3];
	sljit_u32 *remap_ptr = remap;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Build the bit mask of core registers that must be pushed. */
	imm = 0;

	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--)
		imm |= (sljit_uw)1 << reg_map[i];

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

	/* Push saved and temporary registers
	   multiple registers: stmdb sp!, {..., lr}
	   single register: str reg, [sp, #-4]! */
	if (imm != 0)
		FAIL_IF(push_inst(compiler, PUSH | (1 << 14) | imm));
	else
		FAIL_IF(push_inst(compiler, 0xe52d0004 | RD(TMP_REG2)));

	/* Stack must be aligned to 8 bytes: */
	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 1);

	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		/* Insert one padding word so the vpush below keeps 8-byte alignment. */
		if ((size & SSIZE_OF(sw)) != 0) {
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | sizeof(sljit_sw)));
			size += SSIZE_OF(sw);
		}

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPUSH | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPUSH | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
		}
	}

	/* local_size becomes the extra space needed beyond the saved registers,
	   rounded so the total frame stays 8-byte aligned. */
	local_size = ((size + local_size + 0x7) & ~0x7) - size;
	compiler->local_size = local_size;

	if (options & SLJIT_ENTER_REG_ARG)
		arg_types = 0;

	arg_types >>= SLJIT_ARG_SHIFT;
	word_arg_count = 0;
	saved_arg_count = 0;
#ifdef __SOFTFP__
	SLJIT_COMPILE_ASSERT(SLJIT_FR0 == 1, float_register_index_start);

	/* Softfp: arguments arrive in r0-r3 and then on the caller's stack;
	   offset tracks the argument position in bytes. */
	offset = 0;
	float_arg_count = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV2 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800100 | RN(SLJIT_SP)
						| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			offset += sizeof(sljit_f64) - sizeof(sljit_sw);
			break;
		case SLJIT_ARG_TYPE_F32:
			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, VMOV | (float_arg_count << 16) | (offset << 10)));
			else
				FAIL_IF(push_inst(compiler, VLDR_F32 | 0x800000 | RN(SLJIT_SP)
						| (float_arg_count << 12) | ((offset + (sljit_ins)size - 4 * sizeof(sljit_sw)) >> 2)));
			float_arg_count++;
			break;
		default:
			word_arg_count++;

			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				tmp = SLJIT_S0 - saved_arg_count;
				saved_arg_count++;
			} else if (word_arg_count - 1 != (sljit_s32)(offset >> 2))
				tmp = word_arg_count;
			else
				/* Argument already sits in its target register. */
				break;

			if (offset < 4 * sizeof(sljit_sw))
				FAIL_IF(push_inst(compiler, MOV | RD(tmp) | (offset >> 2)));
			else
				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(tmp) | (offset + (sljit_ins)size - 4 * sizeof(sljit_sw))));
			break;
		}

		offset += sizeof(sljit_sw);
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	compiler->args_size = offset;
#else
	offset = SLJIT_FR0;
	old_offset = SLJIT_FR0;
	f32_offset = 0;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != old_offset)
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, SLJIT_32, offset, old_offset, 0);
			old_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Reuse the free upper half of a previously split register. */
				*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0x20, offset, f32_offset, 0);
				f32_offset = 0;
			} else {
				if (offset != old_offset)
					*remap_ptr++ = EMIT_FPU_OPERATION(VMOV_F32, 0, offset, old_offset, 0);
				f32_offset = old_offset;
				old_offset++;
			}
			offset++;
			break;
		default:
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(SLJIT_R0 + word_arg_count)));
				saved_arg_count++;
			}

			word_arg_count++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	/* NOTE(review): this compares a move count (<= 3) against a byte size
	   (sizeof(remap) == 12), so it is weaker than an element-count check;
	   it still cannot fire spuriously. */
	SLJIT_ASSERT((sljit_uw)(remap_ptr - remap) <= sizeof(remap));

	/* Emit the collected moves in reverse to avoid overwriting sources. */
	while (remap_ptr > remap)
		FAIL_IF(push_inst(compiler, *(--remap_ptr)));
#endif

	if (local_size > 0)
		FAIL_IF(emit_op(compiler, SLJIT_SUB, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, local_size));

	return SLJIT_SUCCESS;
}
1392 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1393 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1394 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1395 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1396 {
1397 	sljit_s32 size;
1398 
1399 	CHECK_ERROR();
1400 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1401 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1402 
1403 	size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 1);
1404 
1405 	/* Doubles are saved, so alignment is unaffected. */
1406 	if ((size & SSIZE_OF(sw)) != 0 && (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG))
1407 		size += SSIZE_OF(sw);
1408 
1409 	compiler->local_size = ((size + local_size + 0x7) & ~0x7) - size;
1410 	return SLJIT_SUCCESS;
1411 }
1412 
emit_add_sp(struct sljit_compiler * compiler,sljit_uw imm)1413 static sljit_s32 emit_add_sp(struct sljit_compiler *compiler, sljit_uw imm)
1414 {
1415 	sljit_uw imm2 = get_imm(imm);
1416 
1417 	if (imm2 == 0)
1418 		return emit_op(compiler, SLJIT_ADD, ALLOW_IMM | ALLOW_DOUBLE_IMM, SLJIT_SP, 0, SLJIT_SP, 0, SLJIT_IMM, (sljit_sw)imm);
1419 
1420 	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | imm2);
1421 }
1422 
/* Releases the frame built by sljit_emit_enter.
   frame_size == 0 : full return; lr is restored straight into pc.
   frame_size < 0  : lr is restored into TMP_REG2 instead of pc.
   frame_size > 0  : lr stays on the stack; sp is left frame_size bytes
                     (rounded down to 8) below the saved registers. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 frame_size)
{
	sljit_s32 local_size, fscratches, fsaveds, i, tmp;
	sljit_s32 restored_reg = 0;
	sljit_s32 lr_dst = TMP_PC;
	sljit_uw reg_list = 0;

	SLJIT_ASSERT(reg_map[TMP_REG2] == 14 && frame_size <= 128);

	local_size = compiler->local_size;
	fscratches = compiler->fscratches;
	fsaveds = compiler->fsaveds;

	/* Pop the FPU registers first; they sit above the local area. */
	if (fsaveds > 0 || fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
		if (local_size > 0)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

		if (fsaveds + fscratches >= SLJIT_NUMBER_OF_FLOAT_REGISTERS) {
			FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)SLJIT_NUMBER_OF_SAVED_FLOAT_REGISTERS << 1)));
		} else {
			if (fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG)
				FAIL_IF(push_inst(compiler, VPOP | VD(fscratches) | ((sljit_ins)(fscratches - (SLJIT_FIRST_SAVED_FLOAT_REG - 1)) << 1)));
			if (fsaveds > 0)
				FAIL_IF(push_inst(compiler, VPOP | VD(SLJIT_FS0) | ((sljit_ins)fsaveds << 1)));
		}

		/* Only the alignment padding word (if any) remains below sp. */
		local_size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1) & 0x7;
	}

	if (frame_size < 0) {
		lr_dst = TMP_REG2;
		frame_size = 0;
	} else if (frame_size > 0) {
		SLJIT_ASSERT(frame_size == 1 || (frame_size & 0x7) == 0);
		lr_dst = 0;
		frame_size &= ~0x7;
	}

	if (lr_dst != 0)
		reg_list |= (sljit_uw)1 << reg_map[lr_dst];

	/* Collect the saved registers that must be reloaded. */
	tmp = SLJIT_S0 - compiler->saveds;
	i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
	if (tmp < i) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i > tmp);
	}

	i = compiler->scratches;
	if (i >= SLJIT_FIRST_SAVED_REG) {
		restored_reg = i;
		do {
			reg_list |= (sljit_uw)1 << reg_map[i];
		} while (--i >= SLJIT_FIRST_SAVED_REG);
	}

	if (lr_dst == TMP_REG2 && reg_list == 0) {
		/* Only lr itself needs restoring; treat it as a plain register. */
		restored_reg = TMP_REG2;
		lr_dst = 0;
	}

	/* Fast path: at most one register to restore and no jump through pc.
	   tmp encodes the strategy chosen below:
	     0 - nothing to load, 1 - already loaded,
	     2 - ldr after the sp adjustment, 3 - post-indexed ldr. */
	if (lr_dst == 0 && (reg_list & (reg_list - 1)) == 0) {
		/* The local_size does not include the saved registers. */
		tmp = 0;
		if (reg_list != 0) {
			tmp = 2;
			if (local_size <= 0xfff) {
				if (local_size == 0) {
					SLJIT_ASSERT(restored_reg != TMP_REG2);
					if (frame_size == 0)
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | 0x800008);
					if (frame_size > 2 * SSIZE_OF(sw))
						return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)(frame_size - (2 * SSIZE_OF(sw))));
				}

				FAIL_IF(push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)local_size));
				tmp = 1;
			} else if (frame_size == 0) {
				frame_size = (restored_reg == TMP_REG2) ? SSIZE_OF(sw) : 2 * SSIZE_OF(sw);
				tmp = 3;
			}

			/* Place for the saved register. */
			if (restored_reg != TMP_REG2)
				local_size += SSIZE_OF(sw);
		}

		/* Place for the lr register. */
		local_size += SSIZE_OF(sw);

		if (frame_size > local_size)
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | (sljit_ins)(frame_size - local_size)));
		else if (frame_size < local_size)
			FAIL_IF(emit_add_sp(compiler, (sljit_uw)(local_size - frame_size)));

		if (tmp <= 1)
			return SLJIT_SUCCESS;

		if (tmp == 2) {
			frame_size -= SSIZE_OF(sw);
			if (restored_reg != TMP_REG2)
				frame_size -= SSIZE_OF(sw);

			return push_inst(compiler, LDR | 0x800000 | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)frame_size);
		}

		tmp = (restored_reg == TMP_REG2) ? 0x800004 : 0x800008;
		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(restored_reg) | (sljit_ins)tmp);
	}

	if (local_size > 0)
		FAIL_IF(emit_add_sp(compiler, (sljit_uw)local_size));

	/* Pop saved and temporary registers
	   multiple registers: ldmia sp!, {...}
	   single register: ldr reg, [sp], #4 */
	if ((reg_list & (reg_list - 1)) == 0) {
		SLJIT_ASSERT(lr_dst != 0);
		SLJIT_ASSERT(reg_list == (sljit_uw)1 << reg_map[lr_dst]);

		return push_inst(compiler, LDR_POST | RN(SLJIT_SP) | RD(lr_dst) | 0x800004);
	}

	FAIL_IF(push_inst(compiler, POP | reg_list));

	if (frame_size > 0)
		return push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | ((sljit_ins)frame_size - sizeof(sljit_sw)));

	if (lr_dst != 0)
		return SLJIT_SUCCESS;

	/* lr stayed on the stack (frame_size was 1): skip over its slot. */
	return push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 25) | sizeof(sljit_sw));
}
1558 
/* Emits a return with no result: releases the whole frame and, since
   frame_size is 0, restores lr directly into pc. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	return emit_stack_frame_release(compiler, 0);
}
1566 
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1567 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1568 	sljit_s32 src, sljit_sw srcw)
1569 {
1570 	CHECK_ERROR();
1571 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1572 
1573 	if (src & SLJIT_MEM) {
1574 		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
1575 		src = TMP_REG1;
1576 		srcw = 0;
1577 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1578 		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
1579 		src = TMP_REG1;
1580 		srcw = 0;
1581 	}
1582 
1583 	FAIL_IF(emit_stack_frame_release(compiler, 1));
1584 
1585 	SLJIT_SKIP_CHECKS(compiler);
1586 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1587 }
1588 
1589 /* --------------------------------------------------------------------- */
1590 /*  Operators                                                            */
1591 /* --------------------------------------------------------------------- */
1592 
emit_single_op(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 flags,sljit_uw dst,sljit_uw src1,sljit_uw src2)1593 static SLJIT_INLINE sljit_s32 emit_single_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 flags,
1594 	sljit_uw dst, sljit_uw src1, sljit_uw src2)
1595 {
1596 	sljit_s32 reg, is_masked;
1597 	sljit_uw shift_type;
1598 
1599 	switch (op) {
1600 	case SLJIT_MOV:
1601 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1602 		if (dst != src2) {
1603 			if (src2 & SRC2_IMM) {
1604 				return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1605 			}
1606 			return push_inst(compiler, MOV | RD(dst) | RM(src2));
1607 		}
1608 		return SLJIT_SUCCESS;
1609 
1610 	case SLJIT_MOV_U8:
1611 	case SLJIT_MOV_S8:
1612 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1613 		if (flags & REGISTER_OP)
1614 			return push_inst(compiler, (op == SLJIT_MOV_U8 ? UXTB : SXTB) | RD(dst) | RM(src2));
1615 
1616 		if (dst != src2) {
1617 			SLJIT_ASSERT(src2 & SRC2_IMM);
1618 			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1619 		}
1620 		return SLJIT_SUCCESS;
1621 
1622 	case SLJIT_MOV_U16:
1623 	case SLJIT_MOV_S16:
1624 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1625 		if (flags & REGISTER_OP)
1626 			return push_inst(compiler, (op == SLJIT_MOV_U16 ? UXTH : SXTH) | RD(dst) | RM(src2));
1627 
1628 		if (dst != src2) {
1629 			SLJIT_ASSERT(src2 & SRC2_IMM);
1630 			return push_inst(compiler, ((flags & INV_IMM) ? MVN : MOV) | RD(dst) | src2);
1631 		}
1632 		return SLJIT_SUCCESS;
1633 
1634 	case SLJIT_CLZ:
1635 		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1636 		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(src2)));
1637 		return SLJIT_SUCCESS;
1638 
1639 	case SLJIT_CTZ:
1640 		SLJIT_ASSERT(!(flags & INV_IMM) && !(src2 & SRC2_IMM));
1641 		SLJIT_ASSERT(src1 == TMP_REG1 && src2 != TMP_REG2 && !(flags & ARGS_SWAPPED));
1642 #if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
1643 		FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
1644 		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG1) | RN(src2) | RM(TMP_REG2)));
1645 		FAIL_IF(push_inst(compiler, CLZ | RD(dst) | RM(TMP_REG1)));
1646 		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(dst) | 32));
1647 		return push_inst(compiler, (EOR ^ 0xf0000000) | SRC2_IMM | RD(dst) | RN(dst) | 0x1f);
1648 #else /* !SLJIT_CONFIG_ARM_V6 */
1649 		FAIL_IF(push_inst(compiler, RBIT | RD(dst) | RM(src2)));
1650 		return push_inst(compiler, CLZ | RD(dst) | RM(dst));
1651 #endif /* SLJIT_CONFIG_ARM_V6 */
1652 
1653 	case SLJIT_REV:
1654 	case SLJIT_REV_U32:
1655 	case SLJIT_REV_S32:
1656 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1657 		return push_inst(compiler, REV | RD(dst) | RM(src2));
1658 
1659 	case SLJIT_REV_U16:
1660 	case SLJIT_REV_S16:
1661 		SLJIT_ASSERT(src1 == TMP_REG1 && !(flags & ARGS_SWAPPED));
1662 		FAIL_IF(push_inst(compiler, REV16 | RD(dst) | RM(src2)));
1663 		if (!(flags & REGISTER_OP))
1664 			return SLJIT_SUCCESS;
1665 		return push_inst(compiler, (op == SLJIT_REV_U16 ? UXTH : SXTH) | RD(dst) | RM(dst));
1666 	case SLJIT_ADD:
1667 		SLJIT_ASSERT(!(flags & INV_IMM));
1668 
1669 		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
1670 			return push_inst(compiler, CMN | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1671 		return push_inst(compiler, ADD | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1672 
1673 	case SLJIT_ADDC:
1674 		SLJIT_ASSERT(!(flags & INV_IMM));
1675 		return push_inst(compiler, ADC | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1676 
1677 	case SLJIT_SUB:
1678 		SLJIT_ASSERT(!(flags & INV_IMM));
1679 
1680 		if ((flags & (UNUSED_RETURN | ARGS_SWAPPED)) == UNUSED_RETURN)
1681 			return push_inst(compiler, CMP | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1682 
1683 		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SUB : RSB) | (flags & SET_FLAGS)
1684 			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1685 
1686 	case SLJIT_SUBC:
1687 		SLJIT_ASSERT(!(flags & INV_IMM));
1688 		return push_inst(compiler, (!(flags & ARGS_SWAPPED) ? SBC : RSC) | (flags & SET_FLAGS)
1689 			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1690 
1691 	case SLJIT_MUL:
1692 		SLJIT_ASSERT(!(flags & INV_IMM));
1693 		SLJIT_ASSERT(!(src2 & SRC2_IMM));
1694 		compiler->status_flags_state = 0;
1695 
1696 		if (!(flags & SET_FLAGS))
1697 			return push_inst(compiler, MUL | RN(dst) | RM8(src2) | RM(src1));
1698 
1699 		reg = dst == TMP_REG1 ? TMP_REG2 : TMP_REG1;
1700 		FAIL_IF(push_inst(compiler, SMULL | RN(reg) | RD(dst) | RM8(src2) | RM(src1)));
1701 
1702 		/* cmp TMP_REG1, dst asr #31. */
1703 		return push_inst(compiler, CMP | SET_FLAGS | RN(reg) | RM(dst) | 0xfc0);
1704 
1705 	case SLJIT_AND:
1706 		if ((flags & (UNUSED_RETURN | INV_IMM)) == UNUSED_RETURN)
1707 			return push_inst(compiler, TST | SET_FLAGS | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1708 		return push_inst(compiler, (!(flags & INV_IMM) ? AND : BIC) | (flags & SET_FLAGS)
1709 			| RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1710 
1711 	case SLJIT_OR:
1712 		SLJIT_ASSERT(!(flags & INV_IMM));
1713 		return push_inst(compiler, ORR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1714 
1715 	case SLJIT_XOR:
1716 		if (flags & INV_IMM) {
1717 			SLJIT_ASSERT(src2 == SRC2_IMM);
1718 			return push_inst(compiler, MVN | (flags & SET_FLAGS) | RD(dst) | RM(src1));
1719 		}
1720 		return push_inst(compiler, EOR | (flags & SET_FLAGS) | RD(dst) | RN(src1) | ((src2 & SRC2_IMM) ? src2 : RM(src2)));
1721 
1722 	case SLJIT_SHL:
1723 	case SLJIT_MSHL:
1724 		shift_type = 0;
1725 		is_masked = op == SLJIT_MSHL;
1726 		break;
1727 
1728 	case SLJIT_LSHR:
1729 	case SLJIT_MLSHR:
1730 		shift_type = 1;
1731 		is_masked = op == SLJIT_MLSHR;
1732 		break;
1733 
1734 	case SLJIT_ASHR:
1735 	case SLJIT_MASHR:
1736 		shift_type = 2;
1737 		is_masked = op == SLJIT_MASHR;
1738 		break;
1739 
1740 	case SLJIT_ROTL:
1741 		if (compiler->shift_imm == 0x20) {
1742 			FAIL_IF(push_inst(compiler, RSB | SRC2_IMM | RD(TMP_REG2) | RN(src2) | 0));
1743 			src2 = TMP_REG2;
1744 		} else
1745 			compiler->shift_imm = (sljit_uw)(-(sljit_sw)compiler->shift_imm) & 0x1f;
1746 		/* fallthrough */
1747 
1748 	case SLJIT_ROTR:
1749 		shift_type = 3;
1750 		is_masked = 0;
1751 		break;
1752 
1753 	case SLJIT_MULADD:
1754 		return push_inst(compiler, MLA | RN(dst) | RD(dst) | RM8(src2) | RM(src1));
1755 
1756 	default:
1757 		SLJIT_UNREACHABLE();
1758 		return SLJIT_SUCCESS;
1759 	}
1760 
1761 	SLJIT_ASSERT(!(flags & ARGS_SWAPPED) && !(flags & INV_IMM) && !(src2 & SRC2_IMM));
1762 
1763 	if (compiler->shift_imm != 0x20) {
1764 		SLJIT_ASSERT(src1 == TMP_REG1);
1765 
1766 		if (compiler->shift_imm != 0)
1767 			return push_inst(compiler, MOV | (flags & SET_FLAGS) |
1768 				RD(dst) | (compiler->shift_imm << 7) | (shift_type << 5) | RM(src2));
1769 		return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst) | RM(src2));
1770 	}
1771 
1772 	SLJIT_ASSERT(src1 != TMP_REG2);
1773 
1774 	if (is_masked) {
1775 		FAIL_IF(push_inst(compiler, AND | RD(TMP_REG2) | RN(src2) | SRC2_IMM | 0x1f));
1776 		src2 = TMP_REG2;
1777 	}
1778 
1779 	return push_inst(compiler, MOV | (flags & SET_FLAGS) | RD(dst)
1780 		| RM8(src2) | (sljit_ins)(shift_type << 5) | 0x10 | RM(src1));
1781 }
1782 
1783 #undef EMIT_SHIFT_INS_AND_RETURN
1784 
1785 /* Tests whether the immediate can be stored in the 12 bit imm field.
1786    Returns with 0 if not possible. */
static sljit_uw get_imm(sljit_uw imm)
{
	sljit_u32 rol;

	/* A value that already fits in eight bits needs no rotation. */
	if (imm <= 0xff)
		return SRC2_IMM | imm;

	if (imm & 0xff000000) {
		/* Rotate right by 8 so a potentially set low byte cannot be lost
		   by the left shifts below. */
		imm = (imm << 24) | (imm >> 8);
		rol = 0;
	} else {
		imm <<= 8;
		rol = 8;
	}

	/* Normalize: shift left until one of the top two bits is set,
	   tracking the ARM rotation amount (counted in 2 bit units). */
	if (!(imm & 0xff000000)) {
		imm <<= 8;
		rol += 4;
	}

	if (!(imm & 0xf0000000)) {
		imm <<= 4;
		rol += 2;
	}

	if (!(imm & 0xc0000000)) {
		imm <<= 2;
		rol += 1;
	}

	/* Encodable only when everything outside the top byte is zero. */
	return (imm & 0x00ffffff) ? 0 : (SRC2_IMM | (imm >> 24) | (rol << 8));
}
1821 
/* Tries to encode 'imm' as a pair of ARM data processing immediates
   (each an 8 bit value rotated right by an even amount). On success
   returns the first encoded operand (nonzero) and stores the second
   in *imm2; returns 0 when no such pair exists. */
static sljit_uw compute_imm(sljit_uw imm, sljit_uw* imm2)
{
	sljit_uw mask;
	sljit_uw imm1;
	sljit_uw rol;

	/* Step1: Search a zero byte (8 continous zero bit). */
	mask = 0xff000000;
	rol = 8;
	while (1) {
		if (!(imm & mask)) {
			/* Rol imm by rol. */
			imm = (imm << rol) | (imm >> (32 - rol));
			/* Calculate arm rol. */
			rol = 4 + (rol >> 1);
			break;
		}

		rol += 2;
		mask >>= 2;
		if (mask & 0x3) {
			/* Mask became unaligned: retry on a byte-rotated copy. */
			/* rol by 8. */
			imm = (imm << 8) | (imm >> 24);
			mask = 0xff00;
			rol = 24;
			while (1) {
				if (!(imm & mask)) {
					/* Rol imm by rol. */
					imm = (imm << rol) | (imm >> (32 - rol));
					/* Calculate arm rol. */
					rol = (rol >> 1) - 8;
					break;
				}
				rol += 2;
				mask >>= 2;
				if (mask & 0x3)
					/* No aligned zero byte anywhere: not encodable. */
					return 0;
			}
			break;
		}
	}

	/* The low 8 bit must be zero. */
	SLJIT_ASSERT(!(imm & 0xff));

	if (!(imm & 0xff000000)) {
		/* Both remaining bytes sit in bits 8-23: encode them directly. */
		imm1 = SRC2_IMM | ((imm >> 16) & 0xff) | (((rol + 4) & 0xf) << 8);
		*imm2 = SRC2_IMM | ((imm >> 8) & 0xff) | (((rol + 8) & 0xf) << 8);
	} else if (imm & 0xc0000000) {
		/* Top byte is the first immediate; normalize the rest for the
		   second one. */
		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xff000000)) {
			imm <<= 8;
			rol += 4;
		}

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	} else {
		/* Shift the first byte up to the top before encoding it. */
		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		imm1 = SRC2_IMM | ((imm >> 24) & 0xff) | ((rol & 0xf) << 8);
		imm <<= 8;
		rol += 4;

		if (!(imm & 0xf0000000)) {
			imm <<= 4;
			rol += 2;
		}

		if (!(imm & 0xc0000000)) {
			imm <<= 2;
			rol += 1;
		}

		if (!(imm & 0x00ffffff))
			*imm2 = SRC2_IMM | (imm >> 24) | ((rol & 0xf) << 8);
		else
			return 0;
	}

	return imm1;
}
1927 
/* Emits code that loads the constant 'imm' into 'reg', picking the
   shortest available instruction sequence for the target core. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 reg, sljit_uw imm)
{
	sljit_uw tmp;
#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	sljit_uw imm1, imm2;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: a 16 bit constant fits into a single movw. */
	if (!(imm & ~(sljit_uw)0xffff))
		return push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */

	/* Create imm by 1 inst. */
	tmp = get_imm(imm);
	if (tmp)
		return push_inst(compiler, MOV | RD(reg) | tmp);

	/* The bitwise inverse may be encodable even when imm is not. */
	tmp = get_imm(~imm);
	if (tmp)
		return push_inst(compiler, MVN | RD(reg) | tmp);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* Create imm by 2 inst. */
	imm1 = compute_imm(imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MOV | RD(reg) | imm1));
		return push_inst(compiler, ORR | RD(reg) | RN(reg) | imm2);
	}

	imm1 = compute_imm(~imm, &imm2);
	if (imm1 != 0) {
		FAIL_IF(push_inst(compiler, MVN | RD(reg) | imm1));
		return push_inst(compiler, BIC | RD(reg) | RN(reg) | imm2);
	}

	/* Load integer. */
	return push_inst_with_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, reg, TMP_PC, 0), imm);
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* movw + movt pair. NOTE(review): the imm <= 0xffff check below
	   cannot trigger, since such values already returned through the
	   movw fast path above; it is kept as a defensive guard. */
	FAIL_IF(push_inst(compiler, MOVW | RD(reg) | ((imm << 4) & 0xf0000) | (imm & 0xfff)));
	if (imm <= 0xffff)
		return SLJIT_SUCCESS;
	return push_inst(compiler, MOVT | RD(reg) | ((imm >> 12) & 0xf0000) | ((imm >> 16) & 0xfff));
#endif /* SLJIT_CONFIG_ARM_V6 */
}
1970 
/* Emits a single load or store ('flags' encodes direction and size)
   between 'reg' and the memory operand arg/argw. tmp_reg may be
   clobbered while building the effective address. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_uw imm, offset_reg, tmp;
	/* Type1 transfers take a 12 bit immediate offset, type2 an 8 bit one. */
	sljit_sw mask = IS_TYPE1_TRANSFER(flags) ? 0xfff : 0xff;
	sljit_sw sign = IS_TYPE1_TRANSFER(flags) ? 0x1000 : 0x100;

	SLJIT_ASSERT(arg & SLJIT_MEM);
	SLJIT_ASSERT((arg & REG_MASK) != tmp_reg || (arg == SLJIT_MEM1(tmp_reg) && argw >= -mask && argw <= mask));

	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: load a rounded base into tmp_reg, then use
		   the remainder as an immediate offset. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);

		FAIL_IF(load_immediate(compiler, tmp_reg, tmp));

		argw -= (sljit_sw)tmp;
		tmp = 1;

		if (argw < 0) {
			/* Negative remainder: subtract the offset instead. */
			argw = -argw;
			tmp = 0;
		}

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, tmp, reg, tmp_reg,
			(mask == 0xff) ? TYPE2_TRANSFER_IMM(argw) : argw));
	}

	if (arg & OFFS_REG_MASK) {
		offset_reg = OFFS_REG(arg);
		arg &= REG_MASK;
		argw &= 0x3;

		if (argw != 0 && (mask == 0xff)) {
			/* Type2 transfers have no shifted register form: compute
			   the address separately first. */
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | RM(offset_reg) | ((sljit_ins)argw << 7)));
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, tmp_reg, TYPE2_TRANSFER_IMM(0)));
		}

		/* Bit 25: RM is offset. */
		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
			RM(offset_reg) | (mask == 0xff ? 0 : (1 << 25)) | ((sljit_ins)argw << 7)));
	}

	arg &= REG_MASK;

	if (argw > mask) {
		/* Try folding the high part of the offset into the base when it
		   is encodable as a data processing immediate. */
		tmp = (sljit_uw)(argw & (sign | mask));
		tmp = (sljit_uw)((argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg) | imm));
			argw -= (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	} else if (argw < -mask) {
		/* Same as above for negative offsets, using SUB. */
		tmp = (sljit_uw)(-argw & (sign | mask));
		tmp = (sljit_uw)((-argw + (tmp <= (sljit_uw)sign ? 0 : sign)) & ~mask);
		imm = get_imm(tmp);

		if (imm) {
			FAIL_IF(push_inst(compiler, SUB | RD(tmp_reg) | RN(arg) | imm));
			argw += (sljit_sw)tmp;
			arg = tmp_reg;

			SLJIT_ASSERT(argw >= -mask && argw <= mask);
		}
	}

	if (argw <= mask && argw >= -mask) {
		if (argw >= 0) {
			if (mask == 0xff)
				argw = TYPE2_TRANSFER_IMM(argw);
			return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg, argw));
		}

		argw = -argw;

		if (mask == 0xff)
			argw = TYPE2_TRANSFER_IMM(argw);

		return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 0, reg, arg, argw));
	}

	/* Worst case: materialize the whole offset in tmp_reg and use the
	   register offset form. */
	FAIL_IF(load_immediate(compiler, tmp_reg, (sljit_uw)argw));
	return push_inst(compiler, EMIT_DATA_TRANSFER(flags, 1, reg, arg,
		RM(tmp_reg) | (mask == 0xff ? 0 : (1 << 25))));
}
2061 
/* Central operation emitter: normalizes both source operands
   (register / immediate / memory), then dispatches to emit_single_op.
   'inp_flags' restricts which immediate forms are acceptable for the
   given opcode. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 inp_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* src1 is reg or TMP_REG1
	   src2 is reg, TMP_REG2, or imm
	   result goes to TMP_REG2, so put result can use TMP_REG1. */

	/* We prefers register and simple consts. */
	sljit_s32 dst_reg;
	sljit_s32 src1_reg = 0;
	sljit_s32 src2_reg = 0;
	sljit_s32 src2_tmp_reg = 0;
	sljit_s32 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
	sljit_s32 neg_op = 0;
	sljit_u32 imm2;

	op = GET_OPCODE(op);

	/* A two instruction immediate would clobber the status flags
	   between the halves, so it is disallowed when flags are needed. */
	if (flags & SET_FLAGS)
		inp_flags &= ~ALLOW_DOUBLE_IMM;

	if (dst == TMP_REG1)
		flags |= UNUSED_RETURN;

	SLJIT_ASSERT(!(inp_flags & ALLOW_INV_IMM) || (inp_flags & ALLOW_IMM));

	if (inp_flags & ALLOW_NEG_IMM) {
		/* Remember the opposite operation; a negated immediate may be
		   encodable even when the original one is not. */
		switch (op) {
		case SLJIT_ADD:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUB;
			break;
		case SLJIT_ADDC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
			neg_op = SLJIT_SUBC;
			break;
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADD;
			break;
		case SLJIT_SUBC:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			neg_op = SLJIT_ADDC;
			break;
		}
	}

	/* Try to encode one operand as a single instruction immediate. */
	do {
		if (!(inp_flags & ALLOW_IMM))
			break;

		if (src2 == SLJIT_IMM) {
			src2_reg = (sljit_s32)get_imm((sljit_uw)src2w);
			if (src2_reg)
				break;

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src2w);
				if (src2_reg) {
					flags |= INV_IMM;
					break;
				}
			}

			if (neg_op != 0) {
				/* Switch to the opposite operation with a negated
				   (or inverted, for the carry variants) immediate. */
				src2_reg = (sljit_s32)get_imm((neg_op == SLJIT_ADD || neg_op == SLJIT_SUB) ? (sljit_uw)-src2w : ~(sljit_uw)src2w);
				if (src2_reg) {
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}

		if (src1 == SLJIT_IMM) {
			/* Immediate on the left: encode it as the second operand
			   and mark the arguments swapped. */
			src2_reg = (sljit_s32)get_imm((sljit_uw)src1w);
			if (src2_reg) {
				flags |= ARGS_SWAPPED;
				src1 = src2;
				src1w = src2w;
				break;
			}

			if (inp_flags & ALLOW_INV_IMM) {
				src2_reg = (sljit_s32)get_imm(~(sljit_uw)src1w);
				if (src2_reg) {
					flags |= ARGS_SWAPPED | INV_IMM;
					src1 = src2;
					src1w = src2w;
					break;
				}
			}

			if (neg_op >= SLJIT_SUB) {
				/* Note: additive operation (commutative). */
				SLJIT_ASSERT(op == SLJIT_ADD || op == SLJIT_ADDC);

				src2_reg = (sljit_s32)get_imm((sljit_uw)-src1w);
				if (src2_reg) {
					src1 = src2;
					src1w = src2w;
					op = neg_op | GET_ALL_FLAGS(op);
					break;
				}
			}
		}
	} while(0);

	/* Destination. */
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG2;

	if (op <= SLJIT_MOV_P) {
		/* Move operations: a register source stored to memory needs no
		   intermediate computation at all. */
		if (dst & SLJIT_MEM) {
			if (inp_flags & BYTE_SIZE)
				inp_flags &= ~SIGNED;

			if (FAST_IS_REG(src2))
				return emit_op_mem(compiler, inp_flags, src2, dst, dstw, TMP_REG1);
		}

		if (FAST_IS_REG(src2) && dst_reg != TMP_REG2)
			flags |= REGISTER_OP;

		src2_tmp_reg = dst_reg;
	} else {
		if (op == SLJIT_REV_U16 || op == SLJIT_REV_S16) {
			if (!(dst & SLJIT_MEM) && (!(src2 & SLJIT_MEM) || op == SLJIT_REV_S16))
				flags |= REGISTER_OP;
		}

		src2_tmp_reg = FAST_IS_REG(src1) ? TMP_REG1 : TMP_REG2;
	}

	if (src2_reg == 0 && (src2 & SLJIT_MEM)) {
		src2_reg = src2_tmp_reg;
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, src2_reg, src2, src2w, TMP_REG1));
	}

	/* Source 1. */
	if (FAST_IS_REG(src1))
		src1_reg = src1;
	else if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inp_flags | LOAD_DATA, TMP_REG1, src1, src1w, TMP_REG1));
		src1_reg = TMP_REG1;
	} else if (!(inp_flags & ALLOW_DOUBLE_IMM) || src2_reg != 0 || op == SLJIT_SUB || op == SLJIT_SUBC) {
		/* Immediate src1 must be materialized unless the double
		   immediate path below can absorb it. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1_reg = TMP_REG1;
	}

	/* Source 2. */
	if (src2_reg == 0) {
		src2_reg = src2_tmp_reg;

		if (FAST_IS_REG(src2))
			src2_reg = src2;
		else if (!(inp_flags & ALLOW_DOUBLE_IMM))
			FAIL_IF(load_immediate(compiler, src2_reg, (sljit_uw)src2w));
		else {
			/* Double immediate: perform the operation in two steps,
			   each with a single instruction immediate. */
			SLJIT_ASSERT(!(flags & SET_FLAGS));

			if (src1_reg == 0) {
				FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
				src1_reg = TMP_REG1;
			}

			src2_reg = (sljit_s32)compute_imm((sljit_uw)src2w, &imm2);

			if (src2_reg == 0 && neg_op != 0) {
				src2_reg = (sljit_s32)compute_imm((sljit_uw)-src2w, &imm2);
				if (src2_reg != 0)
					op = neg_op;
			}

			if (src2_reg == 0) {
				FAIL_IF(load_immediate(compiler, src2_tmp_reg, (sljit_uw)src2w));
				src2_reg = src2_tmp_reg;
			} else {
				/* Emit the first half now; the second half (imm2) is
				   applied by the final emit_single_op below. */
				FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));
				src1_reg = dst_reg;
				src2_reg = (sljit_s32)imm2;

				/* The carry was consumed by the first half. */
				if (op == SLJIT_ADDC)
					op = SLJIT_ADD;
				else if (op == SLJIT_SUBC)
					op = SLJIT_SUB;
			}
		}
	}

	if (src1_reg == 0) {
		SLJIT_ASSERT((inp_flags & ALLOW_DOUBLE_IMM) && !(flags & SET_FLAGS));

		/* Double immediate for src1 (operands given in reverse order
		   to emit_single_op). */
		src1_reg = (sljit_s32)compute_imm((sljit_uw)src1w, &imm2);

		if (src1_reg == 0 && neg_op != 0) {
			src1_reg = (sljit_s32)compute_imm((sljit_uw)-src1w, &imm2);
			if (src1_reg != 0)
				op = neg_op;
		}

		if (src1_reg == 0) {
			FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
			src1_reg = TMP_REG1;
		} else {
			FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src2_reg, (sljit_uw)src1_reg));
			src1_reg = dst_reg;
			src2_reg = (sljit_s32)imm2;

			if (op == SLJIT_ADDC)
				op = SLJIT_ADD;
		}
	}

	FAIL_IF(emit_single_op(compiler, op, flags, (sljit_uw)dst_reg, (sljit_uw)src1_reg, (sljit_uw)src2_reg));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Store the result computed in dst_reg. */
	return emit_op_mem(compiler, inp_flags, dst_reg, dst, dstw, TMP_REG1);
}
2283 
2284 #ifdef __cplusplus
2285 extern "C" {
2286 #endif
2287 
2288 #if defined(__GNUC__)
2289 extern unsigned int __aeabi_uidivmod(unsigned int numerator, unsigned int denominator);
2290 extern int __aeabi_idivmod(int numerator, int denominator);
2291 #else
2292 #error "Software divmod functions are needed"
2293 #endif
2294 
2295 #ifdef __cplusplus
2296 }
2297 #endif
2298 
/* Emits zero operand operations. The divmod/div variants call the
   AEABI software divide helpers and preserve the scratch registers
   those helpers clobber. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_uw saved_reg_list[3];
	sljit_sw saved_reg_count;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
		FAIL_IF(push_inst(compiler, BKPT));
		break;
	case SLJIT_NOP:
		FAIL_IF(push_inst(compiler, NOP));
		break;
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* 64 bit result: low word in R0, high word in R1. */
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULL : SMULL)
			| RN(SLJIT_R1) | RD(SLJIT_R0) | RM8(SLJIT_R0) | RM(SLJIT_R1));
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
		SLJIT_COMPILE_ASSERT((SLJIT_DIVMOD_UW & 0x2) == 0 && SLJIT_DIV_UW - 0x2 == SLJIT_DIVMOD_UW, bad_div_opcode_assignments);
		SLJIT_ASSERT(reg_map[2] == 1 && reg_map[3] == 2 && reg_map[4] == 3);

		/* Collect live scratch registers (hardware r1-r3) that the
		   helper call would clobber. */
		saved_reg_count = 0;
		if (compiler->scratches >= 4)
			saved_reg_list[saved_reg_count++] = 3;
		if (compiler->scratches >= 3)
			saved_reg_list[saved_reg_count++] = 2;
		if (op >= SLJIT_DIV_UW)
			saved_reg_list[saved_reg_count++] = 1;

		if (saved_reg_count > 0) {
			/* Push the saved registers on an 8 byte aligned stack area. */
			FAIL_IF(push_inst(compiler, STR | 0x2d0000 | (saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* str rX, [sp, #-8/-16]! */));
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0004 | (saved_reg_list[1] << 12) /* str rX, [sp, #4] */));
			}
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, STR | 0x8d0008 | (saved_reg_list[2] << 12) /* str rX, [sp, #8] */));
			}
		}

#if defined(__GNUC__)
		/* Quotient is returned in r0, remainder in r1. */
		FAIL_IF(sljit_emit_ijump(compiler, SLJIT_FAST_CALL, SLJIT_IMM,
			((op | 0x2) == SLJIT_DIV_UW ? SLJIT_FUNC_ADDR(__aeabi_uidivmod) : SLJIT_FUNC_ADDR(__aeabi_idivmod))));
#else
#error "Software divmod functions are needed"
#endif

		if (saved_reg_count > 0) {
			/* Restore in reverse order; the last load also pops the area. */
			if (saved_reg_count >= 3) {
				SLJIT_ASSERT(saved_reg_list[2] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0008 | (saved_reg_list[2] << 12) /* ldr rX, [sp, #8] */));
			}
			if (saved_reg_count >= 2) {
				SLJIT_ASSERT(saved_reg_list[1] < 8);
				FAIL_IF(push_inst(compiler, LDR | 0x8d0004 | (saved_reg_list[1] << 12) /* ldr rX, [sp, #4] */));
			}
			return push_inst(compiler, (LDR ^ (1 << 24)) | 0x8d0000 | (sljit_ins)(saved_reg_count >= 3 ? 16 : 8)
						| (saved_reg_list[0] << 12) /* ldr rX, [sp], #8/16 */);
		}
		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		/* No-ops on this architecture. */
		return SLJIT_SUCCESS;
	}

	return SLJIT_SUCCESS;
}
2374 
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2375 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2376 	sljit_s32 dst, sljit_sw dstw,
2377 	sljit_s32 src, sljit_sw srcw)
2378 {
2379 	CHECK_ERROR();
2380 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2381 	ADJUST_LOCAL_OFFSET(dst, dstw);
2382 	ADJUST_LOCAL_OFFSET(src, srcw);
2383 
2384 	switch (GET_OPCODE(op)) {
2385 	case SLJIT_MOV:
2386 	case SLJIT_MOV_U32:
2387 	case SLJIT_MOV_S32:
2388 	case SLJIT_MOV32:
2389 	case SLJIT_MOV_P:
2390 		return emit_op(compiler, SLJIT_MOV, ALLOW_ANY_IMM, dst, dstw, TMP_REG1, 0, src, srcw);
2391 
2392 	case SLJIT_MOV_U8:
2393 		return emit_op(compiler, SLJIT_MOV_U8, ALLOW_ANY_IMM | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u8)srcw : srcw);
2394 
2395 	case SLJIT_MOV_S8:
2396 		return emit_op(compiler, SLJIT_MOV_S8, ALLOW_ANY_IMM | SIGNED | BYTE_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s8)srcw : srcw);
2397 
2398 	case SLJIT_MOV_U16:
2399 		return emit_op(compiler, SLJIT_MOV_U16, ALLOW_ANY_IMM | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_u16)srcw : srcw);
2400 
2401 	case SLJIT_MOV_S16:
2402 		return emit_op(compiler, SLJIT_MOV_S16, ALLOW_ANY_IMM | SIGNED | HALF_SIZE, dst, dstw, TMP_REG1, 0, src, (src == SLJIT_IMM) ? (sljit_s16)srcw : srcw);
2403 
2404 	case SLJIT_CLZ:
2405 	case SLJIT_CTZ:
2406 	case SLJIT_REV:
2407 	case SLJIT_REV_U32:
2408 	case SLJIT_REV_S32:
2409 		return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src, srcw);
2410 
2411 	case SLJIT_REV_U16:
2412 	case SLJIT_REV_S16:
2413 		return emit_op(compiler, op, HALF_SIZE, dst, dstw, TMP_REG1, 0, src, srcw);
2414 	}
2415 
2416 	return SLJIT_SUCCESS;
2417 }
2418 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2419 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2420 	sljit_s32 dst, sljit_sw dstw,
2421 	sljit_s32 src1, sljit_sw src1w,
2422 	sljit_s32 src2, sljit_sw src2w)
2423 {
2424 	sljit_s32 inp_flags;
2425 
2426 	CHECK_ERROR();
2427 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2428 	ADJUST_LOCAL_OFFSET(dst, dstw);
2429 	ADJUST_LOCAL_OFFSET(src1, src1w);
2430 	ADJUST_LOCAL_OFFSET(src2, src2w);
2431 
2432 	switch (GET_OPCODE(op)) {
2433 	case SLJIT_ADD:
2434 	case SLJIT_ADDC:
2435 	case SLJIT_SUB:
2436 	case SLJIT_SUBC:
2437 		return emit_op(compiler, op, ALLOW_IMM | ALLOW_NEG_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2438 
2439 	case SLJIT_OR:
2440 		return emit_op(compiler, op, ALLOW_IMM | ALLOW_DOUBLE_IMM, dst, dstw, src1, src1w, src2, src2w);
2441 
2442 	case SLJIT_XOR:
2443 		inp_flags = ALLOW_IMM | ALLOW_DOUBLE_IMM;
2444 		if ((src1 == SLJIT_IMM && src1w == -1) || (src2 == SLJIT_IMM && src2w == -1)) {
2445 			inp_flags |= ALLOW_INV_IMM;
2446 		}
2447 		return emit_op(compiler, op, inp_flags, dst, dstw, src1, src1w, src2, src2w);
2448 
2449 	case SLJIT_MUL:
2450 		return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2451 
2452 	case SLJIT_AND:
2453 		return emit_op(compiler, op, ALLOW_ANY_IMM, dst, dstw, src1, src1w, src2, src2w);
2454 
2455 	case SLJIT_SHL:
2456 	case SLJIT_MSHL:
2457 	case SLJIT_LSHR:
2458 	case SLJIT_MLSHR:
2459 	case SLJIT_ASHR:
2460 	case SLJIT_MASHR:
2461 	case SLJIT_ROTL:
2462 	case SLJIT_ROTR:
2463 		if (src2 == SLJIT_IMM) {
2464 			compiler->shift_imm = src2w & 0x1f;
2465 			return emit_op(compiler, op, 0, dst, dstw, TMP_REG1, 0, src1, src1w);
2466 		} else {
2467 			compiler->shift_imm = 0x20;
2468 			return emit_op(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w);
2469 		}
2470 	}
2471 
2472 	return SLJIT_SUCCESS;
2473 }
2474 
/* Same as sljit_emit_op2, except the numeric result is discarded
   (only the status flags matter). TMP_REG1 as destination marks the
   unused return value for emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
}
2485 
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2486 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2487 	sljit_s32 dst_reg,
2488 	sljit_s32 src1, sljit_sw src1w,
2489 	sljit_s32 src2, sljit_sw src2w)
2490 {
2491 	CHECK_ERROR();
2492 	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2493 	ADJUST_LOCAL_OFFSET(src1, src1w);
2494 	ADJUST_LOCAL_OFFSET(src2, src2w);
2495 
2496 	switch (GET_OPCODE(op)) {
2497 	case SLJIT_MULADD:
2498 		return emit_op(compiler, op, 0, dst_reg, 0, src1, src1w, src2, src2w);
2499 	}
2500 
2501 	return SLJIT_SUCCESS;
2502 }
2503 
/* Shifts src1_reg by src3 into dst_reg, filling the vacated bits from
   src2_reg (funnel shift): dst = shift(src1) | opposite_shift(src2). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_left;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	op = GET_OPCODE(op);
	is_left = (op == SLJIT_SHL || op == SLJIT_MSHL);

	if (src1_reg == src2_reg) {
		/* Identical halves: the funnel shift degenerates to a rotate. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, is_left ? SLJIT_ROTL : SLJIT_ROTR, dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Shift type of ROR is 3. */
	if (src3 == SLJIT_IMM) {
		src3w &= 0x1f;

		/* NOTE(review): a zero amount emits nothing, leaving dst_reg
		   untouched — presumably callers pass dst_reg == src1_reg in
		   that case; confirm against the sljit API contract. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* dst = src1 shifted by src3w, then or-in src2 shifted the
		   opposite way by (32 - src3w). */
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src1_reg) | ((sljit_ins)(is_left ? 0 : 1) << 5) | ((sljit_ins)src3w << 7)));
		src3w = (src3w ^ 0x1f) + 1;
		return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | ((sljit_ins)src3w << 7));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	}

	if (op == SLJIT_MSHL || op == SLJIT_MLSHR || dst_reg == src3) {
		/* Mask the amount to 0-31 (also protects src3 from the
		   writes to dst_reg below). */
		FAIL_IF(push_inst(compiler, AND | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
		src3 = TMP_REG2;
	}

	/* dst = src1 shifted by src3; then or-in src2 shifted the opposite
	   way by one plus the complemented amount (= 32 - src3 mod 32). */
	FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM8(src3) | ((sljit_ins)(is_left ? 0 : 1) << 5) | 0x10 | RM(src1_reg)));
	FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src2_reg) | ((sljit_ins)(is_left ? 1 : 0) << 5) | (1 << 7)));
	FAIL_IF(push_inst(compiler, EOR | SRC2_IMM | RD(TMP_REG2) | RN(src3) | 0x1f));
	return push_inst(compiler, ORR | RD(dst_reg) | RN(dst_reg) | RM8(TMP_REG2) | ((sljit_ins)(is_left ? 1 : 0) << 5) | 0x10 | RM(TMP_REG1));
}
2552 
/* Emits operations that have a single source operand and no result. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	switch (op) {
	case SLJIT_FAST_RETURN:
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		/* Move the saved return address into lr (TMP_REG2), then branch. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src)));
		else
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, src, srcw, TMP_REG1));

		return push_inst(compiler, BX | RM(TMP_REG2));
	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
		/* Nothing to do on this architecture. */
		return SLJIT_SUCCESS;
	case SLJIT_PREFETCH_L1:
	case SLJIT_PREFETCH_L2:
	case SLJIT_PREFETCH_L3:
	case SLJIT_PREFETCH_ONCE:
		/* All prefetch variants map to the same preload transfer. */
		SLJIT_ASSERT(src & SLJIT_MEM);
		return emit_op_mem(compiler, PRELOAD | LOAD_DATA, TMP_PC, src, srcw, TMP_REG1);
	}

	return SLJIT_SUCCESS;
}
2582 
/* Emits operations that have only a destination operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 size, dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		SLJIT_ASSERT(reg_map[TMP_REG2] == 14);

		/* Copy the return address out of lr (TMP_REG2); the memory
		   destination is handled by the common store below. */
		if (FAST_IS_REG(dst))
			return push_inst(compiler, MOV | RD(dst) | RM(TMP_REG2));
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		/* Compute the offset of the saved return address above the
		   local area, mirroring the layout built by the prologue. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 0);

		if (compiler->fsaveds > 0 || compiler->fscratches >= SLJIT_FIRST_SAVED_FLOAT_REG) {
			/* The size of pc is not added above. */
			if ((size & SSIZE_OF(sw)) == 0)
				size += SSIZE_OF(sw);

			size += GET_SAVED_FLOAT_REGISTERS_SIZE(compiler->fscratches, compiler->fsaveds, f64);
		}

		SLJIT_ASSERT(((compiler->local_size + size + SSIZE_OF(sw)) & 0x7) == 0);

		dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, TMP_REG1));
		break;
	}

	/* Store the value prepared in TMP_REG2 when dst is memory. */
	if (dst & SLJIT_MEM)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
2622 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)2623 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
2624 {
2625 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
2626 
2627 	if (type == SLJIT_GP_REGISTER)
2628 		return reg_map[reg];
2629 
2630 	if (type == SLJIT_FLOAT_REGISTER || type == SLJIT_SIMD_REG_64)
2631 		return freg_map[reg];
2632 
2633 	if (type != SLJIT_SIMD_REG_128)
2634 		return freg_map[reg] & ~0x1;
2635 
2636 	return -1;
2637 }
2638 
/* Emits a single, caller supplied 32 bit instruction word verbatim. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	SLJIT_UNUSED_ARG(size);
	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	return push_inst(compiler, *(sljit_ins*)instruction);
}
2648 
2649 /* --------------------------------------------------------------------- */
2650 /*  Floating point operators                                             */
2651 /* --------------------------------------------------------------------- */
2652 
2653 #define FPU_LOAD (1 << 20)
2654 #define EMIT_FPU_DATA_TRANSFER(inst, add, base, freg, offs) \
2655 	((inst) | (sljit_ins)((add) << 23) | RN(base) | VD(freg) | (sljit_ins)(offs))
2656 
/* Emits a VFP load or store between float register 'reg' and the
   memory operand arg/argw. VSTR/VLDR offsets are word multiples in an
   8 bit field, so larger offsets are folded into TMP_REG1. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_uw imm;
	sljit_ins inst = VSTR_F32 | (flags & (SLJIT_32 | FPU_LOAD));

	SLJIT_ASSERT(arg & SLJIT_MEM);
	arg &= ~SLJIT_MEM;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* VFP transfers have no register offset form: add it first. */
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (((sljit_ins)argw & 0x3) << 7)));
		arg = TMP_REG1;
		argw = 0;
	}

	/* Fast loads and stores. */
	if (arg) {
		/* Word aligned offsets up to 1020 bytes fit directly. */
		if (!(argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, arg & REG_MASK, reg, argw >> 2));
		if (!(-argw & ~0x3fc))
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, arg & REG_MASK, reg, (-argw) >> 2));

		/* Fold the high part of the offset into TMP_REG1 when it is
		   encodable as a data processing immediate. */
		imm = get_imm((sljit_uw)argw & ~(sljit_uw)0x3fc);
		if (imm) {
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
		imm = get_imm((sljit_uw)-argw & ~(sljit_uw)0x3fc);
		if (imm) {
			argw = -argw;
			FAIL_IF(push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg & REG_MASK) | imm));
			return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 0, TMP_REG1, reg, (argw & 0x3fc) >> 2));
		}
	}

	/* Worst case: materialize the full offset (plus base, if any). */
	if (arg) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));
		FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(TMP_REG1)));
	}
	else
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)argw));

	return push_inst(compiler, EMIT_FPU_DATA_TRANSFER(inst, 1, TMP_REG1, reg, 0));
}
2700 
/* Converts a floating point value to a signed word via VCVT and stores it
   in dst. The SLJIT_32 bit is flipped first so that the precision flag
   passed to the FPU helpers matches the *source* operand. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* The converted s32 value ends up in TMP_FREG1. */
	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_S32_F32, op & SLJIT_32, TMP_FREG1, src, 0)));

	/* (1 << 20): VMOV direction bit, FPU register to core register. */
	if (FAST_IS_REG(dst))
		return push_inst(compiler, VMOV | (1 << 20) | RD(dst) | VN(TMP_FREG1));

	/* Store the integer value from a VFP register. */
	return emit_fop_mem(compiler, 0, TMP_FREG1, dst, dstw);
}
2720 
/* Shared helper: converts an integer word (register, memory or immediate
   source) to a floating point value. 'ins' is the prepared VCVT variant,
   carrying the SLJIT_32 precision bit for the destination. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (FAST_IS_REG(src))
		FAIL_IF(push_inst(compiler, VMOV | RD(src) | VN(TMP_FREG1)));
	else if (src & SLJIT_MEM) {
		/* Load the integer value into a VFP register. */
		FAIL_IF(emit_fop_mem(compiler, FPU_LOAD, TMP_FREG1, src, srcw));
	}
	else {
		/* Immediate source: build it in TMP_REG1, then move to the FPU. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		FAIL_IF(push_inst(compiler, VMOV | RD(TMP_REG1) | VN(TMP_FREG1)));
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(ins, ins & SLJIT_32, dst_r, TMP_FREG1, 0)));

	/* When dst is memory, dst_r is TMP_FREG1, so the store below spills
	   the freshly converted value. */
	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (ins & SLJIT_32), TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2744 
/* Signed word -> float conversion; the inverted SLJIT_32 bit selects the
   destination precision for the VCVT encoding. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_S32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2751 
/* Unsigned word -> float conversion; mirrors the signed variant above. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	return sljit_emit_fop1_conv_f64_from_w(compiler, VCVT_F32_U32 | (~op & SLJIT_32), dst, dstw, src, srcw);
}
2758 
/* Floating point compare: VCMP followed by VMRS to copy the FPSCR NZCV
   flags into the CPU flags. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	op ^= SLJIT_32;

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCMP_F32, op & SLJIT_32, src1, src2, 0)));
	FAIL_IF(push_inst(compiler, VMRS));

	if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
		return SLJIT_SUCCESS;

	/* UNORDERED_OR_EQUAL is tested as EQ later: a CMP of TMP_REG1 with
	   itself, conditional on VS (unordered), forces the Z flag so the
	   unordered case also satisfies EQ. */
	return push_inst(compiler, (CMP - CONDITIONAL) | (0x60000000 /* VS */) | SET_FLAGS | RN(TMP_REG1) | RM(TMP_REG1));
}
2783 
/* Single-operand floating point operations (move, negate, absolute value,
   precision conversion). The SLJIT_32 bit is flipped up front — except for
   CONV_F64_FROM_F32, where source and destination precisions differ and
   the bit is flipped after the conversion instead. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();

	SLJIT_COMPILE_ASSERT((SLJIT_32 == 0x100), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (GET_OPCODE(op) != SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_32;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, dst_r, src, srcw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (!(dst & SLJIT_MEM))
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, op & SLJIT_32, dst_r, src, 0)));
			else
				/* Register to memory move: store src directly, no copy. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VNEG_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src, 0)));
		break;
	case SLJIT_CONV_F64_FROM_F32:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VCVT_F64_F32, op & SLJIT_32, dst_r, src, 0)));
		/* Flip now so the store below uses the destination precision. */
		op ^= SLJIT_32;
		break;
	}

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, (op & SLJIT_32), dst_r, dst, dstw);
	return SLJIT_SUCCESS;
}
2830 
/* Two-operand floating point operations (add, sub, mul, div, copysign).
   Note: EMIT_FPU_OPERATION takes (dst, src2, src1) argument order here. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* After this, (op & SLJIT_32) selects the double-precision encoding. */
	op ^= SLJIT_32;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src2 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG2, src2, src2w));
		src2 = TMP_FREG2;
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) | FPU_LOAD, TMP_FREG1, src1, src1w));
		src1 = TMP_FREG1;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VADD_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VSUB_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMUL_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VDIV_F32, op & SLJIT_32, dst_r, src2, src1)));
		break;
	case SLJIT_COPYSIGN_F64:
		/* Move the word holding src2's sign bit into TMP_REG1; for doubles
		   (1 << 7) selects the upper half of the register pair. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(src2) | RD(TMP_REG1) | ((op & SLJIT_32) ? (1 << 7) : 0)));
		/* dst = |src1|, then negate it when src2 is negative (LT after
		   comparing the sign word against zero). */
		FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VABS_F32, op & SLJIT_32, dst_r, src1, 0)));
		FAIL_IF(push_inst(compiler, CMP | SET_FLAGS | RN(TMP_REG1) | SRC2_IMM | 0));
		return push_inst(compiler, EMIT_FPU_OPERATION((VNEG_F32 & ~COND_MASK) | 0xb0000000, op & SLJIT_32, dst_r, dst_r, 0));
	}

	/* dst_r != dst exactly when dst is a memory operand (dst_r is then
	   TMP_FREG1), so spill the result. */
	if (dst_r != dst)
		FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32), TMP_FREG1, dst, dstw));

	return SLJIT_SUCCESS;
}
2883 
2884 #undef EMIT_FPU_DATA_TRANSFER
2885 
/* Loads a single precision immediate into freg. When NEON is available and
   the value fits the 8-bit VMOV.F32 immediate form (low 19 mantissa bits
   zero, exponent in the encodable range), a single instruction is used;
   otherwise the bit pattern goes through TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f32 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* Union type-punning to read the IEEE-754 bit pattern of 'value'. */
	union {
		sljit_u32 imm;
		sljit_f32 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset32(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	if ((u.imm << (32 - 19)) == 0) {
		exp = (u.imm >> (23 + 2)) & 0x3f;

		/* Exponent must be of the form 0b100000 or 0b011111 for the
		   immediate encoding. */
		if (exp == 0x20 || exp == 0x1f) {
			/* ins: abcdefgh = sign + low exponent bit + top mantissa bits,
			   split across the instruction's two immediate fields. */
			ins = ((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f);
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: materialize the bit pattern in a core register, then move
	   it into the FPU register. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm));
	return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG1));
}
2917 
/* Loads a double precision immediate into freg, preferring the one
   instruction VMOV.F64 immediate form when NEON is available and the value
   fits (only sign, a constrained exponent and the top 4 mantissa bits may
   be nonzero). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
	sljit_s32 freg, sljit_f64 value)
{
#if defined(__ARM_NEON) && __ARM_NEON
	sljit_u32 exp;
	sljit_ins ins;
#endif /* NEON */
	/* NOTE(review): imm[0] is treated as the low word of the double —
	   assumes little-endian word order, the usual ARM32 configuration. */
	union {
		sljit_u32 imm[2];
		sljit_f64 value;
	} u;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fset64(compiler, freg, value));

	u.value = value;

#if defined(__ARM_NEON) && __ARM_NEON
	/* Encodable only when the low word and the low 16 bits of the high
	   word are zero. */
	if (u.imm[0] == 0 && (u.imm[1] << (64 - 48)) == 0) {
		exp = (u.imm[1] >> ((52 - 32) + 2)) & 0x1ff;

		if (exp == 0x100 || exp == 0xff) {
			ins = ((u.imm[1] >> (56 - 32)) & 0x80) | ((u.imm[1] >> (48 - 32)) & 0x7f);
			/* (1 << 8) selects the double precision variant. */
			return push_inst(compiler, (VMOV_F32 ^ (1 << 6)) | (1 << 8) | ((ins & 0xf0) << 12) | VD(freg) | (ins & 0xf));
		}
	}
#endif /* NEON */

	/* Fallback: move both 32-bit halves from core registers with VMOV2;
	   reuse TMP_REG1 for both when the halves are equal. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, u.imm[0]));
	if (u.imm[0] == u.imm[1])
		return push_inst(compiler, VMOV2 | RN(TMP_REG1) | RD(TMP_REG1) | VM(freg));

	FAIL_IF(load_immediate(compiler, TMP_REG2, u.imm[1]));
	return push_inst(compiler, VMOV2 | RN(TMP_REG2) | RD(TMP_REG1) | VM(freg));
}
2953 
/* Raw bit copies between core and FPU registers (no conversion). A 64-bit
   copy uses a register pair on the core side (VMOV2); a 32-bit copy uses
   a single VMOV. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_s32 reg2;
	sljit_ins inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	if (reg & REG_PAIR_MASK) {
		reg2 = REG_PAIR_SECOND(reg);
		reg = REG_PAIR_FIRST(reg);

		inst = VMOV2 | RN(reg) | RD(reg2) | VM(freg);
	} else {
		inst = VMOV | VN(freg) | RD(reg);

		/* For f64 with a single core register, (1 << 7) selects the upper
		   half of the D register. */
		if (!(op & SLJIT_32))
			inst |= 1 << 7;
	}

	/* Bit 20 reverses the transfer direction: FPU register to core. */
	if (GET_OPCODE(op) == SLJIT_COPY_FROM_F64)
		inst |= 1 << 20;

	return push_inst(compiler, inst);
}
2980 
2981 /* --------------------------------------------------------------------- */
2982 /*  Conditional instructions                                             */
2983 /* --------------------------------------------------------------------- */
2984 
/* Maps an sljit comparison type to the ARM condition field (bits 31..28).
   Carry/overflow variants consult status_flags_state because ARM's carry
   semantics differ between ADD (carry set on overflow) and SUB (borrow is
   inverted carry). */
static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
{
	switch (type) {
	case SLJIT_EQUAL:
	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
	case SLJIT_UNORDERED_OR_EQUAL:
		return 0x00000000; /* EQ */

	case SLJIT_NOT_EQUAL:
	case SLJIT_ATOMIC_NOT_STORED:
	case SLJIT_F_NOT_EQUAL:
	case SLJIT_UNORDERED_OR_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return 0x10000000; /* NE */

	case SLJIT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x20000000; /* CS */
		/* fallthrough */

	case SLJIT_LESS:
		return 0x30000000; /* CC */

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
			return 0x30000000; /* CC */
		/* fallthrough */

	case SLJIT_GREATER_EQUAL:
		return 0x20000000; /* CS */

	case SLJIT_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		return 0x80000000; /* HI */

	case SLJIT_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return 0x90000000; /* LS */

	case SLJIT_SIG_LESS:
	case SLJIT_UNORDERED_OR_LESS:
		return 0xb0000000; /* LT */

	case SLJIT_SIG_GREATER_EQUAL:
	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return 0xa0000000; /* GE */

	case SLJIT_SIG_GREATER:
	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return 0xc0000000; /* GT */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return 0xd0000000; /* LE */

	case SLJIT_OVERFLOW:
		/* Without add/sub flags, overflow was computed as a zero test. */
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x10000000; /* NE */
		/* fallthrough */

	case SLJIT_UNORDERED:
		return 0x60000000; /* VS */

	case SLJIT_NOT_OVERFLOW:
		if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
			return 0x00000000; /* EQ */
		/* fallthrough */

	case SLJIT_ORDERED:
		return 0x70000000; /* VC */

	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return 0x40000000; /* MI */

	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return 0x50000000; /* PL */

	default:
		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL_REG_ARG);
		return 0xe0000000; /* AL (unconditional) */
	}
}
3073 
/* Creates (or reuses) a label at the current code position. Consecutive
   label requests at the same offset return the same label object. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
{
	struct sljit_label *label;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_label(compiler));

	if (compiler->last_label && compiler->last_label->size == compiler->size)
		return compiler->last_label;

	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
	PTR_FAIL_IF(!label);
	set_label(label, compiler);
	return label;
}
3089 
/* Emits a (conditional) jump or call whose target is resolved later.
   ARMv6 loads the target from a literal pool (with a BLX sequence for
   calls); ARMv7 emits a patchable BX/BLX and reserves JUMP_MAX_SIZE words
   so the final pass can expand it. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(prepare_blx(compiler));

	/* Plain jumps load straight into PC; calls load into TMP_REG1 and
	   branch through the BLX sequence below. */
	jump->addr = compiler->size;
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, ((EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
		type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0)) & ~COND_MASK) | get_cc(compiler, type), 0));

	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		compiler->patches++;

	if (type >= SLJIT_FAST_CALL) {
		jump->flags |= IS_BL;
		jump->addr = compiler->size;
		PTR_FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	jump->addr = compiler->size;
	if (type >= SLJIT_FAST_CALL)
		jump->flags |= IS_BL;
	PTR_FAIL_IF(push_inst(compiler, (((type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)) & ~COND_MASK) | get_cc(compiler, type)));
	/* Reserve space for the worst-case jump sequence. */
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return jump;
}
3129 
3130 #ifdef __SOFTFP__
3131 
/* Marshals call arguments for the soft-float ABI: the first four words go
   to r0-r3, the rest to the stack (f64 arguments 8-byte aligned). Runs in
   two passes: the first computes each argument's byte offset, the second
   moves the values in reverse order so nothing is overwritten early.
   *src (optional indirect call target) is relocated to TMP_REG1 when its
   register would be clobbered; *extra_space returns the stack adjustment
   the caller must undo after the call. */
static sljit_s32 softfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types, sljit_s32 *src, sljit_u32 *extra_space)
{
	sljit_u32 is_tail_call = *extra_space & SLJIT_CALL_RETURN;
	sljit_u32 offset = 0;
	sljit_u32 word_arg_offset = 0;
	sljit_u32 src_offset = 4 * sizeof(sljit_sw);
	sljit_u32 float_arg_count = 0;
	sljit_s32 types = 0;
	sljit_u8 offsets[4];
	sljit_u8 *offset_ptr = offsets;

	if (src && FAST_IS_REG(*src))
		src_offset = (sljit_u32)reg_map[*src] * sizeof(sljit_sw);

	arg_types >>= SLJIT_ARG_SHIFT;

	/* First pass: assign offsets, reversing the type list into 'types'. */
	while (arg_types) {
		types = (types << SLJIT_ARG_SHIFT) | (arg_types & SLJIT_ARG_MASK);

		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			/* Doubles are 8-byte aligned in the argument area. */
			if (offset & 0x7)
				offset += sizeof(sljit_sw);
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f64);
			float_arg_count++;
			break;
		case SLJIT_ARG_TYPE_F32:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_f32);
			float_arg_count++;
			break;
		default:
			*offset_ptr++ = (sljit_u8)offset;
			offset += sizeof(sljit_sw);
			word_arg_offset += sizeof(sljit_sw);
			break;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	if (offset > 4 * sizeof(sljit_sw) && (!is_tail_call || offset > compiler->args_size)) {
		/* Keep lr register on the stack. */
		if (is_tail_call)
			offset += sizeof(sljit_sw);

		/* Stack portion only, rounded up to 8-byte alignment. */
		offset = ((offset - 4 * sizeof(sljit_sw)) + 0x7) & ~(sljit_u32)0x7;

		*extra_space = offset;

		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, (sljit_s32)offset));
		else
			FAIL_IF(push_inst(compiler, SUB | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | offset));
	} else {
		if (is_tail_call)
			FAIL_IF(emit_stack_frame_release(compiler, -1));
		*extra_space = 0;
	}

	/* Process arguments in reversed direction. */
	while (types) {
		switch (types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			float_arg_count--;
			offset = *(--offset_ptr);

			SLJIT_ASSERT((offset & 0x7) == 0);

			if (offset < 4 * sizeof(sljit_sw)) {
				/* Target registers overlap the call target: move it away. */
				if (src_offset == offset || src_offset == offset + sizeof(sljit_sw)) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV2 | 0x100000 | (offset << 10) | ((offset + sizeof(sljit_sw)) << 14) | float_arg_count));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800100 | RN(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		case SLJIT_ARG_TYPE_F32:
			float_arg_count--;
			offset = *(--offset_ptr);

			if (offset < 4 * sizeof(sljit_sw)) {
				if (src_offset == offset) {
					FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
					*src = TMP_REG1;
				}
				FAIL_IF(push_inst(compiler, VMOV | 0x100000 | (float_arg_count << 16) | (offset << 10)));
			} else
				FAIL_IF(push_inst(compiler, VSTR_F32 | 0x800000 | RN(SLJIT_SP)
						| (float_arg_count << 12) | ((offset - 4 * sizeof(sljit_sw)) >> 2)));
			break;
		default:
			word_arg_offset -= sizeof(sljit_sw);
			offset = *(--offset_ptr);

			SLJIT_ASSERT(offset >= word_arg_offset);

			if (offset != word_arg_offset) {
				if (offset < 4 * sizeof(sljit_sw)) {
					if (src_offset == offset) {
						FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | (src_offset >> 2)));
						*src = TMP_REG1;
					}
					else if (src_offset == word_arg_offset) {
						/* The value being moved IS the call target; track
						   its new location instead of copying twice. */
						*src = (sljit_s32)(SLJIT_R0 + (offset >> 2));
						src_offset = offset;
					}
					/* (offset << 10) places the destination register number
					   (offset >> 2) into the RD field (bit 12). */
					FAIL_IF(push_inst(compiler, MOV | (offset << 10) | (word_arg_offset >> 2)));
				} else
					FAIL_IF(push_inst(compiler, STR | 0x800000 | RN(SLJIT_SP) | (word_arg_offset << 10) | (offset - 4 * sizeof(sljit_sw))));
			}
			break;
		}

		types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3254 
/* After a soft-float call with a floating point return value, moves the
   result from the core registers (r0/r1) into the FPU return register. */
static sljit_s32 softfloat_post_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F64)
		FAIL_IF(push_inst(compiler, VMOV2 | (1 << 16) | (0 << 12) | 0));
	if ((arg_types & SLJIT_ARG_MASK) == SLJIT_ARG_TYPE_F32)
		FAIL_IF(push_inst(compiler, VMOV | (0 << 16) | (0 << 12)));

	return SLJIT_SUCCESS;
}
3264 
3265 #else /* !__SOFTFP__ */
3266 
/* Packs floating point arguments into consecutive VFP registers for the
   hard-float ABI. 'offset' walks the source registers, 'new_offset' the
   packed destination; a single f32 leaves a half-slot (f32_offset) that a
   later f32 argument can back-fill. */
static sljit_s32 hardfloat_call_with_args(struct sljit_compiler *compiler, sljit_s32 arg_types)
{
	sljit_u32 offset = SLJIT_FR0;
	sljit_u32 new_offset = SLJIT_FR0;
	sljit_u32 f32_offset = 0;

	/* Remove return value. */
	arg_types >>= SLJIT_ARG_SHIFT;

	while (arg_types) {
		switch (arg_types & SLJIT_ARG_MASK) {
		case SLJIT_ARG_TYPE_F64:
			if (offset != new_offset)
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					SLJIT_32, new_offset, offset, 0)));

			new_offset++;
			offset++;
			break;
		case SLJIT_ARG_TYPE_F32:
			if (f32_offset != 0) {
				/* Fill the pending upper half-slot (0x400000 selects it). */
				FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
					0x400000, f32_offset, offset, 0)));
				f32_offset = 0;
			} else {
				if (offset != new_offset)
					FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32,
						0, new_offset, offset, 0)));
				f32_offset = new_offset;
				new_offset++;
			}
			offset++;
			break;
		}
		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
3306 
3307 #endif /* __SOFTFP__ */
3308 
/* Emits a call to a later-resolved target, marshalling arguments for the
   active float ABI. Tail calls (SLJIT_CALL_RETURN) release the stack frame
   first; the soft-float path may need post-call stack/return fixups. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
#ifdef __SOFTFP__
	struct sljit_jump *jump;
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		PTR_FAIL_IF(softfloat_call_with_args(compiler, arg_types, NULL, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		/* Tail call with no stack arguments degenerates to a jump. */
		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);

		SLJIT_SKIP_CHECKS(compiler);
		jump = sljit_emit_jump(compiler, type);
		PTR_FAIL_IF(jump == NULL);

		if (extra_space > 0) {
			/* Recover the saved return address before popping the
			   argument area, then return through it. */
			if (type & SLJIT_CALL_RETURN)
				PTR_FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			PTR_FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN) {
				PTR_FAIL_IF(push_inst(compiler, BX | RM(TMP_REG2)));
				return jump;
			}
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		PTR_FAIL_IF(softfloat_post_call_with_args(compiler, arg_types));
		return jump;
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		PTR_FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3364 
/* Indirect jump/call. Register and memory targets branch immediately via
   BX/BLX; immediate targets create a jump record so the address can be
   emitted (and later patched) like a regular jump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	SLJIT_ASSERT(reg_map[TMP_REG1] != 14);

	if (src != SLJIT_IMM) {
		if (FAST_IS_REG(src)) {
			SLJIT_ASSERT(reg_map[src] != 14);
			return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(src));
		}

		SLJIT_ASSERT(src & SLJIT_MEM);
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		return push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1));
	}

	/* These jumps are converted to jump/call instructions when possible. */
	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	FAIL_IF(!jump);
	set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
	jump->u.target = (sljit_uw)srcw;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: load the target from a literal pool (BLX sequence for calls). */
	if (type >= SLJIT_FAST_CALL)
		FAIL_IF(prepare_blx(compiler));
	jump->addr = compiler->size;
	FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, type <= SLJIT_JUMP ? TMP_PC : TMP_REG1, TMP_PC, 0), 0));
	if (type >= SLJIT_FAST_CALL) {
		jump->addr = compiler->size;
		FAIL_IF(emit_blx(compiler));
	}
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: patchable BX/BLX, reserving worst-case expansion space. */
	jump->addr = compiler->size;
	FAIL_IF(push_inst(compiler, (type <= SLJIT_JUMP ? BX : BLX) | RM(TMP_REG1)));
	compiler->size += JUMP_MAX_SIZE - 1;
#endif /* SLJIT_CONFIG_ARM_V6 */
	return SLJIT_SUCCESS;
}
3408 
/* Indirect call with argument marshalling; the indirect counterpart of
   sljit_emit_call. For tail calls the target must survive the frame
   release, so memory targets and saved-register targets are first moved
   into TMP_REG1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
#ifdef __SOFTFP__
	sljit_u32 extra_space = (sljit_u32)type;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	}

	/* Saved registers are restored by the frame release below, so a tail
	   call target kept in one must be copied out first. */
	if ((type & SLJIT_CALL_RETURN) && (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options)))) {
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
		src = TMP_REG1;
	}

#ifdef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG) {
		FAIL_IF(softfloat_call_with_args(compiler, arg_types, &src, &extra_space));
		SLJIT_ASSERT((extra_space & 0x7) == 0);

		if ((type & SLJIT_CALL_RETURN) && extra_space == 0)
			type = SLJIT_JUMP;

		SLJIT_SKIP_CHECKS(compiler);
		FAIL_IF(sljit_emit_ijump(compiler, type, src, srcw));

		if (extra_space > 0) {
			/* Restore the saved return address, pop the argument area,
			   then return through TMP_REG2 for tail calls. */
			if (type & SLJIT_CALL_RETURN)
				FAIL_IF(push_inst(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1,
					TMP_REG2, SLJIT_SP, extra_space - sizeof(sljit_sw))));

			FAIL_IF(push_inst(compiler, ADD | RD(SLJIT_SP) | RN(SLJIT_SP) | SRC2_IMM | extra_space));

			if (type & SLJIT_CALL_RETURN)
				return push_inst(compiler, BX | RM(TMP_REG2));
		}

		SLJIT_ASSERT(!(type & SLJIT_CALL_RETURN));
		return softfloat_post_call_with_args(compiler, arg_types);
	}
#endif /* __SOFTFP__ */

	if (type & SLJIT_CALL_RETURN) {
		FAIL_IF(emit_stack_frame_release(compiler, -1));
		type = SLJIT_JUMP;
	}

#ifndef __SOFTFP__
	if ((type & 0xff) != SLJIT_CALL_REG_ARG)
		FAIL_IF(hardfloat_call_with_args(compiler, arg_types));
#endif /* !__SOFTFP__ */

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3470 
3471 #ifdef __SOFTFP__
3472 
/* Moves a floating point return value into the soft-float return location
   (r0 for f32, r0/r1 for f64) before returning; register-argument
   functions keep the value in SLJIT_RETURN_FREG instead. */
static SLJIT_INLINE sljit_s32 emit_fmov_before_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	if (compiler->options & SLJIT_ENTER_REG_ARG) {
		if (src == SLJIT_FR0)
			return SLJIT_SUCCESS;

		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_fop1(compiler, op, SLJIT_RETURN_FREG, 0, src, srcw);
	}

	if (FAST_IS_REG(src)) {
		/* (1 << 20): transfer direction FPU -> core registers. */
		if (op & SLJIT_32)
			return push_inst(compiler, VMOV | (1 << 20) | RD(SLJIT_R0) | VN(src));
		return push_inst(compiler, VMOV2 | (1 << 20) | RD(SLJIT_R0) | RN(SLJIT_R1) | VM(src));
	}

	/* Memory source: load the raw bits straight into the core registers. */
	SLJIT_SKIP_CHECKS(compiler);

	if (op & SLJIT_32)
		return sljit_emit_op1(compiler, SLJIT_MOV, SLJIT_R0, 0, src, srcw);
	return sljit_emit_mem(compiler, SLJIT_MOV, SLJIT_REG_PAIR(SLJIT_R0, SLJIT_R1), src, srcw);
}
3495 
3496 #endif /* __SOFTFP__ */
3497 
/* Materializes a condition flag as 0/1 in dst, optionally combining it
   with dst via AND/OR/XOR. Uses conditionally executed MOVs instead of
   branches. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 dst_reg, flags = GET_ALL_FLAGS(op);
	sljit_ins cc, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	op = GET_OPCODE(op);
	cc = get_cc(compiler, type);
	dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;

	/* Plain move: dst = 0, then a conditional dst = 1. */
	if (op < SLJIT_ADD) {
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | SRC2_IMM | 0));
		FAIL_IF(push_inst(compiler, ((MOV | RD(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));
		if (dst & SLJIT_MEM)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);
		return SLJIT_SUCCESS;
	}

	ins = (op == SLJIT_AND ? AND : (op == SLJIT_OR ? ORR : EOR));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, dst, dstw, TMP_REG2));

	/* op dst, dst, #1 when the condition holds ... */
	FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 1) & ~COND_MASK) | cc));

	/* ... and for AND, clear dst on the inverted condition (cc ^ 0x1
	   in the condition field inverts an ARM condition). */
	if (op == SLJIT_AND)
		FAIL_IF(push_inst(compiler, ((ins | RD(dst_reg) | RN(dst_reg) | SRC2_IMM | 0) & ~COND_MASK) | (cc ^ 0x10000000)));

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2));

	/* MOVS through TMP_REG2 recomputes the Z flag from the result. */
	if (flags & SLJIT_SET_Z)
		return push_inst(compiler, MOV | SET_FLAGS | RD(TMP_REG2) | RM(dst_reg));
	return SLJIT_SUCCESS;
}
3538 
/* Conditional select: dst_reg = (condition 'type') ? src1 : src2_reg.
   Implemented by moving src2 into dst first (when needed) and then
   overwriting it with src1 under the condition; the operand roles are
   swapped (type ^= 0x1 inverts the condition) whenever that avoids an
   extra move. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins cc, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* If src1 already lives in dst_reg, swap the operands and invert
	   the condition so only src2 has to be moved conditionally. */
	if (src2_reg != dst_reg && src1 == dst_reg) {
		src1 = src2_reg;
		src1w = 0;
		src2_reg = dst_reg;
		type ^= 0x1;
	}

	if (src1 & SLJIT_MEM) {
		/* Load src1 directly into dst_reg when dst_reg is free,
		   otherwise into TMP_REG1. */
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, (src2_reg != dst_reg) ? dst_reg : TMP_REG1, src1, src1w, TMP_REG1));

		if (src2_reg != dst_reg) {
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			src1 = TMP_REG1;
			src1w = 0;
		}
	} else if (dst_reg != src2_reg)
		FAIL_IF(push_inst(compiler, MOV | RD(dst_reg) | RM(src2_reg)));

	cc = get_cc(compiler, type & ~SLJIT_32);

	if (SLJIT_UNLIKELY(src1 == SLJIT_IMM)) {
		/* Try a single conditional mov/mvn with an encodable immediate. */
		tmp = get_imm((sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MOV | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

		tmp = get_imm(~(sljit_uw)src1w);
		if (tmp)
			return push_inst(compiler, ((MVN | RD(dst_reg) | tmp) & ~COND_MASK) | cc);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
		/* ARMv7: build the immediate with conditional MOVW (+ MOVT for
		   the upper half-word when it is non-zero). */
		tmp = (sljit_ins)src1w;
		FAIL_IF(push_inst(compiler, (MOVW & ~COND_MASK) | cc | RD(dst_reg) | ((tmp << 4) & 0xf0000) | (tmp & 0xfff)));
		if (tmp <= 0xffff)
			return SLJIT_SUCCESS;
		return push_inst(compiler, (MOVT & ~COND_MASK) | cc | RD(dst_reg) | ((tmp >> 12) & 0xf0000) | ((tmp >> 16) & 0xfff));
#else /* !SLJIT_CONFIG_ARM_V7 */
		/* Older cores: load the constant unconditionally into TMP_REG1
		   and fall through to the conditional register move. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)src1w));
		src1 = TMP_REG1;
#endif /* SLJIT_CONFIG_ARM_V7 */
	}

	return push_inst(compiler, ((MOV | RD(dst_reg) | RM(src1)) & ~COND_MASK) | cc);
}
3597 
/* Floating point conditional select:
   dst_freg = (condition 'type') ? src1 : src2_freg.
   src2 is moved into dst first when needed, then src1 is moved in with a
   conditionally executed VMOV; swapping the operands (type ^= 0x1 inverts
   the condition) avoids the extra move when dst aliases src1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_freg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_freg)
{
	sljit_ins cc;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* NOTE(review): SLJIT_32 is flipped here so (type & SLJIT_32) below
	   selects the double-precision form of EMIT_FPU_OPERATION —
	   presumably matching this backend's 32/64 bit convention; confirm
	   against EMIT_FPU_OPERATION's definition. */
	type ^= SLJIT_32;

	if (dst_freg != src2_freg) {
		if (dst_freg == src1) {
			src1 = src2_freg;
			src1w = 0;
			type ^= 0x1;
		} else
			FAIL_IF(push_inst(compiler, EMIT_FPU_OPERATION(VMOV_F32, (type & SLJIT_32), dst_freg, src2_freg, 0)));
	}

	if (src1 & SLJIT_MEM) {
		FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) | FPU_LOAD, TMP_FREG2, src1, src1w));
		src1 = TMP_FREG2;
	}

	cc = get_cc(compiler, type & ~SLJIT_32);
	return push_inst(compiler, EMIT_FPU_OPERATION((VMOV_F32 & ~COND_MASK) | cc, (type & SLJIT_32), dst_freg, src1, 0));
}
3629 
3630 #undef EMIT_FPU_OPERATION
3631 
/* Normalizes the memory operand (*mem, *memw) so that the remaining
   immediate offset fits in [-0xfff, max_offset]. When the offset is
   already in range the base register is kept; otherwise the out-of-range
   part of the offset is folded into TMP_REG1 with an ADD/SUB (or a full
   immediate load) and *mem is set to TMP_REG1. */
static sljit_s32 update_mem_addr(struct sljit_compiler *compiler, sljit_s32 *mem, sljit_sw *memw, sljit_s32 max_offset)
{
	sljit_s32 arg = *mem;
	sljit_sw argw = *memw;
	sljit_uw imm, tmp;
	/* 12 bit immediate offset range of the load/store forms used here. */
	sljit_sw mask = 0xfff;
	sljit_sw sign = 0x1000;

	SLJIT_ASSERT(max_offset >= 0xf00);

	*mem = TMP_REG1;

	/* Base + (index << shift): compute the full address up front. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		*memw = 0;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)(argw & 0x3) << 7));
	}

	arg &= REG_MASK;

	if (arg) {
		/* Offset already representable: keep the original base. */
		if (argw <= max_offset && argw >= -mask) {
			*mem = arg;
			return SLJIT_SUCCESS;
		}

		if (argw >= 0) {
			/* Split argw into an ADD-encodable high part (tmp) and a
			   small remainder; rounding up by 'sign' keeps the
			   remainder within [-mask, max_offset]. */
			tmp = (sljit_uw)(argw & (sign | mask));
			tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw - (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg) | imm);
			}
		} else {
			/* Same splitting for negative offsets, using SUB. */
			tmp = (sljit_uw)(-argw & (sign | mask));
			tmp = (sljit_uw)((-argw + ((tmp <= (sljit_uw)((sign << 1) - max_offset - 1)) ? 0 : sign)) & ~mask);
			imm = get_imm(tmp);

			if (imm) {
				*memw = argw + (sljit_sw)tmp;
				SLJIT_ASSERT(*memw >= -mask && *memw <= max_offset);

				return push_inst(compiler, SUB | RD(TMP_REG1) | RN(arg) | imm);
			}
		}
	}

	/* Fallback: materialize the high part of the offset (or the whole
	   absolute address when there is no base register) in TMP_REG1. */
	tmp = (sljit_uw)(argw & (sign | mask));
	tmp = (sljit_uw)((argw + ((tmp <= (sljit_uw)max_offset || tmp == (sljit_uw)sign) ? 0 : sign)) & ~mask);
	*memw = argw - (sljit_sw)tmp;

	FAIL_IF(load_immediate(compiler, TMP_REG1, tmp));

	if (arg == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(arg));
}
3693 
/* Loads or stores a register pair (two consecutive words) from/to memory;
   single-register accesses are delegated to the unaligned helper. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));

	if (!(reg & REG_PAIR_MASK))
		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);

	ADJUST_LOCAL_OFFSET(mem, memw);

	/* Reserve 4 bytes of offset range for the second word's +4. */
	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));

	flags = WORD_SIZE;

	if (!(type & SLJIT_MEM_STORE)) {
		/* When the first register of the pair is also the base register,
		   load the second word first so the base is not clobbered before
		   the final load. */
		if (REG_PAIR_FIRST(reg) == (mem & REG_MASK)) {
			FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1));
			return emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1);
		}

		flags = WORD_SIZE | LOAD_DATA;
	}

	FAIL_IF(emit_op_mem(compiler, flags, REG_PAIR_FIRST(reg), SLJIT_MEM1(mem), memw, TMP_REG1));
	return emit_op_mem(compiler, flags, REG_PAIR_SECOND(reg), SLJIT_MEM1(mem), memw + SSIZE_OF(sw), TMP_REG1);
}
3724 
/* Emits a load/store with base register update (pre- or post-indexed
   addressing). Returns SLJIT_ERR_UNSUPPORTED when the offset does not fit
   the addressing mode; with SLJIT_MEM_SUPP only this capability check is
   performed and no code is emitted. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	sljit_s32 flags;
	sljit_ins is_type1_transfer, inst;

	CHECK_ERROR();
	CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));

	/* Type 1 transfers (LDR/STR/LDRB/STRB) take a 12 bit immediate;
	   the remaining forms (halfword / signed byte) take only 8 bits. */
	is_type1_transfer = 1;

	switch (type & 0xff) {
	case SLJIT_MOV:
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
	case SLJIT_MOV32:
	case SLJIT_MOV_P:
		flags = WORD_SIZE;
		break;
	case SLJIT_MOV_U8:
		flags = BYTE_SIZE;
		break;
	case SLJIT_MOV_S8:
		/* Signed byte loads use the type 2 (LDRSB) form. */
		if (!(type & SLJIT_MEM_STORE))
			is_type1_transfer = 0;
		flags = BYTE_SIZE | SIGNED;
		break;
	case SLJIT_MOV_U16:
		is_type1_transfer = 0;
		flags = HALF_SIZE;
		break;
	case SLJIT_MOV_S16:
		is_type1_transfer = 0;
		flags = HALF_SIZE | SIGNED;
		break;
	default:
		SLJIT_UNREACHABLE();
		flags = WORD_SIZE;
		break;
	}

	if (!(type & SLJIT_MEM_STORE))
		flags |= LOAD_DATA;

	SLJIT_ASSERT(is_type1_transfer == !!IS_TYPE1_TRANSFER(flags));

	/* Range checks for the selected addressing form. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Type 2 register-offset forms cannot scale the index. */
		if (!is_type1_transfer && memw != 0)
			return SLJIT_ERR_UNSUPPORTED;
	} else {
		if (is_type1_transfer) {
			if (memw > 4095 || memw < -4095)
				return SLJIT_ERR_UNSUPPORTED;
		} else if (memw > 255 || memw < -255)
			return SLJIT_ERR_UNSUPPORTED;
	}

	if (type & SLJIT_MEM_SUPP)
		return SLJIT_SUCCESS;

	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		/* Register offset, optionally scaled by the low two bits of memw. */
		memw &= 0x3;

		inst = EMIT_DATA_TRANSFER(flags, 1, reg, mem & REG_MASK, RM(OFFS_REG(mem)) | ((sljit_ins)memw << 7));

		if (is_type1_transfer)
			inst |= (1 << 25); /* I bit: shifted register offset form. */

		/* P bit (1 << 24) selects pre-indexing; clearing it turns the
		   instruction into the post-indexed form. W bit (1 << 21)
		   enables base write-back for pre-indexed accesses. */
		if (type & SLJIT_MEM_POST)
			inst ^= (1 << 24);
		else
			inst |= (1 << 21);

		return push_inst(compiler, inst);
	}

	/* Immediate offset form. */
	inst = EMIT_DATA_TRANSFER(flags, 0, reg, mem & REG_MASK, 0);

	if (type & SLJIT_MEM_POST)
		inst ^= (1 << 24);
	else
		inst |= (1 << 21);

	if (is_type1_transfer) {
		/* U bit (1 << 23) marks a positive offset; negative offsets are
		   encoded as their absolute value with U clear. */
		if (memw >= 0)
			inst |= (1 << 23);
		else
			memw = -memw;

		return push_inst(compiler, inst | (sljit_ins)memw);
	}

	if (memw >= 0)
		inst |= (1 << 23);
	else
		memw = -memw;

	return push_inst(compiler, inst | TYPE2_TRANSFER_IMM((sljit_ins)memw));
}
3825 
/* Loads or stores a floating point register from/to possibly unaligned
   memory. Aligned accesses use the FPU load/store directly; unaligned
   ones are routed through the integer pipeline word by word via TMP_REG2
   (and TMP_REG1 for the second word of a double). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 mem, sljit_sw memw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_fmem(compiler, type, freg, mem, memw));

	if (type & SLJIT_MEM_ALIGNED_32)
		return emit_fop_mem(compiler, ((type ^ SLJIT_32) & SLJIT_32) | ((type & SLJIT_MEM_STORE) ? 0 : FPU_LOAD), freg, mem, memw);

	if (type & SLJIT_MEM_STORE) {
		/* Move the low word of the FPU register to TMP_REG2. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | RD(TMP_REG2)));

		if (type & SLJIT_32)
			return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1);

		/* Double: make sure memw and memw + 4 are both addressable. */
		FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
		mem |= SLJIT_MEM;

		FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw, TMP_REG1));
		/* 0x80 selects the high word of the double register. */
		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(freg) | 0x80 | RD(TMP_REG2)));
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG2, mem, memw + 4, TMP_REG1);
	}

	if (type & SLJIT_32) {
		FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
		return push_inst(compiler, VMOV | VN(freg) | RD(TMP_REG2));
	}

	FAIL_IF(update_mem_addr(compiler, &mem, &memw, 0xfff - 4));
	mem |= SLJIT_MEM;

	/* Load both words, then move the pair into the double register. */
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG2, mem, memw, TMP_REG1));
	FAIL_IF(emit_op_mem(compiler, WORD_SIZE | LOAD_DATA, TMP_REG1, mem, memw + 4, TMP_REG1));
	return push_inst(compiler, VMOV2 | VM(freg) | RD(TMP_REG2) | RN(TMP_REG1));
}
3862 
/* Reduces a SIMD memory operand to a plain base register: computes any
   index/offset into TMP_REG1 when necessary and stores the resulting
   register in *mem_ptr (NEON structure loads take no immediate offset). */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 mem = *mem_ptr;
	sljit_uw imm;

	/* Base + (index << shift): add them into TMP_REG1. */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 7));
	}

	/* Absolute address: load it as an immediate. */
	if (SLJIT_UNLIKELY(!(mem & REG_MASK))) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, (sljit_uw)memw);
	}

	mem &= REG_MASK;

	if (memw == 0) {
		*mem_ptr = mem;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;
	/* Prefer a single add/sub when |memw| is an encodable immediate. */
	imm = get_imm((sljit_uw)(memw < 0 ? -memw : memw));

	if (imm != 0)
		return push_inst(compiler, ((memw < 0) ? SUB : ADD) | RD(TMP_REG1) | RN(mem) | imm);

	/* Otherwise materialize the offset and add the base. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)memw));
	return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(mem));
}
3894 
simd_get_quad_reg_index(sljit_s32 freg)3895 static SLJIT_INLINE sljit_s32 simd_get_quad_reg_index(sljit_s32 freg)
3896 {
3897 	freg += freg & 0x1;
3898 
3899 	SLJIT_ASSERT((freg_map[freg] & 0x1) == (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS));
3900 
3901 	if (freg <= SLJIT_NUMBER_OF_SCRATCH_FLOAT_REGISTERS)
3902 		freg--;
3903 
3904 	return freg;
3905 }
3906 
3907 #define SLJIT_QUAD_OTHER_HALF(freg) ((((freg) & 0x1) << 1) - 1)
3908 
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3909 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3910 	sljit_s32 freg,
3911 	sljit_s32 srcdst, sljit_sw srcdstw)
3912 {
3913 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3914 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3915 	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3916 	sljit_ins ins;
3917 
3918 	CHECK_ERROR();
3919 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3920 
3921 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3922 
3923 	if (reg_size != 3 && reg_size != 4)
3924 		return SLJIT_ERR_UNSUPPORTED;
3925 
3926 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3927 		return SLJIT_ERR_UNSUPPORTED;
3928 
3929 	if (type & SLJIT_SIMD_TEST)
3930 		return SLJIT_SUCCESS;
3931 
3932 	if (reg_size == 4)
3933 		freg = simd_get_quad_reg_index(freg);
3934 
3935 	if (!(srcdst & SLJIT_MEM)) {
3936 		if (reg_size == 4)
3937 			srcdst = simd_get_quad_reg_index(srcdst);
3938 
3939 		if (type & SLJIT_SIMD_STORE)
3940 			ins = VD(srcdst) | VN(freg) | VM(freg);
3941 		else
3942 			ins = VD(freg) | VN(srcdst) | VM(srcdst);
3943 
3944 		if (reg_size == 4)
3945 			ins |= (sljit_ins)1 << 6;
3946 
3947 		return push_inst(compiler, VORR | ins);
3948 	}
3949 
3950 	FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3951 
3952 	if (elem_size > 3)
3953 		elem_size = 3;
3954 
3955 	ins = ((type & SLJIT_SIMD_STORE) ? VST1 : VLD1) | VD(freg)
3956 		| (sljit_ins)((reg_size == 3) ? (0x7 << 8) : (0xa << 8));
3957 
3958 	SLJIT_ASSERT(reg_size >= alignment);
3959 
3960 	if (alignment == 3)
3961 		ins |= 0x10;
3962 	else if (alignment >= 3)
3963 		ins |= 0x20;
3964 
3965 	return push_inst(compiler, ins | RN(srcdst) | ((sljit_ins)elem_size) << 6 | 0xf);
3966 }
3967 
/* Computes the NEON "modified immediate" encoding of 'value' for the
   given element size, or ~0 when the value cannot be encoded. The result
   combines the cmode/op field bits (built in 'result') with the 8 bit
   immediate scattered into the instruction's abc/defgh fields. */
static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
{
	sljit_ins result;

	/* A 32 bit value made of two identical half-words can be encoded as
	   a 16 bit replicated immediate. */
	if (elem_size > 1 && (sljit_u16)value == (value >> 16)) {
		elem_size = 1;
		value = (sljit_u16)value;
	}

	/* Likewise a 16 bit value made of two identical bytes. */
	if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
		elem_size = 0;
		value = (sljit_u8)value;
	}

	switch (elem_size) {
	case 0:
		/* 8 bit elements: any byte value is encodable. */
		SLJIT_ASSERT(value <= 0xff);
		result = 0xe00;
		break;
	case 1:
		/* 16 bit elements: the byte may sit in either half. */
		SLJIT_ASSERT(value <= 0xffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x800;
				break;
			}

			if ((value & 0xff) == 0) {
				value >>= 8;
				result |= 0xa00;
				break;
			}

			/* Second pass (inverted) also failed: not encodable. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise-inverted value (VMVN form,
			   selected by the op bit (1 << 5)). */
			value ^= (sljit_uw)0xffff;
			result = (1 << 5);
		}
		break;
	default:
		/* 32 bit elements: the byte may sit in any of the four byte
		   positions, or use the "byte followed by ones" forms. */
		SLJIT_ASSERT(value <= 0xffffffff);
		result = 0;

		while (1) {
			if (value <= 0xff) {
				result |= 0x000;
				break;
			}

			if ((value & ~(sljit_uw)0xff00) == 0) {
				value >>= 8;
				result |= 0x200;
				break;
			}

			if ((value & ~(sljit_uw)0xff0000) == 0) {
				value >>= 16;
				result |= 0x400;
				break;
			}

			if ((value & ~(sljit_uw)0xff000000) == 0) {
				value >>= 24;
				result |= 0x600;
				break;
			}

			/* 0x??ff: byte followed by 8 ones. */
			if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
				value >>= 8;
				result |= 0xc00;
				break;
			}

			/* 0x??ffff: byte followed by 16 ones. */
			if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
				value >>= 16;
				result |= 0xd00;
				break;
			}

			/* Second pass (inverted) also failed: not encodable. */
			if (result != 0)
				return ~(sljit_ins)0;

			/* Retry with the bitwise-inverted value (VMVN form). */
			value = ~value;
			result = (1 << 5);
		}
		break;
	}

	/* Scatter the 8 bit immediate into the defgh (bits 0-3), abc
	   (bits 16-18) and a (bit 24) instruction fields. */
	return ((sljit_ins)value & 0xf) | (((sljit_ins)value & 0x70) << 12) | (((sljit_ins)value & 0x80) << 17) | result;
}
4061 
/* Broadcasts a scalar (register, memory or immediate) into every lane of
   a SIMD register, using VMOV_i / VLD1_r / VDUP(_s) as appropriate. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imm;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Only 64 bit (D) and 128 bit (Q) registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	/* Zero is always encodable as a vector immediate. */
	if (src == SLJIT_IMM && srcw == 0)
		return push_inst(compiler, VMOV_i | ((reg_size == 4) ? (1 << 6) : 0) | VD(freg));

	if (SLJIT_UNLIKELY(elem_size == 3)) {
		/* 64 bit (double) lanes: copy the value into both halves of the
		   quad register with FPU load / VORR moves. */
		SLJIT_ASSERT(type & SLJIT_SIMD_FLOAT);

		if (src & SLJIT_MEM) {
			FAIL_IF(emit_fop_mem(compiler, FPU_LOAD | SLJIT_32, freg, src, srcw));
			src = freg;
		} else if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	if (src & SLJIT_MEM) {
		/* VLD1 single-element-to-all-lanes form. */
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));

		ins = (sljit_ins)(elem_size << 6);

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 5; /* write both D registers of the Q pair */

		return push_inst(compiler, VLD1_r | ins | VD(freg) | RN(src) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* Duplicate a 32 bit float lane with VDUP (scalar). */
		SLJIT_ASSERT(elem_size == 2);
		ins = ((sljit_ins)freg_ebit_map[src] << (16 + 2 + 1)) | ((sljit_ins)1 << (16 + 2));

		if (reg_size == 4)
			ins |= (sljit_ins)1 << 6;

		return push_inst(compiler, VDUP_s | ins | VD(freg) | (sljit_ins)freg_map[src]);
	}

	if (src == SLJIT_IMM) {
		/* Truncate the immediate to the element width, then try the
		   vector-immediate encoding before falling back to a core
		   register VDUP. */
		if (elem_size < 2)
			srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		imm = simd_get_imm(elem_size, (sljit_uw)srcw);

		if (imm != ~(sljit_ins)0) {
			if (reg_size == 4)
				imm |= (sljit_ins)1 << 6;

			return push_inst(compiler, VMOV_i | imm | VD(freg));
		}

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcw));
		src = TMP_REG1;
	}

	/* VDUP from a core register: element size selects the b/e bits. */
	switch (elem_size) {
	case 0:
		ins = 1 << 22;
		break;
	case 1:
		ins = 1 << 5;
		break;
	default:
		ins = 0;
		break;
	}

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 21;

	return push_inst(compiler, VDUP | ins | VN(freg) | RD(src));
}
4161 
/* Moves a single lane of a SIMD register to/from a register, memory
   location or immediate; with SLJIT_SIMD_LANE_ZERO the other lanes are
   cleared first. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg, sljit_s32 lane_index,
	sljit_s32 srcdst, sljit_sw srcdstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));

	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);

	/* Only 64 bit (D) and 128 bit (Q) registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) ? (elem_size < 2 || elem_size > 3) : (elem_size > 2))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (type & SLJIT_SIMD_LANE_ZERO) {
		/* Q bit for the zeroing VMOV_i when operating on a quad reg. */
		ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 6);

		if (type & SLJIT_SIMD_FLOAT) {
			/* Double lane from a register: move the source into the
			   target half, zero the other half. */
			if (elem_size == 3 && !(srcdst & SLJIT_MEM)) {
				if (lane_index == 1)
					freg += SLJIT_QUAD_OTHER_HALF(freg);

				if (srcdst != freg)
					FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(srcdst) | VM(srcdst)));

				freg += SLJIT_QUAD_OTHER_HALF(freg);
				return push_inst(compiler, VMOV_i | VD(freg));
			}

			/* Source overlaps the destination: save it to TMP_FREG2
			   before the destination is zeroed. */
			if (srcdst == freg || (elem_size == 3 && srcdst == (freg + SLJIT_QUAD_OTHER_HALF(freg)))) {
				FAIL_IF(push_inst(compiler, VORR | ins | VD(TMP_FREG2) | VN(freg) | VM(freg)));
				srcdst = TMP_FREG2;
				srcdstw = 0;
			}
		}

		/* Clear the whole register before inserting the lane. */
		FAIL_IF(push_inst(compiler, VMOV_i | ins | VD(freg)));
	}

	/* Lanes in the upper half of a quad register live in the other
	   D register of the pair. */
	if (reg_size == 4 && lane_index >= (0x8 >> elem_size)) {
		lane_index -= (0x8 >> elem_size);
		freg += SLJIT_QUAD_OTHER_HALF(freg);
	}

	if (srcdst & SLJIT_MEM) {
		/* Doubles go through the FPU load/store path. */
		if (elem_size == 3)
			return emit_fop_mem(compiler, ((type & SLJIT_SIMD_STORE) ? 0 : FPU_LOAD) | SLJIT_32, freg, srcdst, srcdstw);

		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));

		/* VLD1/VST1 single-lane form; the index is encoded in bytes. */
		lane_index = lane_index << elem_size;
		ins = (sljit_ins)((elem_size << 10) | (lane_index << 5));
		return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? VST1_s : VLD1_s) | ins | VD(freg) | RN(srcdst) | 0xf);
	}

	if (type & SLJIT_SIMD_FLOAT) {
		if (elem_size == 3) {
			if (type & SLJIT_SIMD_STORE)
				return push_inst(compiler, VORR | VD(srcdst) | VN(freg) | VM(freg));
			return push_inst(compiler, VMOV_F32 | SLJIT_32 | VD(freg) | VM(srcdst));
		}

		if (type & SLJIT_SIMD_STORE) {
			/* Lane 0 of an even-mapped register is directly accessible
			   as a single precision register. */
			if (freg_ebit_map[freg] == 0) {
				if (lane_index == 1)
					freg = SLJIT_F64_SECOND(freg);

				return push_inst(compiler, VMOV_F32 | VD(srcdst) | VM(freg));
			}

			/* Otherwise bounce the lane through TMP_REG1. */
			FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1)));
			return push_inst(compiler, VMOV | VN(srcdst) | RD(TMP_REG1));
		}

		FAIL_IF(push_inst(compiler, VMOV | (1 << 20) | VN(srcdst) | RD(TMP_REG1)));
		return push_inst(compiler, VMOV_s | ((sljit_ins)lane_index << 21) | VN(freg) | RD(TMP_REG1));
	}

	if (srcdst == SLJIT_IMM) {
		/* Truncate the immediate to the element width and load it into
		   a core register. */
		if (elem_size < 2)
			srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;

		FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_uw)srcdstw));
		srcdst = TMP_REG1;
	}

	/* VMOV (scalar <-> core register): element size selects the
	   opc1/opc2 size bits. */
	if (elem_size == 0)
		ins = 0x400000;
	else if (elem_size == 1)
		ins = 0x20;
	else
		ins = 0;

	lane_index = lane_index << elem_size;
	ins |= (sljit_ins)(((lane_index & 0x4) << 19) | ((lane_index & 0x3) << 5));

	if (type & SLJIT_SIMD_STORE) {
		ins |= (1 << 20); /* to-core-register direction */

		/* Sub-word stores zero-extend unless a signed lane read was
		   requested. */
		if (elem_size < 2 && !(type & SLJIT_SIMD_LANE_SIGNED))
			ins |= (1 << 23);
	}

	return push_inst(compiler, VMOV_s | ins | VN(freg) | RD(srcdst));
}
4278 
/* Broadcasts one lane of 'src' into every lane of 'freg' (VDUP scalar);
   64 bit lanes are handled with VORR moves of the two register halves. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_s32 src_lane_index)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));

	/* Only 64 bit (D) and 128 bit (Q) registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		src = simd_get_quad_reg_index(src);

		/* Lanes in the upper half of a quad register live in the other
		   D register of the pair. */
		if (src_lane_index >= (0x8 >> elem_size)) {
			src_lane_index -= (0x8 >> elem_size);
			src += SLJIT_QUAD_OTHER_HALF(src);
		}
	}

	if (elem_size == 3) {
		/* 64 bit lanes: copy the selected D register into both halves. */
		if (freg != src)
			FAIL_IF(push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src)));

		freg += SLJIT_QUAD_OTHER_HALF(freg);

		if (freg != src)
			return push_inst(compiler, VORR | VD(freg) | VN(src) | VM(src));
		return SLJIT_SUCCESS;
	}

	/* VDUP (scalar): imm4 field encodes lane index and element size. */
	ins = ((((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size));

	if (reg_size == 4)
		ins |= (sljit_ins)1 << 6; /* Q bit */

	return push_inst(compiler, VDUP_s | ins | VD(freg) | VM(src));
}
4327 
/* Widens the elements of the lower part of 'src' into 'freg':
   integers via one or more VSHLL #0 steps (signed or unsigned),
   floats via VCVT.F64.F32. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	sljit_s32 dst_reg;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	/* Only 64 bit (D) and 128 bit (Q) registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	/* Float extension is only f32 -> f64. */
	if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (reg_size == 4)
		freg = simd_get_quad_reg_index(freg);

	if (src & SLJIT_MEM) {
		FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
		/* Load only as many source bytes as the extension consumes:
		   a full D register for a single doubling step on a quad
		   destination, a partial (lane) load otherwise. */
		if (reg_size == 4 && elem2_size - elem_size == 1)
			FAIL_IF(push_inst(compiler, VLD1 | (0x7 << 8) | VD(freg) | RN(src) | 0xf));
		else
			FAIL_IF(push_inst(compiler, VLD1_s | (sljit_ins)((reg_size - elem2_size + elem_size) << 10) | VD(freg) | RN(src) | 0xf));
		src = freg;
	} else if (reg_size == 4)
		src = simd_get_quad_reg_index(src);

	if (!(type & SLJIT_SIMD_FLOAT)) {
		dst_reg = (reg_size == 4) ? freg : TMP_FREG2;

		/* Each VSHLL #0 doubles the element size; repeat until the
		   target size is reached. */
		do {
			FAIL_IF(push_inst(compiler, VSHLL | ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0 : (1 << 24))
				| ((sljit_ins)1 << (19 + elem_size)) | VD(dst_reg) | VM(src)));
			src = dst_reg;
		} while (++elem_size < elem2_size);

		if (dst_reg == TMP_FREG2)
			return push_inst(compiler, VORR | VD(freg) | VN(TMP_FREG2) | VM(TMP_FREG2));
		return SLJIT_SUCCESS;
	}

	/* No SIMD variant, must use VFP instead. */
	SLJIT_ASSERT(reg_size == 4);

	if (freg == src) {
		/* Convert the high single first so the low one is not
		   overwritten before it is read (0x20 selects the odd/high
		   source register). */
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20));
		freg += SLJIT_QUAD_OTHER_HALF(freg);
		return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src));
	}

	FAIL_IF(push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src)));
	freg += SLJIT_QUAD_OTHER_HALF(freg);
	return push_inst(compiler, VCVT_F64_F32 | VD(freg) | VM(src) | 0x20);
}
4392 
/* Extracts the sign bits of every element of 'freg' into a bitmask
   stored in 'dst'. Implemented as a VSHR that isolates the sign bits
   followed by a series of VSRA shift-accumulate steps (shift amounts
   packed into 'imms', one byte per step) that compact them into the low
   lanes, then a scalar move to a core register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Only 64 bit (D) and 128 bit (Q) registers are supported. */
	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* Pick the initial sign-isolating shift and the per-step VSRA shift
	   amounts for this element size / register size combination. */
	switch (elem_size) {
	case 0:
		imms = 0x243219;
		ins = VSHR | (1 << 24) | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x243219 : 0x2231;
		ins = VSHR | (1 << 24) | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x2231 : 0x21;
		ins = VSHR | (1 << 24) | (0x21 << 16);
		break;
	default:
		imms = 0x21;
		ins = VSHR | (1 << 24) | (0x1 << 16) | (1 << 7);
		break;
	}

	if (reg_size == 4) {
		freg = simd_get_quad_reg_index(freg);
		ins |= (sljit_ins)1 << 6; /* Q bit */
	}

	SLJIT_ASSERT((freg_map[TMP_FREG2] & 0x1) == 0);
	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG2) | VM(freg)));

	/* Narrow a quad intermediate back to a D register. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, VMOVN | ((sljit_ins)(elem_size - 1) << 18) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	ins = (reg_size == 4 && elem_size == 0) ? (1 << 6) : 0;

	/* Apply the queued shift-accumulate steps; the last one (below the
	   0x100 threshold) also sets bit 7 of the encoding. */
	while (imms >= 0x100) {
		FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | ((imms & 0xff) << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));
		imms >>= 8;
	}

	FAIL_IF(push_inst(compiler, VSRA | (1 << 24) | ins | (1 << 7) | (imms << 16) | VD(TMP_FREG2) | VM(TMP_FREG2)));

	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
	FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(dst_r) | VN(TMP_FREG2)));

	/* 16 byte-lanes need a second scalar move and an ORR to combine the
	   two 8 bit halves of the mask. */
	if (reg_size == 4 && elem_size == 0) {
		SLJIT_ASSERT(freg_map[TMP_FREG2] + 1 == freg_map[TMP_FREG1]);
		FAIL_IF(push_inst(compiler, VMOV_s | (1 << 20) | (1 << 23) | (0x2 << 21) | RD(TMP_REG2) | VN(TMP_FREG1)));
		FAIL_IF(push_inst(compiler, ORR | RD(dst_r) | RN(dst_r) | RM(TMP_REG2) | (0x8 << 7)));
	}

	if (dst_r == TMP_REG1)
		return emit_op_mem(compiler, WORD_SIZE, TMP_REG1, dst, dstw, TMP_REG2);

	return SLJIT_SUCCESS;
}
4469 
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4470 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4471 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4472 {
4473 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4474 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4475 	sljit_ins ins = 0;
4476 
4477 	CHECK_ERROR();
4478 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4479 
4480 	if (reg_size != 3 && reg_size != 4)
4481 		return SLJIT_ERR_UNSUPPORTED;
4482 
4483 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4484 		return SLJIT_ERR_UNSUPPORTED;
4485 
4486 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
4487 	case SLJIT_SIMD_OP2_AND:
4488 		ins = VAND;
4489 		break;
4490 	case SLJIT_SIMD_OP2_OR:
4491 		ins = VORR;
4492 		break;
4493 	case SLJIT_SIMD_OP2_XOR:
4494 		ins = VEOR;
4495 		break;
4496 	}
4497 
4498 	if (type & SLJIT_SIMD_TEST)
4499 		return SLJIT_SUCCESS;
4500 
4501 	if (reg_size == 4) {
4502 		dst_freg = simd_get_quad_reg_index(dst_freg);
4503 		src1_freg = simd_get_quad_reg_index(src1_freg);
4504 		src2_freg = simd_get_quad_reg_index(src2_freg);
4505 		ins |= (sljit_ins)1 << 6;
4506 	}
4507 
4508 	return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
4509 }
4510 
4511 #undef FPU_LOAD
4512 
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4513 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4514 	sljit_s32 dst_reg,
4515 	sljit_s32 mem_reg)
4516 {
4517 	sljit_u32 ins;
4518 
4519 	CHECK_ERROR();
4520 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4521 
4522 	switch (GET_OPCODE(op)) {
4523 	case SLJIT_MOV_U8:
4524 		ins = LDREXB;
4525 		break;
4526 	case SLJIT_MOV_U16:
4527 		ins = LDREXH;
4528 		break;
4529 	default:
4530 		ins = LDREX;
4531 		break;
4532 	}
4533 
4534 	return push_inst(compiler, ins | RN(mem_reg) | RD(dst_reg));
4535 }
4536 
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)4537 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4538 	sljit_s32 src_reg,
4539 	sljit_s32 mem_reg,
4540 	sljit_s32 temp_reg)
4541 {
4542 	sljit_u32 ins;
4543 
4544 	/* temp_reg == mem_reg is undefined so use another temp register */
4545 	SLJIT_UNUSED_ARG(temp_reg);
4546 
4547 	CHECK_ERROR();
4548 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4549 
4550 	switch (GET_OPCODE(op)) {
4551 	case SLJIT_MOV_U8:
4552 		ins = STREXB;
4553 		break;
4554 	case SLJIT_MOV_U16:
4555 		ins = STREXH;
4556 		break;
4557 	default:
4558 		ins = STREX;
4559 		break;
4560 	}
4561 
4562 	FAIL_IF(push_inst(compiler, ins | RN(mem_reg) | RD(TMP_REG1) | RM(src_reg)));
4563 	if (op & SLJIT_SET_ATOMIC_STORED)
4564 		return push_inst(compiler, CMP | SET_FLAGS | SRC2_IMM | RN(TMP_REG1));
4565 
4566 	return SLJIT_SUCCESS;
4567 }
4568 
/* Emits code that loads a word sized constant (init_value) into dst,
   and returns a record that allows patching the constant later via
   sljit_set_const. Returns NULL on allocation/emission failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_const *const_;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
	PTR_FAIL_IF(!const_);
	set_const(const_, compiler);

	/* Load into dst directly when it is a register, otherwise build the
	   value in TMP_REG2 and store it to memory afterwards. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC relative load from a unique literal pool entry which
	   holds init_value; the entry is what gets patched later. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler,
		EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), (sljit_ins)init_value));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: materialize the constant as immediates (emit_imm; presumably
	   a movw/movt pair — its encoding is defined elsewhere in this file). */
	PTR_FAIL_IF(emit_imm(compiler, dst_r, init_value));
#endif /* SLJIT_CONFIG_ARM_V6 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return const_;
}
4596 
/* Emits code that loads a not-yet-known address into dst; the emitted
   instruction(s) are resolved later (see set_mov_addr / the jump list).
   Returns the jump record used for that resolution, or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_jump *jump;
	sljit_s32 dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	/* Load into dst directly when it is a register, otherwise build the
	   address in TMP_REG2 and store it to memory afterwards. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;

#if (defined SLJIT_CONFIG_ARM_V6 && SLJIT_CONFIG_ARM_V6)
	/* ARMv6: PC relative load from a unique literal pool entry
	   (initially 0); the entry is patched once the address is known. */
	PTR_FAIL_IF(push_inst_with_unique_literal(compiler, EMIT_DATA_TRANSFER(WORD_SIZE | LOAD_DATA, 1, dst_r, TMP_PC, 0), 0));
	compiler->patches++;
#else /* !SLJIT_CONFIG_ARM_V6 */
	/* ARMv7: emit a placeholder word carrying only the destination
	   register; rewritten with real instruction(s) during code generation. */
	PTR_FAIL_IF(push_inst(compiler, RD(dst_r)));
#endif /* SLJIT_CONFIG_ARM_V6 */

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_mov_addr(jump, compiler, 1);

#if (defined SLJIT_CONFIG_ARM_V7 && SLJIT_CONFIG_ARM_V7)
	/* Reserve room for a second instruction word (the address may need
	   two instructions — presumably movw+movt; confirm in generate_code). */
	compiler->size += 1;
#endif /* SLJIT_CONFIG_ARM_V7 */

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, dst, dstw, TMP_REG1));
	return jump;
}
4627 
/* Patches the jump instruction at addr to target new_target.
   Thin public wrapper; the final argument (1) requests an icache flush
   in set_jump_addr. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	set_jump_addr(addr, executable_offset, new_target, 1);
}
4632 
/* Patches the constant emitted by sljit_emit_const at addr to
   new_constant. Thin public wrapper; the final argument (1) requests
   an icache flush in set_const_value. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	set_const_value(addr, executable_offset, (sljit_uw)new_constant, 1);
}
4637