1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 return "ARM-64" SLJIT_CPUINFO;
30 }
31
/* Type of one instruction word (32 bits wide). */
typedef sljit_u32 sljit_ins;

/* Index 0 of reg_map; it maps to machine register number 31. */
#define TMP_ZERO (0)

/* Backend-private temporaries: reg_map indexes placed after the
   SLJIT_NUMBER_OF_REGISTERS registers. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)

/* Backend-private floating point temporaries (freg_map indexes). */
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* Translates a register index (as used by RD/RN/RM/...) to the machine
   register number placed into the instruction word. */
/* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
	31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
};

/* Same translation for floating point / vector register indexes (VD/VN/VM/...). */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
	0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31
};

/* Bit 31 of the instruction word. The code generator XORs it into an opcode
   to switch between the 64 bit and the 32 bit form of an instruction. */
#define W_OP ((sljit_ins)1 << 31)
/* Operand field encoders: each one maps a register index through reg_map /
   freg_map and shifts the result to its field position (bit 0, 5, 10 or 16). */
#define RD(rd) ((sljit_ins)reg_map[rd])
#define RT(rt) ((sljit_ins)reg_map[rt])
#define RN(rn) ((sljit_ins)reg_map[rn] << 5)
#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10)
#define RM(rm) ((sljit_ins)reg_map[rm] << 16)
#define VD(vd) ((sljit_ins)freg_map[vd])
#define VT(vt) ((sljit_ins)freg_map[vt])
#define VT2(vt) ((sljit_ins)freg_map[vt] << 10)
#define VN(vn) ((sljit_ins)freg_map[vn] << 5)
#define VM(vm) ((sljit_ins)freg_map[vm] << 16)
65
66 /* --------------------------------------------------------------------- */
/* Instruction forms */
68 /* --------------------------------------------------------------------- */
69
/* Base instruction words, listed alphabetically. The code generator builds a
   complete instruction by OR-ing operand fields (RD/RN/RM, VD/VN/VM,
   immediates) into one of these values; XOR-ing W_OP into a value selects the
   32 bit form. Names ending in "I" are presumably the immediate-operand
   variants and names ending in "_v"/"_e"/"_g"/"_s" vector variants - confirm
   against the architecture manual before changing any value. */
#define ADC 0x9a000000
#define ADD 0x8b000000
#define ADDE 0x8b200000
#define ADDI 0x91000000
#define ADR 0x10000000
#define ADRP 0x90000000
#define AND 0x8a000000
#define ANDI 0x92000000
#define AND_v 0x0e201c00
#define ASRV 0x9ac02800
#define B 0x14000000
#define B_CC 0x54000000
#define BL 0x94000000
#define BLR 0xd63f0000
#define BR 0xd61f0000
#define BRK 0xd4200000
#define CAS 0xc8a07c00
#define CASB 0x08a07c00
#define CASH 0x48a07c00
#define CBZ 0xb4000000
#define CCMPI 0xfa400800
#define CLZ 0xdac01000
#define CSEL 0x9a800000
#define CSINC 0x9a800400
#define DUP_e 0x0e000400
#define DUP_g 0x0e000c00
#define EOR 0xca000000
#define EOR_v 0x2e201c00
#define EORI 0xd2000000
#define EXTR 0x93c00000
#define FABS 0x1e60c000
#define FADD 0x1e602800
#define FCMP 0x1e602000
#define FCSEL 0x1e600c00
#define FCVT 0x1e224000
#define FCVTL 0x0e217800
#define FCVTZS 0x9e780000
#define FDIV 0x1e601800
#define FMOV 0x1e604000
#define FMOV_R 0x9e660000
#define FMOV_I 0x1e601000
#define FMUL 0x1e600800
#define FNEG 0x1e614000
#define FSUB 0x1e603800
#define INS 0x4e001c00
#define INS_e 0x6e000400
#define LD1 0x0c407000
#define LD1_s 0x0d400000
#define LD1R 0x0d40c000
#define LDRI 0xf9400000
#define LDRI_F64 0xfd400000
#define LDRI_POST 0xf8400400
#define LDP 0xa9400000
#define LDP_F64 0x6d400000
#define LDP_POST 0xa8c00000
#define LDR_PRE 0xf8400c00
#define LDXR 0xc85f7c00
#define LDXRB 0x085f7c00
#define LDXRH 0x485f7c00
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
#define MOVI 0x0f000400
#define MOVK 0xf2800000
#define MOVN 0x92800000
#define MOVZ 0xd2800000
#define NOP 0xd503201f
#define ORN 0xaa200000
#define ORR 0xaa000000
#define ORR_v 0x0ea01c00
#define ORRI 0xb2000000
#define RBIT 0xdac00000
#define RET 0xd65f0000
#define REV 0xdac00c00
#define REV16 0xdac00400
#define RORV 0x9ac02c00
#define SBC 0xda000000
#define SBFM 0x93400000
#define SCVTF 0x9e620000
#define SDIV 0x9ac00c00
#define SMADDL 0x9b200000
#define SMOV 0x0e002c00
#define SMULH 0x9b403c00
#define SSHLL 0x0f00a400
#define ST1 0x0c007000
#define ST1_s 0x0d000000
#define STP 0xa9000000
#define STP_F64 0x6d000000
#define STP_PRE 0xa9800000
#define STRB 0x38206800
#define STRBI 0x39000000
#define STRI 0xf9000000
#define STRI_F64 0xfd000000
#define STR_FI 0x3d000000
#define STR_FR 0x3c206800
#define STUR_FI 0x3c000000
#define STURBI 0x38000000
#define STXR 0xc8007c00
#define STXRB 0x8007c00
#define STXRH 0x48007c00
#define SUB 0xcb000000
#define SUBI 0xd1000000
#define SUBS 0xeb000000
#define TBZ 0x36000000
#define UBFM 0xd3400000
#define UCVTF 0x9e630000
#define UDIV 0x9ac00800
#define UMOV 0x0e003c00
#define UMULH 0x9bc03c00
#define USHLL 0x2f00a400
#define USHR 0x2f000400
#define USRA 0x2f001400
#define XTN 0x0e212800

/* Opcodes derived from the table above. */
/* CSINC with TMP_ZERO in both source register fields. */
#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO))
/* The load forms are the matching store words with bit 22 set. */
#define LDR (STRI | (1 << 22))
#define LDRB (STRBI | (1 << 22))
/* LDRB with bit 30 set. */
#define LDRH (LDRB | (1 << 30))
/* ORR with TMP_ZERO in the RN field. */
#define MOV (ORR | RN(TMP_ZERO))
189
push_inst(struct sljit_compiler * compiler,sljit_ins ins)190 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
191 {
192 sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
193 FAIL_IF(!ptr);
194 *ptr = ins;
195 compiler->size++;
196 return SLJIT_SUCCESS;
197 }
198
emit_imm64_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_uw imm)199 static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
200 {
201 FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5)));
202 FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21)));
203 FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21)));
204 return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21));
205 }
206
detect_jump_type(struct sljit_jump * jump,sljit_ins * code_ptr,sljit_ins * code,sljit_sw executable_offset)207 static SLJIT_INLINE sljit_ins* detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
208 {
209 sljit_sw diff;
210 sljit_uw target_addr;
211
212 if (jump->flags & SLJIT_REWRITABLE_JUMP)
213 goto exit;
214
215 if (jump->flags & JUMP_ADDR)
216 target_addr = jump->u.target;
217 else {
218 SLJIT_ASSERT(jump->u.label != NULL);
219 target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
220 }
221
222 diff = (sljit_sw)target_addr - (sljit_sw)code_ptr - executable_offset;
223
224 if (jump->flags & IS_COND) {
225 diff += SSIZE_OF(ins);
226 if (diff <= 0xfffff && diff >= -0x100000) {
227 *(--code_ptr) ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
228 jump->flags |= PATCH_COND;
229 jump->addr -= sizeof(sljit_ins);
230 return code_ptr;
231 }
232 diff -= SSIZE_OF(ins);
233 }
234
235 if (diff <= 0x7ffffff && diff >= -0x8000000) {
236 if (jump->flags & IS_COND)
237 code_ptr[-1] -= (4 << 5);
238 jump->flags |= PATCH_B;
239 return code_ptr;
240 }
241
242 if (target_addr < 0x100000000l) {
243 if (jump->flags & IS_COND)
244 code_ptr[-1] -= (2 << 5);
245 code_ptr[2] = code_ptr[0];
246 return code_ptr + 2;
247 }
248
249 if (diff <= 0xfffff000l && diff >= -0x100000000l) {
250 if (jump->flags & IS_COND)
251 code_ptr[-1] -= (2 << 5);
252 jump->flags |= PATCH_B32;
253 code_ptr[2] = code_ptr[0];
254 return code_ptr + 2;
255 }
256
257 if (target_addr < 0x1000000000000l) {
258 if (jump->flags & IS_COND)
259 code_ptr[-1] -= (1 << 5);
260 jump->flags |= PATCH_ABS48;
261 code_ptr[3] = code_ptr[0];
262 return code_ptr + 3;
263 }
264
265 exit:
266 jump->flags |= PATCH_ABS64;
267 code_ptr[4] = code_ptr[0];
268 return code_ptr + 4;
269 }
270
mov_addr_get_length(struct sljit_jump * jump,sljit_ins * code_ptr,sljit_ins * code,sljit_sw executable_offset)271 static SLJIT_INLINE sljit_sw mov_addr_get_length(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
272 {
273 sljit_uw addr;
274 sljit_sw diff;
275 SLJIT_UNUSED_ARG(executable_offset);
276
277 SLJIT_ASSERT(jump->flags < ((sljit_uw)4 << JUMP_SIZE_SHIFT));
278 if (jump->flags & JUMP_ADDR)
279 addr = jump->u.target;
280 else
281 addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code + jump->u.label->size, executable_offset);
282
283 diff = (sljit_sw)addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
284
285 if (diff <= 0xfffff && diff >= -0x100000) {
286 jump->flags |= PATCH_B;
287 return 0;
288 }
289
290 if (diff <= 0xfffff000l && diff >= -0x100000000l) {
291 SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
292 jump->flags |= PATCH_B32;
293 return 1;
294 }
295
296 if (addr < 0x100000000l) {
297 SLJIT_ASSERT(jump->flags >= ((sljit_uw)1 << JUMP_SIZE_SHIFT));
298 return 1;
299 }
300
301 if (addr < 0x1000000000000l) {
302 SLJIT_ASSERT(jump->flags >= ((sljit_uw)2 << JUMP_SIZE_SHIFT));
303 jump->flags |= PATCH_ABS48;
304 return 2;
305 }
306
307 SLJIT_ASSERT(jump->flags >= ((sljit_uw)3 << JUMP_SIZE_SHIFT));
308 jump->flags |= PATCH_ABS64;
309 return 3;
310 }
311
generate_jump_or_mov_addr(struct sljit_jump * jump,sljit_sw executable_offset)312 static SLJIT_INLINE void generate_jump_or_mov_addr(struct sljit_jump *jump, sljit_sw executable_offset)
313 {
314 sljit_sw addr = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
315 sljit_ins* buf_ptr = (sljit_ins*)jump->addr;
316 sljit_u32 dst;
317 SLJIT_UNUSED_ARG(executable_offset);
318
319 if (!(jump->flags & JUMP_MOV_ADDR)) {
320 if (jump->flags & PATCH_COND) {
321 addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
322 SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000);
323 buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5);
324 return;
325 }
326
327 if (jump->flags & PATCH_B) {
328 addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
329 SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000);
330 buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff);
331 return;
332 }
333
334 dst = (buf_ptr[0] >> 5) & 0x1f;
335
336 if (jump->flags & PATCH_B32) {
337 addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset) & ~(sljit_sw)0xfff;
338 SLJIT_ASSERT(addr <= 0xfffff000l && addr >= -0x100000000l);
339 buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst;
340 buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10);
341 return;
342 }
343 } else {
344 dst = *buf_ptr;
345
346 if (jump->flags & PATCH_B) {
347 addr -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
348 SLJIT_ASSERT(addr <= 0xfffff && addr >= -0x100000);
349 buf_ptr[0] = ADR | (((sljit_ins)addr & 0x3) << 29) | (((sljit_ins)(addr >> 2) & 0x7ffff) << 5) | dst;
350 return;
351 }
352
353 if (jump->flags & PATCH_B32) {
354 addr -= ((sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) & ~(sljit_sw)0xfff;
355 SLJIT_ASSERT(addr <= 0xffffffffl && addr >= -0x100000000l);
356 buf_ptr[0] = ADRP | (((sljit_ins)(addr >> 12) & 0x3) << 29) | (((sljit_ins)(addr >> 14) & 0x7ffff) << 5) | dst;
357 buf_ptr[1] = ADDI | dst | (dst << 5) | ((sljit_ins)(addr & 0xfff) << 10);
358 return;
359 }
360 }
361
362 SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff);
363 SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff);
364
365 buf_ptr[0] = MOVZ | (((sljit_ins)addr & 0xffff) << 5) | dst;
366 buf_ptr[1] = MOVK | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21) | dst;
367 if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
368 buf_ptr[2] = MOVK | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21) | dst;
369
370 if (jump->flags & PATCH_ABS64)
371 buf_ptr[3] = MOVK | ((sljit_ins)((sljit_uw)addr >> 48) << 5) | (3 << 21) | dst;
372 }
373
reduce_code_size(struct sljit_compiler * compiler)374 static void reduce_code_size(struct sljit_compiler *compiler)
375 {
376 struct sljit_label *label;
377 struct sljit_jump *jump;
378 struct sljit_const *const_;
379 SLJIT_NEXT_DEFINE_TYPES;
380 sljit_uw total_size;
381 sljit_uw size_reduce = 0;
382 sljit_sw diff;
383
384 label = compiler->labels;
385 jump = compiler->jumps;
386 const_ = compiler->consts;
387 SLJIT_NEXT_INIT_TYPES();
388
389 while (1) {
390 SLJIT_GET_NEXT_MIN();
391
392 if (next_min_addr == SLJIT_MAX_ADDRESS)
393 break;
394
395 if (next_min_addr == next_label_size) {
396 label->size -= size_reduce;
397
398 label = label->next;
399 next_label_size = SLJIT_GET_NEXT_SIZE(label);
400 }
401
402 if (next_min_addr == next_const_addr) {
403 const_->addr -= size_reduce;
404 const_ = const_->next;
405 next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
406 continue;
407 }
408
409 if (next_min_addr != next_jump_addr)
410 continue;
411
412 jump->addr -= size_reduce;
413 if (!(jump->flags & JUMP_MOV_ADDR)) {
414 total_size = JUMP_MAX_SIZE;
415
416 if (!(jump->flags & SLJIT_REWRITABLE_JUMP)) {
417 if (jump->flags & JUMP_ADDR) {
418 if (jump->u.target < 0x100000000l)
419 total_size = 3;
420 else if (jump->u.target < 0x1000000000000l)
421 total_size = 4;
422 } else {
423 /* Unit size: instruction. */
424 diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
425
426 if ((jump->flags & IS_COND) && (diff + 1) <= (0xfffff / SSIZE_OF(ins)) && (diff + 1) >= (-0x100000 / SSIZE_OF(ins)))
427 total_size = 0;
428 else if (diff <= (0x7ffffff / SSIZE_OF(ins)) && diff >= (-0x8000000 / SSIZE_OF(ins)))
429 total_size = 1;
430 else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins)))
431 total_size = 3;
432 }
433 }
434
435 size_reduce += JUMP_MAX_SIZE - total_size;
436 } else {
437 /* Real size minus 1. Unit size: instruction. */
438 total_size = 3;
439
440 if (!(jump->flags & JUMP_ADDR)) {
441 diff = (sljit_sw)jump->u.label->size - (sljit_sw)jump->addr;
442
443 if (diff <= (0xfffff / SSIZE_OF(ins)) && diff >= (-0x100000 / SSIZE_OF(ins)))
444 total_size = 0;
445 else if (diff <= (0xfffff000l / SSIZE_OF(ins)) && diff >= (-0x100000000l / SSIZE_OF(ins)))
446 total_size = 1;
447 } else if (jump->u.target < 0x100000000l)
448 total_size = 1;
449 else if (jump->u.target < 0x1000000000000l)
450 total_size = 2;
451
452 size_reduce += 3 - total_size;
453 }
454
455 jump->flags |= total_size << JUMP_SIZE_SHIFT;
456 jump = jump->next;
457 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
458 }
459
460 compiler->size -= size_reduce;
461 }
462
sljit_generate_code(struct sljit_compiler * compiler,sljit_s32 options,void * exec_allocator_data)463 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
464 {
465 struct sljit_memory_fragment *buf;
466 sljit_ins *code;
467 sljit_ins *code_ptr;
468 sljit_ins *buf_ptr;
469 sljit_ins *buf_end;
470 sljit_uw word_count;
471 SLJIT_NEXT_DEFINE_TYPES;
472 sljit_sw executable_offset;
473 sljit_sw addr;
474
475 struct sljit_label *label;
476 struct sljit_jump *jump;
477 struct sljit_const *const_;
478
479 CHECK_ERROR_PTR();
480 CHECK_PTR(check_sljit_generate_code(compiler));
481
482 reduce_code_size(compiler);
483
484 code = (sljit_ins*)allocate_executable_memory(compiler->size * sizeof(sljit_ins), options, exec_allocator_data, &executable_offset);
485 PTR_FAIL_WITH_EXEC_IF(code);
486
487 reverse_buf(compiler);
488 buf = compiler->buf;
489
490 code_ptr = code;
491 word_count = 0;
492 label = compiler->labels;
493 jump = compiler->jumps;
494 const_ = compiler->consts;
495 SLJIT_NEXT_INIT_TYPES();
496 SLJIT_GET_NEXT_MIN();
497
498 do {
499 buf_ptr = (sljit_ins*)buf->memory;
500 buf_end = buf_ptr + (buf->used_size >> 2);
501 do {
502 *code_ptr = *buf_ptr++;
503 if (next_min_addr == word_count) {
504 SLJIT_ASSERT(!label || label->size >= word_count);
505 SLJIT_ASSERT(!jump || jump->addr >= word_count);
506 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
507
508 /* These structures are ordered by their address. */
509 if (next_min_addr == next_label_size) {
510 label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
511 label->size = (sljit_uw)(code_ptr - code);
512 label = label->next;
513 next_label_size = SLJIT_GET_NEXT_SIZE(label);
514 }
515
516 if (next_min_addr == next_jump_addr) {
517 if (!(jump->flags & JUMP_MOV_ADDR)) {
518 word_count = word_count - 1 + (jump->flags >> JUMP_SIZE_SHIFT);
519 jump->addr = (sljit_uw)code_ptr;
520 code_ptr = detect_jump_type(jump, code_ptr, code, executable_offset);
521 SLJIT_ASSERT((jump->flags & PATCH_COND) || ((sljit_uw)code_ptr - jump->addr < (jump->flags >> JUMP_SIZE_SHIFT) * sizeof(sljit_ins)));
522 } else {
523 word_count += jump->flags >> JUMP_SIZE_SHIFT;
524 addr = (sljit_sw)code_ptr;
525 code_ptr += mov_addr_get_length(jump, code_ptr, code, executable_offset);
526 jump->addr = (sljit_uw)addr;
527 }
528
529 jump = jump->next;
530 next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
531 } else if (next_min_addr == next_const_addr) {
532 const_->addr = (sljit_uw)code_ptr;
533 const_ = const_->next;
534 next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
535 }
536
537 SLJIT_GET_NEXT_MIN();
538 }
539 code_ptr++;
540 word_count++;
541 } while (buf_ptr < buf_end);
542
543 buf = buf->next;
544 } while (buf);
545
546 if (label && label->size == word_count) {
547 label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
548 label->size = (sljit_uw)(code_ptr - code);
549 label = label->next;
550 }
551
552 SLJIT_ASSERT(!label);
553 SLJIT_ASSERT(!jump);
554 SLJIT_ASSERT(!const_);
555 SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
556
557 jump = compiler->jumps;
558 while (jump) {
559 generate_jump_or_mov_addr(jump, executable_offset);
560 jump = jump->next;
561 }
562
563 compiler->error = SLJIT_ERR_COMPILED;
564 compiler->executable_offset = executable_offset;
565 compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
566
567 code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
568 code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
569
570 SLJIT_CACHE_FLUSH(code, code_ptr);
571 SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
572 return code;
573 }
574
sljit_has_cpu_feature(sljit_s32 feature_type)575 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
576 {
577 switch (feature_type) {
578 case SLJIT_HAS_FPU:
579 case SLJIT_HAS_SIMD:
580 #ifdef SLJIT_IS_FPU_AVAILABLE
581 return (SLJIT_IS_FPU_AVAILABLE) != 0;
582 #else
583 /* Available by default. */
584 return 1;
585 #endif
586
587 case SLJIT_HAS_CLZ:
588 case SLJIT_HAS_CTZ:
589 case SLJIT_HAS_REV:
590 case SLJIT_HAS_ROT:
591 case SLJIT_HAS_CMOV:
592 case SLJIT_HAS_PREFETCH:
593 case SLJIT_HAS_COPY_F32:
594 case SLJIT_HAS_COPY_F64:
595 case SLJIT_HAS_ATOMIC:
596 return 1;
597
598 default:
599 return 0;
600 }
601 }
602
sljit_cmp_info(sljit_s32 type)603 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
604 {
605 switch (type) {
606 case SLJIT_UNORDERED_OR_EQUAL:
607 case SLJIT_ORDERED_NOT_EQUAL:
608 return 2;
609 }
610
611 return 0;
612 }
613
614 /* --------------------------------------------------------------------- */
615 /* Core code generator functions. */
616 /* --------------------------------------------------------------------- */
617
/* Stores the number of trailing zero bits of value in result and shifts
   those bits out of value. Both arguments must be modifiable lvalues, and
   value must be 64 bits wide because it may be shifted by 32.
   The body is wrapped in do { } while (0), so an invocation followed by a
   semicolon is exactly one statement and stays correct under an unbraced
   if / else. */
#define COUNT_TRAILING_ZERO(value, result) \
	do { \
		(result) = 0; \
		if (!((value) & 0xffffffff)) { \
			(result) += 32; \
			(value) >>= 32; \
		} \
		if (!((value) & 0xffff)) { \
			(result) += 16; \
			(value) >>= 16; \
		} \
		if (!((value) & 0xff)) { \
			(result) += 8; \
			(value) >>= 8; \
		} \
		if (!((value) & 0xf)) { \
			(result) += 4; \
			(value) >>= 4; \
		} \
		if (!((value) & 0x3)) { \
			(result) += 2; \
			(value) >>= 2; \
		} \
		if (!((value) & 0x1)) { \
			(result) += 1; \
			(value) >>= 1; \
		} \
	} while (0)

/* Flag OR-ed into the len argument of logical_imm(): the function then
   returns 0 for the all-zeros / all-ones immediates instead of asserting. */
#define LOGICAL_IMM_CHECK ((sljit_ins)0x100)
646
logical_imm(sljit_sw imm,sljit_u32 len)647 static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len)
648 {
649 sljit_s32 negated;
650 sljit_u32 ones, right;
651 sljit_uw mask, uimm;
652 sljit_ins ins;
653
654 if (len & LOGICAL_IMM_CHECK) {
655 len &= ~LOGICAL_IMM_CHECK;
656 if (len == 32 && (imm == 0 || imm == -1))
657 return 0;
658 if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1))
659 return 0;
660 }
661
662 SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
663 || (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1));
664
665 uimm = (sljit_uw)imm;
666 while (1) {
667 if (len <= 0) {
668 SLJIT_UNREACHABLE();
669 return 0;
670 }
671
672 mask = ((sljit_uw)1 << len) - 1;
673 if ((uimm & mask) != ((uimm >> len) & mask))
674 break;
675 len >>= 1;
676 }
677
678 len <<= 1;
679
680 negated = 0;
681 if (uimm & 0x1) {
682 negated = 1;
683 uimm = ~uimm;
684 }
685
686 if (len < 64)
687 uimm &= ((sljit_uw)1 << len) - 1;
688
689 /* Unsigned right shift. */
690 COUNT_TRAILING_ZERO(uimm, right);
691
692 /* Signed shift. We also know that the highest bit is set. */
693 imm = (sljit_sw)~uimm;
694 SLJIT_ASSERT(imm < 0);
695
696 COUNT_TRAILING_ZERO(imm, ones);
697
698 if (~imm)
699 return 0;
700
701 if (len == 64)
702 ins = 1 << 22;
703 else
704 ins = (0x3f - ((len << 1) - 1)) << 10;
705
706 if (negated)
707 return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);
708
709 return ins | ((ones - 1) << 10) | ((len - right) << 16);
710 }
711
712 #undef COUNT_TRAILING_ZERO
713
/* Emits the instructions which load the constant simm into dst, using one
   of the short forms handled below when the value allows it. Returns
   SLJIT_SUCCESS or the error left by a failed push_inst(). */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm)
{
	const sljit_ins rd = RD(dst);
	sljit_uw imm = (sljit_uw)simm;
	sljit_sw rest;
	sljit_u32 i, zero_parts, ones_parts, first, inverted;
	sljit_ins bitmask, ins;

	/* Values reachable with a single MOVZ / MOVN. */
	if (imm <= 0xffff)
		return push_inst(compiler, MOVZ | rd | ((sljit_ins)imm << 5));

	if (simm < 0 && simm >= -0x10000)
		return push_inst(compiler, MOVN | rd | (((sljit_ins)~imm & 0xffff) << 5));

	/* Values with zero upper half: 32 bit instruction forms. */
	if (imm <= 0xffffffffl) {
		if ((imm & 0xffff) == 0)
			return push_inst(compiler, MOVZ | rd | ((sljit_ins)(imm >> 16) << 5) | (1 << 21));
		if ((imm & 0xffff0000l) == 0xffff0000)
			return push_inst(compiler, (MOVN ^ W_OP) | rd | (((sljit_ins)~imm & 0xffff) << 5));
		if ((imm & 0xffff) == 0xffff)
			return push_inst(compiler, (MOVN ^ W_OP) | rd | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));

		bitmask = logical_imm(simm, 16);
		if (bitmask != 0)
			return push_inst(compiler, (ORRI ^ W_OP) | rd | RN(TMP_ZERO) | bitmask);

		FAIL_IF(push_inst(compiler, MOVZ | rd | (((sljit_ins)imm & 0xffff) << 5)));
		return push_inst(compiler, MOVK | rd | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));
	}

	/* A single ORR with a logical immediate. */
	bitmask = logical_imm(simm, 32);
	if (bitmask != 0)
		return push_inst(compiler, ORRI | rd | RN(TMP_ZERO) | bitmask);

	/* Negative values which fit into 33 bits. */
	if (simm < 0 && simm >= -0x100000000l) {
		if ((imm & 0xffff) == 0xffff)
			return push_inst(compiler, MOVN | rd | (((sljit_ins)~imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));

		FAIL_IF(push_inst(compiler, MOVN | rd | (((sljit_ins)~imm & 0xffff) << 5)));
		return push_inst(compiler, MOVK | rd | (((sljit_ins)imm & 0xffff0000u) >> (16 - 5)) | (1 << 21));
	}

	/* Many more values could be built from ORR and MOVx pairs, but finding
	   those combinations is costly. */

	/* Count the 16 bit parts which are all zeros or all ones. */
	zero_parts = 0;
	ones_parts = 0;
	rest = simm;
	for (i = 0; i < 4; i++) {
		if ((rest & 0xffff) == 0)
			zero_parts++;
		if ((rest & 0xffff) == 0xffff)
			ones_parts++;
		rest >>= 16;
	}

	/* Start with MOVN when that skips more parts than MOVZ would. The value
	   is scanned inverted in that case, so the parts to skip are zero in
	   both modes. */
	inverted = ones_parts > zero_parts;
	rest = (sljit_sw)imm;
	if (inverted)
		rest = ~rest;

	first = 1;
	for (i = 0; i < 4; i++, rest >>= 16) {
		if (!(rest & 0xffff))
			continue;

		if (first) {
			first = 0;
			ins = (inverted ? MOVN : MOVZ) | rd | (((sljit_ins)rest & 0xffff) << 5) | (i << 21);
		}
		else
			/* MOVK takes the part in its original, non-inverted form. */
			ins = MOVK | rd | (((sljit_ins)(inverted ? ~rest : rest) & 0xffff) << 5) | (i << 21);

		FAIL_IF(push_inst(compiler, ins));
	}

	return SLJIT_SUCCESS;
}
802
/* Modifier bits of the flags argument of emit_op_imm(). They all lie above
   the low 16 bits, which emit_op_imm() extracts as the operation code. */
/* arg1 / arg2 holds an immediate value instead of a register. */
#define ARG1_IMM 0x0010000
#define ARG2_IMM 0x0020000
/* Selects W_OP as the initial inv_bits value in emit_op_imm(). */
#define INT_OP 0x0040000
/* Tested by CHECK_FLAGS below. */
#define SET_FLAGS 0x0080000
/* Together with SET_FLAGS: the destination is replaced by TMP_ZERO. */
#define UNUSED_RETURN 0x0100000

/* Helper for emit_op_imm(); it relies on the locals flags, inv_bits and dst
   being in scope. When SET_FLAGS is requested, flag_bits is OR-ed into
   inv_bits, and dst is redirected to TMP_ZERO if UNUSED_RETURN is also set. */
#define CHECK_FLAGS(flag_bits) \
	if (flags & SET_FLAGS) { \
		inv_bits |= flag_bits; \
		if (flags & UNUSED_RETURN) \
			dst = TMP_ZERO; \
	}
815
emit_op_imm(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 dst,sljit_sw arg1,sljit_sw arg2)816 static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
817 {
818 /* dst must be register, TMP_REG1
819 arg1 must be register, TMP_REG1, imm
820 arg2 must be register, TMP_REG2, imm */
821 sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0;
822 sljit_ins inst_bits;
823 sljit_s32 op = (flags & 0xffff);
824 sljit_s32 reg;
825 sljit_sw imm, nimm;
826
827 if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
828 /* Both are immediates. */
829 flags &= ~ARG1_IMM;
830 if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
831 arg1 = TMP_ZERO;
832 else {
833 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
834 arg1 = TMP_REG1;
835 }
836 }
837
838 if (flags & (ARG1_IMM | ARG2_IMM)) {
839 reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
840 imm = (flags & ARG2_IMM) ? arg2 : arg1;
841
842 switch (op) {
843 case SLJIT_CLZ:
844 case SLJIT_CTZ:
845 case SLJIT_REV:
846 case SLJIT_REV_U16:
847 case SLJIT_REV_S16:
848 case SLJIT_REV_U32:
849 case SLJIT_REV_S32:
850 case SLJIT_ADDC:
851 case SLJIT_SUBC:
852 case SLJIT_MUL:
853 case SLJIT_MULADD:
854 /* No form with immediate operand (except imm 0, which
855 is represented by a ZERO register). */
856 break;
857 case SLJIT_MOV:
858 SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
859 return load_immediate(compiler, dst, imm);
860 case SLJIT_SUB:
861 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
862 if (flags & ARG1_IMM)
863 break;
864 imm = -imm;
865 /* Fall through. */
866 case SLJIT_ADD:
867 if (op != SLJIT_SUB)
868 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
869
870 if (imm == 0) {
871 CHECK_FLAGS(1 << 29);
872 return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
873 }
874 if (imm > 0 && imm <= 0xfff) {
875 CHECK_FLAGS(1 << 29);
876 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10));
877 }
878 nimm = -imm;
879 if (nimm > 0 && nimm <= 0xfff) {
880 CHECK_FLAGS(1 << 29);
881 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10));
882 }
883 if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
884 CHECK_FLAGS(1 << 29);
885 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22));
886 }
887 if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
888 CHECK_FLAGS(1 << 29);
889 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22));
890 }
891 if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
892 FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)));
893 return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)imm & 0xfff) << 10));
894 }
895 if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
896 FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)));
897 return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10));
898 }
899 break;
900 case SLJIT_AND:
901 inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
902 if (!inst_bits)
903 break;
904 CHECK_FLAGS(3 << 29);
905 return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
906 case SLJIT_XOR:
907 if (imm == -1) {
908 FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(reg)));
909 goto set_flags;
910 }
911 /* fallthrough */
912 case SLJIT_OR:
913 inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
914 if (!inst_bits)
915 break;
916 if (op == SLJIT_OR)
917 inst_bits |= ORRI;
918 else
919 inst_bits |= EORI;
920 FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
921 goto set_flags;
922 case SLJIT_SHL:
923 case SLJIT_MSHL:
924 if (flags & ARG1_IMM)
925 break;
926
927 if (flags & INT_OP) {
928 imm &= 0x1f;
929 inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10);
930 } else {
931 imm &= 0x3f;
932 inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10);
933 }
934
935 inv_bits |= inv_bits >> 9;
936 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
937 goto set_flags;
938 case SLJIT_LSHR:
939 case SLJIT_MLSHR:
940 case SLJIT_ASHR:
941 case SLJIT_MASHR:
942 if (flags & ARG1_IMM)
943 break;
944
945 inv_bits |= inv_bits >> 9;
946 if (op >= SLJIT_ASHR)
947 inv_bits |= 1 << 30;
948
949 if (flags & INT_OP) {
950 imm &= 0x1f;
951 inst_bits = ((sljit_ins)imm << 16) | (31 << 10);
952 } else {
953 imm &= 0x3f;
954 inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10);
955 }
956
957 FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
958 goto set_flags;
959 case SLJIT_ROTL:
960 case SLJIT_ROTR:
961 if (flags & ARG1_IMM)
962 break;
963
964 if (op == SLJIT_ROTL)
965 imm = -imm;
966
967 imm &= (flags & INT_OP) ? 0x1f : 0x3f;
968 return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10));
969 default:
970 SLJIT_UNREACHABLE();
971 break;
972 }
973
974 if (flags & ARG2_IMM) {
975 if (arg2 == 0)
976 arg2 = TMP_ZERO;
977 else {
978 FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
979 arg2 = TMP_REG2;
980 }
981 }
982 else {
983 if (arg1 == 0)
984 arg1 = TMP_ZERO;
985 else {
986 FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
987 arg1 = TMP_REG1;
988 }
989 }
990 }
991
992 /* Both arguments are registers. */
993 switch (op) {
994 case SLJIT_MOV:
995 case SLJIT_MOV_P:
996 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
997 if (dst == arg2)
998 return SLJIT_SUCCESS;
999 return push_inst(compiler, MOV | RD(dst) | RM(arg2));
1000 case SLJIT_MOV_U8:
1001 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1002 inv_bits |= inv_bits >> 9;
1003 return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
1004 case SLJIT_MOV_S8:
1005 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1006 inv_bits |= inv_bits >> 9;
1007 return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
1008 case SLJIT_MOV_U16:
1009 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1010 inv_bits |= inv_bits >> 9;
1011 return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
1012 case SLJIT_MOV_S16:
1013 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1014 inv_bits |= inv_bits >> 9;
1015 return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
1016 case SLJIT_MOV32:
1017 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1018 if (dst == arg2)
1019 return SLJIT_SUCCESS;
1020 /* fallthrough */
1021 case SLJIT_MOV_U32:
1022 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1023 return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2));
1024 case SLJIT_MOV_S32:
1025 SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
1026 return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
1027 case SLJIT_CLZ:
1028 SLJIT_ASSERT(arg1 == TMP_REG1);
1029 return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
1030 case SLJIT_CTZ:
1031 SLJIT_ASSERT(arg1 == TMP_REG1);
1032 FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2)));
1033 return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst));
1034 case SLJIT_REV:
1035 SLJIT_ASSERT(arg1 == TMP_REG1);
1036 inv_bits |= inv_bits >> 21;
1037 return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2));
1038 case SLJIT_REV_U16:
1039 case SLJIT_REV_S16:
1040 SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
1041 FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2)));
1042 if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16))
1043 return SLJIT_SUCCESS;
1044 inv_bits |= inv_bits >> 9;
1045 return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
1046 case SLJIT_REV_U32:
1047 case SLJIT_REV_S32:
1048 SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
1049 FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2)));
1050 if (op == SLJIT_REV_U32 || dst == TMP_REG1)
1051 return SLJIT_SUCCESS;
1052 return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10));
1053 case SLJIT_ADD:
1054 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1055 CHECK_FLAGS(1 << 29);
1056 return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
1057 case SLJIT_ADDC:
1058 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
1059 CHECK_FLAGS(1 << 29);
1060 return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
1061 case SLJIT_SUB:
1062 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1063 CHECK_FLAGS(1 << 29);
1064 return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
1065 case SLJIT_SUBC:
1066 compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
1067 CHECK_FLAGS(1 << 29);
1068 return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
1069 case SLJIT_MUL:
1070 compiler->status_flags_state = 0;
1071 if (!(flags & SET_FLAGS))
1072 return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
1073 if (flags & INT_OP) {
1074 FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
1075 FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
1076 return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
1077 }
1078 FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
1079 FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
1080 return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
1081 case SLJIT_AND:
1082 CHECK_FLAGS(3 << 29);
1083 return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
1084 case SLJIT_OR:
1085 FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
1086 break; /* Set flags. */
1087 case SLJIT_XOR:
1088 FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
1089 break; /* Set flags. */
1090 case SLJIT_SHL:
1091 case SLJIT_MSHL:
1092 FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
1093 break; /* Set flags. */
1094 case SLJIT_LSHR:
1095 case SLJIT_MLSHR:
1096 FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
1097 break; /* Set flags. */
1098 case SLJIT_ASHR:
1099 case SLJIT_MASHR:
1100 FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
1101 break; /* Set flags. */
1102 case SLJIT_ROTL:
1103 FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2)));
1104 arg2 = TMP_REG2;
1105 /* fallthrough */
1106 case SLJIT_ROTR:
1107 return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
1108 case SLJIT_MULADD:
1109 compiler->status_flags_state = 0;
1110 return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(dst));
1111 default:
1112 SLJIT_UNREACHABLE();
1113 return SLJIT_SUCCESS;
1114 }
1115
1116 set_flags:
1117 if (flags & SET_FLAGS)
1118 return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
1119 return SLJIT_SUCCESS;
1120 }
1121
/* Flag bits understood by emit_op_mem(). */

/* The access is a store. When this bit is clear emit_op_mem() adds the
   load bits (0x00400000 or 0x00800000) to the instruction. */
#define STORE 0x10
/* Only consulted for loads: selects the 0x00800000 load bits instead of 0x00400000. */
#define SIGNED 0x20

/* Access size selector, kept in the low two bits of the flags. */
#define BYTE_SIZE 0x0
#define HALF_SIZE 0x1
#define INT_SIZE 0x2
#define WORD_SIZE 0x3

/* Extracts the size selector. emit_op_mem() places it in bits 30-31 of the
   instruction and also uses it as the shift amount for scaled offsets. */
#define MEM_SIZE_SHIFT(flags) ((sljit_ins)(flags) & 0x3)
1131
/* Emits one load or store of 'reg' for the memory operand described by
   'arg' / 'argw'.

   flags:   size selector (BYTE_SIZE .. WORD_SIZE) optionally combined
            with STORE and SIGNED.
   reg:     register to be loaded or stored.
   arg:     SLJIT_MEM operand; may carry a base and an offset register.
   argw:    immediate offset, or the index shift when an offset register
            is present.
   tmp_reg: scratch register used whenever the address has to be computed
            by an extra instruction.

   The branches below are tried in order, each one covering a wider offset
   range than the previous at the cost of an additional instruction.
   Returns the result of the last push_inst(), or the error reported
   through FAIL_IF. */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_u32 shift = MEM_SIZE_SHIFT(flags);
	/* The size selector occupies the top two bits of the instruction. */
	sljit_u32 type = (shift << 30);

	/* Loads get an extra opcode bit; SIGNED picks a different one. */
	if (!(flags & STORE))
		type |= (flags & SIGNED) ? 0x00800000 : 0x00400000;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Base register + offset register. Only the low two bits of argw
	   (the index shift) are meaningful here. */
	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;

		/* A shift of zero or of exactly the access size is encoded in the
		   instruction itself; bit 12 is set when the index is shifted. */
		if (argw == 0 || argw == shift)
			return push_inst(compiler, STRB | type | RT(reg)
				| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));

		/* Any other shift: form base + (index << argw) in tmp_reg first,
		   then access it with a zero immediate offset. */
		FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10)));
		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg));
	}

	arg &= REG_MASK;

	/* No base register: argw is used as the address itself. Load it into
	   tmp_reg with the 12 bit scaled offset field cleared, and put that
	   field into the instruction. */
	if (!arg) {
		FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift)));

		argw = (argw >> shift) & 0xfff;

		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
	}

	/* Offsets that are a multiple of the access size can use the scaled
	   12 bit immediate of STRBI. */
	if ((argw & ((1 << shift) - 1)) == 0) {
		if (argw >= 0) {
			/* Fits directly: argw << (10 - shift) equals (argw >> shift) << 10. */
			if ((argw >> shift) <= 0xfff)
				return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));

			/* Up to 24 bits: add the part above bit 12 to the base
			   (ADDI with bit 22 set), keep the low 12 bits as the offset. */
			if (argw <= 0xffffff) {
				FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));

				argw = ((argw & 0xfff) >> shift);
				return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
			}
		} else if (argw < -256 && argw >= -0xfff000) {
			/* Negative offset: subtract -argw rounded up to a 4096
			   multiple, which leaves a non-negative remainder below 4096
			   for the scaled offset field. */
			FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10)));
			argw = ((0x1000 + argw) & 0xfff) >> shift;
			return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
		}
	}

	/* Offsets in [-256, 255] are encoded as a 9 bit field of STURBI,
	   regardless of alignment. */
	if (argw <= 0xff && argw >= -0x100)
		return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));

	/* Offsets whose low 12 bits are within 256 of a 4096 multiple: adjust
	   the base by that multiple and use the 9 bit field for the rest. */
	if (((argw + 0x100) & 0xfff) <= 0x1ff && argw <= 0xfff0ff && argw >= -0xfff100) {
		if (argw >= 0) {
			/* Bit 8 set means the 9 bit remainder is negative, so the
			   base adjustment must be rounded up. */
			if (argw & 0x100)
				argw += 0x1000;

			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));
			return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
		} else {
			/* Mirror of the case above for negative offsets. */
			if (!(argw & 0x100))
				argw -= 0x1000;

			FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)-argw >> 12) << 10)));
			return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
		}
	}

	/* Generic fallback: materialize the whole offset in tmp_reg and use
	   the base + register form. */
	FAIL_IF(load_immediate(compiler, tmp_reg, argw));

	return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
}
1205
1206 /* --------------------------------------------------------------------- */
1207 /* Entry, exit */
1208 /* --------------------------------------------------------------------- */
1209
/* Emits the function prologue: allocates the stack frame, stores the
   registers that have to be preserved, moves the incoming integer
   arguments into saved registers (unless SLJIT_ENTER_REG_ARG is set),
   stores TMP_FP / TMP_LR at the bottom of the frame and finally copies
   the stack pointer into TMP_FP.

   The total frame size (locals + saved registers, rounded up to 16) is
   recorded in compiler->local_size.

   Returns SLJIT_SUCCESS or the error reported by CHECK / FAIL_IF. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 prev, fprev, saved_regs_size, i, tmp;
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_ins offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Kept saved registers are not stored; the extra 2 accounts for two
	   more word sized slots (TMP_FP and TMP_LR are stored below). */
	saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
	saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);

	/* Round the whole frame up to a multiple of 16 bytes. */
	local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
	compiler->local_size = local_size;

	if (local_size <= 512) {
		/* Small frame: a single STP_PRE both allocates the frame and
		   stores TMP_FP / TMP_LR at its bottom. */
		FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
			| RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15)));
		/* Offset field (in words, at bit 15) of the topmost register pair. */
		offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3);
		/* Nothing is left to allocate. */
		local_size = 0;
	} else {
		/* Large frame: first allocate only the area of the saved registers
		   (without the TMP_FP / TMP_LR pair), rounded up to 16 bytes. */
		saved_regs_size = ((saved_regs_size - 2 * SSIZE_OF(sw)) + 0xf) & ~0xf;

		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)saved_regs_size << 10)));
		offs = (sljit_ins)(saved_regs_size - 2 * SSIZE_OF(sw)) << (15 - 3);
		/* From here on local_size is the part still to be allocated. */
		local_size -= saved_regs_size;
		SLJIT_ASSERT(local_size > 0);
	}

	/* Registers are stored in pairs: 'prev' holds the first member of a
	   pair until its partner is found (-1 means no pending register). */
	prev = -1;

	/* Saved registers, skipping the kept ones. */
	tmp = SLJIT_S0 - saveds;
	for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		/* Step down by two words. */
		offs -= (sljit_ins)2 << 15;
		prev = -1;
	}

	/* Scratch registers that are numbered in the saved register range. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		offs -= (sljit_ins)2 << 15;
		prev = -1;
	}

	/* Same pairing scheme for the floating point registers. */
	fprev = -1;

	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		if (fprev == -1) {
			fprev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
		offs -= (sljit_ins)2 << 15;
		fprev = -1;
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		if (fprev == -1) {
			fprev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
		offs -= (sljit_ins)2 << 15;
		fprev = -1;
	}

	/* Store the unpaired leftovers individually. The pair offset field is
	   moved to bit 10 (offs >> 5); adding (1 << 10) selects the upper word
	   of the slot pair. */
	if (fprev != -1)
		FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10)));

	/* If a float register already took the upper word, the integer
	   register goes into the lower one. */
	if (prev != -1)
		FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));


#ifdef _WIN32
	/* Move the stack pointer down by the first 4096 bytes; the remaining
	   part is handled after the arguments are moved. */
	if (local_size > 4096)
		FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
#endif /* _WIN32 */

	if (!(options & SLJIT_ENTER_REG_ARG)) {
		/* Skip the return type, then copy every non-float argument that is
		   not marked as scratch from its argument register (starting at
		   SLJIT_R0) into the next saved register (starting at SLJIT_S0). */
		arg_types >>= SLJIT_ARG_SHIFT;
		saved_arg_count = 0;
		tmp = SLJIT_R0;

		while (arg_types) {
			if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
				if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
					FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp)));
					saved_arg_count++;
				}
				tmp++;
			}
			arg_types >>= SLJIT_ARG_SHIFT;
		}
	}

#ifdef _WIN32
	/* NOTE(review): the loads into TMP_ZERO below presumably exist to touch
	   each 4096 byte page while the stack grows (stack probing) - confirm
	   against the Windows ARM64 ABI. */
	if (local_size > 4096) {
		if (local_size < 4 * 4096) {
			/* No need for a loop. */

			if (local_size >= 2 * 4096) {
				if (local_size >= 3 * 4096) {
					FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
					FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
				}

				FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
				FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
			}
		}
		else {
			/* TMP_REG1 counts the remaining 4096 byte steps; the branch
			   goes back three instructions while the counter is non-zero. */
			FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5)));
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
			FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
			FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10)));
			FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
		}

		/* Only the part below 4096 bytes is left. */
		local_size &= 0xfff;

		if (local_size > 0)
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
		else
			FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
	}

	if (local_size > 0) {
		if (local_size <= 512)
			FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
				| RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15)));
		else {
			/* Exactly 4096 bytes: encoded as 1 with bit 22 set, since
			   (1 << 12) << 10 equals 1 << 22. */
			if (local_size >= 4096)
				local_size = (1 << (22 - 10));

			FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10)));
			FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
		}
	}

#else /* !_WIN32 */

	/* The local_size does not include saved registers size. */
	if (local_size != 0) {
		/* Allocate the part above 12 bits first (SUBI with bit 22 set). */
		if (local_size > 0xfff) {
			FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22)));
			local_size &= 0xfff;
		}

		if (local_size > 512 || local_size == 0) {
			/* Too large for STP_PRE (or nothing left): allocate the rest
			   separately and store TMP_FP / TMP_LR at offset 0. */
			if (local_size != 0)
				FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10)));

			FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
		} else
			/* The rest is allocated by the store itself. */
			FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
				| RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15)));
	}

#endif /* _WIN32 */

	/* TMP_FP = SLJIT_SP + 0 */
	return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10));
}
1384
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1385 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1386 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1387 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1388 {
1389 sljit_s32 saved_regs_size;
1390
1391 CHECK_ERROR();
1392 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1393 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1394
1395 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
1396 saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
1397
1398 compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
1399 return SLJIT_SUCCESS;
1400 }
1401
/* Emits the function epilogue without the final return / jump: reloads
   TMP_FP (and TMP_LR unless 'is_return_to' is set), reloads the registers
   stored by sljit_emit_enter() and releases the stack frame described by
   compiler->local_size.

   is_return_to: non-zero when called from sljit_emit_return_to(); only
                 TMP_FP is reloaded in that case.

   Returns the result of the last push_inst(), or the error reported
   through FAIL_IF. */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 local_size, prev, fprev, i, tmp;
	sljit_ins offs;

	local_size = compiler->local_size;

	/* When the frame is only slightly larger than 512 bytes, the excess is
	   released by the post-indexed load itself, leaving exactly 512 bytes.
	   The limits differ because the pair load and the single load have
	   different offset encodings (shift by 15 - 3 versus 12). */
	if (!is_return_to) {
		if (local_size > 512 && local_size <= 512 + 496) {
			FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
				| RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << (15 - 3))));
			local_size = 512;
		} else
			FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
	} else {
		if (local_size > 512 && local_size <= 512 + 248) {
			FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(local_size - 512) << 12)));
			local_size = 512;
		} else
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0));
	}

	/* Still more than 512 bytes: release everything above 512 with one or
	   two ADDI instructions (the first one, with bit 22 set, takes the
	   part above 12 bits). */
	if (local_size > 512) {
		local_size -= 512;
		if (local_size > 0xfff) {
			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP)
				| (((sljit_ins)local_size >> 12) << 10) | (1 << 22)));
			local_size &= 0xfff;
		}

		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10)));
		local_size = 512;
	}

	/* Offset field of the topmost register pair. The loops below walk the
	   registers in the same order as sljit_emit_enter() stored them. */
	offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3);
	/* -1 means no register is waiting for its pair. */
	prev = -1;

	tmp = SLJIT_S0 - compiler->saveds;
	for (i = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); i > tmp; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		/* Step down by two words. */
		offs -= (sljit_ins)2 << 15;
		prev = -1;
	}

	for (i = compiler->scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		if (prev == -1) {
			prev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
		offs -= (sljit_ins)2 << 15;
		prev = -1;
	}

	fprev = -1;

	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		if (fprev == -1) {
			fprev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
		offs -= (sljit_ins)2 << 15;
		fprev = -1;
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		if (fprev == -1) {
			fprev = i;
			continue;
		}
		FAIL_IF(push_inst(compiler, LDP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
		offs -= (sljit_ins)2 << 15;
		fprev = -1;
	}

	/* Reload the unpaired leftovers. The pair offset field is moved to
	   bit 10 (offs >> 5); adding (1 << 10) selects the upper word. */
	if (fprev != -1)
		FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10)));

	/* The integer leftover uses the lower word if a float leftover exists. */
	if (prev != -1)
		FAIL_IF(push_inst(compiler, LDRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));

	/* This and the next call/jump instruction can be executed in parallel. */
	return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(local_size << 10));
}
1492
/* Emits a return without a return value: releases the stack frame
   (reloading TMP_FP and TMP_LR as well) and returns through TMP_LR.
   Returns the result of the last push_inst(), or the error reported
   through CHECK / FAIL_IF. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	/* is_return_to == 0: TMP_LR is reloaded together with TMP_FP. */
	FAIL_IF(emit_stack_frame_release(compiler, 0));

	return push_inst(compiler, RET | RN(TMP_LR));
}
1502
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1503 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1504 sljit_s32 src, sljit_sw srcw)
1505 {
1506 CHECK_ERROR();
1507 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1508
1509 if (src & SLJIT_MEM) {
1510 ADJUST_LOCAL_OFFSET(src, srcw);
1511 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
1512 src = TMP_REG1;
1513 srcw = 0;
1514 } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1515 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
1516 src = TMP_REG1;
1517 srcw = 0;
1518 }
1519
1520 FAIL_IF(emit_stack_frame_release(compiler, 1));
1521
1522 SLJIT_SKIP_CHECKS(compiler);
1523 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1524 }
1525
1526 /* --------------------------------------------------------------------- */
1527 /* Operators */
1528 /* --------------------------------------------------------------------- */
1529
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1530 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1531 {
1532 sljit_ins inv_bits = (op & SLJIT_32) ? W_OP : 0;
1533
1534 CHECK_ERROR();
1535 CHECK(check_sljit_emit_op0(compiler, op));
1536
1537 op = GET_OPCODE(op);
1538 switch (op) {
1539 case SLJIT_BREAKPOINT:
1540 return push_inst(compiler, BRK);
1541 case SLJIT_NOP:
1542 return push_inst(compiler, NOP);
1543 case SLJIT_LMUL_UW:
1544 case SLJIT_LMUL_SW:
1545 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0)));
1546 FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1547 return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1548 case SLJIT_DIVMOD_UW:
1549 case SLJIT_DIVMOD_SW:
1550 FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0)));
1551 FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
1552 FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1553 return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1554 case SLJIT_DIV_UW:
1555 case SLJIT_DIV_SW:
1556 return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
1557 case SLJIT_ENDBR:
1558 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1559 return SLJIT_SUCCESS;
1560 }
1561
1562 return SLJIT_SUCCESS;
1563 }
1564
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1565 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1566 sljit_s32 dst, sljit_sw dstw,
1567 sljit_s32 src, sljit_sw srcw)
1568 {
1569 sljit_s32 dst_r, flags, mem_flags;
1570 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1571
1572 CHECK_ERROR();
1573 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1574 ADJUST_LOCAL_OFFSET(dst, dstw);
1575 ADJUST_LOCAL_OFFSET(src, srcw);
1576
1577 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1578
1579 op = GET_OPCODE(op);
1580 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1581 /* Both operands are registers. */
1582 if (FAST_IS_REG(dst) && FAST_IS_REG(src))
1583 return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src);
1584
1585 switch (op) {
1586 case SLJIT_MOV:
1587 case SLJIT_MOV_P:
1588 mem_flags = WORD_SIZE;
1589 break;
1590 case SLJIT_MOV_U8:
1591 mem_flags = BYTE_SIZE;
1592 if (src == SLJIT_IMM)
1593 srcw = (sljit_u8)srcw;
1594 break;
1595 case SLJIT_MOV_S8:
1596 mem_flags = BYTE_SIZE | SIGNED;
1597 if (src == SLJIT_IMM)
1598 srcw = (sljit_s8)srcw;
1599 break;
1600 case SLJIT_MOV_U16:
1601 mem_flags = HALF_SIZE;
1602 if (src == SLJIT_IMM)
1603 srcw = (sljit_u16)srcw;
1604 break;
1605 case SLJIT_MOV_S16:
1606 mem_flags = HALF_SIZE | SIGNED;
1607 if (src == SLJIT_IMM)
1608 srcw = (sljit_s16)srcw;
1609 break;
1610 case SLJIT_MOV_U32:
1611 mem_flags = INT_SIZE;
1612 if (src == SLJIT_IMM)
1613 srcw = (sljit_u32)srcw;
1614 break;
1615 case SLJIT_MOV_S32:
1616 case SLJIT_MOV32:
1617 mem_flags = INT_SIZE | SIGNED;
1618 if (src == SLJIT_IMM)
1619 srcw = (sljit_s32)srcw;
1620 break;
1621 default:
1622 SLJIT_UNREACHABLE();
1623 mem_flags = 0;
1624 break;
1625 }
1626
1627 if (src == SLJIT_IMM)
1628 FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
1629 else if (!(src & SLJIT_MEM))
1630 dst_r = src;
1631 else
1632 FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG2));
1633
1634 if (dst & SLJIT_MEM)
1635 return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
1636 return SLJIT_SUCCESS;
1637 }
1638
1639 flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
1640
1641 switch (op) {
1642 case SLJIT_REV_U16:
1643 case SLJIT_REV_S16:
1644 mem_flags = HALF_SIZE;
1645 break;
1646 case SLJIT_REV_U32:
1647 case SLJIT_REV_S32:
1648 mem_flags = INT_SIZE;
1649 break;
1650 default:
1651 mem_flags = WORD_SIZE;
1652
1653 if (op_flags & SLJIT_32) {
1654 flags |= INT_OP;
1655 mem_flags = INT_SIZE;
1656 }
1657 break;
1658 }
1659
1660 if (src & SLJIT_MEM) {
1661 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2));
1662 src = TMP_REG2;
1663 }
1664
1665 emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src);
1666
1667 if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
1668 return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
1669 return SLJIT_SUCCESS;
1670 }
1671
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1672 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1673 sljit_s32 dst, sljit_sw dstw,
1674 sljit_s32 src1, sljit_sw src1w,
1675 sljit_s32 src2, sljit_sw src2w)
1676 {
1677 sljit_s32 dst_r, flags, mem_flags;
1678
1679 CHECK_ERROR();
1680 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1681 ADJUST_LOCAL_OFFSET(dst, dstw);
1682 ADJUST_LOCAL_OFFSET(src1, src1w);
1683 ADJUST_LOCAL_OFFSET(src2, src2w);
1684
1685 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1686 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
1687 mem_flags = WORD_SIZE;
1688
1689 if (op & SLJIT_32) {
1690 flags |= INT_OP;
1691 mem_flags = INT_SIZE;
1692 }
1693
1694 if (dst == TMP_REG2)
1695 flags |= UNUSED_RETURN;
1696
1697 if (src1 & SLJIT_MEM) {
1698 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1));
1699 src1 = TMP_REG1;
1700 }
1701
1702 if (src2 & SLJIT_MEM) {
1703 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2));
1704 src2 = TMP_REG2;
1705 }
1706
1707 if (src1 == SLJIT_IMM)
1708 flags |= ARG1_IMM;
1709 else
1710 src1w = src1;
1711
1712 if (src2 == SLJIT_IMM)
1713 flags |= ARG2_IMM;
1714 else
1715 src2w = src2;
1716
1717 emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
1718
1719 if (dst & SLJIT_MEM)
1720 return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
1721 return SLJIT_SUCCESS;
1722 }
1723
/* Two operand operation whose result is not stored anywhere. Implemented
   by forwarding to sljit_emit_op2() with TMP_REG2 as the destination,
   which that function treats as an unused result (UNUSED_RETURN). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	/* The arguments were verified above; skip the checks of the callee. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, TMP_REG2, 0, src1, src1w, src2, src2w);
}
1734
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1735 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
1736 sljit_s32 dst_reg,
1737 sljit_s32 src1, sljit_sw src1w,
1738 sljit_s32 src2, sljit_sw src2w)
1739 {
1740 CHECK_ERROR();
1741 CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
1742
1743 switch (GET_OPCODE(op)) {
1744 case SLJIT_MULADD:
1745 SLJIT_SKIP_CHECKS(compiler);
1746 return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
1747 }
1748
1749 return SLJIT_SUCCESS;
1750 }
1751
/* Emits a double register shift: src1_reg is shifted by src3 and the
   vacated bits are filled from src2_reg; the result goes to dst_reg.

   op:        SLJIT_SHL / SLJIT_MSHL shift left, any other opcode shifts
              right; SLJIT_32 selects the 32 bit form and
              SLJIT_SHIFT_INTO_NON_ZERO selects the shorter sequence below.
   src3/src3w: shift amount (immediate, register or memory).

   TMP_REG1 and TMP_REG2 are used as scratch registers.
   Returns the result of the last push_inst(), or the error reported
   through CHECK / FAIL_IF. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_ins inv_bits, imm;
	sljit_s32 is_left;
	sljit_sw mask;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);

	/* Shifting a register into itself is a rotate. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	/* Non-zero for 32 bit operations; XOR-ed into the instructions. */
	inv_bits = (op & SLJIT_32) ? W_OP : 0;

	if (src3 == SLJIT_IMM) {
		/* Constant shift: a single EXTR instruction is enough. */
		mask = inv_bits ? 0x1f : 0x3f;
		src3w &= mask;

		/* NOTE(review): nothing is emitted for a zero shift, so dst_reg is
		   left untouched even when it differs from src1_reg - confirm that
		   callers / the argument checker guarantee this is acceptable. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* A left shift by n is expressed as an extract at (bits - n). */
		if (is_left)
			src3w = (src3w ^ mask) + 1;

		return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst_reg)
			| RN(is_left ? src1_reg : src2_reg) | RM(is_left ? src2_reg : src1_reg) | ((sljit_ins)src3w << 10));
	}

	if (src3 & SLJIT_MEM) {
		FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
		src3 = TMP_REG2;
	} else if (dst_reg == src3) {
		/* dst_reg is overwritten by the first shift below, so keep a copy
		   of the shift amount. */
		FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3)));
		src3 = TMP_REG2;
	}

	/* dst_reg = src1_reg shifted by src3. */
	FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(dst_reg) | RN(src1_reg) | RM(src3)));

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* The shift amount may be zero: src2_reg is first shifted by one
		   in the opposite direction, then by (src3 ^ mask), so that a zero
		   amount contributes no bits. */
		/* Shift left/right by 1. */
		if (is_left)
			imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22)));
		else
			imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22)));

		FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm));

		/* Set imm to mask. */
		imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22)));
		FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src3) | imm));

		src2_reg = TMP_REG1;
	} else
		/* Shift amount is known to be non-zero: TMP_REG2 = 0 - src3. */
		FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3)));

	/* Shift src2_reg in the opposite direction and merge it into dst_reg. */
	FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | RM(TMP_REG2)));
	return push_inst(compiler, (ORR ^ inv_bits) | RD(dst_reg) | RN(dst_reg) | RM(TMP_REG1));
}
1820
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1821 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
1822 sljit_s32 src, sljit_sw srcw)
1823 {
1824 CHECK_ERROR();
1825 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
1826 ADJUST_LOCAL_OFFSET(src, srcw);
1827
1828 switch (op) {
1829 case SLJIT_FAST_RETURN:
1830 if (FAST_IS_REG(src))
1831 FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src)));
1832 else
1833 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
1834
1835 return push_inst(compiler, RET | RN(TMP_LR));
1836 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
1837 return SLJIT_SUCCESS;
1838 case SLJIT_PREFETCH_L1:
1839 case SLJIT_PREFETCH_L2:
1840 case SLJIT_PREFETCH_L3:
1841 case SLJIT_PREFETCH_ONCE:
1842 SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
1843
1844 /* The reg_map[op] should provide the appropriate constant. */
1845 if (op == SLJIT_PREFETCH_L1)
1846 op = 1;
1847 else if (op == SLJIT_PREFETCH_L2)
1848 op = 3;
1849 else if (op == SLJIT_PREFETCH_L3)
1850 op = 5;
1851 else
1852 op = 2;
1853
1854 /* Signed word sized load is the prefetch instruction. */
1855 return emit_op_mem(compiler, WORD_SIZE | SIGNED, op, src, srcw, TMP_REG1);
1856 }
1857
1858 return SLJIT_SUCCESS;
1859 }
1860
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)1861 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
1862 sljit_s32 dst, sljit_sw dstw)
1863 {
1864 sljit_s32 dst_r = TMP_LR;
1865
1866 CHECK_ERROR();
1867 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
1868 ADJUST_LOCAL_OFFSET(dst, dstw);
1869
1870 switch (op) {
1871 case SLJIT_FAST_ENTER:
1872 if (FAST_IS_REG(dst))
1873 return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR));
1874 break;
1875 case SLJIT_GET_RETURN_ADDRESS:
1876 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1877 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), 0x8, TMP_REG2));
1878 break;
1879 }
1880
1881 if (dst & SLJIT_MEM)
1882 return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2);
1883
1884 return SLJIT_SUCCESS;
1885 }
1886
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)1887 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
1888 {
1889 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
1890
1891 if (type == SLJIT_GP_REGISTER)
1892 return reg_map[reg];
1893
1894 if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128)
1895 return -1;
1896
1897 return freg_map[reg];
1898 }
1899
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)1900 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1901 void *instruction, sljit_u32 size)
1902 {
1903 SLJIT_UNUSED_ARG(size);
1904 CHECK_ERROR();
1905 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1906
1907 return push_inst(compiler, *(sljit_ins*)instruction);
1908 }
1909
1910 /* --------------------------------------------------------------------- */
1911 /* Floating point operators */
1912 /* --------------------------------------------------------------------- */
1913
/* Emits a floating point load or store of `reg` for the memory operand
   described by arg / argw. The access size is taken from
   MEM_SIZE_SHIFT(flags) and the STORE bit of flags selects the direction.
   TMP_REG2 is used whenever the address must be computed separately. */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_u32 size_shift = MEM_SIZE_SHIFT(flags);
	sljit_ins opc = (size_shift << 30);
	sljit_s32 base;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* Without STORE the load variant of the encoding is selected. */
	if (!(flags & STORE))
		opc |= 0x00400000;

	base = arg & REG_MASK;

	if (arg & OFFS_REG_MASK) {
		/* base + (index << shift) addressing. */
		argw &= 3;

		if (argw != 0 && argw != size_shift) {
			/* This shift cannot be used by the access itself: add first. */
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG2) | RN(base) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10)));
			return push_inst(compiler, STR_FI | opc | VT(reg) | RN(TMP_REG2));
		}

		return push_inst(compiler, STR_FR | opc | VT(reg)
			| RN(base) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
	}

	if (base == 0) {
		/* No base register: load the address without its 12 bit scaled
		   field into TMP_REG2 and encode that field in the instruction. */
		FAIL_IF(load_immediate(compiler, TMP_REG2, argw & ~(0xfff << size_shift)));

		argw = (argw >> size_shift) & 0xfff;

		return push_inst(compiler, STR_FI | opc | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10));
	}

	if (argw >= 0 && (argw & ((1 << size_shift) - 1)) == 0) {
		/* Non-negative, size aligned offset. */
		if ((argw >> size_shift) <= 0xfff)
			return push_inst(compiler, STR_FI | opc | VT(reg) | RN(base) | ((sljit_ins)argw << (10 - size_shift)));

		if (argw <= 0xffffff) {
			/* Add the upper 12 bits first, the rest fits into the access. */
			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG2) | RN(base) | (((sljit_ins)argw >> 12) << 10)));

			argw = ((argw & 0xfff) >> size_shift);
			return push_inst(compiler, STR_FI | opc | VT(reg) | RN(TMP_REG2) | ((sljit_ins)argw << 10));
		}
	}

	/* Small signed offset: 9 bit immediate form. */
	if (argw >= -256 && argw <= 255)
		return push_inst(compiler, STUR_FI | opc | VT(reg) | RN(base) | (((sljit_ins)argw & 0x1ff) << 12));

	/* Generic fallback: the offset is passed in TMP_REG2. */
	FAIL_IF(load_immediate(compiler, TMP_REG2, argw));
	return push_inst(compiler, STR_FR | opc | VT(reg) | RN(base) | RM(TMP_REG2));
}
1962
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1963 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
1964 sljit_s32 dst, sljit_sw dstw,
1965 sljit_s32 src, sljit_sw srcw)
1966 {
1967 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1968 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1969
1970 if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
1971 inv_bits |= W_OP;
1972
1973 if (src & SLJIT_MEM) {
1974 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw));
1975 src = TMP_FREG1;
1976 }
1977
1978 FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
1979
1980 if (dst & SLJIT_MEM)
1981 return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2);
1982 return SLJIT_SUCCESS;
1983 }
1984
/* Shared implementation of the integer to floating point conversions.
   `ins` is the complete conversion opcode prepared by the caller: its W_OP
   bit decides whether a memory source is loaded as WORD_SIZE or INT_SIZE,
   and its bit 22 decides whether a memory destination is stored as
   WORD_SIZE or INT_SIZE. Memory and immediate sources go through TMP_REG1,
   memory destinations through TMP_FREG1.
   Returns SLJIT_SUCCESS or the compiler error code. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		/* The load may fail: stop here instead of emitting the conversion
		   after a failed load (the result used to be ignored). */
		FAIL_IF(emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	} else if (src == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
2005
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2006 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
2007 sljit_s32 dst, sljit_sw dstw,
2008 sljit_s32 src, sljit_sw srcw)
2009 {
2010 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
2011
2012 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
2013 inv_bits |= W_OP;
2014
2015 if (src == SLJIT_IMM)
2016 srcw = (sljit_s32)srcw;
2017 }
2018
2019 return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw);
2020 }
2021
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2022 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
2023 sljit_s32 dst, sljit_sw dstw,
2024 sljit_s32 src, sljit_sw srcw)
2025 {
2026 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
2027
2028 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
2029 inv_bits |= W_OP;
2030
2031 if (src == SLJIT_IMM)
2032 srcw = (sljit_u32)srcw;
2033 }
2034
2035 return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw);
2036 }
2037
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2038 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2039 sljit_s32 src1, sljit_sw src1w,
2040 sljit_s32 src2, sljit_sw src2w)
2041 {
2042 sljit_s32 mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE;
2043 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
2044
2045 if (src1 & SLJIT_MEM) {
2046 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
2047 src1 = TMP_FREG1;
2048 }
2049
2050 if (src2 & SLJIT_MEM) {
2051 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
2052 src2 = TMP_FREG2;
2053 }
2054
2055 FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)));
2056
2057 if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
2058 return SLJIT_SUCCESS;
2059
2060 FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2061 return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4);
2062 }
2063
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)2064 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
2065 sljit_s32 dst, sljit_sw dstw,
2066 sljit_s32 src, sljit_sw srcw)
2067 {
2068 sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE;
2069 sljit_ins inv_bits;
2070
2071 CHECK_ERROR();
2072
2073 SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference);
2074 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
2075
2076 inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
2077 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2078
2079 if (src & SLJIT_MEM) {
2080 FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw));
2081 src = dst_r;
2082 }
2083
2084 switch (GET_OPCODE(op)) {
2085 case SLJIT_MOV_F64:
2086 if (src != dst_r) {
2087 if (!(dst & SLJIT_MEM))
2088 FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
2089 else
2090 dst_r = src;
2091 }
2092 break;
2093 case SLJIT_NEG_F64:
2094 FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
2095 break;
2096 case SLJIT_ABS_F64:
2097 FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
2098 break;
2099 case SLJIT_CONV_F64_FROM_F32:
2100 FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
2101 break;
2102 }
2103
2104 if (dst & SLJIT_MEM)
2105 return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
2106 return SLJIT_SUCCESS;
2107 }
2108
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2109 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
2110 sljit_s32 dst, sljit_sw dstw,
2111 sljit_s32 src1, sljit_sw src1w,
2112 sljit_s32 src2, sljit_sw src2w)
2113 {
2114 sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE;
2115 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
2116
2117 CHECK_ERROR();
2118 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
2119 ADJUST_LOCAL_OFFSET(dst, dstw);
2120 ADJUST_LOCAL_OFFSET(src1, src1w);
2121 ADJUST_LOCAL_OFFSET(src2, src2w);
2122
2123 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
2124 if (src1 & SLJIT_MEM) {
2125 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
2126 src1 = TMP_FREG1;
2127 }
2128 if (src2 & SLJIT_MEM) {
2129 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
2130 src2 = TMP_FREG2;
2131 }
2132
2133 switch (GET_OPCODE(op)) {
2134 case SLJIT_ADD_F64:
2135 FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
2136 break;
2137 case SLJIT_SUB_F64:
2138 FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
2139 break;
2140 case SLJIT_MUL_F64:
2141 FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
2142 break;
2143 case SLJIT_DIV_F64:
2144 FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
2145 break;
2146 case SLJIT_COPYSIGN_F64:
2147 FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1)));
2148 FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1)));
2149 FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1)));
2150 return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r));
2151 }
2152
2153 if (!(dst & SLJIT_MEM))
2154 return SLJIT_SUCCESS;
2155 return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
2156 }
2157
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)2158 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
2159 sljit_s32 freg, sljit_f32 value)
2160 {
2161 sljit_u32 exp;
2162 union {
2163 sljit_u32 imm;
2164 sljit_f32 value;
2165 } u;
2166
2167 CHECK_ERROR();
2168 CHECK(check_sljit_emit_fset32(compiler, freg, value));
2169
2170 u.value = value;
2171
2172 if (u.imm == 0)
2173 return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16));
2174
2175 if ((u.imm << (32 - 19)) == 0) {
2176 exp = (u.imm >> (23 + 2)) & 0x3f;
2177
2178 if (exp == 0x20 || exp == 0x1f)
2179 return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg));
2180 }
2181
2182 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm));
2183 return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | VD(freg) | (1 << 16));
2184 }
2185
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)2186 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2187 sljit_s32 freg, sljit_f64 value)
2188 {
2189 sljit_uw exp;
2190 union {
2191 sljit_uw imm;
2192 sljit_f64 value;
2193 } u;
2194
2195 CHECK_ERROR();
2196 CHECK(check_sljit_emit_fset64(compiler, freg, value));
2197
2198 u.value = value;
2199
2200 if (u.imm == 0)
2201 return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16);
2202
2203 if ((u.imm << (64 - 48)) == 0) {
2204 exp = (u.imm >> (52 + 2)) & 0x1ff;
2205
2206 if (exp == 0x100 || exp == 0xff)
2207 return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg));
2208 }
2209
2210 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm));
2211 return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16));
2212 }
2213
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2214 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2215 sljit_s32 freg, sljit_s32 reg)
2216 {
2217 sljit_ins inst;
2218
2219 CHECK_ERROR();
2220 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2221
2222 if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2223 inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16);
2224 else
2225 inst = FMOV_R | VN(freg) | RD(reg);
2226
2227 if (op & SLJIT_32)
2228 inst ^= W_OP | (1 << 22);
2229
2230 return push_inst(compiler, inst);
2231 }
2232
2233 /* --------------------------------------------------------------------- */
2234 /* Conditional instructions */
2235 /* --------------------------------------------------------------------- */
2236
get_cc(struct sljit_compiler * compiler,sljit_s32 type)2237 static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
2238 {
2239 switch (type) {
2240 case SLJIT_EQUAL:
2241 case SLJIT_ATOMIC_STORED:
2242 case SLJIT_F_EQUAL:
2243 case SLJIT_ORDERED_EQUAL:
2244 case SLJIT_UNORDERED_OR_EQUAL:
2245 return 0x1;
2246
2247 case SLJIT_NOT_EQUAL:
2248 case SLJIT_ATOMIC_NOT_STORED:
2249 case SLJIT_F_NOT_EQUAL:
2250 case SLJIT_UNORDERED_OR_NOT_EQUAL:
2251 case SLJIT_ORDERED_NOT_EQUAL:
2252 return 0x0;
2253
2254 case SLJIT_CARRY:
2255 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2256 return 0x3;
2257 /* fallthrough */
2258
2259 case SLJIT_LESS:
2260 return 0x2;
2261
2262 case SLJIT_NOT_CARRY:
2263 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2264 return 0x2;
2265 /* fallthrough */
2266
2267 case SLJIT_GREATER_EQUAL:
2268 return 0x3;
2269
2270 case SLJIT_GREATER:
2271 case SLJIT_UNORDERED_OR_GREATER:
2272 return 0x9;
2273
2274 case SLJIT_LESS_EQUAL:
2275 case SLJIT_F_LESS_EQUAL:
2276 case SLJIT_ORDERED_LESS_EQUAL:
2277 return 0x8;
2278
2279 case SLJIT_SIG_LESS:
2280 case SLJIT_UNORDERED_OR_LESS:
2281 return 0xa;
2282
2283 case SLJIT_SIG_GREATER_EQUAL:
2284 case SLJIT_F_GREATER_EQUAL:
2285 case SLJIT_ORDERED_GREATER_EQUAL:
2286 return 0xb;
2287
2288 case SLJIT_SIG_GREATER:
2289 case SLJIT_F_GREATER:
2290 case SLJIT_ORDERED_GREATER:
2291 return 0xd;
2292
2293 case SLJIT_SIG_LESS_EQUAL:
2294 case SLJIT_UNORDERED_OR_LESS_EQUAL:
2295 return 0xc;
2296
2297 case SLJIT_OVERFLOW:
2298 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2299 return 0x0;
2300 /* fallthrough */
2301
2302 case SLJIT_UNORDERED:
2303 return 0x7;
2304
2305 case SLJIT_NOT_OVERFLOW:
2306 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2307 return 0x1;
2308 /* fallthrough */
2309
2310 case SLJIT_ORDERED:
2311 return 0x6;
2312
2313 case SLJIT_F_LESS:
2314 case SLJIT_ORDERED_LESS:
2315 return 0x5;
2316
2317 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2318 return 0x4;
2319
2320 default:
2321 SLJIT_UNREACHABLE();
2322 return 0xe;
2323 }
2324 }
2325
sljit_emit_label(struct sljit_compiler * compiler)2326 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2327 {
2328 struct sljit_label *label;
2329
2330 CHECK_ERROR_PTR();
2331 CHECK_PTR(check_sljit_emit_label(compiler));
2332
2333 if (compiler->last_label && compiler->last_label->size == compiler->size)
2334 return compiler->last_label;
2335
2336 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2337 PTR_FAIL_IF(!label);
2338 set_label(label, compiler);
2339 return label;
2340 }
2341
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2342 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2343 {
2344 struct sljit_jump *jump;
2345
2346 CHECK_ERROR_PTR();
2347 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2348
2349 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2350 PTR_FAIL_IF(!jump);
2351 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2352 type &= 0xff;
2353
2354 if (type < SLJIT_JUMP) {
2355 jump->flags |= IS_COND;
2356 PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type)));
2357 } else if (type >= SLJIT_FAST_CALL)
2358 jump->flags |= IS_BL;
2359
2360 jump->addr = compiler->size;
2361 PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2)));
2362
2363 /* Maximum number of instructions required for generating a constant. */
2364 compiler->size += JUMP_MAX_SIZE - 1;
2365 return jump;
2366 }
2367
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2368 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2369 sljit_s32 arg_types)
2370 {
2371 SLJIT_UNUSED_ARG(arg_types);
2372 CHECK_ERROR_PTR();
2373 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2374
2375 if (type & SLJIT_CALL_RETURN) {
2376 PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2377 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2378 }
2379
2380 SLJIT_SKIP_CHECKS(compiler);
2381 return sljit_emit_jump(compiler, type);
2382 }
2383
emit_cmp_to0(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2384 static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
2385 sljit_s32 src, sljit_sw srcw)
2386 {
2387 struct sljit_jump *jump;
2388 sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
2389
2390 SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
2391 ADJUST_LOCAL_OFFSET(src, srcw);
2392
2393 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2394 PTR_FAIL_IF(!jump);
2395 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2396 jump->flags |= IS_CBZ | IS_COND;
2397
2398 if (src & SLJIT_MEM) {
2399 PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
2400 src = TMP_REG1;
2401 }
2402 else if (src == SLJIT_IMM) {
2403 PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2404 src = TMP_REG1;
2405 }
2406
2407 SLJIT_ASSERT(FAST_IS_REG(src));
2408
2409 if ((type & 0xff) == SLJIT_EQUAL)
2410 inv_bits |= 1 << 24;
2411
2412 PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
2413 jump->addr = compiler->size;
2414 PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG2)));
2415
2416 /* Maximum number of instructions required for generating a constant. */
2417 compiler->size += JUMP_MAX_SIZE - 1;
2418 return jump;
2419 }
2420
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2421 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2422 {
2423 struct sljit_jump *jump;
2424
2425 CHECK_ERROR();
2426 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2427
2428 if (src != SLJIT_IMM) {
2429 if (src & SLJIT_MEM) {
2430 ADJUST_LOCAL_OFFSET(src, srcw);
2431 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src, srcw, TMP_REG2));
2432 src = TMP_REG2;
2433 }
2434 return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
2435 }
2436
2437 /* These jumps are converted to jump/call instructions when possible. */
2438 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2439 FAIL_IF(!jump);
2440 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
2441 jump->u.target = (sljit_uw)srcw;
2442
2443 jump->addr = compiler->size;
2444 /* Maximum number of instructions required for generating a constant. */
2445 compiler->size += JUMP_MAX_SIZE - 1;
2446 return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG2));
2447 }
2448
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2449 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2450 sljit_s32 arg_types,
2451 sljit_s32 src, sljit_sw srcw)
2452 {
2453 SLJIT_UNUSED_ARG(arg_types);
2454 CHECK_ERROR();
2455 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2456
2457 if (src & SLJIT_MEM) {
2458 ADJUST_LOCAL_OFFSET(src, srcw);
2459 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
2460 src = TMP_REG1;
2461 }
2462
2463 if (type & SLJIT_CALL_RETURN) {
2464 if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2465 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
2466 src = TMP_REG1;
2467 }
2468
2469 FAIL_IF(emit_stack_frame_release(compiler, 0));
2470 type = SLJIT_JUMP;
2471 }
2472
2473 SLJIT_SKIP_CHECKS(compiler);
2474 return sljit_emit_ijump(compiler, type, src, srcw);
2475 }
2476
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)2477 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2478 sljit_s32 dst, sljit_sw dstw,
2479 sljit_s32 type)
2480 {
2481 sljit_s32 dst_r, src_r, flags, mem_flags;
2482 sljit_ins cc;
2483
2484 CHECK_ERROR();
2485 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2486 ADJUST_LOCAL_OFFSET(dst, dstw);
2487
2488 cc = get_cc(compiler, type);
2489 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2490
2491 if (GET_OPCODE(op) < SLJIT_ADD) {
2492 FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2493
2494 if (dst & SLJIT_MEM) {
2495 mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE;
2496 return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2);
2497 }
2498
2499 return SLJIT_SUCCESS;
2500 }
2501
2502 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
2503 mem_flags = WORD_SIZE;
2504
2505 if (op & SLJIT_32) {
2506 flags |= INT_OP;
2507 mem_flags = INT_SIZE;
2508 }
2509
2510 src_r = dst;
2511
2512 if (dst & SLJIT_MEM) {
2513 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1));
2514 src_r = TMP_REG1;
2515 }
2516
2517 FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2518 emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2);
2519
2520 if (dst & SLJIT_MEM)
2521 return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2);
2522 return SLJIT_SUCCESS;
2523 }
2524
sljit_emit_select(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_reg)2525 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2526 sljit_s32 dst_reg,
2527 sljit_s32 src1, sljit_sw src1w,
2528 sljit_s32 src2_reg)
2529 {
2530 sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
2531 sljit_ins cc;
2532
2533 CHECK_ERROR();
2534 CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
2535
2536 ADJUST_LOCAL_OFFSET(src1, src1w);
2537
2538 if (src1 == SLJIT_IMM) {
2539 if (type & SLJIT_32)
2540 src1w = (sljit_s32)src1w;
2541 FAIL_IF(load_immediate(compiler, TMP_REG2, src1w));
2542 src1 = TMP_REG2;
2543 } else if (src1 & SLJIT_MEM) {
2544 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG2, src1, src1w, TMP_REG2));
2545 src1 = TMP_REG2;
2546 }
2547
2548 cc = get_cc(compiler, type & ~SLJIT_32);
2549 return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1));
2550 }
2551
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)2552 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
2553 sljit_s32 dst_freg,
2554 sljit_s32 src1, sljit_sw src1w,
2555 sljit_s32 src2_freg)
2556 {
2557 sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0;
2558 sljit_ins cc;
2559
2560 CHECK_ERROR();
2561 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
2562
2563 ADJUST_LOCAL_OFFSET(src1, src1w);
2564
2565 if (src1 & SLJIT_MEM) {
2566 FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG2, src1, src1w));
2567 src1 = TMP_FREG2;
2568 }
2569
2570 cc = get_cc(compiler, type & ~SLJIT_32);
2571 return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1));
2572 }
2573
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)2574 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
2575 sljit_s32 reg,
2576 sljit_s32 mem, sljit_sw memw)
2577 {
2578 sljit_u32 inst;
2579
2580 CHECK_ERROR();
2581 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
2582
2583 if (!(reg & REG_PAIR_MASK))
2584 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
2585
2586 ADJUST_LOCAL_OFFSET(mem, memw);
2587
2588 if (!(mem & REG_MASK)) {
2589 FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8));
2590
2591 mem = SLJIT_MEM1(TMP_REG1);
2592 memw &= 0x1f8;
2593 } else if (mem & OFFS_REG_MASK) {
2594 FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)));
2595
2596 mem = SLJIT_MEM1(TMP_REG1);
2597 memw = 0;
2598 } else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) {
2599 inst = ADDI;
2600
2601 if (memw < 0) {
2602 /* Remains negative for integer min. */
2603 memw = -memw;
2604 inst = SUBI;
2605 } else if ((memw & 0x7) == 0 && memw <= 0x7ff0) {
2606 if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) {
2607 FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
2608 return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
2609 }
2610
2611 inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI;
2612
2613 FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
2614 return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
2615 }
2616
2617 if ((sljit_uw)memw <= 0xfff) {
2618 FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10)));
2619 memw = 0;
2620 } else if ((sljit_uw)memw <= 0xffffff) {
2621 FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10)));
2622
2623 if ((memw & 0xe07) != 0) {
2624 FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10)));
2625 memw = 0;
2626 } else {
2627 memw &= 0xfff;
2628 }
2629 } else {
2630 FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
2631 FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1)));
2632 memw = 0;
2633 }
2634
2635 mem = SLJIT_MEM1(TMP_REG1);
2636
2637 if (inst == SUBI)
2638 memw = -memw;
2639 }
2640
2641 SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200);
2642 return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12));
2643 }
2644
sljit_emit_mem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)2645 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
2646 sljit_s32 reg,
2647 sljit_s32 mem, sljit_sw memw)
2648 {
2649 sljit_u32 sign = 0, inst;
2650
2651 CHECK_ERROR();
2652 CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
2653
2654 if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
2655 return SLJIT_ERR_UNSUPPORTED;
2656
2657 if (type & SLJIT_MEM_SUPP)
2658 return SLJIT_SUCCESS;
2659
2660 switch (type & 0xff) {
2661 case SLJIT_MOV:
2662 case SLJIT_MOV_P:
2663 inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
2664 break;
2665 case SLJIT_MOV_S8:
2666 sign = 1;
2667 /* fallthrough */
2668 case SLJIT_MOV_U8:
2669 inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400;
2670 break;
2671 case SLJIT_MOV_S16:
2672 sign = 1;
2673 /* fallthrough */
2674 case SLJIT_MOV_U16:
2675 inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400;
2676 break;
2677 case SLJIT_MOV_S32:
2678 sign = 1;
2679 /* fallthrough */
2680 case SLJIT_MOV_U32:
2681 case SLJIT_MOV32:
2682 inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400;
2683 break;
2684 default:
2685 SLJIT_UNREACHABLE();
2686 inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
2687 break;
2688 }
2689
2690 if (!(type & SLJIT_MEM_STORE))
2691 inst |= sign ? 0x00800000 : 0x00400000;
2692
2693 if (!(type & SLJIT_MEM_POST))
2694 inst |= 0x800;
2695
2696 return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
2697 }
2698
sljit_emit_fmem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 mem,sljit_sw memw)2699 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
2700 sljit_s32 freg,
2701 sljit_s32 mem, sljit_sw memw)
2702 {
2703 sljit_u32 inst;
2704
2705 CHECK_ERROR();
2706 CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
2707
2708 if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
2709 return SLJIT_ERR_UNSUPPORTED;
2710
2711 if (type & SLJIT_MEM_SUPP)
2712 return SLJIT_SUCCESS;
2713
2714 inst = STUR_FI | 0x80000400;
2715
2716 if (!(type & SLJIT_32))
2717 inst |= 0x40000000;
2718
2719 if (!(type & SLJIT_MEM_STORE))
2720 inst |= 0x00400000;
2721
2722 if (!(type & SLJIT_MEM_POST))
2723 inst |= 0x800;
2724
2725 return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
2726 }
2727
/* Reduces a memory operand to a single base register for the SIMD loads and
   stores emitted by the callers in this file.

   On entry *mem_ptr holds the memory operand and memw its offset (or the
   shift amount when an offset register is present). On return *mem_ptr is
   the register that holds the final address: the original base register
   when memw is zero, TMP_REG2 in every other case. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 operand = *mem_ptr;
	sljit_s32 base_reg = operand & REG_MASK;
	sljit_ins add_or_sub = ADDI;

	/* base + (index << shift) */
	if (SLJIT_UNLIKELY(operand & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG2;
		return push_inst(compiler, ADD | RD(TMP_REG2) | RN(base_reg) | RM(OFFS_REG(operand)) | ((sljit_ins)(memw & 0x3) << 10));
	}

	/* No base register: memw itself is the address. */
	if (base_reg == 0) {
		*mem_ptr = TMP_REG2;
		return load_immediate(compiler, TMP_REG2, memw);
	}

	if (memw == 0) {
		*mem_ptr = base_reg;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG2;

	/* Offsets beyond 24 bits are materialized and added as a register. */
	if (memw > 0xffffff || memw < -0xffffff) {
		FAIL_IF(load_immediate(compiler, TMP_REG2, memw));
		return push_inst(compiler, ADD | RD(TMP_REG2) | RN(TMP_REG2) | RM(base_reg));
	}

	if (memw < 0) {
		add_or_sub = SUBI;
		memw = -memw;
	}

	/* A single 12 bit immediate is enough. */
	if (memw <= 0xfff)
		return push_inst(compiler, add_or_sub | RD(TMP_REG2) | RN(base_reg) | ((sljit_ins)memw << 10));

	/* Upper 12 bits first (bit 22 set), then the lower 12 bits if any remain. */
	FAIL_IF(push_inst(compiler, add_or_sub | (1 << 22) | RD(TMP_REG2) | RN(base_reg) | ((sljit_ins)(memw >> 12) << 10)));

	memw &= 0xfff;
	if (memw == 0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, add_or_sub | RD(TMP_REG2) | RN(TMP_REG2) | ((sljit_ins)memw << 10));
}
2776
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)2777 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
2778 sljit_s32 freg,
2779 sljit_s32 srcdst, sljit_sw srcdstw)
2780 {
2781 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2782 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2783 sljit_ins ins;
2784
2785 CHECK_ERROR();
2786 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
2787
2788 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
2789
2790 if (reg_size != 3 && reg_size != 4)
2791 return SLJIT_ERR_UNSUPPORTED;
2792
2793 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2794 return SLJIT_ERR_UNSUPPORTED;
2795
2796 if (type & SLJIT_SIMD_TEST)
2797 return SLJIT_SUCCESS;
2798
2799 if (!(srcdst & SLJIT_MEM)) {
2800 if (type & SLJIT_SIMD_STORE)
2801 ins = VD(srcdst) | VN(freg) | VM(freg);
2802 else
2803 ins = VD(freg) | VN(srcdst) | VM(srcdst);
2804
2805 if (reg_size == 4)
2806 ins |= (1 << 30);
2807
2808 return push_inst(compiler, ORR_v | ins);
2809 }
2810
2811 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
2812
2813 if (elem_size > 3)
2814 elem_size = 3;
2815
2816 ins = (type & SLJIT_SIMD_STORE) ? ST1 : LD1;
2817
2818 if (reg_size == 4)
2819 ins |= (1 << 30);
2820
2821 return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg));
2822 }
2823
simd_get_imm(sljit_s32 elem_size,sljit_uw value)2824 static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
2825 {
2826 sljit_ins result;
2827
2828 if (elem_size > 2 && (sljit_u32)value == (value >> 32)) {
2829 elem_size = 2;
2830 value = (sljit_u32)value;
2831 }
2832
2833 if (elem_size == 2 && (sljit_u16)value == (value >> 16)) {
2834 elem_size = 1;
2835 value = (sljit_u16)value;
2836 }
2837
2838 if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
2839 elem_size = 0;
2840 value = (sljit_u8)value;
2841 }
2842
2843 switch (elem_size) {
2844 case 0:
2845 SLJIT_ASSERT(value <= 0xff);
2846 result = 0xe000;
2847 break;
2848 case 1:
2849 SLJIT_ASSERT(value <= 0xffff);
2850 result = 0;
2851
2852 while (1) {
2853 if (value <= 0xff) {
2854 result |= 0x8000;
2855 break;
2856 }
2857
2858 if ((value & 0xff) == 0) {
2859 value >>= 8;
2860 result |= 0xa000;
2861 break;
2862 }
2863
2864 if (result != 0)
2865 return ~(sljit_ins)0;
2866
2867 value ^= (sljit_uw)0xffff;
2868 result = (1 << 29);
2869 }
2870 break;
2871 case 2:
2872 SLJIT_ASSERT(value <= 0xffffffff);
2873 result = 0;
2874
2875 while (1) {
2876 if (value <= 0xff) {
2877 result |= 0x0000;
2878 break;
2879 }
2880
2881 if ((value & ~(sljit_uw)0xff00) == 0) {
2882 value >>= 8;
2883 result |= 0x2000;
2884 break;
2885 }
2886
2887 if ((value & ~(sljit_uw)0xff0000) == 0) {
2888 value >>= 16;
2889 result |= 0x4000;
2890 break;
2891 }
2892
2893 if ((value & ~(sljit_uw)0xff000000) == 0) {
2894 value >>= 24;
2895 result |= 0x6000;
2896 break;
2897 }
2898
2899 if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
2900 value >>= 8;
2901 result |= 0xc000;
2902 break;
2903 }
2904
2905 if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
2906 value >>= 16;
2907 result |= 0xd000;
2908 break;
2909 }
2910
2911 if (result != 0)
2912 return ~(sljit_ins)0;
2913
2914 value ^= (sljit_uw)0xffffffff;
2915 result = (1 << 29);
2916 }
2917 break;
2918 default:
2919 return ~(sljit_ins)0;
2920 }
2921
2922 return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result;
2923 }
2924
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)2925 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
2926 sljit_s32 freg,
2927 sljit_s32 src, sljit_sw srcw)
2928 {
2929 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2930 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2931 sljit_ins ins, imm;
2932
2933 CHECK_ERROR();
2934 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
2935
2936 ADJUST_LOCAL_OFFSET(src, srcw);
2937
2938 if (reg_size != 3 && reg_size != 4)
2939 return SLJIT_ERR_UNSUPPORTED;
2940
2941 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2942 return SLJIT_ERR_UNSUPPORTED;
2943
2944 if (type & SLJIT_SIMD_TEST)
2945 return SLJIT_SUCCESS;
2946
2947 if (src & SLJIT_MEM) {
2948 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
2949
2950 ins = (sljit_ins)elem_size << 10;
2951
2952 if (reg_size == 4)
2953 ins |= (sljit_ins)1 << 30;
2954
2955 return push_inst(compiler, LD1R | ins | RN(src) | VT(freg));
2956 }
2957
2958 ins = (sljit_ins)1 << (16 + elem_size);
2959
2960 if (reg_size == 4)
2961 ins |= (sljit_ins)1 << 30;
2962
2963 if (type & SLJIT_SIMD_FLOAT) {
2964 if (src == SLJIT_IMM)
2965 return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg));
2966
2967 return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
2968 }
2969
2970 if (src == SLJIT_IMM) {
2971 if (elem_size < 3)
2972 srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
2973
2974 imm = simd_get_imm(elem_size, (sljit_uw)srcw);
2975
2976 if (imm != ~(sljit_ins)0) {
2977 imm |= ins & ((sljit_ins)1 << 30);
2978
2979 return push_inst(compiler, MOVI | imm | VD(freg));
2980 }
2981
2982 FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
2983 src = TMP_REG2;
2984 }
2985
2986 return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src));
2987 }
2988
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)2989 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
2990 sljit_s32 freg, sljit_s32 lane_index,
2991 sljit_s32 srcdst, sljit_sw srcdstw)
2992 {
2993 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2994 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2995 sljit_ins ins;
2996
2997 CHECK_ERROR();
2998 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
2999
3000 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3001
3002 if (reg_size != 3 && reg_size != 4)
3003 return SLJIT_ERR_UNSUPPORTED;
3004
3005 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3006 return SLJIT_ERR_UNSUPPORTED;
3007
3008 if (type & SLJIT_SIMD_TEST)
3009 return SLJIT_SUCCESS;
3010
3011 if (type & SLJIT_SIMD_LANE_ZERO) {
3012 ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30);
3013
3014 if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
3015 FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg)));
3016 srcdst = TMP_FREG1;
3017 srcdstw = 0;
3018 }
3019
3020 FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg)));
3021 }
3022
3023 if (srcdst & SLJIT_MEM) {
3024 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
3025
3026 if (elem_size == 3)
3027 ins = 0x8400;
3028 else if (elem_size == 0)
3029 ins = 0;
3030 else
3031 ins = (sljit_ins)0x2000 << elem_size;
3032
3033 lane_index = lane_index << elem_size;
3034 ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10));
3035
3036 return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg));
3037 }
3038
3039 if (type & SLJIT_SIMD_FLOAT) {
3040 if (type & SLJIT_SIMD_STORE)
3041 ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg);
3042 else
3043 ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst);
3044
3045 return push_inst(compiler, ins);
3046 }
3047
3048 if (srcdst == SLJIT_IMM) {
3049 if (elem_size < 3)
3050 srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
3051
3052 FAIL_IF(load_immediate(compiler, TMP_REG2, srcdstw));
3053 srcdst = TMP_REG2;
3054 }
3055
3056 if (type & SLJIT_SIMD_STORE) {
3057 ins = RD(srcdst) | VN(freg);
3058
3059 if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) {
3060 ins |= SMOV;
3061
3062 if (!(type & SLJIT_32))
3063 ins |= (sljit_ins)1 << 30;
3064 } else
3065 ins |= UMOV;
3066 } else
3067 ins = INS | VD(freg) | RN(srcdst);
3068
3069 if (elem_size == 3)
3070 ins |= (sljit_ins)1 << 30;
3071
3072 return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)));
3073 }
3074
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)3075 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3076 sljit_s32 freg,
3077 sljit_s32 src, sljit_s32 src_lane_index)
3078 {
3079 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3080 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3081 sljit_ins ins;
3082
3083 CHECK_ERROR();
3084 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
3085
3086 if (reg_size != 3 && reg_size != 4)
3087 return SLJIT_ERR_UNSUPPORTED;
3088
3089 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3090 return SLJIT_ERR_UNSUPPORTED;
3091
3092 if (type & SLJIT_SIMD_TEST)
3093 return SLJIT_SUCCESS;
3094
3095 ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size);
3096
3097 if (reg_size == 4)
3098 ins |= (sljit_ins)1 << 30;
3099
3100 return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
3101 }
3102
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3103 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
3104 sljit_s32 freg,
3105 sljit_s32 src, sljit_sw srcw)
3106 {
3107 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3108 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3109 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3110
3111 CHECK_ERROR();
3112 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
3113
3114 ADJUST_LOCAL_OFFSET(src, srcw);
3115
3116 if (reg_size != 3 && reg_size != 4)
3117 return SLJIT_ERR_UNSUPPORTED;
3118
3119 if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
3120 return SLJIT_ERR_UNSUPPORTED;
3121
3122 if (type & SLJIT_SIMD_TEST)
3123 return SLJIT_SUCCESS;
3124
3125 if (src & SLJIT_MEM) {
3126 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
3127
3128 if (reg_size == 4 && elem2_size - elem_size == 1)
3129 FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg)));
3130 else
3131 FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg)));
3132 src = freg;
3133 }
3134
3135 if (type & SLJIT_SIMD_FLOAT) {
3136 SLJIT_ASSERT(reg_size == 4);
3137 return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src));
3138 }
3139
3140 do {
3141 FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL)
3142 | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src)));
3143 src = freg;
3144 } while (++elem_size < elem2_size);
3145
3146 return SLJIT_SUCCESS;
3147 }
3148
/* Emits the instruction sequence of the SIMD "sign" operation: a value
   derived from freg is built in TMP_FREG1, moved to an integer register
   with UMOV and, when dst is not a register, stored to memory.
   NOTE(review): presumably this gathers the top (sign) bit of every lane
   into a bit mask; confirm against the generic API description.

   Only register sizes 3 and 4 are supported, and float elements must have
   size 2 or 3. freg itself is only read. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_ins ins, imms;
	sljit_s32 dst_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));

	ADJUST_LOCAL_OFFSET(dst, dstw);

	if (reg_size != 3 && reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	/* imms packs one immediate byte per USRA step, consumed from the lowest
	   byte upwards. ins is the initial USHR with its immediate field at bit 16
	   (presumably a shift by element width - 1, which leaves only the top bit). */
	switch (elem_size) {
	case 0:
		imms = 0x643219;
		ins = USHR | (0x9 << 16);
		break;
	case 1:
		imms = (reg_size == 4) ? 0x643219 : 0x6231;
		ins = USHR | (0x11 << 16);
		break;
	case 2:
		imms = (reg_size == 4) ? 0x6231 : 0x61;
		ins = USHR | (0x21 << 16);
		break;
	default:
		imms = 0x61;
		ins = USHR | (0x41 << 16);
		break;
	}

	if (reg_size == 4)
		ins |= (1 << 30);

	FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg)));

	/* The larger register with elements wider than a byte is narrowed by
	   one element size first, so its imms value matches that of the smaller
	   register with the next smaller element size. */
	if (reg_size == 4 && elem_size > 0)
		FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1)));

	/* All but the last packed immediate: one USRA each. */
	if (imms >= 0x100) {
		ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0;

		do {
			FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
			imms >>= 8;
		} while (imms >= 0x100);
	}

	/* The last USRA always has bit 30 set. */
	FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));

	/* The result is produced in TMP_REG2 when dst is a memory operand. */
	dst_r = FAST_IS_REG(dst) ? dst : TMP_REG2;
	ins = (0x1 << 16);

	/* Byte elements in the larger register: an extra INS_e merges data inside
	   TMP_FREG1 and a different UMOV element selector is used afterwards. */
	if (reg_size == 4 && elem_size == 0) {
		FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1)));
		ins = (0x2 << 16);
	}

	FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1)));

	/* Memory destination: store TMP_REG2, with TMP_REG1 passed as the extra register argument. */
	if (dst_r == TMP_REG2)
		return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG2, dst, dstw, TMP_REG1);

	return SLJIT_SUCCESS;
}
3225
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)3226 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3227 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3228 {
3229 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3230 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3231 sljit_ins ins = 0;
3232
3233 CHECK_ERROR();
3234 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3235
3236 if (reg_size != 3 && reg_size != 4)
3237 return SLJIT_ERR_UNSUPPORTED;
3238
3239 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3240 return SLJIT_ERR_UNSUPPORTED;
3241
3242 switch (SLJIT_SIMD_GET_OPCODE(type)) {
3243 case SLJIT_SIMD_OP2_AND:
3244 ins = AND_v;
3245 break;
3246 case SLJIT_SIMD_OP2_OR:
3247 ins = ORR_v;
3248 break;
3249 case SLJIT_SIMD_OP2_XOR:
3250 ins = EOR_v;
3251 break;
3252 }
3253
3254 if (type & SLJIT_SIMD_TEST)
3255 return SLJIT_SUCCESS;
3256
3257 if (reg_size == 4)
3258 ins |= (sljit_ins)1 << 30;
3259
3260 return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
3261 }
3262
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)3263 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
3264 sljit_s32 dst_reg,
3265 sljit_s32 mem_reg)
3266 {
3267 sljit_ins ins;
3268
3269 CHECK_ERROR();
3270 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
3271
3272 #ifdef __ARM_FEATURE_ATOMICS
3273 switch (GET_OPCODE(op)) {
3274 case SLJIT_MOV32:
3275 case SLJIT_MOV_U32:
3276 ins = LDR ^ (1 << 30);
3277 break;
3278 case SLJIT_MOV_U16:
3279 ins = LDRH;
3280 break;
3281 case SLJIT_MOV_U8:
3282 ins = LDRB;
3283 break;
3284 default:
3285 ins = LDR;
3286 break;
3287 }
3288 #else /* !__ARM_FEATURE_ATOMICS */
3289 switch (GET_OPCODE(op)) {
3290 case SLJIT_MOV32:
3291 case SLJIT_MOV_U32:
3292 ins = LDXR ^ (1 << 30);
3293 break;
3294 case SLJIT_MOV_U8:
3295 ins = LDXRB;
3296 break;
3297 case SLJIT_MOV_U16:
3298 ins = LDXRH;
3299 break;
3300 default:
3301 ins = LDXR;
3302 break;
3303 }
3304 #endif /* ARM_FEATURE_ATOMICS */
3305 return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg));
3306 }
3307
/* Emits the store half of an atomic load / store pair: src_reg is written
   to the address held in mem_reg. temp_reg is only used by the
   __ARM_FEATURE_ATOMICS variant, where it is passed to the CAS instruction
   (presumably it must still hold the value returned by the matching
   atomic load; confirm against the generic API description).

   With SLJIT_SET_ATOMIC_STORED in op, extra flag setting instructions are
   emitted after the store in both variants. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins ins;
	sljit_s32 tmp = temp_reg;
	sljit_ins cmp = 0;
	/* XOR-ed into MOV and CSET below; cleared for the default (word) case. */
	sljit_ins inv_bits = W_OP;

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

#ifdef __ARM_FEATURE_ATOMICS
	/* cmp stays zero unless the caller asked for the "stored" flag. */
	if (op & SLJIT_SET_ATOMIC_STORED)
		cmp = (SUBS ^ W_OP) | RD(TMP_ZERO);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
		ins = CAS ^ (1 << 30);
		break;
	case SLJIT_MOV_U16:
		ins = CASH;
		break;
	case SLJIT_MOV_U8:
		ins = CASB;
		break;
	default:
		ins = CAS;
		inv_bits = 0;
		/* Undo the W_OP toggle applied when cmp was created. */
		if (cmp)
			cmp ^= W_OP;
		break;
	}

	/* When the flag is requested the CAS works on a copy in TMP_REG1, so
	   temp_reg is still unchanged for the comparison after the CAS. */
	if (cmp) {
		FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1)));
		tmp = TMP_REG1;
	}
	FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg)));
	if (!cmp)
		return SLJIT_SUCCESS;

	/* Compare temp_reg with the register the CAS used (tmp), turn the
	   outcome into a value in tmp with CSET, then compare that value
	   with zero to set the final flags. */
	FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg)));
	FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp)));
	return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO));
#else /* !__ARM_FEATURE_ATOMICS */
	SLJIT_UNUSED_ARG(tmp);
	SLJIT_UNUSED_ARG(inv_bits);

	/* cmp stays zero unless the caller asked for the "stored" flag. */
	if (op & SLJIT_SET_ATOMIC_STORED)
		cmp = (SUBI ^ W_OP) | (1 << 29);

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
		ins = STXR ^ (1 << 30);
		break;
	case SLJIT_MOV_U8:
		ins = STXRB;
		break;
	case SLJIT_MOV_U16:
		ins = STXRH;
		break;
	default:
		ins = STXR;
		break;
	}

	/* TMP_REG1 goes into the RM field of the store; the optional compare
	   below then tests TMP_REG1 against an immediate of zero. */
	FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg)));
	return cmp ? push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS;
#endif /* __ARM_FEATURE_ATOMICS */
}
3382
sljit_get_local_base(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw offset)3383 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
3384 {
3385 sljit_s32 dst_reg;
3386 sljit_ins ins;
3387
3388 CHECK_ERROR();
3389 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
3390 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
3391
3392 dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3393
3394 /* Not all instruction forms support accessing SP register. */
3395 if (offset <= 0xffffff && offset >= -0xffffff) {
3396 ins = ADDI;
3397 if (offset < 0) {
3398 offset = -offset;
3399 ins = SUBI;
3400 }
3401
3402 if (offset <= 0xfff)
3403 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10)));
3404 else {
3405 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
3406
3407 offset &= 0xfff;
3408 if (offset != 0)
3409 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10)));
3410 }
3411 }
3412 else {
3413 FAIL_IF(load_immediate (compiler, dst_reg, offset));
3414 /* Add extended register form. */
3415 FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
3416 }
3417
3418 if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
3419 return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
3420 return SLJIT_SUCCESS;
3421 }
3422
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)3423 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3424 {
3425 struct sljit_const *const_;
3426 sljit_s32 dst_r;
3427
3428 CHECK_ERROR_PTR();
3429 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3430 ADJUST_LOCAL_OFFSET(dst, dstw);
3431
3432 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3433 PTR_FAIL_IF(!const_);
3434 set_const(const_, compiler);
3435
3436 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3437 PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, (sljit_uw)init_value));
3438
3439 if (dst & SLJIT_MEM)
3440 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
3441 return const_;
3442 }
3443
sljit_emit_mov_addr(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3444 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3445 {
3446 struct sljit_jump *jump;
3447 sljit_s32 dst_r;
3448
3449 CHECK_ERROR_PTR();
3450 CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
3451 ADJUST_LOCAL_OFFSET(dst, dstw);
3452
3453 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3454 PTR_FAIL_IF(push_inst(compiler, RD(dst_r)));
3455
3456 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
3457 PTR_FAIL_IF(!jump);
3458 set_mov_addr(jump, compiler, 1);
3459
3460 compiler->size += 3;
3461
3462 if (dst & SLJIT_MEM)
3463 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
3464
3465 return jump;
3466 }
3467
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)3468 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3469 {
3470 sljit_ins* inst = (sljit_ins*)addr;
3471 sljit_u32 dst;
3472 SLJIT_UNUSED_ARG(executable_offset);
3473
3474 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3475
3476 dst = inst[0] & 0x1f;
3477 SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
3478 inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5);
3479 inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21);
3480 inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21);
3481 inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21);
3482
3483 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3484 inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3485 SLJIT_CACHE_FLUSH(inst, inst + 4);
3486 }
3487
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)3488 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3489 {
3490 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3491 }
3492