1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 return "ARM-64" SLJIT_CPUINFO;
30 }
31
/* Length of an instruction word */
typedef sljit_u32 sljit_ins;

/* Index 0 of reg_map; the table maps it to hardware register number 31. */
#define TMP_ZERO (0)

/* Backend-private register indexes, placed after the public SLJIT registers
   in reg_map. */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_LR (SLJIT_NUMBER_OF_REGISTERS + 4)
#define TMP_FP (SLJIT_NUMBER_OF_REGISTERS + 5)

/* Backend-private floating point register indexes (see freg_map). */
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
#define TMP_FREG2 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2)

/* Translates an SLJIT register index into the hardware register number that
   is placed into the instruction encoding (see the RD / RN / RM macros). */
/* r18 - platform register, currently not used */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 8] = {
	31, 0, 1, 2, 3, 4, 5, 6, 7, 11, 12, 13, 14, 15, 16, 17, 8, 28, 27, 26, 25, 24, 23, 22, 21, 20, 19, 31, 9, 10, 30, 29
};

/* Same translation for floating point register indexes
   (see the VD / VN / VM macros). */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 3] = {
	0, 0, 1, 2, 3, 4, 5, 6, 7, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 15, 14, 13, 12, 11, 10, 9, 8, 30, 31
};

/* Bit 31 of an instruction word. The code generator XORs or ORs it into the
   opcode constants below when an operation is marked INT_OP. */
#define W_OP ((sljit_ins)1 << 31)

/* Register field builders: look the index up in reg_map / freg_map and shift
   the hardware number to the field position (bit 0, 5, 10 or 16). */
#define RD(rd) ((sljit_ins)reg_map[rd])
#define RT(rt) ((sljit_ins)reg_map[rt])
#define RN(rn) ((sljit_ins)reg_map[rn] << 5)
#define RT2(rt2) ((sljit_ins)reg_map[rt2] << 10)
#define RM(rm) ((sljit_ins)reg_map[rm] << 16)
#define VD(vd) ((sljit_ins)freg_map[vd])
#define VT(vt) ((sljit_ins)freg_map[vt])
#define VT2(vt) ((sljit_ins)freg_map[vt] << 10)
#define VN(vn) ((sljit_ins)freg_map[vn] << 5)
#define VM(vm) ((sljit_ins)freg_map[vm] << 16)
65
/* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
/* --------------------------------------------------------------------- */

/* Base instruction words. The code generator ORs register fields (RD, RN,
   RM, ...) and immediate fields into these constants, and toggles individual
   bits (for example W_OP) with XOR to select a different variant of the
   same instruction. */
#define ADC 0x9a000000
#define ADD 0x8b000000
#define ADDE 0x8b200000
#define ADDI 0x91000000
#define AND 0x8a000000
#define ANDI 0x92000000
#define AND_v 0x0e201c00
#define ASRV 0x9ac02800
#define B 0x14000000
#define B_CC 0x54000000
#define BL 0x94000000
#define BLR 0xd63f0000
#define BR 0xd61f0000
#define BRK 0xd4200000
#define CAS 0xc8a07c00
#define CASB 0x08a07c00
#define CASH 0x48a07c00
#define CBZ 0xb4000000
#define CCMPI 0xfa400800
#define CLZ 0xdac01000
#define CSEL 0x9a800000
#define CSINC 0x9a800400
#define DUP_e 0x0e000400
#define DUP_g 0x0e000c00
#define EOR 0xca000000
#define EOR_v 0x2e201c00
#define EORI 0xd2000000
#define EXTR 0x93c00000
#define FABS 0x1e60c000
#define FADD 0x1e602800
#define FCMP 0x1e602000
#define FCSEL 0x1e600c00
#define FCVT 0x1e224000
#define FCVTL 0x0e217800
#define FCVTZS 0x9e780000
#define FDIV 0x1e601800
#define FMOV 0x1e604000
#define FMOV_R 0x9e660000
#define FMOV_I 0x1e601000
#define FMUL 0x1e600800
#define FNEG 0x1e614000
#define FSUB 0x1e603800
#define INS 0x4e001c00
#define INS_e 0x6e000400
#define LD1 0x0c407000
#define LD1_s 0x0d400000
#define LD1R 0x0d40c000
#define LDRI 0xf9400000
#define LDRI_F64 0xfd400000
#define LDRI_POST 0xf8400400
#define LDP 0xa9400000
#define LDP_F64 0x6d400000
#define LDP_POST 0xa8c00000
#define LDR_PRE 0xf8400c00
#define LDXR 0xc85f7c00
#define LDXRB 0x085f7c00
#define LDXRH 0x485f7c00
#define LSLV 0x9ac02000
#define LSRV 0x9ac02400
#define MADD 0x9b000000
#define MOVI 0x0f000400
#define MOVK 0xf2800000
#define MOVN 0x92800000
#define MOVZ 0xd2800000
#define NOP 0xd503201f
#define ORN 0xaa200000
#define ORR 0xaa000000
#define ORR_v 0x0ea01c00
#define ORRI 0xb2000000
#define RBIT 0xdac00000
#define RET 0xd65f0000
#define REV 0xdac00c00
#define REV16 0xdac00400
#define RORV 0x9ac02c00
#define SBC 0xda000000
#define SBFM 0x93400000
#define SCVTF 0x9e620000
#define SDIV 0x9ac00c00
#define SMADDL 0x9b200000
#define SMOV 0x0e002c00
#define SMULH 0x9b403c00
#define SSHLL 0x0f00a400
#define ST1 0x0c007000
#define ST1_s 0x0d000000
#define STP 0xa9000000
#define STP_F64 0x6d000000
#define STP_PRE 0xa9800000
#define STRB 0x38206800
#define STRBI 0x39000000
#define STRI 0xf9000000
#define STRI_F64 0xfd000000
#define STR_FI 0x3d000000
#define STR_FR 0x3c206800
#define STUR_FI 0x3c000000
#define STURBI 0x38000000
#define STXR 0xc8007c00
#define STXRB 0x8007c00
#define STXRH 0x48007c00
#define SUB 0xcb000000
#define SUBI 0xd1000000
#define SUBS 0xeb000000
#define TBZ 0x36000000
#define UBFM 0xd3400000
#define UCVTF 0x9e630000
#define UDIV 0x9ac00800
#define UMOV 0x0e003c00
#define UMULH 0x9bc03c00
#define USHLL 0x2f00a400
#define USHR 0x2f000400
#define USRA 0x2f001400
#define XTN 0x0e212800

/* Forms derived from the constants above. */

/* CSINC with both source register fields set to TMP_ZERO. */
#define CSET (CSINC | RM(TMP_ZERO) | RN(TMP_ZERO))
/* Load forms: the matching store encoding with bit 22 set. */
#define LDR (STRI | (1 << 22))
#define LDRB (STRBI | (1 << 22))
/* LDRB with bit 30 set. */
#define LDRH (LDRB | (1 << 30))
/* ORR with TMP_ZERO in the RN field, so only the RM operand contributes. */
#define MOV (ORR | RN(TMP_ZERO))
187
push_inst(struct sljit_compiler * compiler,sljit_ins ins)188 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
189 {
190 sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
191 FAIL_IF(!ptr);
192 *ptr = ins;
193 compiler->size++;
194 return SLJIT_SUCCESS;
195 }
196
emit_imm64_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_uw imm)197 static SLJIT_INLINE sljit_s32 emit_imm64_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_uw imm)
198 {
199 FAIL_IF(push_inst(compiler, MOVZ | RD(dst) | ((sljit_ins)(imm & 0xffff) << 5)));
200 FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 16) & 0xffff) << 5) | (1 << 21)));
201 FAIL_IF(push_inst(compiler, MOVK | RD(dst) | (((sljit_ins)(imm >> 32) & 0xffff) << 5) | (2 << 21)));
202 return push_inst(compiler, MOVK | RD(dst) | ((sljit_ins)(imm >> 48) << 5) | (3 << 21));
203 }
204
detect_jump_type(struct sljit_jump * jump,sljit_ins * code_ptr,sljit_ins * code,sljit_sw executable_offset)205 static SLJIT_INLINE sljit_sw detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
206 {
207 sljit_sw diff;
208 sljit_uw target_addr;
209
210 if (jump->flags & SLJIT_REWRITABLE_JUMP) {
211 jump->flags |= PATCH_ABS64;
212 return 0;
213 }
214
215 if (jump->flags & JUMP_ADDR)
216 target_addr = jump->u.target;
217 else {
218 SLJIT_ASSERT(jump->flags & JUMP_LABEL);
219 target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
220 }
221
222 diff = (sljit_sw)target_addr - (sljit_sw)(code_ptr - 4) - executable_offset;
223
224 if (jump->flags & IS_COND) {
225 diff += SSIZE_OF(ins);
226 if (diff <= 0xfffff && diff >= -0x100000) {
227 code_ptr[-5] ^= (jump->flags & IS_CBZ) ? (0x1 << 24) : 0x1;
228 jump->addr -= sizeof(sljit_ins);
229 jump->flags |= PATCH_COND;
230 return 5;
231 }
232 diff -= SSIZE_OF(ins);
233 }
234
235 if (diff <= 0x7ffffff && diff >= -0x8000000) {
236 jump->flags |= PATCH_B;
237 return 4;
238 }
239
240 if (target_addr < 0x100000000l) {
241 if (jump->flags & IS_COND)
242 code_ptr[-5] -= (2 << 5);
243 code_ptr[-2] = code_ptr[0];
244 return 2;
245 }
246
247 if (target_addr < 0x1000000000000l) {
248 if (jump->flags & IS_COND)
249 code_ptr[-5] -= (1 << 5);
250 jump->flags |= PATCH_ABS48;
251 code_ptr[-1] = code_ptr[0];
252 return 1;
253 }
254
255 jump->flags |= PATCH_ABS64;
256 return 0;
257 }
258
put_label_get_length(struct sljit_put_label * put_label,sljit_uw max_label)259 static SLJIT_INLINE sljit_sw put_label_get_length(struct sljit_put_label *put_label, sljit_uw max_label)
260 {
261 if (max_label < 0x100000000l) {
262 put_label->flags = 0;
263 return 2;
264 }
265
266 if (max_label < 0x1000000000000l) {
267 put_label->flags = 1;
268 return 1;
269 }
270
271 put_label->flags = 2;
272 return 0;
273 }
274
sljit_generate_code(struct sljit_compiler * compiler)275 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
276 {
277 struct sljit_memory_fragment *buf;
278 sljit_ins *code;
279 sljit_ins *code_ptr;
280 sljit_ins *buf_ptr;
281 sljit_ins *buf_end;
282 sljit_uw word_count;
283 sljit_uw next_addr;
284 sljit_sw executable_offset;
285 sljit_sw addr;
286 sljit_u32 dst;
287
288 struct sljit_label *label;
289 struct sljit_jump *jump;
290 struct sljit_const *const_;
291 struct sljit_put_label *put_label;
292
293 CHECK_ERROR_PTR();
294 CHECK_PTR(check_sljit_generate_code(compiler));
295 reverse_buf(compiler);
296
297 code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins), compiler->exec_allocator_data);
298 PTR_FAIL_WITH_EXEC_IF(code);
299 buf = compiler->buf;
300
301 code_ptr = code;
302 word_count = 0;
303 next_addr = 0;
304 executable_offset = SLJIT_EXEC_OFFSET(code);
305
306 label = compiler->labels;
307 jump = compiler->jumps;
308 const_ = compiler->consts;
309 put_label = compiler->put_labels;
310
311 do {
312 buf_ptr = (sljit_ins*)buf->memory;
313 buf_end = buf_ptr + (buf->used_size >> 2);
314 do {
315 *code_ptr = *buf_ptr++;
316 if (next_addr == word_count) {
317 SLJIT_ASSERT(!label || label->size >= word_count);
318 SLJIT_ASSERT(!jump || jump->addr >= word_count);
319 SLJIT_ASSERT(!const_ || const_->addr >= word_count);
320 SLJIT_ASSERT(!put_label || put_label->addr >= word_count);
321
322 /* These structures are ordered by their address. */
323 if (label && label->size == word_count) {
324 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
325 label->size = (sljit_uw)(code_ptr - code);
326 label = label->next;
327 }
328 if (jump && jump->addr == word_count) {
329 jump->addr = (sljit_uw)(code_ptr - 4);
330 code_ptr -= detect_jump_type(jump, code_ptr, code, executable_offset);
331 jump = jump->next;
332 }
333 if (const_ && const_->addr == word_count) {
334 const_->addr = (sljit_uw)code_ptr;
335 const_ = const_->next;
336 }
337 if (put_label && put_label->addr == word_count) {
338 SLJIT_ASSERT(put_label->label);
339 put_label->addr = (sljit_uw)(code_ptr - 3);
340 code_ptr -= put_label_get_length(put_label, (sljit_uw)(SLJIT_ADD_EXEC_OFFSET(code, executable_offset) + put_label->label->size));
341 put_label = put_label->next;
342 }
343 next_addr = compute_next_addr(label, jump, const_, put_label);
344 }
345 code_ptr++;
346 word_count++;
347 } while (buf_ptr < buf_end);
348
349 buf = buf->next;
350 } while (buf);
351
352 if (label && label->size == word_count) {
353 label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
354 label->size = (sljit_uw)(code_ptr - code);
355 label = label->next;
356 }
357
358 SLJIT_ASSERT(!label);
359 SLJIT_ASSERT(!jump);
360 SLJIT_ASSERT(!const_);
361 SLJIT_ASSERT(!put_label);
362 SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
363
364 jump = compiler->jumps;
365 while (jump) {
366 do {
367 addr = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
368 buf_ptr = (sljit_ins *)jump->addr;
369
370 if (jump->flags & PATCH_B) {
371 addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
372 SLJIT_ASSERT(addr <= 0x1ffffff && addr >= -0x2000000);
373 buf_ptr[0] = ((jump->flags & IS_BL) ? BL : B) | (sljit_ins)(addr & 0x3ffffff);
374 if (jump->flags & IS_COND)
375 buf_ptr[-1] -= (4 << 5);
376 break;
377 }
378 if (jump->flags & PATCH_COND) {
379 addr = (addr - (sljit_sw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset)) >> 2;
380 SLJIT_ASSERT(addr <= 0x3ffff && addr >= -0x40000);
381 buf_ptr[0] = (buf_ptr[0] & ~(sljit_ins)0xffffe0) | (sljit_ins)((addr & 0x7ffff) << 5);
382 break;
383 }
384
385 SLJIT_ASSERT((jump->flags & (PATCH_ABS48 | PATCH_ABS64)) || (sljit_uw)addr <= (sljit_uw)0xffffffff);
386 SLJIT_ASSERT((jump->flags & PATCH_ABS64) || (sljit_uw)addr <= (sljit_uw)0xffffffffffff);
387
388 dst = buf_ptr[0] & 0x1f;
389 buf_ptr[0] = MOVZ | dst | (((sljit_ins)addr & 0xffff) << 5);
390 buf_ptr[1] = MOVK | dst | (((sljit_ins)(addr >> 16) & 0xffff) << 5) | (1 << 21);
391 if (jump->flags & (PATCH_ABS48 | PATCH_ABS64))
392 buf_ptr[2] = MOVK | dst | (((sljit_ins)(addr >> 32) & 0xffff) << 5) | (2 << 21);
393 if (jump->flags & PATCH_ABS64)
394 buf_ptr[3] = MOVK | dst | ((sljit_ins)(addr >> 48) << 5) | (3 << 21);
395 } while (0);
396 jump = jump->next;
397 }
398
399 put_label = compiler->put_labels;
400 while (put_label) {
401 addr = (sljit_sw)put_label->label->addr;
402 buf_ptr = (sljit_ins*)put_label->addr;
403
404 buf_ptr[0] |= ((sljit_ins)addr & 0xffff) << 5;
405 buf_ptr[1] |= ((sljit_ins)(addr >> 16) & 0xffff) << 5;
406
407 if (put_label->flags >= 1)
408 buf_ptr[2] |= ((sljit_ins)(addr >> 32) & 0xffff) << 5;
409
410 if (put_label->flags >= 2)
411 buf_ptr[3] |= (sljit_ins)(addr >> 48) << 5;
412
413 put_label = put_label->next;
414 }
415
416 compiler->error = SLJIT_ERR_COMPILED;
417 compiler->executable_offset = executable_offset;
418 compiler->executable_size = (sljit_uw)(code_ptr - code) * sizeof(sljit_ins);
419
420 code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
421 code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
422
423 SLJIT_CACHE_FLUSH(code, code_ptr);
424 SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
425 return code;
426 }
427
sljit_has_cpu_feature(sljit_s32 feature_type)428 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
429 {
430 switch (feature_type) {
431 case SLJIT_HAS_FPU:
432 case SLJIT_HAS_SIMD:
433 #ifdef SLJIT_IS_FPU_AVAILABLE
434 return (SLJIT_IS_FPU_AVAILABLE) != 0;
435 #else
436 /* Available by default. */
437 return 1;
438 #endif
439
440 case SLJIT_HAS_CLZ:
441 case SLJIT_HAS_CTZ:
442 case SLJIT_HAS_REV:
443 case SLJIT_HAS_ROT:
444 case SLJIT_HAS_CMOV:
445 case SLJIT_HAS_PREFETCH:
446 case SLJIT_HAS_COPY_F32:
447 case SLJIT_HAS_COPY_F64:
448 case SLJIT_HAS_ATOMIC:
449 return 1;
450
451 default:
452 return 0;
453 }
454 }
455
sljit_cmp_info(sljit_s32 type)456 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
457 {
458 switch (type) {
459 case SLJIT_UNORDERED_OR_EQUAL:
460 case SLJIT_ORDERED_NOT_EQUAL:
461 return 2;
462 }
463
464 return 0;
465 }
466
467 /* --------------------------------------------------------------------- */
468 /* Core code generator functions. */
469 /* --------------------------------------------------------------------- */
470
/* Counts the trailing zero bits of value and stores the count in result.
   value is modified as well: it is shifted right by the counted amount, and
   callers rely on that. The expansion is a sequence of statements, so it
   must not be used as the unbraced body of an if / else / loop. A value of 0
   yields 63 and leaves value 0. */
#define COUNT_TRAILING_ZERO(value, result) \
	result = 0; \
	if (!(value & 0xffffffff)) { \
		result += 32; \
		value >>= 32; \
	} \
	if (!(value & 0xffff)) { \
		result += 16; \
		value >>= 16; \
	} \
	if (!(value & 0xff)) { \
		result += 8; \
		value >>= 8; \
	} \
	if (!(value & 0xf)) { \
		result += 4; \
		value >>= 4; \
	} \
	if (!(value & 0x3)) { \
		result += 2; \
		value >>= 2; \
	} \
	if (!(value & 0x1)) { \
		result += 1; \
		value >>= 1; \
	}

/* Flag for the len argument of logical_imm(): return 0 for the values 0 and
   -1 instead of asserting that they never occur. */
#define LOGICAL_IMM_CHECK (sljit_ins)0x100

/* Computes the immediate fields (bits 10.., bits 16.. and bit 22) for a
   logical instruction with an immediate operand.

   imm: the immediate value.
   len: HALF of the operand width in bits, i.e. 16 for a 32 bit operation and
        32 for a 64 bit one, optionally ORed with LOGICAL_IMM_CHECK.

   Returns the field bits to OR into the instruction, or 0 when the value
   cannot be represented. 0 is never a valid result. */
static sljit_ins logical_imm(sljit_sw imm, sljit_u32 len)
{
	sljit_s32 negated;
	sljit_u32 ones, right;
	sljit_uw mask, uimm;
	sljit_ins ins;

	if (len & LOGICAL_IMM_CHECK) {
		len &= ~LOGICAL_IMM_CHECK;
		if (len == 32 && (imm == 0 || imm == -1))
			return 0;
		if (len == 16 && ((sljit_s32)imm == 0 || (sljit_s32)imm == -1))
			return 0;
	}

	SLJIT_ASSERT((len == 32 && imm != 0 && imm != -1)
		|| (len == 16 && (sljit_s32)imm != 0 && (sljit_s32)imm != -1));

	/* Find the smallest repeating unit: keep halving while the low len bits
	   equal the next len bits. */
	uimm = (sljit_uw)imm;
	while (1) {
		if (len <= 0) {
			SLJIT_UNREACHABLE();
			return 0;
		}

		mask = ((sljit_uw)1 << len) - 1;
		if ((uimm & mask) != ((uimm >> len) & mask))
			break;
		len >>= 1;
	}

	/* The loop stops at half of the unit size; len is the unit size from here. */
	len <<= 1;

	/* Normalize so that bit 0 is clear; remember whether we inverted. */
	negated = 0;
	if (uimm & 0x1) {
		negated = 1;
		uimm = ~uimm;
	}

	/* Keep a single unit only. */
	if (len < 64)
		uimm &= ((sljit_uw)1 << len) - 1;

	/* Unsigned right shift. */
	/* right = number of zero bits below the run of ones; uimm is shifted down. */
	COUNT_TRAILING_ZERO(uimm, right);

	/* Signed shift. We also know that the highest bit is set. */
	/* After inverting, the run of ones became trailing zeros; the signed
	   shift inside the macro fills with ones from the top. */
	imm = (sljit_sw)~uimm;
	SLJIT_ASSERT(imm < 0);

	COUNT_TRAILING_ZERO(imm, ones);

	/* Anything other than all ones left over means the unit held more than
	   one run of ones: not encodable. */
	if (~imm)
		return 0;

	/* Unit size marker: bit 22 for 64 bit units, otherwise a prefix in the
	   field at bit 10. */
	if (len == 64)
		ins = 1 << 22;
	else
		ins = (0x3f - ((len << 1) - 1)) << 10;

	/* Run length goes to the field at bit 10, rotation to the field at bit 16. */
	if (negated)
		return ins | ((len - ones - 1) << 10) | ((len - ones - right) << 16);

	return ins | ((ones - 1) << 10) | ((len - right) << 16);
}

#undef COUNT_TRAILING_ZERO
566
/* Emits the instruction(s) that load the constant simm into register dst.
   Tries single-instruction forms first (MOVZ, MOVN, ORR with a logical
   immediate), then two-instruction forms, and finally builds the value 16
   bits at a time with one MOVZ or MOVN followed by MOVK instructions. */
static sljit_s32 load_immediate(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw simm)
{
	sljit_uw imm = (sljit_uw)simm;
	/* Destination register field, and the two low 16 bit chunks of the value
	   (plain and inverted) already moved to the immediate field at bit 5. */
	sljit_ins rd = RD(dst);
	sljit_ins lo_field = ((sljit_ins)imm & 0xffff) << 5;
	sljit_ins lo_inv_field = ((sljit_ins)~imm & 0xffff) << 5;
	sljit_ins hi_field = ((sljit_ins)imm & 0xffff0000u) >> (16 - 5);
	sljit_ins hi_inv_field = ((sljit_ins)~imm & 0xffff0000u) >> (16 - 5);
	sljit_ins chunk, encoded;
	sljit_uw remaining;
	sljit_u32 idx, zero_chunks, ones_chunks, is_first, use_movn;

	/* Values that need a single 16 bit chunk only. */
	if (imm <= 0xffff)
		return push_inst(compiler, MOVZ | rd | lo_field);

	if (simm < 0 && simm >= -0x10000)
		return push_inst(compiler, MOVN | rd | lo_inv_field);

	/* Values that fit in 32 bits. */
	if (imm <= 0xffffffffl) {
		if ((imm & 0xffff) == 0)
			return push_inst(compiler, MOVZ | rd | hi_field | (1 << 21));
		if ((imm & 0xffff0000l) == 0xffff0000)
			return push_inst(compiler, (MOVN ^ W_OP) | rd | lo_inv_field);
		if ((imm & 0xffff) == 0xffff)
			return push_inst(compiler, (MOVN ^ W_OP) | rd | hi_inv_field | (1 << 21));

		encoded = logical_imm(simm, 16);
		if (encoded != 0)
			return push_inst(compiler, (ORRI ^ W_OP) | rd | RN(TMP_ZERO) | encoded);

		FAIL_IF(push_inst(compiler, MOVZ | rd | lo_field));
		return push_inst(compiler, MOVK | rd | hi_field | (1 << 21));
	}

	encoded = logical_imm(simm, 32);
	if (encoded != 0)
		return push_inst(compiler, ORRI | rd | RN(TMP_ZERO) | encoded);

	/* Negative values whose upper 32 bits are all ones. */
	if (simm < 0 && simm >= -0x100000000l) {
		if ((imm & 0xffff) == 0xffff)
			return push_inst(compiler, MOVN | rd | hi_inv_field | (1 << 21));

		FAIL_IF(push_inst(compiler, MOVN | rd | lo_inv_field));
		return push_inst(compiler, MOVK | rd | hi_field | (1 << 21));
	}

	/* Many more values could be built from ORR and MOVx combinations, but
	   finding them is costly. Count the all-zero and all-one chunks to decide
	   whether to start from MOVZ or from MOVN. */
	zero_chunks = 0;
	ones_chunks = 0;
	remaining = imm;
	for (idx = 0; idx < 4; idx++) {
		chunk = (sljit_ins)remaining & 0xffff;
		if (chunk == 0)
			zero_chunks++;
		else if (chunk == 0xffff)
			ones_chunks++;
		remaining >>= 16;
	}

	/* With MOVN the chunks are walked in inverted form, so the chunks to
	   skip (originally 0xffff) show up as zero here as well. */
	use_movn = ones_chunks > zero_chunks;
	remaining = use_movn ? ~imm : imm;
	is_first = 1;

	for (idx = 0; idx < 4; idx++, remaining >>= 16) {
		chunk = (sljit_ins)remaining & 0xffff;
		if (chunk == 0)
			continue;

		if (is_first) {
			is_first = 0;
			FAIL_IF(push_inst(compiler, (use_movn ? MOVN : MOVZ) | rd | (chunk << 5) | (idx << 21)));
			continue;
		}

		/* MOVK takes the chunk as it appears in the original value. */
		if (use_movn)
			chunk ^= 0xffff;
		FAIL_IF(push_inst(compiler, MOVK | rd | (chunk << 5) | (idx << 21)));
	}

	return SLJIT_SUCCESS;
}
655
/* Flag bits for the flags argument of emit_op_imm(). The low 16 bits of
   flags hold the operation code. */
/* arg1 / arg2 is an immediate value instead of a register index. */
#define ARG1_IMM 0x0010000
#define ARG2_IMM 0x0020000
/* Toggle W_OP in the emitted instruction. */
#define INT_OP 0x0040000
/* The operation has to update the status flags. */
#define SET_FLAGS 0x0080000
/* The result itself is not needed: CHECK_FLAGS redirects dst to TMP_ZERO. */
#define UNUSED_RETURN 0x0100000

/* Used inside emit_op_imm() only; it reads flags and modifies the local
   variables inv_bits and dst. When SET_FLAGS is requested, flag_bits is
   added to the bits toggled in the opcode, and with UNUSED_RETURN the
   destination becomes TMP_ZERO. Expands to a bare if statement, so it must
   not be followed by an else. */
#define CHECK_FLAGS(flag_bits) \
	if (flags & SET_FLAGS) { \
		inv_bits |= flag_bits; \
		if (flags & UNUSED_RETURN) \
			dst = TMP_ZERO; \
	}
668
/* Emits the instruction(s) for one integer operation.

   flags: operation code in the low 16 bits, combined with ARG1_IMM,
          ARG2_IMM, INT_OP, SET_FLAGS and UNUSED_RETURN.
   dst:   destination register index.
   arg1, arg2: register indexes, or immediate values when the matching
          ARGx_IMM flag is set.

   The first half handles operations with one immediate operand and tries to
   encode the immediate directly into the instruction. When that is not
   possible the immediate is loaded into TMP_REG1 / TMP_REG2 (or replaced by
   TMP_ZERO when it is 0) and the second half emits the register-register
   form. Returns SLJIT_SUCCESS, or the error propagated through FAIL_IF. */
static sljit_s32 emit_op_imm(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 dst, sljit_sw arg1, sljit_sw arg2)
{
	/* dst must be register, TMP_REG1
	   arg1 must be register, TMP_REG1, imm
	   arg2 must be register, TMP_REG2, imm */
	/* Bits XORed into the opcode constant; starts with W_OP for INT_OP. */
	sljit_ins inv_bits = (flags & INT_OP) ? W_OP : 0;
	sljit_ins inst_bits;
	sljit_s32 op = (flags & 0xffff);
	sljit_s32 reg;
	sljit_sw imm, nimm;

	if (SLJIT_UNLIKELY((flags & (ARG1_IMM | ARG2_IMM)) == (ARG1_IMM | ARG2_IMM))) {
		/* Both are immediates. */
		/* Turn arg1 into a register so that only arg2 remains immediate. */
		flags &= ~ARG1_IMM;
		if (arg1 == 0 && op != SLJIT_ADD && op != SLJIT_SUB)
			arg1 = TMP_ZERO;
		else {
			FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
			arg1 = TMP_REG1;
		}
	}

	if (flags & (ARG1_IMM | ARG2_IMM)) {
		/* reg is the register operand, imm the immediate one. */
		reg = (sljit_s32)((flags & ARG2_IMM) ? arg1 : arg2);
		imm = (flags & ARG2_IMM) ? arg2 : arg1;

		switch (op) {
		case SLJIT_MUL:
		case SLJIT_CLZ:
		case SLJIT_CTZ:
		case SLJIT_REV:
		case SLJIT_REV_U16:
		case SLJIT_REV_S16:
		case SLJIT_REV_U32:
		case SLJIT_REV_S32:
		case SLJIT_ADDC:
		case SLJIT_SUBC:
			/* No form with immediate operand (except imm 0, which
			   is represented by a ZERO register). */
			break;
		case SLJIT_MOV:
			SLJIT_ASSERT(!(flags & SET_FLAGS) && (flags & ARG2_IMM) && arg1 == TMP_REG1);
			return load_immediate(compiler, dst, imm);
		case SLJIT_SUB:
			compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
			if (flags & ARG1_IMM)
				break;
			/* reg - imm is handled as reg + (-imm) below. */
			imm = -imm;
			/* Fall through. */
		case SLJIT_ADD:
			if (op != SLJIT_SUB)
				compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;

			if (imm == 0) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, ((op == SLJIT_ADD ? ADDI : SUBI) ^ inv_bits) | RD(dst) | RN(reg));
			}
			/* 12 bit immediate, positive or negative. */
			if (imm > 0 && imm <= 0xfff) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)imm << 10));
			}
			nimm = -imm;
			if (nimm > 0 && nimm <= 0xfff) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | ((sljit_ins)nimm << 10));
			}
			/* 12 bit immediate shifted left by 12 (selected by bit 22). */
			if (imm > 0 && imm <= 0xffffff && !(imm & 0xfff)) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22));
			}
			if (nimm > 0 && nimm <= 0xffffff && !(nimm & 0xfff)) {
				CHECK_FLAGS(1 << 29);
				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22));
			}
			/* 24 bit immediate split into two instructions; only used
			   when the status flags are not needed. */
			if (imm > 0 && imm <= 0xffffff && !(flags & SET_FLAGS)) {
				FAIL_IF(push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)imm >> 12) << 10) | (1 << 22)));
				return push_inst(compiler, (ADDI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)imm & 0xfff) << 10));
			}
			if (nimm > 0 && nimm <= 0xffffff && !(flags & SET_FLAGS)) {
				FAIL_IF(push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(reg) | (((sljit_ins)nimm >> 12) << 10) | (1 << 22)));
				return push_inst(compiler, (SUBI ^ inv_bits) | RD(dst) | RN(dst) | (((sljit_ins)nimm & 0xfff) << 10));
			}
			break;
		case SLJIT_AND:
			/* logical_imm() takes half of the operand width. */
			inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
			if (!inst_bits)
				break;
			CHECK_FLAGS(3 << 29);
			return push_inst(compiler, (ANDI ^ inv_bits) | RD(dst) | RN(reg) | inst_bits);
		case SLJIT_XOR:
			/* XOR with -1 is a bitwise NOT: ORN with TMP_ZERO as first operand. */
			if (imm == -1) {
				FAIL_IF(push_inst(compiler, (ORN ^ inv_bits) | RD(dst) | RN(TMP_ZERO) | RM(reg)));
				goto set_flags;
			}
			/* fallthrough */
		case SLJIT_OR:
			inst_bits = logical_imm(imm, LOGICAL_IMM_CHECK | ((flags & INT_OP) ? 16 : 32));
			if (!inst_bits)
				break;
			if (op == SLJIT_OR)
				inst_bits |= ORRI;
			else
				inst_bits |= EORI;
			FAIL_IF(push_inst(compiler, (inst_bits ^ inv_bits) | RD(dst) | RN(reg)));
			goto set_flags;
		case SLJIT_SHL:
		case SLJIT_MSHL:
			if (flags & ARG1_IMM)
				break;

			/* Left shift by a constant, expressed with UBFM: the field at
			   bit 16 gets -imm modulo the width, the field at bit 10 gets
			   width - 1 - imm. */
			if (flags & INT_OP) {
				imm &= 0x1f;
				inst_bits = (((sljit_ins)-imm & 0x1f) << 16) | ((31 - (sljit_ins)imm) << 10);
			} else {
				imm &= 0x3f;
				inst_bits = ((sljit_ins)1 << 22) | (((sljit_ins)-imm & 0x3f) << 16) | ((63 - (sljit_ins)imm) << 10);
			}

			/* W_OP >> 9 is bit 22: it is toggled together with W_OP. */
			inv_bits |= inv_bits >> 9;
			FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
			goto set_flags;
		case SLJIT_LSHR:
		case SLJIT_MLSHR:
		case SLJIT_ASHR:
		case SLJIT_MASHR:
			if (flags & ARG1_IMM)
				break;

			inv_bits |= inv_bits >> 9;
			/* Arithmetic shifts additionally toggle bit 30 of UBFM. */
			if (op >= SLJIT_ASHR)
				inv_bits |= 1 << 30;

			if (flags & INT_OP) {
				imm &= 0x1f;
				inst_bits = ((sljit_ins)imm << 16) | (31 << 10);
			} else {
				imm &= 0x3f;
				inst_bits = ((sljit_ins)1 << 22) | ((sljit_ins)imm << 16) | (63 << 10);
			}

			FAIL_IF(push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg1) | inst_bits));
			goto set_flags;
		case SLJIT_ROTL:
		case SLJIT_ROTR:
			if (flags & ARG1_IMM)
				break;

			/* Rotate left by imm equals rotate right by -imm (masked below). */
			if (op == SLJIT_ROTL)
				imm = -imm;

			imm &= (flags & INT_OP) ? 0x1f : 0x3f;
			/* EXTR with the same register in both source fields. */
			return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst) | RN(arg1) | RM(arg1) | ((sljit_ins)imm << 10));
		default:
			SLJIT_UNREACHABLE();
			break;
		}

		/* No immediate form was usable: move the immediate into a register
		   (TMP_ZERO for 0) and continue with the register forms. */
		if (flags & ARG2_IMM) {
			if (arg2 == 0)
				arg2 = TMP_ZERO;
			else {
				FAIL_IF(load_immediate(compiler, TMP_REG2, arg2));
				arg2 = TMP_REG2;
			}
		}
		else {
			if (arg1 == 0)
				arg1 = TMP_ZERO;
			else {
				FAIL_IF(load_immediate(compiler, TMP_REG1, arg1));
				arg1 = TMP_REG1;
			}
		}
	}

	/* Both arguments are registers. */
	switch (op) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		/* Moving a register onto itself needs no instruction. */
		if (dst == arg2)
			return SLJIT_SUCCESS;
		return push_inst(compiler, MOV | RD(dst) | RM(arg2));
	case SLJIT_MOV_U8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		inv_bits |= inv_bits >> 9;
		return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
	case SLJIT_MOV_S8:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		inv_bits |= inv_bits >> 9;
		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (7 << 10));
	case SLJIT_MOV_U16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		inv_bits |= inv_bits >> 9;
		return push_inst(compiler, (UBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
	case SLJIT_MOV_S16:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		inv_bits |= inv_bits >> 9;
		return push_inst(compiler, (SBFM ^ inv_bits) | RD(dst) | RN(arg2) | (15 << 10));
	case SLJIT_MOV32:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		if (dst == arg2)
			return SLJIT_SUCCESS;
		/* fallthrough */
	case SLJIT_MOV_U32:
		/* Unlike SLJIT_MOV32, the instruction is emitted even when dst == arg2. */
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		return push_inst(compiler, (MOV ^ W_OP) | RD(dst) | RM(arg2));
	case SLJIT_MOV_S32:
		SLJIT_ASSERT(!(flags & SET_FLAGS) && arg1 == TMP_REG1);
		return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(arg2) | (31 << 10));
	case SLJIT_CLZ:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(arg2));
	case SLJIT_CTZ:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		/* Count trailing zeros: reverse the bits, then count leading zeros. */
		FAIL_IF(push_inst(compiler, (RBIT ^ inv_bits) | RD(dst) | RN(arg2)));
		return push_inst(compiler, (CLZ ^ inv_bits) | RD(dst) | RN(dst));
	case SLJIT_REV:
		SLJIT_ASSERT(arg1 == TMP_REG1);
		/* W_OP >> 21 is bit 10: toggled together with W_OP for INT_OP. */
		inv_bits |= inv_bits >> 21;
		return push_inst(compiler, (REV ^ inv_bits) | RD(dst) | RN(arg2));
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
		FAIL_IF(push_inst(compiler, (REV16 ^ (sljit_ins)0x80000000) | RD(dst) | RN(arg2)));
		/* The extension to the full register is skipped in these cases. */
		if (dst == TMP_REG1 || (arg2 == TMP_REG2 && op == SLJIT_REV_U16))
			return SLJIT_SUCCESS;
		inv_bits |= inv_bits >> 9;
		return push_inst(compiler, ((op == SLJIT_REV_U16 ? UBFM : SBFM) ^ inv_bits) | RD(dst) | RN(dst) | (15 << 10));
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		SLJIT_ASSERT(arg1 == TMP_REG1 && dst != TMP_REG2);
		FAIL_IF(push_inst(compiler, (REV ^ (sljit_ins)0x80000400) | RD(dst) | RN(arg2)));
		/* Only the signed variant needs a separate sign extension. */
		if (op == SLJIT_REV_U32 || dst == TMP_REG1)
			return SLJIT_SUCCESS;
		return push_inst(compiler, SBFM | (1 << 22) | RD(dst) | RN(dst) | (31 << 10));
	case SLJIT_ADD:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (ADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_ADDC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_ADD;
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (ADC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_SUB:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (SUB ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_SUBC:
		compiler->status_flags_state = SLJIT_CURRENT_FLAGS_SUB;
		CHECK_FLAGS(1 << 29);
		return push_inst(compiler, (SBC ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_MUL:
		compiler->status_flags_state = 0;
		if (!(flags & SET_FLAGS))
			return push_inst(compiler, (MADD ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO));
		/* With SET_FLAGS a three instruction sequence is emitted that ends in
		   a SUBS comparing a value held in TMP_LR against a shifted copy of
		   the result. */
		if (flags & INT_OP) {
			FAIL_IF(push_inst(compiler, SMADDL | RD(dst) | RN(arg1) | RM(arg2) | (31 << 10)));
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_LR) | RN(TMP_ZERO) | RM(dst) | (2 << 22) | (31 << 10)));
			return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
		}
		FAIL_IF(push_inst(compiler, SMULH | RD(TMP_LR) | RN(arg1) | RM(arg2)));
		FAIL_IF(push_inst(compiler, MADD | RD(dst) | RN(arg1) | RM(arg2) | RT2(TMP_ZERO)));
		return push_inst(compiler, SUBS | RD(TMP_ZERO) | RN(TMP_LR) | RM(dst) | (2 << 22) | (63 << 10));
	case SLJIT_AND:
		CHECK_FLAGS(3 << 29);
		return push_inst(compiler, (AND ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler, (ORR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		break; /* Set flags. */
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler, (EOR ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		break; /* Set flags. */
	case SLJIT_SHL:
	case SLJIT_MSHL:
		FAIL_IF(push_inst(compiler, (LSLV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		break; /* Set flags. */
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		FAIL_IF(push_inst(compiler, (LSRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		break; /* Set flags. */
	case SLJIT_ASHR:
	case SLJIT_MASHR:
		FAIL_IF(push_inst(compiler, (ASRV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2)));
		break; /* Set flags. */
	case SLJIT_ROTL:
		/* Rotate left is done as rotate right by the negated amount
		   (TMP_REG2 = 0 - arg2). */
		FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(arg2)));
		arg2 = TMP_REG2;
		/* fallthrough */
	case SLJIT_ROTR:
		return push_inst(compiler, (RORV ^ inv_bits) | RD(dst) | RN(arg1) | RM(arg2));
	default:
		SLJIT_UNREACHABLE();
		return SLJIT_SUCCESS;
	}

set_flags:
	/* The instruction emitted above does not update the status flags itself:
	   when they are requested, follow it with a SUBS of TMP_ZERO from the
	   result, discarding the value into TMP_ZERO. */
	if (flags & SET_FLAGS)
		return push_inst(compiler, (SUBS ^ inv_bits) | RD(TMP_ZERO) | RN(dst) | RM(TMP_ZERO));
	return SLJIT_SUCCESS;
}
970
/* Access kind bits which are or-ed to the size in the flags of the memory emitters. */
#define STORE		(1 << 4)
#define SIGNED		(1 << 5)

/* Operand sizes: base 2 logarithm of the number of bytes accessed. */
#define BYTE_SIZE	0
#define HALF_SIZE	1
#define INT_SIZE	2
#define WORD_SIZE	3

/* Extracts the operand size (the two lowest bits) from a flags value. */
#define MEM_SIZE_SHIFT(flags)	((sljit_ins)(flags) & 3)
980
/*
 * Emits a load or a store of the general purpose register `reg`.
 *
 * flags:   BYTE_SIZE, HALF_SIZE, INT_SIZE or WORD_SIZE, optionally combined
 *          with STORE (emit a store) and SIGNED (sign extending load, only
 *          inspected for loads).
 * reg:     register which is loaded or stored.
 * arg:     memory operand, SLJIT_MEM must be set. It holds the base register
 *          and optionally an offset register.
 * argw:    displacement; when an offset register is present only its two
 *          lowest bits are used, as the left shift applied to that register.
 * tmp_reg: register which may be overwritten while the address is formed.
 *
 * Returns the result of the last push_inst() call, or the compiler error
 * when an earlier instruction could not be emitted.
 */
static sljit_s32 emit_op_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg,
	sljit_s32 arg, sljit_sw argw, sljit_s32 tmp_reg)
{
	sljit_u32 shift = MEM_SIZE_SHIFT(flags);
	sljit_u32 type = (shift << 30);

	/* Loads select either the sign extending or the zero extending form. */
	if (!(flags & STORE))
		type |= (flags & SIGNED) ? 0x00800000 : 0x00400000;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		/* Base register + (offset register << argw). */
		argw &= 0x3;

		/* The register offset form supports no shift, or a shift by the access size. */
		if (argw == 0 || argw == shift)
			return push_inst(compiler, STRB | type | RT(reg)
				| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));

		/* Any other shift: compute the full address into tmp_reg first. */
		FAIL_IF(push_inst(compiler, ADD | RD(tmp_reg) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10)));
		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg));
	}

	arg &= REG_MASK;

	if (!arg) {
		/* Absolute address: the scaled 12 bit offset part is left out of the
		   immediate and is added by the memory instruction itself. */
		FAIL_IF(load_immediate(compiler, tmp_reg, argw & ~(0xfff << shift)));

		argw = (argw >> shift) & 0xfff;

		return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
	}

	if ((argw & ((1 << shift) - 1)) == 0) {
		/* The displacement is a multiple of the access size. */
		if (argw >= 0) {
			if ((argw >> shift) <= 0xfff)
				return push_inst(compiler, STRBI | type | RT(reg) | RN(arg) | ((sljit_ins)argw << (10 - shift)));

			if (argw <= 0xffffff) {
				/* Upper 12 bits are added to the base, lower 12 bits are the scaled offset. */
				FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)argw >> 12) << 10)));

				argw = ((argw & 0xfff) >> shift);
				return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
			}
		} else if (argw < -256 && argw >= -0xfff000) {
			/* Subtract a rounded up multiple of 4096, the rest is a non-negative scaled offset. */
			FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | (((sljit_ins)(-argw + 0xfff) >> 12) << 10)));
			argw = ((0x1000 + argw) & 0xfff) >> shift;
			return push_inst(compiler, STRBI | type | RT(reg) | RN(tmp_reg) | ((sljit_ins)argw << 10));
		}
	}

	/* Signed 9 bit displacement without scaling. */
	if (argw <= 0xff && argw >= -0x100)
		return push_inst(compiler, STURBI | type | RT(reg) | RN(arg) | (((sljit_ins)argw & 0x1ff) << 12));

	/*
	 * The displacement is within 256 bytes of a multiple of 4096: adjust the
	 * base by that multiple and cover the rest with the signed 9 bit offset.
	 * The rest can have either sign, so the multiple must be rounded to the
	 * nearest one (+0x100 / +0xff before the shift) rather than truncated;
	 * otherwise the resulting address would be off by 4096.
	 */
	if (argw >= 0) {
		if (argw <= 0xfff0ff && ((argw + 0x100) & 0xfff) <= 0x1ff) {
			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((((sljit_ins)argw + 0x100) >> 12) << 10)));
			return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
		}
	} else if (argw >= -0xfff100 && ((-argw + 0xff) & 0xfff) <= 0x1ff) {
		FAIL_IF(push_inst(compiler, SUBI | (1 << 22) | RD(tmp_reg) | RN(arg) | ((((sljit_ins)(-argw) + 0xff) >> 12) << 10)));
		return push_inst(compiler, STURBI | type | RT(reg) | RN(tmp_reg) | (((sljit_ins)argw & 0x1ff) << 12));
	}

	/* Generic fallback: the displacement is loaded into tmp_reg and used as an offset register. */
	FAIL_IF(load_immediate(compiler, tmp_reg, argw));

	return push_inst(compiler, STRB | type | RT(reg) | RN(arg) | RM(tmp_reg));
}
1048
1049 /* --------------------------------------------------------------------- */
1050 /* Entry, exit */
1051 /* --------------------------------------------------------------------- */
1052
sljit_emit_enter(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1053 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1054 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1055 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1056 {
1057 sljit_s32 prev, fprev, saved_regs_size, i, tmp;
1058 sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1059 sljit_ins offs;
1060
1061 CHECK_ERROR();
1062 CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1063 set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1064
1065 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - saved_arg_count, 2);
1066 saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
1067
1068 local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
1069 compiler->local_size = local_size;
1070
1071 if (local_size <= 512) {
1072 FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
1073 | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15)));
1074 offs = (sljit_ins)(local_size - 2 * SSIZE_OF(sw)) << (15 - 3);
1075 local_size = 0;
1076 } else {
1077 saved_regs_size = ((saved_regs_size - 2 * SSIZE_OF(sw)) + 0xf) & ~0xf;
1078
1079 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)saved_regs_size << 10)));
1080 offs = (sljit_ins)(saved_regs_size - 2 * SSIZE_OF(sw)) << (15 - 3);
1081 local_size -= saved_regs_size;
1082 SLJIT_ASSERT(local_size > 0);
1083 }
1084
1085 prev = -1;
1086
1087 tmp = SLJIT_S0 - saveds;
1088 for (i = SLJIT_S0 - saved_arg_count; i > tmp; i--) {
1089 if (prev == -1) {
1090 prev = i;
1091 continue;
1092 }
1093 FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
1094 offs -= (sljit_ins)2 << 15;
1095 prev = -1;
1096 }
1097
1098 for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
1099 if (prev == -1) {
1100 prev = i;
1101 continue;
1102 }
1103 FAIL_IF(push_inst(compiler, STP | RT(prev) | RT2(i) | RN(SLJIT_SP) | offs));
1104 offs -= (sljit_ins)2 << 15;
1105 prev = -1;
1106 }
1107
1108 fprev = -1;
1109
1110 tmp = SLJIT_FS0 - fsaveds;
1111 for (i = SLJIT_FS0; i > tmp; i--) {
1112 if (fprev == -1) {
1113 fprev = i;
1114 continue;
1115 }
1116 FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
1117 offs -= (sljit_ins)2 << 15;
1118 fprev = -1;
1119 }
1120
1121 for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1122 if (fprev == -1) {
1123 fprev = i;
1124 continue;
1125 }
1126 FAIL_IF(push_inst(compiler, STP_F64 | VT(fprev) | VT2(i) | RN(SLJIT_SP) | offs));
1127 offs -= (sljit_ins)2 << 15;
1128 fprev = -1;
1129 }
1130
1131 if (fprev != -1)
1132 FAIL_IF(push_inst(compiler, STRI_F64 | VT(fprev) | RN(SLJIT_SP) | (offs >> 5) | (1 << 10)));
1133
1134 if (prev != -1)
1135 FAIL_IF(push_inst(compiler, STRI | RT(prev) | RN(SLJIT_SP) | (offs >> 5) | ((fprev == -1) ? (1 << 10) : 0)));
1136
1137
1138 #ifdef _WIN32
1139 if (local_size > 4096)
1140 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
1141 #endif /* _WIN32 */
1142
1143 if (!(options & SLJIT_ENTER_REG_ARG)) {
1144 arg_types >>= SLJIT_ARG_SHIFT;
1145 saved_arg_count = 0;
1146 tmp = SLJIT_R0;
1147
1148 while (arg_types) {
1149 if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
1150 if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1151 FAIL_IF(push_inst(compiler, MOV | RD(SLJIT_S0 - saved_arg_count) | RM(tmp)));
1152 saved_arg_count++;
1153 }
1154 tmp++;
1155 }
1156 arg_types >>= SLJIT_ARG_SHIFT;
1157 }
1158 }
1159
1160 #ifdef _WIN32
1161 if (local_size > 4096) {
1162 if (local_size < 4 * 4096) {
1163 /* No need for a loop. */
1164
1165 if (local_size >= 2 * 4096) {
1166 if (local_size >= 3 * 4096) {
1167 FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
1168 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
1169 }
1170
1171 FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
1172 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
1173 }
1174 }
1175 else {
1176 FAIL_IF(push_inst(compiler, MOVZ | RD(TMP_REG1) | ((((sljit_ins)local_size >> 12) - 1) << 5)));
1177 FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
1178 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (1 << 10) | (1 << 22)));
1179 FAIL_IF(push_inst(compiler, SUBI | (1 << 29) | RD(TMP_REG1) | RN(TMP_REG1) | (1 << 10)));
1180 FAIL_IF(push_inst(compiler, B_CC | ((((sljit_ins) -3) & 0x7ffff) << 5) | 0x1 /* not-equal */));
1181 }
1182
1183 local_size &= 0xfff;
1184
1185 if (local_size > 0)
1186 FAIL_IF(push_inst(compiler, LDRI | RT(TMP_ZERO) | RN(SLJIT_SP)));
1187 else
1188 FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
1189 }
1190
1191 if (local_size > 0) {
1192 if (local_size <= 512)
1193 FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
1194 | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15)));
1195 else {
1196 if (local_size >= 4096)
1197 local_size = (1 << (22 - 10));
1198
1199 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10)));
1200 FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
1201 }
1202 }
1203
1204 #else /* !_WIN32 */
1205
1206 /* The local_size does not include saved registers size. */
1207 if (local_size != 0) {
1208 if (local_size > 0xfff) {
1209 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | (((sljit_ins)local_size >> 12) << 10) | (1 << 22)));
1210 local_size &= 0xfff;
1211 }
1212
1213 if (local_size > 512 || local_size == 0) {
1214 if (local_size != 0)
1215 FAIL_IF(push_inst(compiler, SUBI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)local_size << 10)));
1216
1217 FAIL_IF(push_inst(compiler, STP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
1218 } else
1219 FAIL_IF(push_inst(compiler, STP_PRE | RT(TMP_FP) | RT2(TMP_LR)
1220 | RN(SLJIT_SP) | (sljit_ins)((-(local_size >> 3) & 0x7f) << 15)));
1221 }
1222
1223 #endif /* _WIN32 */
1224
1225 return push_inst(compiler, ADDI | RD(TMP_FP) | RN(SLJIT_SP) | (0 << 10));
1226 }
1227
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1228 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1229 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1230 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1231 {
1232 sljit_s32 saved_regs_size;
1233
1234 CHECK_ERROR();
1235 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1236 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1237
1238 saved_regs_size = GET_SAVED_REGISTERS_SIZE(scratches, saveds - SLJIT_KEPT_SAVEDS_COUNT(options), 2);
1239 saved_regs_size += GET_SAVED_FLOAT_REGISTERS_SIZE(fscratches, fsaveds, f64);
1240
1241 compiler->local_size = (local_size + saved_regs_size + 0xf) & ~0xf;
1242 return SLJIT_SUCCESS;
1243 }
1244
/*
 * Emits the code which restores the saved registers and releases the frame
 * of compiler->local_size bytes.
 *
 * is_return_to: when zero, both TMP_FP and TMP_LR are reloaded from the
 *               stack; otherwise only TMP_FP is reloaded.
 *
 * Returns the result of the last push_inst() call, or the compiler error
 * when an earlier instruction could not be emitted.
 */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_s32 is_return_to)
{
	sljit_s32 frame_size, pending, fpending, reg, last;
	sljit_ins pair_offs;

	frame_size = compiler->local_size;

	/* Reload TMP_FP (and TMP_LR). A post-indexed load also releases the part
	   above 512 bytes when that part fits into its immediate field. */
	if (is_return_to) {
		if (frame_size > 512 && frame_size <= 512 + 248) {
			FAIL_IF(push_inst(compiler, LDRI_POST | RT(TMP_FP) | RN(SLJIT_SP) | ((sljit_ins)(frame_size - 512) << 12)));
			frame_size = 512;
		} else {
			FAIL_IF(push_inst(compiler, LDRI | RT(TMP_FP) | RN(SLJIT_SP) | 0));
		}
	} else {
		if (frame_size > 512 && frame_size <= 512 + 496) {
			FAIL_IF(push_inst(compiler, LDP_POST | RT(TMP_FP) | RT2(TMP_LR)
				| RN(SLJIT_SP) | ((sljit_ins)(frame_size - 512) << (15 - 3))));
			frame_size = 512;
		} else {
			FAIL_IF(push_inst(compiler, LDP | RT(TMP_FP) | RT2(TMP_LR) | RN(SLJIT_SP)));
		}
	}

	/* Release everything above the last 512 bytes with add instructions. */
	if (frame_size > 512) {
		frame_size -= 512;

		if (frame_size > 0xfff) {
			FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP)
				| (((sljit_ins)frame_size >> 12) << 10) | (1 << 22)));
			frame_size &= 0xfff;
		}

		FAIL_IF(push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | ((sljit_ins)frame_size << 10)));
		frame_size = 512;
	}

	/* Registers are reloaded in pairs, from the highest offset downwards.
	   A register without a pair is remembered in pending / fpending. */
	pair_offs = (sljit_ins)(frame_size - 2 * SSIZE_OF(sw)) << (15 - 3);
	pending = -1;

	last = SLJIT_S0 - compiler->saveds;
	for (reg = SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options); reg > last; reg--) {
		if (pending != -1) {
			FAIL_IF(push_inst(compiler, LDP | RT(pending) | RT2(reg) | RN(SLJIT_SP) | pair_offs));
			pair_offs -= (sljit_ins)2 << 15;
			pending = -1;
		} else {
			pending = reg;
		}
	}

	for (reg = compiler->scratches; reg >= SLJIT_FIRST_SAVED_REG; reg--) {
		if (pending != -1) {
			FAIL_IF(push_inst(compiler, LDP | RT(pending) | RT2(reg) | RN(SLJIT_SP) | pair_offs));
			pair_offs -= (sljit_ins)2 << 15;
			pending = -1;
		} else {
			pending = reg;
		}
	}

	fpending = -1;

	last = SLJIT_FS0 - compiler->fsaveds;
	for (reg = SLJIT_FS0; reg > last; reg--) {
		if (fpending != -1) {
			FAIL_IF(push_inst(compiler, LDP_F64 | VT(fpending) | VT2(reg) | RN(SLJIT_SP) | pair_offs));
			pair_offs -= (sljit_ins)2 << 15;
			fpending = -1;
		} else {
			fpending = reg;
		}
	}

	for (reg = compiler->fscratches; reg >= SLJIT_FIRST_SAVED_FLOAT_REG; reg--) {
		if (fpending != -1) {
			FAIL_IF(push_inst(compiler, LDP_F64 | VT(fpending) | VT2(reg) | RN(SLJIT_SP) | pair_offs));
			pair_offs -= (sljit_ins)2 << 15;
			fpending = -1;
		} else {
			fpending = reg;
		}
	}

	/* Unpaired registers are reloaded alone. The float register comes from
	   the upper word of the current slot; the general purpose register
	   comes from there too unless a float register occupies it. */
	if (fpending != -1) {
		FAIL_IF(push_inst(compiler, LDRI_F64 | VT(fpending) | RN(SLJIT_SP) | (pair_offs >> 5) | (1 << 10)));
	}

	if (pending != -1) {
		FAIL_IF(push_inst(compiler, LDRI | RT(pending) | RN(SLJIT_SP) | (pair_offs >> 5) | ((fpending == -1) ? (1 << 10) : 0)));
	}

	/* This instruction and the call / jump which follows it can be executed in parallel. */
	return push_inst(compiler, ADDI | RD(SLJIT_SP) | RN(SLJIT_SP) | (sljit_ins)(frame_size << 10));
}
1335
sljit_emit_return_void(struct sljit_compiler * compiler)1336 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1337 {
1338 CHECK_ERROR();
1339 CHECK(check_sljit_emit_return_void(compiler));
1340
1341 FAIL_IF(emit_stack_frame_release(compiler, 0));
1342
1343 return push_inst(compiler, RET | RN(TMP_LR));
1344 }
1345
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1346 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1347 sljit_s32 src, sljit_sw srcw)
1348 {
1349 CHECK_ERROR();
1350 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1351
1352 if (src & SLJIT_MEM) {
1353 ADJUST_LOCAL_OFFSET(src, srcw);
1354 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
1355 src = TMP_REG1;
1356 srcw = 0;
1357 } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1358 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
1359 src = TMP_REG1;
1360 srcw = 0;
1361 }
1362
1363 FAIL_IF(emit_stack_frame_release(compiler, 1));
1364
1365 SLJIT_SKIP_CHECKS(compiler);
1366 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1367 }
1368
1369 /* --------------------------------------------------------------------- */
1370 /* Operators */
1371 /* --------------------------------------------------------------------- */
1372
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1373 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1374 {
1375 sljit_ins inv_bits = (op & SLJIT_32) ? W_OP : 0;
1376
1377 CHECK_ERROR();
1378 CHECK(check_sljit_emit_op0(compiler, op));
1379
1380 op = GET_OPCODE(op);
1381 switch (op) {
1382 case SLJIT_BREAKPOINT:
1383 return push_inst(compiler, BRK);
1384 case SLJIT_NOP:
1385 return push_inst(compiler, NOP);
1386 case SLJIT_LMUL_UW:
1387 case SLJIT_LMUL_SW:
1388 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(SLJIT_R0)));
1389 FAIL_IF(push_inst(compiler, MADD | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1390 return push_inst(compiler, (op == SLJIT_LMUL_UW ? UMULH : SMULH) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1391 case SLJIT_DIVMOD_UW:
1392 case SLJIT_DIVMOD_SW:
1393 FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RD(TMP_REG1) | RM(SLJIT_R0)));
1394 FAIL_IF(push_inst(compiler, ((op == SLJIT_DIVMOD_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1)));
1395 FAIL_IF(push_inst(compiler, (MADD ^ inv_bits) | RD(SLJIT_R1) | RN(SLJIT_R0) | RM(SLJIT_R1) | RT2(TMP_ZERO)));
1396 return push_inst(compiler, (SUB ^ inv_bits) | RD(SLJIT_R1) | RN(TMP_REG1) | RM(SLJIT_R1));
1397 case SLJIT_DIV_UW:
1398 case SLJIT_DIV_SW:
1399 return push_inst(compiler, ((op == SLJIT_DIV_UW ? UDIV : SDIV) ^ inv_bits) | RD(SLJIT_R0) | RN(SLJIT_R0) | RM(SLJIT_R1));
1400 case SLJIT_ENDBR:
1401 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1402 return SLJIT_SUCCESS;
1403 }
1404
1405 return SLJIT_SUCCESS;
1406 }
1407
sljit_emit_op1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1408 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
1409 sljit_s32 dst, sljit_sw dstw,
1410 sljit_s32 src, sljit_sw srcw)
1411 {
1412 sljit_s32 dst_r, flags, mem_flags;
1413 sljit_s32 op_flags = GET_ALL_FLAGS(op);
1414
1415 CHECK_ERROR();
1416 CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
1417 ADJUST_LOCAL_OFFSET(dst, dstw);
1418 ADJUST_LOCAL_OFFSET(src, srcw);
1419
1420 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1421
1422 op = GET_OPCODE(op);
1423 if (op >= SLJIT_MOV && op <= SLJIT_MOV_P) {
1424 /* Both operands are registers. */
1425 if (dst_r != TMP_REG1 && FAST_IS_REG(src))
1426 return emit_op_imm(compiler, op | ((op_flags & SLJIT_32) ? INT_OP : 0), dst_r, TMP_REG1, src);
1427
1428 switch (op) {
1429 case SLJIT_MOV:
1430 case SLJIT_MOV_P:
1431 mem_flags = WORD_SIZE;
1432 break;
1433 case SLJIT_MOV_U8:
1434 mem_flags = BYTE_SIZE;
1435 if (src == SLJIT_IMM)
1436 srcw = (sljit_u8)srcw;
1437 break;
1438 case SLJIT_MOV_S8:
1439 mem_flags = BYTE_SIZE | SIGNED;
1440 if (src == SLJIT_IMM)
1441 srcw = (sljit_s8)srcw;
1442 break;
1443 case SLJIT_MOV_U16:
1444 mem_flags = HALF_SIZE;
1445 if (src == SLJIT_IMM)
1446 srcw = (sljit_u16)srcw;
1447 break;
1448 case SLJIT_MOV_S16:
1449 mem_flags = HALF_SIZE | SIGNED;
1450 if (src == SLJIT_IMM)
1451 srcw = (sljit_s16)srcw;
1452 break;
1453 case SLJIT_MOV_U32:
1454 mem_flags = INT_SIZE;
1455 if (src == SLJIT_IMM)
1456 srcw = (sljit_u32)srcw;
1457 break;
1458 case SLJIT_MOV_S32:
1459 case SLJIT_MOV32:
1460 mem_flags = INT_SIZE | SIGNED;
1461 if (src == SLJIT_IMM)
1462 srcw = (sljit_s32)srcw;
1463 break;
1464 default:
1465 SLJIT_UNREACHABLE();
1466 mem_flags = 0;
1467 break;
1468 }
1469
1470 if (src == SLJIT_IMM)
1471 FAIL_IF(emit_op_imm(compiler, SLJIT_MOV | ARG2_IMM, dst_r, TMP_REG1, srcw));
1472 else if (!(src & SLJIT_MEM))
1473 dst_r = src;
1474 else
1475 FAIL_IF(emit_op_mem(compiler, mem_flags, dst_r, src, srcw, TMP_REG1));
1476
1477 if (dst & SLJIT_MEM)
1478 return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
1479 return SLJIT_SUCCESS;
1480 }
1481
1482 flags = HAS_FLAGS(op_flags) ? SET_FLAGS : 0;
1483
1484 switch (op) {
1485 case SLJIT_REV_U16:
1486 case SLJIT_REV_S16:
1487 mem_flags = HALF_SIZE;
1488 break;
1489 case SLJIT_REV_U32:
1490 case SLJIT_REV_S32:
1491 mem_flags = INT_SIZE;
1492 break;
1493 default:
1494 mem_flags = WORD_SIZE;
1495
1496 if (op_flags & SLJIT_32) {
1497 flags |= INT_OP;
1498 mem_flags = INT_SIZE;
1499 }
1500 break;
1501 }
1502
1503 if (src & SLJIT_MEM) {
1504 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src, srcw, TMP_REG2));
1505 src = TMP_REG2;
1506 }
1507
1508 emit_op_imm(compiler, flags | op, dst_r, TMP_REG1, src);
1509
1510 if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
1511 return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
1512 return SLJIT_SUCCESS;
1513 }
1514
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1515 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1516 sljit_s32 dst, sljit_sw dstw,
1517 sljit_s32 src1, sljit_sw src1w,
1518 sljit_s32 src2, sljit_sw src2w)
1519 {
1520 sljit_s32 dst_r, flags, mem_flags;
1521
1522 CHECK_ERROR();
1523 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
1524 ADJUST_LOCAL_OFFSET(dst, dstw);
1525 ADJUST_LOCAL_OFFSET(src1, src1w);
1526 ADJUST_LOCAL_OFFSET(src2, src2w);
1527
1528 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1529 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
1530 mem_flags = WORD_SIZE;
1531
1532 if (op & SLJIT_32) {
1533 flags |= INT_OP;
1534 mem_flags = INT_SIZE;
1535 }
1536
1537 if (dst == TMP_REG1)
1538 flags |= UNUSED_RETURN;
1539
1540 if (src1 & SLJIT_MEM) {
1541 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, src1, src1w, TMP_REG1));
1542 src1 = TMP_REG1;
1543 }
1544
1545 if (src2 & SLJIT_MEM) {
1546 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG2, src2, src2w, TMP_REG2));
1547 src2 = TMP_REG2;
1548 }
1549
1550 if (src1 == SLJIT_IMM)
1551 flags |= ARG1_IMM;
1552 else
1553 src1w = src1;
1554
1555 if (src2 == SLJIT_IMM)
1556 flags |= ARG2_IMM;
1557 else
1558 src2w = src2;
1559
1560 emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src1w, src2w);
1561
1562 if (dst & SLJIT_MEM)
1563 return emit_op_mem(compiler, mem_flags | STORE, dst_r, dst, dstw, TMP_REG2);
1564 return SLJIT_SUCCESS;
1565 }
1566
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1567 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
1568 sljit_s32 src1, sljit_sw src1w,
1569 sljit_s32 src2, sljit_sw src2w)
1570 {
1571 CHECK_ERROR();
1572 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
1573
1574 SLJIT_SKIP_CHECKS(compiler);
1575 return sljit_emit_op2(compiler, op, TMP_REG1, 0, src1, src1w, src2, src2w);
1576 }
1577
sljit_emit_shift_into(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1_reg,sljit_s32 src2_reg,sljit_s32 src3,sljit_sw src3w)1578 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
1579 sljit_s32 dst_reg,
1580 sljit_s32 src1_reg,
1581 sljit_s32 src2_reg,
1582 sljit_s32 src3, sljit_sw src3w)
1583 {
1584 sljit_ins inv_bits, imm;
1585 sljit_s32 is_left;
1586 sljit_sw mask;
1587
1588 CHECK_ERROR();
1589 CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
1590
1591 is_left = (GET_OPCODE(op) == SLJIT_SHL || GET_OPCODE(op) == SLJIT_MSHL);
1592
1593 if (src1_reg == src2_reg) {
1594 SLJIT_SKIP_CHECKS(compiler);
1595 return sljit_emit_op2(compiler, (is_left ? SLJIT_ROTL : SLJIT_ROTR) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
1596 }
1597
1598 ADJUST_LOCAL_OFFSET(src3, src3w);
1599
1600 inv_bits = (op & SLJIT_32) ? W_OP : 0;
1601
1602 if (src3 == SLJIT_IMM) {
1603 mask = inv_bits ? 0x1f : 0x3f;
1604 src3w &= mask;
1605
1606 if (src3w == 0)
1607 return SLJIT_SUCCESS;
1608
1609 if (is_left)
1610 src3w = (src3w ^ mask) + 1;
1611
1612 return push_inst(compiler, (EXTR ^ (inv_bits | (inv_bits >> 9))) | RD(dst_reg)
1613 | RN(is_left ? src1_reg : src2_reg) | RM(is_left ? src2_reg : src1_reg) | ((sljit_ins)src3w << 10));
1614 }
1615
1616 if (src3 & SLJIT_MEM) {
1617 FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG2, src3, src3w, TMP_REG2));
1618 src3 = TMP_REG2;
1619 } else if (dst_reg == src3) {
1620 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG2) | RM(src3)));
1621 src3 = TMP_REG2;
1622 }
1623
1624 FAIL_IF(push_inst(compiler, ((is_left ? LSLV : LSRV) ^ inv_bits) | RD(dst_reg) | RN(src1_reg) | RM(src3)));
1625
1626 if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
1627 /* Shift left/right by 1. */
1628 if (is_left)
1629 imm = (sljit_ins)(inv_bits ? ((1 << 16) | (31 << 10)) : ((1 << 16) | (63 << 10) | (1 << 22)));
1630 else
1631 imm = (sljit_ins)(inv_bits ? ((31 << 16) | (30 << 10)) : ((63 << 16) | (62 << 10) | (1 << 22)));
1632
1633 FAIL_IF(push_inst(compiler, (UBFM ^ (inv_bits | (inv_bits >> 9))) | RD(TMP_REG1) | RN(src2_reg) | imm));
1634
1635 /* Set imm to mask. */
1636 imm = (sljit_ins)(inv_bits ? (4 << 10) : ((5 << 10) | (1 << 22)));
1637 FAIL_IF(push_inst(compiler, (EORI ^ inv_bits) | RD(TMP_REG2) | RN(src3) | imm));
1638
1639 src2_reg = TMP_REG1;
1640 } else
1641 FAIL_IF(push_inst(compiler, (SUB ^ inv_bits) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(src3)));
1642
1643 FAIL_IF(push_inst(compiler, ((is_left ? LSRV : LSLV) ^ inv_bits) | RD(TMP_REG1) | RN(src2_reg) | RM(TMP_REG2)));
1644 return push_inst(compiler, (ORR ^ inv_bits) | RD(dst_reg) | RN(dst_reg) | RM(TMP_REG1));
1645 }
1646
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)1647 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
1648 sljit_s32 src, sljit_sw srcw)
1649 {
1650 CHECK_ERROR();
1651 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
1652 ADJUST_LOCAL_OFFSET(src, srcw);
1653
1654 switch (op) {
1655 case SLJIT_FAST_RETURN:
1656 if (FAST_IS_REG(src))
1657 FAIL_IF(push_inst(compiler, MOV | RD(TMP_LR) | RM(src)));
1658 else
1659 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_LR, src, srcw, TMP_REG1));
1660
1661 return push_inst(compiler, RET | RN(TMP_LR));
1662 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
1663 return SLJIT_SUCCESS;
1664 case SLJIT_PREFETCH_L1:
1665 case SLJIT_PREFETCH_L2:
1666 case SLJIT_PREFETCH_L3:
1667 case SLJIT_PREFETCH_ONCE:
1668 SLJIT_ASSERT(reg_map[1] == 0 && reg_map[3] == 2 && reg_map[5] == 4);
1669
1670 /* The reg_map[op] should provide the appropriate constant. */
1671 if (op == SLJIT_PREFETCH_L1)
1672 op = 1;
1673 else if (op == SLJIT_PREFETCH_L2)
1674 op = 3;
1675 else if (op == SLJIT_PREFETCH_L3)
1676 op = 5;
1677 else
1678 op = 2;
1679
1680 /* Signed word sized load is the prefetch instruction. */
1681 return emit_op_mem(compiler, WORD_SIZE | SIGNED, op, src, srcw, TMP_REG1);
1682 }
1683
1684 return SLJIT_SUCCESS;
1685 }
1686
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)1687 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
1688 sljit_s32 dst, sljit_sw dstw)
1689 {
1690 sljit_s32 dst_r = TMP_LR;
1691
1692 CHECK_ERROR();
1693 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
1694 ADJUST_LOCAL_OFFSET(dst, dstw);
1695
1696 switch (op) {
1697 case SLJIT_FAST_ENTER:
1698 if (FAST_IS_REG(dst))
1699 return push_inst(compiler, MOV | RD(dst) | RM(TMP_LR));
1700 break;
1701 case SLJIT_GET_RETURN_ADDRESS:
1702 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1703 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, dst_r, SLJIT_MEM1(SLJIT_SP), 0x8, TMP_REG2));
1704 break;
1705 }
1706
1707 if (dst & SLJIT_MEM)
1708 return emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2);
1709
1710 return SLJIT_SUCCESS;
1711 }
1712
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)1713 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
1714 {
1715 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
1716
1717 if (type == SLJIT_GP_REGISTER)
1718 return reg_map[reg];
1719
1720 if (type != SLJIT_FLOAT_REGISTER && type != SLJIT_SIMD_REG_64 && type != SLJIT_SIMD_REG_128)
1721 return -1;
1722
1723 return freg_map[reg];
1724 }
1725
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)1726 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1727 void *instruction, sljit_u32 size)
1728 {
1729 SLJIT_UNUSED_ARG(size);
1730 CHECK_ERROR();
1731 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1732
1733 return push_inst(compiler, *(sljit_ins*)instruction);
1734 }
1735
1736 /* --------------------------------------------------------------------- */
1737 /* Floating point operators */
1738 /* --------------------------------------------------------------------- */
1739
/*
 * Emits a load or a store of the floating point register `reg`.
 *
 * flags: operand size (INT_SIZE, WORD_SIZE, ...), optionally combined with
 *        STORE; a load is emitted when STORE is not set.
 * arg:   memory operand, SLJIT_MEM must be set.
 * argw:  displacement, or the shift of the offset register when arg has one.
 *
 * TMP_REG1 may be overwritten while the address is formed.
 */
static sljit_s32 emit_fop_mem(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_u32 size_log2 = MEM_SIZE_SHIFT(flags);
	sljit_ins opc_bits = (size_log2 << 30);
	sljit_s32 base;

	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (!(flags & STORE))
		opc_bits |= 0x00400000;

	if (arg & OFFS_REG_MASK) {
		/* Base register + (offset register << argw). */
		argw &= 3;

		if (argw != 0 && argw != size_log2) {
			/* The shift is not supported by the register offset form:
			   the address is computed into TMP_REG1. */
			FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | ((sljit_ins)argw << 10)));
			return push_inst(compiler, STR_FI | opc_bits | VT(reg) | RN(TMP_REG1));
		}

		return push_inst(compiler, STR_FR | opc_bits | VT(reg)
			| RN(arg & REG_MASK) | RM(OFFS_REG(arg)) | (argw ? (1 << 12) : 0));
	}

	base = arg & REG_MASK;

	if (base == 0) {
		/* Absolute address: the scaled 12 bit offset part is added by
		   the memory instruction. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, argw & ~(0xfff << size_log2)));

		argw = (argw >> size_log2) & 0xfff;

		return push_inst(compiler, STR_FI | opc_bits | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10));
	}

	if (argw >= 0 && (argw & ((1 << size_log2) - 1)) == 0) {
		/* Non-negative displacement which is a multiple of the operand size. */
		if ((argw >> size_log2) <= 0xfff)
			return push_inst(compiler, STR_FI | opc_bits | VT(reg) | RN(base) | ((sljit_ins)argw << (10 - size_log2)));

		if (argw <= 0xffffff) {
			/* Upper 12 bits are added to the base, lower 12 bits are the scaled offset. */
			FAIL_IF(push_inst(compiler, ADDI | (1 << 22) | RD(TMP_REG1) | RN(base) | (((sljit_ins)argw >> 12) << 10)));

			argw = ((argw & 0xfff) >> size_log2);
			return push_inst(compiler, STR_FI | opc_bits | VT(reg) | RN(TMP_REG1) | ((sljit_ins)argw << 10));
		}
	}

	/* Signed 9 bit displacement without scaling. */
	if (argw >= -256 && argw <= 255)
		return push_inst(compiler, STUR_FI | opc_bits | VT(reg) | RN(base) | (((sljit_ins)argw & 0x1ff) << 12));

	/* Generic fallback: the displacement is used as an offset register. */
	FAIL_IF(load_immediate(compiler, TMP_REG1, argw));
	return push_inst(compiler, STR_FR | opc_bits | VT(reg) | RN(base) | RM(TMP_REG1));
}
1788
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1789 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
1790 sljit_s32 dst, sljit_sw dstw,
1791 sljit_s32 src, sljit_sw srcw)
1792 {
1793 sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
1794 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1795
1796 if (GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64)
1797 inv_bits |= W_OP;
1798
1799 if (src & SLJIT_MEM) {
1800 FAIL_IF(emit_fop_mem(compiler, (op & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src, srcw));
1801 src = TMP_FREG1;
1802 }
1803
1804 FAIL_IF(push_inst(compiler, (FCVTZS ^ inv_bits) | RD(dst_r) | VN(src)));
1805
1806 if (dst & SLJIT_MEM)
1807 return emit_op_mem(compiler, ((GET_OPCODE(op) == SLJIT_CONV_S32_FROM_F64) ? INT_SIZE : WORD_SIZE) | STORE, TMP_REG1, dst, dstw, TMP_REG2);
1808 return SLJIT_SUCCESS;
1809 }
1810
/*
 * Common part of the integer to floating point conversions.
 *
 * ins: the complete conversion instruction without its register fields.
 *      The W_OP bit of ins selects a word sized (otherwise int sized)
 *      load of a memory source, and bit 22 of ins selects a word sized
 *      (otherwise int sized) store of a memory destination.
 *
 * A memory or immediate source is placed into TMP_REG1 first; a memory
 * destination is written from TMP_FREG1.
 *
 * Returns SLJIT_SUCCESS, or the compiler error when an instruction could
 * not be emitted.
 */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		/* The result of the load was previously ignored; a failure is
		   now reported immediately, like on the immediate path below. */
		FAIL_IF(emit_op_mem(compiler, (ins & W_OP) ? WORD_SIZE : INT_SIZE, TMP_REG1, src, srcw, TMP_REG1));
		src = TMP_REG1;
	} else if (src == SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}

	FAIL_IF(push_inst(compiler, ins | VD(dst_r) | RN(src)));

	if (dst & SLJIT_MEM)
		return emit_fop_mem(compiler, ((ins & (1 << 22)) ? WORD_SIZE : INT_SIZE) | STORE, TMP_FREG1, dst, dstw);
	return SLJIT_SUCCESS;
}
1831
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1832 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
1833 sljit_s32 dst, sljit_sw dstw,
1834 sljit_s32 src, sljit_sw srcw)
1835 {
1836 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1837
1838 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
1839 inv_bits |= W_OP;
1840
1841 if (src == SLJIT_IMM)
1842 srcw = (sljit_s32)srcw;
1843 }
1844
1845 return sljit_emit_fop1_conv_f64_from_w(compiler, SCVTF ^ inv_bits, dst, dstw, src, srcw);
1846 }
1847
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1848 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
1849 sljit_s32 dst, sljit_sw dstw,
1850 sljit_s32 src, sljit_sw srcw)
1851 {
1852 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1853
1854 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32) {
1855 inv_bits |= W_OP;
1856
1857 if (src == SLJIT_IMM)
1858 srcw = (sljit_u32)srcw;
1859 }
1860
1861 return sljit_emit_fop1_conv_f64_from_w(compiler, UCVTF ^ inv_bits, dst, dstw, src, srcw);
1862 }
1863
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1864 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1865 sljit_s32 src1, sljit_sw src1w,
1866 sljit_s32 src2, sljit_sw src2w)
1867 {
1868 sljit_s32 mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE;
1869 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1870
1871 if (src1 & SLJIT_MEM) {
1872 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
1873 src1 = TMP_FREG1;
1874 }
1875
1876 if (src2 & SLJIT_MEM) {
1877 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
1878 src2 = TMP_FREG2;
1879 }
1880
1881 FAIL_IF(push_inst(compiler, (FCMP ^ inv_bits) | VN(src1) | VM(src2)));
1882
1883 if (GET_FLAG_TYPE(op) != SLJIT_UNORDERED_OR_EQUAL)
1884 return SLJIT_SUCCESS;
1885
1886 FAIL_IF(push_inst(compiler, CSINC | (0x0 << 12) | RD(TMP_REG1) | RN(TMP_ZERO) | RM(TMP_ZERO)));
1887 return push_inst(compiler, CCMPI | (0x0 << 16) | (0x7 << 12) | RN(TMP_REG1) | 0x4);
1888 }
1889
sljit_emit_fop1(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)1890 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
1891 sljit_s32 dst, sljit_sw dstw,
1892 sljit_s32 src, sljit_sw srcw)
1893 {
1894 sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE;
1895 sljit_ins inv_bits;
1896
1897 CHECK_ERROR();
1898
1899 SLJIT_COMPILE_ASSERT((INT_SIZE ^ 0x1) == WORD_SIZE, must_be_one_bit_difference);
1900 SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
1901
1902 inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1903 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1904
1905 if (src & SLJIT_MEM) {
1906 FAIL_IF(emit_fop_mem(compiler, (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32) ? (mem_flags ^ 0x1) : mem_flags, dst_r, src, srcw));
1907 src = dst_r;
1908 }
1909
1910 switch (GET_OPCODE(op)) {
1911 case SLJIT_MOV_F64:
1912 if (src != dst_r) {
1913 if (dst_r != TMP_FREG1)
1914 FAIL_IF(push_inst(compiler, (FMOV ^ inv_bits) | VD(dst_r) | VN(src)));
1915 else
1916 dst_r = src;
1917 }
1918 break;
1919 case SLJIT_NEG_F64:
1920 FAIL_IF(push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(src)));
1921 break;
1922 case SLJIT_ABS_F64:
1923 FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src)));
1924 break;
1925 case SLJIT_CONV_F64_FROM_F32:
1926 FAIL_IF(push_inst(compiler, FCVT | (sljit_ins)((op & SLJIT_32) ? (1 << 22) : (1 << 15)) | VD(dst_r) | VN(src)));
1927 break;
1928 }
1929
1930 if (dst & SLJIT_MEM)
1931 return emit_fop_mem(compiler, mem_flags | STORE, dst_r, dst, dstw);
1932 return SLJIT_SUCCESS;
1933 }
1934
sljit_emit_fop2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1935 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
1936 sljit_s32 dst, sljit_sw dstw,
1937 sljit_s32 src1, sljit_sw src1w,
1938 sljit_s32 src2, sljit_sw src2w)
1939 {
1940 sljit_s32 dst_r, mem_flags = (op & SLJIT_32) ? INT_SIZE : WORD_SIZE;
1941 sljit_ins inv_bits = (op & SLJIT_32) ? (1 << 22) : 0;
1942
1943 CHECK_ERROR();
1944 CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1945 ADJUST_LOCAL_OFFSET(dst, dstw);
1946 ADJUST_LOCAL_OFFSET(src1, src1w);
1947 ADJUST_LOCAL_OFFSET(src2, src2w);
1948
1949 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
1950 if (src1 & SLJIT_MEM) {
1951 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG1, src1, src1w));
1952 src1 = TMP_FREG1;
1953 }
1954 if (src2 & SLJIT_MEM) {
1955 FAIL_IF(emit_fop_mem(compiler, mem_flags, TMP_FREG2, src2, src2w));
1956 src2 = TMP_FREG2;
1957 }
1958
1959 switch (GET_OPCODE(op)) {
1960 case SLJIT_ADD_F64:
1961 FAIL_IF(push_inst(compiler, (FADD ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1962 break;
1963 case SLJIT_SUB_F64:
1964 FAIL_IF(push_inst(compiler, (FSUB ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1965 break;
1966 case SLJIT_MUL_F64:
1967 FAIL_IF(push_inst(compiler, (FMUL ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1968 break;
1969 case SLJIT_DIV_F64:
1970 FAIL_IF(push_inst(compiler, (FDIV ^ inv_bits) | VD(dst_r) | VN(src1) | VM(src2)));
1971 break;
1972 case SLJIT_COPYSIGN_F64:
1973 FAIL_IF(push_inst(compiler, (FMOV_R ^ ((op & SLJIT_32) ? (W_OP | (1 << 22)) : 0)) | VN(src2) | RD(TMP_REG1)));
1974 FAIL_IF(push_inst(compiler, (FABS ^ inv_bits) | VD(dst_r) | VN(src1)));
1975 FAIL_IF(push_inst(compiler, TBZ | ((op & SLJIT_32) ? 0 : ((sljit_ins)1 << 31)) | (0x1f << 19) | (2 << 5) | RT(TMP_REG1)));
1976 return push_inst(compiler, (FNEG ^ inv_bits) | VD(dst_r) | VN(dst_r));
1977 }
1978
1979 if (!(dst & SLJIT_MEM))
1980 return SLJIT_SUCCESS;
1981 return emit_fop_mem(compiler, mem_flags | STORE, TMP_FREG1, dst, dstw);
1982 }
1983
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)1984 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
1985 sljit_s32 freg, sljit_f32 value)
1986 {
1987 sljit_u32 exp;
1988 union {
1989 sljit_u32 imm;
1990 sljit_f32 value;
1991 } u;
1992
1993 CHECK_ERROR();
1994 CHECK(check_sljit_emit_fset32(compiler, freg, value));
1995
1996 u.value = value;
1997
1998 if (u.imm == 0)
1999 return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_ZERO) | VD(freg) | (1 << 16));
2000
2001 if ((u.imm << (32 - 19)) == 0) {
2002 exp = (u.imm >> (23 + 2)) & 0x3f;
2003
2004 if (exp == 0x20 || exp == 0x1f)
2005 return push_inst(compiler, (FMOV_I ^ (1 << 22)) | (sljit_ins)((((u.imm >> 24) & 0x80) | ((u.imm >> 19) & 0x7f)) << 13) | VD(freg));
2006 }
2007
2008 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_s32)u.imm));
2009 return push_inst(compiler, (FMOV_R ^ (W_OP | (1 << 22))) | RN(TMP_REG1) | VD(freg) | (1 << 16));
2010 }
2011
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)2012 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
2013 sljit_s32 freg, sljit_f64 value)
2014 {
2015 sljit_uw exp;
2016 union {
2017 sljit_uw imm;
2018 sljit_f64 value;
2019 } u;
2020
2021 CHECK_ERROR();
2022 CHECK(check_sljit_emit_fset64(compiler, freg, value));
2023
2024 u.value = value;
2025
2026 if (u.imm == 0)
2027 return push_inst(compiler, FMOV_R | RN(TMP_ZERO) | VD(freg) | (sljit_ins)1 << 16);
2028
2029 if ((u.imm << (64 - 48)) == 0) {
2030 exp = (u.imm >> (52 + 2)) & 0x1ff;
2031
2032 if (exp == 0x100 || exp == 0xff)
2033 return push_inst(compiler, FMOV_I | (sljit_ins)((((u.imm >> 56) & 0x80) | ((u.imm >> 48) & 0x7f)) << 13) | VD(freg));
2034 }
2035
2036 FAIL_IF(load_immediate(compiler, TMP_REG1, (sljit_sw)u.imm));
2037 return push_inst(compiler, FMOV_R | RN(TMP_REG1) | VD(freg) | (1 << 16));
2038 }
2039
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)2040 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
2041 sljit_s32 freg, sljit_s32 reg)
2042 {
2043 sljit_ins inst;
2044
2045 CHECK_ERROR();
2046 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
2047
2048 if (GET_OPCODE(op) == SLJIT_COPY_TO_F64)
2049 inst = FMOV_R | RN(reg) | VD(freg) | (1 << 16);
2050 else
2051 inst = FMOV_R | VN(freg) | RD(reg);
2052
2053 if (op & SLJIT_32)
2054 inst ^= W_OP | (1 << 22);
2055
2056 return push_inst(compiler, inst);
2057 }
2058
2059 /* --------------------------------------------------------------------- */
2060 /* Conditional instructions */
2061 /* --------------------------------------------------------------------- */
2062
get_cc(struct sljit_compiler * compiler,sljit_s32 type)2063 static sljit_ins get_cc(struct sljit_compiler *compiler, sljit_s32 type)
2064 {
2065 switch (type) {
2066 case SLJIT_EQUAL:
2067 case SLJIT_ATOMIC_STORED:
2068 case SLJIT_F_EQUAL:
2069 case SLJIT_ORDERED_EQUAL:
2070 case SLJIT_UNORDERED_OR_EQUAL:
2071 return 0x1;
2072
2073 case SLJIT_NOT_EQUAL:
2074 case SLJIT_ATOMIC_NOT_STORED:
2075 case SLJIT_F_NOT_EQUAL:
2076 case SLJIT_UNORDERED_OR_NOT_EQUAL:
2077 case SLJIT_ORDERED_NOT_EQUAL:
2078 return 0x0;
2079
2080 case SLJIT_CARRY:
2081 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2082 return 0x3;
2083 /* fallthrough */
2084
2085 case SLJIT_LESS:
2086 return 0x2;
2087
2088 case SLJIT_NOT_CARRY:
2089 if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_ADD)
2090 return 0x2;
2091 /* fallthrough */
2092
2093 case SLJIT_GREATER_EQUAL:
2094 return 0x3;
2095
2096 case SLJIT_GREATER:
2097 case SLJIT_UNORDERED_OR_GREATER:
2098 return 0x9;
2099
2100 case SLJIT_LESS_EQUAL:
2101 case SLJIT_F_LESS_EQUAL:
2102 case SLJIT_ORDERED_LESS_EQUAL:
2103 return 0x8;
2104
2105 case SLJIT_SIG_LESS:
2106 case SLJIT_UNORDERED_OR_LESS:
2107 return 0xa;
2108
2109 case SLJIT_SIG_GREATER_EQUAL:
2110 case SLJIT_F_GREATER_EQUAL:
2111 case SLJIT_ORDERED_GREATER_EQUAL:
2112 return 0xb;
2113
2114 case SLJIT_SIG_GREATER:
2115 case SLJIT_F_GREATER:
2116 case SLJIT_ORDERED_GREATER:
2117 return 0xd;
2118
2119 case SLJIT_SIG_LESS_EQUAL:
2120 case SLJIT_UNORDERED_OR_LESS_EQUAL:
2121 return 0xc;
2122
2123 case SLJIT_OVERFLOW:
2124 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2125 return 0x0;
2126 /* fallthrough */
2127
2128 case SLJIT_UNORDERED:
2129 return 0x7;
2130
2131 case SLJIT_NOT_OVERFLOW:
2132 if (!(compiler->status_flags_state & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)))
2133 return 0x1;
2134 /* fallthrough */
2135
2136 case SLJIT_ORDERED:
2137 return 0x6;
2138
2139 case SLJIT_F_LESS:
2140 case SLJIT_ORDERED_LESS:
2141 return 0x5;
2142
2143 case SLJIT_UNORDERED_OR_GREATER_EQUAL:
2144 return 0x4;
2145
2146 default:
2147 SLJIT_UNREACHABLE();
2148 return 0xe;
2149 }
2150 }
2151
sljit_emit_label(struct sljit_compiler * compiler)2152 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2153 {
2154 struct sljit_label *label;
2155
2156 CHECK_ERROR_PTR();
2157 CHECK_PTR(check_sljit_emit_label(compiler));
2158
2159 if (compiler->last_label && compiler->last_label->size == compiler->size)
2160 return compiler->last_label;
2161
2162 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2163 PTR_FAIL_IF(!label);
2164 set_label(label, compiler);
2165 return label;
2166 }
2167
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)2168 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
2169 {
2170 struct sljit_jump *jump;
2171
2172 CHECK_ERROR_PTR();
2173 CHECK_PTR(check_sljit_emit_jump(compiler, type));
2174
2175 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2176 PTR_FAIL_IF(!jump);
2177 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2178 type &= 0xff;
2179
2180 if (type < SLJIT_JUMP) {
2181 jump->flags |= IS_COND;
2182 PTR_FAIL_IF(push_inst(compiler, B_CC | (6 << 5) | get_cc(compiler, type)));
2183 }
2184 else if (type >= SLJIT_FAST_CALL)
2185 jump->flags |= IS_BL;
2186
2187 PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
2188 jump->addr = compiler->size;
2189 PTR_FAIL_IF(push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1)));
2190
2191 return jump;
2192 }
2193
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)2194 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
2195 sljit_s32 arg_types)
2196 {
2197 SLJIT_UNUSED_ARG(arg_types);
2198 CHECK_ERROR_PTR();
2199 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
2200
2201 if (type & SLJIT_CALL_RETURN) {
2202 PTR_FAIL_IF(emit_stack_frame_release(compiler, 0));
2203 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
2204 }
2205
2206 SLJIT_SKIP_CHECKS(compiler);
2207 return sljit_emit_jump(compiler, type);
2208 }
2209
emit_cmp_to0(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2210 static SLJIT_INLINE struct sljit_jump* emit_cmp_to0(struct sljit_compiler *compiler, sljit_s32 type,
2211 sljit_s32 src, sljit_sw srcw)
2212 {
2213 struct sljit_jump *jump;
2214 sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
2215
2216 SLJIT_ASSERT((type & 0xff) == SLJIT_EQUAL || (type & 0xff) == SLJIT_NOT_EQUAL);
2217 ADJUST_LOCAL_OFFSET(src, srcw);
2218
2219 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2220 PTR_FAIL_IF(!jump);
2221 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
2222 jump->flags |= IS_CBZ | IS_COND;
2223
2224 if (src & SLJIT_MEM) {
2225 PTR_FAIL_IF(emit_op_mem(compiler, inv_bits ? INT_SIZE : WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
2226 src = TMP_REG1;
2227 }
2228 else if (src == SLJIT_IMM) {
2229 PTR_FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2230 src = TMP_REG1;
2231 }
2232
2233 SLJIT_ASSERT(FAST_IS_REG(src));
2234
2235 if ((type & 0xff) == SLJIT_EQUAL)
2236 inv_bits |= 1 << 24;
2237
2238 PTR_FAIL_IF(push_inst(compiler, (CBZ ^ inv_bits) | (6 << 5) | RT(src)));
2239 PTR_FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
2240 jump->addr = compiler->size;
2241 PTR_FAIL_IF(push_inst(compiler, BR | RN(TMP_REG1)));
2242 return jump;
2243 }
2244
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)2245 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
2246 {
2247 struct sljit_jump *jump;
2248
2249 CHECK_ERROR();
2250 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
2251
2252 if (src != SLJIT_IMM) {
2253 if (src & SLJIT_MEM) {
2254 ADJUST_LOCAL_OFFSET(src, srcw);
2255 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
2256 src = TMP_REG1;
2257 }
2258 return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(src));
2259 }
2260
2261 /* These jumps are converted to jump/call instructions when possible. */
2262 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
2263 FAIL_IF(!jump);
2264 set_jump(jump, compiler, JUMP_ADDR | ((type >= SLJIT_FAST_CALL) ? IS_BL : 0));
2265 jump->u.target = (sljit_uw)srcw;
2266
2267 FAIL_IF(emit_imm64_const(compiler, TMP_REG1, 0));
2268 jump->addr = compiler->size;
2269 return push_inst(compiler, ((type >= SLJIT_FAST_CALL) ? BLR : BR) | RN(TMP_REG1));
2270 }
2271
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)2272 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
2273 sljit_s32 arg_types,
2274 sljit_s32 src, sljit_sw srcw)
2275 {
2276 SLJIT_UNUSED_ARG(arg_types);
2277 CHECK_ERROR();
2278 CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
2279
2280 if (src & SLJIT_MEM) {
2281 ADJUST_LOCAL_OFFSET(src, srcw);
2282 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src, srcw, TMP_REG1));
2283 src = TMP_REG1;
2284 }
2285
2286 if (type & SLJIT_CALL_RETURN) {
2287 if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
2288 FAIL_IF(push_inst(compiler, MOV | RD(TMP_REG1) | RM(src)));
2289 src = TMP_REG1;
2290 }
2291
2292 FAIL_IF(emit_stack_frame_release(compiler, 0));
2293 type = SLJIT_JUMP;
2294 }
2295
2296 SLJIT_SKIP_CHECKS(compiler);
2297 return sljit_emit_ijump(compiler, type, src, srcw);
2298 }
2299
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)2300 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
2301 sljit_s32 dst, sljit_sw dstw,
2302 sljit_s32 type)
2303 {
2304 sljit_s32 dst_r, src_r, flags, mem_flags;
2305 sljit_ins cc;
2306
2307 CHECK_ERROR();
2308 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
2309 ADJUST_LOCAL_OFFSET(dst, dstw);
2310
2311 cc = get_cc(compiler, type);
2312 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
2313
2314 if (GET_OPCODE(op) < SLJIT_ADD) {
2315 FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(dst_r) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2316
2317 if (dst_r == TMP_REG1) {
2318 mem_flags = (GET_OPCODE(op) == SLJIT_MOV ? WORD_SIZE : INT_SIZE) | STORE;
2319 return emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG2);
2320 }
2321
2322 return SLJIT_SUCCESS;
2323 }
2324
2325 flags = HAS_FLAGS(op) ? SET_FLAGS : 0;
2326 mem_flags = WORD_SIZE;
2327
2328 if (op & SLJIT_32) {
2329 flags |= INT_OP;
2330 mem_flags = INT_SIZE;
2331 }
2332
2333 src_r = dst;
2334
2335 if (dst & SLJIT_MEM) {
2336 FAIL_IF(emit_op_mem(compiler, mem_flags, TMP_REG1, dst, dstw, TMP_REG1));
2337 src_r = TMP_REG1;
2338 }
2339
2340 FAIL_IF(push_inst(compiler, CSINC | (cc << 12) | RD(TMP_REG2) | RN(TMP_ZERO) | RM(TMP_ZERO)));
2341 emit_op_imm(compiler, flags | GET_OPCODE(op), dst_r, src_r, TMP_REG2);
2342
2343 if (dst & SLJIT_MEM)
2344 return emit_op_mem(compiler, mem_flags | STORE, TMP_REG1, dst, dstw, TMP_REG2);
2345 return SLJIT_SUCCESS;
2346 }
2347
sljit_emit_select(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_reg)2348 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
2349 sljit_s32 dst_reg,
2350 sljit_s32 src1, sljit_sw src1w,
2351 sljit_s32 src2_reg)
2352 {
2353 sljit_ins inv_bits = (type & SLJIT_32) ? W_OP : 0;
2354 sljit_ins cc;
2355
2356 CHECK_ERROR();
2357 CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
2358
2359 ADJUST_LOCAL_OFFSET(src1, src1w);
2360
2361 if (src1 == SLJIT_IMM) {
2362 if (type & SLJIT_32)
2363 src1w = (sljit_s32)src1w;
2364 FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
2365 src1 = TMP_REG1;
2366 } else if (src1 & SLJIT_MEM) {
2367 FAIL_IF(emit_op_mem(compiler, WORD_SIZE, TMP_REG1, src1, src1w, TMP_REG2));
2368 src1 = TMP_REG1;
2369 }
2370
2371 cc = get_cc(compiler, type & ~SLJIT_32);
2372 return push_inst(compiler, (CSEL ^ inv_bits) | (cc << 12) | RD(dst_reg) | RN(src2_reg) | RM(src1));
2373 }
2374
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)2375 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
2376 sljit_s32 dst_freg,
2377 sljit_s32 src1, sljit_sw src1w,
2378 sljit_s32 src2_freg)
2379 {
2380 sljit_ins inv_bits = (type & SLJIT_32) ? (1 << 22) : 0;
2381 sljit_ins cc;
2382
2383 CHECK_ERROR();
2384 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
2385
2386 ADJUST_LOCAL_OFFSET(src1, src1w);
2387
2388 if (src1 & SLJIT_MEM) {
2389 FAIL_IF(emit_fop_mem(compiler, (type & SLJIT_32) ? INT_SIZE : WORD_SIZE, TMP_FREG1, src1, src1w));
2390 src1 = TMP_FREG1;
2391 }
2392
2393 cc = get_cc(compiler, type & ~SLJIT_32);
2394 return push_inst(compiler, (FCSEL ^ inv_bits) | (cc << 12) | VD(dst_freg) | VN(src2_freg) | VM(src1));
2395 }
2396
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)2397 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
2398 sljit_s32 reg,
2399 sljit_s32 mem, sljit_sw memw)
2400 {
2401 sljit_u32 inst;
2402
2403 CHECK_ERROR();
2404 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
2405
2406 if (!(reg & REG_PAIR_MASK))
2407 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
2408
2409 ADJUST_LOCAL_OFFSET(mem, memw);
2410
2411 if (!(mem & REG_MASK)) {
2412 FAIL_IF(load_immediate(compiler, TMP_REG1, memw & ~0x1f8));
2413
2414 mem = SLJIT_MEM1(TMP_REG1);
2415 memw &= 0x1f8;
2416 } else if (mem & OFFS_REG_MASK) {
2417 FAIL_IF(push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10)));
2418
2419 mem = SLJIT_MEM1(TMP_REG1);
2420 memw = 0;
2421 } else if ((memw & 0x7) != 0 || memw > 0x1f8 || memw < -0x200) {
2422 inst = ADDI;
2423
2424 if (memw < 0) {
2425 /* Remains negative for integer min. */
2426 memw = -memw;
2427 inst = SUBI;
2428 } else if ((memw & 0x7) == 0 && memw <= 0x7ff0) {
2429 if (!(type & SLJIT_MEM_STORE) && (mem & REG_MASK) == REG_PAIR_FIRST(reg)) {
2430 FAIL_IF(push_inst(compiler, LDRI | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
2431 return push_inst(compiler, LDRI | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
2432 }
2433
2434 inst = (type & SLJIT_MEM_STORE) ? STRI : LDRI;
2435
2436 FAIL_IF(push_inst(compiler, inst | RD(REG_PAIR_FIRST(reg)) | RN(mem & REG_MASK) | ((sljit_ins)memw << 7)));
2437 return push_inst(compiler, inst | RD(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | ((sljit_ins)(memw + 0x8) << 7));
2438 }
2439
2440 if ((sljit_uw)memw <= 0xfff) {
2441 FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(mem & REG_MASK) | ((sljit_ins)memw << 10)));
2442 memw = 0;
2443 } else if ((sljit_uw)memw <= 0xffffff) {
2444 FAIL_IF(push_inst(compiler, inst | (1 << 22) | RD(TMP_REG1) | RN(mem & REG_MASK) | (((sljit_ins)memw >> 12) << 10)));
2445
2446 if ((memw & 0xe07) != 0) {
2447 FAIL_IF(push_inst(compiler, inst | RD(TMP_REG1) | RN(TMP_REG1) | (((sljit_ins)memw & 0xfff) << 10)));
2448 memw = 0;
2449 } else {
2450 memw &= 0xfff;
2451 }
2452 } else {
2453 FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
2454 FAIL_IF(push_inst(compiler, (inst == ADDI ? ADD : SUB) | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(TMP_REG1)));
2455 memw = 0;
2456 }
2457
2458 mem = SLJIT_MEM1(TMP_REG1);
2459
2460 if (inst == SUBI)
2461 memw = -memw;
2462 }
2463
2464 SLJIT_ASSERT((memw & 0x7) == 0 && memw <= 0x1f8 && memw >= -0x200);
2465 return push_inst(compiler, ((type & SLJIT_MEM_STORE) ? STP : LDP) | RT(REG_PAIR_FIRST(reg)) | RT2(REG_PAIR_SECOND(reg)) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x3f8) << 12));
2466 }
2467
sljit_emit_mem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)2468 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem_update(struct sljit_compiler *compiler, sljit_s32 type,
2469 sljit_s32 reg,
2470 sljit_s32 mem, sljit_sw memw)
2471 {
2472 sljit_u32 sign = 0, inst;
2473
2474 CHECK_ERROR();
2475 CHECK(check_sljit_emit_mem_update(compiler, type, reg, mem, memw));
2476
2477 if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
2478 return SLJIT_ERR_UNSUPPORTED;
2479
2480 if (type & SLJIT_MEM_SUPP)
2481 return SLJIT_SUCCESS;
2482
2483 switch (type & 0xff) {
2484 case SLJIT_MOV:
2485 case SLJIT_MOV_P:
2486 inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
2487 break;
2488 case SLJIT_MOV_S8:
2489 sign = 1;
2490 /* fallthrough */
2491 case SLJIT_MOV_U8:
2492 inst = STURBI | (MEM_SIZE_SHIFT(BYTE_SIZE) << 30) | 0x400;
2493 break;
2494 case SLJIT_MOV_S16:
2495 sign = 1;
2496 /* fallthrough */
2497 case SLJIT_MOV_U16:
2498 inst = STURBI | (MEM_SIZE_SHIFT(HALF_SIZE) << 30) | 0x400;
2499 break;
2500 case SLJIT_MOV_S32:
2501 sign = 1;
2502 /* fallthrough */
2503 case SLJIT_MOV_U32:
2504 case SLJIT_MOV32:
2505 inst = STURBI | (MEM_SIZE_SHIFT(INT_SIZE) << 30) | 0x400;
2506 break;
2507 default:
2508 SLJIT_UNREACHABLE();
2509 inst = STURBI | (MEM_SIZE_SHIFT(WORD_SIZE) << 30) | 0x400;
2510 break;
2511 }
2512
2513 if (!(type & SLJIT_MEM_STORE))
2514 inst |= sign ? 0x00800000 : 0x00400000;
2515
2516 if (!(type & SLJIT_MEM_POST))
2517 inst |= 0x800;
2518
2519 return push_inst(compiler, inst | RT(reg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
2520 }
2521
sljit_emit_fmem_update(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 mem,sljit_sw memw)2522 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fmem_update(struct sljit_compiler *compiler, sljit_s32 type,
2523 sljit_s32 freg,
2524 sljit_s32 mem, sljit_sw memw)
2525 {
2526 sljit_u32 inst;
2527
2528 CHECK_ERROR();
2529 CHECK(check_sljit_emit_fmem_update(compiler, type, freg, mem, memw));
2530
2531 if ((mem & OFFS_REG_MASK) || (memw > 255 || memw < -256))
2532 return SLJIT_ERR_UNSUPPORTED;
2533
2534 if (type & SLJIT_MEM_SUPP)
2535 return SLJIT_SUCCESS;
2536
2537 inst = STUR_FI | 0x80000400;
2538
2539 if (!(type & SLJIT_32))
2540 inst |= 0x40000000;
2541
2542 if (!(type & SLJIT_MEM_STORE))
2543 inst |= 0x00400000;
2544
2545 if (!(type & SLJIT_MEM_POST))
2546 inst |= 0x800;
2547
2548 return push_inst(compiler, inst | VT(freg) | RN(mem & REG_MASK) | (sljit_ins)((memw & 0x1ff) << 12));
2549 }
2550
/* Reduces a memory operand to a single base register for the SIMD
   load / store emitters.

   On return *mem_ptr holds the register to address through: the original
   base register when memw is zero, TMP_REG1 otherwise (after the needed
   address computation has been emitted).

   Returns SLJIT_SUCCESS, or the failure reported by one of the emitters. */
static sljit_s32 sljit_emit_simd_mem_offset(struct sljit_compiler *compiler, sljit_s32 *mem_ptr, sljit_sw memw)
{
	sljit_s32 mem = *mem_ptr;
	sljit_s32 base;
	sljit_ins add_sub;

	/* base + (index << shift) */
	if (SLJIT_UNLIKELY(mem & OFFS_REG_MASK)) {
		*mem_ptr = TMP_REG1;
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(mem & REG_MASK) | RM(OFFS_REG(mem)) | ((sljit_ins)(memw & 0x3) << 10));
	}

	base = mem & REG_MASK;

	/* absolute address */
	if (!base) {
		*mem_ptr = TMP_REG1;
		return load_immediate(compiler, TMP_REG1, memw);
	}

	if (memw == 0) {
		*mem_ptr = base;
		return SLJIT_SUCCESS;
	}

	*mem_ptr = TMP_REG1;

	/* Offsets beyond 24 bits go through a full constant load. */
	if (memw > 0xffffff || memw < -0xffffff) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, memw));
		return push_inst(compiler, ADD | RD(TMP_REG1) | RN(TMP_REG1) | RM(base));
	}

	add_sub = ADDI;

	if (memw < 0) {
		add_sub = SUBI;
		memw = -memw;
	}

	if (memw > 0xfff) {
		/* Upper 12 bits first, using the shifted immediate form. */
		FAIL_IF(push_inst(compiler, add_sub | (1 << 22) | RD(TMP_REG1) | RN(base) | ((sljit_ins)(memw >> 12) << 10)));

		memw &= 0xfff;
		if (memw == 0)
			return SLJIT_SUCCESS;

		base = TMP_REG1;
	}

	return push_inst(compiler, add_sub | RD(TMP_REG1) | RN(base) | ((sljit_ins)memw << 10));
}
2599
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)2600 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
2601 sljit_s32 freg,
2602 sljit_s32 srcdst, sljit_sw srcdstw)
2603 {
2604 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2605 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2606 sljit_ins ins;
2607
2608 CHECK_ERROR();
2609 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
2610
2611 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
2612
2613 if (reg_size != 3 && reg_size != 4)
2614 return SLJIT_ERR_UNSUPPORTED;
2615
2616 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2617 return SLJIT_ERR_UNSUPPORTED;
2618
2619 if (type & SLJIT_SIMD_TEST)
2620 return SLJIT_SUCCESS;
2621
2622 if (!(srcdst & SLJIT_MEM)) {
2623 if (type & SLJIT_SIMD_STORE)
2624 ins = VD(srcdst) | VN(freg) | VM(freg);
2625 else
2626 ins = VD(freg) | VN(srcdst) | VM(srcdst);
2627
2628 if (reg_size == 4)
2629 ins |= (1 << 30);
2630
2631 return push_inst(compiler, ORR_v | ins);
2632 }
2633
2634 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
2635
2636 if (elem_size > 3)
2637 elem_size = 3;
2638
2639 ins = (type & SLJIT_SIMD_STORE) ? ST1 : LD1;
2640
2641 if (reg_size == 4)
2642 ins |= (1 << 30);
2643
2644 return push_inst(compiler, ins | ((sljit_ins)elem_size << 10) | RN(srcdst) | VT(freg));
2645 }
2646
simd_get_imm(sljit_s32 elem_size,sljit_uw value)2647 static sljit_ins simd_get_imm(sljit_s32 elem_size, sljit_uw value)
2648 {
2649 sljit_ins result;
2650
2651 if (elem_size > 2 && (sljit_u32)value == (value >> 32)) {
2652 elem_size = 2;
2653 value = (sljit_u32)value;
2654 }
2655
2656 if (elem_size == 2 && (sljit_u16)value == (value >> 16)) {
2657 elem_size = 1;
2658 value = (sljit_u16)value;
2659 }
2660
2661 if (elem_size == 1 && (sljit_u8)value == (value >> 8)) {
2662 elem_size = 0;
2663 value = (sljit_u8)value;
2664 }
2665
2666 switch (elem_size) {
2667 case 0:
2668 SLJIT_ASSERT(value <= 0xff);
2669 result = 0xe000;
2670 break;
2671 case 1:
2672 SLJIT_ASSERT(value <= 0xffff);
2673 result = 0;
2674
2675 while (1) {
2676 if (value <= 0xff) {
2677 result |= 0x8000;
2678 break;
2679 }
2680
2681 if ((value & 0xff) == 0) {
2682 value >>= 8;
2683 result |= 0xa000;
2684 break;
2685 }
2686
2687 if (result != 0)
2688 return ~(sljit_ins)0;
2689
2690 value ^= (sljit_uw)0xffff;
2691 result = (1 << 29);
2692 }
2693 break;
2694 case 2:
2695 SLJIT_ASSERT(value <= 0xffffffff);
2696 result = 0;
2697
2698 while (1) {
2699 if (value <= 0xff) {
2700 result |= 0x0000;
2701 break;
2702 }
2703
2704 if ((value & ~(sljit_uw)0xff00) == 0) {
2705 value >>= 8;
2706 result |= 0x2000;
2707 break;
2708 }
2709
2710 if ((value & ~(sljit_uw)0xff0000) == 0) {
2711 value >>= 16;
2712 result |= 0x4000;
2713 break;
2714 }
2715
2716 if ((value & ~(sljit_uw)0xff000000) == 0) {
2717 value >>= 24;
2718 result |= 0x6000;
2719 break;
2720 }
2721
2722 if ((value & (sljit_uw)0xff) == 0xff && (value >> 16) == 0) {
2723 value >>= 8;
2724 result |= 0xc000;
2725 break;
2726 }
2727
2728 if ((value & (sljit_uw)0xffff) == 0xffff && (value >> 24) == 0) {
2729 value >>= 16;
2730 result |= 0xd000;
2731 break;
2732 }
2733
2734 if (result != 0)
2735 return ~(sljit_ins)0;
2736
2737 value ^= (sljit_uw)0xffffffff;
2738 result = (1 << 29);
2739 }
2740 break;
2741 default:
2742 return ~(sljit_ins)0;
2743 }
2744
2745 return (((sljit_ins)value & 0x1f) << 5) | (((sljit_ins)value & 0xe0) << 11) | result;
2746 }
2747
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)2748 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
2749 sljit_s32 freg,
2750 sljit_s32 src, sljit_sw srcw)
2751 {
2752 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2753 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2754 sljit_ins ins, imm;
2755
2756 CHECK_ERROR();
2757 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
2758
2759 ADJUST_LOCAL_OFFSET(src, srcw);
2760
2761 if (reg_size != 3 && reg_size != 4)
2762 return SLJIT_ERR_UNSUPPORTED;
2763
2764 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2765 return SLJIT_ERR_UNSUPPORTED;
2766
2767 if (type & SLJIT_SIMD_TEST)
2768 return SLJIT_SUCCESS;
2769
2770 if (src & SLJIT_MEM) {
2771 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
2772
2773 ins = (sljit_ins)elem_size << 10;
2774
2775 if (reg_size == 4)
2776 ins |= (sljit_ins)1 << 30;
2777
2778 return push_inst(compiler, LD1R | ins | RN(src) | VT(freg));
2779 }
2780
2781 ins = (sljit_ins)1 << (16 + elem_size);
2782
2783 if (reg_size == 4)
2784 ins |= (sljit_ins)1 << 30;
2785
2786 if (type & SLJIT_SIMD_FLOAT) {
2787 if (src == SLJIT_IMM)
2788 return push_inst(compiler, MOVI | (ins & ((sljit_ins)1 << 30)) | VD(freg));
2789
2790 return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
2791 }
2792
2793 if (src == SLJIT_IMM) {
2794 if (elem_size < 3)
2795 srcw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
2796
2797 imm = simd_get_imm(elem_size, (sljit_uw)srcw);
2798
2799 if (imm != ~(sljit_ins)0) {
2800 imm |= ins & ((sljit_ins)1 << 30);
2801
2802 return push_inst(compiler, MOVI | imm | VD(freg));
2803 }
2804
2805 FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
2806 src = TMP_REG1;
2807 }
2808
2809 return push_inst(compiler, DUP_g | ins | VD(freg) | RN(src));
2810 }
2811
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)2812 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
2813 sljit_s32 freg, sljit_s32 lane_index,
2814 sljit_s32 srcdst, sljit_sw srcdstw)
2815 {
2816 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2817 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2818 sljit_ins ins;
2819
2820 CHECK_ERROR();
2821 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
2822
2823 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
2824
2825 if (reg_size != 3 && reg_size != 4)
2826 return SLJIT_ERR_UNSUPPORTED;
2827
2828 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2829 return SLJIT_ERR_UNSUPPORTED;
2830
2831 if (type & SLJIT_SIMD_TEST)
2832 return SLJIT_SUCCESS;
2833
2834 if (type & SLJIT_SIMD_LANE_ZERO) {
2835 ins = (reg_size == 3) ? 0 : ((sljit_ins)1 << 30);
2836
2837 if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
2838 FAIL_IF(push_inst(compiler, ORR_v | ins | VD(TMP_FREG1) | VN(freg) | VM(freg)));
2839 srcdst = TMP_FREG1;
2840 srcdstw = 0;
2841 }
2842
2843 FAIL_IF(push_inst(compiler, MOVI | ins | VD(freg)));
2844 }
2845
2846 if (srcdst & SLJIT_MEM) {
2847 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &srcdst, srcdstw));
2848
2849 if (elem_size == 3)
2850 ins = 0x8400;
2851 else if (elem_size == 0)
2852 ins = 0;
2853 else
2854 ins = (sljit_ins)0x2000 << elem_size;
2855
2856 lane_index = lane_index << elem_size;
2857 ins |= (sljit_ins)(((lane_index & 0x8) << 27) | ((lane_index & 0x7) << 10));
2858
2859 return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? ST1_s : LD1_s) | ins | RN(srcdst) | VT(freg));
2860 }
2861
2862 if (type & SLJIT_SIMD_FLOAT) {
2863 if (type & SLJIT_SIMD_STORE)
2864 ins = INS_e | ((sljit_ins)1 << (16 + elem_size)) | ((sljit_ins)lane_index << (11 + elem_size)) | VD(srcdst) | VN(freg);
2865 else
2866 ins = INS_e | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)) | VD(freg) | VN(srcdst);
2867
2868 return push_inst(compiler, ins);
2869 }
2870
2871 if (srcdst == SLJIT_IMM) {
2872 if (elem_size < 3)
2873 srcdstw &= ((sljit_sw)1 << (((sljit_sw)1 << elem_size) << 3)) - 1;
2874
2875 FAIL_IF(load_immediate(compiler, TMP_REG1, srcdstw));
2876 srcdst = TMP_REG1;
2877 }
2878
2879 if (type & SLJIT_SIMD_STORE) {
2880 ins = RD(srcdst) | VN(freg);
2881
2882 if ((type & SLJIT_SIMD_LANE_SIGNED) && (elem_size < 2 || (elem_size == 2 && !(type & SLJIT_32)))) {
2883 ins |= SMOV;
2884
2885 if (!(type & SLJIT_32))
2886 ins |= (sljit_ins)1 << 30;
2887 } else
2888 ins |= UMOV;
2889 } else
2890 ins = INS | VD(freg) | RN(srcdst);
2891
2892 if (elem_size == 3)
2893 ins |= (sljit_ins)1 << 30;
2894
2895 return push_inst(compiler, ins | ((((sljit_ins)lane_index << 1) | 1) << (16 + elem_size)));
2896 }
2897
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)2898 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
2899 sljit_s32 freg,
2900 sljit_s32 src, sljit_s32 src_lane_index)
2901 {
2902 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2903 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2904 sljit_ins ins;
2905
2906 CHECK_ERROR();
2907 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
2908
2909 if (reg_size != 3 && reg_size != 4)
2910 return SLJIT_ERR_UNSUPPORTED;
2911
2912 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2913 return SLJIT_ERR_UNSUPPORTED;
2914
2915 if (type & SLJIT_SIMD_TEST)
2916 return SLJIT_SUCCESS;
2917
2918 ins = (((sljit_ins)src_lane_index << 1) | 1) << (16 + elem_size);
2919
2920 if (reg_size == 4)
2921 ins |= (sljit_ins)1 << 30;
2922
2923 return push_inst(compiler, DUP_e | ins | VD(freg) | VN(src));
2924 }
2925
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)2926 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
2927 sljit_s32 freg,
2928 sljit_s32 src, sljit_sw srcw)
2929 {
2930 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2931 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2932 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
2933
2934 CHECK_ERROR();
2935 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
2936
2937 ADJUST_LOCAL_OFFSET(src, srcw);
2938
2939 if (reg_size != 3 && reg_size != 4)
2940 return SLJIT_ERR_UNSUPPORTED;
2941
2942 if ((type & SLJIT_SIMD_FLOAT) && (elem_size != 2 || elem2_size != 3))
2943 return SLJIT_ERR_UNSUPPORTED;
2944
2945 if (type & SLJIT_SIMD_TEST)
2946 return SLJIT_SUCCESS;
2947
2948 if (src & SLJIT_MEM) {
2949 FAIL_IF(sljit_emit_simd_mem_offset(compiler, &src, srcw));
2950
2951 if (reg_size == 4 && elem2_size - elem_size == 1)
2952 FAIL_IF(push_inst(compiler, LD1 | ((sljit_ins)elem_size << 10) | RN(src) | VT(freg)));
2953 else
2954 FAIL_IF(push_inst(compiler, LD1_s | ((sljit_ins)0x2000 << (reg_size - elem2_size + elem_size)) | RN(src) | VT(freg)));
2955 src = freg;
2956 }
2957
2958 if (type & SLJIT_SIMD_FLOAT) {
2959 SLJIT_ASSERT(reg_size == 4);
2960 return push_inst(compiler, FCVTL | (1 << 22) | VD(freg) | VN(src));
2961 }
2962
2963 do {
2964 FAIL_IF(push_inst(compiler, ((type & SLJIT_SIMD_EXTEND_SIGNED) ? SSHLL : USHLL)
2965 | ((sljit_ins)1 << (19 + elem_size)) | VD(freg) | VN(src)));
2966 src = freg;
2967 } while (++elem_size < elem2_size);
2968
2969 return SLJIT_SUCCESS;
2970 }
2971
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)2972 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
2973 sljit_s32 freg,
2974 sljit_s32 dst, sljit_sw dstw)
2975 {
2976 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
2977 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
2978 sljit_ins ins, imms;
2979 sljit_s32 dst_r;
2980
2981 CHECK_ERROR();
2982 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
2983
2984 ADJUST_LOCAL_OFFSET(dst, dstw);
2985
2986 if (reg_size != 3 && reg_size != 4)
2987 return SLJIT_ERR_UNSUPPORTED;
2988
2989 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
2990 return SLJIT_ERR_UNSUPPORTED;
2991
2992 if (type & SLJIT_SIMD_TEST)
2993 return SLJIT_SUCCESS;
2994
2995 switch (elem_size) {
2996 case 0:
2997 imms = 0x643219;
2998 ins = USHR | (0x9 << 16);
2999 break;
3000 case 1:
3001 imms = (reg_size == 4) ? 0x643219 : 0x6231;
3002 ins = USHR | (0x11 << 16);
3003 break;
3004 case 2:
3005 imms = (reg_size == 4) ? 0x6231 : 0x61;
3006 ins = USHR | (0x21 << 16);
3007 break;
3008 default:
3009 imms = 0x61;
3010 ins = USHR | (0x41 << 16);
3011 break;
3012 }
3013
3014 if (reg_size == 4)
3015 ins |= (1 << 30);
3016
3017 FAIL_IF(push_inst(compiler, ins | VD(TMP_FREG1) | VN(freg)));
3018
3019 if (reg_size == 4 && elem_size > 0)
3020 FAIL_IF(push_inst(compiler, XTN | ((sljit_ins)(elem_size - 1) << 22) | VD(TMP_FREG1) | VN(TMP_FREG1)));
3021
3022 if (imms >= 0x100) {
3023 ins = (reg_size == 4 && elem_size == 0) ? (1 << 30) : 0;
3024
3025 do {
3026 FAIL_IF(push_inst(compiler, USRA | ins | ((imms & 0xff) << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
3027 imms >>= 8;
3028 } while (imms >= 0x100);
3029 }
3030
3031 FAIL_IF(push_inst(compiler, USRA | (1 << 30) | (imms << 16) | VD(TMP_FREG1) | VN(TMP_FREG1)));
3032
3033 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3034 ins = (0x1 << 16);
3035
3036 if (reg_size == 4 && elem_size == 0) {
3037 FAIL_IF(push_inst(compiler, INS_e | (0x3 << 16) | (0x8 << 11) | VD(TMP_FREG1) | VN(TMP_FREG1)));
3038 ins = (0x2 << 16);
3039 }
3040
3041 FAIL_IF(push_inst(compiler, UMOV | ins | RD(dst_r) | VN(TMP_FREG1)));
3042
3043 if (dst_r == TMP_REG1)
3044 return emit_op_mem(compiler, STORE | ((type & SLJIT_32) ? INT_SIZE : WORD_SIZE), TMP_REG1, dst, dstw, TMP_REG2);
3045
3046 return SLJIT_SUCCESS;
3047 }
3048
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)3049 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
3050 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
3051 {
3052 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3053 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3054 sljit_ins ins = 0;
3055
3056 CHECK_ERROR();
3057 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
3058
3059 if (reg_size != 3 && reg_size != 4)
3060 return SLJIT_ERR_UNSUPPORTED;
3061
3062 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3063 return SLJIT_ERR_UNSUPPORTED;
3064
3065 switch (SLJIT_SIMD_GET_OPCODE(type)) {
3066 case SLJIT_SIMD_OP2_AND:
3067 ins = AND_v;
3068 break;
3069 case SLJIT_SIMD_OP2_OR:
3070 ins = ORR_v;
3071 break;
3072 case SLJIT_SIMD_OP2_XOR:
3073 ins = EOR_v;
3074 break;
3075 }
3076
3077 if (type & SLJIT_SIMD_TEST)
3078 return SLJIT_SUCCESS;
3079
3080 if (reg_size == 4)
3081 ins |= (sljit_ins)1 << 30;
3082
3083 return push_inst(compiler, ins | VD(dst_freg) | VN(src1_freg) | VM(src2_freg));
3084 }
3085
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)3086 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
3087 sljit_s32 dst_reg,
3088 sljit_s32 mem_reg)
3089 {
3090 sljit_ins ins;
3091
3092 CHECK_ERROR();
3093 CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
3094
3095 #ifdef __ARM_FEATURE_ATOMICS
3096 switch (GET_OPCODE(op)) {
3097 case SLJIT_MOV32:
3098 case SLJIT_MOV_U32:
3099 ins = LDR ^ (1 << 30);
3100 break;
3101 case SLJIT_MOV_U16:
3102 ins = LDRH;
3103 break;
3104 case SLJIT_MOV_U8:
3105 ins = LDRB;
3106 break;
3107 default:
3108 ins = LDR;
3109 break;
3110 }
3111 #else /* !__ARM_FEATURE_ATOMICS */
3112 switch (GET_OPCODE(op)) {
3113 case SLJIT_MOV32:
3114 case SLJIT_MOV_U32:
3115 ins = LDXR ^ (1 << 30);
3116 break;
3117 case SLJIT_MOV_U8:
3118 ins = LDXRB;
3119 break;
3120 case SLJIT_MOV_U16:
3121 ins = LDXRH;
3122 break;
3123 default:
3124 ins = LDXR;
3125 break;
3126 }
3127 #endif /* ARM_FEATURE_ATOMICS */
3128 return push_inst(compiler, ins | RN(mem_reg) | RT(dst_reg));
3129 }
3130
sljit_emit_atomic_store(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src_reg,sljit_s32 mem_reg,sljit_s32 temp_reg)3131 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
3132 sljit_s32 src_reg,
3133 sljit_s32 mem_reg,
3134 sljit_s32 temp_reg)
3135 {
3136 sljit_ins ins;
3137 sljit_s32 tmp = temp_reg;
3138 sljit_ins cmp = 0;
3139 sljit_ins inv_bits = W_OP;
3140
3141 CHECK_ERROR();
3142 CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
3143
3144 #ifdef __ARM_FEATURE_ATOMICS
3145 if (op & SLJIT_SET_ATOMIC_STORED)
3146 cmp = (SUBS ^ W_OP) | RD(TMP_ZERO);
3147
3148 switch (GET_OPCODE(op)) {
3149 case SLJIT_MOV32:
3150 case SLJIT_MOV_U32:
3151 ins = CAS ^ (1 << 30);
3152 break;
3153 case SLJIT_MOV_U16:
3154 ins = CASH;
3155 break;
3156 case SLJIT_MOV_U8:
3157 ins = CASB;
3158 break;
3159 default:
3160 ins = CAS;
3161 inv_bits = 0;
3162 if (cmp)
3163 cmp ^= W_OP;
3164 break;
3165 }
3166
3167 if (cmp) {
3168 FAIL_IF(push_inst(compiler, (MOV ^ inv_bits) | RM(temp_reg) | RD(TMP_REG1)));
3169 tmp = TMP_REG1;
3170 }
3171 FAIL_IF(push_inst(compiler, ins | RM(tmp) | RN(mem_reg) | RD(src_reg)));
3172 if (!cmp)
3173 return SLJIT_SUCCESS;
3174
3175 FAIL_IF(push_inst(compiler, cmp | RM(tmp) | RN(temp_reg)));
3176 FAIL_IF(push_inst(compiler, (CSET ^ inv_bits) | RD(tmp)));
3177 return push_inst(compiler, cmp | RM(tmp) | RN(TMP_ZERO));
3178 #else /* !__ARM_FEATURE_ATOMICS */
3179 SLJIT_UNUSED_ARG(tmp);
3180 SLJIT_UNUSED_ARG(inv_bits);
3181
3182 if (op & SLJIT_SET_ATOMIC_STORED)
3183 cmp = (SUBI ^ W_OP) | (1 << 29);
3184
3185 switch (GET_OPCODE(op)) {
3186 case SLJIT_MOV32:
3187 case SLJIT_MOV_U32:
3188 ins = STXR ^ (1 << 30);
3189 break;
3190 case SLJIT_MOV_U8:
3191 ins = STXRB;
3192 break;
3193 case SLJIT_MOV_U16:
3194 ins = STXRH;
3195 break;
3196 default:
3197 ins = STXR;
3198 break;
3199 }
3200
3201 FAIL_IF(push_inst(compiler, ins | RM(TMP_REG1) | RN(mem_reg) | RT(src_reg)));
3202 return cmp ? push_inst(compiler, cmp | RD(TMP_ZERO) | RN(TMP_REG1)) : SLJIT_SUCCESS;
3203 #endif /* __ARM_FEATURE_ATOMICS */
3204 }
3205
sljit_get_local_base(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw offset)3206 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_local_base(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw offset)
3207 {
3208 sljit_s32 dst_reg;
3209 sljit_ins ins;
3210
3211 CHECK_ERROR();
3212 CHECK(check_sljit_get_local_base(compiler, dst, dstw, offset));
3213 ADJUST_LOCAL_OFFSET(SLJIT_MEM1(SLJIT_SP), offset);
3214
3215 dst_reg = FAST_IS_REG(dst) ? dst : TMP_REG1;
3216
3217 /* Not all instruction forms support accessing SP register. */
3218 if (offset <= 0xffffff && offset >= -0xffffff) {
3219 ins = ADDI;
3220 if (offset < 0) {
3221 offset = -offset;
3222 ins = SUBI;
3223 }
3224
3225 if (offset <= 0xfff)
3226 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)(offset << 10)));
3227 else {
3228 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(SLJIT_SP) | (sljit_ins)((offset & 0xfff000) >> (12 - 10)) | (1 << 22)));
3229
3230 offset &= 0xfff;
3231 if (offset != 0)
3232 FAIL_IF(push_inst(compiler, ins | RD(dst_reg) | RN(dst_reg) | (sljit_ins)(offset << 10)));
3233 }
3234 }
3235 else {
3236 FAIL_IF(load_immediate (compiler, dst_reg, offset));
3237 /* Add extended register form. */
3238 FAIL_IF(push_inst(compiler, ADDE | (0x3 << 13) | RD(dst_reg) | RN(SLJIT_SP) | RM(dst_reg)));
3239 }
3240
3241 if (SLJIT_UNLIKELY(dst & SLJIT_MEM))
3242 return emit_op_mem(compiler, WORD_SIZE | STORE, dst_reg, dst, dstw, TMP_REG1);
3243 return SLJIT_SUCCESS;
3244 }
3245
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)3246 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3247 {
3248 struct sljit_const *const_;
3249 sljit_s32 dst_r;
3250
3251 CHECK_ERROR_PTR();
3252 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3253 ADJUST_LOCAL_OFFSET(dst, dstw);
3254
3255 const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
3256 PTR_FAIL_IF(!const_);
3257 set_const(const_, compiler);
3258
3259 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3260 PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, (sljit_uw)init_value));
3261
3262 if (dst & SLJIT_MEM)
3263 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
3264 return const_;
3265 }
3266
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3267 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label* sljit_emit_put_label(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3268 {
3269 struct sljit_put_label *put_label;
3270 sljit_s32 dst_r;
3271
3272 CHECK_ERROR_PTR();
3273 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3274 ADJUST_LOCAL_OFFSET(dst, dstw);
3275
3276 dst_r = FAST_IS_REG(dst) ? dst : TMP_REG1;
3277 PTR_FAIL_IF(emit_imm64_const(compiler, dst_r, 0));
3278
3279 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3280 PTR_FAIL_IF(!put_label);
3281 set_put_label(put_label, compiler, 1);
3282
3283 if (dst & SLJIT_MEM)
3284 PTR_FAIL_IF(emit_op_mem(compiler, WORD_SIZE | STORE, dst_r, dst, dstw, TMP_REG2));
3285
3286 return put_label;
3287 }
3288
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)3289 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3290 {
3291 sljit_ins* inst = (sljit_ins*)addr;
3292 sljit_u32 dst;
3293 SLJIT_UNUSED_ARG(executable_offset);
3294
3295 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 0);
3296
3297 dst = inst[0] & 0x1f;
3298 SLJIT_ASSERT((inst[0] & 0xffe00000) == MOVZ && (inst[1] & 0xffe00000) == (MOVK | (1 << 21)));
3299 inst[0] = MOVZ | dst | (((sljit_u32)new_target & 0xffff) << 5);
3300 inst[1] = MOVK | dst | (((sljit_u32)(new_target >> 16) & 0xffff) << 5) | (1 << 21);
3301 inst[2] = MOVK | dst | (((sljit_u32)(new_target >> 32) & 0xffff) << 5) | (2 << 21);
3302 inst[3] = MOVK | dst | ((sljit_u32)(new_target >> 48) << 5) | (3 << 21);
3303
3304 SLJIT_UPDATE_WX_FLAGS(inst, inst + 4, 1);
3305 inst = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(inst, executable_offset);
3306 SLJIT_CACHE_FLUSH(inst, inst + 4);
3307 }
3308
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)3309 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3310 {
3311 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3312 }
3313