1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/auxv.h>
28
/* Facility detection switches.
   Static detection compares against the compiler-provided __ARCH__ macro,
   so it is only enabled when that macro is defined. Dynamic (runtime)
   detection is always enabled. */
#ifdef __ARCH__
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
/* Returns the platform name: the literal "s390x" concatenated at compile
   time with the SLJIT_CPUINFO string. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "s390x" SLJIT_CPUINFO;
}
40
/* Instructions. A single instruction (2, 4 or 6 bytes, see sizeof_ins)
   is kept in the low bits of one machine word. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;

/* SLJIT indexes of the two temporary registers; reg_map[] below maps
   them to hardware registers 0 and 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Maps an SLJIT register index to a hardware general purpose register
   number (see gpr()). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
};
53
54 /* there are also a[2-15] available, but they are slower to access and
55 * their use is limited as mundaym explained:
56 * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
57 */
58
59 /* General Purpose Registers [0-15]. */
60 typedef sljit_uw sljit_gpr;
61
62 /*
63 * WARNING
64 * the following code is non standard and should be improved for
65 * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
66 * registers because r0 and r1 are the ABI recommended volatiles.
67 * there is a gpr() function that maps sljit to physical register numbers
68 * that should be used instead of the usual index into reg_map[] and
69 * will be retired ASAP (TODO: carenas)
70 */
71
/* Hardware register numbers. The reg_map[] index noted on each line is
   the SLJIT register index that maps to that hardware register. */
static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
static const sljit_gpr r7 = 7;		/* reg_map[6] */
static const sljit_gpr r8 = 8;		/* reg_map[7] */
static const sljit_gpr r9 = 9;		/* reg_map[8] */
static const sljit_gpr r10 = 10;	/* reg_map[9] */
static const sljit_gpr r11 = 11;	/* reg_map[10] */
static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
/* NOTE(review): reg_map[0] holds 0, not 14; 14 is the last reg_map entry,
   presumably index SLJIT_NUMBER_OF_REGISTERS + 4 -- confirm. */
static const sljit_gpr r14 = 14;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 4]: return address */
static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
88
89 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
90 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 * like we do now might be faster though, reserve?
 */
94
95 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
96 #define tmp0 r0
97 #define tmp1 r1
98
/* When reg cannot be unused: true if reg is a valid SLJIT general purpose
   register index, i.e. in the range 1..SLJIT_SP inclusive.
   Note: the argument is evaluated twice, so it must not have side effects. */
#define IS_GPR_REG(reg)		(((reg) > 0) && ((reg) <= SLJIT_SP))
101
/* Link register. */
static const sljit_gpr link_r = 14;     /* r14 */

/* SLJIT index of the temporary floating point register. */
#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Maps an SLJIT floating point register index to the register number
   placed into instructions by the F0()..F36() macros. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
};
110
/* Place an already mapped (hardware) register number at bit offset N of
   an instruction word. The argument is shifted as-is (no cast, no mask),
   so for R32A/R36A it must have a type at least 64 bits wide. */
#define R0A(r) (r)
#define R4A(r) ((r) << 4)
#define R8A(r) ((r) << 8)
#define R12A(r) ((r) << 12)
#define R16A(r) ((r) << 16)
#define R20A(r) ((r) << 20)
#define R28A(r) ((r) << 28)
#define R32A(r) ((r) << 32)
#define R36A(r) ((r) << 36)

/* Map an SLJIT register index through reg_map[]. */
#define R0(r) ((sljit_ins)reg_map[r])

/* Map an SLJIT float register index through freg_map[] and place the
   result at bit offset N of an instruction word. */
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
#define F12(r) (R12A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
#define F28(r) (R28A((sljit_ins)freg_map[r]))
#define F32(r) (R32A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))
130
/* s390x specific constant record: the generic sljit_const extended with
   the initial value of the constant. */
struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
135
136 /* Convert SLJIT register to hardware register. */
gpr(sljit_s32 r)137 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
138 {
139 SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
140 return reg_map[r];
141 }
142
143 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)144 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
145 {
146 /* keep faulting instructions */
147 if (ins == 0)
148 return 2;
149
150 if ((ins & 0x00000000ffffL) == ins)
151 return 2;
152 if ((ins & 0x0000ffffffffL) == ins)
153 return 4;
154 if ((ins & 0xffffffffffffL) == ins)
155 return 6;
156
157 SLJIT_UNREACHABLE();
158 return (sljit_uw)-1;
159 }
160
push_inst(struct sljit_compiler * compiler,sljit_ins ins)161 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
162 {
163 sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
164 FAIL_IF(!ibuf);
165 *ibuf = ins;
166 compiler->size++;
167 return SLJIT_SUCCESS;
168 }
169
encode_inst(void ** ptr,sljit_ins ins)170 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
171 {
172 sljit_u16 *ibuf = (sljit_u16 *)*ptr;
173 sljit_uw size = sizeof_ins(ins);
174
175 SLJIT_ASSERT((size & 6) == size);
176 switch (size) {
177 case 6:
178 *ibuf++ = (sljit_u16)(ins >> 32);
179 /* fallthrough */
180 case 4:
181 *ibuf++ = (sljit_u16)(ins >> 16);
182 /* fallthrough */
183 case 2:
184 *ibuf++ = (sljit_u16)(ins);
185 }
186 *ptr = (void*)ibuf;
187 return SLJIT_SUCCESS;
188 }
189
/* True when the flags state has the ADD or SUB bit set and does not have
   the COMPARE bit set. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
193
/* Map the given type to a 4-bit condition code mask.
   Mask bit 3 selects condition code 0, bit 2 selects code 1, bit 1
   selects code 2 and bit 0 selects code 3 (see cc0..cc3 below).
   For several types the result also depends on
   compiler->status_flags_state. Unknown types hit SLJIT_UNREACHABLE(). */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */

	switch (type) {
	case SLJIT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* Note: this local shadows the 'type' parameter; it holds the
			   flag type recorded in the status flags state. */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return cc0;
			if (type == SLJIT_OVERFLOW)
				return (cc0 | cc3);
			return (cc0 | cc2);
		}
		/* fallthrough */

	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
		return cc0;

	case SLJIT_NOT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* Shadows the parameter, as in the SLJIT_EQUAL case above. */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return (cc1 | cc2 | cc3);
			if (type == SLJIT_OVERFLOW)
				return (cc1 | cc2);
			return (cc1 | cc3);
		}
		/* fallthrough */

	case SLJIT_UNORDERED_OR_NOT_EQUAL:
		return (cc1 | cc2 | cc3);

	case SLJIT_LESS:
	case SLJIT_ATOMIC_NOT_STORED:
		return cc1;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_GREATER:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return cc2;
		return cc3;

	case SLJIT_LESS_EQUAL:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return (cc0 | cc1);
		return (cc0 | cc1 | cc2);

	case SLJIT_SIG_LESS:
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return cc1;

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return (cc0 | cc1);

	case SLJIT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_SIG_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Overflow is considered greater, see SLJIT_SUB. */
		return cc2 | cc3;

	case SLJIT_SIG_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_UNORDERED:
		return cc3;

	case SLJIT_NOT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_ORDERED:
		return (cc0 | cc1 | cc2);

	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return (cc1 | cc2);

	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return cc2;

	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return (cc0 | cc2);

	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return (cc0 | cc1 | cc3);

	case SLJIT_UNORDERED_OR_EQUAL:
		return (cc0 | cc3);

	case SLJIT_UNORDERED_OR_LESS:
		return (cc1 | cc3);
	}

	SLJIT_UNREACHABLE();
	return (sljit_u8)-1;
}
319
/* Facility to bit index mappings.
   Note: some facilities share the same bit index.
   have_facility_dynamic() counts bit indexes from the most significant
   bit of each 64-bit word of the stored facility list. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
336
337 /* Report whether a facility is known to be present due to the compiler
338 settings. This function should always be compiled to a constant
339 value given a constant argument. */
have_facility_static(facility_bit x)340 static SLJIT_INLINE int have_facility_static(facility_bit x)
341 {
342 #if ENABLE_STATIC_FACILITY_DETECTION
343 switch (x) {
344 case FAST_LONG_DISPLACEMENT_FACILITY:
345 return (__ARCH__ >= 6 /* z990 */);
346 case EXTENDED_IMMEDIATE_FACILITY:
347 case STORE_FACILITY_LIST_EXTENDED_FACILITY:
348 return (__ARCH__ >= 7 /* z9-109 */);
349 case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
350 return (__ARCH__ >= 8 /* z10 */);
351 case DISTINCT_OPERAND_FACILITY:
352 return (__ARCH__ >= 9 /* z196 */);
353 case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
354 return (__ARCH__ >= 10 /* zEC12 */);
355 case LOAD_STORE_ON_CONDITION_2_FACILITY:
356 case VECTOR_FACILITY:
357 return (__ARCH__ >= 11 /* z13 */);
358 case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
359 case VECTOR_ENHANCEMENTS_1_FACILITY:
360 return (__ARCH__ >= 12 /* z14 */);
361 default:
362 SLJIT_UNREACHABLE();
363 }
364 #endif
365 return 0;
366 }
367
get_hwcap()368 static SLJIT_INLINE unsigned long get_hwcap()
369 {
370 static unsigned long hwcap = 0;
371 if (SLJIT_UNLIKELY(!hwcap)) {
372 hwcap = getauxval(AT_HWCAP);
373 SLJIT_ASSERT(hwcap != 0);
374 }
375 return hwcap;
376 }
377
have_stfle()378 static SLJIT_INLINE int have_stfle()
379 {
380 if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
381 return 1;
382
383 return (get_hwcap() & HWCAP_S390_STFLE);
384 }
385
/* Report whether the given facility is available. This function always
   performs a runtime check.
   The facility list is stored once into a function local static buffer
   with the STFLE instruction and reused afterwards. Returns 0 when STFLE
   itself is unavailable or dynamic detection is disabled. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	/* 64 facility bits per word; bit 0 is the most significant bit. */
	const sljit_uw word_index = x >> 6;
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	/* A zero first word is used as the "not yet initialized" marker. */
	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* r0 is loaded with the number of 8 byte words in the buffer
		   minus one before STFLE stores into cpu_features. */
		__asm__ __volatile__ (
			"lgr %%r0, %0;"
			"stfle 0(%1);"
			/* outputs */:
			/* inputs */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
417
418 #define HAVE_FACILITY(name, bit) \
419 static SLJIT_INLINE int name() \
420 { \
421 static int have = -1; \
422 /* Static check first. May allow the function to be optimized away. */ \
423 if (have_facility_static(bit)) \
424 have = 1; \
425 else if (SLJIT_UNLIKELY(have < 0)) \
426 have = have_facility_dynamic(bit) ? 1 : 0; \
427 \
428 return have; \
429 }
430
HAVE_FACILITY(have_eimm,EXTENDED_IMMEDIATE_FACILITY)431 HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
432 HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
433 HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
434 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
435 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
436 HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
437 #undef HAVE_FACILITY
438
/* Range checks for immediates and displacements.
   is_uN(d): d fits in N unsigned bits; is_sN(d): d fits in N signed bits.
   The arguments are evaluated more than once. */
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

/* The bounds are computed by shifting the int constant 1, so bitlen must
   stay below the width of int. */
#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))

#define is_s8(d)	CHECK_SIGNED((d), 8)
#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
/* True when d survives a round trip through sljit_s32. */
#define is_s32(d)	((d) == (sljit_s32)(d))
449
450 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
451 {
452 sljit_uw dh, dl;
453
454 SLJIT_ASSERT(is_s20(d));
455
456 dh = (d >> 12) & 0xff;
457 dl = ((sljit_uw)d << 8) & 0xfff00;
458 return (dh | dl) << 8;
459 }
460
/* Opens the definition of an instruction encoder: a static inline function
   named `op` that takes the given parameters and returns the encoded
   sljit_ins. */
/* TODO(carenas): variadic macro is not strictly needed */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
464
/* RR form instructions.
   Each encoder returns the pattern with the low 4 bits of dst in
   bits 4-7 and the low 4 bits of src in bits 0-3. */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RR(nr,   0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */

/* DIVIDE */
SLJIT_S390X_RR(dr,   0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr,   0x1700)

/* LOAD */
SLJIT_S390X_RR(lr,   0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr,  0x1300)

/* OR */
SLJIT_S390X_RR(or,   0x1600)

#undef SLJIT_S390X_RR
497
/* RRE form instructions.
   Each encoder returns the 4 byte pattern with dst at bit offset 4 and src
   at bit offset 0. The register numbers are not masked here. */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | R4A(dst) | R0A(src); \
}

/* AND */
SLJIT_S390X_RRE(ngr,   0xb9800000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr,   0xb9970000)
SLJIT_S390X_RRE(dlgr,  0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr,  0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr,   0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr,   0xb9040000)
SLJIT_S390X_RRE(lgfr,  0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr,   0xb9260000)
SLJIT_S390X_RRE(lgbr,  0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr,  0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr,   0xb9270000)
SLJIT_S390X_RRE(lghr,  0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr,  0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr,  0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr,  0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr,   0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr,   0xb9090000)

#undef SLJIT_S390X_RRE
557
/* RI-a form instructions.
   Each encoder returns the 4 byte pattern with reg at bit offset 20 and
   the low 16 bits of imm in bits 0-15. imm_type selects the signedness
   of the immediate parameter. */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | R20A(reg) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)

#undef SLJIT_S390X_RIA
586
/* RIL-a form instructions (requires extended immediate facility).
   Each encoder asserts have_eimm() and returns the 6 byte pattern with reg
   at bit offset 36 and the low 32 bits of imm in bits 0-31. */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* SUBTRACT LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)

#undef SLJIT_S390X_RILA
622
/* RX-a form instructions.
   Each encoder takes a register r and a memory operand d(x, b), where d
   must be an unsigned 12-bit displacement (asserted). The result has r at
   bit offset 20, x at 16, b at 12 and d in bits 0-11. */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT((d & 0xfff) == d); \
\
	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
}

/* LOAD */
SLJIT_S390X_RXA(l,   0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la,  0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh,  0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms,  0x71000000)

/* STORE */
SLJIT_S390X_RXA(st,  0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

#undef SLJIT_S390X_RXA
654
/* RXY-a instructions.
   Each encoder takes a register r and a memory operand d(x, b) with a
   signed 20-bit displacement (checked inside disp_s20()). `cond` is the
   availability condition asserted before encoding; 1 means no check.
   The result has r at bit offset 36, x at 32 and b at 28. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
}

/* LOAD */
SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)

/* STORE */
SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())

#undef SLJIT_S390X_RXYA
703
/* RSY-a instructions.
   Each encoder takes two registers and a d(b) operand with a signed 20-bit
   displacement (checked inside disp_s20()). `cond` is asserted before
   encoding. The result has dst at bit offset 36, src at 32 and b at 28. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA
726
/* RIE-f instructions (require general-instructions-extension facility).
   Each encoder asserts have_genext() and returns the 6 byte pattern with
   the low 4 bits of dst at bit offset 36, the low 4 bits of src at 32,
   start (I3) at 24, end (I4) at 16 and rot (I5) at 8. */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins i3, i4, i5; \
\
	SLJIT_ASSERT(have_genext()); \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
\
	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg,  0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
761
/* RRF-c instructions (require load/store-on-condition 1 facility).
   Each encoder asserts have_lscond1() and returns the 4 byte pattern with
   the low 4 bits of mask (M3) at bit offset 12, dst at 4 and src at 0. */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins m3; \
\
	SLJIT_ASSERT(have_lscond1()); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
\
	return (pattern) | m3 | R4A(dst) | R0A(src); \
}

/* LOAD ON CONDITION */
SLJIT_S390X_RRFC(locr,  0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC
779
/* RIE-g instructions (require load/store-on-condition 2 facility).
   Each encoder asserts have_lscond2() and returns the 6 byte pattern with
   reg at bit offset 36, the low 4 bits of mask (M3) at 32 and the low
   16 bits of imm (I2) at 16. */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins m3, i2; \
\
	SLJIT_ASSERT(have_lscond2()); \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
\
	return (pattern) | R36A(reg) | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi,  0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG
798
/* Encoders taking a register and a relative operand ri.
   `cond` is asserted before encoding. The result is the 6 byte pattern
   with reg at bit offset 36 and the low 32 bits of ri in bits 0-31. */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl,  0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
817
/* Encodes the 2 byte pattern 0x07f0 with the target register OR-ed into
   the low bits. The register number is not masked. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | target;
}
822
/* Encodes the 4 byte pattern 0xa7040000 with the low 4 bits of mask at
   bit offset 20 and the low 16 bits of target in bits 0-15. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	return 0xa7040000L
		| ((sljit_ins)(mask & 0xf) << 20)
		| ((sljit_ins)target & 0xffff);
}
829
/* Encodes the 6 byte pattern 0xc00400000000 with the low 4 bits of mask
   at bit offset 36 and the low 32 bits of target in bits 0-31. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	return 0xc00400000000L
		| ((sljit_ins)(mask & 0xf) << 36)
		| ((sljit_ins)target & 0xffffffff);
}
836
/* FIND LEFTMOST ONE
   Encoded like the other RRE form instructions in this file: dst at bit
   offset 4 and src at bit offset 0. Asserts have_eimm(). */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	/* dst belongs in bits 4-7; bits 8-15 are not a register field. */
	return 0xb9830000 | R4A(dst) | R0A(src);
}
842
/* INSERT PROGRAM MASK
   Encodes the 4 byte pattern 0xb2220000 with dst at bit offset 4. */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	return 0xb2220000 | R4A(dst);
}
848
/* SET PROGRAM MASK
   Encodes the 2 byte pattern 0x0400 with dst at bit offset 4. */
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
{
	return 0x0400 | R4A(dst);
}
854
855 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz,sljit_gpr dst,sljit_gpr src,sljit_u8 start,sljit_u8 end,sljit_u8 rot)856 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
857 {
858 return risbhg(dst, src, start, 0x8 | end, rot);
859 }
860
861 #undef SLJIT_S390X_INSTRUCTION
862
/* Emits a fixed six instruction sequence that rewrites the condition code
   according to the table below, testing dst_r for zero. Clobbers tmp1.
   Returns SLJIT_SUCCESS or the error propagated by FAIL_IF. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	/* Mask 0xc selects condition codes 0 and 1: skip the whole sequence.
	   The branch distance is the sum of the sizes of the six
	   instructions, which matches their lengths counted in 2 byte units
	   (brc 2, ipm 2, or 1 / ogr 2, brc 2, slfi 3, spm 1). */
	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
	/* Save the current condition code / program mask into tmp1. */
	FAIL_IF(push_inst(compiler, ipm(tmp1)));
	/* OR dst_r with itself (32 or 64 bit form, selected by SLJIT_32). */
	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
	/* Mask 0x8 selects condition code 0: skip the slfi below. */
	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
	/* Subtract 0x10000000 from the saved value. */
	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
	/* Write tmp1 back with SET PROGRAM MASK. */
	FAIL_IF(push_inst(compiler, spm(tmp1)));
	return SLJIT_SUCCESS;
}
879
880 /* load 64-bit immediate into register without clobbering flags */
push_load_imm_inst(struct sljit_compiler * compiler,sljit_gpr target,sljit_sw v)881 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
882 {
883 /* 4 byte instructions */
884 if (is_s16(v))
885 return push_inst(compiler, lghi(target, (sljit_s16)v));
886
887 if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
888 return push_inst(compiler, llill(target, (sljit_u16)v));
889
890 if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
891 return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
892
893 if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
894 return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
895
896 if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
897 return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
898
899 if (is_s32(v))
900 return push_inst(compiler, lgfi(target, (sljit_s32)v));
901
902 if (((sljit_uw)v >> 32) == 0)
903 return push_inst(compiler, llilf(target, (sljit_u32)v));
904
905 if (((sljit_uw)v << 32) == 0)
906 return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
907
908 FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
909 return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
910 }
911
/* A memory operand in D(X,B) form, as filled in by make_addr_bxy() and
   make_addr_bx(). */
struct addr {
	sljit_gpr base;   /* hardware base register (r0 when there is none) */
	sljit_gpr index;  /* hardware index register (r0 when there is none) */
	sljit_s32 offset; /* displacement */
};
917
918 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
make_addr_bxy(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)919 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
920 struct addr *addr, sljit_s32 mem, sljit_sw off,
921 sljit_gpr tmp /* clobbered, must not be r0 */)
922 {
923 sljit_gpr base = r0;
924 sljit_gpr index = r0;
925
926 SLJIT_ASSERT(tmp != r0);
927 if (mem & REG_MASK)
928 base = gpr(mem & REG_MASK);
929
930 if (mem & OFFS_REG_MASK) {
931 index = gpr(OFFS_REG(mem));
932 if (off != 0) {
933 /* shift and put the result into tmp */
934 SLJIT_ASSERT(0 <= off && off < 64);
935 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
936 index = tmp;
937 off = 0; /* clear offset */
938 }
939 }
940 else if (!is_s20(off)) {
941 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
942 index = tmp;
943 off = 0; /* clear offset */
944 }
945 addr->base = base;
946 addr->index = index;
947 addr->offset = (sljit_s32)off;
948 return SLJIT_SUCCESS;
949 }
950
951 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
make_addr_bx(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)952 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
953 struct addr *addr, sljit_s32 mem, sljit_sw off,
954 sljit_gpr tmp /* clobbered, must not be r0 */)
955 {
956 sljit_gpr base = r0;
957 sljit_gpr index = r0;
958
959 SLJIT_ASSERT(tmp != r0);
960 if (mem & REG_MASK)
961 base = gpr(mem & REG_MASK);
962
963 if (mem & OFFS_REG_MASK) {
964 index = gpr(OFFS_REG(mem));
965 if (off != 0) {
966 /* shift and put the result into tmp */
967 SLJIT_ASSERT(0 <= off && off < 64);
968 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
969 index = tmp;
970 off = 0; /* clear offset */
971 }
972 }
973 else if (!is_u12(off)) {
974 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
975 index = tmp;
976 off = 0; /* clear offset */
977 }
978 addr->base = base;
979 addr->index = index;
980 addr->offset = (sljit_s32)off;
981 return SLJIT_SUCCESS;
982 }
983
/* EVAL: call encoder `op` for register r with the offset, index and base
   fields of addr (an object with .offset, .index and .base members).
   WHEN: evaluate encoder i1 if cond is true, otherwise encoder i2.
   Both expansions are fully parenthesized so they can be used inside
   larger expressions. */
#define EVAL(op, r, addr) op((r), (addr).offset, (addr).index, (addr).base)
#define WHEN(cond, r, i1, i2, addr) \
	((cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr))
987
988 /* May clobber tmp1. */
load_store_op(struct sljit_compiler * compiler,sljit_gpr reg,sljit_s32 mem,sljit_sw memw,sljit_s32 is_32bit,const sljit_ins * forms)989 static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
990 sljit_s32 mem, sljit_sw memw,
991 sljit_s32 is_32bit, const sljit_ins* forms)
992 {
993 struct addr addr;
994
995 SLJIT_ASSERT(mem & SLJIT_MEM);
996
997 if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
998 FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
999 return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
1000 }
1001
1002 FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
1003 return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1004 }
1005
/*
 * Opcode tables consumed by load_store_op(). In each table:
 *   [0] 32-bit access, short (unsigned 12-bit) displacement encoding
 *   [1] 32-bit access, long (signed 20-bit) displacement encoding
 *   [2] 64-bit access, long displacement encoding
 */

/* Word loads. */
static const sljit_ins load_forms[3] = {
	0x58000000 /* l */,
	0xe30000000058 /* ly */,
	0xe30000000004 /* lg */
};

/* Word stores. */
static const sljit_ins store_forms[3] = {
	0x50000000 /* st */,
	0xe30000000050 /* sty */,
	0xe30000000024 /* stg */
};

/* Halfword loads. */
static const sljit_ins load_halfword_forms[3] = {
	0x48000000 /* lh */,
	0xe30000000078 /* lhy */,
	0xe30000000015 /* lgh */
};
1023
1024 /* May clobber tmp1. */
load_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)1025 static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1026 sljit_s32 src, sljit_sw srcw,
1027 sljit_s32 is_32bit)
1028 {
1029 return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
1030 }
1031
1032 /* May clobber tmp1. */
load_unsigned_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)1033 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1034 sljit_s32 src, sljit_sw srcw,
1035 sljit_s32 is_32bit)
1036 {
1037 struct addr addr;
1038 sljit_ins ins;
1039
1040 SLJIT_ASSERT(src & SLJIT_MEM);
1041
1042 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1043
1044 ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1045 return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1046 }
1047
1048 /* May clobber tmp1. */
store_word(struct sljit_compiler * compiler,sljit_gpr src_r,sljit_s32 dst,sljit_sw dstw,sljit_s32 is_32bit)1049 static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1050 sljit_s32 dst, sljit_sw dstw,
1051 sljit_s32 is_32bit)
1052 {
1053 return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
1054 }
1055
/* WHEN is not used past this point.
 * NOTE(review): EVAL is left defined here; confirm whether later code needs it. */
#undef WHEN
1057
/*
 * Copy the operand src/srcw into dst_r. Immediates are materialized, memory
 * operands are loaded (may clobber tmp1), registers are copied with lr or lgr
 * depending on whether compiler->mode has SLJIT_32 set.
 * A register source must not already be dst_r.
 */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 is_32bit = (compiler->mode & SLJIT_32) != 0;
	sljit_gpr from_r;

	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src == SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);

	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, is_32bit);

	from_r = gpr(src & REG_MASK);

	if (is_32bit)
		return push_inst(compiler, lr(dst_r, from_r));

	return push_inst(compiler, lgr(dst_r, from_r));
}
1075
/*
 * Emit a two-operand register-register instruction (dst_r = dst_r op src_r).
 *
 * src1 is first brought into the destination register unless it is already
 * there. When dst is the same register as src2 the operation is carried out
 * in tmp0 and the result is copied to dst afterwards, so src2 is not
 * overwritten before it is read. A non-register dst leaves the result in tmp0.
 * May clobber tmp0 and tmp1.
 */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr src_r = tmp1;
	sljit_s32 load_src1 = 1;
	sljit_s32 copy_back = 0;

	if (FAST_IS_REG(dst)) {
		if (dst == src1) {
			dst_r = gpr(dst);
			load_src1 = 0;
		} else if (dst == src2) {
			/* compute in tmp0, move to dst at the end */
			copy_back = 1;
		} else {
			dst_r = gpr(dst);
		}
	}

	if (load_src1) {
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
	}

	if (!FAST_IS_REG(src2)) {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
	} else {
		src_r = gpr(src2);
	}

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));

	if (!copy_back)
		return SLJIT_SUCCESS;

	dst_r = gpr(dst & REG_MASK);

	if (compiler->mode & SLJIT_32)
		return push_inst(compiler, lr(dst_r, tmp0));

	return push_inst(compiler, lgr(dst_r, tmp0));
}
1112
/*
 * Emit a register-register instruction with a single source operand.
 * The result goes to dst when it is a register and to tmp0 otherwise; a
 * non-register source is first moved into tmp1.
 */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr src_r = tmp1;

	if (FAST_IS_REG(dst))
		dst_r = gpr(dst);

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
	} else {
		src_r = gpr(src1);
	}

	return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
}
1127
/*
 * Emit a three-operand register instruction (separate destination and two
 * sources). Non-register sources are moved into tmp0 (src1) and tmp1 (src2);
 * a non-register dst leaves the result in tmp0.
 */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr lhs_r = tmp0;
	sljit_gpr rhs_r = tmp1;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
	} else {
		lhs_r = gpr(src1);
	}

	if (!FAST_IS_REG(src2)) {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
	} else {
		rhs_r = gpr(src2);
	}

	return push_inst(compiler, ins | R4A(dst_r) | R0A(lhs_r) | R12A(rhs_r));
}
1149
/* Immediate encoding selected by emit_ri(): RI_A carries a 16-bit immediate,
 * RIL_A a 32-bit immediate. */
typedef enum {
	RI_A,
	RIL_A,
} emit_ril_type;
1154
/*
 * Emit a register-immediate instruction: src1 is brought into the destination
 * register (dst, or tmp0 when dst is not a register) unless it is already
 * there, then `ins` is emitted with src2w truncated to 32 bits (RIL_A) or
 * 16 bits (any other type).
 */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 in_place = 0;

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);
		in_place = (dst == src1);
	}

	if (!in_place) {
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
	}

	switch (type) {
	case RIL_A:
		return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
	default:
		return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
	}
}
1178
/*
 * Emit an RIE-d style instruction: separate destination and source registers
 * plus a 16-bit immediate placed at bit 16 of the encoding. A non-register
 * source is moved into tmp0; a non-register dst leaves the result in tmp0.
 */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr src_r = tmp0;
	sljit_ins imm16;

	if (FAST_IS_REG(dst))
		dst_r = gpr(dst);

	if (FAST_IS_REG(src1)) {
		src_r = gpr(src1 & REG_MASK);
	} else {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
	}

	imm16 = (sljit_ins)(src2w & 0xffff) << 16;
	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | imm16);
}
1194
/* Memory-operand encoding selected by emit_rx(): RX_A uses an unsigned 12-bit
 * displacement, RXY_A a signed 20-bit displacement. */
typedef enum {
	RX_A,
	RXY_A,
} emit_rx_type;
1199
/*
 * Emit a register-memory instruction (dst_r = dst_r op [src2]).
 *
 * src1 is brought into the destination register unless it is already there.
 * When dst is also the base or offset register of src2, the operation is
 * carried out in tmp0 and copied to dst afterwards so the address registers
 * are still intact when the instruction executes. A non-register dst leaves
 * the result in tmp0.
 *
 * `type` selects the displacement encoding (RX_A: unsigned 12-bit, RXY_A:
 * signed 20-bit). Displacements that do not fit, and shifted offset registers,
 * are computed into tmp1.
 */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr base, index;
	sljit_s32 load_src1 = 1;
	sljit_s32 copy_back = 0;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		if (dst == src1) {
			dst_r = gpr(dst);
			load_src1 = 0;
		} else if (dst == (src2 & REG_MASK) || dst == OFFS_REG(src2)) {
			/* dst is part of the address: compute in tmp0 */
			copy_back = 1;
		} else {
			dst_r = gpr(dst);
		}
	}

	if (load_src1) {
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
	}

	base = gpr(src2 & REG_MASK);
	/* presumably tmp0 encodes "no index register" here - TODO confirm */
	index = tmp0;

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			index = tmp1;
			src2w = 0;
		}
	} else if (type == RX_A ? !is_u12(src2w) : (type == RXY_A && !is_s20(src2w))) {
		/* displacement does not fit the chosen encoding */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;

		src2w = 0;
	}

	if (type != RX_A)
		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);
	else
		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;

	FAIL_IF(push_inst(compiler, ins));

	if (!copy_back)
		return SLJIT_SUCCESS;

	dst_r = gpr(dst);

	if (compiler->mode & SLJIT_32)
		return push_inst(compiler, lr(dst_r, tmp0));

	return push_inst(compiler, lgr(dst_r, tmp0));
}
1260
/*
 * Emit an SIY-format instruction: `ins` applied to the memory location
 * dst/dstw with the 8-bit immediate srcw.
 *
 * The encoding has only a base register and a signed 20-bit displacement, so:
 *  - base + (offset register << shift) is first folded into tmp1 with la;
 *  - a displacement outside the signed 20-bit range is loaded into tmp1 and,
 *    when the operand has a base register, the base is added with la;
 *  - otherwise the operand's own base register and displacement are used.
 *
 * May clobber tmp1.
 */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	sljit_gpr base_r;
	sljit_gpr dst_r = tmp1;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* Register holding the operand's base address (the mapping of "no
	   register" when the operand is an absolute address). */
	base_r = gpr(dst & REG_MASK);

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = gpr(OFFS_REG(dst));

		if ((dstw & 0x3) != 0) {
			/* scale the offset register into tmp1 */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
			index = tmp1;
		}

		/* tmp1 = base + (possibly scaled) index */
		FAIL_IF(push_inst(compiler, la(tmp1, 0, base_r, index)));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		/* tmp1 = base + displacement */
		if (dst & REG_MASK)
			FAIL_IF(push_inst(compiler, la(tmp1, 0, base_r, tmp1)));

		dstw = 0;
	}
	else
		dst_r = base_r;

	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
}
1293
/*
 * Opcode variants of one arithmetic/logic operation, as selected by
 * emit_commutative() and emit_non_commutative(). A zero entry means the
 * variant is not available.
 */
struct ins_forms {
	sljit_ins op_r;   /* 32-bit, two-operand register form (used with emit_rr) */
	sljit_ins op_gr;  /* 64-bit, two-operand register form (used with emit_rr) */
	sljit_ins op_rk;  /* 32-bit, three-operand register form (used with emit_rrf) */
	sljit_ins op_grk; /* 64-bit, three-operand register form (used with emit_rrf) */
	sljit_ins op;     /* 32-bit, memory operand, RX_A encoding */
	sljit_ins op_y;   /* 32-bit, memory operand, RXY_A encoding */
	sljit_ins op_g;   /* 64-bit, memory operand, RXY_A encoding */
};
1303
/*
 * Emit a commutative binary operation (dst = src1 op src2) using the opcode
 * variants in `forms`.
 *
 * If either source is a memory operand and a register-memory variant exists
 * for the current mode, one memory operand is used directly by the
 * instruction; because the operation is commutative the sources may be
 * swapped to choose which one that is. Otherwise a register form is used:
 * the two-operand form when dst coincides with one of the sources (or no
 * three-operand form exists), the three-operand form in all other cases.
 */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* ins12: 12-bit displacement variant, ins20: 20-bit displacement variant */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			/* Use src2 as the memory operand when src1 is not memory, or when
			   src1 is a memory operand whose shift or displacement passes the
			   check below. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			/* Otherwise swap the sources and use src1 as the memory operand. */
			if (src1 & SLJIT_MEM) {
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			/* Only one register-memory variant is available. */
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* Register forms: ins is the two-operand variant, ins_k the three-operand one. */
	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	if (ins && FAST_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		/* dst is src2: swap the sources so the operation is done in place */
		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1386
/*
 * Emit a non-commutative binary operation (dst = src1 op src2) using the
 * opcode variants in `forms`. The operand order is never swapped.
 *
 * A memory src2 is used directly when a register-memory variant exists for
 * the current mode. Otherwise the three-operand register form is used when it
 * exists and dst is not the same register as src1; the two-operand register
 * form is used in the remaining cases.
 */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 is_32bit = (compiler->mode & SLJIT_32) != 0;
	sljit_ins ins_k;

	if (src2 & SLJIT_MEM) {
		/* 12-bit and 20-bit displacement variants; zero when unavailable */
		sljit_ins ins12 = is_32bit ? forms->op : 0;
		sljit_ins ins20 = is_32bit ? forms->op_y : forms->op_g;

		/* With both variants available the short one is taken for indexed
		   operands and for displacements that fit 12 bits or do not fit
		   20 bits; with a single variant there is no choice. */
		if (ins12 && (!ins20 || (src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w)))
			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

		if (ins20)
			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
	}

	ins_k = is_32bit ? forms->op_rk : forms->op_grk;

	if (ins_k != 0 && !(FAST_IS_REG(dst) && dst == src1))
		return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);

	return emit_rr(compiler, is_32bit ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
}
1426
/*
 * Turn the buffered instruction stream into executable machine code.
 *
 * The work is done in two passes over compiler->buf:
 *  1. compute the byte size of the code (ins_size) and of the literal pool
 *     (pool_size), and convert every label->size from an instruction index
 *     into a byte offset;
 *  2. encode each instruction into the allocated buffer, filling the literal
 *     pool (constants, targets of rewritable/absolute jumps, put_label
 *     values) and patching PC-relative offsets.
 *
 * The literal pool is placed after the code, which is padded to a multiple
 * of 8 bytes. Returns the executable address of the generated code.
 */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0; /* i: index within the current fragment, j: global instruction index */
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			/* label->size is an instruction index until it is replaced
			   by the byte offset here */
			if (label && label->size == j) {
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   basr %r14, %r1 (or bcr <mask>, %r1) */
					/* the replacement needs one pool entry and is two
					   bytes longer than the original instruction */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and assert()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_sw)code_ptr;
				offset = (sljit_sw)pool_ptr - source;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));

				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = (sljit_uw)const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					sljit_ins op, arg;

					/* jump->addr now refers to the pool slot holding the target */
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));

					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)target;

					/* branch to tmp1 */
					op = (ins >> 32) & 0xf;
					arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					/* jump->addr refers to the offset field, two bytes into the instruction */
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	/* both passes must agree on the code and pool sizes */
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	/* padding is only counted when a literal pool follows the code */
	if (pool_size)
		compiler->executable_size += (pad_size + pool_size);
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1654
sljit_has_cpu_feature(sljit_s32 feature_type)1655 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1656 {
1657 /* TODO(mundaym): implement all */
1658 switch (feature_type) {
1659 case SLJIT_HAS_FPU:
1660 #ifdef SLJIT_IS_FPU_AVAILABLE
1661 return (SLJIT_IS_FPU_AVAILABLE) != 0;
1662 #else
1663 return 1;
1664 #endif /* SLJIT_IS_FPU_AVAILABLE */
1665
1666 case SLJIT_HAS_CLZ:
1667 case SLJIT_HAS_REV:
1668 case SLJIT_HAS_ROT:
1669 case SLJIT_HAS_PREFETCH:
1670 case SLJIT_HAS_COPY_F32:
1671 case SLJIT_HAS_COPY_F64:
1672 case SLJIT_HAS_SIMD:
1673 case SLJIT_HAS_ATOMIC:
1674 return 1;
1675
1676 case SLJIT_HAS_CTZ:
1677 return 2;
1678
1679 case SLJIT_HAS_CMOV:
1680 return have_lscond1() ? 1 : 0;
1681 }
1682 return 0;
1683 }
1684
sljit_cmp_info(sljit_s32 type)1685 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1686 {
1687 SLJIT_UNUSED_ARG(type);
1688 return 0;
1689 }
1690
1691 /* --------------------------------------------------------------------- */
1692 /* Entry, exit */
1693 /* --------------------------------------------------------------------- */
1694
/*
 * Emit the function prologue.
 *
 * Registers that must be preserved are stored at increasing offsets from r15,
 * starting two words in: first the general registers (r6 upwards for
 * scratches that map onto saved registers, then the saved registers and r14),
 * then the floating point registers. After that r15 is lowered by the aligned
 * frame size, which is also recorded in compiler->local_size. Unless
 * SLJIT_ENTER_REG_ARG is set, word-sized arguments that are not marked as
 * scratch are finally copied into saved registers.
 *
 * The offsets used here must match those in emit_stack_frame_release().
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	/* number of saved registers that are kept (not stored) by this function */
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* every register from r6 up is in use: store them with one stmg */
		if (saved_arg_count == 0) {
			/* r6..r14: nine registers */
			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			/* the kept registers at the top of the range are skipped */
			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
		}
	} else {
		/* scratch registers that map onto r6 and above */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* saved registers, counted downwards from r13 */
		if (saved_arg_count == 0) {
			/* r14 is stored together with the saved registers */
			if (saveds == 0) {
				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > saved_arg_count) {
			if (saveds == saved_arg_count + 1) {
				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
			}
		}
	}

	/* with kept registers r14 was not covered above: store it separately */
	if (saved_arg_count > 0) {
		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* saved float registers, from SLJIT_FS0 downwards */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* float scratch registers numbered SLJIT_FIRST_SAVED_FLOAT_REG and above */
	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* frame size: locals plus the default frame, rounded up to 16 bytes */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	/* r15 -= local_size */
	if (is_s20(-local_size))
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
	else
		FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));

	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	/* Copy word arguments into saved registers: tmp counts the word
	   arguments seen so far, saved_arg_count the ones already copied. */
	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1790
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1791 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1792 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1793 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1794 {
1795 CHECK_ERROR();
1796 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1797 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1798
1799 compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1800 return SLJIT_SUCCESS;
1801 }
1802
/*
 * Emit the function epilogue up to, but not including, the final branch.
 *
 * r15 is first raised by compiler->local_size, then the registers stored by
 * sljit_emit_enter() are reloaded from the same offsets relative to r15.
 *
 * last_reg is the highest general register to restore: with r14 the return
 * address register is reloaded as well, with any other value r14 is left
 * untouched (its slot is still skipped so the following offsets stay correct).
 */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);

	/* r15 += local_size, using the shortest encoding that fits */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else if (is_s20(local_size))
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
	else
		FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* everything from r6 up was stored with a single stmg */
		if (kept_saveds_count == 0) {
			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
		}
	} else {
		/* scratch registers that map onto r6 and above */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		if (kept_saveds_count == 0) {
			if (saveds == 0) {
				/* only the r14 slot is present; reload it on request */
				if (last_reg == r14)
					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else if (saveds == 1 && last_reg == r13) {
				/* reload r13 only and skip both the r13 and r14 slots */
				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
				offset += 2 * SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > kept_saveds_count) {
			if (saveds == kept_saveds_count + 1) {
				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
			}
		}
	}

	/* with kept registers r14 has its own slot after the saved registers */
	if (kept_saveds_count > 0) {
		if (last_reg == r14)
			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* saved float registers, from SLJIT_FS0 downwards */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* float scratch registers numbered SLJIT_FIRST_SAVED_FLOAT_REG and above */
	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1878
sljit_emit_return_void(struct sljit_compiler * compiler)1879 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1880 {
1881 CHECK_ERROR();
1882 CHECK(check_sljit_emit_return_void(compiler));
1883
1884 FAIL_IF(emit_stack_frame_release(compiler, r14));
1885 return push_inst(compiler, br(r14)); /* return */
1886 }
1887
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1888 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1889 sljit_s32 src, sljit_sw srcw)
1890 {
1891 CHECK_ERROR();
1892 CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1893
1894 if (src & SLJIT_MEM) {
1895 ADJUST_LOCAL_OFFSET(src, srcw);
1896 FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
1897 src = TMP_REG2;
1898 srcw = 0;
1899 } else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1900 FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
1901 src = TMP_REG2;
1902 srcw = 0;
1903 }
1904
1905 FAIL_IF(emit_stack_frame_release(compiler, r13));
1906
1907 SLJIT_SKIP_CHECKS(compiler);
1908 return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1909 }
1910
1911 /* --------------------------------------------------------------------- */
1912 /* Operators */
1913 /* --------------------------------------------------------------------- */
1914
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1915 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1916 {
1917 sljit_gpr arg0 = gpr(SLJIT_R0);
1918 sljit_gpr arg1 = gpr(SLJIT_R1);
1919
1920 CHECK_ERROR();
1921 CHECK(check_sljit_emit_op0(compiler, op));
1922
1923 op = GET_OPCODE(op) | (op & SLJIT_32);
1924 switch (op) {
1925 case SLJIT_BREAKPOINT:
1926 /* The following invalid instruction is emitted by gdb. */
1927 return push_inst(compiler, 0x0001 /* 2-byte trap */);
1928 case SLJIT_NOP:
1929 return push_inst(compiler, 0x0700 /* 2-byte nop */);
1930 case SLJIT_LMUL_UW:
1931 FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1932 break;
1933 case SLJIT_LMUL_SW:
1934 /* signed multiplication from: */
1935 /* Hacker's Delight, Second Edition: Chapter 8-3. */
1936 FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1937 FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1938 FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1939 FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1940
1941 /* unsigned multiplication */
1942 FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1943
1944 FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1945 FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1946 break;
1947 case SLJIT_DIV_U32:
1948 case SLJIT_DIVMOD_U32:
1949 FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1950 FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1951 FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1952 FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1953 if (op == SLJIT_DIVMOD_U32)
1954 return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1955
1956 return SLJIT_SUCCESS;
1957 case SLJIT_DIV_S32:
1958 case SLJIT_DIVMOD_S32:
1959 FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1960 FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1961 FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1962 FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1963 if (op == SLJIT_DIVMOD_S32)
1964 return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1965
1966 return SLJIT_SUCCESS;
1967 case SLJIT_DIV_UW:
1968 case SLJIT_DIVMOD_UW:
1969 FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1970 FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1971 FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1972 FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1973 if (op == SLJIT_DIVMOD_UW)
1974 return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1975
1976 return SLJIT_SUCCESS;
1977 case SLJIT_DIV_SW:
1978 case SLJIT_DIVMOD_SW:
1979 FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1980 FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1981 FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1982 if (op == SLJIT_DIVMOD_SW)
1983 return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1984
1985 return SLJIT_SUCCESS;
1986 case SLJIT_ENDBR:
1987 return SLJIT_SUCCESS;
1988 case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1989 return SLJIT_SUCCESS;
1990 default:
1991 SLJIT_UNREACHABLE();
1992 }
1993 /* swap result registers */
1994 FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
1995 FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
1996 return push_inst(compiler, lgr(arg1, tmp0));
1997 }
1998
/* Emits a count-leading-zeros (SLJIT_CLZ) or count-trailing-zeros (SLJIT_CTZ)
   sequence built around the flogr instruction.
   op:    opcode, optionally combined with SLJIT_32 for the 32-bit variant.
   dst_r: register receiving the result.
   src_r: register holding the input value.
   tmp0 and tmp1 are used as scratch registers throughout; the order of the
   instructions below matters because both are reused several times.
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
{
	sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);

	/* 32-bit input: zero extend it into tmp0 (llgfr) and continue with tmp0. */
	if ((op & SLJIT_32) && src_r != tmp0) {
		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
		src_r = tmp0;
	}

	if (is_ctz) {
		/* tmp1 = -src, then tmp0 = src & tmp1. */
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));

		if (src_r == tmp0)
			FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
		else
			FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));

		src_r = tmp0;
	}

	/* The bit search itself; its result is placed in tmp0. */
	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));

	/* CTZ: tmp1 = tmp0 + (-64); consumed by the rxsbg below. */
	if (is_ctz)
		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));

	/* 32-bit variant: the input was zero extended to 64 bits, so 32 is
	   subtracted from the count. */
	if (op & SLJIT_32) {
		/* CLZ with a distinct destination: subtract and move in one instruction. */
		if (!is_ctz && dst_r != tmp0)
			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));

		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
	}

	/* CTZ: combine tmp1 into tmp0 with a rotate-then-xor of selected bits
	   (bit range and rotate amount depend on the operand width). */
	if (is_ctz)
		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));

	/* The result is already in place when the destination is tmp0. */
	if (dst_r == tmp0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
2039
/* Emits a byte reversal (SLJIT_REV, SLJIT_REV_U16/S16, SLJIT_REV_U32/S32).
   Three operand shapes are handled:
   - memory destination: the value is stored with a reversing store;
   - register destination, memory source: reversing load, then an optional
     zero / sign extension;
   - register to register: reversing register load, then an optional
     extension (32-bit results) or right shift (16-bit results).
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr mem_addr;
	sljit_gpr target;
	sljit_ins code;
	sljit_s32 opcode = GET_OPCODE(op);
	sljit_s32 is_32 = op & SLJIT_32;
	sljit_s32 is_half = (opcode == SLJIT_REV_U16) || (opcode == SLJIT_REV_S16);
	/* Only consulted after plain SLJIT_REV has been ruled out. */
	sljit_s32 zero_extend = (opcode == SLJIT_REV_U16) || (opcode == SLJIT_REV_U32);

	if (dst & SLJIT_MEM) {
		/* Reversing store; a memory source is loaded into tmp0 first. */
		if (!(src & SLJIT_MEM)) {
			target = gpr(src);
		} else {
			FAIL_IF(load_store_op(compiler, tmp0, src, srcw, is_32, is_half ? load_halfword_forms : load_forms));
			target = tmp0;
		}

		FAIL_IF(make_addr_bxy(compiler, &mem_addr, dst, dstw, tmp1));

		if (is_half)
			code = 0xe3000000003f /* strvh */;
		else if (is_32)
			code = 0xe3000000003e /* strv */;
		else
			code = 0xe3000000002f /* strvg */;

		return push_inst(compiler, code | R36A(target) | R32A(mem_addr.index) | R28A(mem_addr.base) | disp_s20(mem_addr.offset));
	}

	target = gpr(dst);

	if (src & SLJIT_MEM) {
		/* Reversing load straight into the destination register. */
		FAIL_IF(make_addr_bxy(compiler, &mem_addr, src, srcw, tmp1));

		if (is_half)
			code = 0xe3000000001f /* lrvh */;
		else if (is_32)
			code = 0xe3000000001e /* lrv */;
		else
			code = 0xe3000000000f /* lrvg */;

		FAIL_IF(push_inst(compiler, code | R36A(target) | R32A(mem_addr.index) | R28A(mem_addr.base) | disp_s20(mem_addr.offset)));

		if (opcode == SLJIT_REV)
			return SLJIT_SUCCESS;

		/* Extend the loaded halfword / word in place. */
		if (!is_half)
			code = zero_extend ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
		else if (is_32)
			code = zero_extend ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
		else
			code = zero_extend ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;

		return push_inst(compiler, code | R4A(target) | R0A(target));
	}

	/* Register to register reversal. */
	code = is_32 ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
	FAIL_IF(push_inst(compiler, code | R4A(target) | R0A(gpr(src))));

	if (opcode == SLJIT_REV)
		return SLJIT_SUCCESS;

	if (!is_half) {
		code = zero_extend ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
		return push_inst(compiler, code | R4A(target) | R0A(target));
	}

	/* 16-bit variants: shift the reversed halfword down from the top of the
	   register, logically for U16 and arithmetically for S16. */
	if (!is_32) {
		code = zero_extend ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
		return push_inst(compiler, code | R36A(target) | R32A(target) | (48 << 16));
	}

	code = zero_extend ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
	return push_inst(compiler, code | R20A(target) | 16);
}
2112
/* LEVAL will be defined later with different parameters as needed */
/* Selects between two instruction forms: expands to LEVAL(i1) when cond is
   true and to LEVAL(i2) otherwise. The expansion is fully parenthesized so
   the macro can be used safely inside larger expressions. */
#define WHEN2(cond, i1, i2) ((cond) ? LEVAL(i1) : LEVAL(i2))
2115
/* Emits a single source operand operation.
   Two groups are handled:
   - the move family (SLJIT_MOV .. SLJIT_MOV_P), dispatched on the operand
     shapes: register to register, immediate to register, memory to register,
     register / immediate to memory and memory to memory;
   - SLJIT_CLZ / SLJIT_CTZ and the SLJIT_REV* byte reversals, which are
     forwarded to sljit_emit_clz_ctz and sljit_emit_rev.
   tmp0 is used as the data scratch register and tmp1 is passed to
   make_addr_bxy as its scratch register.
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins;
	struct addr mem;
	sljit_gpr dst_r;
	sljit_gpr src_r;
	sljit_s32 opcode = GET_OPCODE(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
		/* LOAD REGISTER */
		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
			dst_r = gpr(dst);
			src_r = gpr(src);
			/* The 32-bit flag is part of the case labels (SLJIT_MOV32_*). */
			switch (opcode | (op & SLJIT_32)) {
			/* 32-bit */
			case SLJIT_MOV32_U8:
				ins = llcr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S8:
				ins = lbr(dst_r, src_r);
				break;
			case SLJIT_MOV32_U16:
				ins = llhr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S16:
				ins = lhr(dst_r, src_r);
				break;
			case SLJIT_MOV32:
				/* Moving a register onto itself needs no instruction. */
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lr(dst_r, src_r);
				break;
			/* 64-bit */
			case SLJIT_MOV_U8:
				ins = llgcr(dst_r, src_r);
				break;
			case SLJIT_MOV_S8:
				ins = lgbr(dst_r, src_r);
				break;
			case SLJIT_MOV_U16:
				ins = llghr(dst_r, src_r);
				break;
			case SLJIT_MOV_S16:
				ins = lghr(dst_r, src_r);
				break;
			case SLJIT_MOV_U32:
				ins = llgfr(dst_r, src_r);
				break;
			case SLJIT_MOV_S32:
				ins = lgfr(dst_r, src_r);
				break;
			case SLJIT_MOV:
			case SLJIT_MOV_P:
				/* Moving a register onto itself needs no instruction. */
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lgr(dst_r, src_r);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* LOAD IMMEDIATE */
		if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
			/* Truncate / extend the immediate at compile time according to the
			   move type; other opcodes keep the value unchanged. */
			switch (opcode) {
			case SLJIT_MOV_U8:
				srcw = (sljit_sw)((sljit_u8)(srcw));
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_sw)((sljit_s8)(srcw));
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_sw)((sljit_u16)(srcw));
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_sw)((sljit_s16)(srcw));
				break;
			case SLJIT_MOV_U32:
				srcw = (sljit_sw)((sljit_u32)(srcw));
				break;
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				srcw = (sljit_sw)((sljit_s32)(srcw));
				break;
			}
			return push_load_imm_inst(compiler, gpr(dst), srcw);
		}
		/* LOAD */
		/* TODO(carenas): avoid reg being defined later */
		/* LEVAL (and WHEN2, which expands to it) relies on locals named
		   exactly "reg" and "mem" being in scope at the point of use. */
		#define LEVAL(i) EVAL(i, reg, mem)
		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
			sljit_gpr reg = gpr(dst);

			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			/* TODO(carenas): convert all calls below to LEVAL */
			switch (opcode | (op & SLJIT_32)) {
			case SLJIT_MOV32_U8:
				ins = llc(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S8:
				ins = lb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_U16:
				ins = llh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S16:
				/* First form when the offset passes is_u12, second form otherwise. */
				ins = WHEN2(is_u12(mem.offset), lh, lhy);
				break;
			case SLJIT_MOV32:
				ins = WHEN2(is_u12(mem.offset), l, ly);
				break;
			case SLJIT_MOV_U8:
				ins = LEVAL(llgc);
				break;
			case SLJIT_MOV_S8:
				ins = lgb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U16:
				ins = LEVAL(llgh);
				break;
			case SLJIT_MOV_S16:
				ins = lgh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U32:
				ins = LEVAL(llgf);
				break;
			case SLJIT_MOV_S32:
				ins = lgf(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				ins = lg(reg, mem.offset, mem.index, mem.base);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* STORE and STORE IMMEDIATE */
		if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
			/* Shadows the function level "mem"; LEVAL / WHEN2 below use this one. */
			struct addr mem;
			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;

			if (src == SLJIT_IMM) {
				/* TODO(mundaym): MOVE IMMEDIATE? */
				/* The immediate is materialized in tmp0 before the store. */
				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
			}
			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
			/* Signed and unsigned variants store the same bytes. */
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), stc, stcy));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), sth, sthy));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), st, sty));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler, LEVAL(stg)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		#undef LEVAL
		/* MOVE CHARACTERS */
		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
			/* Memory to memory: load into tmp0, then store from tmp0. The same
			   "mem" descriptor is rebuilt for the destination after the load. */
			struct addr mem;
			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				FAIL_IF(push_inst(compiler,
					EVAL(llgc, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(stcy, tmp0, mem));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				FAIL_IF(push_inst(compiler,
					EVAL(llgh, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sthy, tmp0, mem));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				FAIL_IF(push_inst(compiler,
					EVAL(ly, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sty, tmp0, mem));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler,
					EVAL(lg, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				FAIL_IF(push_inst(compiler,
					EVAL(stg, tmp0, mem)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		/* Every operand combination of a move is handled above. */
		SLJIT_UNREACHABLE();
	}

	/* The remaining operations do not accept an immediate source. */
	SLJIT_ASSERT(src != SLJIT_IMM);

	/* Non-register operands are routed through tmp0. */
	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	/* Record which status flags were requested by this operation. */
	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);

	/* TODO(mundaym): optimize loads and stores */
	switch (opcode) {
	case SLJIT_CLZ:
	case SLJIT_CTZ:
		if (src & SLJIT_MEM)
			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));

		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
		break;
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		/* The 32-bit reversals are emitted with the 32-bit flag set. */
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_REV:
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
	default:
		SLJIT_UNREACHABLE();
	}

	/* CLZ / CTZ with a memory destination: the result was computed in tmp0. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2375
is_commutative(sljit_s32 op)2376 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2377 {
2378 switch (GET_OPCODE(op)) {
2379 case SLJIT_ADD:
2380 case SLJIT_ADDC:
2381 case SLJIT_MUL:
2382 case SLJIT_AND:
2383 case SLJIT_OR:
2384 case SLJIT_XOR:
2385 return 1;
2386 }
2387 return 0;
2388 }
2389
/* Instruction encodings of the signed add instructions, one entry per
   operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_add when the overflow flag is requested. */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};
2399
/* Instruction encodings of the logical (unsigned) add instructions, one
   entry per operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_add when the overflow flag is not requested. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2409
/* Emits an addition: dst = src1 + src2.
   The signed instruction variants are used when the overflow flag is
   requested, the logical (unsigned) variants otherwise. Immediate second
   operands are matched against progressively wider immediate encodings
   before falling back to the generic register / memory forms.
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const sljit_s32 is_32 = op & SLJIT_32;
	const int want_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
	const int want_zero_and_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	sljit_ins ins;

	if (src2 != SLJIT_IMM)
		goto generic;

	/* Add an 8-bit immediate directly to the memory operand (dst == src1). */
	if (!want_zero_and_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
		if (want_overflow)
			ins = is_32 ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
		else
			ins = is_32 ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
		return emit_siy(compiler, ins, dst, dstw, src2w);
	}

	/* 16-bit immediate. */
	if (is_s16(src2w)) {
		if (want_overflow)
			ins = is_32 ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
		else
			ins = is_32 ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
		FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
		goto done;
	}

	/* 32-bit immediate. */
	if (want_overflow) {
		if (is_32 || is_s32(src2w)) {
			ins = is_32 ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
			goto done;
		}
	} else if (is_32 || is_u32(src2w)) {
		ins = is_32 ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
		FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
		goto done;
	} else if (is_u32(-src2w)) {
		/* Adding a negative value is emitted as a logical subtract. */
		FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
		goto done;
	}

generic:
	FAIL_IF(emit_commutative(compiler, want_overflow ? &add_forms : &logical_add_forms, dst, src1, src1w, src2, src2w));

done:
	if (want_zero_and_overflow)
		FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));

	/* Memory destinations are written back from tmp0. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, is_32);

	return SLJIT_SUCCESS;
}
2468
/* Instruction encodings of the signed subtract instructions, one entry per
   operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_sub when a signed flag is requested. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};
2478
/* Instruction encodings of the logical (unsigned) subtract instructions, one
   entry per operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_sub when no signed flag is requested. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2488
/* Emits a subtraction: dst = src1 - src2.
   - When dst is tmp0 and only a comparison style flag is requested, a compare
     instruction is emitted instead of a real subtraction (presumably tmp0 as
     destination means the result is unused - confirm against the callers).
   - 0 - src2 is emitted as a load complement.
   - Immediate second operands are emitted as additions of the negated value
     when a suitable immediate encoding exists.
   - Everything else uses the generic signed / logical subtract forms.
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flag_type = GET_FLAG_TYPE(op);
	int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	const struct ins_forms *forms;
	sljit_ins ins;

	if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
		int compare_signed = flag_type >= SLJIT_SIG_LESS;

		/* Tell the flag consumers that the condition code comes from a compare. */
		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;

		if (src2 == SLJIT_IMM) {
			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
				if ((op & SLJIT_32) || is_s32(src2w)) {
					ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
			}
			else {
				if ((op & SLJIT_32) || is_u32(src2w)) {
					ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
				if (is_s16(src2w))
					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
			}
		}
		else if (src2 & SLJIT_MEM) {
			/* The short 32-bit forms are used when an index register is present or
			   the offset passes is_u12. */
			if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
			}

			if (compare_signed)
				ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
			else
				ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
		}

		/* Register operand, or an immediate that fits none of the forms above. */
		if (compare_signed)
			ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
		else
			ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
	}

	/* 0 - src2: negate with a load complement. */
	if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
		ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
		FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
		goto done;
	}

	if (src2 == SLJIT_IMM) {
		sljit_sw neg_src2w = -src2w;

		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
			/* Add the negated 8-bit immediate directly to the memory operand. */
			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
				if (sets_signed)
					ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
				else
					ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
			}

			if (is_s16(neg_src2w)) {
				if (sets_signed)
					ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
				else
					ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
				goto done;
			}
		}

		if (!sets_signed) {
			if ((op & SLJIT_32) || is_u32(src2w)) {
				ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
			if (is_u32(neg_src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
				goto done;
			}
		}
		else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
			goto done;
		}
	}

	forms = sets_signed ? &sub_forms : &logical_sub_forms;
	FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));

done:
	if (sets_signed) {
		sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
			/* In case of overflow, the sign bit of the two source operands must be different, and
			     - the first operand is greater if the sign bit of the result is set
			     - the first operand is less if the sign bit of the result is not set
			   The -result operation sets the correct sign, because the result cannot be zero.
			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */

			/* Skip the load complement unless the subtraction overflowed. The
			   branch offset is counted in halfwords from the brc itself: 2 for
			   the brc plus the length of the skipped instruction, which is 1
			   halfword for lcr and 2 halfwords for lcgr. */
			FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
			FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
		}
		else if (op & SLJIT_SET_Z)
			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
	}

	/* Memory destinations are written back from tmp0. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2612
/* Instruction encodings of the multiply instructions used when no status
   flag is requested, one entry per operand form (the mnemonic of each entry
   is given in its comment). Selected by sljit_emit_multiply. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};
2622
/* Instruction encodings of the multiply instructions used by
   sljit_emit_multiply when status flags are requested. Several entries are 0;
   presumably 0 marks an operand form that has no suitable instruction
   (confirm against the users of struct ins_forms). */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2632
/* Emits a multiplication: dst = src1 * src2.
   When status flags are requested the flag-setting instruction forms are
   always used. Otherwise immediates that fit into 16 or 32 signed bits are
   encoded directly, and everything else goes through the generic forms.
   Returns the result of the emit helper that was called. */
static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const struct ins_forms *forms = &multiply_forms;
	sljit_s32 is_32 = op & SLJIT_32;

	if (HAS_FLAGS(op)) {
		/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
		FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
		FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
		if (dst_r != tmp0) {
			FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
		}
		FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
		FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
		FAIL_IF(push_inst(compiler, ipm(tmp1)));
		FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */

		forms = &multiply_overflow_forms;
	} else if (src2 == SLJIT_IMM) {
		/* Halfword immediate. */
		if (is_s16(src2w))
			return emit_ri(compiler, is_32 ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */,
				dst, src1, src1w, src2w, RI_A);

		/* Fullword immediate. */
		if (is_s32(src2w))
			return emit_ri(compiler, is_32 ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */,
				dst, src1, src1w, src2w, RIL_A);
	}

	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
}
2669
/* Emits a bitwise AND / OR / XOR of src1 with an immediate.
   type:    SLJIT_AND or SLJIT_OR; any other value is treated as XOR.
   imm:     the immediate operand.
   count16: number of 16-bit chunks of imm that need an instruction, as
            computed by the caller; only consulted for OR.
   The value of src1 is first moved into the destination register (or tmp0
   when dst is not a general purpose register) unless it is already there,
   and then the immediate instructions are applied in place.
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_uw imm, sljit_s32 count16)
{
	sljit_s32 mode = compiler->mode;
	sljit_s32 already_in_place = IS_GPR_REG(dst) && dst == src1;
	sljit_gpr dst_r = IS_GPR_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_uw imm_high = imm >> 32;
	sljit_uw imm_low = imm & 0xffffffff;

	if (!already_in_place)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	switch (type) {
	case SLJIT_AND:
		/* The high word is only masked in 64-bit mode. */
		if (!(mode & SLJIT_32))
			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | imm_high));
		return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | imm_low);

	case SLJIT_OR:
		/* Three or more chunks: two 32-bit instructions are the shortest. */
		if (count16 >= 3) {
			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | imm_high));
			return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | imm_low);
		}

		/* Two chunks inside the same word: one 32-bit instruction. */
		if (count16 >= 2) {
			if ((imm & 0x00000000ffffffffull) == 0)
				return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | imm_high);
			if ((imm & 0xffffffff00000000ull) == 0)
				return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | imm_low);
		}

		/* Otherwise one 16-bit instruction per non-zero chunk. */
		if ((imm & 0xffff000000000000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
		if ((imm & 0x0000ffff00000000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
		if ((imm & 0x00000000ffff0000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
			return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
		return SLJIT_SUCCESS;

	default:
		break;
	}

	/* XOR: one 32-bit instruction per non-zero word. */
	if ((imm & 0xffffffff00000000ull) != 0)
		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | imm_high));
	if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
		return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | imm_low);
	return SLJIT_SUCCESS;
}
2723
/* Instruction encodings of the bitwise AND instructions, one entry per
   operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_bitwise for SLJIT_AND. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};
2733
/* Instruction encodings of the bitwise OR instructions, one entry per
   operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_bitwise for SLJIT_OR. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};
2743
/* Instruction encodings of the bitwise XOR instructions, one entry per
   operand form (the mnemonic of each entry is given in its comment).
   Selected by sljit_emit_bitwise for every type other than AND and OR. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2753
/* Emits a bitwise AND / OR / XOR: dst = src1 op src2.
   Immediate operands get special treatment:
   - an AND into tmp0 whose immediate occupies a single 16-bit chunk is
     emitted as a test-under-mask instruction;
   - when the zero flag is not requested, the immediate instruction sequence
     of sljit_emit_bitwise_imm is used.
   All other cases use the generic register / memory forms.
   Returns the result of the emit helper that was called. */
static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 type = GET_OPCODE(op);
	sljit_s32 wants_zero = op & SLJIT_SET_Z;
	sljit_s32 and_into_tmp0 = (type == SLJIT_AND && dst == (sljit_s32)tmp0);
	const struct ins_forms *forms;

	if (src2 == SLJIT_IMM && (!wants_zero || and_into_tmp0)) {
		sljit_uw imm = (sljit_uw)src2w;
		sljit_s32 count16 = 0;
		sljit_s32 shift;

		if (op & SLJIT_32)
			imm &= 0xffffffffull;

		/* Count the non-zero 16-bit chunks; an all-zero immediate still counts
		   as one chunk (the lowest one). */
		for (shift = 0; shift < 64; shift += 16) {
			if (((imm >> shift) & 0xffff) != 0)
				count16++;
		}
		if (imm == 0)
			count16++;

		if (and_into_tmp0 && count16 == 1) {
			sljit_gpr src_r = tmp0;

			if (FAST_IS_REG(src1))
				src_r = gpr(src1 & REG_MASK);
			else
				FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

			/* Pick the test-under-mask variant matching the occupied chunk. */
			if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
				return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
			if ((imm & 0x00000000ffff0000ull) != 0)
				return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
			if ((imm & 0x0000ffff00000000ull) != 0)
				return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
			return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
		}

		if (!wants_zero)
			return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
	}

	switch (type) {
	case SLJIT_AND:
		forms = &bitwise_and_forms;
		break;
	case SLJIT_OR:
		forms = &bitwise_or_forms;
		break;
	default:
		forms = &bitwise_xor_forms;
		break;
	}

	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
}
2808
/* Emits a shift: dst = src1 shifted by src2.
   Handles SLJIT_SHL / SLJIT_LSHR / SLJIT_ASHR and their masked variants
   (SLJIT_MSHL / SLJIT_MLSHR / SLJIT_MASHR). Any opcode that is not a left
   or logical right shift is emitted as an arithmetic right shift.
   A non-register src1 is moved into tmp0, a non-register, non-immediate
   src2 into tmp1. The result is left in the destination register, or in
   tmp0 when dst is not a register.
   Returns SLJIT_SUCCESS, or the compiler error if emitting fails. */
static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 kind = GET_OPCODE(op);
	sljit_s32 is_32 = op & SLJIT_32;
	sljit_gpr result_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr value_r = tmp0;
	sljit_gpr count_r = tmp0;
	sljit_ins count_imm = 0;
	sljit_ins two_operand_ins;
	sljit_ins three_operand_ins;

	/* Select both encodings up front; which one is used is decided below. */
	switch (kind) {
	case SLJIT_SHL:
	case SLJIT_MSHL:
		two_operand_ins = 0x89000000 /* sll */;
		three_operand_ins = is_32 ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
		break;
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		two_operand_ins = 0x88000000 /* srl */;
		three_operand_ins = is_32 ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
		break;
	default:
		two_operand_ins = 0x8a000000 /* sra */;
		three_operand_ins = is_32 ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
		break;
	}

	if (FAST_IS_REG(src1))
		value_r = gpr(src1);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (src2 == SLJIT_IMM) {
		/* Immediate counts are reduced to the operand width at compile time. */
		count_imm = (sljit_ins)(src2w & (is_32 ? 0x1f : 0x3f));
	} else {
		if (FAST_IS_REG(src2)) {
			count_r = gpr(src2);
		} else {
			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
			count_r = tmp1;
		}

		/* Masked 32-bit shifts: keep only the low five bits of the count,
		   with the masked value placed in tmp1. */
		if (is_32 && (kind == SLJIT_MSHL || kind == SLJIT_MLSHR || kind == SLJIT_MASHR)) {
			if (count_r == tmp1) {
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
			} else {
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(count_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				count_r = tmp1;
			}
		}
	}

	/* The two operand encoding is only chosen for in-place 32-bit shifts. */
	if (is_32 && result_r == value_r)
		FAIL_IF(push_inst(compiler, two_operand_ins | R20A(result_r) | R12A(count_r) | count_imm));
	else
		FAIL_IF(push_inst(compiler, three_operand_ins | R36A(result_r) | R32A(value_r) | R28A(count_r) | (count_imm << 16)));

	/* No extra instruction is needed without the zero flag, or for SLJIT_ASHR. */
	if (!(op & SLJIT_SET_Z) || kind == SLJIT_ASHR)
		return SLJIT_SUCCESS;

	/* OR the result with itself when the zero flag is requested. */
	return push_inst(compiler, is_32 ? or(result_r, result_r) : ogr(result_r, result_r));
}
2869
/* Emits a rotate (SLJIT_ROTL / SLJIT_ROTR) using rll / rllg.

   Only a rotate-left instruction is emitted, so a right rotate is expressed
   as a left rotate by the negated amount: a register amount is negated into
   tmp1 (lcr / lcgr), an immediate amount is negated at compile time.

   The immediate is negated in unsigned arithmetic. Negating the signed
   sljit_sw directly would be undefined behaviour for the most negative value;
   the unsigned form gives the same masked result for every other input.

   The result is produced in the register mapped to dst, or in tmp0 when dst
   is not a register. Returns the status of the final push_inst. */
static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r = tmp0;
	sljit_gpr base_r = tmp0;
	sljit_ins imm = 0;
	sljit_ins ins;

	/* Value to rotate: use the register directly, otherwise stage it in tmp0. */
	if (FAST_IS_REG(src1))
		src_r = gpr(src1);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (src2 == SLJIT_IMM) {
		/* Modular (unsigned) negation, then keep the low 5 or 6 bits. */
		imm = (sljit_ins)src2w;

		if (GET_OPCODE(op) == SLJIT_ROTR)
			imm = (sljit_ins)0 - imm;

		imm &= (sljit_ins)((op & SLJIT_32) ? 0x1f : 0x3f);
	} else {
		if (FAST_IS_REG(src2))
			base_r = gpr(src2);
		else {
			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
			base_r = tmp1;
		}

		if (GET_OPCODE(op) == SLJIT_ROTR) {
			/* Negate the run-time amount into tmp1. */
			ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
			FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
			base_r = tmp1;
		}
	}

	ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
}
2910
/* Instruction encodings for add-with-carry, passed to emit_commutative()
   for SLJIT_ADDC. The zero entries are slots for which no encoding is
   provided here (slot meanings are defined by struct ins_forms, which is
   declared elsewhere in this file). */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};
2920
/* Instruction encodings for subtract-with-borrow, passed to
   emit_non_commutative() for SLJIT_SUBC. The zero entries are slots for
   which no encoding is provided here (slot meanings are defined by
   struct ins_forms, which is declared elsewhere in this file). */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2930
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2931 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2932 sljit_s32 dst, sljit_sw dstw,
2933 sljit_s32 src1, sljit_sw src1w,
2934 sljit_s32 src2, sljit_sw src2w)
2935 {
2936 CHECK_ERROR();
2937 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2938 ADJUST_LOCAL_OFFSET(dst, dstw);
2939 ADJUST_LOCAL_OFFSET(src1, src1w);
2940 ADJUST_LOCAL_OFFSET(src2, src2w);
2941
2942 compiler->mode = op & SLJIT_32;
2943 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2944
2945 if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
2946 src1 ^= src2;
2947 src2 ^= src1;
2948 src1 ^= src2;
2949
2950 src1w ^= src2w;
2951 src2w ^= src1w;
2952 src1w ^= src2w;
2953 }
2954
2955 switch (GET_OPCODE(op)) {
2956 case SLJIT_ADD:
2957 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2958 return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2959 case SLJIT_ADDC:
2960 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2961 FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2962 if (dst & SLJIT_MEM)
2963 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2964 return SLJIT_SUCCESS;
2965 case SLJIT_SUB:
2966 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2967 return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2968 case SLJIT_SUBC:
2969 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2970 FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2971 if (dst & SLJIT_MEM)
2972 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2973 return SLJIT_SUCCESS;
2974 case SLJIT_MUL:
2975 FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2976 break;
2977 case SLJIT_AND:
2978 case SLJIT_OR:
2979 case SLJIT_XOR:
2980 FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2981 break;
2982 case SLJIT_SHL:
2983 case SLJIT_MSHL:
2984 case SLJIT_LSHR:
2985 case SLJIT_MLSHR:
2986 case SLJIT_ASHR:
2987 case SLJIT_MASHR:
2988 FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2989 break;
2990 case SLJIT_ROTL:
2991 case SLJIT_ROTR:
2992 FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2993 break;
2994 }
2995
2996 if (dst & SLJIT_MEM)
2997 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2998 return SLJIT_SUCCESS;
2999 }
3000
/* Emits a two-operand operation whose result is not kept by the caller.
   The arguments are validated here (with a zero destination), then the
   operation is forwarded to sljit_emit_op2 with tmp0 as the destination. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

	/* Arguments were checked above; the forwarded call is marked to skip its checks. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
}
3011
/* Emits a "shift into" operation: src1_reg is shifted left or right by src3
   and the vacated bit positions are filled with bits taken from src2_reg.
   The result is written to dst_reg.

   src3 may be an immediate, a register or a memory operand. tmp0 and tmp1
   are used as scratch registers. Returns SLJIT_SUCCESS or the status of the
   failing emit helper. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_right;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_gpr src1_r = gpr(src1_reg);
	sljit_gpr src2_r = gpr(src2_reg);
	sljit_gpr src3_r = tmp1;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);

	/* Both inputs are the same register: this is a rotate, delegated to sljit_emit_op2. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		/* Only the low 5 (32 bit) or 6 (64 bit) bits of the count are used. */
		src3w &= bit_length - 1;

		/* NOTE(review): with a zero count nothing is emitted, so dst_reg is not
		   written even when it differs from src1_reg -- confirm this is intended. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Step 1: shift src1 into dst by the immediate count. */
		if (op & SLJIT_32) {
			if (dst_r == src1_r) {
				ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
				FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
			} else {
				ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
				FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
			}
		} else {
			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
		}

		/* Step 2: a single risbg takes the fill bits from src2_r and merges them
		   into dst_r; the three fields built below depend on direction and width. */
		ins = 0xec0000000055 /* risbg */;

		if (is_right) {
			src3w = bit_length - src3w;
			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
		} else
			ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);

		return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
	}

	/* Run-time count: make it available in a register. */
	if (!(src3 & SLJIT_MEM)) {
		src3_r = gpr(src3);

		/* The first shift below overwrites dst_r, so keep a copy of the count in tmp1. */
		if (dst_r == src3_r) {
			FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
			src3_r = tmp1;
		}
	} else
		FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));

	if (op & SLJIT_32) {
		/* Masked variants: reduce the count to its low five bits (0x1f) in tmp1. */
		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
			if (src3_r != tmp1) {
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				src3_r = tmp1;
			} else
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
		}

		/* Shift src1 into dst by the count. */
		if (dst_r == src1_r) {
			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
			FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
		} else {
			ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
		}

		/* tmp1 = count ^ 0x1f */
		if (src3_r != tmp1) {
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
		} else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));

		/* Shift src2 in the opposite direction by tmp1 plus a displacement of 1
		   into tmp0, then OR the fill bits into dst. */
		ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));

		return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
	}

	/* 64 bit path: shift src1 into dst by the count. */
	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
	FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));

	/* From here on ins is the opposite-direction shift applied to src2. */
	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* General case: pre-shift src2 by one bit into tmp0 and use
		   tmp1 = count ^ 0x3f as the remaining amount. */
		if (src3_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));

		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
		src2_r = tmp0;

		if (src3_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
		else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
	} else
		/* SLJIT_SHIFT_INTO_NON_ZERO is set: the negated count is used directly. */
		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));

	/* Produce the fill bits in tmp0 and OR them into dst. */
	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
}
3129
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3130 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3131 sljit_s32 src, sljit_sw srcw)
3132 {
3133 sljit_gpr src_r;
3134 struct addr addr;
3135
3136 CHECK_ERROR();
3137 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3138 ADJUST_LOCAL_OFFSET(src, srcw);
3139
3140 switch (op) {
3141 case SLJIT_FAST_RETURN:
3142 src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3143 if (src & SLJIT_MEM)
3144 FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3145
3146 return push_inst(compiler, br(src_r));
3147 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3148 return SLJIT_SUCCESS;
3149 case SLJIT_PREFETCH_L1:
3150 case SLJIT_PREFETCH_L2:
3151 case SLJIT_PREFETCH_L3:
3152 case SLJIT_PREFETCH_ONCE:
3153 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3154 return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3155 default:
3156 return SLJIT_SUCCESS;
3157 }
3158
3159 return SLJIT_SUCCESS;
3160 }
3161
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)3162 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3163 sljit_s32 dst, sljit_sw dstw)
3164 {
3165 sljit_gpr dst_r = link_r;
3166 sljit_s32 size;
3167
3168 CHECK_ERROR();
3169 CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3170 ADJUST_LOCAL_OFFSET(dst, dstw);
3171
3172 switch (op) {
3173 case SLJIT_FAST_ENTER:
3174 if (FAST_IS_REG(dst))
3175 return push_inst(compiler, lgr(gpr(dst), link_r));
3176 break;
3177 case SLJIT_GET_RETURN_ADDRESS:
3178 dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3179
3180 size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
3181 FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
3182 break;
3183 }
3184
3185 if (dst & SLJIT_MEM)
3186 return store_word(compiler, dst_r, dst, dstw, 0);
3187
3188 return SLJIT_SUCCESS;
3189 }
3190
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)3191 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3192 {
3193 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3194
3195 if (type == SLJIT_GP_REGISTER)
3196 return (sljit_s32)gpr(reg);
3197
3198 if (type != SLJIT_FLOAT_REGISTER)
3199 return -1;
3200
3201 return (sljit_s32)freg_map[reg];
3202 }
3203
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)3204 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3205 void *instruction, sljit_u32 size)
3206 {
3207 sljit_ins ins = 0;
3208
3209 CHECK_ERROR();
3210 CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3211
3212 memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3213 return push_inst(compiler, ins);
3214 }
3215
3216 /* --------------------------------------------------------------------- */
3217 /* Floating point operators */
3218 /* --------------------------------------------------------------------- */
3219
/* Direction selectors for float_mem(); callers OR them with SLJIT_32
   to pick the single precision encodings. */
#define FLOAT_LOAD 0
#define FLOAT_STORE 1
3222
/* Loads or stores one floating point register from/to a memory operand.

   op is FLOAT_LOAD or FLOAT_STORE, optionally combined with SLJIT_32 for
   single precision. The long-displacement encodings (ley/ldy/stey/stdy) are
   used only when the operand has no offset register and memw fits a signed
   20 bit displacement but not an unsigned 12 bit one. In every other case
   the short encodings (le/ld/ste/std) are used. */
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	const sljit_s32 is_store = (op & FLOAT_STORE) != 0;
	const sljit_s32 is_single = (op & SLJIT_32) != 0;
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(mem & SLJIT_MEM);

	if (!(mem & OFFS_REG_MASK) && !is_u12(memw) && is_s20(memw)) {
		/* Long displacement form. */
		FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));

		if (is_store)
			ins = is_single ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
		else
			ins = is_single ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;

		return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
	}

	/* Short displacement form. */
	FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));

	if (is_store)
		ins = is_single ? 0x70000000 /* ste */ : 0x60000000 /* std */;
	else
		ins = is_single ? 0x78000000 /* le */ : 0x68000000 /* ld */;

	return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
}
3252
/* Emits a floating point instruction whose second operand is src.
   ins_r is the register-register encoding, used when src is a register.
   ins is the register-memory encoding, used when src is a memory operand
   (addressed through make_addr_bx with tmp1 as scratch). */
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
	sljit_s32 reg,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr mem_addr;

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &mem_addr, src, srcw, tmp1));
		return push_inst(compiler, ins | F36(reg) | R32A(mem_addr.index) | R28A(mem_addr.base) | ((sljit_ins)mem_addr.offset << 16));
	}

	return push_inst(compiler, ins_r | F4(reg) | F0(src));
}
3265
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3266 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3267 sljit_s32 dst, sljit_sw dstw,
3268 sljit_s32 src, sljit_sw srcw)
3269 {
3270 sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3271 sljit_ins ins;
3272
3273 if (src & SLJIT_MEM) {
3274 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3275 src = TMP_FREG1;
3276 }
3277
3278 /* M3 is set to 5 */
3279 if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3280 ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3281 else
3282 ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3283
3284 FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3285
3286 if (dst & SLJIT_MEM)
3287 return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3288
3289 return SLJIT_SUCCESS;
3290 }
3291
/* Shared tail of the integer-to-float conversions; ins is the conversion
   instruction chosen by the caller.
   An immediate or memory source is first brought into tmp0. Bits of ins
   select the width of that load (0x100000) and the precision of the final
   store (0x10000). A memory destination is written from TMP_FREG1. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 result_freg = TMP_FREG1;

	if (FAST_IS_REG(dst))
		result_freg = dst;

	if (src == SLJIT_IMM || (src & SLJIT_MEM)) {
		if (src == SLJIT_IMM) {
			FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		} else {
			FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
		}

		src = (sljit_s32)tmp0;
	}

	FAIL_IF(push_inst(compiler, ins | F4(result_freg) | R0(src)));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
}
3314
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3315 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3316 sljit_s32 dst, sljit_sw dstw,
3317 sljit_s32 src, sljit_sw srcw)
3318 {
3319 sljit_ins ins;
3320
3321 if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3322 srcw = (sljit_s32)srcw;
3323
3324 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3325 ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3326 else
3327 ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3328
3329 return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3330 }
3331
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3332 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3333 sljit_s32 dst, sljit_sw dstw,
3334 sljit_s32 src, sljit_sw srcw)
3335 {
3336 sljit_ins ins;
3337
3338 if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3339 srcw = (sljit_u32)srcw;
3340
3341 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3342 ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3343 else
3344 ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3345
3346 return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3347 }
3348
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3349 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3350 sljit_s32 src1, sljit_sw src1w,
3351 sljit_s32 src2, sljit_sw src2w)
3352 {
3353 sljit_ins ins_r, ins;
3354
3355 if (src1 & SLJIT_MEM) {
3356 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3357 src1 = TMP_FREG1;
3358 }
3359
3360 if (op & SLJIT_32) {
3361 ins_r = 0xb3090000 /* cebr */;
3362 ins = 0xed0000000009 /* ceb */;
3363 } else {
3364 ins_r = 0xb3190000 /* cdbr */;
3365 ins = 0xed0000000019 /* cdb */;
3366 }
3367
3368 return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3369 }
3370
/* Emits a single-operand floating point operation: move, precision
   conversion, negate or absolute value.

   The result is computed in the register given by dst, or in TMP_FREG1 when
   dst is a memory operand, in which case it is stored at the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;
	sljit_ins ins;

	CHECK_ERROR();

	/* NOTE(review): macro defined outside this section; presumably it validates
	   the arguments and dispatches the integer conversion / compare opcodes. */
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Exact match (no SLJIT_32 bit): f32 -> f64, which can read its source
	   straight from a register or from memory. */
	if (op == SLJIT_CONV_F64_FROM_F32)
		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
	else {
		/* Bring a memory source into dst_r. For SLJIT_CONV_F32_FROM_F64 the load
		   is done without the SLJIT_32 flag. */
		if (src & SLJIT_MEM) {
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
			src = dst_r;
		}

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV_F64:
			if (FAST_IS_REG(dst)) {
				/* Register to same register: nothing to emit. */
				if (dst == src)
					return SLJIT_SUCCESS;

				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
				break;
			}
			/* Memory destination: store the source register directly. */
			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
		case SLJIT_CONV_F64_FROM_F32:
			/* Only SLJIT_CONV_F32_FROM_F64. */
			ins = 0xb3440000 /* ledbr */;
			break;
		case SLJIT_NEG_F64:
			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
			break;
		default:
			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(dst_r == TMP_FREG1);

	/* Write the result held in TMP_FREG1 back to the memory destination. */
	return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
3425
/* Builds the register-to-register floating point move instruction word:
   ler when op carries SLJIT_32, ldr otherwise. The op argument is
   parenthesized so that compound expressions are evaluated as a whole. */
#define FLOAT_MOV(op, dst_r, src_r) \
	((((op) & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3428
/* Emits a two-operand floating point operation (add, subtract, multiply,
   divide): dst = src1 <op> src2.

   The emitted instructions work in place on their first operand, so src1 is
   first brought into the result register (dst, or TMP_FREG1 when dst is a
   memory operand). src2 is then applied to it through emit_float. A memory
   destination is stored from TMP_FREG1 at the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r = TMP_FREG1;
	sljit_ins ins_r, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Single-pass block; `break` skips the copy of src1 into dst_r. */
	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			/* src1 already lives in the result register. */
			if (dst == src1)
				break;

			if (dst == src2) {
				/* For add and multiply the operands are exchanged so that dst
				   becomes the first operand; no copy is needed. */
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				/* Otherwise save src2 in TMP_FREG1 before dst is overwritten with src1. */
				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		/* Bring src1 into the result register. */
		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	/* ins_r: register-register encoding, ins: register-memory encoding. */
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	/* Memory destination: the result was computed in TMP_FREG1. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	SLJIT_ASSERT(dst_r != TMP_FREG1);
	return SLJIT_SUCCESS;
}
3497
sljit_emit_fop2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3498 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3499 sljit_s32 dst_freg,
3500 sljit_s32 src1, sljit_sw src1w,
3501 sljit_s32 src2, sljit_sw src2w)
3502 {
3503 sljit_s32 reg;
3504
3505 CHECK_ERROR();
3506 CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3507 ADJUST_LOCAL_OFFSET(src1, src1w);
3508 ADJUST_LOCAL_OFFSET(src2, src2w);
3509
3510 if (src2 & SLJIT_MEM) {
3511 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
3512 src2 = TMP_FREG1;
3513 }
3514
3515 if (src1 & SLJIT_MEM) {
3516 reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
3517 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
3518 src1 = reg;
3519 }
3520
3521 return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
3522 }
3523
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)3524 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
3525 sljit_s32 freg, sljit_f32 value)
3526 {
3527 union {
3528 sljit_s32 imm;
3529 sljit_f32 value;
3530 } u;
3531
3532 CHECK_ERROR();
3533 CHECK(check_sljit_emit_fset32(compiler, freg, value));
3534
3535 u.value = value;
3536
3537 FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
3538 return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3539 }
3540
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)3541 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
3542 sljit_s32 freg, sljit_f64 value)
3543 {
3544 union {
3545 sljit_sw imm;
3546 sljit_f64 value;
3547 } u;
3548
3549 CHECK_ERROR();
3550 CHECK(check_sljit_emit_fset64(compiler, freg, value));
3551
3552 u.value = value;
3553
3554 FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
3555 return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3556 }
3557
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)3558 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
3559 sljit_s32 freg, sljit_s32 reg)
3560 {
3561 sljit_gpr gen_r;
3562
3563 CHECK_ERROR();
3564 CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
3565
3566 gen_r = gpr(reg);
3567
3568 if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
3569 if (op & SLJIT_32) {
3570 FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
3571 gen_r = tmp0;
3572 }
3573
3574 return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
3575 }
3576
3577 FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
3578
3579 if (!(op & SLJIT_32))
3580 return SLJIT_SUCCESS;
3581
3582 return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
3583 }
3584
3585 /* --------------------------------------------------------------------- */
3586 /* Conditional instructions */
3587 /* --------------------------------------------------------------------- */
3588
sljit_emit_label(struct sljit_compiler * compiler)3589 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3590 {
3591 struct sljit_label *label;
3592
3593 CHECK_ERROR_PTR();
3594 CHECK_PTR(check_sljit_emit_label(compiler));
3595
3596 if (compiler->last_label && compiler->last_label->size == compiler->size)
3597 return compiler->last_label;
3598
3599 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3600 PTR_FAIL_IF(!label);
3601 set_label(label, compiler);
3602 return label;
3603 }
3604
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)3605 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3606 {
3607 struct sljit_jump *jump;
3608 sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3609
3610 CHECK_ERROR_PTR();
3611 CHECK_PTR(check_sljit_emit_jump(compiler, type));
3612
3613 /* record jump */
3614 jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
3615 PTR_FAIL_IF(!jump);
3616 set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3617 jump->addr = compiler->size;
3618
3619 /* emit jump instruction */
3620 type &= 0xff;
3621 if (type >= SLJIT_FAST_CALL)
3622 PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
3623 else
3624 PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3625
3626 return jump;
3627 }
3628
/* Emits a call to a target that is set later through the returned jump
   record. arg_types is only used by the argument check.

   With SLJIT_CALL_RETURN the stack frame is released first and the call is
   emitted as a plain SLJIT_JUMP, keeping only the SLJIT_REWRITABLE_JUMP flag. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	SLJIT_UNUSED_ARG(arg_types);
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

	/* Arguments were checked above; the forwarded call is marked to skip its checks. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_jump(compiler, type);
}
3644
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)3645 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3646 {
3647 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3648
3649 CHECK_ERROR();
3650 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3651
3652 if (src == SLJIT_IMM) {
3653 SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3654 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3655 }
3656 else if (src & SLJIT_MEM) {
3657 ADJUST_LOCAL_OFFSET(src, srcw);
3658 FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3659 }
3660
3661 /* emit jump instruction */
3662 if (type >= SLJIT_FAST_CALL)
3663 return push_inst(compiler, basr(link_r, src_r));
3664
3665 return push_inst(compiler, br(src_r));
3666 }
3667
/* Emits an indirect call to the address given by src; arg_types is only used
   by the argument check. The actual branch is emitted by sljit_emit_ijump.

   With SLJIT_CALL_RETURN the stack frame is released before the branch and
   the call is emitted as a plain SLJIT_JUMP. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_UNUSED_ARG(arg_types);

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	/* The code below passes TMP_REG2 as src after writing tmp1. */
	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	/* A memory source is loaded into tmp1 up front. */
	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
		srcw = 0;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* A target held in one of the saved registers in this range is copied to
		   tmp1 before the frame is released (presumably because the release
		   restores those registers -- confirm against emit_stack_frame_release). */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
			srcw = 0;
		}

		FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP;
	}

	/* Arguments were checked above; the forwarded call is marked to skip its checks. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3700
sljit_emit_op_flags(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 type)3701 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3702 sljit_s32 dst, sljit_sw dstw,
3703 sljit_s32 type)
3704 {
3705 sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3706 sljit_gpr loc_r = tmp1;
3707 sljit_u8 mask = get_cc(compiler, type);
3708
3709 CHECK_ERROR();
3710 CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3711
3712 switch (GET_OPCODE(op)) {
3713 case SLJIT_AND:
3714 case SLJIT_OR:
3715 case SLJIT_XOR:
3716 compiler->status_flags_state = op & SLJIT_SET_Z;
3717
3718 /* dst is also source operand */
3719 if (dst & SLJIT_MEM)
3720 FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
3721
3722 break;
3723 case SLJIT_MOV32:
3724 op |= SLJIT_32;
3725 /* fallthrough */
3726 case SLJIT_MOV:
3727 /* can write straight into destination */
3728 loc_r = dst_r;
3729 break;
3730 default:
3731 SLJIT_UNREACHABLE();
3732 }
3733
3734 /* TODO(mundaym): fold into cmov helper function? */
3735 #define LEVAL(i) i(loc_r, 1, mask)
3736 if (have_lscond2()) {
3737 FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3738 FAIL_IF(push_inst(compiler,
3739 WHEN2(op & SLJIT_32, lochi, locghi)));
3740 } else {
3741 /* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
3742 abort();
3743 }
3744 #undef LEVAL
3745
3746 /* apply bitwise op and set condition codes */
3747 switch (GET_OPCODE(op)) {
3748 #define LEVAL(i) i(dst_r, loc_r)
3749 case SLJIT_AND:
3750 FAIL_IF(push_inst(compiler,
3751 WHEN2(op & SLJIT_32, nr, ngr)));
3752 break;
3753 case SLJIT_OR:
3754 FAIL_IF(push_inst(compiler,
3755 WHEN2(op & SLJIT_32, or, ogr)));
3756 break;
3757 case SLJIT_XOR:
3758 FAIL_IF(push_inst(compiler,
3759 WHEN2(op & SLJIT_32, xr, xgr)));
3760 break;
3761 #undef LEVAL
3762 }
3763
3764 /* store result to memory if required */
3765 if (dst & SLJIT_MEM)
3766 return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));
3767
3768 return SLJIT_SUCCESS;
3769 }
3770
/* Conditional select: dst_reg receives src1 when the condition in `type`
   holds, otherwise src2_reg. Implemented by first placing one operand in
   dst_reg and then conditionally overwriting it with a load-on-condition
   instruction (loc / locg / lochi / locghi / locr / locgr).

   Returns SLJIT_SUCCESS or the compiler error code. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins mask;
	sljit_gpr src_r;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* Make dst_reg hold the "otherwise" operand before the conditional load. */
	if (dst_reg != src2_reg) {
		if (src1 == dst_reg) {
			/* dst already holds src1: swap the operand roles and flip the low
			   bit of type (presumably selecting the inverse condition). */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				/* dst_reg takes part in the address of src1, so it cannot be
				   overwritten first: load src1 into dst now and conditionally
				   move src2_reg instead. */
				FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
				src1 = src2_reg;
				src1w = 0;
				type ^= 0x1;
			} else
				FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
		}
	}

	mask = get_cc(compiler, type & ~SLJIT_32);

	if (src1 & SLJIT_MEM) {
		/* loc / locg take a base register and a displacement only, so an index
		   register or an out-of-range offset is folded into tmp1 first. */
		if (src1 & OFFS_REG_MASK) {
			src_r = gpr(OFFS_REG(src1));

			if (src1w != 0) {
				/* tmp1 = index << shift */
				FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
				src_r = tmp1;
			}

			/* tmp1 = base + (shifted) index */
			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
			src_r = tmp1;
			src1w = 0;
		} else if (!is_s20(src1w)) {
			/* Offset does not pass is_s20: build the address in tmp1. */
			FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));

			if (src1 & REG_MASK)
				FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));

			src_r = tmp1;
			src1w = 0;
		} else
			src_r = gpr(src1 & REG_MASK);

		ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
		return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
	}

	if (src1 == SLJIT_IMM) {
		if (type & SLJIT_32)
			src1w = (sljit_s32)src1w;

		/* Immediates accepted by is_s16 use the immediate form when the
		   load/store-on-condition 2 facility is available. */
		if (have_lscond2() && is_s16(src1w)) {
			ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
			return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
		}

		/* Otherwise materialize the immediate in tmp0. */
		FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
		src_r = tmp0;
	} else
		src_r = gpr(src1);

	ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
	return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
}
3848
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)3849 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3850 sljit_s32 dst_freg,
3851 sljit_s32 src1, sljit_sw src1w,
3852 sljit_s32 src2_freg)
3853 {
3854 sljit_ins ins;
3855 struct sljit_label *label;
3856 struct sljit_jump *jump;
3857
3858 CHECK_ERROR();
3859 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3860
3861 ADJUST_LOCAL_OFFSET(src1, src1w);
3862
3863 if (dst_freg != src2_freg) {
3864 if (dst_freg == src1) {
3865 src1 = src2_freg;
3866 src1w = 0;
3867 type ^= 0x1;
3868 } else {
3869 ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3870 FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
3871 }
3872 }
3873
3874 SLJIT_SKIP_CHECKS(compiler);
3875 jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
3876 FAIL_IF(!jump);
3877
3878 if (!(src1 & SLJIT_MEM)) {
3879 ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3880 FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
3881 } else
3882 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
3883
3884 SLJIT_SKIP_CHECKS(compiler);
3885 label = sljit_emit_label(compiler);
3886 FAIL_IF(!label);
3887
3888 sljit_set_label(jump, label);
3889 return SLJIT_SUCCESS;
3890 }
3891
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3892 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3893 sljit_s32 reg,
3894 sljit_s32 mem, sljit_sw memw)
3895 {
3896 sljit_ins ins, reg1, reg2, base, offs = 0;
3897
3898 CHECK_ERROR();
3899 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3900
3901 if (!(reg & REG_PAIR_MASK))
3902 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3903
3904 ADJUST_LOCAL_OFFSET(mem, memw);
3905
3906 base = gpr(mem & REG_MASK);
3907 reg1 = gpr(REG_PAIR_FIRST(reg));
3908 reg2 = gpr(REG_PAIR_SECOND(reg));
3909
3910 if (mem & OFFS_REG_MASK) {
3911 memw &= 0x3;
3912 offs = gpr(OFFS_REG(mem));
3913
3914 if (memw != 0) {
3915 FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3916 offs = tmp1;
3917 } else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3918 FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
3919 base = tmp1;
3920 offs = 0;
3921 }
3922
3923 memw = 0;
3924 } else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3925 FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3926
3927 if (base == 0)
3928 base = tmp1;
3929 else
3930 offs = tmp1;
3931
3932 memw = 0;
3933 }
3934
3935 if (offs == 0 && reg2 == (reg1 + 1)) {
3936 ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3937 return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3938 }
3939
3940 ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3941
3942 if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3943 FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3944 return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3945 }
3946
3947 FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3948 return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3949 }
3950
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3951 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3952 sljit_s32 freg,
3953 sljit_s32 srcdst, sljit_sw srcdstw)
3954 {
3955 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3956 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3957 sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3958 struct addr addr;
3959 sljit_ins ins;
3960
3961 CHECK_ERROR();
3962 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3963
3964 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3965
3966 if (reg_size != 4)
3967 return SLJIT_ERR_UNSUPPORTED;
3968
3969 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3970 return SLJIT_ERR_UNSUPPORTED;
3971
3972 if (type & SLJIT_SIMD_TEST)
3973 return SLJIT_SUCCESS;
3974
3975 if (!(srcdst & SLJIT_MEM)) {
3976 if (type & SLJIT_SIMD_STORE)
3977 ins = F36(srcdst) | F32(freg);
3978 else
3979 ins = F36(freg) | F32(srcdst);
3980
3981 return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
3982 }
3983
3984 FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
3985 ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
3986
3987 if (alignment >= 4)
3988 ins |= 4 << 12;
3989 else if (alignment == 3)
3990 ins |= 3 << 12;
3991
3992 return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
3993 }
3994
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3995 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3996 sljit_s32 freg,
3997 sljit_s32 src, sljit_sw srcw)
3998 {
3999 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4000 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4001 struct addr addr;
4002 sljit_gpr reg;
4003 sljit_sw sign_ext;
4004
4005 CHECK_ERROR();
4006 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
4007
4008 ADJUST_LOCAL_OFFSET(src, srcw);
4009
4010 if (reg_size != 4)
4011 return SLJIT_ERR_UNSUPPORTED;
4012
4013 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4014 return SLJIT_ERR_UNSUPPORTED;
4015
4016 if (type & SLJIT_SIMD_TEST)
4017 return SLJIT_SUCCESS;
4018
4019 if (src & SLJIT_MEM) {
4020 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4021 return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
4022 | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
4023 }
4024
4025 if (type & SLJIT_SIMD_FLOAT) {
4026 if (src == SLJIT_IMM)
4027 return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
4028
4029 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
4030 }
4031
4032 if (src == SLJIT_IMM) {
4033 sign_ext = 0x10000;
4034
4035 switch (elem_size) {
4036 case 0:
4037 srcw &= 0xff;
4038 sign_ext = (sljit_s8)srcw;
4039 break;
4040 case 1:
4041 srcw &= 0xffff;
4042 sign_ext = (sljit_s16)srcw;
4043 break;
4044 case 2:
4045 if ((sljit_s32)srcw == (sljit_s16)srcw) {
4046 srcw &= 0xffff;
4047 sign_ext = (sljit_s16)srcw;
4048 } else
4049 srcw &= 0xffffffff;
4050 break;
4051 default:
4052 if (srcw == (sljit_s16)srcw) {
4053 srcw &= 0xffff;
4054 sign_ext = (sljit_s16)srcw;
4055 }
4056 break;
4057 }
4058
4059 if (sign_ext != 0x10000) {
4060 if (sign_ext == 0 || sign_ext == -1)
4061 return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
4062 | (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
4063
4064 return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
4065 | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
4066 }
4067
4068 push_load_imm_inst(compiler, tmp0, srcw);
4069 reg = tmp0;
4070 } else
4071 reg = gpr(src);
4072
4073 FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
4074 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
4075 }
4076
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)4077 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4078 sljit_s32 freg, sljit_s32 lane_index,
4079 sljit_s32 srcdst, sljit_sw srcdstw)
4080 {
4081 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4082 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4083 struct addr addr;
4084 sljit_gpr reg;
4085 sljit_ins ins = 0;
4086
4087 CHECK_ERROR();
4088 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
4089
4090 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4091
4092 if (reg_size != 4)
4093 return SLJIT_ERR_UNSUPPORTED;
4094
4095 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4096 return SLJIT_ERR_UNSUPPORTED;
4097
4098 if (type & SLJIT_SIMD_TEST)
4099 return SLJIT_SUCCESS;
4100
4101 if (srcdst & SLJIT_MEM) {
4102 FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4103 ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4104 }
4105
4106 if (type & SLJIT_SIMD_LANE_ZERO) {
4107 if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
4108 return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
4109
4110 if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4111 FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
4112 srcdst = TMP_FREG1;
4113 srcdstw = 0;
4114 }
4115
4116 FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
4117 }
4118
4119 if (srcdst & SLJIT_MEM) {
4120 switch (elem_size) {
4121 case 0:
4122 ins |= 0xe70000000000 /* vleb */;
4123 break;
4124 case 1:
4125 ins |= 0xe70000000001 /* vleh */;
4126 break;
4127 case 2:
4128 ins |= 0xe70000000003 /* vlef */;
4129 break;
4130 default:
4131 ins |= 0xe70000000002 /* vleg */;
4132 break;
4133 }
4134
4135 /* Convert to vsteb - vsteg */
4136 if (type & SLJIT_SIMD_STORE)
4137 ins |= 0x8;
4138
4139 return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
4140 }
4141
4142 if (type & SLJIT_SIMD_FLOAT) {
4143 if (type & SLJIT_SIMD_STORE)
4144 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
4145
4146 if (elem_size == 3) {
4147 if (lane_index == 0)
4148 ins = F32(srcdst) | F28(freg) | (1 << 12);
4149 else
4150 ins = F32(freg) | F28(srcdst);
4151
4152 return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
4153 }
4154
4155 FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
4156 return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
4157 }
4158
4159 if (srcdst == SLJIT_IMM) {
4160 switch (elem_size) {
4161 case 0:
4162 ins = 0xe70000000040 /* vleib */;
4163 srcdstw &= 0xff;
4164 break;
4165 case 1:
4166 ins = 0xe70000000041 /* vleih */;
4167 srcdstw &= 0xffff;
4168 break;
4169 case 2:
4170 if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
4171 srcdstw &= 0xffff;
4172 ins = 0xe70000000043 /* vleif */;
4173 } else
4174 srcdstw &= 0xffffffff;
4175 break;
4176 default:
4177 if (srcdstw == (sljit_s16)srcdstw) {
4178 srcdstw &= 0xffff;
4179 ins = 0xe70000000042 /* vleig */;
4180 }
4181 break;
4182 }
4183
4184 if (ins != 0)
4185 return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
4186
4187 push_load_imm_inst(compiler, tmp0, srcdstw);
4188 reg = tmp0;
4189 } else
4190 reg = gpr(srcdst);
4191
4192 ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
4193
4194 if (!(type & SLJIT_SIMD_STORE))
4195 return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
4196
4197 FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
4198
4199 if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
4200 return SLJIT_SUCCESS;
4201
4202 switch (elem_size) {
4203 case 0:
4204 ins = 0xb9060000 /* lgbr */;
4205 break;
4206 case 1:
4207 ins = 0xb9070000 /* lghr */;
4208 break;
4209 default:
4210 ins = 0xb9140000 /* lgfr */;
4211 break;
4212 }
4213
4214 return push_inst(compiler, ins | R4A(reg) | R0A(reg));
4215 }
4216
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)4217 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4218 sljit_s32 freg,
4219 sljit_s32 src, sljit_s32 src_lane_index)
4220 {
4221 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4222 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4223
4224 CHECK_ERROR();
4225 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4226
4227 if (reg_size != 4)
4228 return SLJIT_ERR_UNSUPPORTED;
4229
4230 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4231 return SLJIT_ERR_UNSUPPORTED;
4232
4233 if (type & SLJIT_SIMD_TEST)
4234 return SLJIT_SUCCESS;
4235
4236 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
4237 | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
4238 }
4239
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)4240 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4241 sljit_s32 freg,
4242 sljit_s32 src, sljit_sw srcw)
4243 {
4244 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4245 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4246 sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4247 struct addr addr;
4248 sljit_ins ins;
4249
4250 CHECK_ERROR();
4251 CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4252
4253 ADJUST_LOCAL_OFFSET(src, srcw);
4254
4255 if (reg_size != 4)
4256 return SLJIT_ERR_UNSUPPORTED;
4257
4258 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4259 return SLJIT_ERR_UNSUPPORTED;
4260
4261 if (type & SLJIT_SIMD_TEST)
4262 return SLJIT_SUCCESS;
4263
4264 if (src & SLJIT_MEM) {
4265 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4266 ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4267
4268 switch (elem2_size - elem_size) {
4269 case 1:
4270 ins |= 0xe70000000002 /* vleg */;
4271 break;
4272 case 2:
4273 ins |= 0xe70000000003 /* vlef */;
4274 break;
4275 default:
4276 ins |= 0xe70000000001 /* vleh */;
4277 break;
4278 }
4279
4280 FAIL_IF(push_inst(compiler, ins));
4281 src = freg;
4282 }
4283
4284 if (type & SLJIT_SIMD_FLOAT) {
4285 FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
4286 FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
4287 return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
4288 }
4289
4290 ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);
4291
4292 do {
4293 FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
4294 src = freg;
4295 } while (++elem_size < elem2_size);
4296
4297 return SLJIT_SUCCESS;
4298 }
4299
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)4300 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4301 sljit_s32 freg,
4302 sljit_s32 dst, sljit_sw dstw)
4303 {
4304 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4305 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4306 sljit_gpr dst_r;
4307
4308 CHECK_ERROR();
4309 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4310
4311 ADJUST_LOCAL_OFFSET(dst, dstw);
4312
4313 if (reg_size != 4)
4314 return SLJIT_ERR_UNSUPPORTED;
4315
4316 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4317 return SLJIT_ERR_UNSUPPORTED;
4318
4319 if (type & SLJIT_SIMD_TEST)
4320 return SLJIT_SUCCESS;
4321
4322 switch (elem_size) {
4323 case 0:
4324 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
4325 push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
4326 FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
4327 break;
4328 case 1:
4329 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
4330 break;
4331 case 2:
4332 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
4333 break;
4334 default:
4335 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
4336 break;
4337 }
4338
4339 if (elem_size != 0)
4340 FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
4341
4342 FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
4343
4344 dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
4345 FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
4346 | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
4347
4348 if (dst_r == tmp0)
4349 return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
4350
4351 return SLJIT_SUCCESS;
4352 }
4353
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4354 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4355 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4356 {
4357 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4358 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4359 sljit_ins ins = 0;
4360
4361 CHECK_ERROR();
4362 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4363
4364 if (reg_size != 4)
4365 return SLJIT_ERR_UNSUPPORTED;
4366
4367 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4368 return SLJIT_ERR_UNSUPPORTED;
4369
4370 if (type & SLJIT_SIMD_TEST)
4371 return SLJIT_SUCCESS;
4372
4373 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4374 case SLJIT_SIMD_OP2_AND:
4375 ins = 0xe70000000068 /* vn */;
4376 break;
4377 case SLJIT_SIMD_OP2_OR:
4378 ins = 0xe7000000006a /* vo */;
4379 break;
4380 case SLJIT_SIMD_OP2_XOR:
4381 ins = 0xe7000000006d /* vx */;
4382 break;
4383 }
4384
4385 if (type & SLJIT_SIMD_TEST)
4386 return SLJIT_SUCCESS;
4387
4388 return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
4389 }
4390
/* Atomic load: emitted as an ordinary sljit_emit_op1 with `op`, loading
   from the address held in mem_reg (zero offset) into dst_reg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	/* Arguments were validated above; skip the checks of the nested call. */
	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
}
4401
/* Atomic store built on compare-and-swap. temp_reg supplies the comparison
   value (presumably the value obtained by the matching atomic load -
   confirm against the API documentation).

   32-bit and word sized stores map directly to cs / csg. 8 and 16-bit
   stores are emulated with a cs on the enclosing aligned 32-bit word; this
   path overwrites tmp0, tmp1 and temp_reg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins mask;
	sljit_gpr tmp_r = gpr(temp_reg);
	sljit_gpr mem_r = gpr(mem_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
		return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
	case SLJIT_MOV_U8:
		mask = 0xff;
		break;
	case SLJIT_MOV_U16:
		mask = 0xffff;
		break;
	default:
		return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
	}

	/* tmp0 = (src_reg ^ tmp_r) & mask */
	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
	FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
	/* tmp_r is free from here on: preload it with -4 (0xfffc sign extended)
	   for the address alignment below. */
	FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));

	/* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
	/* tmp1 starts as the largest shift (0x18 for bytes, 0x10 for halfwords). */
	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
	/* tmp_r = mem_r & ~0x3 */
	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
	/* xor the low address bits, moved up by 3, into bits 59-60 of tmp1 */
	FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
	FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));

	/* Already computed: tmp_r = mem_r & ~0x3 */

	/* tmp1 = current aligned word, tmp0 = that word with the computed
	   difference applied, then swap tmp0 in if memory still equals tmp1. */
	FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
	FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp0) | R0A(tmp1)));
	return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
}
4446
4447 /* --------------------------------------------------------------------- */
4448 /* Other instructions */
4449 /* --------------------------------------------------------------------- */
4450
4451 /* On s390x we build a literal pool to hold constants. This has two main
4452 advantages:
4453
4454 1. we only need one instruction in the instruction stream (LGRL)
4455 2. we can store 64 bit addresses and use 32 bit offsets
4456
4457 To retrofit the extra information needed to build the literal pool we
4458 add a new sljit_s390x_const struct that contains the initial value but
4459 can still be cast to a sljit_const. */
4460
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)4461 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4462 {
4463 struct sljit_s390x_const *const_;
4464 sljit_gpr dst_r;
4465
4466 CHECK_ERROR_PTR();
4467 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4468
4469 const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
4470 sizeof(struct sljit_s390x_const));
4471 PTR_FAIL_IF(!const_);
4472 set_const((struct sljit_const*)const_, compiler);
4473 const_->init_value = init_value;
4474
4475 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4476 if (have_genext())
4477 PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
4478 else {
4479 PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
4480 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4481 }
4482
4483 if (dst & SLJIT_MEM)
4484 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
4485
4486 return (struct sljit_const*)const_;
4487 }
4488
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)4489 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4490 {
4491 /* Update the constant pool. */
4492 sljit_uw *ptr = (sljit_uw *)addr;
4493 SLJIT_UNUSED_ARG(executable_offset);
4494
4495 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
4496 *ptr = new_target;
4497 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
4498 SLJIT_CACHE_FLUSH(ptr, ptr + 1);
4499 }
4500
/* Updates a constant emitted by sljit_emit_const. The update is the same
   word write that sljit_set_jump_addr performs, so it is forwarded there. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
4505
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)4506 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
4507 struct sljit_compiler *compiler,
4508 sljit_s32 dst, sljit_sw dstw)
4509 {
4510 struct sljit_put_label *put_label;
4511 sljit_gpr dst_r;
4512
4513 CHECK_ERROR_PTR();
4514 CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
4515 ADJUST_LOCAL_OFFSET(dst, dstw);
4516
4517 put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
4518 PTR_FAIL_IF(!put_label);
4519 set_put_label(put_label, compiler, 0);
4520
4521 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4522
4523 if (have_genext())
4524 PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4525 else {
4526 PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4527 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4528 }
4529
4530 if (dst & SLJIT_MEM)
4531 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
4532
4533 return put_label;
4534 }
4535
/* TODO(carenas): EVAL probably should move up or be refactored */
/* Remove the instruction selection helper macros defined earlier in this file. */
#undef WHEN2
#undef EVAL

/* Remove the temporary register aliases used throughout this file. */
#undef tmp1
#undef tmp0

/* TODO(carenas): undef other macros that spill like is_u12? */
4544