1 /*
2 * Stack-less Just-In-Time compiler
3 *
4 * Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without modification, are
7 * permitted provided that the following conditions are met:
8 *
9 * 1. Redistributions of source code must retain the above copyright notice, this list of
10 * conditions and the following disclaimer.
11 *
12 * 2. Redistributions in binary form must reproduce the above copyright notice, this list
13 * of conditions and the following disclaimer in the documentation and/or other materials
14 * provided with the distribution.
15 *
16 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19 * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22 * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24 * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 */
26
27 #include <sys/auxv.h>
28
/* When the toolchain provides __ARCH__ (minimum architecture level), some
   facility checks can be resolved at compile time. */
#ifdef __ARCH__
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif
/* Runtime (STFLE-based) facility detection is always compiled in. */
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35
sljit_get_platform_name(void)36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38 return "s390x" SLJIT_CPUINFO;
39 }
40
/* Instructions are stored as 64 bit values regardless their size. */
typedef sljit_uw sljit_ins;

/* Backend-private scratch registers (indices past the public register set). */
#define TMP_REG1 (SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2 (SLJIT_NUMBER_OF_REGISTERS + 3)

/* SLJIT register index -> hardware GPR number; see the r0..r15 notes below
   for what each hardware register is used for. */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
};

/* there are also a[2-15] available, but they are slower to access and
 * their use is limited as mundaym explained:
 * https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
 */

/* General Purpose Registers [0-15]. */
typedef sljit_uw sljit_gpr;
58
/*
 * WARNING
 * the following code is non standard and should be improved for
 * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
 * registers because r0 and r1 are the ABI recommended volatiles.
 * there is a gpr() function that maps sljit to physical register numbers
 * that should be used instead of the usual index into reg_map[] and
 * will be retired ASAP (TODO: carenas)
 */

/* Named physical GPR numbers; each comment maps the hardware register back
   to its reg_map slot / s390x ABI role. */
static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
static const sljit_gpr r7 = 7;		/* reg_map[6] */
static const sljit_gpr r8 = 8;		/* reg_map[7] */
static const sljit_gpr r9 = 9;		/* reg_map[8] */
static const sljit_gpr r10 = 10;	/* reg_map[9] */
static const sljit_gpr r11 = 11;	/* reg_map[10] */
static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
static const sljit_gpr r14 = 14;	/* reg_map[0]: return address */
static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */

/* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
/* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 *                like we do know might be faster though, reserve?
 */
91
/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
#define tmp0	r0
#define tmp1	r1

/* When reg cannot be unused. Non-zero when reg is a usable GPR-backed
   register index (1 .. SLJIT_SP). Every use of the argument is
   parenthesized so expressions such as IS_GPR_REG(x & y) parse correctly
   (the original "(reg > 0)" bound the comparison tighter than a caller's
   bitwise operator). */
#define IS_GPR_REG(reg)	(((reg) > 0) && ((reg) <= SLJIT_SP))
98
/* Link register. */
static const sljit_gpr link_r = 14;	/* r14 */

/* Backend-private scratch FP register (one past the public FP registers). */
#define TMP_FREG1 (SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* SLJIT floating point register index -> hardware FPR number. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
};
107
/* Operand field placement helpers: shift a value to the named bit offset
   (counted from the least significant end) of the 64-bit instruction word.
   The cast to sljit_ins is performed inside each macro so that shifts of
   32 bits or more are well-defined even when the argument is a 32-bit
   expression (shifting a 32-bit int by >= 32 is undefined behavior). */
#define R0A(r) ((sljit_ins)(r))
#define R4A(r) ((sljit_ins)(r) << 4)
#define R8A(r) ((sljit_ins)(r) << 8)
#define R12A(r) ((sljit_ins)(r) << 12)
#define R16A(r) ((sljit_ins)(r) << 16)
#define R20A(r) ((sljit_ins)(r) << 20)
#define R28A(r) ((sljit_ins)(r) << 28)
#define R32A(r) ((sljit_ins)(r) << 32)
#define R36A(r) ((sljit_ins)(r) << 36)
117
/* Encode a mapped GPR number into the low 4 bits of an instruction. */
#define R0(r) ((sljit_ins)reg_map[r])

/* Encode a mapped FPR number at the named bit offset. */
#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
#define F12(r) (R12A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
#define F28(r) (R28A((sljit_ins)freg_map[r]))
#define F32(r) (R32A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))
127
/* Constant record that also retains the initial value so the literal pool
   can be emitted later. */
struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
132
/* Convert SLJIT register to hardware register.
   Asserts that r is a valid reg_map index. */
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
{
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
	return reg_map[r];
}
139
push_inst(struct sljit_compiler * compiler,sljit_ins ins)140 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
141 {
142 sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
143 FAIL_IF(!ibuf);
144 *ibuf = ins;
145
146 SLJIT_ASSERT(ins <= 0xffffffffffffL);
147
148 compiler->size++;
149 if (ins & 0xffff00000000L)
150 compiler->size++;
151
152 if (ins & 0xffffffff0000L)
153 compiler->size++;
154
155 return SLJIT_SUCCESS;
156 }
157
/* Non-zero when the last flag-setting operation was an add/sub that was not
   issued as a comparison; those leave add/sub style condition codes that
   get_cc() must translate differently. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
161
/* Map the given type to a 4-bit condition code mask.
   Bit cc0 is the most significant mask bit. The mapping depends on which
   instruction produced the condition code, hence the checks against
   compiler->status_flags_state throughout. */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */

	switch (type) {
	case SLJIT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* After an add/sub, "equal to zero" is reported in a
			   flag-type dependent way. */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return cc0;
			if (type == SLJIT_OVERFLOW)
				return (cc0 | cc3);
			return (cc0 | cc2);
		}
		/* fallthrough */

	case SLJIT_ATOMIC_STORED:
	case SLJIT_F_EQUAL:
	case SLJIT_ORDERED_EQUAL:
		return cc0;

	case SLJIT_NOT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* Complement of the SLJIT_EQUAL masks above. */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return (cc1 | cc2 | cc3);
			if (type == SLJIT_OVERFLOW)
				return (cc1 | cc2);
			return (cc1 | cc3);
		}
		/* fallthrough */

	case SLJIT_UNORDERED_OR_NOT_EQUAL:
		return (cc1 | cc2 | cc3);

	case SLJIT_LESS:
	case SLJIT_ATOMIC_NOT_STORED:
		return cc1;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_GREATER:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return cc2;
		return cc3;

	case SLJIT_LESS_EQUAL:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return (cc0 | cc1);
		return (cc0 | cc1 | cc2);

	case SLJIT_SIG_LESS:
	case SLJIT_F_LESS:
	case SLJIT_ORDERED_LESS:
		return cc1;

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_F_LESS_EQUAL:
	case SLJIT_ORDERED_LESS_EQUAL:
		return (cc0 | cc1);

	case SLJIT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_SIG_GREATER:
	case SLJIT_UNORDERED_OR_GREATER:
		/* Overflow is considered greater, see SLJIT_SUB. */
		return cc2 | cc3;

	case SLJIT_SIG_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_UNORDERED:
		return cc3;

	case SLJIT_NOT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_ORDERED:
		return (cc0 | cc1 | cc2);

	case SLJIT_F_NOT_EQUAL:
	case SLJIT_ORDERED_NOT_EQUAL:
		return (cc1 | cc2);

	case SLJIT_F_GREATER:
	case SLJIT_ORDERED_GREATER:
		return cc2;

	case SLJIT_F_GREATER_EQUAL:
	case SLJIT_ORDERED_GREATER_EQUAL:
		return (cc0 | cc2);

	case SLJIT_UNORDERED_OR_LESS_EQUAL:
		return (cc0 | cc1 | cc3);

	case SLJIT_UNORDERED_OR_EQUAL:
		return (cc0 | cc3);

	case SLJIT_UNORDERED_OR_LESS:
		return (cc1 | cc3);
	}

	SLJIT_UNREACHABLE();
	return (sljit_u8)-1;
}
287
/* Facility to bit index mappings.
   Note: some facilities share the same bit index (e.g. the z196 group all
   sit on bit 45). Bit numbers follow the STFLE facility list. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
304
/* Report whether a facility is known to be present due to the compiler
   settings. This function should always be compiled to a constant
   value given a constant argument. Returns 0 (unknown) when static
   detection is disabled; the dynamic check then decides at runtime. */
static SLJIT_INLINE int have_facility_static(facility_bit x)
{
#if ENABLE_STATIC_FACILITY_DETECTION
	switch (x) {
	case FAST_LONG_DISPLACEMENT_FACILITY:
		return (__ARCH__ >=  6 /* z990 */);
	case EXTENDED_IMMEDIATE_FACILITY:
	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
		return (__ARCH__ >=  7 /* z9-109 */);
	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
		return (__ARCH__ >=  8 /* z10 */);
	case DISTINCT_OPERAND_FACILITY:
		return (__ARCH__ >=  9 /* z196 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
		return (__ARCH__ >= 10 /* zEC12 */);
	case LOAD_STORE_ON_CONDITION_2_FACILITY:
	case VECTOR_FACILITY:
		return (__ARCH__ >= 11 /* z13 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
	case VECTOR_ENHANCEMENTS_1_FACILITY:
		return (__ARCH__ >= 12 /* z14 */);
	default:
		SLJIT_UNREACHABLE();
	}
#endif
	return 0;
}
335
get_hwcap()336 static SLJIT_INLINE unsigned long get_hwcap()
337 {
338 static unsigned long hwcap = 0;
339 if (SLJIT_UNLIKELY(!hwcap)) {
340 hwcap = getauxval(AT_HWCAP);
341 SLJIT_ASSERT(hwcap != 0);
342 }
343 return hwcap;
344 }
345
have_stfle()346 static SLJIT_INLINE int have_stfle()
347 {
348 if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
349 return 1;
350
351 return (get_hwcap() & HWCAP_S390_STFLE);
352 }
353
/* Report whether the given facility is available. This function always
   performs a runtime check. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* STFLE result cache: 4 doublewords cover facility bits 0..255. */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	const sljit_uw word_index = x >> 6;
	/* Facility bits are numbered from the most significant bit down. */
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	/* bits[0] is never all-zero after STFLE (asserted below), so it
	   doubles as the "already initialized" sentinel. */
	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		__asm__ __volatile__ (
			"lgr %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */:
			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
385
386 #define HAVE_FACILITY(name, bit) \
387 static SLJIT_INLINE int name() \
388 { \
389 static int have = -1; \
390 /* Static check first. May allow the function to be optimized away. */ \
391 if (have_facility_static(bit)) \
392 have = 1; \
393 else if (SLJIT_UNLIKELY(have < 0)) \
394 have = have_facility_dynamic(bit) ? 1 : 0; \
395 \
396 return have; \
397 }
398
HAVE_FACILITY(have_eimm,EXTENDED_IMMEDIATE_FACILITY)399 HAVE_FACILITY(have_eimm, EXTENDED_IMMEDIATE_FACILITY)
400 HAVE_FACILITY(have_ldisp, FAST_LONG_DISPLACEMENT_FACILITY)
401 HAVE_FACILITY(have_genext, GENERAL_INSTRUCTION_EXTENSION_FACILITY)
402 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
403 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
404 HAVE_FACILITY(have_misc2, MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
405 #undef HAVE_FACILITY
406
/* Unsigned range checks for displacement / immediate fields. */
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

/* Non-zero when v fits in a bitlen-bit two's complement field. */
#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))

#define is_s8(d)	CHECK_SIGNED((d), 8)
#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
#define is_s32(d)	((d) == (sljit_s32)(d))
417
418 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
419 {
420 sljit_uw dh, dl;
421
422 SLJIT_ASSERT(is_s20(d));
423
424 dh = (d >> 12) & 0xff;
425 dl = ((sljit_uw)d << 8) & 0xfff00;
426 return (dh | dl) << 8;
427 }
428
/* Declares an inline instruction encoder returning the encoded sljit_ins. */
/* TODO(carenas): variadic macro is not strictly needed */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
432
/* RR form instructions: 2-byte opcode plus two 4-bit register fields. */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RR(nr, 0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr, 0x0700) /* TODO(mundaym): type for mask? */

/* DIVIDE */
SLJIT_S390X_RR(dr, 0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr, 0x1700)

/* LOAD */
SLJIT_S390X_RR(lr, 0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr, 0x1300)

/* OR */
SLJIT_S390X_RR(or, 0x1600)

#undef SLJIT_S390X_RR
465
/* RRE form instructions: 4-byte encodings with two register fields in the
   low byte (dst at bit 4, src at bit 0). */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | R4A(dst) | R0A(src); \
}

/* AND */
SLJIT_S390X_RRE(ngr, 0xb9800000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr, 0xb9970000)
SLJIT_S390X_RRE(dlgr, 0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr, 0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr, 0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr, 0xb9040000)
SLJIT_S390X_RRE(lgfr, 0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr, 0xb9260000)
SLJIT_S390X_RRE(lgbr, 0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr, 0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr, 0xb9270000)
SLJIT_S390X_RRE(lghr, 0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr, 0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr, 0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr, 0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr, 0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr, 0xb9090000)

#undef SLJIT_S390X_RRE
525
/* RI-a form instructions: 4-byte encodings with one register field and a
   16-bit immediate in the low halfword. */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | R20A(reg) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(aghi, 0xa70b0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi, 0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi, 0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi, 0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi, 0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh, 0xa50a0000, sljit_u16)

#undef SLJIT_S390X_RIA
554
/* RIL-a form instructions (requires extended immediate facility):
   6-byte encodings with one register field and a 32-bit immediate. */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(agfi, 0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih, 0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf, 0xc00a00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf, 0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf, 0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf, 0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi, 0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* SUBTRACT LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(slfi, 0xc20500000000, sljit_u32)

#undef SLJIT_S390X_RILA
590
/* RX-a form instructions: 4-byte encodings with register, index, base and
   an unsigned 12-bit displacement. */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT((d & 0xfff) == d); \
\
	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
}

/* LOAD */
SLJIT_S390X_RXA(l, 0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la, 0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh, 0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms, 0x71000000)

/* STORE */
SLJIT_S390X_RXA(st, 0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

#undef SLJIT_S390X_RXA
622
/* RXY-a instructions: 6-byte encodings with register, index, base and a
   signed 20-bit displacement (see disp_s20). cond asserts the facility
   required by the specific opcode. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
}

/* LOAD */
SLJIT_S390X_RXYA(ly, 0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg, 0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf, 0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb, 0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb, 0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy, 0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh, 0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf, 0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc, 0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc, 0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh, 0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh, 0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy, 0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg, 0xe3000000000c, 1)

/* STORE */
SLJIT_S390X_RXYA(sty, 0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg, 0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy, 0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy, 0xe30000000070, have_ldisp())

#undef SLJIT_S390X_RXYA
671
/* RSY-a instructions: 6-byte encodings with two register fields, a base
   register and a signed 20-bit displacement. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg, 0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg, 0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag, 0xeb000000000a, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg, 0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA
694
/* RIE-f instructions (require general-instructions-extension facility):
   rotate-then-*-selected-bits family; start/end select the bit range,
   rot is the rotate amount. */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins i3, i4, i5; \
\
	SLJIT_ASSERT(have_genext()); \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
\
	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg, 0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg, 0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg, 0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg, 0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
729
/* RRF-c instructions (require load/store-on-condition 1 facility):
   register-register moves gated by a 4-bit condition mask (m3). */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins m3; \
\
	SLJIT_ASSERT(have_lscond1()); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
\
	return (pattern) | m3 | R4A(dst) | R0A(src); \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RRFC(locr, 0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC
747
/* RIE-g instructions (require load/store-on-condition 2 facility):
   load a 16-bit immediate into reg when the condition mask matches. */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins m3, i2; \
\
	SLJIT_ASSERT(have_lscond2()); \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
\
	return (pattern) | R36A(reg) | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi, 0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG
766
/* RIL-b instructions: 6-byte encodings with one register field and a
   32-bit relative immediate (ri). */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl, 0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl, 0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
785
/* BRANCH (register): unconditional branch via bcr (opcode 0x07) with
   condition mask 0xf. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | target;
}

/* BRANCH RELATIVE ON CONDITION: 4-bit mask plus 16-bit relative offset. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
	sljit_ins ri2 = (sljit_ins)target & 0xffff;
	return 0xa7040000L | m1 | ri2;
}

/* BRANCH RELATIVE ON CONDITION LONG: 4-bit mask plus 32-bit relative offset. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
	sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
	return 0xc00400000000L | m1 | ri2;
}

/* FIND LEFTMOST ONE (requires extended-immediate facility). */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | R8A(dst) | R0A(src);
}

/* INSERT PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	return 0xb2220000 | R4A(dst);
}

/* SET PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
{
	return 0x0400 | R4A(dst);
}

/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
{
	/* 0x8 in the end field selects the zero-remaining-bits variant. */
	return risbhg(dst, src, start, 0x8 | end, rot);
}

#undef SLJIT_S390X_INSTRUCTION
830
/* Patch up the condition code after a zero/overflow-producing operation.
   NOTE(review): the brc offsets below are hand-counted instruction lengths
   (in 2-byte units) over the fix-up sequence — verify if it is changed. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	/* Mask 0xc (cc0|cc1): skip everything when no overflow occurred. */
	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
	FAIL_IF(push_inst(compiler, ipm(tmp1)));
	/* Re-test dst_r for zero to distinguish cc 2 from cc 3. */
	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
	FAIL_IF(push_inst(compiler, spm(tmp1)));
	return SLJIT_SUCCESS;
}
847
/* load 64-bit immediate into register without clobbering flags.
   The branches are ordered so the shortest usable encoding wins. */
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
{
	/* 4 byte instructions */
	if (is_s16(v))
		return push_inst(compiler, lghi(target, (sljit_s16)v));

	/* A single non-zero 16-bit chunk: load-logical immediate variants. */
	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
		return push_inst(compiler, llill(target, (sljit_u16)v));

	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));

	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));

	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));

	/* 6-byte forms (these require the extended-immediate facility). */
	if (is_s32(v))
		return push_inst(compiler, lgfi(target, (sljit_s32)v));

	if (((sljit_uw)v >> 32) == 0)
		return push_inst(compiler, llilf(target, (sljit_u32)v));

	if (((sljit_uw)v << 32) == 0)
		return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));

	/* Worst case: two instructions — low word, then insert the high word. */
	FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
	return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
}
879
/* Decomposed D(X,B) memory operand: base register, index register and
   displacement. r0 as base/index means "none" (reads as zero in address
   calculations). */
struct addr {
	sljit_gpr base;
	sljit_gpr index;
	sljit_s32 offset;
};
885
886 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
make_addr_bxy(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)887 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
888 struct addr *addr, sljit_s32 mem, sljit_sw off,
889 sljit_gpr tmp /* clobbered, must not be r0 */)
890 {
891 sljit_gpr base = r0;
892 sljit_gpr index = r0;
893
894 SLJIT_ASSERT(tmp != r0);
895 if (mem & REG_MASK)
896 base = gpr(mem & REG_MASK);
897
898 if (mem & OFFS_REG_MASK) {
899 index = gpr(OFFS_REG(mem));
900 if (off != 0) {
901 /* shift and put the result into tmp */
902 SLJIT_ASSERT(0 <= off && off < 64);
903 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
904 index = tmp;
905 off = 0; /* clear offset */
906 }
907 }
908 else if (!is_s20(off)) {
909 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
910 index = tmp;
911 off = 0; /* clear offset */
912 }
913 addr->base = base;
914 addr->index = index;
915 addr->offset = (sljit_s32)off;
916 return SLJIT_SUCCESS;
917 }
918
919 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
make_addr_bx(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)920 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
921 struct addr *addr, sljit_s32 mem, sljit_sw off,
922 sljit_gpr tmp /* clobbered, must not be r0 */)
923 {
924 sljit_gpr base = r0;
925 sljit_gpr index = r0;
926
927 SLJIT_ASSERT(tmp != r0);
928 if (mem & REG_MASK)
929 base = gpr(mem & REG_MASK);
930
931 if (mem & OFFS_REG_MASK) {
932 index = gpr(OFFS_REG(mem));
933 if (off != 0) {
934 /* shift and put the result into tmp */
935 SLJIT_ASSERT(0 <= off && off < 64);
936 FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
937 index = tmp;
938 off = 0; /* clear offset */
939 }
940 }
941 else if (!is_u12(off)) {
942 FAIL_IF(push_load_imm_inst(compiler, tmp, off));
943 index = tmp;
944 off = 0; /* clear offset */
945 }
946 addr->base = base;
947 addr->index = index;
948 addr->offset = (sljit_s32)off;
949 return SLJIT_SUCCESS;
950 }
951
/* Apply an encoder to a decomposed D(X,B) address, optionally choosing
   between two encoders based on cond. */
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
#define WHEN(cond, r, i1, i2, addr) \
	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
955
/* Emit one load/store through a decomposed address. forms[0] is the short
   RX encoding (12-bit displacement), forms[1]/forms[2] the 32/64-bit RXY
   encodings (20-bit displacement). May clobber tmp1. */
static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
		sljit_s32 mem, sljit_sw memw,
		sljit_s32 is_32bit, const sljit_ins* forms)
{
	struct addr addr;

	SLJIT_ASSERT(mem & SLJIT_MEM);

	/* For 32-bit ops prefer the RX form when the offset fits 12 unsigned
	   bits, or when it does not fit 20 signed bits either (make_addr_bx
	   then materializes it into a register anyway). */
	if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
		return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
	}

	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
	return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
973
/* Opcode tables for load_store_op: [0] = RX (12-bit disp, 32-bit),
   [1] = RXY 32-bit, [2] = RXY 64-bit. */
static const sljit_ins load_forms[3] = {
	0x58000000 /* l */,
	0xe30000000058 /* ly */,
	0xe30000000004 /* lg */
};

static const sljit_ins store_forms[3] = {
	0x50000000 /* st */,
	0xe30000000050 /* sty */,
	0xe30000000024 /* stg */
};

static const sljit_ins load_halfword_forms[3] = {
	0x48000000 /* lh */,
	0xe30000000078 /* lhy */,
	0xe30000000015 /* lgh */
};
991
/* Load a 32 or 64 bit word from memory operand src/srcw into dst_r.
   May clobber tmp1. */
static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
		sljit_s32 src, sljit_sw srcw,
		sljit_s32 is_32bit)
{
	return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
}
999
1000 /* May clobber tmp1. */
load_unsigned_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)1001 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1002 sljit_s32 src, sljit_sw srcw,
1003 sljit_s32 is_32bit)
1004 {
1005 struct addr addr;
1006 sljit_ins ins;
1007
1008 SLJIT_ASSERT(src & SLJIT_MEM);
1009
1010 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1011
1012 ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1013 return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1014 }
1015
/* Store a 32 or 64 bit word from src_r to memory operand dst/dstw.
   May clobber tmp1. */
static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
		sljit_s32 dst, sljit_sw dstw,
		sljit_s32 is_32bit)
{
	return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
}

#undef WHEN
1025
/* Copy an arbitrary source operand (immediate, memory or register) into
   the general purpose register dst_r. The operation width is taken from
   compiler->mode. May clobber tmp1 for memory operands. */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 mode32 = (compiler->mode & SLJIT_32) != 0;
	sljit_gpr reg;

	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src == SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);

	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, mode32);

	reg = gpr(src & REG_MASK);
	if (mode32)
		return push_inst(compiler, lr(dst_r, reg));
	return push_inst(compiler, lgr(dst_r, reg));
}
1043
/* Emit a two-address register-register operation. The destination is
   also the first source, so src1 is moved into it first when they
   differ. When dst aliases src2, the operation is performed in tmp0
   and copied back at the end (needs_move == 2) so src2 is not
   destroyed by the initial move. May clobber tmp0/tmp1. */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr src_r = tmp1;
	sljit_s32 needs_move = 1;

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == src2) {
			/* dst aliases src2: compute in tmp0, copy back later. */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (FAST_IS_REG(src2))
		src_r = gpr(src2);
	else
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* Move the result from tmp0 into the real destination register. */
	dst_r = gpr(dst & REG_MASK);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1080
/* Emit a single-source register-register operation. May clobber
   tmp0 (when dst is not a register) and tmp1 (when src1 is not). */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr reg_d;
	sljit_gpr reg_s;

	reg_d = FAST_IS_REG(dst) ? gpr(dst) : tmp0;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
		reg_s = tmp1;
	} else
		reg_s = gpr(src1);

	return push_inst(compiler, ins | R4A(reg_d) | R0A(reg_s));
}
1095
/* Emit a three-address operation where destination and both sources
   have their own register fields. May clobber tmp0/tmp1. */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr reg_d = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr reg_1;
	sljit_gpr reg_2;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
		reg_1 = tmp0;
	} else
		reg_1 = gpr(src1);

	if (!FAST_IS_REG(src2)) {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
		reg_2 = tmp1;
	} else
		reg_2 = gpr(src2);

	return push_inst(compiler, ins | R4A(reg_d) | R0A(reg_1) | R12A(reg_2));
}
1117
typedef enum {
	RI_A,	/* 16-bit immediate operand */
	RIL_A,	/* 32-bit immediate operand */
} emit_ril_type;

/* Emit a register + immediate operation. src1 is moved into the
   destination first unless dst already holds it. The immediate is
   truncated to the width implied by type. May clobber tmp0/tmp1. */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (type == RIL_A)
		return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
	return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
}
1146
/* Emit an operation taking a destination register, a source register
   and a 16-bit immediate placed in bits 16..31 of the encoding.
   May clobber tmp0. */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr reg_d;
	sljit_gpr reg_s = tmp0;
	sljit_ins imm16 = (sljit_ins)(src2w & 0xffff);

	reg_d = FAST_IS_REG(dst) ? gpr(dst) : tmp0;

	if (FAST_IS_REG(src1))
		reg_s = gpr(src1 & REG_MASK);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	return push_inst(compiler, ins | R36A(reg_d) | R32A(reg_s) | (imm16 << 16));
}
1162
typedef enum {
	RX_A,	/* unsigned 12-bit displacement */
	RXY_A,	/* signed 20-bit displacement */
} emit_rx_type;

/* Emit an operation whose second operand is a memory reference.
   src1 is moved into the destination first; when dst aliases one of
   the address registers of src2, the operation is performed in tmp0
   and copied back at the end (needs_move == 2). Displacements that do
   not fit the chosen form are materialized into tmp1.
   May clobber tmp0/tmp1. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;
	sljit_gpr base, index;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			/* Moving src1 into dst would corrupt the address of src2. */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	base = gpr(src2 & REG_MASK);
	index = tmp0;

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			/* The low two bits of src2w hold the index shift amount. */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			src2w = 0;
			index = tmp1;
		}
	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
		/* Displacement does not fit: load it into tmp1 and use it as
		   index (or as base when there is no base register). */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;
		src2w = 0;
	}

	if (type == RX_A)
		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
	else
		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* Copy the result from tmp0 into the real destination register. */
	dst_r = gpr(dst);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1228
/* Emit an instruction that stores an 8-bit immediate (srcw) directly to
   the memory operand dst/dstw. The encoding only allows a single base
   register plus a signed 20-bit displacement, so more complex addresses
   are first reduced into tmp1 with la. Clobbers tmp1.

   Fix: the original code used dst_r/index while they still held tmp1,
   i.e. an uninitialized register: the sllg on the scaled-index path
   shifted stale tmp1 contents instead of the offset register, and both
   la computations used tmp1 as the base instead of the real base
   register of dst. */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	sljit_gpr dst_r = tmp1;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = tmp1;

		if ((dstw & 0x3) == 0)
			index = gpr(OFFS_REG(dst));
		else
			/* Shift the offset register by the low two bits of dstw. */
			FAIL_IF(push_inst(compiler, sllg(tmp1, gpr(OFFS_REG(dst)), dstw & 0x3, 0)));

		/* tmp1 = base + (shifted) index; the result is used with a
		   zero displacement below. */
		FAIL_IF(push_inst(compiler, la(tmp1, 0, gpr(dst & REG_MASK), index)));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		/* Displacement does not fit s20: materialize it in tmp1 and
		   fold in the base register when present. */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		if (dst & REG_MASK)
			FAIL_IF(push_inst(compiler, la(tmp1, 0, gpr(dst & REG_MASK), tmp1)));

		dstw = 0;
	}
	else
		dst_r = gpr(dst & REG_MASK);

	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
}
1261
/* Opcode variants of one logical operation; a zero entry means that
   form does not exist for the operation. */
struct ins_forms {
	sljit_ins op_r;		/* 32-bit two-address register form (emit_rr) */
	sljit_ins op_gr;	/* 64-bit two-address register form (emit_rr) */
	sljit_ins op_rk;	/* 32-bit three-address register form (emit_rrf) */
	sljit_ins op_grk;	/* 64-bit three-address register form (emit_rrf) */
	sljit_ins op;		/* 32-bit memory form, u12 displacement (RX_A) */
	sljit_ins op_y;		/* 32-bit memory form, s20 displacement (RXY_A) */
	sljit_ins op_g;		/* 64-bit memory form, s20 displacement (RXY_A) */
};
1271
/* Emit a commutative binary operation. Because the operands may be
   swapped freely, a memory form can be used whichever of src1/src2 is
   in memory, and a two-address register form can be used whenever dst
   aliases either source. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			/* 64-bit operations only have the s20-displacement form. */
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			if (src1 & SLJIT_MEM) {
				/* Swap the operands so the memory one is second. */
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* Register-only fallback: two-address or three-address form. */
	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	if (ins && FAST_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1354
/* Emit a non-commutative binary operation. Only src2 may use a memory
   form (the operand order must be preserved), and a two-address
   register form is only usable when dst aliases src1. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins;

	if (src2 & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			/* 64-bit operations only have the s20-displacement form. */
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Prefer the short RX encoding when the displacement fits. */
			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
		}
		else if (ins12)
			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
		else if (ins20)
			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
	}

	ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;

	if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
}
1394
/* Second pass of the compiler: copy the buffered instructions into
   executable memory, resolve labels, jumps and constants, and build a
   doubleword aligned literal pool placed after the code. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;
	sljit_sw executable_offset;
	sljit_uw ins_size = compiler->size << 1;	/* compiler->size counts halfwords */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw half_count;
	SLJIT_NEXT_DEFINE_TYPES;
	struct sljit_memory_fragment *buf;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_u16 *code;
	sljit_u16 *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_ins ins;
	sljit_sw source, offset;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* Size pass over the jumps: each rewritable/absolute jump needs a
	   pool slot, and the lgrl + register-branch replacement is one
	   halfword (2 bytes) longer than the original brasl/brcl. */
	jump = compiler->jumps;
	while (jump != NULL) {
		if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
			/* encoded: */
			/* brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
			/* replace with: */
			/* lgrl %r1, <pool_addr> */
			/* bras %r14, %r1 (or bcr <mask>, %r1) */
			pool_size += sizeof(*pool);
			if (!(jump->flags & JUMP_MOV_ADDR))
				ins_size += 2;
		}
		jump = jump->next;
	}

	/* Every constant also gets a pool slot. */
	const_ = compiler->consts;
	while (const_) {
		pool_size += sizeof(*pool);
		const_ = const_->next;
	}

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and asser()
	 */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	buf = compiler->buf;
	half_count = 0;

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;
	SLJIT_NEXT_INIT_TYPES();
	SLJIT_GET_NEXT_MIN();

	/* Main emission loop: walk the instruction buffers; half_count is
	   the current position in halfwords and is matched against the
	   recorded label/jump/const addresses. */
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 3);
		do {
			ins = *buf_ptr++;

			if (next_min_addr == half_count) {
				SLJIT_ASSERT(!label || label->size >= half_count);
				SLJIT_ASSERT(!jump || jump->addr >= half_count);
				SLJIT_ASSERT(!const_ || const_->addr >= half_count);

				if (next_min_addr == next_label_size) {
					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					label = label->next;
					next_label_size = SLJIT_GET_NEXT_SIZE(label);
				}

				if (next_min_addr == next_jump_addr) {
					if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {
						/* Patch the pc-relative pool offset into the
						   current instruction; the target itself is
						   written into the pool slot in the fixup
						   pass below. */
						source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

						jump->addr = (sljit_uw)pool_ptr;

						/* store target into pool */
						offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
						pool_ptr++;

						SLJIT_ASSERT(!(offset & 1));
						offset >>= 1; /* halfword (not byte) offset */
						SLJIT_ASSERT(is_s32(offset));
						ins |= (sljit_ins)offset & 0xffffffff;
					} else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
						sljit_ins arg;

						jump->addr = (sljit_uw)pool_ptr;

						/* load address into tmp1 */
						source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
						offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

						SLJIT_ASSERT(!(offset & 1));
						offset >>= 1;
						SLJIT_ASSERT(is_s32(offset));

						code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */);
						code_ptr[1] = (sljit_u16)(offset >> 16);
						code_ptr[2] = (sljit_u16)offset;
						code_ptr += 3;
						pool_ptr++;

						/* branch to tmp1 */
						arg = (ins >> 36) & 0xf;
						if (((ins >> 32) & 0xf) == 4) {
							/* brcl -> bcr */
							ins = bcr(arg, tmp1);
						} else {
							SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);
							/* brasl -> basr */
							ins = basr(arg, tmp1);
						}

						/* Adjust half_count. */
						half_count += 2;
					} else
						jump->addr = (sljit_uw)code_ptr;

					jump = jump->next;
					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
				} else if (next_min_addr == next_const_addr) {
					/* update instruction with relative address of constant */
					source = (sljit_sw)code_ptr;
					offset = (sljit_sw)pool_ptr - source;

					SLJIT_ASSERT(!(offset & 0x1));
					offset >>= 1; /* halfword (not byte) offset */
					SLJIT_ASSERT(is_s32(offset));

					ins |= (sljit_ins)offset & 0xffffffff;

					/* update address */
					const_->addr = (sljit_uw)pool_ptr;

					/* store initial value into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value);

					/* move to next constant */
					const_ = const_->next;
					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
				}

				SLJIT_GET_NEXT_MIN();
			}

			/* Write the 2, 4 or 6 byte instruction as halfwords; the
			   high bits of the 64-bit value tell the encoded length. */
			if (ins & 0xffff00000000L) {
				*code_ptr++ = (sljit_u16)(ins >> 32);
				half_count++;
			}

			if (ins & 0xffffffff0000L) {
				*code_ptr++ = (sljit_u16)(ins >> 16);
				half_count++;
			}

			*code_ptr++ = (sljit_u16)ins;
			half_count++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label placed after the last instruction. */
	if (next_label_size == half_count) {
		label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
	SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	/* Fixup pass: pool-based jumps get their absolute target written
	   into the pool slot; plain relative jumps get the halfword offset
	   patched into the brasl/brcl immediate. */
	jump = compiler->jumps;
	while (jump != NULL) {
		offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);

		if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
			/* Store jump target into pool. */
			*(sljit_uw*)(jump->addr) = (sljit_uw)offset;
		} else {
			code_ptr = (sljit_u16*)jump->addr;
			offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

			/* offset must be halfword aligned */
			SLJIT_ASSERT(!(offset & 1));
			offset >>= 1;
			SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

			code_ptr[1] = (sljit_u16)(offset >> 16);
			code_ptr[2] = (sljit_u16)offset;
		}
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	if (pool_size)
		compiler->executable_size += (pad_size + pool_size);

	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1621
/* Report the availability of optional CPU/JIT features to the caller. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
{
	/* TODO(mundaym): implement all */
	switch (feature_type) {
	case SLJIT_HAS_FPU:
#ifdef SLJIT_IS_FPU_AVAILABLE
		return (SLJIT_IS_FPU_AVAILABLE) != 0;
#else
		/* FPU assumed present unless the build says otherwise. */
		return 1;
#endif /* SLJIT_IS_FPU_AVAILABLE */

	case SLJIT_HAS_CLZ:
	case SLJIT_HAS_REV:
	case SLJIT_HAS_ROT:
	case SLJIT_HAS_PREFETCH:
	case SLJIT_HAS_COPY_F32:
	case SLJIT_HAS_COPY_F64:
	case SLJIT_HAS_SIMD:
	case SLJIT_HAS_ATOMIC:
		return 1;

	case SLJIT_HAS_CTZ:
		/* NOTE(review): 2 presumably follows the sljit convention of
		   "supported via an instruction sequence" — confirm against
		   sljit_lir.h. */
		return 2;

	case SLJIT_HAS_CMOV:
		/* Conditional move depends on the load-on-condition facility. */
		return have_lscond1() ? 1 : 0;
	}
	return 0;
}
1651
/* No comparison type needs special handling on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	SLJIT_UNUSED_ARG(type);
	return 0;
}
1657
1658 /* --------------------------------------------------------------------- */
1659 /* Entry, exit */
1660 /* --------------------------------------------------------------------- */
1661
/* Emit the function prologue: store the required general purpose and
   floating point registers into the callee allocated save area,
   allocate the stack frame, and move incoming word arguments into
   their saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* All registers r6..r14 are in use: store them in one stmg,
		   trimming the top of the range by the kept saveds. */
		if (saved_arg_count == 0) {
			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
		}
	} else {
		/* Store the saved scratch registers (r6 upward). */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* Store the saved registers (downward from r13) and the link
		   register r14. */
		if (saved_arg_count == 0) {
			if (saveds == 0) {
				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > saved_arg_count) {
			if (saveds == saved_arg_count + 1) {
				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
			}
		}
	}

	/* When kept saveds split the register run, r14 is stored alone. */
	if (saved_arg_count > 0) {
		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* Store the saved and the saved-scratch floating point registers. */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* Allocate the 16 byte aligned frame, including the default ABI
	   stack frame area. */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	if (is_s20(-local_size))
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
	else
		FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));

	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	/* Move non-scratch word arguments into their saved registers. */
	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1757
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1758 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1759 sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1760 sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1761 {
1762 CHECK_ERROR();
1763 CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1764 set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1765
1766 compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1767 return SLJIT_SUCCESS;
1768 }
1769
/* Emit the epilogue: release the stack frame and reload the saved
   registers. last_reg is the top of the r14-relative restore range
   (r14 for a normal return, r13 for return_to, which must keep the
   branch target intact). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);

	/* Deallocate the frame: add local_size back to r15. Note: 0x41 is
	   the la opcode (the previous /* ly */ mnemonic comment was wrong;
	   ly is 0xe3..58). */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else if (is_s20(local_size))
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
	else
		FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));

	/* Mirror of the register stores in sljit_emit_enter. */
	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		if (kept_saveds_count == 0) {
			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
		}
	} else {
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		if (kept_saveds_count == 0) {
			if (saveds == 0) {
				/* r14 is only reloaded for a normal return. */
				if (last_reg == r14)
					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else if (saveds == 1 && last_reg == r13) {
				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
				offset += 2 * SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > kept_saveds_count) {
			if (saveds == kept_saveds_count + 1) {
				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
			}
		}
	}

	if (kept_saveds_count > 0) {
		if (last_reg == r14)
			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* Reload the saved and saved-scratch floating point registers. */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1845
/* Restore the saved registers, release the frame and return via r14. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	FAIL_IF(emit_stack_frame_release(compiler, r14));
	return push_inst(compiler, br(r14)); /* return */
}
1854
/* Release the current frame and jump to src instead of returning.
   The frame release stops at r13 so the branch target (kept in tmp1
   when necessary) survives the register reloads. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_to(compiler, src, srcw));

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		/* The target must be loaded before the frame is released. */
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
		srcw = 0;
	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
		/* src lives in a register that is about to be restored:
		   preserve its value in tmp1. */
		FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
		src = TMP_REG2;
		srcw = 0;
	}

	FAIL_IF(emit_stack_frame_release(compiler, r13));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
}
1877
1878 /* --------------------------------------------------------------------- */
1879 /* Operators */
1880 /* --------------------------------------------------------------------- */
1881
/* Emits a zero-operand (implicit R0/R1) operation: breakpoint, nop, and the
   multiply/divide pairs. The s390x multiply/divide instructions operate on
   even/odd register pairs, so results are computed in the tmp0/tmp1 pair and
   then copied back into R0 (and R1 for the *MOD/LMUL variants). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op) | (op & SLJIT_32);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* The following invalid instruction is emitted by gdb. */
		return push_inst(compiler, 0x0001 /* 2-byte trap */);
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		/* mlgr: 128-bit unsigned product into the register pair at arg0;
		   the trailing swap below puts low/high into R0/R1. */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		/* Correction terms: tmp0 = (arg0 >> 63) & arg1, tmp1 = (arg1 >> 63) & arg0. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* Subtract the correction terms from the high half. */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* dlr divides the 64-bit value in the tmp0:tmp1 pair by arg1;
		   tmp0 receives the remainder, tmp1 the quotient. */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		/* dsgr sign-extends the dividend internally; no tmp0 preload needed. */
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		/* No control-flow-integrity landing pads on s390x. */
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers (LMUL_*: low product into R0, high into R1) */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1965
/* Emits count-leading-zeros or count-trailing-zeros from src_r into dst_r,
   built around FLOGR (find leftmost one), which writes the leading-zero
   count of a 64-bit value into tmp0 (it targets an even/odd register pair
   starting at tmp0 — NOTE(review): assumes tmp0 is an even register).
   CTZ is derived by isolating the lowest set bit (x & -x) first. */
static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
{
	sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);

	/* 32-bit operand: zero-extend so FLOGR sees only the low 32 bits. */
	if ((op & SLJIT_32) && src_r != tmp0) {
		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
		src_r = tmp0;
	}

	if (is_ctz) {
		/* Isolate the lowest set bit: tmp0 = src & -src. */
		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));

		if (src_r == tmp0)
			FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
		else
			FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));

		src_r = tmp0;
	}

	/* tmp0 = leading-zero count of src_r (64 when src_r is zero). */
	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));

	if (is_ctz)
		/* tmp1 = clz(lowest bit) - 64; correction term merged in below. */
		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));

	if (op & SLJIT_32) {
		/* The 64-bit count over-reports by 32 for 32-bit operands. */
		if (!is_ctz && dst_r != tmp0)
			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));

		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
	}

	if (is_ctz)
		/* RXSBG folds the tmp1 correction into tmp0, turning the
		   leading-zero count of the isolated bit into the trailing-zero
		   count — NOTE(review): verify the bit positions (58/59, 63, 5/6)
		   against the z/Architecture rxsbg definition. */
		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));

	if (dst_r == tmp0)
		return SLJIT_SUCCESS;

	return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
}
2006
/* Emits a byte-reversal (REV / REV_U16 / REV_S16 / REV_U32 / REV_S32)
   using the load-reversed (lrvh/lrv/lrvg) and store-reversed
   (strvh/strv/strvg) instructions where a memory operand is involved,
   and lrvr/lrvgr for register-to-register. The 16/32-bit variants need a
   follow-up zero/sign extension (or shift) because the reversed bytes end
   up in the wrong part of the register. */
static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr addr;
	sljit_gpr reg;
	sljit_ins ins;
	sljit_s32 opcode = GET_OPCODE(op);
	sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);

	if (dst & SLJIT_MEM) {
		/* Destination in memory: reverse while storing. */
		if (src & SLJIT_MEM) {
			FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
			reg = tmp0;
		} else
			reg = gpr(src);

		FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));

		if (is_16bit)
			ins = 0xe3000000003f /* strvh */;
		else
			ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;

		return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
	}

	reg = gpr(dst);

	if (src & SLJIT_MEM) {
		/* Source in memory: reverse while loading, then extend. */
		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));

		if (is_16bit)
			ins = 0xe3000000001f /* lrvh */;
		else
			ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;

		FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));

		if (opcode == SLJIT_REV)
			return SLJIT_SUCCESS;

		/* Zero/sign extend the 16- or 32-bit reversed result. */
		if (is_16bit) {
			if (op & SLJIT_32)
				ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
			else
				ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
		} else
			ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;

		return push_inst(compiler, ins | R4A(reg) | R0A(reg));
	}

	/* Register-to-register reversal. */
	ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
	FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));

	if (opcode == SLJIT_REV)
		return SLJIT_SUCCESS;

	if (!is_16bit) {
		ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
		return push_inst(compiler, ins | R4A(reg) | R0A(reg));
	}

	/* 16-bit: lrvr/lrvgr left the halfword in the top bits; shift it
	   down with a logical (U16) or arithmetic (S16) right shift. */
	if (op & SLJIT_32) {
		ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
		return push_inst(compiler, ins | R20A(reg) | 16);
	}

	ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
	return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
}
2079
2080 /* LEVAL will be defined later with different parameters as needed */
2081 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
2082
/* Emits a single-operand operation (dst <- op(src)). The MOV family is
   handled first and split by operand location: register-to-register,
   immediate-to-register, memory load, register/immediate store, and
   memory-to-memory copy. Non-MOV opcodes (CLZ/CTZ/REV*) fall through to
   the dispatch at the bottom. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins;
	struct addr mem;
	sljit_gpr dst_r;
	sljit_gpr src_r;
	sljit_s32 opcode = GET_OPCODE(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
		/* LOAD REGISTER */
		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
			dst_r = gpr(dst);
			src_r = gpr(src);
			switch (opcode | (op & SLJIT_32)) {
			/* 32-bit */
			case SLJIT_MOV32_U8:
				ins = llcr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S8:
				ins = lbr(dst_r, src_r);
				break;
			case SLJIT_MOV32_U16:
				ins = llhr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S16:
				ins = lhr(dst_r, src_r);
				break;
			case SLJIT_MOV32:
				/* Plain same-register move is a no-op. */
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lr(dst_r, src_r);
				break;
			/* 64-bit */
			case SLJIT_MOV_U8:
				ins = llgcr(dst_r, src_r);
				break;
			case SLJIT_MOV_S8:
				ins = lgbr(dst_r, src_r);
				break;
			case SLJIT_MOV_U16:
				ins = llghr(dst_r, src_r);
				break;
			case SLJIT_MOV_S16:
				ins = lghr(dst_r, src_r);
				break;
			case SLJIT_MOV_U32:
				ins = llgfr(dst_r, src_r);
				break;
			case SLJIT_MOV_S32:
				ins = lgfr(dst_r, src_r);
				break;
			case SLJIT_MOV:
			case SLJIT_MOV_P:
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lgr(dst_r, src_r);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* LOAD IMMEDIATE */
		if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
			/* Narrow the immediate to the operation's width first. */
			switch (opcode) {
			case SLJIT_MOV_U8:
				srcw = (sljit_sw)((sljit_u8)(srcw));
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_sw)((sljit_s8)(srcw));
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_sw)((sljit_u16)(srcw));
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_sw)((sljit_s16)(srcw));
				break;
			case SLJIT_MOV_U32:
				srcw = (sljit_sw)((sljit_u32)(srcw));
				break;
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				srcw = (sljit_sw)((sljit_s32)(srcw));
				break;
			}
			return push_load_imm_inst(compiler, gpr(dst), srcw);
		}
		/* LOAD */
		/* TODO(carenas): avoid reg being defined later */
#define LEVAL(i) EVAL(i, reg, mem)
		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
			sljit_gpr reg = gpr(dst);

			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			/* TODO(carenas): convert all calls below to LEVAL */
			switch (opcode | (op & SLJIT_32)) {
			case SLJIT_MOV32_U8:
				ins = llc(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S8:
				ins = lb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_U16:
				ins = llh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S16:
				/* Short (12-bit displacement) form when possible. */
				ins = WHEN2(is_u12(mem.offset), lh, lhy);
				break;
			case SLJIT_MOV32:
				ins = WHEN2(is_u12(mem.offset), l, ly);
				break;
			case SLJIT_MOV_U8:
				ins = LEVAL(llgc);
				break;
			case SLJIT_MOV_S8:
				ins = lgb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U16:
				ins = LEVAL(llgh);
				break;
			case SLJIT_MOV_S16:
				ins = lgh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U32:
				ins = LEVAL(llgf);
				break;
			case SLJIT_MOV_S32:
				ins = lgf(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				ins = lg(reg, mem.offset, mem.index, mem.base);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* STORE and STORE IMMEDIATE */
		if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
			struct addr mem;
			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;

			if (src == SLJIT_IMM) {
				/* TODO(mundaym): MOVE IMMEDIATE? */
				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
			}
			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), stc, stcy));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), sth, sthy));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), st, sty));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler, LEVAL(stg)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
#undef LEVAL
		/* MOVE CHARACTERS */
		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
			/* Memory-to-memory: bounce the value through tmp0.
			   tmp1 is reused for both address computations. */
			struct addr mem;
			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				FAIL_IF(push_inst(compiler,
					EVAL(llgc, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(stcy, tmp0, mem));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				FAIL_IF(push_inst(compiler,
					EVAL(llgh, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sthy, tmp0, mem));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				FAIL_IF(push_inst(compiler,
					EVAL(ly, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sty, tmp0, mem));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler,
					EVAL(lg, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				FAIL_IF(push_inst(compiler,
					EVAL(stg, tmp0, mem)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		SLJIT_UNREACHABLE();
	}

	SLJIT_ASSERT(src != SLJIT_IMM);

	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);

	/* TODO(mundaym): optimize loads and stores */
	switch (opcode) {
	case SLJIT_CLZ:
	case SLJIT_CTZ:
		if (src & SLJIT_MEM)
			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));

		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
		break;
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_REV:
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		/* sljit_emit_rev handles its own memory operands and stores. */
		return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
	default:
		SLJIT_UNREACHABLE();
	}

	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2342
is_commutative(sljit_s32 op)2343 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2344 {
2345 switch (GET_OPCODE(op)) {
2346 case SLJIT_ADD:
2347 case SLJIT_ADDC:
2348 case SLJIT_MUL:
2349 case SLJIT_AND:
2350 case SLJIT_OR:
2351 case SLJIT_XOR:
2352 return 1;
2353 }
2354 return 0;
2355 }
2356
/* Signed-add encodings: 32/64-bit two-operand register forms, 32/64-bit
   three-operand (distinct-operand) forms, and the RX/RXY memory forms. */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};
2366
/* Logical (unsigned, carry-setting) add encodings; same slot layout as
   add_forms. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2376
/* Emits an addition. Immediate operands are special-cased in order of
   preference: add-to-memory (asi/agsi family) when dst aliases a memory
   src1, 16-bit immediates (ahik family), then 32-bit immediates; the
   signed forms are used only when overflow flags are requested, since
   only they set the condition code for overflow. */
static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	const struct ins_forms *forms;
	sljit_ins ins;

	if (src2 == SLJIT_IMM) {
		/* In-place add of a small immediate directly to memory. */
		if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
			if (sets_overflow)
				ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
			else
				ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
			return emit_siy(compiler, ins, dst, dstw, src2w);
		}

		if (is_s16(src2w)) {
			if (sets_overflow)
				ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
			else
				ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
			FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
			goto done;
		}

		if (!sets_overflow) {
			if ((op & SLJIT_32) || is_u32(src2w)) {
				ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
			/* Negative immediates fitting u32: add -x == subtract x. */
			if (is_u32(-src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
				goto done;
			}
		}
		else if ((op & SLJIT_32) || is_s32(src2w)) {
			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
			goto done;
		}
	}

	forms = sets_overflow ? &add_forms : &logical_add_forms;
	FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));

done:
	/* Z+overflow together needs a fix-up pass over the result. */
	if (sets_zero_overflow)
		FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));

	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2435
/* Signed-subtract encodings; same slot layout as add_forms. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};
2445
/* Logical (unsigned, borrow-setting) subtract encodings; same slot layout
   as add_forms. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2455
/* Emits a subtraction. A dst of TMP_REG2 with a comparison flag type is a
   pure compare and is emitted as c/cl-family instructions with no result.
   Otherwise immediates are folded as negated adds where the encodings are
   shorter, and the signed forms are selected whenever signed flags are
   required (they alone produce the signed condition codes). */
static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flag_type = GET_FLAG_TYPE(op);
	int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	const struct ins_forms *forms;
	sljit_ins ins;

	/* Result discarded: emit a compare instead of a subtract. */
	if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
		int compare_signed = flag_type >= SLJIT_SIG_LESS;

		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;

		if (src2 == SLJIT_IMM) {
			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
				if ((op & SLJIT_32) || is_s32(src2w)) {
					ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
			}
			else {
				if ((op & SLJIT_32) || is_u32(src2w)) {
					ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
				/* Large unsigned immediate: add the negation into tmp0
				   purely for its effect on the condition code. */
				if (is_s16(src2w))
					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
			}
		}
		else if (src2 & SLJIT_MEM) {
			if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
			}

			if (compare_signed)
				ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
			else
				ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
		}

		if (compare_signed)
			ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
		else
			ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
	}

	/* 0 - x: emit a negate. */
	if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
		ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
		FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
		goto done;
	}

	if (src2 == SLJIT_IMM) {
		/* x - imm is emitted as x + (-imm) where that encodes shorter. */
		sljit_sw neg_src2w = -src2w;

		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
				if (sets_signed)
					ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
				else
					ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
			}

			if (is_s16(neg_src2w)) {
				if (sets_signed)
					ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
				else
					ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
				goto done;
			}
		}

		if (!sets_signed) {
			if ((op & SLJIT_32) || is_u32(src2w)) {
				ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
			if (is_u32(neg_src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
				goto done;
			}
		}
		else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
			goto done;
		}
	}

	forms = sets_signed ? &sub_forms : &logical_sub_forms;
	FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));

done:
	if (sets_signed) {
		sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
			/* In case of overflow, the sign bit of the two source operands must be different, and
			   - the first operand is greater if the sign bit of the result is set
			   - the first operand is less if the sign bit of the result is not set
			   The -result operation sets the correct sign, because the result cannot be zero.
			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
			/* brc skips the following lcr/lcgr when no overflow occurred. */
			FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
			FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
		}
		else if (op & SLJIT_SET_Z)
			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
	}

	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2579
/* Multiply encodings (no flag setting); same slot layout as add_forms. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};
2589
/* Overflow-detecting multiply encodings (msrkc/msgrkc/msc/msgc). Only the
   condition-code-setting variants exist, so the remaining slots are zero;
   presumably the emitters never select those slots for this table. */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2599
/* Emits a multiplication. When flags are requested, the overflow-setting
   forms (msrkc/msgrkc — miscellaneous-instruction-extensions facility) are
   used; otherwise small immediates go through mhi/mghi or msfi/msgfi. */
static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_ins ins;

	if (HAS_FLAGS(op)) {
		/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
		FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
		FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
		if (dst_r != tmp0) {
			FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
		}
		FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
		FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
		FAIL_IF(push_inst(compiler, ipm(tmp1)));
		FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */

		return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
	}

	if (src2 == SLJIT_IMM) {
		/* Prefer the shorter 16-bit immediate encoding. */
		if (is_s16(src2w)) {
			ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
		}

		if (is_s32(src2w)) {
			ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
		}
	}

	return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
}
2636
/* Emits AND/OR/XOR with an immediate using halfword/word immediate
   instructions instead of materializing the constant. 'count16' is the
   number of non-zero 16-bit chunks of 'imm' (precomputed by the caller)
   and drives the choice between halfword (oihh/oihl/oilh/oill) and word
   (oihf/oilf etc.) forms. These instructions do not set the condition
   code the way the register forms do, so the caller only uses this path
   when SLJIT_SET_Z is not requested. */
static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_uw imm, sljit_s32 count16)
{
	sljit_s32 mode = compiler->mode;
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;

	if (IS_GPR_REG(dst)) {
		dst_r = gpr(dst & REG_MASK);
		if (dst == src1)
			needs_move = 0;
	}

	/* The immediate instructions operate in place; bring src1 into dst_r. */
	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (type == SLJIT_AND) {
		/* AND must clear bits in every word, so both halves are always
		   emitted for 64-bit operations. */
		if (!(mode & SLJIT_32))
			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
		return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
	}
	else if (type == SLJIT_OR) {
		/* OR only needs to touch non-zero chunks; pick word or halfword
		   forms depending on how many chunks are set. */
		if (count16 >= 3) {
			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
			return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
		}

		if (count16 >= 2) {
			if ((imm & 0x00000000ffffffffull) == 0)
				return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
			if ((imm & 0xffffffff00000000ull) == 0)
				return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
		}

		if ((imm & 0xffff000000000000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
		if ((imm & 0x0000ffff00000000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
		if ((imm & 0x00000000ffff0000ull) != 0)
			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
		/* imm == 0 still emits one instruction so dst is written. */
		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
			return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
		return SLJIT_SUCCESS;
	}

	/* XOR: only the non-zero 32-bit halves need touching. */
	if ((imm & 0xffffffff00000000ull) != 0)
		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
	if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
		return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
	return SLJIT_SUCCESS;
}
2690
/* Bitwise AND encodings; same slot layout as add_forms. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};
2700
/* Bitwise OR encodings; same slot layout as add_forms. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};
2710
/* Bitwise XOR encodings; same slot layout as add_forms. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2720
/* Emits AND/OR/XOR. Immediate operands go through the halfword/word
   immediate path when the Z flag is not needed; a single-halfword AND
   used only for its flags (dst == TMP_REG2) becomes a test-under-mask
   (tmll/tmlh/tmhl/tmhh), which sets the condition code without writing
   any register. */
static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 type = GET_OPCODE(op);
	const struct ins_forms *forms;

	if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {
		sljit_s32 count16 = 0;
		sljit_uw imm = (sljit_uw)src2w;

		if (op & SLJIT_32)
			imm &= 0xffffffffull;

		/* Count the non-zero 16-bit chunks (zero counts as one so at
		   least one instruction is always emitted). */
		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
			count16++;
		if ((imm & 0x00000000ffff0000ull) != 0)
			count16++;
		if ((imm & 0x0000ffff00000000ull) != 0)
			count16++;
		if ((imm & 0xffff000000000000ull) != 0)
			count16++;

		/* Flags-only AND of one halfword: test under mask. */
		if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {
			sljit_gpr src_r = tmp1;

			if (FAST_IS_REG(src1))
				src_r = gpr(src1 & REG_MASK);
			else
				FAIL_IF(emit_move(compiler, tmp1, src1, src1w));

			if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
				return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
			if ((imm & 0x00000000ffff0000ull) != 0)
				return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
			if ((imm & 0x0000ffff00000000ull) != 0)
				return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
			return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
		}

		if (!(op & SLJIT_SET_Z))
			return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
	}

	if (type == SLJIT_AND)
		forms = &bitwise_and_forms;
	else if (type == SLJIT_OR)
		forms = &bitwise_or_forms;
	else
		forms = &bitwise_xor_forms;

	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
}
2775
/* Emits a shift (SHL/LSHR/ASHR and their masked M* variants). The two-
   operand 32-bit forms (sll/srl/sra) are used when dst aliases src1;
   otherwise the three-operand sllk/srlk/srak (32-bit) or sllg/srlg/srag
   (64-bit) forms are used. For the masked 32-bit variants the register
   shift amount is masked to 5 bits first, since the hardware uses 6 bits
   of the amount. */
static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 type = GET_OPCODE(op);
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r = tmp0;
	sljit_gpr base_r = tmp0;
	sljit_ins imm = 0;
	sljit_ins ins;

	if (FAST_IS_REG(src1))
		src_r = gpr(src1);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (src2 != SLJIT_IMM) {
		if (FAST_IS_REG(src2))
			base_r = gpr(src2);
		else {
			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
			base_r = tmp1;
		}

		/* Masked 32-bit shifts: clamp the amount to 0..31 in tmp1. */
		if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
			if (base_r != tmp1) {
				/* risbg copies bits 59..63 (the low 5) into tmp1, zeroing the rest. */
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				base_r = tmp1;
			} else
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
		}
	} else
		/* Immediate amount is masked to the operation width here. */
		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));

	if ((op & SLJIT_32) && dst_r == src_r) {
		if (type == SLJIT_SHL || type == SLJIT_MSHL)
			ins = 0x89000000 /* sll */;
		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
			ins = 0x88000000 /* srl */;
		else
			ins = 0x8a000000 /* sra */;

		FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
	} else {
		if (type == SLJIT_SHL || type == SLJIT_MSHL)
			ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
			ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
		else
			ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;

		FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
	}

	/* The logical shifts do not set the condition code; OR the result
	   with itself to produce the Z flag when requested. */
	if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
		return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));

	return SLJIT_SUCCESS;
}
2836
/* Emits SLJIT_ROTL/SLJIT_ROTR using rll/rllg, which only rotate left;
   a rotate right is performed by negating the rotate amount. */
static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r = tmp0;
	sljit_gpr base_r = tmp0;
	sljit_ins imm = 0;
	sljit_ins ins;

	/* Value to rotate: a register, or loaded into tmp0. */
	if (FAST_IS_REG(src1))
		src_r = gpr(src1);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (src2 != SLJIT_IMM) {
		/* Variable rotate amount: get it into a register. */
		if (FAST_IS_REG(src2))
			base_r = gpr(src2);
		else {
			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
			base_r = tmp1;
		}
	}

	if (GET_OPCODE(op) == SLJIT_ROTR) {
		/* rotr(x, n) == rotl(x, -n): negate the amount. */
		if (src2 != SLJIT_IMM) {
			ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
			FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
			base_r = tmp1;
		} else
			src2w = -src2w;
	}

	/* Immediate amount is masked to the operand width. */
	if (src2 == SLJIT_IMM)
		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));

	ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
}
2877
/* Add-with-carry instruction encodings; zero entries mean the
   corresponding form (see struct ins_forms) does not exist. */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};
2887
/* Subtract-with-borrow instruction encodings; zero entries mean the
   corresponding form (see struct ins_forms) does not exist. */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2897
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2898 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2899 sljit_s32 dst, sljit_sw dstw,
2900 sljit_s32 src1, sljit_sw src1w,
2901 sljit_s32 src2, sljit_sw src2w)
2902 {
2903 CHECK_ERROR();
2904 CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2905 ADJUST_LOCAL_OFFSET(dst, dstw);
2906 ADJUST_LOCAL_OFFSET(src1, src1w);
2907 ADJUST_LOCAL_OFFSET(src2, src2w);
2908
2909 compiler->mode = op & SLJIT_32;
2910 compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2911
2912 if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
2913 src1 ^= src2;
2914 src2 ^= src1;
2915 src1 ^= src2;
2916
2917 src1w ^= src2w;
2918 src2w ^= src1w;
2919 src1w ^= src2w;
2920 }
2921
2922 switch (GET_OPCODE(op)) {
2923 case SLJIT_ADD:
2924 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2925 return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2926 case SLJIT_ADDC:
2927 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2928 FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2929 if (dst & SLJIT_MEM)
2930 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2931 return SLJIT_SUCCESS;
2932 case SLJIT_SUB:
2933 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2934 return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2935 case SLJIT_SUBC:
2936 compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2937 FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2938 if (dst & SLJIT_MEM)
2939 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2940 return SLJIT_SUCCESS;
2941 case SLJIT_MUL:
2942 FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2943 break;
2944 case SLJIT_AND:
2945 case SLJIT_OR:
2946 case SLJIT_XOR:
2947 FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2948 break;
2949 case SLJIT_SHL:
2950 case SLJIT_MSHL:
2951 case SLJIT_LSHR:
2952 case SLJIT_MLSHR:
2953 case SLJIT_ASHR:
2954 case SLJIT_MASHR:
2955 FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2956 break;
2957 case SLJIT_ROTL:
2958 case SLJIT_ROTR:
2959 FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2960 break;
2961 }
2962
2963 if (dst & SLJIT_MEM)
2964 return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2965 return SLJIT_SUCCESS;
2966 }
2967
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2968 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2969 sljit_s32 src1, sljit_sw src1w,
2970 sljit_s32 src2, sljit_sw src2w)
2971 {
2972 sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;
2973
2974 CHECK_ERROR();
2975 CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2976
2977 SLJIT_SKIP_CHECKS(compiler);
2978 return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
2979 }
2980
sljit_emit_op2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2981 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2982 sljit_s32 dst_reg,
2983 sljit_s32 src1, sljit_sw src1w,
2984 sljit_s32 src2, sljit_sw src2w)
2985 {
2986 CHECK_ERROR();
2987 CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2988
2989 switch (GET_OPCODE(op)) {
2990 case SLJIT_MULADD:
2991 SLJIT_SKIP_CHECKS(compiler);
2992 FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));
2993 return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));
2994 }
2995
2996 return SLJIT_SUCCESS;
2997 }
2998
/* Emits a funnel shift: dst = src1 shifted by src3, with the vacated
   bit positions filled from src2. Degenerates into a rotate when both
   sources are the same register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_right;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_gpr src1_r = gpr(src1_reg);
	sljit_gpr src2_r = gpr(src2_reg);
	sljit_gpr src3_r = tmp1;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);

	/* Shifting a register into itself is a rotate. */
	if (src1_reg == src2_reg) {
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		src3w &= bit_length - 1;

		/* NOTE(review): a zero amount emits nothing, so dst_reg is not
		   written - presumably the API guarantees dst == src1 or a
		   non-zero amount in this case; confirm. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		/* Step 1: shift src1 into dst by the immediate amount. */
		if (op & SLJIT_32) {
			if (dst_r == src1_r) {
				ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
				FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
			} else {
				ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
				FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
			}
		} else {
			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
		}

		/* Step 2: rotate src2 and insert the selected bit range into
		   dst with a single risbg. */
		ins = 0xec0000000055 /* risbg */;

		if (is_right) {
			src3w = bit_length - src3w;
			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
		} else
			ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);

		return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
	}

	/* Variable amount: get it into a register, kept clear of dst_r. */
	if (!(src3 & SLJIT_MEM)) {
		src3_r = gpr(src3);

		if (dst_r == src3_r) {
			FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
			src3_r = tmp1;
		}
	} else
		FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));

	if (op & SLJIT_32) {
		/* Masked variants use only the low five bits of the amount. */
		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
			if (src3_r != tmp1) {
				/* risbg: keep bits 59..63 (amount & 0x1f), zero the rest. */
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				src3_r = tmp1;
			} else
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
		}

		/* dst = src1 shifted by the variable amount. */
		if (dst_r == src1_r) {
			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
			FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
		} else {
			ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
		}

		/* tmp1 = amount ^ 31 (i.e. 31 - amount for values in 0..31). */
		if (src3_r != tmp1) {
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
		} else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));

		/* tmp0 = src2 shifted the opposite way by (31 - amount) + 1,
		   the +1 coming from the displacement field. */
		ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));

		/* Combine both halves into the destination. */
		return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
	}

	/* 64 bit variable amount: dst = src1 shifted by the amount. */
	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
	FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));

	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* The amount may be zero: pre-shift src2 by one and use the
		   complemented amount (amount ^ 63) for the remainder, so a
		   zero total amount contributes nothing. */
		if (src3_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));

		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
		src2_r = tmp0;

		if (src3_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
		else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
	} else
		/* Amount is known non-zero: the negated amount suffices. */
		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));

	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
}
3116
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3117 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3118 sljit_s32 src, sljit_sw srcw)
3119 {
3120 sljit_gpr src_r;
3121 struct addr addr;
3122
3123 CHECK_ERROR();
3124 CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3125 ADJUST_LOCAL_OFFSET(src, srcw);
3126
3127 switch (op) {
3128 case SLJIT_FAST_RETURN:
3129 src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3130 if (src & SLJIT_MEM)
3131 FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3132
3133 return push_inst(compiler, br(src_r));
3134 case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3135 return SLJIT_SUCCESS;
3136 case SLJIT_PREFETCH_L1:
3137 case SLJIT_PREFETCH_L2:
3138 case SLJIT_PREFETCH_L3:
3139 case SLJIT_PREFETCH_ONCE:
3140 FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3141 return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3142 default:
3143 return SLJIT_SUCCESS;
3144 }
3145
3146 return SLJIT_SUCCESS;
3147 }
3148
/* Single-destination operations: fast enter (capture the link register)
   and fetching the function's return address from the stack frame. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw)
{
	sljit_gpr dst_r = link_r;
	sljit_s32 size;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	switch (op) {
	case SLJIT_FAST_ENTER:
		/* The return address is in the link register; copy it to a
		   register destination, otherwise fall through to the store. */
		if (FAST_IS_REG(dst))
			return push_inst(compiler, lgr(gpr(dst), link_r));
		break;
	case SLJIT_GET_RETURN_ADDRESS:
		dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;

		/* Load the saved return address from the stack, just past the
		   locals and the saved-register area of the current frame. */
		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
		FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
		break;
	}

	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, 0);

	return SLJIT_SUCCESS;
}
3177
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)3178 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3179 {
3180 CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3181
3182 if (type == SLJIT_GP_REGISTER)
3183 return (sljit_s32)gpr(reg);
3184
3185 if (type != SLJIT_FLOAT_REGISTER)
3186 return -1;
3187
3188 return (sljit_s32)freg_map[reg];
3189 }
3190
/* Emits a raw machine instruction supplied by the caller. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	/* Instructions are stored right-aligned inside a 64 bit value (see
	   the sljit_ins typedef above), so copy the raw bytes into the
	   trailing `size` bytes of ins.
	   NOTE(review): this byte layout assumes a big-endian host, which
	   holds on s390x. */
	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
	return push_inst(compiler, ins);
}
3202
3203 /* --------------------------------------------------------------------- */
3204 /* Floating point operators */
3205 /* --------------------------------------------------------------------- */
3206
3207 #define FLOAT_LOAD 0
3208 #define FLOAT_STORE 1
3209
/* Loads or stores a floating point register from/to memory (op combines
   FLOAT_LOAD/FLOAT_STORE with SLJIT_32), choosing between the short
   12-bit-displacement and the long 20-bit-displacement forms. */
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(mem & SLJIT_MEM);

	/* Short forms: used with an index register, when the offset fits in
	   12 unsigned bits, or when it does not fit in 20 signed bits either
	   (make_addr_bx then materializes the offset via tmp1). */
	if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));

		if (op & FLOAT_STORE)
			ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
		else
			ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;

		return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
	}

	/* Long-displacement forms for the remaining signed 20 bit offsets. */
	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));

	if (op & FLOAT_STORE)
		ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
	else
		ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;

	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
3239
/* Emits a float operation with reg as first operand: ins_r is the
   register-register encoding, ins the register-memory encoding. */
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
	sljit_s32 reg,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr mem_addr;

	if (src & SLJIT_MEM) {
		/* Memory operand form. */
		FAIL_IF(make_addr_bx(compiler, &mem_addr, src, srcw, tmp1));
		return push_inst(compiler, ins | F36(reg) | R32A(mem_addr.index) | R28A(mem_addr.base) | ((sljit_ins)mem_addr.offset << 16));
	}

	/* Register operand form. */
	return push_inst(compiler, ins_r | F4(reg) | F0(src));
}
3252
/* Converts a float/double to a signed 32 or 64 bit integer. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_ins ins;

	/* Bring a memory source into TMP_FREG1 first. */
	if (src & SLJIT_MEM) {
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* M3 is set to 5 (the rounding-mode field baked into the opcode
	   constants below). */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));

	/* Store 32 bits for the *_S32 conversions, 64 bits otherwise. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);

	return SLJIT_SUCCESS;
}
3278
/* Shared tail for the integer-to-float conversions: materializes the
   source in tmp0 when needed, emits the convert instruction `ins`, and
   spills a memory destination. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src == SLJIT_IMM) {
		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		src = (sljit_s32)tmp0;
	}
	else if (src & SLJIT_MEM) {
		/* Bit 0x100000 is set only in the 32-bit-source opcodes
		   (cefbr/cdfbr/celfbr/cdlfbr), selecting a 32 bit load. */
		FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
		src = (sljit_s32)tmp0;
	}

	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));

	/* Bit 0x10000 distinguishes the double-result opcodes (cd*) from
	   the single-result ones (ce*); store with the matching width. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}
3301
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3302 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3303 sljit_s32 dst, sljit_sw dstw,
3304 sljit_s32 src, sljit_sw srcw)
3305 {
3306 sljit_ins ins;
3307
3308 if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3309 srcw = (sljit_s32)srcw;
3310
3311 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3312 ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3313 else
3314 ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3315
3316 return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3317 }
3318
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3319 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3320 sljit_s32 dst, sljit_sw dstw,
3321 sljit_s32 src, sljit_sw srcw)
3322 {
3323 sljit_ins ins;
3324
3325 if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3326 srcw = (sljit_u32)srcw;
3327
3328 if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3329 ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3330 else
3331 ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3332
3333 return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3334 }
3335
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3336 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3337 sljit_s32 src1, sljit_sw src1w,
3338 sljit_s32 src2, sljit_sw src2w)
3339 {
3340 sljit_ins ins_r, ins;
3341
3342 if (src1 & SLJIT_MEM) {
3343 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3344 src1 = TMP_FREG1;
3345 }
3346
3347 if (op & SLJIT_32) {
3348 ins_r = 0xb3090000 /* cebr */;
3349 ins = 0xed0000000009 /* ceb */;
3350 } else {
3351 ins_r = 0xb3190000 /* cdbr */;
3352 ins = 0xed0000000019 /* cdb */;
3353 }
3354
3355 return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3356 }
3357
/* Single-operand floating point operations: move, precision
   conversions, negate and absolute value. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;
	sljit_ins ins;

	CHECK_ERROR();

	/* Argument checks; may dispatch to the conversion helpers above
	   (macro defined elsewhere in this file). */
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (op == SLJIT_CONV_F64_FROM_F32)
		/* ldeb widens directly from memory, so no separate load. */
		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
	else {
		if (src & SLJIT_MEM) {
			/* For F32<-F64 the source is double width despite SLJIT_32. */
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
			src = dst_r;
		}

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV_F64:
			if (FAST_IS_REG(dst)) {
				if (dst == src)
					return SLJIT_SUCCESS;

				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
				break;
			}
			/* Register (or just-loaded) value stored straight to memory. */
			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
		case SLJIT_CONV_F64_FROM_F32:
			/* Only SLJIT_CONV_F32_FROM_F64. */
			ins = 0xb3440000 /* ledbr */;
			break;
		case SLJIT_NEG_F64:
			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
			break;
		default:
			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
	}

	/* Spill the result (left in TMP_FREG1) to a memory destination. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}
3410
3411 #define FLOAT_MOV(op, dst_r, src_r) \
3412 (((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3413
/* Two-operand floating point arithmetic (add/sub/mul/div). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r = TMP_FREG1;
	sljit_ins ins_r, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* The instructions used below compute dst_r = dst_r OP src2, so
	   src1 must end up in dst_r first; the do/while(0) exists only to
	   allow breaking out early. */
	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			/* dst already holds src1: nothing to move. */
			if (dst == src1)
				break;

			if (dst == src2) {
				/* Commutative ops can simply swap the sources. */
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				/* Non-commutative: save src2 before it is overwritten. */
				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		/* Move src1 into the working register. */
		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	/* ins_r is the register-register form, ins the memory form. */
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	/* Spill the result (left in TMP_FREG1) to a memory destination. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}
3481
sljit_emit_fop2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3482 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3483 sljit_s32 dst_freg,
3484 sljit_s32 src1, sljit_sw src1w,
3485 sljit_s32 src2, sljit_sw src2w)
3486 {
3487 sljit_s32 reg;
3488
3489 CHECK_ERROR();
3490 CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3491 ADJUST_LOCAL_OFFSET(src1, src1w);
3492 ADJUST_LOCAL_OFFSET(src2, src2w);
3493
3494 if (src2 & SLJIT_MEM) {
3495 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
3496 src2 = TMP_FREG1;
3497 }
3498
3499 if (src1 & SLJIT_MEM) {
3500 reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
3501 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
3502 src1 = reg;
3503 }
3504
3505 return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
3506 }
3507
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)3508 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
3509 sljit_s32 freg, sljit_f32 value)
3510 {
3511 union {
3512 sljit_s32 imm;
3513 sljit_f32 value;
3514 } u;
3515
3516 CHECK_ERROR();
3517 CHECK(check_sljit_emit_fset32(compiler, freg, value));
3518
3519 u.value = value;
3520
3521 FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
3522 return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3523 }
3524
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)3525 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
3526 sljit_s32 freg, sljit_f64 value)
3527 {
3528 union {
3529 sljit_sw imm;
3530 sljit_f64 value;
3531 } u;
3532
3533 CHECK_ERROR();
3534 CHECK(check_sljit_emit_fset64(compiler, freg, value));
3535
3536 u.value = value;
3537
3538 FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
3539 return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3540 }
3541
/* Copies raw bits between a general purpose and a floating point
   register in either direction, without conversion. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 freg, sljit_s32 reg)
{
	sljit_gpr gen_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));

	gen_r = gpr(reg);

	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
		if (op & SLJIT_32) {
			/* A 32 bit pattern must sit in the upper word of the GPR
			   before ldgr transfers it into the FPR. */
			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
			gen_r = tmp0;
		}

		return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
	}

	/* COPY_FROM_F64: move the FPR bits into the GPR. */
	FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));

	if (!(op & SLJIT_32))
		return SLJIT_SUCCESS;

	/* 32 bit: shift the pattern down from the upper word. */
	return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
}
3568
3569 /* --------------------------------------------------------------------- */
3570 /* Conditional instructions */
3571 /* --------------------------------------------------------------------- */
3572
sljit_emit_label(struct sljit_compiler * compiler)3573 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3574 {
3575 struct sljit_label *label;
3576
3577 CHECK_ERROR_PTR();
3578 CHECK_PTR(check_sljit_emit_label(compiler));
3579
3580 if (compiler->last_label && compiler->last_label->size == compiler->size)
3581 return compiler->last_label;
3582
3583 label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3584 PTR_FAIL_IF(!label);
3585 set_label(label, compiler);
3586 return label;
3587 }
3588
/* Emits a (conditional) jump or call with an unresolved target; the
   recorded sljit_jump is patched when code is generated. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	/* Condition-code mask; 0xf (always) for unconditional types. */
	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	/* record jump */
	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* Remember the instruction index for later patching. */
	jump->addr = compiler->size;

	/* emit jump instruction: brasl (saving the return address in the
	   link register) for calls, brcl for plain jumps. */
	type &= 0xff;
	if (type >= SLJIT_FAST_CALL)
		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
	else
		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));

	return jump;
}
3612
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)3613 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
3614 sljit_s32 arg_types)
3615 {
3616 SLJIT_UNUSED_ARG(arg_types);
3617 CHECK_ERROR_PTR();
3618 CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
3619
3620 if (type & SLJIT_CALL_RETURN) {
3621 PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
3622 type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
3623 }
3624
3625 SLJIT_SKIP_CHECKS(compiler);
3626 return sljit_emit_jump(compiler, type);
3627 }
3628
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)3629 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3630 {
3631 sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3632
3633 CHECK_ERROR();
3634 CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3635
3636 if (src == SLJIT_IMM) {
3637 SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3638 FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3639 }
3640 else if (src & SLJIT_MEM) {
3641 ADJUST_LOCAL_OFFSET(src, srcw);
3642 FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3643 }
3644
3645 /* emit jump instruction */
3646 if (type >= SLJIT_FAST_CALL)
3647 return push_inst(compiler, basr(link_r, src_r));
3648
3649 return push_inst(compiler, br(src_r));
3650 }
3651
/* Emits an indirect call; arg_types is unused on this target. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	SLJIT_UNUSED_ARG(arg_types);

	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	/* Memory target: load it into tmp1 (== TMP_REG2) first. */
	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
		srcw = 0;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* The frame release below restores the saved registers, so a
		   saved-register target must be copied to tmp1 beforehand. */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
			srcw = 0;
		}

		/* Tail call: release the frame and emit a plain jump. */
		FAIL_IF(emit_stack_frame_release(compiler, r14));
		type = SLJIT_JUMP;
	}

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_ijump(compiler, type, src, srcw);
}
3684
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	/* Materialize the condition 'type' as a 0/1 value: for SLJIT_MOV/MOV32
	   it is written to dst, for SLJIT_AND/OR/XOR it is combined with the
	   current value of dst. */
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr loc_r = tmp1;
	sljit_u8 mask = get_cc(compiler, type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

		break;
	case SLJIT_MOV32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_MOV:
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* TODO(mundaym): fold into cmov helper function? */
#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		/* Load-on-condition immediate: start from 0, set to 1 when the
		   condition code matches 'mask'. */
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, lochi, locghi)));
	} else {
		/* No lochi/locghi available: load 1, then conditionally branch
		   over the load of 0 (brc offsets are counted in halfwords:
		   2 for the brc itself + 2 for the 4 byte load). */
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));
		FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
	}
#undef LEVAL

	/* apply bitwise op and set condition codes */
	switch (GET_OPCODE(op)) {
#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, xr, xgr)));
		break;
#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));

	return SLJIT_SUCCESS;
}
3755
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	/* Conditional move: dst_reg = condition(type) ? src1 : src2_reg,
	   implemented with the load-on-condition family
	   (loc/locg, lochi/locghi, locr/locgr). */
	sljit_ins mask;
	sljit_gpr src_r;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	if (dst_reg != src2_reg) {
		if (src1 == dst_reg) {
			/* dst already holds src1: move src2 instead, on the
			   inverted condition (bit 0x1 flips the condition). */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				/* Writing dst first would corrupt the src1 address:
				   load src1 unconditionally, then conditionally move
				   src2 with the inverted condition. */
				FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
				src1 = src2_reg;
				src1w = 0;
				type ^= 0x1;
			} else
				FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
		}
	}

	mask = get_cc(compiler, type & ~SLJIT_32);

	if (src1 & SLJIT_MEM) {
		if (src1 & OFFS_REG_MASK) {
			src_r = gpr(OFFS_REG(src1));

			if (src1w != 0) {
				/* Apply the scaling shift of the indexed mode. */
				FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
				src_r = tmp1;
			}

			/* loc/locg has no index register: fold base + index into tmp1. */
			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
			src_r = tmp1;
			src1w = 0;
		} else if (!is_s20(src1w)) {
			/* Displacement does not fit in signed 20 bits: build the
			   address in tmp1. */
			FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));

			if (src1 & REG_MASK)
				FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));

			src_r = tmp1;
			src1w = 0;
		} else
			src_r = gpr(src1 & REG_MASK);

		ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
		return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
	}

	if (src1 == SLJIT_IMM) {
		if (type & SLJIT_32)
			src1w = (sljit_s32)src1w;

		/* Immediates fitting in 16 bits can use lochi/locghi directly. */
		if (have_lscond2() && is_s16(src1w)) {
			ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
			return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
		}

		FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
		src_r = tmp1;
	} else
		src_r = gpr(src1);

	ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
	return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
}
3833
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)3834 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3835 sljit_s32 dst_freg,
3836 sljit_s32 src1, sljit_sw src1w,
3837 sljit_s32 src2_freg)
3838 {
3839 sljit_ins ins;
3840 struct sljit_label *label;
3841 struct sljit_jump *jump;
3842
3843 CHECK_ERROR();
3844 CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3845
3846 ADJUST_LOCAL_OFFSET(src1, src1w);
3847
3848 if (dst_freg != src2_freg) {
3849 if (dst_freg == src1) {
3850 src1 = src2_freg;
3851 src1w = 0;
3852 type ^= 0x1;
3853 } else {
3854 ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3855 FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
3856 }
3857 }
3858
3859 SLJIT_SKIP_CHECKS(compiler);
3860 jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
3861 FAIL_IF(!jump);
3862
3863 if (!(src1 & SLJIT_MEM)) {
3864 ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3865 FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
3866 } else
3867 FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
3868
3869 SLJIT_SKIP_CHECKS(compiler);
3870 label = sljit_emit_label(compiler);
3871 FAIL_IF(!label);
3872
3873 sljit_set_label(jump, label);
3874 return SLJIT_SUCCESS;
3875 }
3876
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3877 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3878 sljit_s32 reg,
3879 sljit_s32 mem, sljit_sw memw)
3880 {
3881 sljit_ins ins, reg1, reg2, base, offs = 0;
3882
3883 CHECK_ERROR();
3884 CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3885
3886 if (!(reg & REG_PAIR_MASK))
3887 return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3888
3889 ADJUST_LOCAL_OFFSET(mem, memw);
3890
3891 base = gpr(mem & REG_MASK);
3892 reg1 = gpr(REG_PAIR_FIRST(reg));
3893 reg2 = gpr(REG_PAIR_SECOND(reg));
3894
3895 if (mem & OFFS_REG_MASK) {
3896 memw &= 0x3;
3897 offs = gpr(OFFS_REG(mem));
3898
3899 if (memw != 0) {
3900 FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3901 offs = tmp1;
3902 } else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3903 FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
3904 base = tmp1;
3905 offs = 0;
3906 }
3907
3908 memw = 0;
3909 } else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3910 FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3911
3912 if (base == 0)
3913 base = tmp1;
3914 else
3915 offs = tmp1;
3916
3917 memw = 0;
3918 }
3919
3920 if (offs == 0 && reg2 == (reg1 + 1)) {
3921 ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3922 return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3923 }
3924
3925 ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3926
3927 if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3928 FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3929 return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3930 }
3931
3932 FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3933 return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3934 }
3935
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3936 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3937 sljit_s32 freg,
3938 sljit_s32 srcdst, sljit_sw srcdstw)
3939 {
3940 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3941 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3942 sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3943 struct addr addr;
3944 sljit_ins ins;
3945
3946 CHECK_ERROR();
3947 CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3948
3949 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3950
3951 if (reg_size != 4)
3952 return SLJIT_ERR_UNSUPPORTED;
3953
3954 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3955 return SLJIT_ERR_UNSUPPORTED;
3956
3957 if (type & SLJIT_SIMD_TEST)
3958 return SLJIT_SUCCESS;
3959
3960 if (!(srcdst & SLJIT_MEM)) {
3961 if (type & SLJIT_SIMD_STORE)
3962 ins = F36(srcdst) | F32(freg);
3963 else
3964 ins = F36(freg) | F32(srcdst);
3965
3966 return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
3967 }
3968
3969 FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
3970 ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
3971
3972 if (alignment >= 4)
3973 ins |= 4 << 12;
3974 else if (alignment == 3)
3975 ins |= 3 << 12;
3976
3977 return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
3978 }
3979
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3980 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3981 sljit_s32 freg,
3982 sljit_s32 src, sljit_sw srcw)
3983 {
3984 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3985 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3986 struct addr addr;
3987 sljit_gpr reg;
3988 sljit_sw sign_ext;
3989
3990 CHECK_ERROR();
3991 CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3992
3993 ADJUST_LOCAL_OFFSET(src, srcw);
3994
3995 if (reg_size != 4)
3996 return SLJIT_ERR_UNSUPPORTED;
3997
3998 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
3999 return SLJIT_ERR_UNSUPPORTED;
4000
4001 if (type & SLJIT_SIMD_TEST)
4002 return SLJIT_SUCCESS;
4003
4004 if (src & SLJIT_MEM) {
4005 FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4006 return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
4007 | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
4008 }
4009
4010 if (type & SLJIT_SIMD_FLOAT) {
4011 if (src == SLJIT_IMM)
4012 return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
4013
4014 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
4015 }
4016
4017 if (src == SLJIT_IMM) {
4018 sign_ext = 0x10000;
4019
4020 switch (elem_size) {
4021 case 0:
4022 srcw &= 0xff;
4023 sign_ext = (sljit_s8)srcw;
4024 break;
4025 case 1:
4026 srcw &= 0xffff;
4027 sign_ext = (sljit_s16)srcw;
4028 break;
4029 case 2:
4030 if ((sljit_s32)srcw == (sljit_s16)srcw) {
4031 srcw &= 0xffff;
4032 sign_ext = (sljit_s16)srcw;
4033 } else
4034 srcw &= 0xffffffff;
4035 break;
4036 default:
4037 if (srcw == (sljit_s16)srcw) {
4038 srcw &= 0xffff;
4039 sign_ext = (sljit_s16)srcw;
4040 }
4041 break;
4042 }
4043
4044 if (sign_ext != 0x10000) {
4045 if (sign_ext == 0 || sign_ext == -1)
4046 return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
4047 | (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
4048
4049 return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
4050 | ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
4051 }
4052
4053 push_load_imm_inst(compiler, tmp0, srcw);
4054 reg = tmp0;
4055 } else
4056 reg = gpr(src);
4057
4058 FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
4059 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
4060 }
4061
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)4062 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4063 sljit_s32 freg, sljit_s32 lane_index,
4064 sljit_s32 srcdst, sljit_sw srcdstw)
4065 {
4066 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4067 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4068 struct addr addr;
4069 sljit_gpr reg;
4070 sljit_ins ins = 0;
4071
4072 CHECK_ERROR();
4073 CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
4074
4075 ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4076
4077 if (reg_size != 4)
4078 return SLJIT_ERR_UNSUPPORTED;
4079
4080 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4081 return SLJIT_ERR_UNSUPPORTED;
4082
4083 if (type & SLJIT_SIMD_TEST)
4084 return SLJIT_SUCCESS;
4085
4086 if (srcdst & SLJIT_MEM) {
4087 FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4088 ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4089 }
4090
4091 if (type & SLJIT_SIMD_LANE_ZERO) {
4092 if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
4093 return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
4094
4095 if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4096 FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
4097 srcdst = TMP_FREG1;
4098 srcdstw = 0;
4099 }
4100
4101 FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
4102 }
4103
4104 if (srcdst & SLJIT_MEM) {
4105 switch (elem_size) {
4106 case 0:
4107 ins |= 0xe70000000000 /* vleb */;
4108 break;
4109 case 1:
4110 ins |= 0xe70000000001 /* vleh */;
4111 break;
4112 case 2:
4113 ins |= 0xe70000000003 /* vlef */;
4114 break;
4115 default:
4116 ins |= 0xe70000000002 /* vleg */;
4117 break;
4118 }
4119
4120 /* Convert to vsteb - vsteg */
4121 if (type & SLJIT_SIMD_STORE)
4122 ins |= 0x8;
4123
4124 return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
4125 }
4126
4127 if (type & SLJIT_SIMD_FLOAT) {
4128 if (type & SLJIT_SIMD_STORE)
4129 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
4130
4131 if (elem_size == 3) {
4132 if (lane_index == 0)
4133 ins = F32(srcdst) | F28(freg) | (1 << 12);
4134 else
4135 ins = F32(freg) | F28(srcdst);
4136
4137 return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
4138 }
4139
4140 FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
4141 return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
4142 }
4143
4144 if (srcdst == SLJIT_IMM) {
4145 switch (elem_size) {
4146 case 0:
4147 ins = 0xe70000000040 /* vleib */;
4148 srcdstw &= 0xff;
4149 break;
4150 case 1:
4151 ins = 0xe70000000041 /* vleih */;
4152 srcdstw &= 0xffff;
4153 break;
4154 case 2:
4155 if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
4156 srcdstw &= 0xffff;
4157 ins = 0xe70000000043 /* vleif */;
4158 } else
4159 srcdstw &= 0xffffffff;
4160 break;
4161 default:
4162 if (srcdstw == (sljit_s16)srcdstw) {
4163 srcdstw &= 0xffff;
4164 ins = 0xe70000000042 /* vleig */;
4165 }
4166 break;
4167 }
4168
4169 if (ins != 0)
4170 return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
4171
4172 push_load_imm_inst(compiler, tmp0, srcdstw);
4173 reg = tmp0;
4174 } else
4175 reg = gpr(srcdst);
4176
4177 ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
4178
4179 if (!(type & SLJIT_SIMD_STORE))
4180 return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
4181
4182 FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
4183
4184 if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
4185 return SLJIT_SUCCESS;
4186
4187 switch (elem_size) {
4188 case 0:
4189 ins = 0xb9060000 /* lgbr */;
4190 break;
4191 case 1:
4192 ins = 0xb9070000 /* lghr */;
4193 break;
4194 default:
4195 ins = 0xb9140000 /* lgfr */;
4196 break;
4197 }
4198
4199 return push_inst(compiler, ins | R4A(reg) | R0A(reg));
4200 }
4201
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)4202 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4203 sljit_s32 freg,
4204 sljit_s32 src, sljit_s32 src_lane_index)
4205 {
4206 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4207 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4208
4209 CHECK_ERROR();
4210 CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4211
4212 if (reg_size != 4)
4213 return SLJIT_ERR_UNSUPPORTED;
4214
4215 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4216 return SLJIT_ERR_UNSUPPORTED;
4217
4218 if (type & SLJIT_SIMD_TEST)
4219 return SLJIT_SUCCESS;
4220
4221 return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
4222 | ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
4223 }
4224
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 freg,
	sljit_s32 src, sljit_sw srcw)
{
	/* Widen packed elements from 2^elem_size to 2^elem2_size bytes:
	   zero/sign extension for integers, single -> double for floats. */
	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
	struct addr addr;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));

	ADJUST_LOCAL_OFFSET(src, srcw);

	if (reg_size != 4)
		return SLJIT_ERR_UNSUPPORTED;

	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
		return SLJIT_ERR_UNSUPPORTED;

	if (type & SLJIT_SIMD_TEST)
		return SLJIT_SUCCESS;

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
		ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);

		/* The source occupies 16 >> (elem2_size - elem_size) bytes:
		   load exactly that much into element 0 of freg
		   (diff 1 -> 8 bytes, diff 2 -> 4 bytes, diff 3 -> 2 bytes). */
		switch (elem2_size - elem_size) {
		case 1:
			ins |= 0xe70000000002 /* vleg */;
			break;
		case 2:
			ins |= 0xe70000000003 /* vlef */;
			break;
		default:
			ins |= 0xe70000000001 /* vleh */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins));
		src = freg;
	}

	if (type & SLJIT_SIMD_FLOAT) {
		/* Position the two source floats (vuplh + 32 bit doubleword
		   shift), then lengthen them to doubles with vfll. */
		FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
		FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
		return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
	}

	/* Integer path: unpack the high half repeatedly, doubling the
	   element size each round until it reaches elem2_size. */
	ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);

	do {
		FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
		src = freg;
	} while (++elem_size < elem2_size);

	return SLJIT_SUCCESS;
}
4284
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)4285 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4286 sljit_s32 freg,
4287 sljit_s32 dst, sljit_sw dstw)
4288 {
4289 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4290 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4291 sljit_gpr dst_r;
4292
4293 CHECK_ERROR();
4294 CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4295
4296 ADJUST_LOCAL_OFFSET(dst, dstw);
4297
4298 if (reg_size != 4)
4299 return SLJIT_ERR_UNSUPPORTED;
4300
4301 if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4302 return SLJIT_ERR_UNSUPPORTED;
4303
4304 if (type & SLJIT_SIMD_TEST)
4305 return SLJIT_SUCCESS;
4306
4307 switch (elem_size) {
4308 case 0:
4309 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
4310 push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
4311 FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
4312 break;
4313 case 1:
4314 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
4315 break;
4316 case 2:
4317 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
4318 break;
4319 default:
4320 push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
4321 break;
4322 }
4323
4324 if (elem_size != 0)
4325 FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
4326
4327 FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
4328
4329 dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
4330 FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
4331 | (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
4332
4333 if (dst_r == tmp0)
4334 return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
4335
4336 return SLJIT_SUCCESS;
4337 }
4338
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4339 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4340 sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4341 {
4342 sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4343 sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4344 sljit_ins ins = 0;
4345
4346 CHECK_ERROR();
4347 CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4348
4349 if (reg_size != 4)
4350 return SLJIT_ERR_UNSUPPORTED;
4351
4352 if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4353 return SLJIT_ERR_UNSUPPORTED;
4354
4355 if (type & SLJIT_SIMD_TEST)
4356 return SLJIT_SUCCESS;
4357
4358 switch (SLJIT_SIMD_GET_OPCODE(type)) {
4359 case SLJIT_SIMD_OP2_AND:
4360 ins = 0xe70000000068 /* vn */;
4361 break;
4362 case SLJIT_SIMD_OP2_OR:
4363 ins = 0xe7000000006a /* vo */;
4364 break;
4365 case SLJIT_SIMD_OP2_XOR:
4366 ins = 0xe7000000006d /* vx */;
4367 break;
4368 }
4369
4370 if (type & SLJIT_SIMD_TEST)
4371 return SLJIT_SUCCESS;
4372
4373 return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
4374 }
4375
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 mem_reg)
{
	/* Atomic load: implemented as an ordinary op1 move from [mem_reg]
	   (the op size selects the load width). */
	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));

	SLJIT_SKIP_CHECKS(compiler);
	return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
}
4386
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	/* Compare-and-swap store: write src_reg to [mem_reg] if its current
	   value equals temp_reg. 32/64 bit widths map directly to cs/csg;
	   8/16 bit widths are emulated with a 32 bit cs on the containing
	   aligned word, flipping only the addressed byte/halfword. */
	sljit_ins mask;
	sljit_gpr tmp_r = gpr(temp_reg);
	sljit_gpr mem_r = gpr(mem_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
		return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
	case SLJIT_MOV_U8:
		mask = 0xff;
		break;
	case SLJIT_MOV_U16:
		mask = 0xffff;
		break;
	default:
		return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
	}

	/* tmp0 = (src_reg ^ tmp_r) & mask */
	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
	FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
	FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));

	/* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
	FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
	FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));

	/* Already computed: tmp_r = mem_r & ~0x3 */

	/* Load the aligned word, XOR in the shifted difference (turning the
	   target byte/halfword into the new value) and try to swap it back
	   in with cs. Note: opcode 0x17 is the register form xr. */
	FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
	FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp0) | R0A(tmp1)));
	return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
}
4431
4432 /* --------------------------------------------------------------------- */
4433 /* Other instructions */
4434 /* --------------------------------------------------------------------- */
4435
4436 /* On s390x we build a literal pool to hold constants. This has two main
4437 advantages:
4438
4439 1. we only need one instruction in the instruction stream (LGRL)
4440 2. we can store 64 bit addresses and use 32 bit offsets
4441
4442 To retrofit the extra information needed to build the literal pool we
4443 add a new sljit_s390x_const struct that contains the initial value but
4444 can still be cast to a sljit_const. */
4445
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)4446 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4447 {
4448 struct sljit_s390x_const *const_;
4449 sljit_gpr dst_r;
4450
4451 CHECK_ERROR_PTR();
4452 CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4453
4454 const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
4455 sizeof(struct sljit_s390x_const));
4456 PTR_FAIL_IF(!const_);
4457 set_const((struct sljit_const*)const_, compiler);
4458 const_->init_value = init_value;
4459
4460 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4461 if (have_genext())
4462 PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4463 else {
4464 PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4465 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4466 }
4467
4468 if (dst & SLJIT_MEM)
4469 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
4470
4471 return (struct sljit_const*)const_;
4472 }
4473
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)4474 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4475 {
4476 /* Update the constant pool. */
4477 sljit_uw *ptr = (sljit_uw *)addr;
4478 SLJIT_UNUSED_ARG(executable_offset);
4479
4480 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
4481 *ptr = new_target;
4482 SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
4483 SLJIT_CACHE_FLUSH(ptr, ptr + 1);
4484 }
4485
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)4486 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4487 {
4488 sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
4489 }
4490
sljit_emit_mov_addr(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)4491 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4492 {
4493 struct sljit_jump *jump;
4494 sljit_gpr dst_r;
4495
4496 CHECK_ERROR_PTR();
4497 CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
4498 ADJUST_LOCAL_OFFSET(dst, dstw);
4499
4500 jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4501 PTR_FAIL_IF(!jump);
4502 set_mov_addr(jump, compiler, 0);
4503
4504 dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4505
4506 if (have_genext())
4507 PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4508 else {
4509 PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4510 PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4511 }
4512
4513 if (dst & SLJIT_MEM)
4514 PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
4515
4516 return jump;
4517 }
4518
4519 /* TODO(carenas): EVAL probably should move up or be refactored */
4520 #undef WHEN2
4521 #undef EVAL
4522
4523 #undef tmp1
4524 #undef tmp0
4525
4526 /* TODO(carenas): undef other macros that spill like is_u12? */
4527