1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
/* Compile-time facility checks compare against __ARCH__, so they are
   only enabled when the compiler defines that macro. */
#if defined(__ARCH__)
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else /* !defined(__ARCH__) */
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif /* defined(__ARCH__) */

/* Run-time facility checks are always compiled in. */
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
sljit_get_platform_name(void)36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38 	return "s390x" SLJIT_CPUINFO;
39 }
40 
/* Instructions. */
/* An instruction is carried around as a single sljit_uw value; the
   encodings built in this file use at most the low 48 bits. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
/* The tag is bit 48, i.e. the lowest bit of the most significant halfword. */
static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
46 
/* SLJIT register indices of the two temporary registers; reg_map[] below
   translates them to hardware registers 0 and 1. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Translation table from SLJIT register index to hardware general
   purpose register number (see gpr()). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
};
53 
54 /* there are also a[2-15] available, but they are slower to access and
55  * their use is limited as mundaym explained:
56  *   https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
57  */
58 
/* General Purpose Registers [0-15]. */
/* Values of this type are hardware register numbers, not SLJIT
   register indices. */
typedef sljit_uw sljit_gpr;
61 
62 /*
63  * WARNING
64  * the following code is non standard and should be improved for
65  * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
66  * registers because r0 and r1 are the ABI recommended volatiles.
67  * there is a gpr() function that maps sljit to physical register numbers
68  * that should be used instead of the usual index into reg_map[] and
69  * will be retired ASAP (TODO: carenas)
70  */
71 
/* Hardware register numbers. The reg_map[] index noted beside each
   constant is the SLJIT register index that maps to it. */
static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
static const sljit_gpr r7 = 7;		/* reg_map[6] */
static const sljit_gpr r8 = 8;		/* reg_map[7] */
static const sljit_gpr r9 = 9;		/* reg_map[8] */
static const sljit_gpr r10 = 10;	/* reg_map[9] */
static const sljit_gpr r11 = 11;	/* reg_map[10] */
static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
static const sljit_gpr r14 = 14;	/* reg_map[16] (last initializer; reg_map[0] holds 0): return address */
static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
88 
89 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
90 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
/* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
 *                like we do now might be faster though, reserve?
 */
94 
/* TODO(carenas): should be named TMP_REG[1-2] for consistency */
/* Aliases for hardware registers r0 and r1 when used as temporaries. */
#define tmp0	r0
#define tmp1	r1
98 
/* When reg cannot be unused. */
/* True when reg lies in the range 1..SLJIT_SP. Every use of the argument
   is parenthesized so that an expression such as "a & b" may be passed;
   note that the argument is still evaluated twice. */
#define IS_GPR_REG(reg)		(((reg) > 0) && ((reg) <= SLJIT_SP))
101 
/* Link register. */
/* Same hardware register number as r14. */
static const sljit_gpr link_r = 14;     /* r14 */
104 
/* SLJIT index of the temporary floating point register; it is the last
   index of freg_map[] below. */
#define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Translation table from SLJIT float register index to the register
   number placed into instructions by the F0()..F36() macros. */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
	0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
};
110 
/* RnA(r): place the value r at bit position n of an instruction. */
#define R0A(r)	(r)
#define R4A(r)	((r) << 4)
#define R8A(r)	((r) << 8)
#define R12A(r)	((r) << 12)
#define R16A(r)	((r) << 16)
#define R20A(r)	((r) << 20)
#define R28A(r)	((r) << 28)
#define R32A(r)	((r) << 32)
#define R36A(r)	((r) << 36)

/* R0(r): hardware register number of SLJIT register r, at bit position 0. */
#define R0(r)	((sljit_ins)reg_map[r])

/* Fn(r): mapped number of SLJIT float register r, at bit position n. */
#define F0(r)	((sljit_ins)freg_map[r])
#define F4(r)	(R4A(F0(r)))
#define F12(r)	(R12A(F0(r)))
#define F20(r)	(R20A(F0(r)))
#define F28(r)	(R28A(F0(r)))
#define F32(r)	(R32A(F0(r)))
#define F36(r)	(R36A(F0(r)))
130 
/* s390x specific constant record: the generic sljit_const followed by
   the constant's initial value. */
struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
135 
136 /* Convert SLJIT register to hardware register. */
gpr(sljit_s32 r)137 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
138 {
139 	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
140 	return reg_map[r];
141 }
142 
143 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)144 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
145 {
146 	/* keep faulting instructions */
147 	if (ins == 0)
148 		return 2;
149 
150 	if ((ins & 0x00000000ffffL) == ins)
151 		return 2;
152 	if ((ins & 0x0000ffffffffL) == ins)
153 		return 4;
154 	if ((ins & 0xffffffffffffL) == ins)
155 		return 6;
156 
157 	SLJIT_UNREACHABLE();
158 	return (sljit_uw)-1;
159 }
160 
push_inst(struct sljit_compiler * compiler,sljit_ins ins)161 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
162 {
163 	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
164 	FAIL_IF(!ibuf);
165 	*ibuf = ins;
166 	compiler->size++;
167 	return SLJIT_SUCCESS;
168 }
169 
encode_inst(void ** ptr,sljit_ins ins)170 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
171 {
172 	sljit_u16 *ibuf = (sljit_u16 *)*ptr;
173 	sljit_uw size = sizeof_ins(ins);
174 
175 	SLJIT_ASSERT((size & 6) == size);
176 	switch (size) {
177 	case 6:
178 		*ibuf++ = (sljit_u16)(ins >> 32);
179 		/* fallthrough */
180 	case 4:
181 		*ibuf++ = (sljit_u16)(ins >> 16);
182 		/* fallthrough */
183 	case 2:
184 		*ibuf++ = (sljit_u16)(ins);
185 	}
186 	*ptr = (void*)ibuf;
187 	return SLJIT_SUCCESS;
188 }
189 
/* Non-zero when the flags state has the ADD or SUB bit set and the
   COMPARE bit clear. The argument is evaluated twice. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	((((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) != 0) \
		&& (((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE) == 0))
193 
194 /* Map the given type to a 4-bit condition code mask. */
get_cc(struct sljit_compiler * compiler,sljit_s32 type)195 static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
196 	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
197 	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
198 	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
199 	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
200 
201 	switch (type) {
202 	case SLJIT_EQUAL:
203 		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
204 			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
205 			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
206 				return cc0;
207 			if (type == SLJIT_OVERFLOW)
208 				return (cc0 | cc3);
209 			return (cc0 | cc2);
210 		}
211 		/* fallthrough */
212 
213 	case SLJIT_ATOMIC_STORED:
214 	case SLJIT_F_EQUAL:
215 	case SLJIT_ORDERED_EQUAL:
216 		return cc0;
217 
218 	case SLJIT_NOT_EQUAL:
219 		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
220 			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
221 			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
222 				return (cc1 | cc2 | cc3);
223 			if (type == SLJIT_OVERFLOW)
224 				return (cc1 | cc2);
225 			return (cc1 | cc3);
226 		}
227 		/* fallthrough */
228 
229 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
230 		return (cc1 | cc2 | cc3);
231 
232 	case SLJIT_LESS:
233 	case SLJIT_ATOMIC_NOT_STORED:
234 		return cc1;
235 
236 	case SLJIT_GREATER_EQUAL:
237 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
238 		return (cc0 | cc2 | cc3);
239 
240 	case SLJIT_GREATER:
241 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
242 			return cc2;
243 		return cc3;
244 
245 	case SLJIT_LESS_EQUAL:
246 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
247 			return (cc0 | cc1);
248 		return (cc0 | cc1 | cc2);
249 
250 	case SLJIT_SIG_LESS:
251 	case SLJIT_F_LESS:
252 	case SLJIT_ORDERED_LESS:
253 		return cc1;
254 
255 	case SLJIT_NOT_CARRY:
256 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
257 			return (cc2 | cc3);
258 		/* fallthrough */
259 
260 	case SLJIT_SIG_LESS_EQUAL:
261 	case SLJIT_F_LESS_EQUAL:
262 	case SLJIT_ORDERED_LESS_EQUAL:
263 		return (cc0 | cc1);
264 
265 	case SLJIT_CARRY:
266 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
267 			return (cc0 | cc1);
268 		/* fallthrough */
269 
270 	case SLJIT_SIG_GREATER:
271 	case SLJIT_UNORDERED_OR_GREATER:
272 		/* Overflow is considered greater, see SLJIT_SUB. */
273 		return cc2 | cc3;
274 
275 	case SLJIT_SIG_GREATER_EQUAL:
276 		return (cc0 | cc2 | cc3);
277 
278 	case SLJIT_OVERFLOW:
279 		if (compiler->status_flags_state & SLJIT_SET_Z)
280 			return (cc2 | cc3);
281 		/* fallthrough */
282 
283 	case SLJIT_UNORDERED:
284 		return cc3;
285 
286 	case SLJIT_NOT_OVERFLOW:
287 		if (compiler->status_flags_state & SLJIT_SET_Z)
288 			return (cc0 | cc1);
289 		/* fallthrough */
290 
291 	case SLJIT_ORDERED:
292 		return (cc0 | cc1 | cc2);
293 
294 	case SLJIT_F_NOT_EQUAL:
295 	case SLJIT_ORDERED_NOT_EQUAL:
296 		return (cc1 | cc2);
297 
298 	case SLJIT_F_GREATER:
299 	case SLJIT_ORDERED_GREATER:
300 		return cc2;
301 
302 	case SLJIT_F_GREATER_EQUAL:
303 	case SLJIT_ORDERED_GREATER_EQUAL:
304 		return (cc0 | cc2);
305 
306 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
307 		return (cc0 | cc1 | cc3);
308 
309 	case SLJIT_UNORDERED_OR_EQUAL:
310 		return (cc0 | cc3);
311 
312 	case SLJIT_UNORDERED_OR_LESS:
313 		return (cc1 | cc3);
314 	}
315 
316 	SLJIT_UNREACHABLE();
317 	return (sljit_u8)-1;
318 }
319 
/* Facility to bit index mappings.
   Note: some facilities share the same bit index. */
/* The values are bit indices as consumed by have_facility_dynamic(),
   where index 0 is the most significant bit of the first word. */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
/* The following four names share bit index 45. */
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
336 
337 /* Report whether a facility is known to be present due to the compiler
338    settings. This function should always be compiled to a constant
339    value given a constant argument. */
have_facility_static(facility_bit x)340 static SLJIT_INLINE int have_facility_static(facility_bit x)
341 {
342 #if ENABLE_STATIC_FACILITY_DETECTION
343 	switch (x) {
344 	case FAST_LONG_DISPLACEMENT_FACILITY:
345 		return (__ARCH__ >=  6 /* z990 */);
346 	case EXTENDED_IMMEDIATE_FACILITY:
347 	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
348 		return (__ARCH__ >=  7 /* z9-109 */);
349 	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
350 		return (__ARCH__ >=  8 /* z10 */);
351 	case DISTINCT_OPERAND_FACILITY:
352 		return (__ARCH__ >=  9 /* z196 */);
353 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
354 		return (__ARCH__ >= 10 /* zEC12 */);
355 	case LOAD_STORE_ON_CONDITION_2_FACILITY:
356 	case VECTOR_FACILITY:
357 		return (__ARCH__ >= 11 /* z13 */);
358 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
359 	case VECTOR_ENHANCEMENTS_1_FACILITY:
360 		return (__ARCH__ >= 12 /* z14 */);
361 	default:
362 		SLJIT_UNREACHABLE();
363 	}
364 #endif
365 	return 0;
366 }
367 
get_hwcap()368 static SLJIT_INLINE unsigned long get_hwcap()
369 {
370 	static unsigned long hwcap = 0;
371 	if (SLJIT_UNLIKELY(!hwcap)) {
372 		hwcap = getauxval(AT_HWCAP);
373 		SLJIT_ASSERT(hwcap != 0);
374 	}
375 	return hwcap;
376 }
377 
have_stfle()378 static SLJIT_INLINE int have_stfle()
379 {
380 	if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
381 		return 1;
382 
383 	return (get_hwcap() & HWCAP_S390_STFLE);
384 }
385 
/* Report whether the given facility is available. This function always
   performs a runtime check.
   Returns 1 when bit x is set in the stored facility list, 0 otherwise.
   Also returns 0 when have_stfle() reports that stfle cannot be used, or
   when dynamic detection is compiled out. */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* Facility list storage; filled by the first successful call. */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	/* Bit 0 is the most significant bit of bits[0]. */
	const sljit_uw word_index = x >> 6;
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	/* bits[0] == 0 is used as the "not yet queried" marker. */
	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* Load (number of doublewords - 1) into r0, then execute stfle
		   with cpu_features as its storage operand. */
		__asm__ __volatile__ (
			"lgr   %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */:
			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
417 
418 #define HAVE_FACILITY(name, bit) \
419 static SLJIT_INLINE int name() \
420 { \
421 	static int have = -1; \
422 	/* Static check first. May allow the function to be optimized away. */ \
423 	if (have_facility_static(bit)) \
424 		have = 1; \
425 	else if (SLJIT_UNLIKELY(have < 0)) \
426 		have = have_facility_dynamic(bit) ? 1 : 0; \
427 \
428 	return have; \
429 }
430 
HAVE_FACILITY(have_eimm,EXTENDED_IMMEDIATE_FACILITY)431 HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
432 HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
433 HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
434 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
435 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
436 HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
437 #undef HAVE_FACILITY
438 
/* Unsigned range checks: 0 .. 2^12 - 1 and 0 .. 2^32 - 1. */
#define is_u12(d)	((d) >= 0 && (d) <= 0xfffL)
#define is_u32(d)	((d) >= 0 && (d) <= 0xffffffffL)

/* True when v fits into a signed two's complement field of bitlen bits. */
#define CHECK_SIGNED(v, bitlen) \
	(-(1 << ((bitlen) - 1)) <= (v) && (v) <= (1 << ((bitlen) - 1)) - 1)

#define is_s8(d)	CHECK_SIGNED((d), 8)
#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
/* True when d survives a round trip through sljit_s32. */
#define is_s32(d)	((d) == (sljit_s32)(d))
449 
450 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
451 {
452 	sljit_uw dh, dl;
453 
454 	SLJIT_ASSERT(is_s20(d));
455 
456 	dh = (d >> 12) & 0xff;
457 	dl = ((sljit_uw)d << 8) & 0xfff00;
458 	return (dh | dl) << 8;
459 }
460 
/* TODO(carenas): variadic macro is not strictly needed */
/* Expands to the header of a static inline function named op that
   returns sljit_ins and takes the given parameter list. */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
464 
465 /* RR form instructions. */
466 #define SLJIT_S390X_RR(name, pattern) \
467 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
468 { \
469 	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
470 }
471 
472 /* AND */
473 SLJIT_S390X_RR(nr,   0x1400)
474 
475 /* BRANCH AND SAVE */
476 SLJIT_S390X_RR(basr, 0x0d00)
477 
478 /* BRANCH ON CONDITION */
479 SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */
480 
481 /* DIVIDE */
482 SLJIT_S390X_RR(dr,   0x1d00)
483 
484 /* EXCLUSIVE OR */
485 SLJIT_S390X_RR(xr,   0x1700)
486 
487 /* LOAD */
488 SLJIT_S390X_RR(lr,   0x1800)
489 
490 /* LOAD COMPLEMENT */
491 SLJIT_S390X_RR(lcr,  0x1300)
492 
493 /* OR */
494 SLJIT_S390X_RR(or,   0x1600)
495 
496 #undef SLJIT_S390X_RR
497 
498 /* RRE form instructions */
499 #define SLJIT_S390X_RRE(name, pattern) \
500 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
501 { \
502 	return (pattern) | R4A(dst) | R0A(src); \
503 }
504 
505 /* AND */
506 SLJIT_S390X_RRE(ngr,   0xb9800000)
507 
508 /* DIVIDE LOGICAL */
509 SLJIT_S390X_RRE(dlr,   0xb9970000)
510 SLJIT_S390X_RRE(dlgr,  0xb9870000)
511 
512 /* DIVIDE SINGLE */
513 SLJIT_S390X_RRE(dsgr,  0xb90d0000)
514 
515 /* EXCLUSIVE OR */
516 SLJIT_S390X_RRE(xgr,   0xb9820000)
517 
518 /* LOAD */
519 SLJIT_S390X_RRE(lgr,   0xb9040000)
520 SLJIT_S390X_RRE(lgfr,  0xb9140000)
521 
522 /* LOAD BYTE */
523 SLJIT_S390X_RRE(lbr,   0xb9260000)
524 SLJIT_S390X_RRE(lgbr,  0xb9060000)
525 
526 /* LOAD COMPLEMENT */
527 SLJIT_S390X_RRE(lcgr,  0xb9030000)
528 
529 /* LOAD HALFWORD */
530 SLJIT_S390X_RRE(lhr,   0xb9270000)
531 SLJIT_S390X_RRE(lghr,  0xb9070000)
532 
533 /* LOAD LOGICAL */
534 SLJIT_S390X_RRE(llgfr, 0xb9160000)
535 
536 /* LOAD LOGICAL CHARACTER */
537 SLJIT_S390X_RRE(llcr,  0xb9940000)
538 SLJIT_S390X_RRE(llgcr, 0xb9840000)
539 
540 /* LOAD LOGICAL HALFWORD */
541 SLJIT_S390X_RRE(llhr,  0xb9950000)
542 SLJIT_S390X_RRE(llghr, 0xb9850000)
543 
544 /* MULTIPLY LOGICAL */
545 SLJIT_S390X_RRE(mlgr,  0xb9860000)
546 
547 /* MULTIPLY SINGLE */
548 SLJIT_S390X_RRE(msgfr, 0xb91c0000)
549 
550 /* OR */
551 SLJIT_S390X_RRE(ogr,   0xb9810000)
552 
553 /* SUBTRACT */
554 SLJIT_S390X_RRE(sgr,   0xb9090000)
555 
556 #undef SLJIT_S390X_RRE
557 
558 /* RI-a form instructions */
559 #define SLJIT_S390X_RIA(name, pattern, imm_type) \
560 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
561 { \
562 	return (pattern) | R20A(reg) | (imm & 0xffff); \
563 }
564 
565 /* ADD HALFWORD IMMEDIATE */
566 SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)
567 
568 /* LOAD HALFWORD IMMEDIATE */
569 SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
570 SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)
571 
572 /* LOAD LOGICAL IMMEDIATE */
573 SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
574 SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
575 SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
576 SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
577 
578 /* MULTIPLY HALFWORD IMMEDIATE */
579 SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
580 SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)
581 
582 /* OR IMMEDIATE */
583 SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)
584 
585 #undef SLJIT_S390X_RIA
586 
587 /* RIL-a form instructions (requires extended immediate facility) */
588 #define SLJIT_S390X_RILA(name, pattern, imm_type) \
589 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
590 { \
591 	SLJIT_ASSERT(have_eimm()); \
592 	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
593 }
594 
595 /* ADD IMMEDIATE */
596 SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)
597 
598 /* ADD IMMEDIATE HIGH */
599 SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
600 
601 /* AND IMMEDIATE */
602 SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)
603 
604 /* EXCLUSIVE OR IMMEDIATE */
605 SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)
606 
607 /* INSERT IMMEDIATE */
608 SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
609 SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)
610 
611 /* LOAD IMMEDIATE */
612 SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)
613 
614 /* LOAD LOGICAL IMMEDIATE */
615 SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
616 SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
617 
618 /* SUBTRACT LOGICAL IMMEDIATE */
619 SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)
620 
621 #undef SLJIT_S390X_RILA
622 
623 /* RX-a form instructions */
624 #define SLJIT_S390X_RXA(name, pattern) \
625 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
626 { \
627 	SLJIT_ASSERT((d & 0xfff) == d); \
628 \
629 	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
630 }
631 
632 /* LOAD */
633 SLJIT_S390X_RXA(l,   0x58000000)
634 
635 /* LOAD ADDRESS */
636 SLJIT_S390X_RXA(la,  0x41000000)
637 
638 /* LOAD HALFWORD */
639 SLJIT_S390X_RXA(lh,  0x48000000)
640 
641 /* MULTIPLY SINGLE */
642 SLJIT_S390X_RXA(ms,  0x71000000)
643 
644 /* STORE */
645 SLJIT_S390X_RXA(st,  0x50000000)
646 
647 /* STORE CHARACTER */
648 SLJIT_S390X_RXA(stc, 0x42000000)
649 
650 /* STORE HALFWORD */
651 SLJIT_S390X_RXA(sth, 0x40000000)
652 
653 #undef SLJIT_S390X_RXA
654 
655 /* RXY-a instructions */
656 #define SLJIT_S390X_RXYA(name, pattern, cond) \
657 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
658 { \
659 	SLJIT_ASSERT(cond); \
660 \
661 	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
662 }
663 
664 /* LOAD */
665 SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
666 SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
667 SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)
668 
669 /* LOAD BYTE */
670 SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
671 SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())
672 
673 /* LOAD HALFWORD */
674 SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
675 SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)
676 
677 /* LOAD LOGICAL */
678 SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)
679 
680 /* LOAD LOGICAL CHARACTER */
681 SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
682 SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)
683 
684 /* LOAD LOGICAL HALFWORD */
685 SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
686 SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)
687 
688 /* MULTIPLY SINGLE */
689 SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
690 SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)
691 
692 /* STORE */
693 SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
694 SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)
695 
696 /* STORE CHARACTER */
697 SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())
698 
699 /* STORE HALFWORD */
700 SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())
701 
702 #undef SLJIT_S390X_RXYA
703 
704 /* RSY-a instructions */
705 #define SLJIT_S390X_RSYA(name, pattern, cond) \
706 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
707 { \
708 	SLJIT_ASSERT(cond); \
709 \
710 	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
711 }
712 
713 /* LOAD MULTIPLE */
714 SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)
715 
716 /* SHIFT LEFT LOGICAL */
717 SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)
718 
719 /* SHIFT RIGHT SINGLE */
720 SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)
721 
722 /* STORE MULTIPLE */
723 SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)
724 
725 #undef SLJIT_S390X_RSYA
726 
727 /* RIE-f instructions (require general-instructions-extension facility) */
728 #define SLJIT_S390X_RIEF(name, pattern) \
729 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
730 { \
731 	sljit_ins i3, i4, i5; \
732 \
733 	SLJIT_ASSERT(have_genext()); \
734 	i3 = (sljit_ins)start << 24; \
735 	i4 = (sljit_ins)end << 16; \
736 	i5 = (sljit_ins)rot << 8; \
737 \
738 	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
739 }
740 
741 /* ROTATE THEN AND SELECTED BITS */
742 /* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */
743 
744 /* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
745 /* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */
746 
747 /* ROTATE THEN OR SELECTED BITS */
748 SLJIT_S390X_RIEF(rosbg,  0xec0000000056)
749 
750 /* ROTATE THEN INSERT SELECTED BITS */
751 /* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
752 /* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
753 
754 /* ROTATE THEN INSERT SELECTED BITS HIGH */
755 SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
756 
757 /* ROTATE THEN INSERT SELECTED BITS LOW */
758 /* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
759 
760 #undef SLJIT_S390X_RIEF
761 
762 /* RRF-c instructions (require load/store-on-condition 1 facility) */
763 #define SLJIT_S390X_RRFC(name, pattern) \
764 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
765 { \
766 	sljit_ins m3; \
767 \
768 	SLJIT_ASSERT(have_lscond1()); \
769 	m3 = (sljit_ins)(mask & 0xf) << 12; \
770 \
771 	return (pattern) | m3 | R4A(dst) | R0A(src); \
772 }
773 
774 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
775 SLJIT_S390X_RRFC(locr,  0xb9f20000)
776 SLJIT_S390X_RRFC(locgr, 0xb9e20000)
777 
778 #undef SLJIT_S390X_RRFC
779 
780 /* RIE-g instructions (require load/store-on-condition 2 facility) */
781 #define SLJIT_S390X_RIEG(name, pattern) \
782 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
783 { \
784 	sljit_ins m3, i2; \
785 \
786 	SLJIT_ASSERT(have_lscond2()); \
787 	m3 = (sljit_ins)(mask & 0xf) << 32; \
788 	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
789 \
790 	return (pattern) | R36A(reg) | m3 | i2; \
791 }
792 
793 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
794 SLJIT_S390X_RIEG(lochi,  0xec0000000042)
795 SLJIT_S390X_RIEG(locghi, 0xec0000000046)
796 
797 #undef SLJIT_S390X_RIEG
798 
799 #define SLJIT_S390X_RILB(name, pattern, cond) \
800 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
801 { \
802 	SLJIT_ASSERT(cond); \
803 \
804 	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
805 }
806 
807 /* BRANCH RELATIVE AND SAVE LONG */
808 SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
809 
810 /* LOAD ADDRESS RELATIVE LONG */
811 SLJIT_S390X_RILB(larl,  0xc00000000000, 1)
812 
813 /* LOAD RELATIVE LONG */
814 SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())
815 
816 #undef SLJIT_S390X_RILB
817 
/* Branch to the address in target: pattern 0x07f0 with the (unmasked)
   register number in the low bits. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	const sljit_ins opcode = 0x07f0;

	return opcode | target;
}
822 
/* Relative branch on condition (pattern 0xa7040000): the low four bits
   of mask are placed at bit 20 and the low 16 bits of target at bit 0. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	const sljit_ins opcode = 0xa7040000L;

	return opcode | ((sljit_ins)(mask & 0xf) << 20) | ((sljit_ins)target & 0xffff);
}
829 
/* Long relative branch on condition (pattern 0xc00400000000): the low
   four bits of mask are placed at bit 36 and the low 32 bits of target
   at bit 0. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	const sljit_ins opcode = 0xc00400000000L;

	return opcode | ((sljit_ins)(mask & 0xf) << 36) | ((sljit_ins)target & 0xffffffff);
}
836 
/* FIND LEFTMOST ONE (pattern 0xb9830000); asserts have_eimm().
   The instruction uses the RRE layout, so dst is placed at bit 4 and src
   at bit 0, exactly like the SLJIT_S390X_RRE encoders. Placing dst at
   bit 8 would put it into the unused field and leave the first operand
   field zero. */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | R4A(dst) | R0A(src);
}
842 
843 /* INSERT PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(ipm,sljit_gpr dst)844 SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
845 {
846 	return 0xb2220000 | R4A(dst);
847 }
848 
849 /* SET PROGRAM MASK */
SLJIT_S390X_INSTRUCTION(spm,sljit_gpr dst)850 SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
851 {
852 	return 0x0400 | R4A(dst);
853 }
854 
855 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
SLJIT_S390X_INSTRUCTION(risbhgz,sljit_gpr dst,sljit_gpr src,sljit_u8 start,sljit_u8 end,sljit_u8 rot)856 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
857 {
858 	return risbhg(dst, src, start, 0x8 | end, rot);
859 }
860 
861 #undef SLJIT_S390X_INSTRUCTION
862 
/* Emits a sequence that adjusts the condition code depending on whether
   dst_r is zero. Uses tmp1. The 32-bit "or" is used for the zero test
   when op has SLJIT_32 set, the 64-bit "ogr" otherwise. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	const sljit_s32 is_32bit = (op & SLJIT_32) != 0;
	/* Branch distances; each term is the length in halfwords of one
	   emitted instruction: brc, ipm, or/ogr, brc, slfi, spm. */
	const sljit_sw skip_sequence = 2 + 2 + (is_32bit ? 1 : 2) + 2 + 3 + 1;
	const sljit_sw skip_decrement = 2 + 3; /* brc, slfi */
	const sljit_ins test_dst = is_32bit ? or(dst_r, dst_r) : ogr(dst_r, dst_r);

	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	FAIL_IF(push_inst(compiler, brc(0xc, skip_sequence)));
	FAIL_IF(push_inst(compiler, ipm(tmp1)));
	FAIL_IF(push_inst(compiler, test_dst));
	FAIL_IF(push_inst(compiler, brc(0x8, skip_decrement)));
	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
	FAIL_IF(push_inst(compiler, spm(tmp1)));
	return SLJIT_SUCCESS;
}
879 
880 /* load 64-bit immediate into register without clobbering flags */
push_load_imm_inst(struct sljit_compiler * compiler,sljit_gpr target,sljit_sw v)881 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
882 {
883 	/* 4 byte instructions */
884 	if (is_s16(v))
885 		return push_inst(compiler, lghi(target, (sljit_s16)v));
886 
887 	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
888 		return push_inst(compiler, llill(target, (sljit_u16)v));
889 
890 	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
891 		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
892 
893 	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
894 		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
895 
896 	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
897 		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
898 
899 	if (is_s32(v))
900 		return push_inst(compiler, lgfi(target, (sljit_s32)v));
901 
902 	if (((sljit_uw)v >> 32) == 0)
903 		return push_inst(compiler, llilf(target, (sljit_u32)v));
904 
905 	if (((sljit_uw)v << 32) == 0)
906 		return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
907 
908 	FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
909 	return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
910 }
911 
/* Memory operand in D(X,B) form, as produced by make_addr_bxy() and
   make_addr_bx(). */
struct addr {
	sljit_gpr base;   /* B: base register (r0 when there is none) */
	sljit_gpr index;  /* X: index register (r0 when there is none) */
	sljit_s32 offset; /* D: displacement */
};
917 
918 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
make_addr_bxy(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)919 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
920 	struct addr *addr, sljit_s32 mem, sljit_sw off,
921 	sljit_gpr tmp /* clobbered, must not be r0 */)
922 {
923 	sljit_gpr base = r0;
924 	sljit_gpr index = r0;
925 
926 	SLJIT_ASSERT(tmp != r0);
927 	if (mem & REG_MASK)
928 		base = gpr(mem & REG_MASK);
929 
930 	if (mem & OFFS_REG_MASK) {
931 		index = gpr(OFFS_REG(mem));
932 		if (off != 0) {
933 			/* shift and put the result into tmp */
934 			SLJIT_ASSERT(0 <= off && off < 64);
935 			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
936 			index = tmp;
937 			off = 0; /* clear offset */
938 		}
939 	}
940 	else if (!is_s20(off)) {
941 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
942 		index = tmp;
943 		off = 0; /* clear offset */
944 	}
945 	addr->base = base;
946 	addr->index = index;
947 	addr->offset = (sljit_s32)off;
948 	return SLJIT_SUCCESS;
949 }
950 
951 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
make_addr_bx(struct sljit_compiler * compiler,struct addr * addr,sljit_s32 mem,sljit_sw off,sljit_gpr tmp)952 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
953 	struct addr *addr, sljit_s32 mem, sljit_sw off,
954 	sljit_gpr tmp /* clobbered, must not be r0 */)
955 {
956 	sljit_gpr base = r0;
957 	sljit_gpr index = r0;
958 
959 	SLJIT_ASSERT(tmp != r0);
960 	if (mem & REG_MASK)
961 		base = gpr(mem & REG_MASK);
962 
963 	if (mem & OFFS_REG_MASK) {
964 		index = gpr(OFFS_REG(mem));
965 		if (off != 0) {
966 			/* shift and put the result into tmp */
967 			SLJIT_ASSERT(0 <= off && off < 64);
968 			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
969 			index = tmp;
970 			off = 0; /* clear offset */
971 		}
972 	}
973 	else if (!is_u12(off)) {
974 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
975 		index = tmp;
976 		off = 0; /* clear offset */
977 	}
978 	addr->base = base;
979 	addr->index = index;
980 	addr->offset = (sljit_s32)off;
981 	return SLJIT_SUCCESS;
982 }
983 
/* EVAL invokes op as op(r, displacement, index, base), taking the three
   address parts from the fields of the struct addr value 'addr'. */
#define EVAL(op, r, addr) op(r, (addr).offset, (addr).index, (addr).base)
/* WHEN selects i1 when cond is true and i2 otherwise. The whole expansion is
   parenthesized so that it can be used safely inside a larger expression. */
#define WHEN(cond, r, i1, i2, addr) \
	((cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr))
987 
988 /* May clobber tmp1. */
load_store_op(struct sljit_compiler * compiler,sljit_gpr reg,sljit_s32 mem,sljit_sw memw,sljit_s32 is_32bit,const sljit_ins * forms)989 static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
990 		sljit_s32 mem, sljit_sw memw,
991 		sljit_s32 is_32bit, const sljit_ins* forms)
992 {
993 	struct addr addr;
994 
995 	SLJIT_ASSERT(mem & SLJIT_MEM);
996 
997 	if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
998 		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
999 		return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
1000 	}
1001 
1002 	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
1003 	return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1004 }
1005 
/* Load opcodes as indexed by load_store_op(): [0] 32-bit with a 12-bit
   displacement, [1] 32-bit with a 20-bit displacement, [2] 64-bit. */
static const sljit_ins load_forms[3] = {
	0x58000000 /* l */,
	0xe30000000058 /* ly */,
	0xe30000000004 /* lg */
};
1011 
/* Store opcodes as indexed by load_store_op(): [0] 32-bit with a 12-bit
   displacement, [1] 32-bit with a 20-bit displacement, [2] 64-bit. */
static const sljit_ins store_forms[3] = {
	0x50000000 /* st */,
	0xe30000000050 /* sty */,
	0xe30000000024 /* stg */
};
1017 
/* Halfword load opcodes. NOTE(review): presumably indexed the same way as
   load_forms (short, long, 64-bit); the user of this table is not visible here. */
static const sljit_ins load_halfword_forms[3] = {
	0x48000000 /* lh */,
	0xe30000000078 /* lhy */,
	0xe30000000015 /* lgh */
};
1023 
1024 /* May clobber tmp1. */
load_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)1025 static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1026 		sljit_s32 src, sljit_sw srcw,
1027 		sljit_s32 is_32bit)
1028 {
1029 	return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
1030 }
1031 
1032 /* May clobber tmp1. */
load_unsigned_word(struct sljit_compiler * compiler,sljit_gpr dst_r,sljit_s32 src,sljit_sw srcw,sljit_s32 is_32bit)1033 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1034 		sljit_s32 src, sljit_sw srcw,
1035 		sljit_s32 is_32bit)
1036 {
1037 	struct addr addr;
1038 	sljit_ins ins;
1039 
1040 	SLJIT_ASSERT(src & SLJIT_MEM);
1041 
1042 	FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1043 
1044 	ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1045 	return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1046 }
1047 
1048 /* May clobber tmp1. */
store_word(struct sljit_compiler * compiler,sljit_gpr src_r,sljit_s32 dst,sljit_sw dstw,sljit_s32 is_32bit)1049 static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1050 		sljit_s32 dst, sljit_sw dstw,
1051 		sljit_s32 is_32bit)
1052 {
1053 	return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
1054 }
1055 
/* WHEN is removed here so it does not leak further; EVAL stays defined. */
#undef WHEN
1057 
/* Move an immediate, a memory operand or a register into dst_r. The operand
   width follows compiler->mode. A register source must differ from dst_r. */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 is_32 = (compiler->mode & SLJIT_32) != 0;

	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src == SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);

	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, is_32);

	/* register to register copy */
	if (is_32)
		return push_inst(compiler, lr(dst_r, gpr(src & REG_MASK)));

	return push_inst(compiler, lgr(dst_r, gpr(src & REG_MASK)));
}
1075 
/* Emit a two-operand register-register instruction: result = result <op> src2,
   where the result register is first loaded with src1 when necessary.
   If dst is also src2, the operation is done in tmp0 and copied to dst
   afterwards. A non-register src2 is loaded into tmp1. */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr result_r = tmp0;
	sljit_gpr operand_r;
	sljit_s32 load_src1 = 1;
	sljit_s32 copy_back = 0;

	if (FAST_IS_REG(dst)) {
		if (dst == src1) {
			/* first operand is already in place */
			result_r = gpr(dst);
			load_src1 = 0;
		}
		else if (dst == src2) {
			/* dst must not be overwritten before src2 is consumed */
			copy_back = 1;
		}
		else
			result_r = gpr(dst);
	}

	if (load_src1)
		FAIL_IF(emit_move(compiler, result_r, src1, src1w));

	if (!FAST_IS_REG(src2)) {
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
		operand_r = tmp1;
	}
	else
		operand_r = gpr(src2);

	FAIL_IF(push_inst(compiler, ins | R4A(result_r) | R0A(operand_r)));

	if (!copy_back)
		return SLJIT_SUCCESS;

	result_r = gpr(dst & REG_MASK);

	if (compiler->mode & SLJIT_32)
		return push_inst(compiler, lr(result_r, tmp0));

	return push_inst(compiler, lgr(result_r, tmp0));
}
1112 
/* Emit a register-register instruction with a single source operand.
   The result goes to dst when it is a register, otherwise to tmp0.
   A non-register source is loaded into tmp1 first. */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr result_r = tmp0;
	sljit_gpr operand_r = tmp1;

	if (FAST_IS_REG(dst))
		result_r = gpr(dst);

	if (!FAST_IS_REG(src1))
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
	else
		operand_r = gpr(src1);

	return push_inst(compiler, ins | R4A(result_r) | R0A(operand_r));
}
1127 
/* Emit a three-operand register instruction (separate destination and two
   sources). Non-register sources are loaded into tmp0 (src1) and tmp1 (src2).
   The result goes to dst when it is a register, otherwise to tmp0. */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr result_r = tmp0;
	sljit_gpr first_r = tmp0;
	sljit_gpr second_r = tmp1;

	if (FAST_IS_REG(dst))
		result_r = gpr(dst & REG_MASK);

	if (!FAST_IS_REG(src1))
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
	else
		first_r = gpr(src1);

	if (!FAST_IS_REG(src2))
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
	else
		second_r = gpr(src2);

	return push_inst(compiler, ins | R4A(result_r) | R0A(first_r) | R12A(second_r));
}
1149 
/* Immediate instruction layout selected by emit_ri(). */
typedef enum {
	RI_A,  /* 16-bit immediate, register encoded with R20A */
	RIL_A, /* 32-bit immediate, register encoded with R36A */
} emit_ril_type;
1154 
/* Emit a register-immediate instruction: the result register is loaded with
   src1 (unless dst is a register equal to src1) and then combined with the
   immediate src2w, truncated to 16 or 32 bits depending on 'type'. */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr target_r;

	if (FAST_IS_REG(dst)) {
		target_r = gpr(dst);

		/* skip the move when the first operand is already in place */
		if (dst != src1)
			FAIL_IF(emit_move(compiler, target_r, src1, src1w));
	}
	else {
		target_r = tmp0;
		FAIL_IF(emit_move(compiler, target_r, src1, src1w));
	}

	if (type != RIL_A)
		return push_inst(compiler, ins | R20A(target_r) | (src2w & 0xffff));

	return push_inst(compiler, ins | R36A(target_r) | (src2w & 0xffffffff));
}
1178 
/* Emit an instruction with separate destination and source registers plus a
   16-bit immediate (placed at bit 16). A non-register source is loaded into
   tmp0; the result goes to dst when it is a register, otherwise to tmp0. */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr result_r = tmp0;
	sljit_gpr operand_r = tmp0;

	if (FAST_IS_REG(dst))
		result_r = gpr(dst);

	if (FAST_IS_REG(src1))
		operand_r = gpr(src1 & REG_MASK);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	return push_inst(compiler, ins | R36A(result_r) | R32A(operand_r) | (sljit_ins)(src2w & 0xffff) << 16);
}
1194 
/* Register-memory instruction layout selected by emit_rx(). */
typedef enum {
	RX_A,  /* unsigned 12-bit displacement, fields encoded with R20A/R16A/R12A */
	RXY_A, /* signed 20-bit displacement, fields encoded with R36A/R32A/R28A */
} emit_rx_type;
1199 
/* Emit a register-memory instruction: result = src1 <op> [src2].
   The result register is loaded with src1 when necessary. If dst is one of
   the registers used to address src2, the operation is done in tmp0 and the
   value is copied to dst afterwards. tmp1 is used for a shifted index or for
   a displacement that does not fit the selected form. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr result_r = tmp0;
	sljit_gpr base_r, index_r;
	sljit_s32 load_src1 = 1;
	sljit_s32 copy_back = 0;
	sljit_s32 disp_too_large;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		if (dst == src1) {
			/* first operand is already in place */
			result_r = gpr(dst);
			load_src1 = 0;
		}
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			/* dst is part of the address: compute in tmp0 */
			copy_back = 1;
		}
		else
			result_r = gpr(dst);
	}

	if (load_src1)
		FAIL_IF(emit_move(compiler, result_r, src1, src1w));

	base_r = gpr(src2 & REG_MASK);
	index_r = tmp0;

	if (src2 & OFFS_REG_MASK) {
		index_r = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			/* scaled index: tmp1 = index << shift */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index_r, src2w & 0x3, 0)));
			index_r = tmp1;
			src2w = 0;
		}
	}
	else {
		disp_too_large = (type == RX_A) ? !is_u12(src2w) : (type == RXY_A && !is_s20(src2w));

		if (disp_too_large) {
			FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

			/* with a base register tmp1 becomes the index, otherwise the base */
			if (src2 & REG_MASK)
				index_r = tmp1;
			else
				base_r = tmp1;
			src2w = 0;
		}
	}

	if (type == RX_A)
		ins |= R20A(result_r) | R16A(index_r) | R12A(base_r) | (sljit_ins)src2w;
	else
		ins |= R36A(result_r) | R32A(index_r) | R28A(base_r) | disp_s20((sljit_s32)src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (!copy_back)
		return SLJIT_SUCCESS;

	result_r = gpr(dst);

	if (compiler->mode & SLJIT_32)
		return push_inst(compiler, lr(result_r, tmp0));

	return push_inst(compiler, lgr(result_r, tmp0));
}
1260 
/* Emit a storage-immediate instruction: the low 8 bits of srcw are applied
   to the memory operand (dst, dstw), addressed as base + signed 20-bit
   displacement.

   The instruction has no index field. An operand with an offset register, or
   with a displacement that does not fit into 20 signed bits, therefore has its
   effective address computed into tmp1 first, which then serves as the base.
   May clobber tmp1. */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	sljit_gpr dst_r = tmp1;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = gpr(OFFS_REG(dst));

		if ((dstw & 0x3) != 0) {
			/* scaled index: tmp1 = offset register << shift */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
			index = tmp1;
		}

		/* tmp1 = base register + (possibly shifted) index */
		FAIL_IF(push_inst(compiler, la(tmp1, 0, index, gpr(dst & REG_MASK))));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		/* tmp1 = base register + displacement; absolute addresses have no base */
		if (dst & REG_MASK)
			FAIL_IF(push_inst(compiler, la(tmp1, 0, tmp1, gpr(dst & REG_MASK))));

		dstw = 0;
	}
	else
		dst_r = gpr(dst & REG_MASK);

	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
}
1293 
/* Opcode variants of one arithmetic/logic operation, as consumed by
   emit_commutative() and emit_non_commutative(). A value of 0 means that
   the variant is not available. */
struct ins_forms {
	sljit_ins op_r;   /* 32-bit, two-operand register form (emit_rr) */
	sljit_ins op_gr;  /* 64-bit, two-operand register form (emit_rr) */
	sljit_ins op_rk;  /* 32-bit, three-operand register form (emit_rrf) */
	sljit_ins op_grk; /* 64-bit, three-operand register form (emit_rrf) */
	sljit_ins op;     /* 32-bit, memory operand with 12-bit displacement (RX_A) */
	sljit_ins op_y;   /* 32-bit, memory operand with 20-bit displacement (RXY_A) */
	sljit_ins op_g;   /* 64-bit, memory operand with 20-bit displacement (RXY_A) */
};
1303 
/* Emit a commutative operation dst = src1 <op> src2, picking the cheapest
   available encoding from 'forms'. Because the operation is commutative the
   operands may be swapped so that a memory operand can be used directly, or
   so that dst coincides with the first operand of a two-operand instruction. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	/* First try the register-memory forms when at least one operand is in memory. */
	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* ins12: 12-bit displacement form (32-bit only), ins20: 20-bit displacement form. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			/* Use src2 as the memory operand when src1 is not in memory, or when src1
			   can be loaded without a shifted index or an oversized displacement. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			/* Otherwise swap the operands and use src1 as the memory operand. */
			if (src1 & SLJIT_MEM) {
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			/* Only one memory form exists: use whichever is available. */
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	/* Fall back to the register forms: two-operand (ins) or three-operand (ins_k). */
	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	/* When dst is one of the sources, the two-operand form needs no extra move. */
	if (ins && FAST_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1386 
/* Emit a non-commutative operation dst = src1 <op> src2. Only src2 may be
   used directly as a memory operand; otherwise the three-operand register
   form is preferred unless it is unavailable or dst already holds src1. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 is_32 = compiler->mode & SLJIT_32;
	sljit_ins three_op;

	if (src2 & SLJIT_MEM) {
		/* the 12-bit displacement form only exists for 32-bit operations */
		sljit_ins rx_ins = is_32 ? forms->op : 0;
		sljit_ins rxy_ins = is_32 ? forms->op_y : forms->op_g;

		if (rx_ins && rxy_ins) {
			/* the long form pays off only for a displacement beyond 12 bits that fits into 20 */
			if (!(src2 & OFFS_REG_MASK) && !is_u12(src2w) && is_s20(src2w))
				return emit_rx(compiler, rxy_ins, dst, src1, src1w, src2, src2w, RXY_A);

			return emit_rx(compiler, rx_ins, dst, src1, src1w, src2, src2w, RX_A);
		}

		if (rx_ins)
			return emit_rx(compiler, rx_ins, dst, src1, src1w, src2, src2w, RX_A);

		if (rxy_ins)
			return emit_rx(compiler, rxy_ins, dst, src1, src1w, src2, src2w, RXY_A);
	}

	three_op = is_32 ? forms->op_rk : forms->op_grk;

	if (three_op != 0 && !(FAST_IS_REG(dst) && dst == src1))
		return emit_rrf(compiler, three_op, dst, src1, src1w, src2, src2w);

	return emit_rr(compiler, is_32 ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
}
1426 
/*
 * Assemble the buffered instructions into executable memory and return the
 * entry address (NULL-style failure is reported through the PTR macros).
 *
 * Works in two passes over the instruction buffers:
 *   1. size calculation: computes the code size in bytes (ins_size) and the
 *      literal pool size (pool_size), and converts every label position from
 *      an instruction index into a byte offset;
 *   2. emission: encodes each instruction, resolving constants, jumps and
 *      put_labels against the literal pool and the label addresses.
 *
 * Resulting layout: [code: ins_size bytes][padding: pad_size][literal pool].
 */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0; /* i: index within a buffer, j: global instruction index */
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
	 *                before to avoid the following loop (except for
	 *                pool_size)
	 */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			/* a tagged instruction refers to a constant: reserve one pool slot */
			if (ins & sljit_ins_const) {
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			/* label->size changes meaning here: instruction index -> byte offset */
			if (label && label->size == j) {
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   basr %r14, %r1 (or bcr <mask>, %r1) */
					/* one pool slot for the target; the replacement pair is */
					/* 2 bytes longer than the instruction counted below */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			/* each put_label keeps its target address in the pool */
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	/* every label, jump and put_label must have been matched by now */
	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
	 *                be created before the function code, instead of
	 *                globally; if generated code is too big could
	 *                need offsets bigger than 32bit words and assert()
	 */
	/* the pool starts right after the padded code */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_sw)code_ptr;
				offset = (sljit_sw)pool_ptr - source;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));

				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = (sljit_uw)const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					sljit_ins op, arg;

					/* from now on jump->addr is the pool slot holding the target */
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));

					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)target;

					/* branch to tmp1 */
					/* op: low opcode nibble of the original branch, arg: its register/mask field */
					op = (ins >> 32) & 0xf;
					arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						/* no other branch instruction is expected here */
						abort();
					}
				}
				else {
					/* jump->addr points at the offset field, 2 bytes into the instruction */
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	/* both passes must agree on the code and pool sizes */
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	/* padding and pool are only counted when a pool exists */
	if (pool_size)
		compiler->executable_size += (pad_size + pool_size);
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1654 
sljit_has_cpu_feature(sljit_s32 feature_type)1655 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1656 {
1657 	/* TODO(mundaym): implement all */
1658 	switch (feature_type) {
1659 	case SLJIT_HAS_FPU:
1660 #ifdef SLJIT_IS_FPU_AVAILABLE
1661 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
1662 #else
1663 		return 1;
1664 #endif /* SLJIT_IS_FPU_AVAILABLE */
1665 
1666 	case SLJIT_HAS_CLZ:
1667 	case SLJIT_HAS_REV:
1668 	case SLJIT_HAS_ROT:
1669 	case SLJIT_HAS_PREFETCH:
1670 	case SLJIT_HAS_COPY_F32:
1671 	case SLJIT_HAS_COPY_F64:
1672 	case SLJIT_HAS_SIMD:
1673 	case SLJIT_HAS_ATOMIC:
1674 		return 1;
1675 
1676 	case SLJIT_HAS_CTZ:
1677 		return 2;
1678 
1679 	case SLJIT_HAS_CMOV:
1680 		return have_lscond1() ? 1 : 0;
1681 	}
1682 	return 0;
1683 }
1684 
/* Returns 0 for every comparison type; the argument is ignored. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
{
	SLJIT_UNUSED_ARG(type);
	return 0;
}
1690 
1691 /* --------------------------------------------------------------------- */
1692 /*  Entry, exit                                                          */
1693 /* --------------------------------------------------------------------- */
1694 
/*
 * Emit the function prologue.
 *
 * In order: stores the general purpose registers that must be preserved
 * (r6 upwards and r14) relative to r15, stores the floating point registers
 * that must be preserved, lowers r15 by the 16-byte aligned frame size, and,
 * unless SLJIT_ENTER_REG_ARG is set, copies the incoming integer arguments
 * into saved registers.
 *
 * emit_stack_frame_release() walks the same layout in the same order, so the
 * offsets computed here must stay in sync with it.
 */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	/* the save slots start two words above r15 */
	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* every register is in use: store one contiguous range starting at r6 */
		if (saved_arg_count == 0) {
			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			/* kept saved registers are skipped; r14 is stored separately below */
			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
		}
	} else {
		/* scratch registers from SLJIT_FIRST_SAVED_REG (r6) upwards must be preserved */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* saved registers occupy the range ending at r13, followed by r14 */
		if (saved_arg_count == 0) {
			if (saveds == 0) {
				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > saved_arg_count) {
			/* only the saved registers that are not kept need to be stored */
			if (saveds == saved_arg_count + 1) {
				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
			}
		}
	}

	/* with kept saved registers r14 was not part of any range above */
	if (saved_arg_count > 0) {
		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* saved float registers, counting down from SLJIT_FS0 */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* float scratch registers from SLJIT_FIRST_SAVED_FLOAT_REG upwards */
	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* frame size: requested locals plus the default frame, rounded up to 16 bytes */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	/* r15 -= local_size, with lay when the negated size fits a 20-bit displacement */
	if (is_s20(-local_size))
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
	else
		FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));

	/* arguments stay in their incoming registers */
	if (options & SLJIT_ENTER_REG_ARG)
		return SLJIT_SUCCESS;

	/* walk the argument descriptors; tmp counts the integer arguments seen so far */
	arg_types >>= SLJIT_ARG_SHIFT;
	saved_arg_count = 0;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			/* arguments not flagged as scratch are copied into S0, S1, ... */
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
				saved_arg_count++;
			}
			tmp++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1790 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1791 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1792 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1793 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1794 {
1795 	CHECK_ERROR();
1796 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1797 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1798 
1799 	compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1800 	return SLJIT_SUCCESS;
1801 }
1802 
/*
 * Emit the register-restoring part of the function epilogue: raise r15 by
 * compiler->local_size, then reload the general purpose and floating point
 * registers in the order and at the offsets used by sljit_emit_enter().
 *
 * last_reg is the highest general purpose register to reload. The code below
 * tests it against r14 and r13; r14 is only reloaded when last_reg is r14.
 */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;
	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);

	/* r15 += local_size, using the shortest encoding that fits */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else if (is_s20(local_size))
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
	else
		FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));

	/* the save slots start two words above r15 */
	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* every register was stored as one contiguous range starting at r6 */
		if (kept_saveds_count == 0) {
			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
			offset += 9 * SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
		}
	} else {
		/* scratch registers from SLJIT_FIRST_SAVED_REG (r6) upwards */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		if (kept_saveds_count == 0) {
			if (saveds == 0) {
				/* only the r14 slot exists; it is skipped unless r14 is wanted */
				if (last_reg == r14)
					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else if (saveds == 1 && last_reg == r13) {
				/* reload r13 only, but step over both the r13 and r14 slots */
				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
				offset += 2 * SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
				offset += (saveds + 1) * SSIZE_OF(sw);
			}
		} else if (saveds > kept_saveds_count) {
			/* kept saved registers were never stored, so they are not reloaded */
			if (saveds == kept_saveds_count + 1) {
				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
				offset += SSIZE_OF(sw);
			} else {
				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
			}
		}
	}

	/* with kept saved registers r14 has its own slot after the ranges above */
	if (kept_saveds_count > 0) {
		if (last_reg == r14)
			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
		offset += SSIZE_OF(sw);
	}

	/* saved float registers, counting down from SLJIT_FS0 */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* float scratch registers from SLJIT_FIRST_SAVED_FLOAT_REG upwards */
	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1878 
/* Emits a return without a return value: the stack frame is released and
   control is transferred through r14. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_return_void(compiler));

	/* Passing r14 as the last register makes the release code reload r14 as well,
	   which is the register used by the branch below. */
	FAIL_IF(emit_stack_frame_release(compiler, r14));
	return push_inst(compiler, br(r14)); /* return */
}
1887 
sljit_emit_return_to(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)1888 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1889 	sljit_s32 src, sljit_sw srcw)
1890 {
1891 	CHECK_ERROR();
1892 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1893 
1894 	if (src & SLJIT_MEM) {
1895 		ADJUST_LOCAL_OFFSET(src, srcw);
1896 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
1897 		src = TMP_REG2;
1898 		srcw = 0;
1899 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1900 		FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
1901 		src = TMP_REG2;
1902 		srcw = 0;
1903 	}
1904 
1905 	FAIL_IF(emit_stack_frame_release(compiler, r13));
1906 
1907 	SLJIT_SKIP_CHECKS(compiler);
1908 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1909 }
1910 
1911 /* --------------------------------------------------------------------- */
1912 /*  Operators                                                            */
1913 /* --------------------------------------------------------------------- */
1914 
sljit_emit_op0(struct sljit_compiler * compiler,sljit_s32 op)1915 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1916 {
1917 	sljit_gpr arg0 = gpr(SLJIT_R0);
1918 	sljit_gpr arg1 = gpr(SLJIT_R1);
1919 
1920 	CHECK_ERROR();
1921 	CHECK(check_sljit_emit_op0(compiler, op));
1922 
1923 	op = GET_OPCODE(op) | (op & SLJIT_32);
1924 	switch (op) {
1925 	case SLJIT_BREAKPOINT:
1926 		/* The following invalid instruction is emitted by gdb. */
1927 		return push_inst(compiler, 0x0001 /* 2-byte trap */);
1928 	case SLJIT_NOP:
1929 		return push_inst(compiler, 0x0700 /* 2-byte nop */);
1930 	case SLJIT_LMUL_UW:
1931 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1932 		break;
1933 	case SLJIT_LMUL_SW:
1934 		/* signed multiplication from: */
1935 		/* Hacker's Delight, Second Edition: Chapter 8-3. */
1936 		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1937 		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1938 		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1939 		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1940 
1941 		/* unsigned multiplication */
1942 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1943 
1944 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1945 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1946 		break;
1947 	case SLJIT_DIV_U32:
1948 	case SLJIT_DIVMOD_U32:
1949 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1950 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1951 		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1952 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1953 		if (op == SLJIT_DIVMOD_U32)
1954 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1955 
1956 		return SLJIT_SUCCESS;
1957 	case SLJIT_DIV_S32:
1958 	case SLJIT_DIVMOD_S32:
1959 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1960 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1961 		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1962 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1963 		if (op == SLJIT_DIVMOD_S32)
1964 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1965 
1966 		return SLJIT_SUCCESS;
1967 	case SLJIT_DIV_UW:
1968 	case SLJIT_DIVMOD_UW:
1969 		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1970 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1971 		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1972 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1973 		if (op == SLJIT_DIVMOD_UW)
1974 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1975 
1976 		return SLJIT_SUCCESS;
1977 	case SLJIT_DIV_SW:
1978 	case SLJIT_DIVMOD_SW:
1979 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1980 		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1981 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1982 		if (op == SLJIT_DIVMOD_SW)
1983 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1984 
1985 		return SLJIT_SUCCESS;
1986 	case SLJIT_ENDBR:
1987 		return SLJIT_SUCCESS;
1988 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1989 		return SLJIT_SUCCESS;
1990 	default:
1991 		SLJIT_UNREACHABLE();
1992 	}
1993 	/* swap result registers */
1994 	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
1995 	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
1996 	return push_inst(compiler, lgr(arg1, tmp0));
1997 }
1998 
/* Emits the code of SLJIT_CLZ / SLJIT_CTZ: the bit count of src_r is computed
   with flogr in tmp0 (tmp1 is used as well) and is moved to dst_r at the end.
   For CTZ the lowest set bit is isolated first (src & -src). */
static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
{
	const sljit_s32 is_32 = op & SLJIT_32;
	const sljit_s32 count_trailing = (GET_OPCODE(op) == SLJIT_CTZ);
	sljit_ins opc, first_bit, rotate;

	/* 32-bit input: work on a zero extended copy, since flogr has no 32-bit form here. */
	if (is_32 && src_r != tmp0) {
		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
		src_r = tmp0;
	}

	if (count_trailing) {
		/* tmp1 = -src */
		opc = is_32 ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
		FAIL_IF(push_inst(compiler, opc | R4A(tmp1) | R0A(src_r)));

		/* tmp0 = src & tmp1 */
		if (src_r != tmp0)
			opc = 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r);
		else if (is_32)
			opc = 0x1400 /* nr */ | R4A(tmp0) | R0A(tmp1);
		else
			opc = 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1);

		FAIL_IF(push_inst(compiler, opc));
		src_r = tmp0;
	}

	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));

	if (count_trailing)
		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));

	if (is_32) {
		/* The count was taken on a 64-bit value, so 32 is subtracted from it. For CLZ
		   with a destination other than tmp0 the result is written there directly. */
		if (!count_trailing && dst_r != tmp0)
			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));

		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
	}

	if (count_trailing) {
		first_bit = (sljit_ins)(is_32 ? 59 : 58);
		rotate = (sljit_ins)(is_32 ? 5 : 6);
		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | (first_bit << 24) | (63 << 16) | (rotate << 8)));
	}

	if (dst_r != tmp0)
		return push_inst(compiler, (is_32 ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));

	return SLJIT_SUCCESS;
}
2039 
/* Emits a byte order reversal (SLJIT_REV and its 16 / 32 bit variants).
   Memory operands use the load / store reversed instructions directly;
   register to register forms reverse the register and then extend the result. */
static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr mem;
	sljit_gpr data_r;
	sljit_ins opc;
	const sljit_s32 is_32 = op & SLJIT_32;
	const sljit_s32 opcode = GET_OPCODE(op);
	const sljit_s32 is_half = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);

	/* Memory destination: a single store reversed, the value is loaded into tmp0 first if needed. */
	if (dst & SLJIT_MEM) {
		data_r = tmp0;

		if (src & SLJIT_MEM)
			FAIL_IF(load_store_op(compiler, tmp0, src, srcw, is_32, is_half ? load_halfword_forms : load_forms));
		else
			data_r = gpr(src);

		FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));

		if (is_half)
			opc = 0xe3000000003f /* strvh */;
		else if (is_32)
			opc = 0xe3000000003e /* strv */;
		else
			opc = 0xe3000000002f /* strvg */;

		return push_inst(compiler, opc | R36A(data_r) | R32A(mem.index) | R28A(mem.base) | disp_s20(mem.offset));
	}

	/* Register destination: produce the reversed value in it. */
	data_r = gpr(dst);

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));

		if (is_half)
			opc = 0xe3000000001f /* lrvh */;
		else if (is_32)
			opc = 0xe3000000001e /* lrv */;
		else
			opc = 0xe3000000000f /* lrvg */;

		FAIL_IF(push_inst(compiler, opc | R36A(data_r) | R32A(mem.index) | R28A(mem.base) | disp_s20(mem.offset)));
	} else {
		opc = is_32 ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
		FAIL_IF(push_inst(compiler, opc | R4A(data_r) | R0A(gpr(src))));
	}

	/* A plain reversal needs no extension. */
	if (opcode == SLJIT_REV)
		return SLJIT_SUCCESS;

	/* 32-bit variants: zero or sign extend the reversed word. */
	if (!is_half) {
		opc = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
		return push_inst(compiler, opc | R4A(data_r) | R0A(data_r));
	}

	/* 16-bit value loaded from memory: extend the halfword in place. */
	if (src & SLJIT_MEM) {
		if (is_32)
			opc = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
		else
			opc = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;

		return push_inst(compiler, opc | R4A(data_r) | R0A(data_r));
	}

	/* 16-bit value reversed in a register: shift it down by 16 (32-bit mode)
	   or 48 (64-bit mode) bits, logically or arithmetically. */
	if (is_32) {
		opc = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
		return push_inst(compiler, opc | R20A(data_r) | 16);
	}

	opc = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
	return push_inst(compiler, opc | R36A(data_r) | R32A(data_r) | (48 << 16));
}
2112 
/* WHEN2(cond, i1, i2) evaluates to LEVAL(i1) when cond is true and to LEVAL(i2) otherwise. */
/* LEVAL will be defined later with different parameters as needed */
/* The expansion is fully parenthesized, so it can be used inside larger expressions. */
#define WHEN2(cond, i1, i2) ((cond) ? LEVAL(i1) : LEVAL(i2))
2115 
/* Emits a single operand operation: dst = op(src).
   - SLJIT_MOV .. SLJIT_MOV_P: moves with optional zero / sign extension between
     registers, immediates and memory.
   - SLJIT_CLZ / SLJIT_CTZ: bit counting, delegated to sljit_emit_clz_ctz.
   - SLJIT_REV and its variants: byte order reversal, delegated to sljit_emit_rev.
   Returns SLJIT_SUCCESS, or the compiler error if an instruction cannot be pushed. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins;
	struct addr mem;
	sljit_gpr dst_r;
	sljit_gpr src_r;
	sljit_s32 opcode = GET_OPCODE(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
		/* LOAD REGISTER */
		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
			dst_r = gpr(dst);
			src_r = gpr(src);
			/* The 32-bit modifier is merged into the opcode so both widths share one switch. */
			switch (opcode | (op & SLJIT_32)) {
			/* 32-bit */
			case SLJIT_MOV32_U8:
				ins = llcr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S8:
				ins = lbr(dst_r, src_r);
				break;
			case SLJIT_MOV32_U16:
				ins = llhr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S16:
				ins = lhr(dst_r, src_r);
				break;
			case SLJIT_MOV32:
				/* Moving a register onto itself needs no code. */
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lr(dst_r, src_r);
				break;
			/* 64-bit */
			case SLJIT_MOV_U8:
				ins = llgcr(dst_r, src_r);
				break;
			case SLJIT_MOV_S8:
				ins = lgbr(dst_r, src_r);
				break;
			case SLJIT_MOV_U16:
				ins = llghr(dst_r, src_r);
				break;
			case SLJIT_MOV_S16:
				ins = lghr(dst_r, src_r);
				break;
			case SLJIT_MOV_U32:
				ins = llgfr(dst_r, src_r);
				break;
			case SLJIT_MOV_S32:
				ins = lgfr(dst_r, src_r);
				break;
			case SLJIT_MOV:
			case SLJIT_MOV_P:
				/* Moving a register onto itself needs no code. */
				if (dst_r == src_r)
					return SLJIT_SUCCESS;
				ins = lgr(dst_r, src_r);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* LOAD IMMEDIATE */
		if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
			/* The immediate is truncated and extended at compile time according
			   to the move type; other opcodes load srcw unchanged. */
			switch (opcode) {
			case SLJIT_MOV_U8:
				srcw = (sljit_sw)((sljit_u8)(srcw));
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_sw)((sljit_s8)(srcw));
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_sw)((sljit_u16)(srcw));
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_sw)((sljit_s16)(srcw));
				break;
			case SLJIT_MOV_U32:
				srcw = (sljit_sw)((sljit_u32)(srcw));
				break;
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				srcw = (sljit_sw)((sljit_s32)(srcw));
				break;
			}
			return push_load_imm_inst(compiler, gpr(dst), srcw);
		}
		/* LOAD */
		/* TODO(carenas): avoid reg being defined later */
		/* LEVAL picks up the local variables named reg and mem at the point of use. */
		#define LEVAL(i) EVAL(i, reg, mem)
		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
			sljit_gpr reg = gpr(dst);

			/* tmp1 is passed to the address helper as its temporary register. */
			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			/* TODO(carenas): convert all calls below to LEVAL */
			switch (opcode | (op & SLJIT_32)) {
			case SLJIT_MOV32_U8:
				ins = llc(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S8:
				ins = lb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_U16:
				ins = llh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S16:
				/* The first form is chosen when the offset fits in an unsigned 12-bit field. */
				ins = WHEN2(is_u12(mem.offset), lh, lhy);
				break;
			case SLJIT_MOV32:
				ins = WHEN2(is_u12(mem.offset), l, ly);
				break;
			case SLJIT_MOV_U8:
				ins = LEVAL(llgc);
				break;
			case SLJIT_MOV_S8:
				ins = lgb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U16:
				ins = LEVAL(llgh);
				break;
			case SLJIT_MOV_S16:
				ins = lgh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U32:
				ins = LEVAL(llgf);
				break;
			case SLJIT_MOV_S32:
				ins = lgf(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				ins = lg(reg, mem.offset, mem.index, mem.base);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* STORE and STORE IMMEDIATE */
		if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
			/* Note: this mem shadows the function level variable of the same name. */
			struct addr mem;
			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;

			if (src == SLJIT_IMM) {
				/* TODO(mundaym): MOVE IMMEDIATE? */
				/* The immediate is materialized in tmp0 and stored from there. */
				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
			}
			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
			/* Signedness does not matter for stores, only the width does. */
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), stc, stcy));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), sth, sthy));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), st, sty));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler, LEVAL(stg)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		#undef LEVAL
		/* MOVE CHARACTERS */
		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
			/* Note: this mem shadows the function level variable of the same name.
			   The value is loaded into tmp0, then mem is recomputed for the
			   destination and the value is stored from tmp0. */
			struct addr mem;
			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				FAIL_IF(push_inst(compiler,
					EVAL(llgc, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(stcy, tmp0, mem));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				FAIL_IF(push_inst(compiler,
					EVAL(llgh, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sthy, tmp0, mem));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				FAIL_IF(push_inst(compiler,
					EVAL(ly, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sty, tmp0, mem));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler,
					EVAL(lg, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				FAIL_IF(push_inst(compiler,
					EVAL(stg, tmp0, mem)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		/* Every valid dst / src combination of a move returned above. */
		SLJIT_UNREACHABLE();
	}

	/* The remaining operations do not accept an immediate source. */
	SLJIT_ASSERT(src != SLJIT_IMM);

	/* Memory operands are processed through tmp0. */
	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	/* Record which flags were requested by this operation. */
	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);

	/* TODO(mundaym): optimize loads and stores */
	switch (opcode) {
	case SLJIT_CLZ:
	case SLJIT_CTZ:
		if (src & SLJIT_MEM)
			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));

		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
		break;
	case SLJIT_REV_U32:
	case SLJIT_REV_S32:
		/* The 32-bit reversals are passed to sljit_emit_rev with the 32-bit modifier set. */
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_REV:
	case SLJIT_REV_U16:
	case SLJIT_REV_S16:
		/* sljit_emit_rev handles memory operands itself. */
		return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
	default:
		SLJIT_UNREACHABLE();
	}

	/* Write the result back when the destination is in memory (dst_r is tmp0 then). */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2375 
is_commutative(sljit_s32 op)2376 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2377 {
2378 	switch (GET_OPCODE(op)) {
2379 	case SLJIT_ADD:
2380 	case SLJIT_ADDC:
2381 	case SLJIT_MUL:
2382 	case SLJIT_AND:
2383 	case SLJIT_OR:
2384 	case SLJIT_XOR:
2385 		return 1;
2386 	}
2387 	return 0;
2388 }
2389 
/* Instruction templates of the signed addition; sljit_emit_add selects this table
   when the overflow flag is requested. The initializer is positional, so the
   entries follow the member order of struct ins_forms (declared elsewhere);
   each entry is annotated with its mnemonic. */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};
2399 
/* Instruction templates of the logical (unsigned) addition; sljit_emit_add selects
   this table when the overflow flag is not requested. The initializer is
   positional, so the entries follow the member order of struct ins_forms
   (declared elsewhere); each entry is annotated with its mnemonic. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2409 
/* Emits an addition (dst = src1 + src2). Immediate second operands are encoded
   into the instruction when they fit; otherwise the generic register / memory
   forms are used. Signed instructions are chosen only when the overflow flag is
   requested, the logical ones otherwise. */
static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const sljit_s32 is_32 = op & SLJIT_32;
	const int wants_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
	const int wants_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	sljit_ins opc;

	if (src2 == SLJIT_IMM) {
		/* Add a small immediate directly to the memory operand (dst and src1 are the same location). */
		if (!wants_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
			if (wants_overflow)
				opc = is_32 ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
			else
				opc = is_32 ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;

			return emit_siy(compiler, opc, dst, dstw, src2w);
		}

		/* 16-bit immediate. */
		if (is_s16(src2w)) {
			if (wants_overflow)
				opc = is_32 ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
			else
				opc = is_32 ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;

			FAIL_IF(emit_rie_d(compiler, opc, dst, src1, src1w, src2w));
			goto done;
		}

		/* 32-bit immediate. */
		if (wants_overflow) {
			if (is_32 || is_s32(src2w)) {
				opc = is_32 ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
				FAIL_IF(emit_ri(compiler, opc, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
		} else {
			if (is_32 || is_u32(src2w)) {
				opc = is_32 ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
				FAIL_IF(emit_ri(compiler, opc, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}

			/* A negative immediate is added by subtracting its negated value. */
			if (is_u32(-src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
				goto done;
			}
		}
	}

	/* Generic form: no immediate encoding was applicable. */
	FAIL_IF(emit_commutative(compiler, wants_overflow ? &add_forms : &logical_add_forms, dst, src1, src1w, src2, src2w));

done:
	if (wants_zero_overflow)
		FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));

	/* For a memory destination the result is in tmp0 at this point. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, is_32);

	return SLJIT_SUCCESS;
}
2468 
/* Instruction templates of the signed subtraction; sljit_emit_sub selects this
   table when a signed flag is requested. The initializer is positional, so the
   entries follow the member order of struct ins_forms (declared elsewhere);
   each entry is annotated with its mnemonic. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};
2478 
/* Instruction templates of the logical (unsigned) subtraction; sljit_emit_sub
   selects this table when no signed flag is requested. The initializer is
   positional, so the entries follow the member order of struct ins_forms
   (declared elsewhere); each entry is annotated with its mnemonic. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2488 
/* Emits a subtraction (dst = src1 - src2) and sets the requested status flags.
   - When dst equals tmp0 (NOTE(review): presumably the "result is unused" case)
     and the requested flag type is at most SLJIT_SIG_LESS_EQUAL, only a compare
     instruction is emitted and nothing is stored.
   - 0 - src2 is emitted as a load complement.
   - Immediate second operands are encoded as an addition of the negated value
     or as a subtract immediate when they fit.
   - Otherwise the generic register / memory forms are used.
   Returns SLJIT_SUCCESS, or the compiler error if an instruction cannot be pushed. */
static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 flag_type = GET_FLAG_TYPE(op);
	int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
	const struct ins_forms *forms;
	sljit_ins ins;

	/* Pure comparison: emit a compare instruction instead of a subtraction. */
	if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
		int compare_signed = flag_type >= SLJIT_SIG_LESS;

		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;

		if (src2 == SLJIT_IMM) {
			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
				if ((op & SLJIT_32) || is_s32(src2w)) {
					ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
			}
			else {
				if ((op & SLJIT_32) || is_u32(src2w)) {
					ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
				}
				if (is_s16(src2w))
					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
			}
		}
		else if (src2 & SLJIT_MEM) {
			/* The short form is usable for 32-bit compares with an index register or a 12-bit offset. */
			if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
			}

			if (compare_signed)
				ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
			else
				ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
		}

		/* Register compare; also the fallback when the immediate cannot be encoded. */
		if (compare_signed)
			ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
		else
			ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
	}

	/* Negation: 0 - src2. */
	if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
		ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
		FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
		goto done;
	}

	if (src2 == SLJIT_IMM) {
		sljit_sw neg_src2w = -src2w;

		/* Subtraction of an immediate is emitted as an addition of its negated value. */
		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
				if (sets_signed)
					ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
				else
					ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
			}

			if (is_s16(neg_src2w)) {
				if (sets_signed)
					ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
				else
					ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
				goto done;
			}
		}

		if (!sets_signed) {
			if ((op & SLJIT_32) || is_u32(src2w)) {
				ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
				goto done;
			}
			if (is_u32(neg_src2w)) {
				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
				goto done;
			}
		}
		else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
			goto done;
		}
	}

	/* Generic form: no special encoding was applicable. */
	forms = sets_signed ? &sub_forms : &logical_sub_forms;
	FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));

done:
	if (sets_signed) {
		sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
			/* The branch offset is counted in halfwords from the brc instruction itself:
			   2 for the 4 byte brc, plus 1 for the 2 byte lcr or 2 for the 4 byte lcgr.
			   A fixed offset of 2 + 2 would land one halfword past the lcr in 32-bit mode. */
			sljit_s32 skip_halfwords = (op & SLJIT_32) ? (2 + 1) : (2 + 2);

			/* In case of overflow, the sign bit of the two source operands must be different, and
			     - the first operand is greater if the sign bit of the result is set
			     - the first operand is less if the sign bit of the result is not set
			   The -result operation sets the correct sign, because the result cannot be zero.
			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
			/* Mask 0xe: the complement is skipped unless the overflow condition code is set. */
			FAIL_IF(push_inst(compiler, brc(0xe, skip_halfwords)));
			FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
		}
		else if (op & SLJIT_SET_Z)
			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
	}

	/* For a memory destination the result is in tmp0 at this point. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);

	return SLJIT_SUCCESS;
}
2612 
/* Instruction templates of the multiplication; sljit_emit_multiply selects this
   table when no flags are requested. The initializer is positional, so the
   entries follow the member order of struct ins_forms (declared elsewhere);
   each entry is annotated with its mnemonic. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};
2622 
/* Instruction templates of the multiplication which sets the condition code;
   sljit_emit_multiply selects this table when flags are requested. The
   initializer is positional, so the entries follow the member order of
   struct ins_forms (declared elsewhere).
   NOTE(review): the 0 entries presumably mark forms which have no such
   instruction - confirm against the code which consumes struct ins_forms. */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2632 
/* Emits a multiplication (dst = src1 * src2). When flags are requested the
   condition code setting instruction forms are used; otherwise an immediate
   second operand is encoded into the instruction when it fits in 16 or 32 bits,
   and the generic forms are used in all other cases. */
static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const sljit_s32 is_32 = op & SLJIT_32;

	/* Flags requested.
	   If have_misc2 fails, this operation should be emulated. 32 bit emulation:
	   FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
	   FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
	   if (dst_r != tmp0) {
	       FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
	   }
	   FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
	   FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
	   FAIL_IF(push_inst(compiler, ipm(tmp1)));
	   FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
	if (HAS_FLAGS(op))
		return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);

	/* Halfword immediate. */
	if (src2 == SLJIT_IMM && is_s16(src2w))
		return emit_ri(compiler, is_32 ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */, dst, src1, src1w, src2w, RI_A);

	/* 32-bit immediate. */
	if (src2 == SLJIT_IMM && is_s32(src2w))
		return emit_ri(compiler, is_32 ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */, dst, src1, src1w, src2w, RIL_A);

	return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
}
2669 
/* Emits a bitwise operation (type is SLJIT_AND, SLJIT_OR, anything else is
   handled as xor) between src1 and the immediate imm. The operation is done in
   place in the destination register, or in tmp0 when dst is not a general
   purpose register (nothing is stored to memory here). count16 is compared
   against 2 and 3 to choose between 32-bit and 16-bit immediate instructions
   for OR (NOTE(review): presumably the number of non-zero halfwords of imm). */
static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_uw imm, sljit_s32 count16)
{
	const sljit_gpr target = IS_GPR_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	const sljit_uw high32 = imm >> 32;
	const sljit_uw low32 = imm & 0xffffffff;

	/* The first operand must be in the target register, unless it is already there. */
	if (!IS_GPR_REG(dst) || dst != src1)
		FAIL_IF(emit_move(compiler, target, src1, src1w));

	switch (type) {
	case SLJIT_AND:
		/* The upper word is only processed in 64-bit mode. */
		if (!(compiler->mode & SLJIT_32))
			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(target) | high32));

		return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(target) | low32);

	case SLJIT_OR:
		if (count16 >= 3) {
			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(target) | high32));
			return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(target) | low32);
		}

		if (count16 >= 2) {
			/* Both halfwords are in the same word: one 32-bit immediate is enough. */
			if (low32 == 0)
				return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(target) | high32);
			if (high32 == 0)
				return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(target) | low32);
		}

		/* One instruction for each non-zero halfword. */
		if ((imm >> 48) != 0)
			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(target) | (imm >> 48)));
		if (((imm >> 32) & 0xffff) != 0)
			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(target) | ((imm >> 32) & 0xffff)));
		if (((imm >> 16) & 0xffff) != 0)
			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(target) | ((imm >> 16) & 0xffff)));
		/* A zero immediate still emits one instruction. */
		if ((imm & 0xffff) != 0 || imm == 0)
			return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(target) | (imm & 0xffff));

		return SLJIT_SUCCESS;

	default:
		break;
	}

	/* Exclusive or: one instruction for each non-zero word. */
	if (high32 != 0)
		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(target) | high32));

	/* A zero immediate still emits one instruction. */
	if (low32 != 0 || imm == 0)
		return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(target) | low32);

	return SLJIT_SUCCESS;
}
2723 
/* Opcodes of the AND instruction variants, listed in the member order of
   struct ins_forms (declared outside this section). sljit_emit_bitwise
   passes this table to emit_commutative when the operation is SLJIT_AND. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};
2733 
/* Opcodes of the OR instruction variants, listed in the member order of
   struct ins_forms (declared outside this section). sljit_emit_bitwise
   passes this table to emit_commutative when the operation is SLJIT_OR. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};
2743 
/* Opcodes of the exclusive OR instruction variants, listed in the member
   order of struct ins_forms (declared outside this section).
   sljit_emit_bitwise passes this table to emit_commutative for every
   operation that is neither SLJIT_AND nor SLJIT_OR. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2753 
/* Emits a bitwise AND / OR / XOR operation. Three strategies are used:
     - an AND with an immediate which targets tmp0 and touches a single
       16 bit part of the value is emitted as a test under mask instruction
     - other immediate operations which do not request the zero flag are
       forwarded to sljit_emit_bitwise_imm
     - everything else is emitted through emit_commutative using the
       opcode table which belongs to the operation. */
static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const sljit_s32 type = GET_OPCODE(op);
	const sljit_s32 sets_zero = op & SLJIT_SET_Z;
	const sljit_s32 is_test = (type == SLJIT_AND && dst == (sljit_s32)tmp0);
	const struct ins_forms *forms;

	if (src2 == SLJIT_IMM && (!sets_zero || is_test)) {
		sljit_uw imm = (sljit_uw)src2w;
		sljit_s32 count16;

		if (op & SLJIT_32)
			imm &= 0xffffffffull;

		/* Number of non-zero 16 bit parts; a zero constant counts as one. */
		count16 = ((imm & 0xffff) != 0 || imm == 0)
			+ (((imm >> 16) & 0xffff) != 0)
			+ (((imm >> 32) & 0xffff) != 0)
			+ ((imm >> 48) != 0);

		if (is_test && count16 == 1) {
			sljit_gpr src_r;
			sljit_ins tm_ins;

			if (FAST_IS_REG(src1))
				src_r = gpr(src1 & REG_MASK);
			else {
				FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
				src_r = tmp0;
			}

			/* Choose the instruction which tests the only non-zero part. */
			if ((imm & 0xffff) != 0 || imm == 0)
				tm_ins = 0xa7010000 /* tmll */ | imm;
			else if (((imm >> 16) & 0xffff) != 0)
				tm_ins = 0xa7000000 /* tmlh */ | (imm >> 16);
			else if (((imm >> 32) & 0xffff) != 0)
				tm_ins = 0xa7030000 /* tmhl */ | (imm >> 32);
			else
				tm_ins = 0xa7020000 /* tmhh */ | (imm >> 48);

			return push_inst(compiler, tm_ins | R20A(src_r));
		}

		if (!sets_zero)
			return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
	}

	switch (type) {
	case SLJIT_AND:
		forms = &bitwise_and_forms;
		break;
	case SLJIT_OR:
		forms = &bitwise_or_forms;
		break;
	default:
		forms = &bitwise_xor_forms;
		break;
	}

	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
}
2808 
/* Emits a left, logical right or arithmetic right shift of src1 by src2.
   The result is produced in dst when it is a register and in tmp0
   otherwise. Operands which are not registers are loaded into tmp0 (src1)
   and tmp1 (src2). For the 32 bit SLJIT_MSHL / SLJIT_MLSHR / SLJIT_MASHR
   operations a register shift amount is reduced to its lowest five bits
   in tmp1 before the shift is emitted. */
static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const sljit_s32 type = GET_OPCODE(op);
	const sljit_s32 is_32 = op & SLJIT_32;
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r;
	sljit_gpr amount_r = tmp0;
	sljit_ins amount_imm = 0;
	sljit_ins ins_two_operand;
	sljit_ins ins_three_operand;

	if (FAST_IS_REG(src1))
		src_r = gpr(src1);
	else {
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
		src_r = tmp0;
	}

	if (src2 == SLJIT_IMM)
		amount_imm = (sljit_ins)(src2w & (is_32 ? 0x1f : 0x3f));
	else {
		if (FAST_IS_REG(src2))
			amount_r = gpr(src2);
		else {
			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
			amount_r = tmp1;
		}

		if (is_32 && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
			/* Keep only the lowest five bits of the shift amount. */
			if (amount_r == tmp1)
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
			else {
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(amount_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				amount_r = tmp1;
			}
		}
	}

	switch (type) {
	case SLJIT_SHL:
	case SLJIT_MSHL:
		ins_two_operand = 0x89000000 /* sll */;
		ins_three_operand = is_32 ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
		break;
	case SLJIT_LSHR:
	case SLJIT_MLSHR:
		ins_two_operand = 0x88000000 /* srl */;
		ins_three_operand = is_32 ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
		break;
	default:
		ins_two_operand = 0x8a000000 /* sra */;
		ins_three_operand = is_32 ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
		break;
	}

	/* The two operand form is used for in-place 32 bit shifts only. */
	if (is_32 && dst_r == src_r)
		FAIL_IF(push_inst(compiler, ins_two_operand | R20A(dst_r) | R12A(amount_r) | amount_imm));
	else
		FAIL_IF(push_inst(compiler, ins_three_operand | R36A(dst_r) | R32A(src_r) | R28A(amount_r) | (amount_imm << 16)));

	if (!(op & SLJIT_SET_Z) || type == SLJIT_ASHR)
		return SLJIT_SUCCESS;

	/* The result is or-ed with itself when the zero flag is requested
	   (presumably because the shift above does not provide it). */
	return push_inst(compiler, is_32 ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
}
2869 
/* Emits a rotate of src1 by src2. Only rotate left instructions are
   emitted: for SLJIT_ROTR the rotation amount is negated first (an
   immediate is negated at compile time, a register amount is complemented
   into tmp1). The result is produced in dst when it is a register and in
   tmp0 otherwise. */
static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	const sljit_s32 is_32 = op & SLJIT_32;
	const sljit_s32 rotate_right = (GET_OPCODE(op) == SLJIT_ROTR);
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src_r = tmp0;
	sljit_gpr amount_r = tmp0;
	sljit_ins amount_imm = 0;
	sljit_ins ins;

	if (FAST_IS_REG(src1))
		src_r = gpr(src1);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (src2 == SLJIT_IMM) {
		if (rotate_right)
			src2w = -src2w;

		amount_imm = (sljit_ins)(src2w & (is_32 ? 0x1f : 0x3f));
	} else {
		if (FAST_IS_REG(src2))
			amount_r = gpr(src2);
		else {
			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
			amount_r = tmp1;
		}

		if (rotate_right) {
			ins = is_32 ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
			FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(amount_r)));
			amount_r = tmp1;
		}
	}

	ins = is_32 ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(amount_r) | (amount_imm << 16));
}
2910 
/* Opcodes of the add with carry instruction variants, listed in the member
   order of struct ins_forms (declared outside this section). sljit_emit_op2
   passes this table to emit_commutative for SLJIT_ADDC.
   NOTE(review): the zero entries presumably mark forms which have no add
   with carry instruction - confirm against the users of struct ins_forms. */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};
2920 
/* Opcodes of the subtract with borrow instruction variants, listed in the
   member order of struct ins_forms (declared outside this section).
   sljit_emit_op2 passes this table to emit_non_commutative for SLJIT_SUBC.
   NOTE(review): the zero entries presumably mark forms which have no
   subtract with borrow instruction - confirm against the users of
   struct ins_forms. */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2930 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2931 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2932 	sljit_s32 dst, sljit_sw dstw,
2933 	sljit_s32 src1, sljit_sw src1w,
2934 	sljit_s32 src2, sljit_sw src2w)
2935 {
2936 	CHECK_ERROR();
2937 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2938 	ADJUST_LOCAL_OFFSET(dst, dstw);
2939 	ADJUST_LOCAL_OFFSET(src1, src1w);
2940 	ADJUST_LOCAL_OFFSET(src2, src2w);
2941 
2942 	compiler->mode = op & SLJIT_32;
2943 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2944 
2945 	if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
2946 		src1 ^= src2;
2947 		src2 ^= src1;
2948 		src1 ^= src2;
2949 
2950 		src1w ^= src2w;
2951 		src2w ^= src1w;
2952 		src1w ^= src2w;
2953 	}
2954 
2955 	switch (GET_OPCODE(op)) {
2956 	case SLJIT_ADD:
2957 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2958 		return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2959 	case SLJIT_ADDC:
2960 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2961 		FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2962 		if (dst & SLJIT_MEM)
2963 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2964 		return SLJIT_SUCCESS;
2965 	case SLJIT_SUB:
2966 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2967 		return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2968 	case SLJIT_SUBC:
2969 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2970 		FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2971 		if (dst & SLJIT_MEM)
2972 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2973 		return SLJIT_SUCCESS;
2974 	case SLJIT_MUL:
2975 		FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2976 		break;
2977 	case SLJIT_AND:
2978 	case SLJIT_OR:
2979 	case SLJIT_XOR:
2980 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2981 		break;
2982 	case SLJIT_SHL:
2983 	case SLJIT_MSHL:
2984 	case SLJIT_LSHR:
2985 	case SLJIT_MLSHR:
2986 	case SLJIT_ASHR:
2987 	case SLJIT_MASHR:
2988 		FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2989 		break;
2990 	case SLJIT_ROTL:
2991 	case SLJIT_ROTR:
2992 		FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2993 		break;
2994 	}
2995 
2996 	if (dst & SLJIT_MEM)
2997 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2998 	return SLJIT_SUCCESS;
2999 }
3000 
sljit_emit_op2u(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3001 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
3002 	sljit_s32 src1, sljit_sw src1w,
3003 	sljit_s32 src2, sljit_sw src2w)
3004 {
3005 	CHECK_ERROR();
3006 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
3007 
3008 	SLJIT_SKIP_CHECKS(compiler);
3009 	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
3010 }
3011 
/* Emits a shift of src1_reg by src3 into dst_reg, then combines dst_reg
   with bits of src2_reg which were shifted in the opposite direction.
   When src1_reg and src2_reg are the same register, the operation is
   emitted as a rotate through sljit_emit_op2. tmp0 and tmp1 are used as
   scratch registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst_reg,
	sljit_s32 src1_reg,
	sljit_s32 src2_reg,
	sljit_s32 src3, sljit_sw src3w)
{
	sljit_s32 is_right;
	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_gpr src1_r = gpr(src1_reg);
	sljit_gpr src2_r = gpr(src2_reg);
	sljit_gpr src3_r = tmp1;
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));

	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);

	if (src1_reg == src2_reg) {
		/* Both halves come from the same register: emit a rotate. */
		SLJIT_SKIP_CHECKS(compiler);
		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
	}

	ADJUST_LOCAL_OFFSET(src3, src3w);

	if (src3 == SLJIT_IMM) {
		/* Constant shift amount: reduced to the operand width. */
		src3w &= bit_length - 1;

		/* Nothing is emitted for a zero shift amount. */
		if (src3w == 0)
			return SLJIT_SUCCESS;

		if (op & SLJIT_32) {
			if (dst_r == src1_r) {
				ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
				FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
			} else {
				ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
				FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
			}
		} else {
			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
		}

		/* A single risbg, with src2_r as its source and dst_r as its
		   target, completes the result; the three values or-ed into
		   the instruction are computed from the shift amount. */
		ins = 0xec0000000055 /* risbg */;

		if (is_right) {
			src3w = bit_length - src3w;
			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
		} else
			ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);

		return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
	}

	if (!(src3 & SLJIT_MEM)) {
		src3_r = gpr(src3);

		/* dst_r is written before the shift amount is used for the
		   second time, so the amount is copied to tmp1 first. */
		if (dst_r == src3_r) {
			FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
			src3_r = tmp1;
		}
	} else
		FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));

	if (op & SLJIT_32) {
		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
			/* Keep only the lowest five bits of the shift amount in tmp1. */
			if (src3_r != tmp1) {
				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
				src3_r = tmp1;
			} else
				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
		}

		/* Shift src1 into dst. */
		if (dst_r == src1_r) {
			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
			FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
		} else {
			ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
		}

		/* tmp1 = shift amount xor 0x1f. */
		if (src3_r != tmp1) {
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
		} else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));

		/* Shift src2 in the opposite direction by tmp1 plus the
		   displacement of one, then merge it into dst. */
		ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));

		return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
	}

	/* 64 bit case: shift src1 into dst. */
	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
	FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));

	/* The opposite direction shift is used for src2. */
	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;

	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
		/* src2 is shifted in two steps: by one bit into tmp0 here, then
		   by (shift amount xor 0x3f) below. */
		if (src3_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));

		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
		src2_r = tmp0;

		if (src3_r != tmp1)
			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
		else
			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
	} else
		/* With SLJIT_SHIFT_INTO_NON_ZERO a single shift by the
		   complemented amount is emitted. */
		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));

	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
}
3129 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)3130 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3131 	sljit_s32 src, sljit_sw srcw)
3132 {
3133 	sljit_gpr src_r;
3134 	struct addr addr;
3135 
3136 	CHECK_ERROR();
3137 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3138 	ADJUST_LOCAL_OFFSET(src, srcw);
3139 
3140 	switch (op) {
3141 	case SLJIT_FAST_RETURN:
3142 		src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3143 		if (src & SLJIT_MEM)
3144 			FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3145 
3146 		return push_inst(compiler, br(src_r));
3147 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3148 		return SLJIT_SUCCESS;
3149 	case SLJIT_PREFETCH_L1:
3150 	case SLJIT_PREFETCH_L2:
3151 	case SLJIT_PREFETCH_L3:
3152 	case SLJIT_PREFETCH_ONCE:
3153 		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3154 		return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3155 	default:
3156 		return SLJIT_SUCCESS;
3157 	}
3158 
3159 	return SLJIT_SUCCESS;
3160 }
3161 
sljit_emit_op_dst(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw)3162 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3163 	sljit_s32 dst, sljit_sw dstw)
3164 {
3165 	sljit_gpr dst_r = link_r;
3166 	sljit_s32 size;
3167 
3168 	CHECK_ERROR();
3169 	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3170 	ADJUST_LOCAL_OFFSET(dst, dstw);
3171 
3172 	switch (op) {
3173 	case SLJIT_FAST_ENTER:
3174 		if (FAST_IS_REG(dst))
3175 			return push_inst(compiler, lgr(gpr(dst), link_r));
3176 		break;
3177 	case SLJIT_GET_RETURN_ADDRESS:
3178 		dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3179 
3180 		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
3181 		FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
3182 		break;
3183 	}
3184 
3185 	if (dst & SLJIT_MEM)
3186 		return store_word(compiler, dst_r, dst, dstw, 0);
3187 
3188 	return SLJIT_SUCCESS;
3189 }
3190 
sljit_get_register_index(sljit_s32 type,sljit_s32 reg)3191 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3192 {
3193 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3194 
3195 	if (type == SLJIT_GP_REGISTER)
3196 		return (sljit_s32)gpr(reg);
3197 
3198 	if (type != SLJIT_FLOAT_REGISTER)
3199 		return -1;
3200 
3201 	return (sljit_s32)freg_map[reg];
3202 }
3203 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_u32 size)3204 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3205 	void *instruction, sljit_u32 size)
3206 {
3207 	sljit_ins ins = 0;
3208 
3209 	CHECK_ERROR();
3210 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3211 
3212 	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3213 	return push_inst(compiler, ins);
3214 }
3215 
3216 /* --------------------------------------------------------------------- */
3217 /*  Floating point operators                                             */
3218 /* --------------------------------------------------------------------- */
3219 
/* Direction of a float_mem transfer. The value is or-ed into the op
   argument of float_mem (which may also carry SLJIT_32), and float_mem
   tests the FLOAT_STORE bit to tell a store from a load. */
#define FLOAT_LOAD 0
#define FLOAT_STORE 1
3222 
/* Emits a load or a store which moves a floating point value between the
   register reg and the memory operand mem / memw. op is FLOAT_LOAD or
   FLOAT_STORE, combined with SLJIT_32 for single precision values. The
   long displacement instructions are used only when the operand has no
   index register and memw fits into a signed 20 bit, but not into an
   unsigned 12 bit displacement. tmp1 is passed to the address helpers as
   their scratch register. */
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	const sljit_s32 is_store = op & FLOAT_STORE;
	const sljit_s32 is_single = op & SLJIT_32;
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(mem & SLJIT_MEM);

	if (!(mem & OFFS_REG_MASK) && !is_u12(memw) && is_s20(memw)) {
		FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));

		if (is_single)
			ins = is_store ? 0xed0000000066 /* stey */ : 0xed0000000064 /* ley */;
		else
			ins = is_store ? 0xed0000000067 /* stdy */ : 0xed0000000065 /* ldy */;

		return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
	}

	FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));

	if (is_single)
		ins = is_store ? 0x70000000 /* ste */ : 0x78000000 /* le */;
	else
		ins = is_store ? 0x60000000 /* std */ : 0x68000000 /* ld */;

	return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
}
3252 
/* Emits a floating point instruction whose first operand is the register
   reg. ins_r is the opcode used when src is not a memory operand, and ins
   is the opcode used when src is a memory operand; the memory address is
   prepared by make_addr_bx with tmp1 as its scratch register. */
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
	sljit_s32 reg,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr addr;

	if (src & SLJIT_MEM) {
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
		return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
	}

	return push_inst(compiler, ins_r | F4(reg) | F0(src));
}
3265 
sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3266 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3267 	sljit_s32 dst, sljit_sw dstw,
3268 	sljit_s32 src, sljit_sw srcw)
3269 {
3270 	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3271 	sljit_ins ins;
3272 
3273 	if (src & SLJIT_MEM) {
3274 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3275 		src = TMP_FREG1;
3276 	}
3277 
3278 	/* M3 is set to 5 */
3279 	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3280 		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3281 	else
3282 		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3283 
3284 	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3285 
3286 	if (dst & SLJIT_MEM)
3287 		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3288 
3289 	return SLJIT_SUCCESS;
3290 }
3291 
/* Shared part of the integer to floating point conversions. ins is the
   conversion opcode chosen by the caller. An immediate or a memory source
   is loaded into tmp0 first; bit 0x100000 of ins is passed to load_word
   as its last argument. A memory destination is written from TMP_FREG1
   after the conversion; the store is a single precision one when bit
   0x10000 of ins is clear. */
static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src == SLJIT_IMM || (src & SLJIT_MEM)) {
		if (src == SLJIT_IMM)
			FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		else
			FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));

		src = (sljit_s32)tmp0;
	}

	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
}
3314 
sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3315 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3316 	sljit_s32 dst, sljit_sw dstw,
3317 	sljit_s32 src, sljit_sw srcw)
3318 {
3319 	sljit_ins ins;
3320 
3321 	if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3322 		srcw = (sljit_s32)srcw;
3323 
3324 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3325 		ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3326 	else
3327 		ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3328 
3329 	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3330 }
3331 
sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src,sljit_sw srcw)3332 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3333 	sljit_s32 dst, sljit_sw dstw,
3334 	sljit_s32 src, sljit_sw srcw)
3335 {
3336 	sljit_ins ins;
3337 
3338 	if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3339 		srcw = (sljit_u32)srcw;
3340 
3341 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3342 		ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3343 	else
3344 		ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3345 
3346 	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3347 }
3348 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3349 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3350 	sljit_s32 src1, sljit_sw src1w,
3351 	sljit_s32 src2, sljit_sw src2w)
3352 {
3353 	sljit_ins ins_r, ins;
3354 
3355 	if (src1 & SLJIT_MEM) {
3356 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3357 		src1 = TMP_FREG1;
3358 	}
3359 
3360 	if (op & SLJIT_32) {
3361 		ins_r = 0xb3090000 /* cebr */;
3362 		ins = 0xed0000000009 /* ceb */;
3363 	} else {
3364 		ins_r = 0xb3190000 /* cdbr */;
3365 		ins = 0xed0000000019 /* cdb */;
3366 	}
3367 
3368 	return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3369 }
3370 
/* Emits a single operand floating point operation: a move, a negation, an
   absolute value or a conversion between single and double precision.
   The result is computed in dst when it is a register and in TMP_FREG1
   otherwise; in the latter case it is stored to the memory operand dst at
   the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;
	sljit_ins ins;

	CHECK_ERROR();

	/* NOTE(review): this macro is defined outside this section; it
	   presumably verifies the arguments and dispatches the conversion
	   and compare operations to their own emitters - confirm. */
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (op == SLJIT_CONV_F64_FROM_F32)
		/* emit_float accepts both register and memory sources. */
		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
	else {
		if (src & SLJIT_MEM) {
			/* The source of SLJIT_CONV_F32_FROM_F64 is loaded without the
			   SLJIT_32 flag, although the flag is set in op. */
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
			src = dst_r;
		}

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV_F64:
			if (FAST_IS_REG(dst)) {
				/* Nothing to emit when the value is already in dst. */
				if (dst == src)
					return SLJIT_SUCCESS;

				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
				break;
			}
			/* Memory destination: store the source register directly. */
			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
		case SLJIT_CONV_F64_FROM_F32:
			/* Only SLJIT_CONV_F32_FROM_F64. */
			ins = 0xb3440000 /* ledbr */;
			break;
		case SLJIT_NEG_F64:
			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
			break;
		default:
			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	SLJIT_ASSERT(dst_r == TMP_FREG1);

	/* Write the result computed in TMP_FREG1 to the memory operand. */
	return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
3425 
/* Builds a floating point register to register move instruction: ler is
   selected when the SLJIT_32 bit of op is set, ldr otherwise. dst_r and
   src_r are the destination and source floating point registers. The op
   argument is parenthesized, so an expression (e.g. a | b) can be passed
   without changing the meaning of the flag test. */
#define FLOAT_MOV(op, dst_r, src_r) \
	((((op) & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3428 
/* Emits a two operand floating point operation (add, subtract, multiply
   or divide). The emitted instruction combines its first operand with
   src2 in place, so src1 is moved into the result register first unless
   it is already there. The result register is dst when it is a register,
   and TMP_FREG1 otherwise; in the latter case the result is stored to the
   memory operand dst at the end. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r = TMP_FREG1;
	sljit_ins ins_r, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	/* Places src1 into dst_r; a break leaves the block when no move is
	   needed. */
	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			/* src1 is already in the result register. */
			if (dst == src1)
				break;

			if (dst == src2) {
				/* The operands of add and multiply are exchanged instead
				   of moving src2 away. */
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				/* Save src2 before dst is overwritten by src1. */
				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	/* ins_r is the register form and ins is the memory form of the
	   instruction; emit_float selects between them. */
	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	/* Write the result computed in TMP_FREG1 to the memory operand. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	SLJIT_ASSERT(dst_r != TMP_FREG1);
	return SLJIT_SUCCESS;
}
3497 
sljit_emit_fop2r(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)3498 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3499 	sljit_s32 dst_freg,
3500 	sljit_s32 src1, sljit_sw src1w,
3501 	sljit_s32 src2, sljit_sw src2w)
3502 {
3503 	sljit_s32 reg;
3504 
3505 	CHECK_ERROR();
3506 	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3507 	ADJUST_LOCAL_OFFSET(src1, src1w);
3508 	ADJUST_LOCAL_OFFSET(src2, src2w);
3509 
3510 	if (src2 & SLJIT_MEM) {
3511 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
3512 		src2 = TMP_FREG1;
3513 	}
3514 
3515 	if (src1 & SLJIT_MEM) {
3516 		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
3517 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
3518 		src1 = reg;
3519 	}
3520 
3521 	return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
3522 }
3523 
sljit_emit_fset32(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f32 value)3524 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
3525 	sljit_s32 freg, sljit_f32 value)
3526 {
3527 	union {
3528 		sljit_s32 imm;
3529 		sljit_f32 value;
3530 	} u;
3531 
3532 	CHECK_ERROR();
3533 	CHECK(check_sljit_emit_fset32(compiler, freg, value));
3534 
3535 	u.value = value;
3536 
3537 	FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
3538 	return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3539 }
3540 
sljit_emit_fset64(struct sljit_compiler * compiler,sljit_s32 freg,sljit_f64 value)3541 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
3542 	sljit_s32 freg, sljit_f64 value)
3543 {
3544 	union {
3545 		sljit_sw imm;
3546 		sljit_f64 value;
3547 	} u;
3548 
3549 	CHECK_ERROR();
3550 	CHECK(check_sljit_emit_fset64(compiler, freg, value));
3551 
3552 	u.value = value;
3553 
3554 	FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
3555 	return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3556 }
3557 
sljit_emit_fcopy(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 freg,sljit_s32 reg)3558 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
3559 	sljit_s32 freg, sljit_s32 reg)
3560 {
3561 	sljit_gpr gen_r;
3562 
3563 	CHECK_ERROR();
3564 	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
3565 
3566 	gen_r = gpr(reg);
3567 
3568 	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
3569 		if (op & SLJIT_32) {
3570 			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
3571 			gen_r = tmp0;
3572 		}
3573 
3574 		return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
3575 	}
3576 
3577 	FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
3578 
3579 	if (!(op & SLJIT_32))
3580 		return SLJIT_SUCCESS;
3581 
3582 	return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
3583 }
3584 
3585 /* --------------------------------------------------------------------- */
3586 /*  Conditional instructions                                             */
3587 /* --------------------------------------------------------------------- */
3588 
sljit_emit_label(struct sljit_compiler * compiler)3589 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3590 {
3591 	struct sljit_label *label;
3592 
3593 	CHECK_ERROR_PTR();
3594 	CHECK_PTR(check_sljit_emit_label(compiler));
3595 
3596 	if (compiler->last_label && compiler->last_label->size == compiler->size)
3597 		return compiler->last_label;
3598 
3599 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3600 	PTR_FAIL_IF(!label);
3601 	set_label(label, compiler);
3602 	return label;
3603 }
3604 
sljit_emit_jump(struct sljit_compiler * compiler,sljit_s32 type)3605 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3606 {
3607 	struct sljit_jump *jump;
3608 	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3609 
3610 	CHECK_ERROR_PTR();
3611 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
3612 
3613 	/* record jump */
3614 	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
3615 	PTR_FAIL_IF(!jump);
3616 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3617 	jump->addr = compiler->size;
3618 
3619 	/* emit jump instruction */
3620 	type &= 0xff;
3621 	if (type >= SLJIT_FAST_CALL)
3622 		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
3623 	else
3624 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3625 
3626 	return jump;
3627 }
3628 
sljit_emit_call(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types)3629 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
3630 	sljit_s32 arg_types)
3631 {
3632 	SLJIT_UNUSED_ARG(arg_types);
3633 	CHECK_ERROR_PTR();
3634 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
3635 
3636 	if (type & SLJIT_CALL_RETURN) {
3637 		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
3638 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
3639 	}
3640 
3641 	SLJIT_SKIP_CHECKS(compiler);
3642 	return sljit_emit_jump(compiler, type);
3643 }
3644 
sljit_emit_ijump(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 src,sljit_sw srcw)3645 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3646 {
3647 	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3648 
3649 	CHECK_ERROR();
3650 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3651 
3652 	if (src == SLJIT_IMM) {
3653 		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3654 		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3655 	}
3656 	else if (src & SLJIT_MEM) {
3657 		ADJUST_LOCAL_OFFSET(src, srcw);
3658 		FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3659 	}
3660 
3661 	/* emit jump instruction */
3662 	if (type >= SLJIT_FAST_CALL)
3663 		return push_inst(compiler, basr(link_r, src_r));
3664 
3665 	return push_inst(compiler, br(src_r));
3666 }
3667 
sljit_emit_icall(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 arg_types,sljit_s32 src,sljit_sw srcw)3668 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
3669 	sljit_s32 arg_types,
3670 	sljit_s32 src, sljit_sw srcw)
3671 {
3672 	SLJIT_UNUSED_ARG(arg_types);
3673 
3674 	CHECK_ERROR();
3675 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
3676 
3677 	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);
3678 
3679 	if (src & SLJIT_MEM) {
3680 		ADJUST_LOCAL_OFFSET(src, srcw);
3681 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
3682 		src = TMP_REG2;
3683 		srcw = 0;
3684 	}
3685 
3686 	if (type & SLJIT_CALL_RETURN) {
3687 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
3688 			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
3689 			src = TMP_REG2;
3690 			srcw = 0;
3691 		}
3692 
3693 		FAIL_IF(emit_stack_frame_release(compiler, r14));
3694 		type = SLJIT_JUMP;
3695 	}
3696 
3697 	SLJIT_SKIP_CHECKS(compiler);
3698 	return sljit_emit_ijump(compiler, type, src, srcw);
3699 }
3700 
/* Materializes the condition described by `type` as 0 or 1 and either moves
   it to dst (SLJIT_MOV / SLJIT_MOV32) or combines it with dst using
   SLJIT_AND / SLJIT_OR / SLJIT_XOR.

   The 0/1 value is produced by loading 0 and then conditionally loading 1
   with lochi/locghi, which requires have_lscond2(); without that facility
   the function calls abort(). A memory destination is accessed through
   tmp0. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	/* Register that receives the final result (tmp0 when dst is not a register). */
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	/* Register that receives the 0/1 condition value. */
	sljit_gpr loc_r = tmp1;
	/* Condition mask for `type`; note that it is computed before the checks below. */
	sljit_u8 mask = get_cc(compiler, type);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

		break;
	case SLJIT_MOV32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_MOV:
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* TODO(mundaym): fold into cmov helper function? */
	/* LEVAL supplies the operand list (loc_r, 1, mask) for the mnemonic
	   chosen by WHEN2. WHEN2 is defined outside this block; presumably it
	   selects lochi when the condition is non-zero and locghi otherwise. */
	#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		/* loc_r = 0, then loc_r = 1 if the condition holds. */
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, lochi, locghi)));
	} else {
		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
		abort();
	}
	#undef LEVAL

	/* apply bitwise op and set condition codes */
	/* SLJIT_MOV / SLJIT_MOV32 match no case here: the value is already in dst_r. */
	switch (GET_OPCODE(op)) {
	#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, xr, xgr)));
		break;
	#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));

	return SLJIT_SUCCESS;
}
3770 
/* Emits a conditional select into dst_reg using the load-on-condition
   instructions (loc/locg, lochi/locghi, locr/locgr).

   dst_reg is first made to hold one of the two candidates, then a single
   conditional load under the mask from get_cc replaces it with src1. src1
   may be a register, an immediate or a memory operand; src2_reg is always a
   register. SLJIT_32 in `type` selects the 32-bit instruction forms. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2_reg)
{
	sljit_ins mask;
	sljit_gpr src_r;
	sljit_gpr dst_r = gpr(dst_reg);
	sljit_ins ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));

	ADJUST_LOCAL_OFFSET(src1, src1w);

	/* When dst_reg already is src2_reg nothing has to be prepared. */
	if (dst_reg != src2_reg) {
		if (src1 == dst_reg) {
			/* dst already holds src1: conditionally load src2_reg instead.
			   The lowest bit of type is flipped, which presumably selects
			   the opposite condition. */
			src1 = src2_reg;
			src1w = 0;
			type ^= 0x1;
		} else {
			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
				/* The address of src1 uses dst_reg, so src1 must be loaded
				   before dst_reg is overwritten; afterwards src2_reg is the
				   conditionally loaded operand (same bit flip as above). */
				FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
				src1 = src2_reg;
				src1w = 0;
				type ^= 0x1;
			} else
				/* Plain case: dst = src2_reg, then conditionally load src1. */
				FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
		}
	}

	mask = get_cc(compiler, type & ~SLJIT_32);

	if (src1 & SLJIT_MEM) {
		/* The conditional load below takes a base register plus a
		   displacement encoded with disp_s20, so other address forms are
		   folded into tmp1. */
		if (src1 & OFFS_REG_MASK) {
			src_r = gpr(OFFS_REG(src1));

			if (src1w != 0) {
				/* Scale the index register by the shift stored in src1w. */
				FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
				src_r = tmp1;
			}

			/* tmp1 = base + (scaled) index */
			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
			src_r = tmp1;
			src1w = 0;
		} else if (!is_s20(src1w)) {
			/* Displacement does not pass is_s20: build the address in tmp1. */
			FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));

			if (src1 & REG_MASK)
				FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));

			src_r = tmp1;
			src1w = 0;
		} else
			src_r = gpr(src1 & REG_MASK);

		ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
		return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
	}

	if (src1 == SLJIT_IMM) {
		/* Only the low 32 bits matter for a 32-bit select. */
		if (type & SLJIT_32)
			src1w = (sljit_s32)src1w;

		/* Immediates accepted by is_s16 can be loaded conditionally in one instruction. */
		if (have_lscond2() && is_s16(src1w)) {
			ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
			return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
		}

		/* Otherwise materialize the immediate in tmp0 and use the register form. */
		FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
		src_r = tmp0;
	} else
		src_r = gpr(src1);

	ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
	return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
}
3848 
sljit_emit_fselect(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2_freg)3849 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3850 	sljit_s32 dst_freg,
3851 	sljit_s32 src1, sljit_sw src1w,
3852 	sljit_s32 src2_freg)
3853 {
3854 	sljit_ins ins;
3855 	struct sljit_label *label;
3856 	struct sljit_jump *jump;
3857 
3858 	CHECK_ERROR();
3859 	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3860 
3861 	ADJUST_LOCAL_OFFSET(src1, src1w);
3862 
3863 	if (dst_freg != src2_freg) {
3864 		if (dst_freg == src1) {
3865 			src1 = src2_freg;
3866 			src1w = 0;
3867 			type ^= 0x1;
3868 		} else {
3869 			ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3870 			FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
3871 		}
3872 	}
3873 
3874 	SLJIT_SKIP_CHECKS(compiler);
3875 	jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
3876 	FAIL_IF(!jump);
3877 
3878 	if (!(src1 & SLJIT_MEM)) {
3879 		ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3880 		FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
3881 	} else
3882 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
3883 
3884 	SLJIT_SKIP_CHECKS(compiler);
3885 	label = sljit_emit_label(compiler);
3886 	FAIL_IF(!label);
3887 
3888 	sljit_set_label(jump, label);
3889 	return SLJIT_SUCCESS;
3890 }
3891 
sljit_emit_mem(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 reg,sljit_s32 mem,sljit_sw memw)3892 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3893 	sljit_s32 reg,
3894 	sljit_s32 mem, sljit_sw memw)
3895 {
3896 	sljit_ins ins, reg1, reg2, base, offs = 0;
3897 
3898 	CHECK_ERROR();
3899 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3900 
3901 	if (!(reg & REG_PAIR_MASK))
3902 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3903 
3904 	ADJUST_LOCAL_OFFSET(mem, memw);
3905 
3906 	base = gpr(mem & REG_MASK);
3907 	reg1 = gpr(REG_PAIR_FIRST(reg));
3908 	reg2 = gpr(REG_PAIR_SECOND(reg));
3909 
3910 	if (mem & OFFS_REG_MASK) {
3911 		memw &= 0x3;
3912 		offs = gpr(OFFS_REG(mem));
3913 
3914 		if (memw != 0) {
3915 			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3916 			offs = tmp1;
3917 		} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3918 			FAIL_IF(push_inst(compiler, 0xb9f80000 | R12A(tmp1) | R4A(base) | R0A(offs)));
3919 			base = tmp1;
3920 			offs = 0;
3921 		}
3922 
3923 		memw = 0;
3924 	} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3925 		FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3926 
3927 		if (base == 0)
3928 			base = tmp1;
3929 		else
3930 			offs = tmp1;
3931 
3932 		memw = 0;
3933 	}
3934 
3935 	if (offs == 0 && reg2 == (reg1 + 1)) {
3936 		ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3937 		return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3938 	}
3939 
3940 	ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3941 
3942 	if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3943 		FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3944 		return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3945 	}
3946 
3947 	FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3948 	return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3949 }
3950 
sljit_emit_simd_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 srcdst,sljit_sw srcdstw)3951 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3952 	sljit_s32 freg,
3953 	sljit_s32 srcdst, sljit_sw srcdstw)
3954 {
3955 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3956 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3957 	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3958 	struct addr addr;
3959 	sljit_ins ins;
3960 
3961 	CHECK_ERROR();
3962 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3963 
3964 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3965 
3966 	if (reg_size != 4)
3967 		return SLJIT_ERR_UNSUPPORTED;
3968 
3969 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3970 		return SLJIT_ERR_UNSUPPORTED;
3971 
3972 	if (type & SLJIT_SIMD_TEST)
3973 		return SLJIT_SUCCESS;
3974 
3975 	if (!(srcdst & SLJIT_MEM)) {
3976 		if (type & SLJIT_SIMD_STORE)
3977 			ins = F36(srcdst) | F32(freg);
3978 		else
3979 			ins = F36(freg) | F32(srcdst);
3980 
3981 		return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
3982 	}
3983 
3984 	FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
3985 	ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
3986 
3987 	if (alignment >= 4)
3988 		ins |= 4 << 12;
3989 	else if (alignment == 3)
3990 		ins |= 3 << 12;
3991 
3992 	return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
3993 }
3994 
sljit_emit_simd_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)3995 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3996 	sljit_s32 freg,
3997 	sljit_s32 src, sljit_sw srcw)
3998 {
3999 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4000 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4001 	struct addr addr;
4002 	sljit_gpr reg;
4003 	sljit_sw sign_ext;
4004 
4005 	CHECK_ERROR();
4006 	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
4007 
4008 	ADJUST_LOCAL_OFFSET(src, srcw);
4009 
4010 	if (reg_size != 4)
4011 		return SLJIT_ERR_UNSUPPORTED;
4012 
4013 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4014 		return SLJIT_ERR_UNSUPPORTED;
4015 
4016 	if (type & SLJIT_SIMD_TEST)
4017 		return SLJIT_SUCCESS;
4018 
4019 	if (src & SLJIT_MEM) {
4020 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4021 		return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
4022 			| R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
4023 	}
4024 
4025 	if (type & SLJIT_SIMD_FLOAT) {
4026 		if (src == SLJIT_IMM)
4027 			return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
4028 
4029 		return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
4030 	}
4031 
4032 	if (src == SLJIT_IMM) {
4033 		sign_ext = 0x10000;
4034 
4035 		switch (elem_size) {
4036 		case 0:
4037 			srcw &= 0xff;
4038 			sign_ext = (sljit_s8)srcw;
4039 			break;
4040 		case 1:
4041 			srcw &= 0xffff;
4042 			sign_ext = (sljit_s16)srcw;
4043 			break;
4044 		case 2:
4045 			if ((sljit_s32)srcw == (sljit_s16)srcw) {
4046 				srcw &= 0xffff;
4047 				sign_ext = (sljit_s16)srcw;
4048 			} else
4049 				srcw &= 0xffffffff;
4050 			break;
4051 		default:
4052 			if (srcw == (sljit_s16)srcw) {
4053 				srcw &= 0xffff;
4054 				sign_ext = (sljit_s16)srcw;
4055 			}
4056 			break;
4057 		}
4058 
4059 		if (sign_ext != 0x10000) {
4060 			if (sign_ext == 0 || sign_ext == -1)
4061 				return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
4062 					| (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
4063 
4064 			return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
4065 				| ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
4066 		}
4067 
4068 		push_load_imm_inst(compiler, tmp0, srcw);
4069 		reg = tmp0;
4070 	} else
4071 		reg = gpr(src);
4072 
4073 	FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
4074 	return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
4075 }
4076 
sljit_emit_simd_lane_mov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 lane_index,sljit_s32 srcdst,sljit_sw srcdstw)4077 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4078 	sljit_s32 freg, sljit_s32 lane_index,
4079 	sljit_s32 srcdst, sljit_sw srcdstw)
4080 {
4081 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4082 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4083 	struct addr addr;
4084 	sljit_gpr reg;
4085 	sljit_ins ins = 0;
4086 
4087 	CHECK_ERROR();
4088 	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
4089 
4090 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4091 
4092 	if (reg_size != 4)
4093 		return SLJIT_ERR_UNSUPPORTED;
4094 
4095 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4096 		return SLJIT_ERR_UNSUPPORTED;
4097 
4098 	if (type & SLJIT_SIMD_TEST)
4099 		return SLJIT_SUCCESS;
4100 
4101 	if (srcdst & SLJIT_MEM) {
4102 		FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4103 		ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4104 	}
4105 
4106 	if (type & SLJIT_SIMD_LANE_ZERO) {
4107 		if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
4108 			return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
4109 
4110 		if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4111 			FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
4112 			srcdst = TMP_FREG1;
4113 			srcdstw = 0;
4114 		}
4115 
4116 		FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
4117 	}
4118 
4119 	if (srcdst & SLJIT_MEM) {
4120 		switch (elem_size) {
4121 		case 0:
4122 			ins |= 0xe70000000000 /* vleb */;
4123 			break;
4124 		case 1:
4125 			ins |= 0xe70000000001 /* vleh */;
4126 			break;
4127 		case 2:
4128 			ins |= 0xe70000000003 /* vlef */;
4129 			break;
4130 		default:
4131 			ins |= 0xe70000000002 /* vleg */;
4132 			break;
4133 		}
4134 
4135 		/* Convert to vsteb - vsteg  */
4136 		if (type & SLJIT_SIMD_STORE)
4137 			ins |= 0x8;
4138 
4139 		return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
4140 	}
4141 
4142 	if (type & SLJIT_SIMD_FLOAT) {
4143 		if (type & SLJIT_SIMD_STORE)
4144 			return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
4145 
4146 		if (elem_size == 3) {
4147 			if (lane_index == 0)
4148 				ins = F32(srcdst) | F28(freg) | (1 << 12);
4149 			else
4150 				ins = F32(freg) | F28(srcdst);
4151 
4152 			return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
4153 		}
4154 
4155 		FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
4156 		return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
4157 	}
4158 
4159 	if (srcdst == SLJIT_IMM) {
4160 		switch (elem_size) {
4161 		case 0:
4162 			ins = 0xe70000000040 /* vleib */;
4163 			srcdstw &= 0xff;
4164 			break;
4165 		case 1:
4166 			ins = 0xe70000000041 /* vleih */;
4167 			srcdstw &= 0xffff;
4168 			break;
4169 		case 2:
4170 			if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
4171 				srcdstw &= 0xffff;
4172 				ins = 0xe70000000043 /* vleif */;
4173 			} else
4174 				srcdstw &= 0xffffffff;
4175 			break;
4176 		default:
4177 			if (srcdstw == (sljit_s16)srcdstw) {
4178 				srcdstw &= 0xffff;
4179 				ins = 0xe70000000042 /* vleig */;
4180 			}
4181 			break;
4182 		}
4183 
4184 		if (ins != 0)
4185 			return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
4186 
4187 		push_load_imm_inst(compiler, tmp0, srcdstw);
4188 		reg = tmp0;
4189 	} else
4190 		reg = gpr(srcdst);
4191 
4192 	ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
4193 
4194 	if (!(type & SLJIT_SIMD_STORE))
4195 		return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
4196 
4197 	FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
4198 
4199 	if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
4200 		return SLJIT_SUCCESS;
4201 
4202 	switch (elem_size) {
4203 	case 0:
4204 		ins = 0xb9060000 /* lgbr */;
4205 		break;
4206 	case 1:
4207 		ins = 0xb9070000 /* lghr */;
4208 		break;
4209 	default:
4210 		ins = 0xb9140000 /* lgfr */;
4211 		break;
4212 	}
4213 
4214 	return push_inst(compiler, ins | R4A(reg) | R0A(reg));
4215 }
4216 
sljit_emit_simd_lane_replicate(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_s32 src_lane_index)4217 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4218 	sljit_s32 freg,
4219 	sljit_s32 src, sljit_s32 src_lane_index)
4220 {
4221 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4222 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4223 
4224 	CHECK_ERROR();
4225 	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4226 
4227 	if (reg_size != 4)
4228 		return SLJIT_ERR_UNSUPPORTED;
4229 
4230 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4231 		return SLJIT_ERR_UNSUPPORTED;
4232 
4233 	if (type & SLJIT_SIMD_TEST)
4234 		return SLJIT_SUCCESS;
4235 
4236 	return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
4237 		| ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
4238 }
4239 
sljit_emit_simd_extend(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 src,sljit_sw srcw)4240 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4241 	sljit_s32 freg,
4242 	sljit_s32 src, sljit_sw srcw)
4243 {
4244 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4245 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4246 	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4247 	struct addr addr;
4248 	sljit_ins ins;
4249 
4250 	CHECK_ERROR();
4251 	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4252 
4253 	ADJUST_LOCAL_OFFSET(src, srcw);
4254 
4255 	if (reg_size != 4)
4256 		return SLJIT_ERR_UNSUPPORTED;
4257 
4258 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4259 		return SLJIT_ERR_UNSUPPORTED;
4260 
4261 	if (type & SLJIT_SIMD_TEST)
4262 		return SLJIT_SUCCESS;
4263 
4264 	if (src & SLJIT_MEM) {
4265 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4266 		ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4267 
4268 		switch (elem2_size - elem_size) {
4269 		case 1:
4270 			ins |= 0xe70000000002 /* vleg */;
4271 			break;
4272 		case 2:
4273 			ins |= 0xe70000000003 /* vlef */;
4274 			break;
4275 		default:
4276 			ins |= 0xe70000000001 /* vleh */;
4277 			break;
4278 		}
4279 
4280 		FAIL_IF(push_inst(compiler, ins));
4281 		src = freg;
4282 	}
4283 
4284 	if (type & SLJIT_SIMD_FLOAT) {
4285 		FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
4286 		FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
4287 		return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
4288 	}
4289 
4290 	ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);
4291 
4292 	do {
4293 		FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
4294 		src = freg;
4295 	} while (++elem_size < elem2_size);
4296 
4297 	return SLJIT_SUCCESS;
4298 }
4299 
sljit_emit_simd_sign(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 freg,sljit_s32 dst,sljit_sw dstw)4300 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4301 	sljit_s32 freg,
4302 	sljit_s32 dst, sljit_sw dstw)
4303 {
4304 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4305 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4306 	sljit_gpr dst_r;
4307 
4308 	CHECK_ERROR();
4309 	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4310 
4311 	ADJUST_LOCAL_OFFSET(dst, dstw);
4312 
4313 	if (reg_size != 4)
4314 		return SLJIT_ERR_UNSUPPORTED;
4315 
4316 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4317 		return SLJIT_ERR_UNSUPPORTED;
4318 
4319 	if (type & SLJIT_SIMD_TEST)
4320 		return SLJIT_SUCCESS;
4321 
4322 	switch (elem_size) {
4323 	case 0:
4324 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
4325 		push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
4326 		FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
4327 		break;
4328 	case 1:
4329 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
4330 		break;
4331 	case 2:
4332 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
4333 		break;
4334 	default:
4335 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
4336 		break;
4337 	}
4338 
4339 	if (elem_size != 0)
4340 		FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
4341 
4342 	FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
4343 
4344 	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
4345 	FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
4346 		| (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
4347 
4348 	if (dst_r == tmp0)
4349 		return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
4350 
4351 	return SLJIT_SUCCESS;
4352 }
4353 
sljit_emit_simd_op2(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_freg,sljit_s32 src1_freg,sljit_s32 src2_freg)4354 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4355 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4356 {
4357 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4358 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4359 	sljit_ins ins = 0;
4360 
4361 	CHECK_ERROR();
4362 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4363 
4364 	if (reg_size != 4)
4365 		return SLJIT_ERR_UNSUPPORTED;
4366 
4367 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4368 		return SLJIT_ERR_UNSUPPORTED;
4369 
4370 	if (type & SLJIT_SIMD_TEST)
4371 		return SLJIT_SUCCESS;
4372 
4373 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
4374 	case SLJIT_SIMD_OP2_AND:
4375 		ins = 0xe70000000068 /* vn */;
4376 		break;
4377 	case SLJIT_SIMD_OP2_OR:
4378 		ins = 0xe7000000006a /* vo */;
4379 		break;
4380 	case SLJIT_SIMD_OP2_XOR:
4381 		ins = 0xe7000000006d /* vx */;
4382 		break;
4383 	}
4384 
4385 	if (type & SLJIT_SIMD_TEST)
4386 		return SLJIT_SUCCESS;
4387 
4388 	return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
4389 }
4390 
sljit_emit_atomic_load(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst_reg,sljit_s32 mem_reg)4391 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4392 	sljit_s32 dst_reg,
4393 	sljit_s32 mem_reg)
4394 {
4395 	CHECK_ERROR();
4396 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4397 
4398 	SLJIT_SKIP_CHECKS(compiler);
4399 	return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4400 }
4401 
/* Emits the store half of an atomic update using compare-and-swap.

   32-bit opcodes map to a single cs and the default (word) case to a single
   csg, comparing against temp_reg and storing src_reg at the address in
   mem_reg. SLJIT_MOV_U8 and SLJIT_MOV_U16 have no direct instruction: the
   containing aligned 32-bit word is loaded, the addressed byte/halfword is
   replaced inside it, and the whole word is swapped with cs. This path
   clobbers tmp0, tmp1 and temp_reg. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src_reg,
	sljit_s32 mem_reg,
	sljit_s32 temp_reg)
{
	sljit_ins mask;
	sljit_gpr tmp_r = gpr(temp_reg);
	sljit_gpr mem_r = gpr(mem_reg);

	CHECK_ERROR();
	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));

	switch (GET_OPCODE(op)) {
	case SLJIT_MOV32:
	case SLJIT_MOV_U32:
		return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
	case SLJIT_MOV_U8:
		mask = 0xff;
		break;
	case SLJIT_MOV_U16:
		mask = 0xffff;
		break;
	default:
		return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
	}

	/* tmp0 = (src_reg ^ tmp_r) & mask */
	/* The lghi in between already prepares tmp_r = -4 (0xfffc sign
	   extended) for the address alignment below; tmp_r's old value has been
	   consumed by the xgrk at that point. */
	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
	FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
	FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));

	/* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
	/* tmp1 starts as the shift for offset 0 (24 for bytes, 16 for
	   halfwords); rxsbg then xors the low address bits, scaled by 8, into
	   it. The ngr computes tmp_r = mem_r & -4 along the way. */
	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
	FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
	FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));

	/* Already computed: tmp_r = mem_r & ~0x3 */

	/* Load the current word, xor the positioned difference into it to get
	   the new word in tmp0, and swap it in if memory still equals tmp1. */
	FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
	FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp0) | R0A(tmp1)));
	return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
}
4446 
4447 /* --------------------------------------------------------------------- */
4448 /*  Other instructions                                                   */
4449 /* --------------------------------------------------------------------- */
4450 
4451 /* On s390x we build a literal pool to hold constants. This has two main
4452    advantages:
4453 
4454      1. we only need one instruction in the instruction stream (LGRL)
4455      2. we can store 64 bit addresses and use 32 bit offsets
4456 
4457    To retrofit the extra information needed to build the literal pool we
4458    add a new sljit_s390x_const struct that contains the initial value but
4459    can still be cast to a sljit_const. */
4460 
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)4461 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4462 {
4463 	struct sljit_s390x_const *const_;
4464 	sljit_gpr dst_r;
4465 
4466 	CHECK_ERROR_PTR();
4467 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4468 
4469 	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
4470 					sizeof(struct sljit_s390x_const));
4471 	PTR_FAIL_IF(!const_);
4472 	set_const((struct sljit_const*)const_, compiler);
4473 	const_->init_value = init_value;
4474 
4475 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4476 	if (have_genext())
4477 		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
4478 	else {
4479 		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
4480 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4481 	}
4482 
4483 	if (dst & SLJIT_MEM)
4484 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
4485 
4486 	return (struct sljit_const*)const_;
4487 }
4488 
sljit_set_jump_addr(sljit_uw addr,sljit_uw new_target,sljit_sw executable_offset)4489 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4490 {
4491 	/* Update the constant pool. */
4492 	sljit_uw *ptr = (sljit_uw *)addr;
4493 	SLJIT_UNUSED_ARG(executable_offset);
4494 
4495 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
4496 	*ptr = new_target;
4497 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
4498 	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
4499 }
4500 
sljit_set_const(sljit_uw addr,sljit_sw new_constant,sljit_sw executable_offset)4501 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4502 {
4503 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
4504 }
4505 
sljit_emit_put_label(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)4506 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
4507 	struct sljit_compiler *compiler,
4508 	sljit_s32 dst, sljit_sw dstw)
4509 {
4510 	struct sljit_put_label *put_label;
4511 	sljit_gpr dst_r;
4512 
4513 	CHECK_ERROR_PTR();
4514 	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
4515 	ADJUST_LOCAL_OFFSET(dst, dstw);
4516 
4517 	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
4518 	PTR_FAIL_IF(!put_label);
4519 	set_put_label(put_label, compiler, 0);
4520 
4521 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4522 
4523 	if (have_genext())
4524 		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4525 	else {
4526 		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4527 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4528 	}
4529 
4530 	if (dst & SLJIT_MEM)
4531 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
4532 
4533 	return put_label;
4534 }
4535 
/* End-of-file cleanup: remove helper macros defined earlier in this file so
   their names are not visible to any code that follows. */

/* TODO(carenas): EVAL probably should move up or be refactored */
#undef WHEN2
#undef EVAL

/* tmp0 and tmp1 are the names the emitters above use for their scratch
   registers. */
#undef tmp1
#undef tmp0

/* TODO(carenas): undef other macros that spill like is_u12? */
4544