1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
29 #ifdef __ARCH__
30 #define ENABLE_STATIC_FACILITY_DETECTION 1
31 #else
32 #define ENABLE_STATIC_FACILITY_DETECTION 0
33 #endif
34 #define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38 	return "s390x" SLJIT_CPUINFO;
39 }
40 
41 /* Instructions. */
42 typedef sljit_uw sljit_ins;
43 
44 /* Instruction tags (most significant halfword). */
45 static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;
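/* s390x instructions are 2, 4 or 6 bytes long, so only the low 48 bits of an
   sljit_ins are ever used for encoding; the upper halfword is free to carry
   tags like sljit_ins_const, which must be cleared before emitting. */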
46 
47 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
48 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
49 
50 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
51 	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
52 };
53 
54 /* there are also a[2-15] available, but they are slower to access and
55  * their use is limited as mundaym explained:
56  *   https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
57  */
58 
59 /* General Purpose Registers [0-15]. */
60 typedef sljit_uw sljit_gpr;
61 
62 /*
63  * WARNING
64  * the following code is non standard and should be improved for
65  * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
66  * registers because r0 and r1 are the ABI recommended volatiles.
67  * there is a gpr() function that maps sljit to physical register numbers
68  * that should be used instead of the usual index into reg_map[] and
69  * will be retired ASAP (TODO: carenas)
70  */
71 
72 static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
73 static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
74 static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
75 static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
76 static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
77 static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
78 static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
79 static const sljit_gpr r7 = 7;		/* reg_map[6] */
80 static const sljit_gpr r8 = 8;		/* reg_map[7] */
81 static const sljit_gpr r9 = 9;		/* reg_map[8] */
82 static const sljit_gpr r10 = 10;	/* reg_map[9] */
83 static const sljit_gpr r11 = 11;	/* reg_map[10] */
84 static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
85 static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
86 static const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
87 static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
88 
89 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
90 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
91 /* TODO(carenas): r13 usually points to the literal pool per the ABI; using a tmp
92  *                like we do now might be faster though, reserve?
93  */
94 
95 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
96 #define tmp0	r0
97 #define tmp1	r1
98 
99 /* TODO(carenas): flags should move to a different register so that
100  *                link register doesn't need to change
101  */
102 
103 /* When reg cannot be unused. */
104 #define IS_GPR_REG(reg)		(((reg) > 0) && ((reg) <= SLJIT_SP))
105 
106 /* Link register. */
107 static const sljit_gpr link_r = 14;     /* r14 */
108 
109 #define TMP_FREG1	(0)
110 
111 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
112 	1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
113 };
114 
115 #define R0A(r) (r)
116 #define R4A(r) ((r) << 4)
117 #define R8A(r) ((r) << 8)
118 #define R12A(r) ((r) << 12)
119 #define R16A(r) ((r) << 16)
120 #define R20A(r) ((r) << 20)
121 #define R28A(r) ((r) << 28)
122 #define R32A(r) ((r) << 32)
123 #define R36A(r) ((r) << 36)
124 
125 #define R0(r) ((sljit_ins)reg_map[r])
126 
127 #define F0(r) ((sljit_ins)freg_map[r])
128 #define F4(r) (R4A((sljit_ins)freg_map[r]))
129 #define F20(r) (R20A((sljit_ins)freg_map[r]))
130 #define F36(r) (R36A((sljit_ins)freg_map[r]))
131 
132 struct sljit_s390x_const {
133 	struct sljit_const const_; /* must be first */
134 	sljit_sw init_value;       /* required to build literal pool */
135 };
136 
137 /* Convert SLJIT register to hardware register. */
138 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
139 {
140 	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
141 	return reg_map[r];
142 }
143 
144 static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
145 {
146 	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
147 	return freg_map[r];
148 }
149 
150 /* Size of instruction in bytes. Tags must already be cleared. */
151 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
152 {
153 	/* keep faulting instructions */
154 	if (ins == 0)
155 		return 2;
156 
157 	if ((ins & 0x00000000ffffL) == ins)
158 		return 2;
159 	if ((ins & 0x0000ffffffffL) == ins)
160 		return 4;
161 	if ((ins & 0xffffffffffffL) == ins)
162 		return 6;
163 
164 	SLJIT_UNREACHABLE();
165 	return (sljit_uw)-1;
166 }
167 
168 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
169 {
170 	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
171 	FAIL_IF(!ibuf);
172 	*ibuf = ins;
173 	compiler->size++;
174 	return SLJIT_SUCCESS;
175 }
176 
177 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
178 {
179 	sljit_u16 *ibuf = (sljit_u16 *)*ptr;
180 	sljit_uw size = sizeof_ins(ins);
181 
182 	SLJIT_ASSERT((size & 6) == size);
183 	switch (size) {
184 	case 6:
185 		*ibuf++ = (sljit_u16)(ins >> 32);
186 		/* fallthrough */
187 	case 4:
188 		*ibuf++ = (sljit_u16)(ins >> 16);
189 		/* fallthrough */
190 	case 2:
191 		*ibuf++ = (sljit_u16)(ins);
192 	}
193 	*ptr = (void*)ibuf;
194 	return SLJIT_SUCCESS;
195 }
196 
197 #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
198 	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
199 		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
200 
201 /* Map the given type to a 4-bit condition code mask. */
202 static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
203 	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
204 	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
205 	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
206 	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
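	/* After a (non-compare) add or subtract the condition code encodes
	   {zero, <zero, >zero, overflow} rather than the comparison result
	   {equal, low, high}, hence the special cases below. */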
207 
208 	switch (type) {
209 	case SLJIT_EQUAL:
210 		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
211 			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
212 			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
213 				return cc0;
214 			if (type == SLJIT_OVERFLOW)
215 				return (cc0 | cc3);
216 			return (cc0 | cc2);
217 		}
218 		/* fallthrough */
219 
220 	case SLJIT_F_EQUAL:
221 	case SLJIT_ORDERED_EQUAL:
222 		return cc0;
223 
224 	case SLJIT_NOT_EQUAL:
225 		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
226 			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
227 			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
228 				return (cc1 | cc2 | cc3);
229 			if (type == SLJIT_OVERFLOW)
230 				return (cc1 | cc2);
231 			return (cc1 | cc3);
232 		}
233 		/* fallthrough */
234 
235 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
236 		return (cc1 | cc2 | cc3);
237 
238 	case SLJIT_LESS:
239 		return cc1;
240 
241 	case SLJIT_GREATER_EQUAL:
242 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
243 		return (cc0 | cc2 | cc3);
244 
245 	case SLJIT_GREATER:
246 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
247 			return cc2;
248 		return cc3;
249 
250 	case SLJIT_LESS_EQUAL:
251 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
252 			return (cc0 | cc1);
253 		return (cc0 | cc1 | cc2);
254 
255 	case SLJIT_SIG_LESS:
256 	case SLJIT_F_LESS:
257 	case SLJIT_ORDERED_LESS:
258 		return cc1;
259 
260 	case SLJIT_NOT_CARRY:
261 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
262 			return (cc2 | cc3);
263 		/* fallthrough */
264 
265 	case SLJIT_SIG_LESS_EQUAL:
266 	case SLJIT_F_LESS_EQUAL:
267 	case SLJIT_ORDERED_LESS_EQUAL:
268 		return (cc0 | cc1);
269 
270 	case SLJIT_CARRY:
271 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
272 			return (cc0 | cc1);
273 		/* fallthrough */
274 
275 	case SLJIT_SIG_GREATER:
276 	case SLJIT_UNORDERED_OR_GREATER:
277 		/* Overflow is considered greater, see SLJIT_SUB. */
278 		return cc2 | cc3;
279 
280 	case SLJIT_SIG_GREATER_EQUAL:
281 		return (cc0 | cc2 | cc3);
282 
283 	case SLJIT_OVERFLOW:
284 		if (compiler->status_flags_state & SLJIT_SET_Z)
285 			return (cc2 | cc3);
286 		/* fallthrough */
287 
288 	case SLJIT_UNORDERED:
289 		return cc3;
290 
291 	case SLJIT_NOT_OVERFLOW:
292 		if (compiler->status_flags_state & SLJIT_SET_Z)
293 			return (cc0 | cc1);
294 		/* fallthrough */
295 
296 	case SLJIT_ORDERED:
297 		return (cc0 | cc1 | cc2);
298 
299 	case SLJIT_F_NOT_EQUAL:
300 	case SLJIT_ORDERED_NOT_EQUAL:
301 		return (cc1 | cc2);
302 
303 	case SLJIT_F_GREATER:
304 	case SLJIT_ORDERED_GREATER:
305 		return cc2;
306 
307 	case SLJIT_F_GREATER_EQUAL:
308 	case SLJIT_ORDERED_GREATER_EQUAL:
309 		return (cc0 | cc2);
310 
311 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
312 		return (cc0 | cc1 | cc3);
313 
314 	case SLJIT_UNORDERED_OR_EQUAL:
315 		return (cc0 | cc3);
316 
317 	case SLJIT_UNORDERED_OR_LESS:
318 		return (cc1 | cc3);
319 	}
320 
321 	SLJIT_UNREACHABLE();
322 	return (sljit_u8)-1;
323 }
324 
325 /* Facility to bit index mappings.
326    Note: some facilities share the same bit index. */
327 typedef sljit_uw facility_bit;
328 #define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
329 #define FAST_LONG_DISPLACEMENT_FACILITY 19
330 #define EXTENDED_IMMEDIATE_FACILITY 21
331 #define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
332 #define DISTINCT_OPERAND_FACILITY 45
333 #define HIGH_WORD_FACILITY 45
334 #define POPULATION_COUNT_FACILITY 45
335 #define LOAD_STORE_ON_CONDITION_1_FACILITY 45
336 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
337 #define LOAD_STORE_ON_CONDITION_2_FACILITY 53
338 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
339 #define VECTOR_FACILITY 129
340 #define VECTOR_ENHANCEMENTS_1_FACILITY 135
341 
342 /* Report whether a facility is known to be present due to the compiler
343    settings. This function should always be compiled to a constant
344    value given a constant argument. */
345 static SLJIT_INLINE int have_facility_static(facility_bit x)
346 {
347 #if ENABLE_STATIC_FACILITY_DETECTION
348 	switch (x) {
349 	case FAST_LONG_DISPLACEMENT_FACILITY:
350 		return (__ARCH__ >=  6 /* z990 */);
351 	case EXTENDED_IMMEDIATE_FACILITY:
352 	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
353 		return (__ARCH__ >=  7 /* z9-109 */);
354 	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
355 		return (__ARCH__ >=  8 /* z10 */);
356 	case DISTINCT_OPERAND_FACILITY:
357 		return (__ARCH__ >=  9 /* z196 */);
358 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
359 		return (__ARCH__ >= 10 /* zEC12 */);
360 	case LOAD_STORE_ON_CONDITION_2_FACILITY:
361 	case VECTOR_FACILITY:
362 		return (__ARCH__ >= 11 /* z13 */);
363 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
364 	case VECTOR_ENHANCEMENTS_1_FACILITY:
365 		return (__ARCH__ >= 12 /* z14 */);
366 	default:
367 		SLJIT_UNREACHABLE();
368 	}
369 #endif
370 	return 0;
371 }
372 
373 static SLJIT_INLINE unsigned long get_hwcap()
374 {
375 	static unsigned long hwcap = 0;
376 	if (SLJIT_UNLIKELY(!hwcap)) {
377 		hwcap = getauxval(AT_HWCAP);
378 		SLJIT_ASSERT(hwcap != 0);
379 	}
380 	return hwcap;
381 }
382 
383 static SLJIT_INLINE int have_stfle()
384 {
385 	if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
386 		return 1;
387 
388 	return (get_hwcap() & HWCAP_S390_STFLE);
389 }
390 
391 /* Report whether the given facility is available. This function always
392    performs a runtime check. */
393 static int have_facility_dynamic(facility_bit x)
394 {
395 #if ENABLE_DYNAMIC_FACILITY_DETECTION
396 	static struct {
397 		sljit_uw bits[4];
398 	} cpu_features;
399 	size_t size = sizeof(cpu_features);
400 	const sljit_uw word_index = x >> 6;
401 	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));
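	/* Facility bits are numbered from the most significant bit of the
	   STFLE result, so facility 0 is the top bit of the first doubleword. */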
402 
403 	SLJIT_ASSERT(x < size * 8);
404 	if (SLJIT_UNLIKELY(!have_stfle()))
405 		return 0;
406 
407 	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
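		/* stfle takes the number of result doublewords minus one in r0
		   and stores the facility list at the second-operand address. */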
408 		__asm__ __volatile__ (
409 			"lgr   %%r0, %0;"
410 			"stfle 0(%1);"
411 			/* outputs  */:
412 			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
413 			/* clobbers */: "r0", "cc", "memory"
414 		);
415 		SLJIT_ASSERT(cpu_features.bits[0] != 0);
416 	}
417 	return (cpu_features.bits[word_index] & bit_index) != 0;
418 #else
419 	return 0;
420 #endif
421 }
422 
423 #define HAVE_FACILITY(name, bit) \
424 static SLJIT_INLINE int name() \
425 { \
426 	static int have = -1; \
427 	/* Static check first. May allow the function to be optimized away. */ \
428 	if (have_facility_static(bit)) \
429 		have = 1; \
430 	else if (SLJIT_UNLIKELY(have < 0)) \
431 		have = have_facility_dynamic(bit) ? 1 : 0; \
432 \
433 	return have; \
434 }
435 
436 HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
437 HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
438 HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
439 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
440 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
441 HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
442 #undef HAVE_FACILITY
443 
444 #define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
445 #define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)
446 
447 #define CHECK_SIGNED(v, bitlen) \
448 	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))
449 
450 #define is_s8(d)	CHECK_SIGNED((d), 8)
451 #define is_s16(d)	CHECK_SIGNED((d), 16)
452 #define is_s20(d)	CHECK_SIGNED((d), 20)
453 #define is_s32(d)	((d) == (sljit_s32)(d))
454 
455 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
456 {
457 	SLJIT_ASSERT(is_s20(d));
458 
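	/* Split the displacement for the 6-byte RXY/RSY templates used below:
	   DL (low 12 bits) ends up in bits 16-27 and DH (high 8 bits) in
	   bits 8-15 of the instruction value. */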
459 	sljit_uw dh = (d >> 12) & 0xff;
460 	sljit_uw dl = (d << 8) & 0xfff00;
461 	return (dh | dl) << 8;
462 }
463 
464 /* TODO(carenas): variadic macro is not strictly needed */
465 #define SLJIT_S390X_INSTRUCTION(op, ...) \
466 static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
467 
468 /* RR form instructions. */
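/* RR instructions are 2 bytes: 8-bit opcode, R1 in bits 8-11, R2 in bits 12-15. */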
469 #define SLJIT_S390X_RR(name, pattern) \
470 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
471 { \
472 	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
473 }
474 
475 /* AND */
476 SLJIT_S390X_RR(nr,   0x1400)
477 
478 /* BRANCH AND SAVE */
479 SLJIT_S390X_RR(basr, 0x0d00)
480 
481 /* BRANCH ON CONDITION */
482 SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */
483 
484 /* DIVIDE */
485 SLJIT_S390X_RR(dr,   0x1d00)
486 
487 /* EXCLUSIVE OR */
488 SLJIT_S390X_RR(xr,   0x1700)
489 
490 /* LOAD */
491 SLJIT_S390X_RR(lr,   0x1800)
492 
493 /* LOAD COMPLEMENT */
494 SLJIT_S390X_RR(lcr,  0x1300)
495 
496 /* OR */
497 SLJIT_S390X_RR(or,   0x1600)
498 
499 #undef SLJIT_S390X_RR
500 
501 /* RRE form instructions */
502 #define SLJIT_S390X_RRE(name, pattern) \
503 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
504 { \
505 	return (pattern) | R4A(dst) | R0A(src); \
506 }
507 
508 /* AND */
509 SLJIT_S390X_RRE(ngr,   0xb9800000)
510 
511 /* DIVIDE LOGICAL */
512 SLJIT_S390X_RRE(dlr,   0xb9970000)
513 SLJIT_S390X_RRE(dlgr,  0xb9870000)
514 
515 /* DIVIDE SINGLE */
516 SLJIT_S390X_RRE(dsgr,  0xb90d0000)
517 
518 /* EXCLUSIVE OR */
519 SLJIT_S390X_RRE(xgr,   0xb9820000)
520 
521 /* LOAD */
522 SLJIT_S390X_RRE(lgr,   0xb9040000)
523 SLJIT_S390X_RRE(lgfr,  0xb9140000)
524 
525 /* LOAD BYTE */
526 SLJIT_S390X_RRE(lbr,   0xb9260000)
527 SLJIT_S390X_RRE(lgbr,  0xb9060000)
528 
529 /* LOAD COMPLEMENT */
530 SLJIT_S390X_RRE(lcgr,  0xb9030000)
531 
532 /* LOAD HALFWORD */
533 SLJIT_S390X_RRE(lhr,   0xb9270000)
534 SLJIT_S390X_RRE(lghr,  0xb9070000)
535 
536 /* LOAD LOGICAL */
537 SLJIT_S390X_RRE(llgfr, 0xb9160000)
538 
539 /* LOAD LOGICAL CHARACTER */
540 SLJIT_S390X_RRE(llcr,  0xb9940000)
541 SLJIT_S390X_RRE(llgcr, 0xb9840000)
542 
543 /* LOAD LOGICAL HALFWORD */
544 SLJIT_S390X_RRE(llhr,  0xb9950000)
545 SLJIT_S390X_RRE(llghr, 0xb9850000)
546 
547 /* MULTIPLY LOGICAL */
548 SLJIT_S390X_RRE(mlgr,  0xb9860000)
549 
550 /* MULTIPLY SINGLE */
551 SLJIT_S390X_RRE(msgfr, 0xb91c0000)
552 
553 /* OR */
554 SLJIT_S390X_RRE(ogr,   0xb9810000)
555 
556 /* SUBTRACT */
557 SLJIT_S390X_RRE(sgr,   0xb9090000)
558 
559 #undef SLJIT_S390X_RRE
560 
561 /* RI-a form instructions */
562 #define SLJIT_S390X_RIA(name, pattern, imm_type) \
563 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
564 { \
565 	return (pattern) | R20A(reg) | (imm & 0xffff); \
566 }
567 
568 /* ADD HALFWORD IMMEDIATE */
569 SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)
570 
571 /* LOAD HALFWORD IMMEDIATE */
572 SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
573 SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)
574 
575 /* LOAD LOGICAL IMMEDIATE */
576 SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
577 SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
578 SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
579 SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
580 
581 /* MULTIPLY HALFWORD IMMEDIATE */
582 SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
583 SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)
584 
585 /* OR IMMEDIATE */
586 SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)
587 
588 #undef SLJIT_S390X_RIA
589 
590 /* RIL-a form instructions (requires extended immediate facility) */
591 #define SLJIT_S390X_RILA(name, pattern, imm_type) \
592 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
593 { \
594 	SLJIT_ASSERT(have_eimm()); \
595 	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
596 }
597 
598 /* ADD IMMEDIATE */
599 SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)
600 
601 /* ADD IMMEDIATE HIGH */
602 SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
603 
604 /* AND IMMEDIATE */
605 SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)
606 
607 /* EXCLUSIVE OR IMMEDIATE */
608 SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)
609 
610 /* INSERT IMMEDIATE */
611 SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
612 SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)
613 
614 /* LOAD IMMEDIATE */
615 SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)
616 
617 /* LOAD LOGICAL IMMEDIATE */
618 SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
619 SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
620 
621 /* SUBTRACT LOGICAL IMMEDIATE */
622 SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)
623 
624 #undef SLJIT_S390X_RILA
625 
626 /* RX-a form instructions */
627 #define SLJIT_S390X_RXA(name, pattern) \
628 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
629 { \
630 	SLJIT_ASSERT((d & 0xfff) == d); \
631 \
632 	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
633 }
634 
635 /* LOAD */
636 SLJIT_S390X_RXA(l,   0x58000000)
637 
638 /* LOAD ADDRESS */
639 SLJIT_S390X_RXA(la,  0x41000000)
640 
641 /* LOAD HALFWORD */
642 SLJIT_S390X_RXA(lh,  0x48000000)
643 
644 /* MULTIPLY SINGLE */
645 SLJIT_S390X_RXA(ms,  0x71000000)
646 
647 /* STORE */
648 SLJIT_S390X_RXA(st,  0x50000000)
649 
650 /* STORE CHARACTER */
651 SLJIT_S390X_RXA(stc, 0x42000000)
652 
653 /* STORE HALFWORD */
654 SLJIT_S390X_RXA(sth, 0x40000000)
655 
656 #undef SLJIT_S390X_RXA
657 
658 /* RXY-a instructions */
659 #define SLJIT_S390X_RXYA(name, pattern, cond) \
660 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
661 { \
662 	SLJIT_ASSERT(cond); \
663 \
664 	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
665 }
666 
667 /* LOAD */
668 SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
669 SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
670 SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)
671 
672 /* LOAD BYTE */
673 SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
674 SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())
675 
676 /* LOAD HALFWORD */
677 SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
678 SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)
679 
680 /* LOAD LOGICAL */
681 SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)
682 
683 /* LOAD LOGICAL CHARACTER */
684 SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
685 SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)
686 
687 /* LOAD LOGICAL HALFWORD */
688 SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
689 SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)
690 
691 /* MULTIPLY SINGLE */
692 SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
693 SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)
694 
695 /* STORE */
696 SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
697 SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)
698 
699 /* STORE CHARACTER */
700 SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())
701 
702 /* STORE HALFWORD */
703 SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())
704 
705 #undef SLJIT_S390X_RXYA
706 
707 /* RSY-a instructions */
708 #define SLJIT_S390X_RSYA(name, pattern, cond) \
709 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
710 { \
711 	SLJIT_ASSERT(cond); \
712 \
713 	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
714 }
715 
716 /* LOAD MULTIPLE */
717 SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)
718 
719 /* SHIFT LEFT LOGICAL */
720 SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)
721 
722 /* SHIFT RIGHT SINGLE */
723 SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)
724 
725 /* STORE MULTIPLE */
726 SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)
727 
728 #undef SLJIT_S390X_RSYA
729 
730 /* RIE-f instructions (require general-instructions-extension facility) */
731 #define SLJIT_S390X_RIEF(name, pattern) \
732 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
733 { \
734 	sljit_ins i3, i4, i5; \
735 \
736 	SLJIT_ASSERT(have_genext()); \
737 	i3 = (sljit_ins)start << 24; \
738 	i4 = (sljit_ins)end << 16; \
739 	i5 = (sljit_ins)rot << 8; \
740 \
741 	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
742 }
743 
744 /* ROTATE THEN AND SELECTED BITS */
745 /* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */
746 
747 /* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
748 /* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */
749 
750 /* ROTATE THEN OR SELECTED BITS */
751 SLJIT_S390X_RIEF(rosbg,  0xec0000000056)
752 
753 /* ROTATE THEN INSERT SELECTED BITS */
754 /* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
755 /* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
756 
757 /* ROTATE THEN INSERT SELECTED BITS HIGH */
758 SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
759 
760 /* ROTATE THEN INSERT SELECTED BITS LOW */
761 /* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
762 
763 #undef SLJIT_S390X_RIEF
764 
765 /* RRF-c instructions (require load/store-on-condition 1 facility) */
766 #define SLJIT_S390X_RRFC(name, pattern) \
767 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
768 { \
769 	sljit_ins m3; \
770 \
771 	SLJIT_ASSERT(have_lscond1()); \
772 	m3 = (sljit_ins)(mask & 0xf) << 12; \
773 \
774 	return (pattern) | m3 | R4A(dst) | R0A(src); \
775 }
776 
777 /* LOAD ON CONDITION */
778 SLJIT_S390X_RRFC(locr,  0xb9f20000)
779 SLJIT_S390X_RRFC(locgr, 0xb9e20000)
780 
781 #undef SLJIT_S390X_RRFC
782 
783 /* RIE-g instructions (require load/store-on-condition 2 facility) */
784 #define SLJIT_S390X_RIEG(name, pattern) \
785 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
786 { \
787 	sljit_ins m3, i2; \
788 \
789 	SLJIT_ASSERT(have_lscond2()); \
790 	m3 = (sljit_ins)(mask & 0xf) << 32; \
791 	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
792 \
793 	return (pattern) | R36A(reg) | m3 | i2; \
794 }
795 
796 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
797 SLJIT_S390X_RIEG(lochi,  0xec0000000042)
798 SLJIT_S390X_RIEG(locghi, 0xec0000000046)
799 
800 #undef SLJIT_S390X_RIEG
801 
802 #define SLJIT_S390X_RILB(name, pattern, cond) \
803 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
804 { \
805 	SLJIT_ASSERT(cond); \
806 \
807 	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
808 }
809 
810 /* BRANCH RELATIVE AND SAVE LONG */
811 SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
812 
813 /* LOAD ADDRESS RELATIVE LONG */
814 SLJIT_S390X_RILB(larl,  0xc00000000000, 1)
815 
816 /* LOAD RELATIVE LONG */
817 SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())
818 
819 #undef SLJIT_S390X_RILB
820 
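/* Unconditional branch: br encodes bcr with an all-ones (always taken) mask. */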
821 SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
822 {
823 	return 0x07f0 | target;
824 }
825 
826 SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
827 {
828 	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
829 	sljit_ins ri2 = (sljit_ins)target & 0xffff;
830 	return 0xa7040000L | m1 | ri2;
831 }
832 
833 SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
834 {
835 	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
836 	sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
837 	return 0xc00400000000L | m1 | ri2;
838 }
839 
840 SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
841 {
842 	SLJIT_ASSERT(have_eimm());
843 	return 0xb9830000 | R8A(dst) | R0A(src);
844 }
845 
846 /* INSERT PROGRAM MASK */
847 SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
848 {
849 	return 0xb2220000 | R4A(dst);
850 }
851 
852 /* SET PROGRAM MASK */
853 SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
854 {
855 	return 0x0400 | R4A(dst);
856 }
857 
858 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
859 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
860 {
861 	return risbhg(dst, src, start, 0x8 | end, rot);
862 }
863 
864 #undef SLJIT_S390X_INSTRUCTION
865 
866 static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
867 {
868 	/* Condition codes: bits 18 and 19.
869 	   Transformation:
870 	     0 (zero and no overflow) : unchanged
871 	     1 (non-zero and no overflow) : unchanged
872 	     2 (zero and overflow) : decreased by 1
873 	     3 (non-zero and overflow) : decreased by 1 if non-zero */
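	/* The brc targets below are halfword counts measured from the branch
	   instruction itself: brc (2) + ipm (2) + or/ogr (1 or 2) + brc (2)
	   + slfi (3) + spm (1). */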
874 	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
875 	FAIL_IF(push_inst(compiler, ipm(tmp1)));
876 	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
877 	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
878 	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
879 	FAIL_IF(push_inst(compiler, spm(tmp1)));
880 	return SLJIT_SUCCESS;
881 }
882 
883 /* load 64-bit immediate into register without clobbering flags */
884 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
885 {
886 	/* 4 byte instructions */
887 	if (is_s16(v))
888 		return push_inst(compiler, lghi(target, (sljit_s16)v));
889 
890 	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
891 		return push_inst(compiler, llill(target, (sljit_u16)v));
892 
893 	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
894 		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
895 
896 	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
897 		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
898 
899 	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
900 		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
901 
902 	/* 6 byte instructions (requires extended immediate facility) */
903 	if (have_eimm()) {
904 		if (is_s32(v))
905 			return push_inst(compiler, lgfi(target, (sljit_s32)v));
906 
907 		if (((sljit_uw)v >> 32) == 0)
908 			return push_inst(compiler, llilf(target, (sljit_u32)v));
909 
910 		if (((sljit_uw)v << 32) == 0)
911 			return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
912 
913 		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
914 		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
915 	}
916 
917 	/* TODO(mundaym): instruction sequences that don't use extended immediates */
918 	abort();
919 }
920 
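/* Decomposed D(X,B) memory operand: the effective address is
   offset + index + base, with r0 as base or index reading as zero. */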
921 struct addr {
922 	sljit_gpr base;
923 	sljit_gpr index;
924 	sljit_s32 offset;
925 };
926 
927 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
928 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
929 	struct addr *addr, sljit_s32 mem, sljit_sw off,
930 	sljit_gpr tmp /* clobbered, must not be r0 */)
931 {
932 	sljit_gpr base = r0;
933 	sljit_gpr index = r0;
934 
935 	SLJIT_ASSERT(tmp != r0);
936 	if (mem & REG_MASK)
937 		base = gpr(mem & REG_MASK);
938 
939 	if (mem & OFFS_REG_MASK) {
940 		index = gpr(OFFS_REG(mem));
941 		if (off != 0) {
942 			/* shift and put the result into tmp */
943 			SLJIT_ASSERT(0 <= off && off < 64);
944 			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
945 			index = tmp;
946 			off = 0; /* clear offset */
947 		}
948 	}
949 	else if (!is_s20(off)) {
950 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
951 		index = tmp;
952 		off = 0; /* clear offset */
953 	}
954 	addr->base = base;
955 	addr->index = index;
956 	addr->offset = (sljit_s32)off;
957 	return SLJIT_SUCCESS;
958 }
959 
960 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
961 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
962 	struct addr *addr, sljit_s32 mem, sljit_sw off,
963 	sljit_gpr tmp /* clobbered, must not be r0 */)
964 {
965 	sljit_gpr base = r0;
966 	sljit_gpr index = r0;
967 
968 	SLJIT_ASSERT(tmp != r0);
969 	if (mem & REG_MASK)
970 		base = gpr(mem & REG_MASK);
971 
972 	if (mem & OFFS_REG_MASK) {
973 		index = gpr(OFFS_REG(mem));
974 		if (off != 0) {
975 			/* shift and put the result into tmp */
976 			SLJIT_ASSERT(0 <= off && off < 64);
977 			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
978 			index = tmp;
979 			off = 0; /* clear offset */
980 		}
981 	}
982 	else if (!is_u12(off)) {
983 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
984 		index = tmp;
985 		off = 0; /* clear offset */
986 	}
987 	addr->base = base;
988 	addr->index = index;
989 	addr->offset = (sljit_s32)off;
990 	return SLJIT_SUCCESS;
991 }
992 
993 #define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
994 #define WHEN(cond, r, i1, i2, addr) \
995 	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
996 
997 /* May clobber tmp1. */
998 static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
999 		sljit_s32 src, sljit_sw srcw,
1000 		sljit_s32 is_32bit)
1001 {
1002 	struct addr addr;
1003 	sljit_ins ins;
1004 
1005 	SLJIT_ASSERT(src & SLJIT_MEM);
1006 
1007 	if (is_32bit && ((src & OFFS_REG_MASK) || is_u12(srcw) || !is_s20(srcw))) {
1008 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
1009 		return push_inst(compiler, 0x58000000 /* l */ | R20A(dst_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
1010 	}
1011 
1012 	FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1013 
1014 	ins = is_32bit ? 0xe30000000058 /* ly */ : 0xe30000000004 /* lg */;
1015 	return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1016 }
1017 
1018 /* May clobber tmp1. */
1019 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1020 		sljit_s32 src, sljit_sw srcw,
1021 		sljit_s32 is_32bit)
1022 {
1023 	struct addr addr;
1024 	sljit_ins ins;
1025 
1026 	SLJIT_ASSERT(src & SLJIT_MEM);
1027 
1028 	FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1029 
1030 	ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1031 	return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1032 }
1033 
1034 /* May clobber tmp1. */
1035 static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1036 		sljit_s32 dst, sljit_sw dstw,
1037 		sljit_s32 is_32bit)
1038 {
1039 	struct addr addr;
1040 	sljit_ins ins;
1041 
1042 	SLJIT_ASSERT(dst & SLJIT_MEM);
1043 
1044 	if (is_32bit && ((dst & OFFS_REG_MASK) || is_u12(dstw) || !is_s20(dstw))) {
1045 		FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));
1046 		return push_inst(compiler, 0x50000000 /* st */ | R20A(src_r) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
1047 	}
1048 
1049 	FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
1050 
1051 	ins = is_32bit ? 0xe30000000050 /* sty */ : 0xe30000000024 /* stg */;
1052 	return push_inst(compiler, ins | R36A(src_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1053 }
1054 
1055 #undef WHEN
1056 
1057 static sljit_s32 emit_move(struct sljit_compiler *compiler,
1058 	sljit_gpr dst_r,
1059 	sljit_s32 src, sljit_sw srcw)
1060 {
1061 	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
1062 
1063 	if (src & SLJIT_IMM)
1064 		return push_load_imm_inst(compiler, dst_r, srcw);
1065 
1066 	if (src & SLJIT_MEM)
1067 		return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
1068 
1069 	sljit_gpr src_r = gpr(src & REG_MASK);
1070 	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
1071 }
1072 
1073 static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
1074 	sljit_s32 dst,
1075 	sljit_s32 src1, sljit_sw src1w,
1076 	sljit_s32 src2, sljit_sw src2w)
1077 {
1078 	sljit_gpr dst_r = tmp0;
1079 	sljit_gpr src_r = tmp1;
1080 	sljit_s32 needs_move = 1;
1081 
1082 	if (FAST_IS_REG(dst)) {
1083 		dst_r = gpr(dst);
1084 
1085 		if (dst == src1)
1086 			needs_move = 0;
1087 		else if (dst == src2) {
1088 			dst_r = tmp0;
1089 			needs_move = 2;
1090 		}
1091 	}
1092 
1093 	if (needs_move)
1094 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1095 
1096 	if (FAST_IS_REG(src2))
1097 		src_r = gpr(src2);
1098 	else
1099 		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1100 
1101 	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));
1102 
1103 	if (needs_move != 2)
1104 		return SLJIT_SUCCESS;
1105 
1106 	dst_r = gpr(dst & REG_MASK);
1107 	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1108 }
1109 
1110 static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
1111 	sljit_s32 dst,
1112 	sljit_s32 src1, sljit_sw src1w)
1113 {
1114 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1115 	sljit_gpr src_r = tmp1;
1116 
1117 	if (FAST_IS_REG(src1))
1118 		src_r = gpr(src1);
1119 	else
1120 		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
1121 
1122 	return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
1123 }
1124 
1125 static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
1126 	sljit_s32 dst,
1127 	sljit_s32 src1, sljit_sw src1w,
1128 	sljit_s32 src2, sljit_sw src2w)
1129 {
1130 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1131 	sljit_gpr src1_r = tmp0;
1132 	sljit_gpr src2_r = tmp1;
1133 
1134 	if (FAST_IS_REG(src1))
1135 		src1_r = gpr(src1);
1136 	else
1137 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1138 
1139 	if (FAST_IS_REG(src2))
1140 		src2_r = gpr(src2);
1141 	else
1142 		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1143 
1144 	return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
1145 }
1146 
1147 typedef enum {
1148 	RI_A,
1149 	RIL_A,
1150 } emit_ril_type;
1151 
1152 static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
1153 	sljit_s32 dst,
1154 	sljit_s32 src1, sljit_sw src1w,
1155 	sljit_sw src2w,
1156 	emit_ril_type type)
1157 {
1158 	sljit_gpr dst_r = tmp0;
1159 	sljit_s32 needs_move = 1;
1160 
1161 	if (FAST_IS_REG(dst)) {
1162 		dst_r = gpr(dst);
1163 
1164 		if (dst == src1)
1165 			needs_move = 0;
1166 	}
1167 
1168 	if (needs_move)
1169 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1170 
1171 	if (type == RIL_A)
1172 		return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
1173 	return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
1174 }
1175 
1176 static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
1177 	sljit_s32 dst,
1178 	sljit_s32 src1, sljit_sw src1w,
1179 	sljit_sw src2w)
1180 {
1181 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1182 	sljit_gpr src_r = tmp0;
1183 
1184 	if (!FAST_IS_REG(src1))
1185 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1186 	else
1187 		src_r = gpr(src1 & REG_MASK);
1188 
1189 	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
1190 }
1191 
1192 typedef enum {
1193 	RX_A,
1194 	RXY_A,
1195 } emit_rx_type;
1196 
1197 static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
1198 	sljit_s32 dst,
1199 	sljit_s32 src1, sljit_sw src1w,
1200 	sljit_s32 src2, sljit_sw src2w,
1201 	emit_rx_type type)
1202 {
1203 	sljit_gpr dst_r = tmp0;
1204 	sljit_s32 needs_move = 1;
1205 	sljit_gpr base, index;
1206 
1207 	SLJIT_ASSERT(src2 & SLJIT_MEM);
1208 
1209 	if (FAST_IS_REG(dst)) {
1210 		dst_r = gpr(dst);
1211 
1212 		if (dst == src1)
1213 			needs_move = 0;
1214 		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
1215 			dst_r = tmp0;
1216 			needs_move = 2;
1217 		}
1218 	}
1219 
1220 	if (needs_move)
1221 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1222 
1223 	base = gpr(src2 & REG_MASK);
1224 	index = tmp0;
1225 
1226 	if (src2 & OFFS_REG_MASK) {
1227 		index = gpr(OFFS_REG(src2));
1228 
1229 		if (src2w != 0) {
1230 			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
1231 			src2w = 0;
1232 			index = tmp1;
1233 		}
1234 	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
1235 		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));
1236 
1237 		if (src2 & REG_MASK)
1238 			index = tmp1;
1239 		else
1240 			base = tmp1;
1241 		src2w = 0;
1242 	}
1243 
1244 	if (type == RX_A)
1245 		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
1246 	else
1247 		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);
1248 
1249 	FAIL_IF(push_inst(compiler, ins));
1250 
1251 	if (needs_move != 2)
1252 		return SLJIT_SUCCESS;
1253 
1254 	dst_r = gpr(dst);
1255 	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1256 }
1257 
1258 static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
1259 	sljit_s32 dst, sljit_sw dstw,
1260 	sljit_sw srcw)
1261 {
1262 	SLJIT_ASSERT(dst & SLJIT_MEM);
1263 
1264 	sljit_gpr dst_r = tmp1;
1265 
1266 	if (dst & OFFS_REG_MASK) {
1267 		sljit_gpr index = tmp1;
1268 
1269 		if ((dstw & 0x3) == 0)
1270 			index = gpr(OFFS_REG(dst));
1271 		else
1272 			FAIL_IF(push_inst(compiler, sllg(tmp1, gpr(OFFS_REG(dst)), dstw & 0x3, 0)));
1273 
1274 		FAIL_IF(push_inst(compiler, la(tmp1, 0, index, gpr(dst & REG_MASK))));
1275 		dstw = 0;
1276 	}
1277 	else if (!is_s20(dstw)) {
1278 		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));
1279 
1280 		if (dst & REG_MASK)
1281 			FAIL_IF(push_inst(compiler, la(tmp1, 0, tmp1, gpr(dst & REG_MASK))));
1282 
1283 		dstw = 0;
1284 	}
1285 	else
1286 		dst_r = gpr(dst & REG_MASK);
1287 
1288 	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
1289 }
1290 
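/* Encodings of one operation in its different forms: op_r/op_gr are the
   32/64-bit register-register forms, op_rk/op_grk the three-operand
   (distinct-operand) forms, and op/op_y/op_g the RX (12-bit displacement),
   RXY (20-bit displacement) and 64-bit RXY memory forms. */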
1291 struct ins_forms {
1292 	sljit_ins op_r;
1293 	sljit_ins op_gr;
1294 	sljit_ins op_rk;
1295 	sljit_ins op_grk;
1296 	sljit_ins op;
1297 	sljit_ins op_y;
1298 	sljit_ins op_g;
1299 };
1300 
1301 static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1302 	sljit_s32 dst,
1303 	sljit_s32 src1, sljit_sw src1w,
1304 	sljit_s32 src2, sljit_sw src2w)
1305 {
1306 	sljit_s32 mode = compiler->mode;
1307 	sljit_ins ins, ins_k;
1308 
1309 	if ((src1 | src2) & SLJIT_MEM) {
1310 		sljit_ins ins12, ins20;
1311 
1312 		if (mode & SLJIT_32) {
1313 			ins12 = forms->op;
1314 			ins20 = forms->op_y;
1315 		}
1316 		else {
1317 			ins12 = 0;
1318 			ins20 = forms->op_g;
1319 		}
1320 
1321 		if (ins12 && ins20) {
1322 			/* Extra instructions needed for address computation can be executed independently. */
1323 			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1324 					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
1325 				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1326 					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1327 
1328 				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1329 			}
1330 
1331 			if (src1 & SLJIT_MEM) {
1332 				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
1333 					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);
1334 
1335 				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
1336 			}
1337 		}
1338 		else if (ins12 || ins20) {
1339 			emit_rx_type rx_type;
1340 
1341 			if (ins12) {
1342 				rx_type = RX_A;
1343 				ins = ins12;
1344 			}
1345 			else {
1346 				rx_type = RXY_A;
1347 				ins = ins20;
1348 			}
1349 
1350 			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1351 					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
1352 				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);
1353 
1354 			if (src1 & SLJIT_MEM)
1355 				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
1356 		}
1357 	}
1358 
1359 	if (mode & SLJIT_32) {
1360 		ins = forms->op_r;
1361 		ins_k = forms->op_rk;
1362 	}
1363 	else {
1364 		ins = forms->op_gr;
1365 		ins_k = forms->op_grk;
1366 	}
1367 
1368 	SLJIT_ASSERT(ins != 0 || ins_k != 0);
1369 
1370 	if (ins && FAST_IS_REG(dst)) {
1371 		if (dst == src1)
1372 			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1373 
1374 		if (dst == src2)
1375 			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
1376 	}
1377 
1378 	if (ins_k == 0)
1379 		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1380 
1381 	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
1382 }
1383 
1384 static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1385 	sljit_s32 dst,
1386 	sljit_s32 src1, sljit_sw src1w,
1387 	sljit_s32 src2, sljit_sw src2w)
1388 {
1389 	sljit_s32 mode = compiler->mode;
1390 	sljit_ins ins;
1391 
1392 	if (src2 & SLJIT_MEM) {
1393 		sljit_ins ins12, ins20;
1394 
1395 		if (mode & SLJIT_32) {
1396 			ins12 = forms->op;
1397 			ins20 = forms->op_y;
1398 		}
1399 		else {
1400 			ins12 = 0;
1401 			ins20 = forms->op_g;
1402 		}
1403 
1404 		if (ins12 && ins20) {
1405 			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1406 				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1407 
1408 			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1409 		}
1410 		else if (ins12)
1411 			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1412 		else if (ins20)
1413 			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1414 	}
1415 
1416 	ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;
1417 
1418 	if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
1419 		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
1420 
1421 	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
1422 }
1423 
1424 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
1425 {
1426 	struct sljit_label *label;
1427 	struct sljit_jump *jump;
1428 	struct sljit_s390x_const *const_;
1429 	struct sljit_put_label *put_label;
1430 	sljit_sw executable_offset;
1431 	sljit_uw ins_size = 0; /* instructions */
1432 	sljit_uw pool_size = 0; /* literal pool */
1433 	sljit_uw pad_size;
1434 	sljit_uw i, j = 0;
1435 	struct sljit_memory_fragment *buf;
1436 	void *code, *code_ptr;
1437 	sljit_uw *pool, *pool_ptr;
1438 	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */
1439 
1440 	CHECK_ERROR_PTR();
1441 	CHECK_PTR(check_sljit_generate_code(compiler));
1442 	reverse_buf(compiler);
1443 
1444 	/* branch handling */
1445 	label = compiler->labels;
1446 	jump = compiler->jumps;
1447 	put_label = compiler->put_labels;
1448 
1449 	/* TODO(carenas): compiler->executable_size could be calculated
1450          *                before to avoid the following loop (except for
1451          *                pool_size)
1452          */
1453 	/* calculate the size of the code */
1454 	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
1455 		sljit_uw len = buf->used_size / sizeof(sljit_ins);
1456 		sljit_ins *ibuf = (sljit_ins *)buf->memory;
1457 		for (i = 0; i < len; ++i, ++j) {
1458 			sljit_ins ins = ibuf[i];
1459 
1460 			/* TODO(carenas): instruction tag vs size/addr == j
1461 			 * using instruction tags for const is creative
1462 			 * but unlike all other architectures, and is not
1463 			 * done consistently for all other objects.
1464 			 * This might need reviewing later.
1465 			 */
1466 			if (ins & sljit_ins_const) {
1467 				pool_size += sizeof(*pool);
1468 				ins &= ~sljit_ins_const;
1469 			}
1470 			if (label && label->size == j) {
1471 				label->size = ins_size;
1472 				label = label->next;
1473 			}
1474 			if (jump && jump->addr == j) {
1475 				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
1476 					/* encoded: */
1477 					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
1478 					/* replace with: */
1479 					/*   lgrl %r1, <pool_addr> */
1480 					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
1481 					pool_size += sizeof(*pool);
1482 					ins_size += 2;
1483 				}
1484 				jump = jump->next;
1485 			}
1486 			if (put_label && put_label->addr == j) {
1487 				pool_size += sizeof(*pool);
1488 				put_label = put_label->next;
1489 			}
1490 			ins_size += sizeof_ins(ins);
1491 		}
1492 	}
1493 
1494 	/* emit trailing label */
1495 	if (label && label->size == j) {
1496 		label->size = ins_size;
1497 		label = label->next;
1498 	}
1499 
1500 	SLJIT_ASSERT(!label);
1501 	SLJIT_ASSERT(!jump);
1502 	SLJIT_ASSERT(!put_label);
1503 
1504 	/* pad the code size to a multiple of 8 bytes so it is accessible with halfword offsets */
1505 	/* the literal pool needs to be doubleword aligned */
1506 	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
1507 	SLJIT_ASSERT(pad_size < 8UL);
1508 
1509 	/* allocate target buffer */
1510 	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
1511 					compiler->exec_allocator_data);
1512 	PTR_FAIL_WITH_EXEC_IF(code);
1513 	code_ptr = code;
1514 	executable_offset = SLJIT_EXEC_OFFSET(code);
1515 
1516 	/* TODO(carenas): pool is optional, and the ABI recommends it to
1517          *                be created before the function code, instead of
1518          *                globally; if the generated code is too big it could
1519          *                need offsets bigger than 32-bit words and assert()
1520          */
1521 	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
1522 	pool_ptr = pool;
1523 	const_ = (struct sljit_s390x_const *)compiler->consts;
1524 
1525 	/* update label addresses */
1526 	label = compiler->labels;
1527 	while (label) {
1528 		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
1529 			(sljit_uw)code_ptr + label->size, executable_offset);
1530 		label = label->next;
1531 	}
1532 
1533 	/* reset jumps */
1534 	jump = compiler->jumps;
1535 	put_label = compiler->put_labels;
1536 
1537 	/* emit the code */
1538 	j = 0;
1539 	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
1540 		sljit_uw len = buf->used_size / sizeof(sljit_ins);
1541 		sljit_ins *ibuf = (sljit_ins *)buf->memory;
1542 		for (i = 0; i < len; ++i, ++j) {
1543 			sljit_ins ins = ibuf[i];
1544 			if (ins & sljit_ins_const) {
1545 				/* clear the const tag */
1546 				ins &= ~sljit_ins_const;
1547 
1548 				/* update instruction with relative address of constant */
1549 				source = (sljit_sw)code_ptr;
1550 				offset = (sljit_sw)pool_ptr - source;
1551 
1552 				SLJIT_ASSERT(!(offset & 1));
1553 				offset >>= 1; /* halfword (not byte) offset */
1554 				SLJIT_ASSERT(is_s32(offset));
1555 
1556 				ins |= (sljit_ins)offset & 0xffffffff;
1557 
1558 				/* update address */
1559 				const_->const_.addr = (sljit_uw)pool_ptr;
1560 
1561 				/* store initial value into pool and update pool address */
1562 				*(pool_ptr++) = (sljit_uw)const_->init_value;
1563 
1564 				/* move to next constant */
1565 				const_ = (struct sljit_s390x_const *)const_->const_.next;
1566 			}
1567 			if (jump && jump->addr == j) {
1568 				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
1569 				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
1570 					jump->addr = (sljit_uw)pool_ptr;
1571 
1572 					/* load address into tmp1 */
1573 					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1574 					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1575 
1576 					SLJIT_ASSERT(!(offset & 1));
1577 					offset >>= 1;
1578 					SLJIT_ASSERT(is_s32(offset));
1579 
1580 					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));
1581 
1582 					/* store jump target into pool and update pool address */
1583 					*(pool_ptr++) = (sljit_uw)target;
1584 
1585 					/* branch to tmp1 */
1586 					sljit_ins op = (ins >> 32) & 0xf;
1587 					sljit_ins arg = (ins >> 36) & 0xf;
1588 					switch (op) {
1589 					case 4: /* brcl -> bcr */
1590 						ins = bcr(arg, tmp1);
1591 						break;
1592 					case 5: /* brasl -> basr */
1593 						ins = basr(arg, tmp1);
1594 						break;
1595 					default:
1596 						abort();
1597 					}
1598 				}
1599 				else {
1600 					jump->addr = (sljit_uw)code_ptr + 2;
1601 					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1602 					offset = target - source;
1603 
1604 					/* offset must be halfword aligned */
1605 					SLJIT_ASSERT(!(offset & 1));
1606 					offset >>= 1;
1607 					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
1608 
1609 					/* patch jump target */
1610 					ins |= (sljit_ins)offset & 0xffffffff;
1611 				}
1612 				jump = jump->next;
1613 			}
1614 			if (put_label && put_label->addr == j) {
1615 				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1616 
1617 				SLJIT_ASSERT(put_label->label);
1618 				put_label->addr = (sljit_uw)code_ptr;
1619 
1620 				/* store target into pool */
1621 				*pool_ptr = put_label->label->addr;
1622 				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1623 				pool_ptr++;
1624 
1625 				SLJIT_ASSERT(!(offset & 1));
1626 				offset >>= 1;
1627 				SLJIT_ASSERT(is_s32(offset));
1628 				ins |= (sljit_ins)offset & 0xffffffff;
1629 
1630 				put_label = put_label->next;
1631 			}
1632 			encode_inst(&code_ptr, ins);
1633 		}
1634 	}
1635 	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
1636 	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);
1637 
1638 	compiler->error = SLJIT_ERR_COMPILED;
1639 	compiler->executable_offset = executable_offset;
1640 	compiler->executable_size = ins_size;
1641 	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1642 	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1643 	SLJIT_CACHE_FLUSH(code, code_ptr);
1644 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1645 	return code;
1646 }
1647 
1648 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1649 {
1650 	/* TODO(mundaym): implement all */
1651 	switch (feature_type) {
1652 	case SLJIT_HAS_FPU:
1653 	case SLJIT_HAS_CLZ:
1654 	case SLJIT_HAS_ROT:
1655 	case SLJIT_HAS_PREFETCH:
1656 		return 1;
1657 	case SLJIT_HAS_CTZ:
1658 		return 2;
1659 	case SLJIT_HAS_CMOV:
1660 		return have_lscond1() ? 1 : 0;
1661 	}
1662 	return 0;
1663 }
1664 
1665 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1666 {
1667 	return (type >= SLJIT_UNORDERED && type <= SLJIT_ORDERED_LESS_EQUAL);
1668 }
1669 
1670 /* --------------------------------------------------------------------- */
1671 /*  Entry, exit                                                          */
1672 /* --------------------------------------------------------------------- */
1673 
1674 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1675 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1676 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1677 {
1678 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1679 	sljit_s32 offset, i, tmp;
1680 
1681 	CHECK_ERROR();
1682 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1683 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1684 
1685 	/* Saved registers are stored in callee allocated save area. */
1686 	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);
1687 
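	/* r15 still addresses the 160-byte register save area at this point; the
	   first two doublewords (back chain and reserved slot) are skipped, which
	   is why the stores start at offset 16. */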
1688 	offset = 2 * SSIZE_OF(sw);
1689 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1690 		if (saved_arg_count == 0) {
1691 			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
1692 			offset += 9 * SSIZE_OF(sw);
1693 		} else {
1694 			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1695 			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
1696 		}
1697 	} else {
1698 		if (scratches == SLJIT_FIRST_SAVED_REG) {
1699 			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
1700 			offset += SSIZE_OF(sw);
1701 		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1702 			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1703 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1704 		}
1705 
1706 		if (saved_arg_count == 0) {
1707 			if (saveds == 0) {
1708 				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1709 				offset += SSIZE_OF(sw);
1710 			} else {
1711 				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
1712 				offset += (saveds + 1) * SSIZE_OF(sw);
1713 			}
1714 		} else if (saveds > saved_arg_count) {
1715 			if (saveds == saved_arg_count + 1) {
1716 				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1717 				offset += SSIZE_OF(sw);
1718 			} else {
1719 				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1720 				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
1721 			}
1722 		}
1723 	}
1724 
1725 	if (saved_arg_count > 0) {
1726 		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1727 		offset += SSIZE_OF(sw);
1728 	}
1729 
1730 	tmp = SLJIT_FS0 - fsaveds;
1731 	for (i = SLJIT_FS0; i > tmp; i--) {
1732 		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1733 		offset += SSIZE_OF(sw);
1734 	}
1735 
1736 	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1737 		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1738 		offset += SSIZE_OF(sw);
1739 	}
1740 
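	/* Add the default frame size (room for the register save area of calls made
	   by the generated code) and keep the stack pointer 16-byte aligned. */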
1741 	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1742 	compiler->local_size = local_size;
1743 
1744 	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
1745 
1746 	if (options & SLJIT_ENTER_REG_ARG)
1747 		return SLJIT_SUCCESS;
1748 
1749 	arg_types >>= SLJIT_ARG_SHIFT;
1750 	saved_arg_count = 0;
1751 	tmp = 0;
1752 	while (arg_types > 0) {
1753 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
1754 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1755 				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
1756 				saved_arg_count++;
1757 			}
1758 			tmp++;
1759 		}
1760 
1761 		arg_types >>= SLJIT_ARG_SHIFT;
1762 	}
1763 
1764 	return SLJIT_SUCCESS;
1765 }
1766 
1767 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1768 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1769 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1770 {
1771 	CHECK_ERROR();
1772 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1773 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1774 
1775 	compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1776 	return SLJIT_SUCCESS;
1777 }
1778 
1779 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
1780 {
1781 	sljit_s32 offset, i, tmp;
1782 	sljit_s32 local_size = compiler->local_size;
1783 	sljit_s32 saveds = compiler->saveds;
1784 	sljit_s32 scratches = compiler->scratches;
1785 	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1786 
1787 	if (is_u12(local_size))
1788 		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
1789 	else
1790 		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
1791 
1792 	offset = 2 * SSIZE_OF(sw);
1793 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1794 		if (kept_saveds_count == 0) {
1795 			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
1796 			offset += 9 * SSIZE_OF(sw);
1797 		} else {
1798 			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1799 			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
1800 		}
1801 	} else {
1802 		if (scratches == SLJIT_FIRST_SAVED_REG) {
1803 			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
1804 			offset += SSIZE_OF(sw);
1805 		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1806 			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1807 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1808 		}
1809 
1810 		if (kept_saveds_count == 0) {
1811 			if (saveds == 0) {
1812 				if (last_reg == r14)
1813 					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1814 				offset += SSIZE_OF(sw);
1815 			} else if (saveds == 1 && last_reg == r13) {
1816 				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
1817 				offset += 2 * SSIZE_OF(sw);
1818 			} else {
1819 				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
1820 				offset += (saveds + 1) * SSIZE_OF(sw);
1821 			}
1822 		} else if (saveds > kept_saveds_count) {
1823 			if (saveds == kept_saveds_count + 1) {
1824 				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1825 				offset += SSIZE_OF(sw);
1826 			} else {
1827 				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1828 				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
1829 			}
1830 		}
1831 	}
1832 
1833 	if (kept_saveds_count > 0) {
1834 		if (last_reg == r14)
1835 			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1836 		offset += SSIZE_OF(sw);
1837 	}
1838 
1839 	tmp = SLJIT_FS0 - compiler->fsaveds;
1840 	for (i = SLJIT_FS0; i > tmp; i--) {
1841 		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1842 		offset += SSIZE_OF(sw);
1843 	}
1844 
1845 	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1846 		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1847 		offset += SSIZE_OF(sw);
1848 	}
1849 
1850 	return SLJIT_SUCCESS;
1851 }
1852 
1853 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1854 {
1855 	CHECK_ERROR();
1856 	CHECK(check_sljit_emit_return_void(compiler));
1857 
1858 	FAIL_IF(emit_stack_frame_release(compiler, r14));
1859 	return push_inst(compiler, br(r14)); /* return */
1860 }
1861 
1862 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1863 	sljit_s32 src, sljit_sw srcw)
1864 {
1865 	CHECK_ERROR();
1866 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1867 
1868 	if (src & SLJIT_MEM) {
1869 		ADJUST_LOCAL_OFFSET(src, srcw);
1870 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
1871 		src = TMP_REG2;
1872 		srcw = 0;
1873 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1874 		FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
1875 		src = TMP_REG2;
1876 		srcw = 0;
1877 	}
1878 
1879 	FAIL_IF(emit_stack_frame_release(compiler, r13));
1880 
1881 	SLJIT_SKIP_CHECKS(compiler);
1882 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1883 }
1884 
1885 /* --------------------------------------------------------------------- */
1886 /*  Operators                                                            */
1887 /* --------------------------------------------------------------------- */
1888 
1889 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1890 {
1891 	sljit_gpr arg0 = gpr(SLJIT_R0);
1892 	sljit_gpr arg1 = gpr(SLJIT_R1);
1893 
1894 	CHECK_ERROR();
1895 	CHECK(check_sljit_emit_op0(compiler, op));
1896 
1897 	op = GET_OPCODE(op) | (op & SLJIT_32);
1898 	switch (op) {
1899 	case SLJIT_BREAKPOINT:
1900 		/* The following invalid instruction is emitted by gdb. */
1901 		return push_inst(compiler, 0x0001 /* 2-byte trap */);
1902 	case SLJIT_NOP:
1903 		return push_inst(compiler, 0x0700 /* 2-byte nop */);
1904 	case SLJIT_LMUL_UW:
1905 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1906 		break;
1907 	case SLJIT_LMUL_SW:
1908 		/* signed multiplication from: */
1909 		/* Hacker's Delight, Second Edition: Chapter 8-3. */
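		/* The high doubleword of the signed product equals the high doubleword of
		   the unsigned product minus (arg1 if arg0 < 0) minus (arg0 if arg1 < 0);
		   the sign masks computed below select those correction terms. */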
1910 		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1911 		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1912 		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1913 		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1914 
1915 		/* unsigned multiplication */
1916 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1917 
1918 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1919 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1920 		break;
1921 	case SLJIT_DIV_U32:
1922 	case SLJIT_DIVMOD_U32:
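		/* dlr divides the 64-bit value held in the tmp0:tmp1 register pair by arg1;
		   tmp0 is cleared first so the dividend is just arg0, the quotient lands in
		   tmp1 (odd register) and the remainder in tmp0 (even register). */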
1923 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1924 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1925 		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1926 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1927 		if (op == SLJIT_DIVMOD_U32)
1928 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1929 
1930 		return SLJIT_SUCCESS;
1931 	case SLJIT_DIV_S32:
1932 	case SLJIT_DIVMOD_S32:
1933 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1934 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1935 		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1936 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1937 		if (op == SLJIT_DIVMOD_S32)
1938 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1939 
1940 		return SLJIT_SUCCESS;
1941 	case SLJIT_DIV_UW:
1942 	case SLJIT_DIVMOD_UW:
1943 		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1944 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1945 		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1946 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1947 		if (op == SLJIT_DIVMOD_UW)
1948 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1949 
1950 		return SLJIT_SUCCESS;
1951 	case SLJIT_DIV_SW:
1952 	case SLJIT_DIVMOD_SW:
1953 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1954 		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1955 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1956 		if (op == SLJIT_DIVMOD_SW)
1957 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1958 
1959 		return SLJIT_SUCCESS;
1960 	case SLJIT_ENDBR:
1961 		return SLJIT_SUCCESS;
1962 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1963 		return SLJIT_SUCCESS;
1964 	default:
1965 		SLJIT_UNREACHABLE();
1966 	}
1967 	/* swap result registers */
1968 	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
1969 	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
1970 	return push_inst(compiler, lgr(arg1, tmp0));
1971 }
1972 
1973 static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
1974 {
1975 	sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
1976 
1977 	if ((op & SLJIT_32) && src_r != tmp0) {
1978 		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
1979 		src_r = tmp0;
1980 	}
1981 
1982 	if (is_ctz) {
1983 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));
1984 
1985 		if (src_r == tmp0)
1986 			FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
1987 		else
1988 			FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));
1989 
1990 		src_r = tmp0;
1991 	}
1992 
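	/* For CTZ, tmp0 now holds x & -x (the lowest set bit in isolation), so
	   ctz(x) = 63 - clz(x & -x). flogr below writes the leading-zero count of
	   src_r into tmp0 (64 for a zero input) and clobbers tmp1, the odd half of
	   the register pair. */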
1993 	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));
1994 
1995 	if (is_ctz)
1996 		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));
1997 
1998 	if (op & SLJIT_32) {
1999 		if (!is_ctz && dst_r != tmp0)
2000 			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));
2001 
2002 		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
2003 	}
2004 
2005 	if (is_ctz)
2006 		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));
2007 
2008 	if (dst_r == tmp0)
2009 		return SLJIT_SUCCESS;
2010 
2011 	return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
2012 }
2013 
2014 /* LEVAL will be defined later with different parameters as needed */
2015 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
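/* Example: WHEN2(is_u12(mem.offset), lh, lhy) evaluates to LEVAL(lh) when the
   displacement fits the short 12-bit RX form and to LEVAL(lhy), the
   long-displacement RXY form, otherwise. */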
2016 
2017 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2018         sljit_s32 dst, sljit_sw dstw,
2019         sljit_s32 src, sljit_sw srcw)
2020 {
2021 	sljit_ins ins;
2022 	struct addr mem;
2023 	sljit_gpr dst_r;
2024 	sljit_gpr src_r;
2025 	sljit_s32 opcode = GET_OPCODE(op);
2026 
2027 	CHECK_ERROR();
2028 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2029 	ADJUST_LOCAL_OFFSET(dst, dstw);
2030 	ADJUST_LOCAL_OFFSET(src, srcw);
2031 
2032 	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
2033 		/* LOAD REGISTER */
2034 		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
2035 			dst_r = gpr(dst);
2036 			src_r = gpr(src);
2037 			switch (opcode | (op & SLJIT_32)) {
2038 			/* 32-bit */
2039 			case SLJIT_MOV32_U8:
2040 				ins = llcr(dst_r, src_r);
2041 				break;
2042 			case SLJIT_MOV32_S8:
2043 				ins = lbr(dst_r, src_r);
2044 				break;
2045 			case SLJIT_MOV32_U16:
2046 				ins = llhr(dst_r, src_r);
2047 				break;
2048 			case SLJIT_MOV32_S16:
2049 				ins = lhr(dst_r, src_r);
2050 				break;
2051 			case SLJIT_MOV32:
2052 				if (dst_r == src_r)
2053 					return SLJIT_SUCCESS;
2054 				ins = lr(dst_r, src_r);
2055 				break;
2056 			/* 64-bit */
2057 			case SLJIT_MOV_U8:
2058 				ins = llgcr(dst_r, src_r);
2059 				break;
2060 			case SLJIT_MOV_S8:
2061 				ins = lgbr(dst_r, src_r);
2062 				break;
2063 			case SLJIT_MOV_U16:
2064 				ins = llghr(dst_r, src_r);
2065 				break;
2066 			case SLJIT_MOV_S16:
2067 				ins = lghr(dst_r, src_r);
2068 				break;
2069 			case SLJIT_MOV_U32:
2070 				ins = llgfr(dst_r, src_r);
2071 				break;
2072 			case SLJIT_MOV_S32:
2073 				ins = lgfr(dst_r, src_r);
2074 				break;
2075 			case SLJIT_MOV:
2076 			case SLJIT_MOV_P:
2077 				if (dst_r == src_r)
2078 					return SLJIT_SUCCESS;
2079 				ins = lgr(dst_r, src_r);
2080 				break;
2081 			default:
2082 				ins = 0;
2083 				SLJIT_UNREACHABLE();
2084 				break;
2085 			}
2086 			FAIL_IF(push_inst(compiler, ins));
2087 			return SLJIT_SUCCESS;
2088 		}
2089 		/* LOAD IMMEDIATE */
2090 		if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
2091 			switch (opcode) {
2092 			case SLJIT_MOV_U8:
2093 				srcw = (sljit_sw)((sljit_u8)(srcw));
2094 				break;
2095 			case SLJIT_MOV_S8:
2096 				srcw = (sljit_sw)((sljit_s8)(srcw));
2097 				break;
2098 			case SLJIT_MOV_U16:
2099 				srcw = (sljit_sw)((sljit_u16)(srcw));
2100 				break;
2101 			case SLJIT_MOV_S16:
2102 				srcw = (sljit_sw)((sljit_s16)(srcw));
2103 				break;
2104 			case SLJIT_MOV_U32:
2105 				srcw = (sljit_sw)((sljit_u32)(srcw));
2106 				break;
2107 			case SLJIT_MOV_S32:
2108 			case SLJIT_MOV32:
2109 				srcw = (sljit_sw)((sljit_s32)(srcw));
2110 				break;
2111 			}
2112 			return push_load_imm_inst(compiler, gpr(dst), srcw);
2113 		}
2114 		/* LOAD */
2115 		/* TODO(carenas): avoid reg being defined later */
2116 		#define LEVAL(i) EVAL(i, reg, mem)
2117 		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
2118 			sljit_gpr reg = gpr(dst);
2119 
2120 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2121 			/* TODO(carenas): convert all calls below to LEVAL */
2122 			switch (opcode | (op & SLJIT_32)) {
2123 			case SLJIT_MOV32_U8:
2124 				ins = llc(reg, mem.offset, mem.index, mem.base);
2125 				break;
2126 			case SLJIT_MOV32_S8:
2127 				ins = lb(reg, mem.offset, mem.index, mem.base);
2128 				break;
2129 			case SLJIT_MOV32_U16:
2130 				ins = llh(reg, mem.offset, mem.index, mem.base);
2131 				break;
2132 			case SLJIT_MOV32_S16:
2133 				ins = WHEN2(is_u12(mem.offset), lh, lhy);
2134 				break;
2135 			case SLJIT_MOV32:
2136 				ins = WHEN2(is_u12(mem.offset), l, ly);
2137 				break;
2138 			case SLJIT_MOV_U8:
2139 				ins = LEVAL(llgc);
2140 				break;
2141 			case SLJIT_MOV_S8:
2142 				ins = lgb(reg, mem.offset, mem.index, mem.base);
2143 				break;
2144 			case SLJIT_MOV_U16:
2145 				ins = LEVAL(llgh);
2146 				break;
2147 			case SLJIT_MOV_S16:
2148 				ins = lgh(reg, mem.offset, mem.index, mem.base);
2149 				break;
2150 			case SLJIT_MOV_U32:
2151 				ins = LEVAL(llgf);
2152 				break;
2153 			case SLJIT_MOV_S32:
2154 				ins = lgf(reg, mem.offset, mem.index, mem.base);
2155 				break;
2156 			case SLJIT_MOV_P:
2157 			case SLJIT_MOV:
2158 				ins = lg(reg, mem.offset, mem.index, mem.base);
2159 				break;
2160 			default:
2161 				ins = 0;
2162 				SLJIT_UNREACHABLE();
2163 				break;
2164 			}
2165 			FAIL_IF(push_inst(compiler, ins));
2166 			return SLJIT_SUCCESS;
2167 		}
2168 		/* STORE and STORE IMMEDIATE */
2169 		if ((dst & SLJIT_MEM)
2170 			&& (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
2171 			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2172 			if (src & SLJIT_IMM) {
2173 				/* TODO(mundaym): MOVE IMMEDIATE? */
2174 				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2175 			}
2176 			struct addr mem;
2177 			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2178 			switch (opcode) {
2179 			case SLJIT_MOV_U8:
2180 			case SLJIT_MOV_S8:
2181 				return push_inst(compiler,
2182 					WHEN2(is_u12(mem.offset), stc, stcy));
2183 			case SLJIT_MOV_U16:
2184 			case SLJIT_MOV_S16:
2185 				return push_inst(compiler,
2186 					WHEN2(is_u12(mem.offset), sth, sthy));
2187 			case SLJIT_MOV_U32:
2188 			case SLJIT_MOV_S32:
2189 			case SLJIT_MOV32:
2190 				return push_inst(compiler,
2191 					WHEN2(is_u12(mem.offset), st, sty));
2192 			case SLJIT_MOV_P:
2193 			case SLJIT_MOV:
2194 				FAIL_IF(push_inst(compiler, LEVAL(stg)));
2195 				return SLJIT_SUCCESS;
2196 			default:
2197 				SLJIT_UNREACHABLE();
2198 			}
2199 		}
2200 		#undef LEVAL
2201 		/* MOVE CHARACTERS */
2202 		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2203 			struct addr mem;
2204 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2205 			switch (opcode) {
2206 			case SLJIT_MOV_U8:
2207 			case SLJIT_MOV_S8:
2208 				FAIL_IF(push_inst(compiler,
2209 					EVAL(llgc, tmp0, mem)));
2210 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2211 				return push_inst(compiler,
2212 					EVAL(stcy, tmp0, mem));
2213 			case SLJIT_MOV_U16:
2214 			case SLJIT_MOV_S16:
2215 				FAIL_IF(push_inst(compiler,
2216 					EVAL(llgh, tmp0, mem)));
2217 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2218 				return push_inst(compiler,
2219 					EVAL(sthy, tmp0, mem));
2220 			case SLJIT_MOV_U32:
2221 			case SLJIT_MOV_S32:
2222 			case SLJIT_MOV32:
2223 				FAIL_IF(push_inst(compiler,
2224 					EVAL(ly, tmp0, mem)));
2225 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2226 				return push_inst(compiler,
2227 					EVAL(sty, tmp0, mem));
2228 			case SLJIT_MOV_P:
2229 			case SLJIT_MOV:
2230 				FAIL_IF(push_inst(compiler,
2231 					EVAL(lg, tmp0, mem)));
2232 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2233 				FAIL_IF(push_inst(compiler,
2234 					EVAL(stg, tmp0, mem)));
2235 				return SLJIT_SUCCESS;
2236 			default:
2237 				SLJIT_UNREACHABLE();
2238 			}
2239 		}
2240 		SLJIT_UNREACHABLE();
2241 	}
2242 
2243 	SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */
2244 
2245 	dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
2246 	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
2247 
2248 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2249 
2250 	/* TODO(mundaym): optimize loads and stores */
2251 	switch (opcode) {
2252 	case SLJIT_NOT:
2253 		if (src & SLJIT_MEM)
2254 			FAIL_IF(load_word(compiler, src_r, src, srcw, op & SLJIT_32));
2255 
2256 		/* emulate ~x with x^-1 */
2257 		if (!(op & SLJIT_32)) {
2258 			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2259 			if (src_r != dst_r)
2260 				FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));
2261 
2262 			FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
2263 			break;
2264 		}
2265 
2266 		if (have_eimm())
2267 			FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
2268 		else {
2269 			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
2270 			if (src_r != dst_r)
2271 				FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));
2272 
2273 			FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
2274 		}
2275 		break;
2276 	case SLJIT_CLZ:
2277 	case SLJIT_CTZ:
2278 		if (src & SLJIT_MEM)
2279 			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
2280 
2281 		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
2282 		break;
2283 	default:
2284 		SLJIT_UNREACHABLE();
2285 	}
2286 
2287 	if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
2288 		FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2289 
2290 	if (dst & SLJIT_MEM)
2291 		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
2292 
2293 	return SLJIT_SUCCESS;
2294 }
2295 
2296 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2297 {
2298 	switch (GET_OPCODE(op)) {
2299 	case SLJIT_ADD:
2300 	case SLJIT_ADDC:
2301 	case SLJIT_MUL:
2302 	case SLJIT_AND:
2303 	case SLJIT_OR:
2304 	case SLJIT_XOR:
2305 		return 1;
2306 	}
2307 	return 0;
2308 }
2309 
2310 static const struct ins_forms add_forms = {
2311 	0x1a00, /* ar */
2312 	0xb9080000, /* agr */
2313 	0xb9f80000, /* ark */
2314 	0xb9e80000, /* agrk */
2315 	0x5a000000, /* a */
2316 	0xe3000000005a, /* ay */
2317 	0xe30000000008, /* ag */
2318 };
2319 
2320 static const struct ins_forms logical_add_forms = {
2321 	0x1e00, /* alr */
2322 	0xb90a0000, /* algr */
2323 	0xb9fa0000, /* alrk */
2324 	0xb9ea0000, /* algrk */
2325 	0x5e000000, /* al */
2326 	0xe3000000005e, /* aly */
2327 	0xe3000000000a, /* alg */
2328 };
2329 
2330 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2331 	sljit_s32 dst, sljit_sw dstw,
2332 	sljit_s32 src1, sljit_sw src1w,
2333 	sljit_s32 src2, sljit_sw src2w)
2334 {
2335 	int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2336 	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2337 	const struct ins_forms *forms;
2338 	sljit_ins ins;
2339 
2340 	if (src2 & SLJIT_IMM) {
2341 		if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2342 			if (sets_overflow)
2343 				ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2344 			else
2345 				ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2346 			return emit_siy(compiler, ins, dst, dstw, src2w);
2347 		}
2348 
2349 		if (is_s16(src2w)) {
2350 			if (sets_overflow)
2351 				ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2352 			else
2353 				ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2354 			FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2355 			goto done;
2356 		}
2357 
2358 		if (!sets_overflow) {
2359 			if ((op & SLJIT_32) || is_u32(src2w)) {
2360 				ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2361 				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2362 				goto done;
2363 			}
2364 			if (is_u32(-src2w)) {
2365 				FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2366 				goto done;
2367 			}
2368 		}
2369 		else if ((op & SLJIT_32) || is_s32(src2w)) {
2370 			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2371 			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2372 			goto done;
2373 		}
2374 	}
2375 
2376 	forms = sets_overflow ? &add_forms : &logical_add_forms;
2377 	FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2378 
2379 done:
2380 	if (sets_zero_overflow)
2381 		FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2382 
2383 	if (dst & SLJIT_MEM)
2384 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2385 
2386 	return SLJIT_SUCCESS;
2387 }
2388 
2389 static const struct ins_forms sub_forms = {
2390 	0x1b00, /* sr */
2391 	0xb9090000, /* sgr */
2392 	0xb9f90000, /* srk */
2393 	0xb9e90000, /* sgrk */
2394 	0x5b000000, /* s */
2395 	0xe3000000005b, /* sy */
2396 	0xe30000000009, /* sg */
2397 };
2398 
2399 static const struct ins_forms logical_sub_forms = {
2400 	0x1f00, /* slr */
2401 	0xb90b0000, /* slgr */
2402 	0xb9fb0000, /* slrk */
2403 	0xb9eb0000, /* slgrk */
2404 	0x5f000000, /* sl */
2405 	0xe3000000005f, /* sly */
2406 	0xe3000000000b, /* slg */
2407 };
2408 
2409 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2410 	sljit_s32 dst, sljit_sw dstw,
2411 	sljit_s32 src1, sljit_sw src1w,
2412 	sljit_s32 src2, sljit_sw src2w)
2413 {
2414 	sljit_s32 flag_type = GET_FLAG_TYPE(op);
2415 	int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2416 	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2417 	const struct ins_forms *forms;
2418 	sljit_ins ins;
2419 
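	/* If the result is discarded (dst is the scratch tmp0) and only ordering or
	   equality flags are needed, a compare instruction is enough; no subtraction
	   result has to be produced. */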
2420 	if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2421 		int compare_signed = flag_type >= SLJIT_SIG_LESS;
2422 
2423 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2424 
2425 		if (src2 & SLJIT_IMM) {
2426 			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
2427 			{
2428 				if ((op & SLJIT_32) || is_s32(src2w)) {
2429 					ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2430 					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2431 				}
2432 			}
2433 			else {
2434 				if ((op & SLJIT_32) || is_u32(src2w)) {
2435 					ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2436 					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2437 				}
2438 				if (is_s16(src2w))
2439 					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2440 			}
2441 		}
2442 		else if (src2 & SLJIT_MEM) {
2443 			if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2444 				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2445 				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2446 			}
2447 
2448 			if (compare_signed)
2449 				ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2450 			else
2451 				ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2452 			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2453 		}
2454 
2455 		if (compare_signed)
2456 			ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2457 		else
2458 			ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2459 		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2460 	}
2461 
2462 	if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2463 		ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2464 		FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2465 		goto done;
2466 	}
2467 
2468 	if (src2 & SLJIT_IMM) {
2469 		sljit_sw neg_src2w = -src2w;
2470 
2471 		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2472 			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2473 				if (sets_signed)
2474 					ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2475 				else
2476 					ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2477 				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2478 			}
2479 
2480 			if (is_s16(neg_src2w)) {
2481 				if (sets_signed)
2482 					ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2483 				else
2484 					ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2485 				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2486 				goto done;
2487 			}
2488 		}
2489 
2490 		if (!sets_signed) {
2491 			if ((op & SLJIT_32) || is_u32(src2w)) {
2492 				ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2493 				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2494 				goto done;
2495 			}
2496 			if (is_u32(neg_src2w)) {
2497 				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2498 				goto done;
2499 			}
2500 		}
2501 		else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2502 			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2503 			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2504 			goto done;
2505 		}
2506 	}
2507 
2508 	forms = sets_signed ? &sub_forms : &logical_sub_forms;
2509 	FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2510 
2511 done:
2512 	if (sets_signed) {
2513 		sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2514 
2515 		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2516 			/* In case of overflow, the sign bit of the two source operands must be different, and
2517 			     - the first operand is greater if the sign bit of the result is set
2518 			     - the first operand is less if the sign bit of the result is not set
2519 			   The -result operation sets the correct sign, because the result cannot be zero.
2520 			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2521 			FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
2522 			FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2523 		}
2524 		else if (op & SLJIT_SET_Z)
2525 			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2526 	}
2527 
2528 	if (dst & SLJIT_MEM)
2529 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2530 
2531 	return SLJIT_SUCCESS;
2532 }
2533 
2534 static const struct ins_forms multiply_forms = {
2535 	0xb2520000, /* msr */
2536 	0xb90c0000, /* msgr */
2537 	0xb9fd0000, /* msrkc */
2538 	0xb9ed0000, /* msgrkc */
2539 	0x71000000, /* ms */
2540 	0xe30000000051, /* msy */
2541 	0xe3000000000c, /* msg */
2542 };
2543 
2544 static const struct ins_forms multiply_overflow_forms = {
2545 	0,
2546 	0,
2547 	0xb9fd0000, /* msrkc */
2548 	0xb9ed0000, /* msgrkc */
2549 	0,
2550 	0xe30000000053, /* msc */
2551 	0xe30000000083, /* msgc */
2552 };
2553 
2554 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2555 	sljit_s32 dst,
2556 	sljit_s32 src1, sljit_sw src1w,
2557 	sljit_s32 src2, sljit_sw src2w)
2558 {
2559 	sljit_ins ins;
2560 
2561 	if (HAS_FLAGS(op)) {
2562 		/* If the have_misc2() facility check fails, this operation has to be emulated. 32-bit emulation:
2563 		FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2564 		FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2565 		if (dst_r != tmp0) {
2566 			FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2567 		}
2568 		FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2569 		FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2570 		FAIL_IF(push_inst(compiler, ipm(tmp1)));
2571 		FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2572 
2573 		return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2574 	}
2575 
2576 	if (src2 & SLJIT_IMM) {
2577 		if (is_s16(src2w)) {
2578 			ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2579 			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2580 		}
2581 
2582 		if (is_s32(src2w)) {
2583 			ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2584 			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2585 		}
2586 	}
2587 
2588 	return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2589 }
2590 
2591 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2592 	sljit_s32 dst,
2593 	sljit_s32 src1, sljit_sw src1w,
2594 	sljit_uw imm, sljit_s32 count16)
2595 {
2596 	sljit_s32 mode = compiler->mode;
2597 	sljit_gpr dst_r = tmp0;
2598 	sljit_s32 needs_move = 1;
2599 
2600 	if (IS_GPR_REG(dst)) {
2601 		dst_r = gpr(dst & REG_MASK);
2602 		if (dst == src1)
2603 			needs_move = 0;
2604 	}
2605 
2606 	if (needs_move)
2607 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2608 
2609 	if (type == SLJIT_AND) {
2610 		if (!(mode & SLJIT_32))
2611 			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2612 		return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2613 	}
2614 	else if (type == SLJIT_OR) {
2615 		if (count16 >= 3) {
2616 			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2617 			return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2618 		}
2619 
2620 		if (count16 >= 2) {
2621 			if ((imm & 0x00000000ffffffffull) == 0)
2622 				return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2623 			if ((imm & 0xffffffff00000000ull) == 0)
2624 				return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2625 		}
2626 
2627 		if ((imm & 0xffff000000000000ull) != 0)
2628 			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2629 		if ((imm & 0x0000ffff00000000ull) != 0)
2630 			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2631 		if ((imm & 0x00000000ffff0000ull) != 0)
2632 			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2633 		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2634 			return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2635 		return SLJIT_SUCCESS;
2636 	}
2637 
2638 	if ((imm & 0xffffffff00000000ull) != 0)
2639 		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2640 	if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2641 		return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2642 	return SLJIT_SUCCESS;
2643 }
2644 
2645 static const struct ins_forms bitwise_and_forms = {
2646 	0x1400, /* nr */
2647 	0xb9800000, /* ngr */
2648 	0xb9f40000, /* nrk */
2649 	0xb9e40000, /* ngrk */
2650 	0x54000000, /* n */
2651 	0xe30000000054, /* ny */
2652 	0xe30000000080, /* ng */
2653 };
2654 
2655 static const struct ins_forms bitwise_or_forms = {
2656 	0x1600, /* or */
2657 	0xb9810000, /* ogr */
2658 	0xb9f60000, /* ork */
2659 	0xb9e60000, /* ogrk */
2660 	0x56000000, /* o */
2661 	0xe30000000056, /* oy */
2662 	0xe30000000081, /* og */
2663 };
2664 
2665 static const struct ins_forms bitwise_xor_forms = {
2666 	0x1700, /* xr */
2667 	0xb9820000, /* xgr */
2668 	0xb9f70000, /* xrk */
2669 	0xb9e70000, /* xgrk */
2670 	0x57000000, /* x */
2671 	0xe30000000057, /* xy */
2672 	0xe30000000082, /* xg */
2673 };
2674 
2675 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2676 	sljit_s32 dst,
2677 	sljit_s32 src1, sljit_sw src1w,
2678 	sljit_s32 src2, sljit_sw src2w)
2679 {
2680 	sljit_s32 type = GET_OPCODE(op);
2681 	const struct ins_forms *forms;
2682 
2683 	if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
2684 		sljit_s32 count16 = 0;
2685 		sljit_uw imm = (sljit_uw)src2w;
2686 
2687 		if (op & SLJIT_32)
2688 			imm &= 0xffffffffull;
2689 
2690 		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2691 			count16++;
2692 		if ((imm & 0x00000000ffff0000ull) != 0)
2693 			count16++;
2694 		if ((imm & 0x0000ffff00000000ull) != 0)
2695 			count16++;
2696 		if ((imm & 0xffff000000000000ull) != 0)
2697 			count16++;
2698 
2699 		if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) {
2700 			sljit_gpr src_r = tmp0;
2701 
2702 			if (FAST_IS_REG(src1))
2703 				src_r = gpr(src1 & REG_MASK);
2704 			else
2705 				FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2706 
2707 			if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2708 				return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
2709 			if ((imm & 0x00000000ffff0000ull) != 0)
2710 				return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
2711 			if ((imm & 0x0000ffff00000000ull) != 0)
2712 				return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
2713 			return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
2714 		}
2715 
2716 		if (!(op & SLJIT_SET_Z))
2717 			return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2718 	}
2719 
2720 	if (type == SLJIT_AND)
2721 		forms = &bitwise_and_forms;
2722 	else if (type == SLJIT_OR)
2723 		forms = &bitwise_or_forms;
2724 	else
2725 		forms = &bitwise_xor_forms;
2726 
2727 	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2728 }
2729 
2730 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2731 	sljit_s32 dst,
2732 	sljit_s32 src1, sljit_sw src1w,
2733 	sljit_s32 src2, sljit_sw src2w)
2734 {
2735 	sljit_s32 type = GET_OPCODE(op);
2736 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2737 	sljit_gpr src_r = tmp0;
2738 	sljit_gpr base_r = tmp0;
2739 	sljit_ins imm = 0;
2740 	sljit_ins ins;
2741 
2742 	if (FAST_IS_REG(src1))
2743 		src_r = gpr(src1);
2744 	else
2745 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2746 
2747 	if (!(src2 & SLJIT_IMM)) {
2748 		if (FAST_IS_REG(src2))
2749 			base_r = gpr(src2);
2750 		else {
2751 			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2752 			base_r = tmp1;
2753 		}
2754 
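		/* The masked shift variants reduce the count modulo the operand width.
		   The 32-bit shift instructions consume six bits of the count, so the
		   amount is explicitly masked to its low five bits here (risbg into tmp1,
		   or nill when the count is already in tmp1). */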
2755 		if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
2756 			if (base_r != tmp1) {
2757 				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2758 				base_r = tmp1;
2759 			} else
2760 				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
2761 		}
2762 	} else
2763 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2764 
2765 	if ((op & SLJIT_32) && dst_r == src_r) {
2766 		if (type == SLJIT_SHL || type == SLJIT_MSHL)
2767 			ins = 0x89000000 /* sll */;
2768 		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2769 			ins = 0x88000000 /* srl */;
2770 		else
2771 			ins = 0x8a000000 /* sra */;
2772 
2773 		FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2774 	} else {
2775 		if (type == SLJIT_SHL || type == SLJIT_MSHL)
2776 			ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2777 		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2778 			ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2779 		else
2780 			ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2781 
2782 		FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2783 	}
2784 
2785 	if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2786 		return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2787 
2788 	return SLJIT_SUCCESS;
2789 }
2790 
2791 static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
2792 	sljit_s32 dst,
2793 	sljit_s32 src1, sljit_sw src1w,
2794 	sljit_s32 src2, sljit_sw src2w)
2795 {
2796 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2797 	sljit_gpr src_r = tmp0;
2798 	sljit_gpr base_r = tmp0;
2799 	sljit_ins imm = 0;
2800 	sljit_ins ins;
2801 
2802 	if (FAST_IS_REG(src1))
2803 		src_r = gpr(src1);
2804 	else
2805 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2806 
2807 	if (!(src2 & SLJIT_IMM)) {
2808 		if (FAST_IS_REG(src2))
2809 			base_r = gpr(src2);
2810 		else {
2811 			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2812 			base_r = tmp1;
2813 		}
2814 	}
2815 
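	/* z/Architecture only provides rotate-left (rll/rllg); a right rotation by n
	   is emitted as a left rotation by -n, negating either the immediate or the
	   register count (lcr/lcgr) below. */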
2816 	if (GET_OPCODE(op) == SLJIT_ROTR) {
2817 		if (!(src2 & SLJIT_IMM)) {
2818 			ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2819 			FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
2820 			base_r = tmp1;
2821 		} else
2822 			src2w = -src2w;
2823 	}
2824 
2825 	if (src2 & SLJIT_IMM)
2826 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2827 
2828 	ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
2829 	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
2830 }
2831 
2832 static const struct ins_forms addc_forms = {
2833 	0xb9980000, /* alcr */
2834 	0xb9880000, /* alcgr */
2835 	0,
2836 	0,
2837 	0,
2838 	0xe30000000098, /* alc */
2839 	0xe30000000088, /* alcg */
2840 };
2841 
2842 static const struct ins_forms subc_forms = {
2843 	0xb9990000, /* slbr */
2844 	0xb9890000, /* slbgr */
2845 	0,
2846 	0,
2847 	0,
2848 	0xe30000000099, /* slb */
2849 	0xe30000000089, /* slbg */
2850 };
2851 
2852 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2853 	sljit_s32 dst, sljit_sw dstw,
2854 	sljit_s32 src1, sljit_sw src1w,
2855 	sljit_s32 src2, sljit_sw src2w)
2856 {
2857 	CHECK_ERROR();
2858 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2859 	ADJUST_LOCAL_OFFSET(dst, dstw);
2860 	ADJUST_LOCAL_OFFSET(src1, src1w);
2861 	ADJUST_LOCAL_OFFSET(src2, src2w);
2862 
2863 	compiler->mode = op & SLJIT_32;
2864 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2865 
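	/* For commutative operations move an immediate first operand into src2 so the
	   immediate forms of the emitters below can be used; the operands and their
	   immediate words are exchanged with XOR swaps. */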
2866 	if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
2867 		src1 ^= src2;
2868 		src2 ^= src1;
2869 		src1 ^= src2;
2870 
2871 		src1w ^= src2w;
2872 		src2w ^= src1w;
2873 		src1w ^= src2w;
2874 	}
2875 
2876 	switch (GET_OPCODE(op)) {
2877 	case SLJIT_ADD:
2878 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2879 		return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2880 	case SLJIT_ADDC:
2881 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2882 		FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2883 		if (dst & SLJIT_MEM)
2884 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2885 		return SLJIT_SUCCESS;
2886 	case SLJIT_SUB:
2887 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2888 		return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2889 	case SLJIT_SUBC:
2890 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2891 		FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2892 		if (dst & SLJIT_MEM)
2893 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2894 		return SLJIT_SUCCESS;
2895 	case SLJIT_MUL:
2896 		FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2897 		break;
2898 	case SLJIT_AND:
2899 	case SLJIT_OR:
2900 	case SLJIT_XOR:
2901 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2902 		break;
2903 	case SLJIT_SHL:
2904 	case SLJIT_MSHL:
2905 	case SLJIT_LSHR:
2906 	case SLJIT_MLSHR:
2907 	case SLJIT_ASHR:
2908 	case SLJIT_MASHR:
2909 		FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2910 		break;
2911 	case SLJIT_ROTL:
2912 	case SLJIT_ROTR:
2913 		FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2914 		break;
2915 	}
2916 
2917 	if (dst & SLJIT_MEM)
2918 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2919 	return SLJIT_SUCCESS;
2920 }
2921 
2922 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2923 	sljit_s32 src1, sljit_sw src1w,
2924 	sljit_s32 src2, sljit_sw src2w)
2925 {
2926 	CHECK_ERROR();
2927 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2928 
2929 	SLJIT_SKIP_CHECKS(compiler);
2930 	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
2931 }
2932 
2933 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
2934 	sljit_s32 src_dst,
2935 	sljit_s32 src1, sljit_sw src1w,
2936 	sljit_s32 src2, sljit_sw src2w)
2937 {
2938 	sljit_s32 is_right;
2939 	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
2940 	sljit_gpr src_dst_r = gpr(src_dst);
2941 	sljit_gpr src1_r = tmp0;
2942 	sljit_gpr src2_r = tmp1;
2943 	sljit_ins ins;
2944 
2945 	CHECK_ERROR();
2946 	CHECK(check_sljit_emit_shift_into(compiler, op, src_dst, src1, src1w, src2, src2w));
2947 
2948 	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
2949 
2950 	if (src_dst == src1) {
2951 		SLJIT_SKIP_CHECKS(compiler);
2952 		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), src_dst, 0, src_dst, 0, src2, src2w);
2953 	}
2954 
2955 	ADJUST_LOCAL_OFFSET(src1, src1w);
2956 	ADJUST_LOCAL_OFFSET(src2, src2w);
2957 
2958 	if (src1 & SLJIT_MEM)
2959 		FAIL_IF(load_word(compiler, tmp0, src1, src1w, op & SLJIT_32));
2960 	else if (src1 & SLJIT_IMM)
2961 		FAIL_IF(push_load_imm_inst(compiler, tmp0, src1w));
2962 	else
2963 		src1_r = gpr(src1);
2964 
2965 	if (src2 & SLJIT_IMM) {
2966 		src2w &= bit_length - 1;
2967 
2968 		if (src2w == 0)
2969 			return SLJIT_SUCCESS;
2970 	} else if (!(src2 & SLJIT_MEM))
2971 		src2_r = gpr(src2);
2972 	else
2973 		FAIL_IF(load_word(compiler, tmp1, src2, src2w, op & SLJIT_32));
2974 
2975 	if (src2 & SLJIT_IMM) {
2976 		if (op & SLJIT_32) {
2977 			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
2978 			FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | (sljit_ins)src2w));
2979 		} else {
2980 			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
2981 			FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | ((sljit_ins)src2w << 16)));
2982 		}
2983 
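		/* Fill the bit positions vacated by the shift from src1: risbg rotates src1
		   by the shift amount and inserts the selected bit range into src_dst
		   without zeroing the remaining bits. */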
2984 		ins = 0xec0000000055 /* risbg */;
2985 
2986 		if (is_right) {
2987 			src2w = bit_length - src2w;
2988 			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src2w) << 16) | ((sljit_ins)src2w << 8);
2989 		} else
2990 			ins |= ((sljit_ins)(64 - src2w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)src2w << 8);
2991 
2992 		return push_inst(compiler, ins | R36A(src_dst_r) | R32A(src1_r));
2993 	}
2994 
2995 	if (op & SLJIT_32) {
2996 		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
2997 			if (src2_r != tmp1) {
2998 				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src2_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2999 				src2_r = tmp1;
3000 			} else
3001 				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
3002 		}
3003 
3004 		ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3005 		FAIL_IF(push_inst(compiler, ins | R20A(src_dst_r) | R12A(src2_r)));
3006 
3007 		if (src2_r != tmp1) {
3008 			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
3009 			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src2_r)));
3010 		} else
3011 			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
3012 
3013 		if (src1_r == tmp0) {
3014 			ins = is_right ? 0x89000000 /* sll */ : 0x88000000 /* srl */;
3015 			FAIL_IF(push_inst(compiler, ins | R20A(tmp0) | R12A(tmp1) | 0x1));
3016 		} else {
3017 			ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
3018 			FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1) | (0x1 << 16)));
3019 		}
3020 
3021 		return push_inst(compiler, 0x1600 /* or */ | R4A(src_dst_r) | R0A(tmp0));
3022 	}
3023 
3024 	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3025 	FAIL_IF(push_inst(compiler, ins | R36A(src_dst_r) | R32A(src_dst_r) | R28A(src2_r)));
3026 
3027 	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
3028 
3029 	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
3030 		if (src2_r != tmp1)
3031 			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
3032 
3033 		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | (0x1 << 16)));
3034 		src1_r = tmp0;
3035 
3036 		if (src2_r != tmp1)
3037 			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src2_r)));
3038 		else
3039 			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
3040 	} else
3041 		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src2_r)));
3042 
3043 	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src1_r) | R28A(tmp1)));
3044 	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(src_dst_r) | R0A(tmp0));
3045 }
3046 
3047 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
3048 	struct sljit_compiler *compiler,
3049 	sljit_s32 op, sljit_s32 src, sljit_sw srcw)
3050 {
3051 	sljit_gpr src_r;
3052 	struct addr addr;
3053 
3054 	CHECK_ERROR();
3055 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3056 	ADJUST_LOCAL_OFFSET(src, srcw);
3057 
3058 	switch (op) {
3059 	case SLJIT_FAST_RETURN:
3060 		src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3061 		if (src & SLJIT_MEM)
3062 			FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3063 
3064 		return push_inst(compiler, br(src_r));
3065 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3066 		return SLJIT_SUCCESS;
3067 	case SLJIT_PREFETCH_L1:
3068 	case SLJIT_PREFETCH_L2:
3069 	case SLJIT_PREFETCH_L3:
3070 	case SLJIT_PREFETCH_ONCE:
3071 		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3072 		return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3073 	default:
3074 		return SLJIT_SUCCESS;
3075 	}
3076 
3077 	return SLJIT_SUCCESS;
3078 }
3079 
3080 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
3081 {
3082 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
3083 	return (sljit_s32)gpr(reg);
3084 }
3085 
3086 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
3087 {
3088 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
3089 	return (sljit_s32)fgpr(reg);
3090 }
3091 
3092 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3093 	void *instruction, sljit_u32 size)
3094 {
3095 	sljit_ins ins = 0;
3096 
3097 	CHECK_ERROR();
3098 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3099 
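	/* s390x is big-endian, so copying into the last 'size' bytes of ins
	   right-aligns the raw instruction in its least significant bits, which is
	   the layout push_inst() expects. */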
3100 	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3101 	return push_inst(compiler, ins);
3102 }
3103 
3104 /* --------------------------------------------------------------------- */
3105 /*  Floating point operators                                             */
3106 /* --------------------------------------------------------------------- */
3107 
3108 #define FLOAT_LOAD 0
3109 #define FLOAT_STORE 1
3110 
3111 static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
3112 	sljit_s32 reg,
3113 	sljit_s32 mem, sljit_sw memw)
3114 {
3115 	struct addr addr;
3116 	sljit_ins ins;
3117 
3118 	SLJIT_ASSERT(mem & SLJIT_MEM);
3119 
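	/* Use the short RX form when an index register is present, when the
	   displacement fits the unsigned 12-bit field, or when it does not even fit
	   the signed 20-bit field (make_addr_bx then materializes it in tmp1);
	   otherwise use the long-displacement RXY form. */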
3120 	if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
3121 		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
3122 
3123 		if (op & FLOAT_STORE)
3124 			ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
3125 		else
3126 			ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
3127 
3128 		return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
3129 	}
3130 
3131 	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
3132 
3133 	if (op & FLOAT_STORE)
3134 		ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
3135 	else
3136 		ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
3137 
3138 	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3139 }
3140 
3141 static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
3142 	sljit_s32 reg,
3143 	sljit_s32 src, sljit_sw srcw)
3144 {
3145 	struct addr addr;
3146 
3147 	if (!(src & SLJIT_MEM))
3148 		return push_inst(compiler, ins_r | F4(reg) | F0(src));
3149 
3150 	FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
3151 	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
3152 }
3153 
3154 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3155 	sljit_s32 dst, sljit_sw dstw,
3156 	sljit_s32 src, sljit_sw srcw)
3157 {
3158 	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3159 	sljit_ins ins;
3160 
3161 	if (src & SLJIT_MEM) {
3162 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3163 		src = TMP_FREG1;
3164 	}
3165 
3166 	/* M3 is set to 5 (round toward zero) */
3167 	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3168 		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3169 	else
3170 		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3171 
3172 	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3173 
3174 	if (dst & SLJIT_MEM)
3175 		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3176 
3177 	return SLJIT_SUCCESS;
3178 }
3179 
3180 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3181 	sljit_s32 dst, sljit_sw dstw,
3182 	sljit_s32 src, sljit_sw srcw)
3183 {
3184 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3185 	sljit_ins ins;
3186 
3187 	if (src & SLJIT_IMM) {
3188 		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
3189 		src = (sljit_s32)tmp0;
3190 	}
3191 	else if (src & SLJIT_MEM) {
3192 		FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
3193 		src = (sljit_s32)tmp0;
3194 	}
3195 
3196 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3197 		ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3198 	else
3199 		ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3200 
3201 	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
3202 
3203 	if (dst & SLJIT_MEM)
3204 		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3205 
3206 	return SLJIT_SUCCESS;
3207 }
3208 
3209 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3210 	sljit_s32 src1, sljit_sw src1w,
3211 	sljit_s32 src2, sljit_sw src2w)
3212 {
3213 	sljit_ins ins_r, ins;
3214 
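	/* only src1 needs to be loaded into a register; emit_float can use the
	   memory forms (ceb/cdb) for src2 directly */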
3215 	if (src1 & SLJIT_MEM) {
3216 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3217 		src1 = TMP_FREG1;
3218 	}
3219 
3220 	if (op & SLJIT_32) {
3221 		ins_r = 0xb3090000 /* cebr */;
3222 		ins = 0xed0000000009 /* ceb */;
3223 	} else {
3224 		ins_r = 0xb3190000 /* cdbr */;
3225 		ins = 0xed0000000019 /* cdb */;
3226 	}
3227 
3228 	return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3229 }
3230 
3231 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3232 	sljit_s32 dst, sljit_sw dstw,
3233 	sljit_s32 src, sljit_sw srcw)
3234 {
3235 	sljit_s32 dst_r;
3236 	sljit_ins ins;
3237 
3238 	CHECK_ERROR();
3239 
3240 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3241 
3242 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3243 
3244 	if (op == SLJIT_CONV_F64_FROM_F32)
3245 		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
3246 	else {
3247 		if (src & SLJIT_MEM) {
3248 			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
3249 			src = dst_r;
3250 		}
3251 
3252 		switch (GET_OPCODE(op)) {
3253 		case SLJIT_MOV_F64:
3254 			if (FAST_IS_REG(dst)) {
3255 				if (dst == src)
3256 					return SLJIT_SUCCESS;
3257 
3258 				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3259 				break;
3260 			}
3261 			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
3262 		case SLJIT_CONV_F64_FROM_F32:
3263 			/* Only SLJIT_CONV_F32_FROM_F64. */
3264 			ins = 0xb3440000 /* ledbr */;
3265 			break;
3266 		case SLJIT_NEG_F64:
3267 			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
3268 			break;
3269 		default:
3270 			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
3271 			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
3272 			break;
3273 		}
3274 
3275 		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
3276 	}
3277 
3278 	if (!(dst & SLJIT_MEM))
3279 		return SLJIT_SUCCESS;
3280 
3281 	SLJIT_ASSERT(dst_r == TMP_FREG1);
3282 
3283 	return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3284 }
3285 
3286 #define FLOAT_MOV(op, dst_r, src_r) \
3287 	(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3288 
3289 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3290 	sljit_s32 dst, sljit_sw dstw,
3291 	sljit_s32 src1, sljit_sw src1w,
3292 	sljit_s32 src2, sljit_sw src2w)
3293 {
3294 	sljit_s32 dst_r = TMP_FREG1;
3295 	sljit_ins ins_r, ins;
3296 
3297 	CHECK_ERROR();
3298 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3299 	ADJUST_LOCAL_OFFSET(dst, dstw);
3300 	ADJUST_LOCAL_OFFSET(src1, src1w);
3301 	ADJUST_LOCAL_OFFSET(src2, src2w);
3302 
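	/* move src1 into dst_r first; when dst aliases src2 either swap the
	   operands (add and mul are commutative) or save src2 into TMP_FREG1
	   so the move does not overwrite it */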
3303 	do {
3304 		if (FAST_IS_REG(dst)) {
3305 			dst_r = dst;
3306 
3307 			if (dst == src1)
3308 				break;
3309 
3310 			if (dst == src2) {
3311 				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
3312 					src2 = src1;
3313 					src2w = src1w;
3314 					src1 = dst;
3315 					break;
3316 				}
3317 
3318 				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
3319 				src2 = TMP_FREG1;
3320 			}
3321 		}
3322 
3323 		if (src1 & SLJIT_MEM)
3324 			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
3325 		else
3326 			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
3327 	} while (0);
3328 
3329 	switch (GET_OPCODE(op)) {
3330 	case SLJIT_ADD_F64:
3331 		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
3332 		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
3333 		break;
3334 	case SLJIT_SUB_F64:
3335 		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
3336 		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
3337 		break;
3338 	case SLJIT_MUL_F64:
3339 		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
3340 		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
3341 		break;
3342 	default:
3343 		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
3344 		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
3345 		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
3346 		break;
3347 	}
3348 
3349 	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));
3350 
3351 	if (dst & SLJIT_MEM)
3352 		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3353 
3354 	SLJIT_ASSERT(dst_r != TMP_FREG1);
3355 	return SLJIT_SUCCESS;
3356 }
3357 
3358 /* --------------------------------------------------------------------- */
3359 /*  Other instructions                                                   */
3360 /* --------------------------------------------------------------------- */
3361 
3362 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3363 {
3364 	CHECK_ERROR();
3365 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
3366 	ADJUST_LOCAL_OFFSET(dst, dstw);
3367 
3368 	if (FAST_IS_REG(dst))
3369 		return push_inst(compiler, lgr(gpr(dst), link_r));
3370 
3371 	/* memory */
3372 	return store_word(compiler, link_r, dst, dstw, 0);
3373 }
3374 
3375 /* --------------------------------------------------------------------- */
3376 /*  Conditional instructions                                             */
3377 /* --------------------------------------------------------------------- */
3378 
3379 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3380 {
3381 	struct sljit_label *label;
3382 
3383 	CHECK_ERROR_PTR();
3384 	CHECK_PTR(check_sljit_emit_label(compiler));
3385 
3386 	if (compiler->last_label && compiler->last_label->size == compiler->size)
3387 		return compiler->last_label;
3388 
3389 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3390 	PTR_FAIL_IF(!label);
3391 	set_label(label, compiler);
3392 	return label;
3393 }
3394 
3395 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3396 {
3397 	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3398 
3399 	CHECK_ERROR_PTR();
3400 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
3401 
3402 	/* record jump */
3403 	struct sljit_jump *jump = (struct sljit_jump *)
3404 		ensure_abuf(compiler, sizeof(struct sljit_jump));
3405 	PTR_FAIL_IF(!jump);
3406 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3407 	jump->addr = compiler->size;
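	/* the 32 bit relative offset of the brasl/brcl emitted below is left as
	   zero and filled in during code generation from the recorded index */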
3408 
3409 	/* emit jump instruction */
3410 	type &= 0xff;
3411 	if (type >= SLJIT_FAST_CALL)
3412 		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
3413 	else
3414 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3415 
3416 	return jump;
3417 }
3418 
3419 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
3420 	sljit_s32 arg_types)
3421 {
3422 	SLJIT_UNUSED_ARG(arg_types);
3423 	CHECK_ERROR_PTR();
3424 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
3425 
3426 	if (type & SLJIT_CALL_RETURN) {
3427 		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
3428 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
3429 	}
3430 
3431 	SLJIT_SKIP_CHECKS(compiler);
3432 	return sljit_emit_jump(compiler, type);
3433 }
3434 
3435 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3436 {
3437 	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3438 
3439 	CHECK_ERROR();
3440 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3441 
3442 	if (src & SLJIT_IMM) {
3443 		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3444 		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3445 	}
3446 	else if (src & SLJIT_MEM) {
3447 		ADJUST_LOCAL_OFFSET(src, srcw);
3448 		FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3449 	}
3450 
3451 	/* emit jump instruction */
3452 	if (type >= SLJIT_FAST_CALL)
3453 		return push_inst(compiler, basr(link_r, src_r));
3454 
3455 	return push_inst(compiler, br(src_r));
3456 }
3457 
3458 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
3459 	sljit_s32 arg_types,
3460 	sljit_s32 src, sljit_sw srcw)
3461 {
3462 	CHECK_ERROR();
3463 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
3464 
3465 	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);
3466 
3467 	if (src & SLJIT_MEM) {
3468 		ADJUST_LOCAL_OFFSET(src, srcw);
3469 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
3470 		src = TMP_REG2;
3471 		srcw = 0;
3472 	}
3473 
3474 	if (type & SLJIT_CALL_RETURN) {
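		/* the call target may live in a saved register that is restored by
		   emit_stack_frame_release below, so move it to tmp1 first */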
3475 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
3476 			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
3477 			src = TMP_REG2;
3478 			srcw = 0;
3479 		}
3480 
3481 		FAIL_IF(emit_stack_frame_release(compiler, r14));
3482 		type = SLJIT_JUMP;
3483 	}
3484 
3485 	SLJIT_SKIP_CHECKS(compiler);
3486 	return sljit_emit_ijump(compiler, type, src, srcw);
3487 }
3488 
3489 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3490 	sljit_s32 dst, sljit_sw dstw,
3491 	sljit_s32 type)
3492 {
3493 	sljit_u8 mask = get_cc(compiler, type);
3494 
3495 	CHECK_ERROR();
3496 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3497 
3498 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3499 	sljit_gpr loc_r = tmp1;
3500 	switch (GET_OPCODE(op)) {
3501 	case SLJIT_AND:
3502 	case SLJIT_OR:
3503 	case SLJIT_XOR:
3504 		compiler->status_flags_state = op & SLJIT_SET_Z;
3505 
3506 		/* dst is also source operand */
3507 		if (dst & SLJIT_MEM)
3508 			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
3509 
3510 		break;
3511 	case SLJIT_MOV32:
3512 		op |= SLJIT_32;
3513 		/* fallthrough */
3514 	case SLJIT_MOV:
3515 		/* can write straight into destination */
3516 		loc_r = dst_r;
3517 		break;
3518 	default:
3519 		SLJIT_UNREACHABLE();
3520 	}
3521 
3522 	/* TODO(mundaym): fold into cmov helper function? */
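	/* materialize the flag value: zero loc_r, then conditionally load 1
	   with a load-halfword-immediate-on-condition (lochi/locghi) */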
3523 	#define LEVAL(i) i(loc_r, 1, mask)
3524 	if (have_lscond2()) {
3525 		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3526 		FAIL_IF(push_inst(compiler,
3527 			WHEN2(op & SLJIT_32, lochi, locghi)));
3528 	} else {
3529 		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
3530 		abort();
3531 	}
3532 	#undef LEVAL
3533 
3534 	/* apply bitwise op and set condition codes */
3535 	switch (GET_OPCODE(op)) {
3536 	#define LEVAL(i) i(dst_r, loc_r)
3537 	case SLJIT_AND:
3538 		FAIL_IF(push_inst(compiler,
3539 			WHEN2(op & SLJIT_32, nr, ngr)));
3540 		break;
3541 	case SLJIT_OR:
3542 		FAIL_IF(push_inst(compiler,
3543 			WHEN2(op & SLJIT_32, or, ogr)));
3544 		break;
3545 	case SLJIT_XOR:
3546 		FAIL_IF(push_inst(compiler,
3547 			WHEN2(op & SLJIT_32, xr, xgr)));
3548 		break;
3549 	#undef LEVAL
3550 	}
3551 
3552 	/* store result to memory if required */
3553 	if (dst & SLJIT_MEM)
3554 		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));
3555 
3556 	return SLJIT_SUCCESS;
3557 }
3558 
3559 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
3560 	sljit_s32 dst_reg,
3561 	sljit_s32 src, sljit_sw srcw)
3562 {
3563 	sljit_ins mask = get_cc(compiler, type & ~SLJIT_32);
3564 	sljit_gpr src_r;
3565 	sljit_ins ins;
3566 
3567 	CHECK_ERROR();
3568 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
3569 
3570 	if (type & SLJIT_32)
3571 		srcw = (sljit_s32)srcw;
3572 
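	/* prefer load-immediate-on-condition for small immediates, then
	   load-on-condition for registers; otherwise fall back to the generic
	   branch based sequence */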
3573 	if (have_lscond2() && (src & SLJIT_IMM) && is_s16(srcw)) {
3574 		ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
3575 		return push_inst(compiler, ins | R36A(gpr(dst_reg)) | (mask << 32) | (sljit_ins)(srcw & 0xffff) << 16);
3576 	}
3577 
3578 	if (src & SLJIT_IMM) {
3579 		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
3580 		src_r = tmp0;
3581 	} else
3582 		src_r = gpr(src);
3583 
3584 	if (have_lscond1()) {
3585 		ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
3586 		return push_inst(compiler, ins | (mask << 12) | R4A(gpr(dst_reg)) | R0A(src_r));
3587 	}
3588 
3589 	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);
3590 }
3591 
3592 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3593 	sljit_s32 reg,
3594 	sljit_s32 mem, sljit_sw memw)
3595 {
3596 	sljit_ins ins, reg1, reg2, base, offs = 0;
3597 
3598 	CHECK_ERROR();
3599 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3600 
3601 	if (!(reg & REG_PAIR_MASK))
3602 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3603 
3604 	ADJUST_LOCAL_OFFSET(mem, memw);
3605 
3606 	base = gpr(mem & REG_MASK);
3607 	reg1 = gpr(REG_PAIR_FIRST(reg));
3608 	reg2 = gpr(REG_PAIR_SECOND(reg));
3609 
3610 	if (mem & OFFS_REG_MASK) {
3611 		memw &= 0x3;
3612 		offs = gpr(OFFS_REG(mem));
3613 
3614 		if (memw != 0) {
3615 			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3616 			offs = tmp1;
3617 		} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3618 			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R4A(tmp1) | R0A(base) | R12A(offs)));
3619 			base = tmp1;
3620 			offs = 0;
3621 		}
3622 
3623 		memw = 0;
3624 	} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3625 		FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3626 
3627 		if (base == 0)
3628 			base = tmp1;
3629 		else
3630 			offs = tmp1;
3631 
3632 		memw = 0;
3633 	}
3634 
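	/* consecutive registers without an index can use a single lmg/stmg;
	   otherwise emit two lg/stg, loading reg2 first when the first load
	   would overwrite the base register */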
3635 	if (offs == 0 && reg2 == (reg1 + 1)) {
3636 		ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3637 		return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3638 	}
3639 
3640 	ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3641 
3642 	if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3643 		FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3644 		return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3645 	}
3646 
3647 	FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3648 	return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3649 }
3650 
3651 /* --------------------------------------------------------------------- */
3652 /*  Other instructions                                                   */
3653 /* --------------------------------------------------------------------- */
3654 
3655 /* On s390x we build a literal pool to hold constants. This has two main
3656    advantages:
3657 
3658      1. we only need one instruction in the instruction stream (LGRL)
3659      2. we can store 64 bit addresses and use 32 bit offsets
3660 
3661    To retrofit the extra information needed to build the literal pool we
3662    add a new sljit_s390x_const struct that contains the initial value but
3663    can still be cast to a sljit_const. */
3664 
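/* Note: the lgrl/larl emitted below is tagged with sljit_ins_const in the
   otherwise unused upper halfword of the instruction word, so that code
   generation can recognize it and fix up the literal pool offset. */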
3665 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
3666 {
3667 	struct sljit_s390x_const *const_;
3668 	sljit_gpr dst_r;
3669 
3670 	CHECK_ERROR_PTR();
3671 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
3672 
3673 	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
3674 					sizeof(struct sljit_s390x_const));
3675 	PTR_FAIL_IF(!const_);
3676 	set_const((struct sljit_const*)const_, compiler);
3677 	const_->init_value = init_value;
3678 
3679 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3680 	if (have_genext())
3681 		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
3682 	else {
3683 		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
3684 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
3685 	}
3686 
3687 	if (dst & SLJIT_MEM)
3688 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
3689 
3690 	return (struct sljit_const*)const_;
3691 }
3692 
3693 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
3694 {
3695 	/* Update the constant pool. */
3696 	sljit_uw *ptr = (sljit_uw *)addr;
3697 	SLJIT_UNUSED_ARG(executable_offset);
3698 
3699 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
3700 	*ptr = new_target;
3701 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
3702 	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
3703 }
3704 
3705 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
3706 {
3707 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
3708 }
3709 
3710 SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
3711 	struct sljit_compiler *compiler,
3712 	sljit_s32 dst, sljit_sw dstw)
3713 {
3714 	struct sljit_put_label *put_label;
3715 	sljit_gpr dst_r;
3716 
3717 	CHECK_ERROR_PTR();
3718 	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
3719 	ADJUST_LOCAL_OFFSET(dst, dstw);
3720 
3721 	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
3722 	PTR_FAIL_IF(!put_label);
3723 	set_put_label(put_label, compiler, 0);
3724 
3725 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3726 
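	/* the label address is placed in the literal pool; load it with lgrl
	   when the general-instructions-extension facility is available,
	   otherwise with larl + lg through tmp1 */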
3727 	if (have_genext())
3728 		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
3729 	else {
3730 		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
3731 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
3732 	}
3733 
3734 	if (dst & SLJIT_MEM)
3735 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
3736 
3737 	return put_label;
3738 }
3739 
3740 /* TODO(carenas): EVAL probably should move up or be refactored */
3741 #undef WHEN2
3742 #undef EVAL
3743 
3744 #undef tmp1
3745 #undef tmp0
3746 
3747 /* TODO(carenas): undef other macros that spill like is_u12? */
3748