1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
/* Facility detection configuration. When the build pins a minimum
   architecture level (__ARCH__), some facility checks can be resolved at
   compile time; dynamic (runtime) detection is always compiled in. */
#ifdef __ARCH__
#define ENABLE_STATIC_FACILITY_DETECTION 1
#else
#define ENABLE_STATIC_FACILITY_DETECTION 0
#endif
#define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
/* Return the human-readable platform name; SLJIT_CPUINFO is a
   compile-time string suffix describing the build configuration. */
SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
{
	return "s390x" SLJIT_CPUINFO;
}
40 
/* Instructions. */
/* s390x instructions are 2, 4 or 6 bytes long, so every opcode fits in the
   low 48 bits of a 64-bit word; bits 48+ are free for bookkeeping tags. */
typedef sljit_uw sljit_ins;

/* Instruction tags (most significant halfword). */
static const sljit_ins sljit_ins_const = (sljit_ins)1 << 48;

/* Temporary registers, mapped to physical r0/r1 by reg_map below. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)

/* Map from SLJIT register index to hardware GPR number (see gpr()). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 4] = {
	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1
};
53 
54 /* there are also a[2-15] available, but they are slower to access and
55  * their use is limited as mundaym explained:
56  *   https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
57  */
58 
59 /* General Purpose Registers [0-15]. */
60 typedef sljit_uw sljit_gpr;
61 
62 /*
63  * WARNING
64  * the following code is non standard and should be improved for
65  * consistency, but doesn't use SLJIT_NUMBER_OF_REGISTERS based
66  * registers because r0 and r1 are the ABI recommended volatiles.
67  * there is a gpr() function that maps sljit to physical register numbers
68  * that should be used instead of the usual index into reg_map[] and
69  * will be retired ASAP (TODO: carenas)
70  */
71 
72 static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
73 static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
74 static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
75 static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
76 static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
77 static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
78 static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
79 static const sljit_gpr r7 = 7;		/* reg_map[6] */
80 static const sljit_gpr r8 = 8;		/* reg_map[7] */
81 static const sljit_gpr r9 = 9;		/* reg_map[8] */
82 static const sljit_gpr r10 = 10;	/* reg_map[9] */
83 static const sljit_gpr r11 = 11;	/* reg_map[10] */
84 static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
85 static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
86 static const sljit_gpr r14 = 14;	/* reg_map[0]: return address and flag register */
87 static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
88 
89 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
90 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
91 /* TODO(carenas): r13 is usually pointed to "pool" per ABI, using a tmp
92  *                like we do know might be faster though, reserve?
93  */
94 
95 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
96 #define tmp0	r0
97 #define tmp1	r1
98 
99 /* TODO(carenas): flags should move to a different register so that
100  *                link register doesn't need to change
101  */
102 
103 /* When reg cannot be unused. */
104 #define IS_GPR_REG(reg)		((reg > 0) && (reg) <= SLJIT_SP)
105 
106 /* Link registers. The normal link register is r14, but since
107    we use that for flags we need to use r0 instead to do fast
108    calls so that flags are preserved. */
109 static const sljit_gpr link_r = 14;     /* r14 */
110 static const sljit_gpr fast_link_r = 0; /* r0 */
111 
#define TMP_FREG1	(0)

/* Map from SLJIT float register index to hardware FPR number (see fgpr()). */
static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1] = {
	1, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8,
};

/* Place a register number into an instruction word at the given bit offset,
   counted from the least significant bit. NOTE(review): for R32A/R36A the
   argument must already have a 64-bit type (sljit_ins/sljit_gpr); shifting
   a plain int by 32+ bits would be undefined behavior. */
#define R0A(r) (r)
#define R4A(r) ((r) << 4)
#define R8A(r) ((r) << 8)
#define R12A(r) ((r) << 12)
#define R16A(r) ((r) << 16)
#define R20A(r) ((r) << 20)
#define R28A(r) ((r) << 28)
#define R32A(r) ((r) << 32)
#define R36A(r) ((r) << 36)

/* Convenience wrappers that also perform the SLJIT -> hardware mapping. */
#define R0(r) ((sljit_ins)reg_map[r])

#define F0(r) ((sljit_ins)freg_map[r])
#define F4(r) (R4A((sljit_ins)freg_map[r]))
#define F20(r) (R20A((sljit_ins)freg_map[r]))
#define F36(r) (R36A((sljit_ins)freg_map[r]))
134 
/* Backend-specific constant record: extends the generic sljit_const with
   the value needed to emit the literal pool. */
struct sljit_s390x_const {
	struct sljit_const const_; /* must be first */
	sljit_sw init_value;       /* required to build literal pool */
};
139 
/* Convert SLJIT register to hardware register. */
static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
{
	/* r must index into reg_map; out-of-range values are a caller bug. */
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
	return reg_map[r];
}
146 
/* Convert SLJIT float register to hardware FPR number.
   NOTE(review): reuses the sljit_gpr type for the result even though the
   value is a floating-point register number. */
static SLJIT_INLINE sljit_gpr fgpr(sljit_s32 r)
{
	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(freg_map) / sizeof(freg_map[0])));
	return freg_map[r];
}
152 
153 /* Size of instruction in bytes. Tags must already be cleared. */
sizeof_ins(sljit_ins ins)154 static SLJIT_INLINE sljit_uw sizeof_ins(sljit_ins ins)
155 {
156 	/* keep faulting instructions */
157 	if (ins == 0)
158 		return 2;
159 
160 	if ((ins & 0x00000000ffffL) == ins)
161 		return 2;
162 	if ((ins & 0x0000ffffffffL) == ins)
163 		return 4;
164 	if ((ins & 0xffffffffffffL) == ins)
165 		return 6;
166 
167 	SLJIT_UNREACHABLE();
168 	return (sljit_uw)-1;
169 }
170 
/* Append a (possibly tagged) instruction word to the compiler's buffer.
   Returns SLJIT_SUCCESS or an allocation-failure error via FAIL_IF. */
static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
{
	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
	FAIL_IF(!ibuf);
	*ibuf = ins;
	/* size counts buffered instruction words, not bytes. */
	compiler->size++;
	return SLJIT_SUCCESS;
}
179 
encode_inst(void ** ptr,sljit_ins ins)180 static sljit_s32 encode_inst(void **ptr, sljit_ins ins)
181 {
182 	sljit_u16 *ibuf = (sljit_u16 *)*ptr;
183 	sljit_uw size = sizeof_ins(ins);
184 
185 	SLJIT_ASSERT((size & 6) == size);
186 	switch (size) {
187 	case 6:
188 		*ibuf++ = (sljit_u16)(ins >> 32);
189 		/* fallthrough */
190 	case 4:
191 		*ibuf++ = (sljit_u16)(ins >> 16);
192 		/* fallthrough */
193 	case 2:
194 		*ibuf++ = (sljit_u16)(ins);
195 	}
196 	*ptr = (void*)ibuf;
197 	return SLJIT_SUCCESS;
198 }
199 
/* True when the last flag-setting operation was an ADD/SUB that was not a
   comparison; those set the condition code differently than compares. */
#define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))

/* Map the given type to a 4-bit condition code mask.
   The mask is interpreted against the CPU condition code (CC 0-3); which
   CC values an operation produces depends on the operation recorded in
   compiler->status_flags_state, hence the state-dependent branches below.
   The case fallthroughs are intentional throughout. */
static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */

	switch (type) {
	case SLJIT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			/* Intentionally shadows the parameter with the flag type. */
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return cc0;
			if (type == SLJIT_OVERFLOW)
				return (cc0 | cc3);
			return (cc0 | cc2);
		}
		/* fallthrough */

	case SLJIT_EQUAL_F64:
		return cc0;

	case SLJIT_NOT_EQUAL:
		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
				return (cc1 | cc2 | cc3);
			if (type == SLJIT_OVERFLOW)
				return (cc1 | cc2);
			return (cc1 | cc3);
		}
		/* fallthrough */

	case SLJIT_NOT_EQUAL_F64:
		return (cc1 | cc2 | cc3);

	case SLJIT_LESS:
		return cc1;

	case SLJIT_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_GREATER:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return cc2;
		return cc3;

	case SLJIT_LESS_EQUAL:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
			return (cc0 | cc1);
		return (cc0 | cc1 | cc2);

	case SLJIT_SIG_LESS:
	case SLJIT_LESS_F64:
		return cc1;

	case SLJIT_NOT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_SIG_LESS_EQUAL:
	case SLJIT_LESS_EQUAL_F64:
		return (cc0 | cc1);

	case SLJIT_CARRY:
		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_SIG_GREATER:
		/* Overflow is considered greater, see SLJIT_SUB. */
		return cc2 | cc3;

	case SLJIT_SIG_GREATER_EQUAL:
		return (cc0 | cc2 | cc3);

	case SLJIT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc2 | cc3);
		/* fallthrough */

	case SLJIT_UNORDERED_F64:
		return cc3;

	case SLJIT_NOT_OVERFLOW:
		if (compiler->status_flags_state & SLJIT_SET_Z)
			return (cc0 | cc1);
		/* fallthrough */

	case SLJIT_ORDERED_F64:
		return (cc0 | cc1 | cc2);

	case SLJIT_GREATER_F64:
		return cc2;

	case SLJIT_GREATER_EQUAL_F64:
		return (cc0 | cc2);
	}

	SLJIT_UNREACHABLE();
	return (sljit_u8)-1;
}
307 
/* Facility to bit index mappings.
   Note: some facilities share the same bit index (e.g. the facilities
   introduced together at bit 45 below). */
typedef sljit_uw facility_bit;
#define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
#define FAST_LONG_DISPLACEMENT_FACILITY 19
#define EXTENDED_IMMEDIATE_FACILITY 21
#define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
#define DISTINCT_OPERAND_FACILITY 45
#define HIGH_WORD_FACILITY 45
#define POPULATION_COUNT_FACILITY 45
#define LOAD_STORE_ON_CONDITION_1_FACILITY 45
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
#define LOAD_STORE_ON_CONDITION_2_FACILITY 53
#define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
#define VECTOR_FACILITY 129
#define VECTOR_ENHANCEMENTS_1_FACILITY 135
324 
/* Report whether a facility is known to be present due to the compiler
   settings. This function should always be compiled to a constant
   value given a constant argument. Returns 0 (unknown) when static
   detection is disabled or the facility is not covered below. */
static SLJIT_INLINE int have_facility_static(facility_bit x)
{
#if ENABLE_STATIC_FACILITY_DETECTION
	switch (x) {
	case FAST_LONG_DISPLACEMENT_FACILITY:
		return (__ARCH__ >=  6 /* z990 */);
	case EXTENDED_IMMEDIATE_FACILITY:
	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
		return (__ARCH__ >=  7 /* z9-109 */);
	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
		return (__ARCH__ >=  8 /* z10 */);
	case DISTINCT_OPERAND_FACILITY:
		return (__ARCH__ >=  9 /* z196 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
		return (__ARCH__ >= 10 /* zEC12 */);
	case LOAD_STORE_ON_CONDITION_2_FACILITY:
	case VECTOR_FACILITY:
		return (__ARCH__ >= 11 /* z13 */);
	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
	case VECTOR_ENHANCEMENTS_1_FACILITY:
		return (__ARCH__ >= 12 /* z14 */);
	default:
		SLJIT_UNREACHABLE();
	}
#endif
	return 0;
}
355 
get_hwcap()356 static SLJIT_INLINE unsigned long get_hwcap()
357 {
358 	static unsigned long hwcap = 0;
359 	if (SLJIT_UNLIKELY(!hwcap)) {
360 		hwcap = getauxval(AT_HWCAP);
361 		SLJIT_ASSERT(hwcap != 0);
362 	}
363 	return hwcap;
364 }
365 
have_stfle()366 static SLJIT_INLINE int have_stfle()
367 {
368 	if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
369 		return 1;
370 
371 	return (get_hwcap() & HWCAP_S390_STFLE);
372 }
373 
/* Report whether the given facility is available. This function always
   performs a runtime check (cached after the first call). */
static int have_facility_dynamic(facility_bit x)
{
#if ENABLE_DYNAMIC_FACILITY_DETECTION
	/* Cached copy of the STFLE facility list (4 * 64 = 256 bits).
	   NOTE(review): filled lazily without synchronization; concurrent
	   first calls would each execute STFLE — presumably benign since the
	   stored bits are identical, but confirm if used from threads. */
	static struct {
		sljit_uw bits[4];
	} cpu_features;
	size_t size = sizeof(cpu_features);
	/* Facility bits are numbered from the most significant bit of the
	   first doubleword, hence the left-anchored bit mask below. */
	const sljit_uw word_index = x >> 6;
	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));

	SLJIT_ASSERT(x < size * 8);
	if (SLJIT_UNLIKELY(!have_stfle()))
		return 0;

	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
		/* STFLE stores the facility list at the address in %1; r0 holds
		   the number of doublewords to store minus one. */
		__asm__ __volatile__ (
			"lgr   %%r0, %0;"
			"stfle 0(%1);"
			/* outputs  */:
			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
			/* clobbers */: "r0", "cc", "memory"
		);
		/* Word 0 always has bits set on supported machines, so it doubles
		   as the "cache filled" marker. */
		SLJIT_ASSERT(cpu_features.bits[0] != 0);
	}
	return (cpu_features.bits[word_index] & bit_index) != 0;
#else
	return 0;
#endif
}
405 
406 #define HAVE_FACILITY(name, bit) \
407 static SLJIT_INLINE int name() \
408 { \
409 	static int have = -1; \
410 	/* Static check first. May allow the function to be optimized away. */ \
411 	if (have_facility_static(bit)) \
412 		have = 1; \
413 	else if (SLJIT_UNLIKELY(have < 0)) \
414 		have = have_facility_dynamic(bit) ? 1 : 0; \
415 \
416 	return have; \
417 }
418 
HAVE_FACILITY(have_eimm,EXTENDED_IMMEDIATE_FACILITY)419 HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
420 HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
421 HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
422 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
423 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
424 HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
425 #undef HAVE_FACILITY
426 
/* Immediate/displacement range predicates. */
#define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
#define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)

/* Valid for bit lengths below the width of int (shifts stay in range). */
#define CHECK_SIGNED(v, bitlen) \
	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))

#define is_s8(d)	CHECK_SIGNED((d), 8)
#define is_s16(d)	CHECK_SIGNED((d), 16)
#define is_s20(d)	CHECK_SIGNED((d), 20)
/* True when d round-trips through sljit_s32, i.e. fits in 32 signed bits. */
#define is_s32(d)	((d) == (sljit_s32)(d))
437 
438 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
439 {
440 	SLJIT_ASSERT(is_s20(d));
441 
442 	sljit_uw dh = (d >> 12) & 0xff;
443 	sljit_uw dl = (d << 8) & 0xfff00;
444 	return (dh | dl) << 8;
445 }
446 
/* TODO(carenas): variadic macro is not strictly needed */
/* Declares a static inline function that returns the encoded instruction. */
#define SLJIT_S390X_INSTRUCTION(op, ...) \
static SLJIT_INLINE sljit_ins op(__VA_ARGS__)

/* RR form instructions. */
/* 2-byte format: opcode in the high byte, dst at bit offset 4 from the
   LSB, src at offset 0. */
#define SLJIT_S390X_RR(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
}

/* AND */
SLJIT_S390X_RR(nr,   0x1400)

/* BRANCH AND SAVE */
SLJIT_S390X_RR(basr, 0x0d00)

/* BRANCH ON CONDITION */
SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */

/* DIVIDE */
SLJIT_S390X_RR(dr,   0x1d00)

/* EXCLUSIVE OR */
SLJIT_S390X_RR(xr,   0x1700)

/* LOAD */
SLJIT_S390X_RR(lr,   0x1800)

/* LOAD COMPLEMENT */
SLJIT_S390X_RR(lcr,  0x1300)

/* OR */
SLJIT_S390X_RR(or,   0x1600)

#undef SLJIT_S390X_RR
483 
/* RRE form instructions */
/* 4-byte format: 16-bit opcode, dst at bit offset 4 from the LSB,
   src at offset 0. */
#define SLJIT_S390X_RRE(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
{ \
	return (pattern) | R4A(dst) | R0A(src); \
}

/* AND */
SLJIT_S390X_RRE(ngr,   0xb9800000)

/* DIVIDE LOGICAL */
SLJIT_S390X_RRE(dlr,   0xb9970000)
SLJIT_S390X_RRE(dlgr,  0xb9870000)

/* DIVIDE SINGLE */
SLJIT_S390X_RRE(dsgr,  0xb90d0000)

/* EXCLUSIVE OR */
SLJIT_S390X_RRE(xgr,   0xb9820000)

/* LOAD */
SLJIT_S390X_RRE(lgr,   0xb9040000)
SLJIT_S390X_RRE(lgfr,  0xb9140000)

/* LOAD BYTE */
SLJIT_S390X_RRE(lbr,   0xb9260000)
SLJIT_S390X_RRE(lgbr,  0xb9060000)

/* LOAD COMPLEMENT */
SLJIT_S390X_RRE(lcgr,  0xb9030000)

/* LOAD HALFWORD */
SLJIT_S390X_RRE(lhr,   0xb9270000)
SLJIT_S390X_RRE(lghr,  0xb9070000)

/* LOAD LOGICAL */
SLJIT_S390X_RRE(llgfr, 0xb9160000)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RRE(llcr,  0xb9940000)
SLJIT_S390X_RRE(llgcr, 0xb9840000)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RRE(llhr,  0xb9950000)
SLJIT_S390X_RRE(llghr, 0xb9850000)

/* MULTIPLY LOGICAL */
SLJIT_S390X_RRE(mlgr,  0xb9860000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RRE(msgfr, 0xb91c0000)

/* OR */
SLJIT_S390X_RRE(ogr,   0xb9810000)

/* SUBTRACT */
SLJIT_S390X_RRE(sgr,   0xb9090000)

#undef SLJIT_S390X_RRE
543 
/* RI-a form instructions */
/* 4-byte format: reg at bit offset 20 from the LSB, 16-bit immediate at
   offset 0; imm_type selects signed vs unsigned immediates per mnemonic. */
#define SLJIT_S390X_RIA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	return (pattern) | R20A(reg) | (imm & 0xffff); \
}

/* ADD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)

/* LOAD HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)

/* MULTIPLY HALFWORD IMMEDIATE */
SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)

/* OR IMMEDIATE */
SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)

#undef SLJIT_S390X_RIA
572 
/* RIL-a form instructions (requires extended immediate facility) */
/* 6-byte format: reg at bit offset 36 from the LSB, 32-bit immediate at
   offset 0. */
#define SLJIT_S390X_RILA(name, pattern, imm_type) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
{ \
	SLJIT_ASSERT(have_eimm()); \
	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
}

/* ADD IMMEDIATE */
SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)

/* ADD IMMEDIATE HIGH */
SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */

/* AND IMMEDIATE */
SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)

/* EXCLUSIVE OR IMMEDIATE */
SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)

/* INSERT IMMEDIATE */
SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)

/* LOAD IMMEDIATE */
SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)

/* LOAD LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)

/* SUBTRACT LOGICAL IMMEDIATE */
SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)

#undef SLJIT_S390X_RILA
608 
/* RX-a form instructions */
/* 4-byte format: reg at bit offset 20, index at 16, base at 12 and an
   unsigned 12-bit displacement at offset 0. */
#define SLJIT_S390X_RXA(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT((d & 0xfff) == d); \
\
	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
}

/* LOAD */
SLJIT_S390X_RXA(l,   0x58000000)

/* LOAD ADDRESS */
SLJIT_S390X_RXA(la,  0x41000000)

/* LOAD HALFWORD */
SLJIT_S390X_RXA(lh,  0x48000000)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXA(ms,  0x71000000)

/* STORE */
SLJIT_S390X_RXA(st,  0x50000000)

/* STORE CHARACTER */
SLJIT_S390X_RXA(stc, 0x42000000)

/* STORE HALFWORD */
SLJIT_S390X_RXA(sth, 0x40000000)

#undef SLJIT_S390X_RXA
640 
/* RXY-a instructions */
/* 6-byte format: reg at bit offset 36, index at 32, base at 28 and a
   signed 20-bit displacement (see disp_s20). cond asserts the facility
   the mnemonic depends on. */
#define SLJIT_S390X_RXYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
}

/* LOAD */
SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)

/* LOAD BYTE */
SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())

/* LOAD HALFWORD */
SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)

/* LOAD LOGICAL */
SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)

/* LOAD LOGICAL CHARACTER */
SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)

/* LOAD LOGICAL HALFWORD */
SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)

/* MULTIPLY SINGLE */
SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)

/* STORE */
SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)

/* STORE CHARACTER */
SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())

/* STORE HALFWORD */
SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())

#undef SLJIT_S390X_RXYA
689 
/* RSY-a instructions */
/* 6-byte format: two registers (bit offsets 36 and 32), base at 28 and a
   signed 20-bit displacement. */
#define SLJIT_S390X_RSYA(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
}

/* LOAD MULTIPLE */
SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)

/* SHIFT LEFT LOGICAL */
SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)

/* SHIFT RIGHT SINGLE */
SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)

/* STORE MULTIPLE */
SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)

#undef SLJIT_S390X_RSYA
712 
/* RIE-f instructions (require general-instructions-extension facility) */
/* 6-byte format: dst/src registers at bit offsets 36/32, the three 8-bit
   immediates (start bit, end bit, rotate amount) at offsets 24/16/8. */
#define SLJIT_S390X_RIEF(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
{ \
	sljit_ins i3, i4, i5; \
\
	SLJIT_ASSERT(have_genext()); \
	i3 = (sljit_ins)start << 24; \
	i4 = (sljit_ins)end << 16; \
	i5 = (sljit_ins)rot << 8; \
\
	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
}

/* ROTATE THEN AND SELECTED BITS */
/* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */

/* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
/* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */

/* ROTATE THEN OR SELECTED BITS */
SLJIT_S390X_RIEF(rosbg,  0xec0000000056)

/* ROTATE THEN INSERT SELECTED BITS */
/* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
/* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */

/* ROTATE THEN INSERT SELECTED BITS HIGH */
SLJIT_S390X_RIEF(risbhg, 0xec000000005d)

/* ROTATE THEN INSERT SELECTED BITS LOW */
/* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */

#undef SLJIT_S390X_RIEF
747 
/* RRF-c instructions (require load/store-on-condition 1 facility) */
/* 4-byte format: 4-bit condition mask at bit offset 12, dst/src registers
   at offsets 4/0. */
#define SLJIT_S390X_RRFC(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
{ \
	sljit_ins m3; \
\
	SLJIT_ASSERT(have_lscond1()); \
	m3 = (sljit_ins)(mask & 0xf) << 12; \
\
	return (pattern) | m3 | R4A(dst) | R0A(src); \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RRFC(locr,  0xb9f20000)
SLJIT_S390X_RRFC(locgr, 0xb9e20000)

#undef SLJIT_S390X_RRFC
765 
/* RIE-g instructions (require load/store-on-condition 2 facility) */
/* 6-byte format: reg at bit offset 36, 4-bit condition mask at 32 and a
   16-bit signed immediate at offset 16. */
#define SLJIT_S390X_RIEG(name, pattern) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
{ \
	sljit_ins m3, i2; \
\
	SLJIT_ASSERT(have_lscond2()); \
	m3 = (sljit_ins)(mask & 0xf) << 32; \
	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
\
	return (pattern) | R36A(reg) | m3 | i2; \
}

/* LOAD HALFWORD IMMEDIATE ON CONDITION */
SLJIT_S390X_RIEG(lochi,  0xec0000000042)
SLJIT_S390X_RIEG(locghi, 0xec0000000046)

#undef SLJIT_S390X_RIEG
784 
/* RIL-b instructions: reg at bit offset 36 plus a 32-bit immediate.
   NOTE(review): ri is presumably a halfword-relative offset for the
   relative-addressing mnemonics below — confirm against callers. */
#define SLJIT_S390X_RILB(name, pattern, cond) \
SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
{ \
	SLJIT_ASSERT(cond); \
\
	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
}

/* BRANCH RELATIVE AND SAVE LONG */
SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)

/* LOAD ADDRESS RELATIVE LONG */
SLJIT_S390X_RILB(larl,  0xc00000000000, 1)

/* LOAD RELATIVE LONG */
SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())

#undef SLJIT_S390X_RILB
803 
/* Unconditional branch to the address in the target register: BRANCH ON
   CONDITION (bcr, opcode 0x07) with the always-true mask 0xf. */
SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
{
	return 0x07f0 | target;
}
808 
/* BRANCH RELATIVE ON CONDITION: 4-bit condition mask at bit offset 20,
   16-bit relative target at offset 0. */
SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
{
	return 0xa7040000L
		| ((sljit_ins)(mask & 0xf) << 20)
		| ((sljit_ins)target & 0xffff);
}
815 
/* BRANCH RELATIVE ON CONDITION LONG: 4-bit condition mask at bit offset
   36, 32-bit relative target at offset 0. */
SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
{
	return 0xc00400000000L
		| ((sljit_ins)(mask & 0xf) << 36)
		| ((sljit_ins)target & 0xffffffff);
}
822 
/* FIND LEFTMOST ONE (requires extended-immediate facility).
   NOTE(review): the hardware writes an even/odd register pair starting at
   dst, so dst is presumably required to be even — not asserted here. */
SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
{
	SLJIT_ASSERT(have_eimm());
	return 0xb9830000 | R8A(dst) | R0A(src);
}
828 
/* INSERT PROGRAM MASK: copies the condition code and program mask into
   the destination register (used below to save/restore flags). */
SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
{
	return 0xb2220000 | R4A(dst);
}
834 
/* SET PROGRAM MASK: restores the condition code and program mask from
   the given register (counterpart of ipm above). */
SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
{
	return 0x0400 | R4A(dst);
}
840 
/* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
/* Same as risbhg with the 0x8 ("zero remaining bits") flag set in the
   end-bit field. */
SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
{
	return risbhg(dst, src, start, 0x8 | end, rot);
}

#undef SLJIT_S390X_INSTRUCTION
848 
/* Adjust the saved condition code after a zero/overflow-producing
   operation so later conditional tests see consistent flags. */
static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
{
	/* Condition codes: bits 18 and 19.
	   Transformation:
	     0 (zero and no overflow) : unchanged
	     1 (non-zero and no overflow) : unchanged
	     2 (zero and overflow) : decreased by 1
	     3 (non-zero and overflow) : decreased by 1 if non-zero */
	/* Skip everything below when there was no overflow (mask 0xc = CC 0|1).
	   The operand is the branch distance in halfwords over the emitted
	   instructions; the `or` form is one halfword shorter than `ogr`. */
	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
	/* Capture current CC + program mask into tmp1. */
	FAIL_IF(push_inst(compiler, ipm(tmp1)));
	/* Re-derive CC from the result value (OR with itself sets CC only). */
	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
	/* Result was zero (mask 0x8 = CC 0): keep the saved CC unchanged. */
	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
	/* Decrement the CC field saved by ipm (CC lives at bit 28). */
	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
	/* Restore the (possibly adjusted) CC + program mask. */
	FAIL_IF(push_inst(compiler, spm(tmp1)));
	return SLJIT_SUCCESS;
}
865 
/* load 64-bit immediate into register without clobbering flags */
/* Picks the shortest encoding: a single 4-byte instruction when the value
   fits a 16-bit pattern, otherwise 6-byte extended-immediate forms. */
static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
{
	/* 4 byte instructions */
	if (is_s16(v))
		return push_inst(compiler, lghi(target, (sljit_s16)v));

	/* Exactly one 16-bit quarter of the value is non-zero: use the
	   matching LOAD LOGICAL IMMEDIATE variant (zeroes the rest). */
	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
		return push_inst(compiler, llill(target, (sljit_u16)v));

	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));

	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));

	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));

	/* 6 byte instructions (requires extended immediate facility) */
	if (have_eimm()) {
		if (is_s32(v))
			return push_inst(compiler, lgfi(target, (sljit_s32)v));

		if (((sljit_uw)v >> 32) == 0)
			return push_inst(compiler, llilf(target, (sljit_u32)v));

		if (((sljit_uw)v << 32) == 0)
			return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));

		/* General case: load the low half, then insert the high half. */
		FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
		return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
	}

	/* TODO(mundaym): instruction sequences that don't use extended immediates */
	abort();
}
903 
/* Decomposed D(X,B) memory operand: base and index registers (r0 means
   "none" in address calculations) plus a displacement. */
struct addr {
	sljit_gpr base;
	sljit_gpr index;
	sljit_s32 offset;
};
909 
/* transform memory operand into D(X,B) form with a signed 20-bit offset */
/* When the operand has an index register, `off` is a shift amount applied
   to the index; otherwise an out-of-range displacement is materialized in
   tmp and used as the index with a zero displacement. */
static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr base = r0;
	sljit_gpr index = r0;

	SLJIT_ASSERT(tmp != r0);
	if (mem & REG_MASK)
		base = gpr(mem & REG_MASK);

	if (mem & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(mem));
		if (off != 0) {
			/* shift and put the result into tmp */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
			index = tmp;
			off = 0; /* clear offset */
		}
	}
	else if (!is_s20(off)) {
		/* Displacement too large for the instruction: load it into tmp
		   and address via base + index instead. */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		index = tmp;
		off = 0; /* clear offset */
	}
	addr->base = base;
	addr->index = index;
	addr->offset = (sljit_s32)off;
	return SLJIT_SUCCESS;
}
942 
/* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
/* Identical to make_addr_bxy except the displacement range check (u12
   instead of s20), for instructions without long-displacement forms. */
static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
	struct addr *addr, sljit_s32 mem, sljit_sw off,
	sljit_gpr tmp /* clobbered, must not be r0 */)
{
	sljit_gpr base = r0;
	sljit_gpr index = r0;

	SLJIT_ASSERT(tmp != r0);
	if (mem & REG_MASK)
		base = gpr(mem & REG_MASK);

	if (mem & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(mem));
		if (off != 0) {
			/* shift and put the result into tmp */
			SLJIT_ASSERT(0 <= off && off < 64);
			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
			index = tmp;
			off = 0; /* clear offset */
		}
	}
	else if (!is_u12(off)) {
		/* Displacement out of u12 range: load into tmp, use as index. */
		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
		index = tmp;
		off = 0; /* clear offset */
	}
	addr->base = base;
	addr->index = index;
	addr->offset = (sljit_s32)off;
	return SLJIT_SUCCESS;
}
975 
/* Build an RX/RXY-form instruction from a decomposed struct addr. */
#define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
/* Pick the 12-bit-displacement form (i1) or 20-bit form (i2) based on cond. */
#define WHEN(cond, r, i1, i2, addr) \
	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
979 
980 /* May clobber tmp1. */
/* Load a 32- or 64-bit word from memory operand src/srcw into dst.
   May clobber tmp1. */
static sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst,
		sljit_s32 src, sljit_sw srcw,
		sljit_s32 is_32bit)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(src & SLJIT_MEM);

	/* 64-bit loads (lg) always take a signed 20-bit displacement; 32-bit
	   loads only when the long-displacement facility is available. */
	if (!is_32bit || have_ldisp())
		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
	else
		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));

	if (!is_32bit)
		ins = lg(dst, addr.offset, addr.index, addr.base);
	else if (is_u12(addr.offset))
		ins = l(dst, addr.offset, addr.index, addr.base);
	else
		ins = ly(dst, addr.offset, addr.index, addr.base);

	return push_inst(compiler, ins);
}
1001 
1002 /* May clobber tmp1. */
/* Store the 32- or 64-bit word in src to memory operand dst/dstw.
   May clobber tmp1. */
static sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src,
		sljit_s32 dst, sljit_sw dstw,
		sljit_s32 is_32bit)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* 64-bit stores (stg) always take a signed 20-bit displacement; 32-bit
	   stores only when the long-displacement facility is available. */
	if (!is_32bit || have_ldisp())
		FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
	else
		FAIL_IF(make_addr_bx(compiler, &addr, dst, dstw, tmp1));

	if (!is_32bit)
		ins = stg(src, addr.offset, addr.index, addr.base);
	else if (is_u12(addr.offset))
		ins = st(src, addr.offset, addr.index, addr.base);
	else
		ins = sty(src, addr.offset, addr.index, addr.base);

	return push_inst(compiler, ins);
}
1023 
1024 #undef WHEN
1025 
/* Copy src (register, immediate or memory) into dst_r, honoring the
   current 32/64-bit operating mode of the compiler. */
static sljit_s32 emit_move(struct sljit_compiler *compiler,
	sljit_gpr dst_r,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 is_32bit = (compiler->mode & SLJIT_32) != 0;

	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));

	if (src & SLJIT_IMM)
		return push_load_imm_inst(compiler, dst_r, srcw);

	if (src & SLJIT_MEM)
		return load_word(compiler, dst_r, src, srcw, is_32bit);

	/* plain register-to-register move */
	sljit_gpr src_r = gpr(src & REG_MASK);
	return push_inst(compiler, is_32bit ? lr(dst_r, src_r) : lgr(dst_r, src_r));
}
1041 
/* Emit a two-operand RR-form instruction: dst = dst op src2, with src1
 * preloaded into the destination register first. May clobber tmp0/tmp1.
 *
 * needs_move: 0 = dst already holds src1; 1 = copy src1 into dst first;
 * 2 = dst aliases src2, so compute into tmp0 and copy back afterwards. */
static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = tmp0;
	sljit_gpr src_r = tmp1;
	sljit_s32 needs_move = 1;

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == src2) {
			/* loading src1 into dst would destroy src2 */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (FAST_IS_REG(src2))
		src_r = gpr(src2);
	else
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* copy the result computed in tmp0 back into dst */
	dst_r = gpr(dst & REG_MASK);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1078 
/* Emit a single-source RR-form instruction: dst = op(src1).
   May clobber tmp0/tmp1. */
static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w)
{
	sljit_gpr dst_r;
	sljit_gpr src_r;

	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;

	if (!FAST_IS_REG(src1)) {
		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
		src_r = tmp1;
	} else
		src_r = gpr(src1);

	return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
}
1093 
/* Emit a three-operand RRF-form instruction: dst = src1 op src2.
 * May clobber tmp0/tmp1. */
static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr src1_r = tmp0;
	sljit_gpr src2_r = tmp1;

	if (FAST_IS_REG(src1))
		src1_r = gpr(src1);
	else
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));

	if (FAST_IS_REG(src2))
		src2_r = gpr(src2);
	else
		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));

	return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
}
1115 
/* Immediate-operand instruction formats: RI-a (16-bit immediate) vs
   RIL-a (32-bit immediate); see emit_ri(). */
typedef enum {
	RI_A,
	RIL_A,
} emit_ril_type;
1120 
/* Emit an immediate-operand instruction: dst = src1 op imm(src2w).
 * RIL_A encodes a 32-bit immediate, RI_A a 16-bit immediate.
 * May clobber tmp0/tmp1. */
static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w,
	emit_ril_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		/* operate in place when dst aliases src1 */
		if (dst == src1)
			needs_move = 0;
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	if (type == RIL_A)
		return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
	return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
}
1144 
/* Emit an RIE-d form instruction: dst = src1 op signed 16-bit imm(src2w).
 * May clobber tmp0. */
static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_sw src2w)
{
	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_gpr src_r = tmp0;

	if (!FAST_IS_REG(src1))
		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
	else
		src_r = gpr(src1 & REG_MASK);

	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
}
1160 
/* Memory-operand instruction formats: RX-a (unsigned 12-bit displacement)
   vs RXY-a (signed 20-bit displacement); see emit_rx(). */
typedef enum {
	RX_A,
	RXY_A,
} emit_rx_type;
1165 
/* Emit an RX/RXY-form instruction: dst = src1 op mem(src2/src2w), where
 * src2 is a memory operand. May clobber tmp0/tmp1.
 *
 * needs_move: 0 = dst already holds src1; 1 = copy src1 into dst first;
 * 2 = dst participates in the address computation, so the result is
 * computed in tmp0 and copied back at the end. */
static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w,
	emit_rx_type type)
{
	sljit_gpr dst_r = tmp0;
	sljit_s32 needs_move = 1;
	sljit_gpr base, index;

	SLJIT_ASSERT(src2 & SLJIT_MEM);

	if (FAST_IS_REG(dst)) {
		dst_r = gpr(dst);

		if (dst == src1)
			needs_move = 0;
		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
			/* writing src1 into dst would corrupt the address registers */
			dst_r = tmp0;
			needs_move = 2;
		}
	}

	if (needs_move)
		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));

	base = gpr(src2 & REG_MASK);
	/* NOTE(review): tmp0 presumably encodes "no index" here (i.e. maps to
	   r0 in the address computation) — confirm against the register map. */
	index = tmp0;

	if (src2 & OFFS_REG_MASK) {
		index = gpr(OFFS_REG(src2));

		if (src2w != 0) {
			/* shift the index register into tmp1 */
			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
			src2w = 0;
			index = tmp1;
		}
	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
		/* displacement does not fit the encoding: materialize it in tmp1 */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));

		if (src2 & REG_MASK)
			index = tmp1;
		else
			base = tmp1;
		src2w = 0;
	}

	if (type == RX_A)
		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
	else
		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);

	FAIL_IF(push_inst(compiler, ins));

	if (needs_move != 2)
		return SLJIT_SUCCESS;

	/* copy the result computed in tmp0 back into dst */
	dst_r = gpr(dst);
	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
}
1226 
/* Emit an SIY-form instruction: apply the 8-bit immediate srcw to the
 * memory operand dst/dstw. May clobber tmp1.
 *
 * Fixes two addressing bugs:
 *  - with an offset register and a non-zero shift, the original shifted
 *    tmp1 (whose contents were stale) instead of the offset register;
 *  - both address computations used tmp1 as the base register instead of
 *    the operand's actual base register gpr(dst & REG_MASK), so the base
 *    was never added (the large-displacement path computed 2*imm). */
static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
	sljit_s32 dst, sljit_sw dstw,
	sljit_sw srcw)
{
	SLJIT_ASSERT(dst & SLJIT_MEM);

	sljit_gpr dst_r = tmp1;

	if (dst & OFFS_REG_MASK) {
		sljit_gpr index = tmp1;

		if ((dstw & 0x3) == 0)
			index = gpr(OFFS_REG(dst));
		else
			/* shift the offset register (not tmp1) into tmp1 */
			FAIL_IF(push_inst(compiler, sllg(tmp1, gpr(OFFS_REG(dst)), dstw & 0x3, 0)));

		/* tmp1 = base + (possibly shifted) index */
		FAIL_IF(push_inst(compiler, la(tmp1, 0, index, gpr(dst & REG_MASK))));
		dstw = 0;
	}
	else if (!is_s20(dstw)) {
		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));

		if (dst & REG_MASK)
			/* tmp1 = base + immediate displacement */
			FAIL_IF(push_inst(compiler, la(tmp1, 0, tmp1, gpr(dst & REG_MASK))));

		dstw = 0;
	}
	else
		dst_r = gpr(dst & REG_MASK);

	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
}
1259 
/* Alternative encodings of a single operation; a zero entry means the
   operation has no instruction of that form (see emit_commutative /
   emit_non_commutative for how each field is selected). */
struct ins_forms {
	sljit_ins op_r;   /* 32-bit two-operand register form */
	sljit_ins op_gr;  /* 64-bit two-operand register form */
	sljit_ins op_rk;  /* 32-bit three-operand register form */
	sljit_ins op_grk; /* 64-bit three-operand register form */
	sljit_ins op;     /* 32-bit register-memory form, u12 displacement */
	sljit_ins op_y;   /* 32-bit register-memory form, s20 displacement */
	sljit_ins op_g;   /* 64-bit register-memory form, s20 displacement */
};
1269 
/* Emit a commutative binary operation dst = src1 op src2, picking the
 * cheapest available encoding from forms: a register-memory RX/RXY form
 * when one operand is in memory (swapping operands if that helps), a
 * two-operand RR form when dst aliases one source, or the three-operand
 * RRF form otherwise. */
static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins, ins_k;

	if ((src1 | src2) & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* 64-bit mode has no u12-displacement memory form. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			/* Extra instructions needed for address computation can be executed independently. */
			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
			}

			if (src1 & SLJIT_MEM) {
				/* commutative: swap the operands so src1 is the memory one */
				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);

				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
			}
		}
		else if (ins12 || ins20) {
			emit_rx_type rx_type;

			if (ins12) {
				rx_type = RX_A;
				ins = ins12;
			}
			else {
				rx_type = RXY_A;
				ins = ins20;
			}

			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);

			if (src1 & SLJIT_MEM)
				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
		}
	}

	if (mode & SLJIT_32) {
		ins = forms->op_r;
		ins_k = forms->op_rk;
	}
	else {
		ins = forms->op_gr;
		ins_k = forms->op_grk;
	}

	SLJIT_ASSERT(ins != 0 || ins_k != 0);

	/* prefer the shorter two-operand form when dst aliases a source */
	if (ins && FAST_IS_REG(dst)) {
		if (dst == src1)
			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

		if (dst == src2)
			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
	}

	if (ins_k == 0)
		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
}
1352 
/* Emit a non-commutative binary operation dst = src1 op src2. Unlike
 * emit_commutative(), the operands must not be swapped, so the memory
 * forms apply only when src2 is the memory operand. */
static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
	sljit_s32 dst,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 mode = compiler->mode;
	sljit_ins ins;

	if (src2 & SLJIT_MEM) {
		sljit_ins ins12, ins20;

		/* 64-bit mode has no u12-displacement memory form. */
		if (mode & SLJIT_32) {
			ins12 = forms->op;
			ins20 = forms->op_y;
		}
		else {
			ins12 = 0;
			ins20 = forms->op_g;
		}

		if (ins12 && ins20) {
			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);

			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
		}
		else if (ins12)
			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
		else if (ins20)
			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
	}

	ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;

	/* fall back to the two-operand form when no three-operand form exists
	   or when dst already aliases src1 */
	if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);

	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
}
1392 
/* Assemble the buffered instruction stream into executable memory.
 *
 * Output layout: [instructions][pad to 8 bytes][literal pool].
 * Two passes over the instruction buffers:
 *  1. size pass — compute total code size and literal pool size, and
 *     convert label sizes (instruction index) into byte offsets;
 *  2. emit pass — encode each instruction, patching constants, jumps and
 *     put_labels with pool-relative or target-relative halfword offsets. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_s390x_const *const_;
	struct sljit_put_label *put_label;
	sljit_sw executable_offset;
	sljit_uw ins_size = 0; /* instructions */
	sljit_uw pool_size = 0; /* literal pool */
	sljit_uw pad_size;
	sljit_uw i, j = 0;
	struct sljit_memory_fragment *buf;
	void *code, *code_ptr;
	sljit_uw *pool, *pool_ptr;
	sljit_sw source, offset; /* TODO(carenas): only need 32 bit */

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

	/* branch handling */
	label = compiler->labels;
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* TODO(carenas): compiler->executable_size could be calculated
         *                before to avoid the following loop (except for
         *                pool_size)
         */
	/* calculate the size of the code */
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];

			/* TODO(carenas): instruction tag vs size/addr == j
			 * using instruction tags for const is creative
			 * but unlike all other architectures, and is not
			 * done consistently for all other objects.
			 * This might need reviewing later.
			 */
			if (ins & sljit_ins_const) {
				pool_size += sizeof(*pool);
				ins &= ~sljit_ins_const;
			}
			if (label && label->size == j) {
				/* convert instruction index into a byte offset */
				label->size = ins_size;
				label = label->next;
			}
			if (jump && jump->addr == j) {
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					/* encoded: */
					/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
					/* replace with: */
					/*   lgrl %r1, <pool_addr> */
					/*   bras %r14, %r1 (or bcr <mask>, %r1) */
					pool_size += sizeof(*pool);
					ins_size += 2;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				pool_size += sizeof(*pool);
				put_label = put_label->next;
			}
			ins_size += sizeof_ins(ins);
		}
	}

	/* emit trailing label */
	if (label && label->size == j) {
		label->size = ins_size;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!put_label);

	/* pad code size to 8 bytes so is accessible with half word offsets */
	/* the literal pool needs to be doubleword aligned */
	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
	SLJIT_ASSERT(pad_size < 8UL);

	/* allocate target buffer */
	code = SLJIT_MALLOC_EXEC(ins_size + pad_size + pool_size,
					compiler->exec_allocator_data);
	PTR_FAIL_WITH_EXEC_IF(code);
	code_ptr = code;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	/* TODO(carenas): pool is optional, and the ABI recommends it to
         *                be created before the function code, instead of
         *                globally; if generated code is too big could
         *                need offsets bigger than 32bit words and asser()
         */
	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
	pool_ptr = pool;
	const_ = (struct sljit_s390x_const *)compiler->consts;

	/* update label addresses */
	label = compiler->labels;
	while (label) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(
			(sljit_uw)code_ptr + label->size, executable_offset);
		label = label->next;
	}

	/* reset jumps */
	jump = compiler->jumps;
	put_label = compiler->put_labels;

	/* emit the code */
	j = 0;
	for (buf = compiler->buf; buf != NULL; buf = buf->next) {
		sljit_uw len = buf->used_size / sizeof(sljit_ins);
		sljit_ins *ibuf = (sljit_ins *)buf->memory;
		for (i = 0; i < len; ++i, ++j) {
			sljit_ins ins = ibuf[i];
			if (ins & sljit_ins_const) {
				/* clear the const tag */
				ins &= ~sljit_ins_const;

				/* update instruction with relative address of constant */
				source = (sljit_sw)code_ptr;
				offset = (sljit_sw)pool_ptr - source;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1; /* halfword (not byte) offset */
				SLJIT_ASSERT(is_s32(offset));

				ins |= (sljit_ins)offset & 0xffffffff;

				/* update address */
				const_->const_.addr = (sljit_uw)pool_ptr;

				/* store initial value into pool and update pool address */
				*(pool_ptr++) = (sljit_uw)const_->init_value;

				/* move to next constant */
				const_ = (struct sljit_s390x_const *)const_->const_.next;
			}
			if (jump && jump->addr == j) {
				sljit_sw target = (sljit_sw)((jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target);
				if ((jump->flags & SLJIT_REWRITABLE_JUMP) || (jump->flags & JUMP_ADDR)) {
					jump->addr = (sljit_uw)pool_ptr;

					/* load address into tmp1 */
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;

					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset));

					encode_inst(&code_ptr, lgrl(tmp1, offset & 0xffffffff));

					/* store jump target into pool and update pool address */
					*(pool_ptr++) = (sljit_uw)target;

					/* branch to tmp1 */
					sljit_ins op = (ins >> 32) & 0xf;
					sljit_ins arg = (ins >> 36) & 0xf;
					switch (op) {
					case 4: /* brcl -> bcr */
						ins = bcr(arg, tmp1);
						break;
					case 5: /* brasl -> basr */
						ins = basr(arg, tmp1);
						break;
					default:
						abort();
					}
				}
				else {
					jump->addr = (sljit_uw)code_ptr + 2;
					source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
					offset = target - source;

					/* offset must be halfword aligned */
					SLJIT_ASSERT(!(offset & 1));
					offset >>= 1;
					SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */

					/* patch jump target */
					ins |= (sljit_ins)offset & 0xffffffff;
				}
				jump = jump->next;
			}
			if (put_label && put_label->addr == j) {
				source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

				SLJIT_ASSERT(put_label->label);
				put_label->addr = (sljit_uw)code_ptr;

				/* store target into pool */
				*pool_ptr = put_label->label->addr;
				offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
				pool_ptr++;

				SLJIT_ASSERT(!(offset & 1));
				offset >>= 1;
				SLJIT_ASSERT(is_s32(offset));
				ins |= (sljit_ins)offset & 0xffffffff;

				put_label = put_label->next;
			}
			encode_inst(&code_ptr, ins);
		}
	}
	SLJIT_ASSERT((sljit_u8 *)code + ins_size == code_ptr);
	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = ins_size;
	code = SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
	code_ptr = SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
	SLJIT_CACHE_FLUSH(code, code_ptr);
	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
	return code;
}
1616 
sljit_has_cpu_feature(sljit_s32 feature_type)1617 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1618 {
1619 	/* TODO(mundaym): implement all */
1620 	switch (feature_type) {
1621 	case SLJIT_HAS_CLZ:
1622 		return have_eimm() ? 1 : 0; /* FLOGR instruction */
1623 	case SLJIT_HAS_CMOV:
1624 		return have_lscond1() ? 1 : 0;
1625 	case SLJIT_HAS_FPU:
1626 		return 1;
1627 	}
1628 	return 0;
1629 }
1630 
1631 /* --------------------------------------------------------------------- */
1632 /*  Entry, exit                                                          */
1633 /* --------------------------------------------------------------------- */
1634 
/* Emit the function prologue: store callee-saved GPRs and FPRs into the
 * register save area of the caller's frame, allocate the new stack frame,
 * and move incoming word arguments into their saved registers. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 word_arg_count = 0;
	sljit_s32 offset, i, tmp;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);

	/* Saved registers are stored in callee allocated save area. */
	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		/* all of r6..r14 must be preserved: store them in one go */
		FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15))); /* save registers TODO(MGM): optimize */
		offset += 9 * SSIZE_OF(sw);
	} else {
		/* store only the callee-saved scratches (from r6 upwards) ... */
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		/* ... and the saved registers (ending at r13) plus r14 (link). */
		if (saveds == 0) {
			FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
			offset += (saveds + 1) * SSIZE_OF(sw);
		}
	}

	/* store callee-saved floating point registers */
	tmp = SLJIT_FS0 - fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	/* frame = locals + fixed frame size, rounded up to 16 bytes */
	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
	compiler->local_size = local_size;

	/* allocate the stack frame: r15 -= local_size */
	FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));

	/* move incoming word arguments into their saved registers */
	arg_types >>= SLJIT_ARG_SHIFT;
	tmp = 0;
	while (arg_types > 0) {
		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - tmp), gpr(SLJIT_R0 + word_arg_count))));
				tmp++;
			}
			word_arg_count++;
		}

		arg_types >>= SLJIT_ARG_SHIFT;
	}

	return SLJIT_SUCCESS;
}
1703 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 arg_types,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)1704 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1705 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1706 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1707 {
1708 	CHECK_ERROR();
1709 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1710 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1711 
1712 	compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1713 	return SLJIT_SUCCESS;
1714 }
1715 
/* Emit the function epilogue (without the branch): deallocate the stack
 * frame and reload callee-saved GPRs and FPRs from the register save area.
 * Mirrors the layout written by sljit_emit_enter(). */
static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler)
{
	sljit_s32 offset, i, tmp;
	sljit_s32 local_size = compiler->local_size;
	sljit_s32 saveds = compiler->saveds;
	sljit_s32 scratches = compiler->scratches;

	/* deallocate the frame: r15 += local_size (0x41 is the la opcode) */
	if (is_u12(local_size))
		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
	else
		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));

	offset = 2 * SSIZE_OF(sw);
	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
		FAIL_IF(push_inst(compiler, lmg(r6, r14, offset, r15))); /* restore registers TODO(MGM): optimize */
		offset += 9 * SSIZE_OF(sw);
	} else {
		if (scratches == SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
		}

		if (saveds == 0) {
			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
			offset += SSIZE_OF(sw);
		} else {
			FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
			offset += (saveds + 1) * SSIZE_OF(sw);
		}
	}

	/* restore callee-saved floating point registers */
	tmp = SLJIT_FS0 - compiler->fsaveds;
	for (i = SLJIT_FS0; i > tmp; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
		offset += SSIZE_OF(sw);
	}

	return SLJIT_SUCCESS;
}
1763 
sljit_emit_return_void(struct sljit_compiler * compiler)1764 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1765 {
1766 	CHECK_ERROR();
1767 	CHECK(check_sljit_emit_return_void(compiler));
1768 
1769 	FAIL_IF(emit_stack_frame_release(compiler));
1770 	return push_inst(compiler, br(r14)); /* return */
1771 }
1772 
1773 /* --------------------------------------------------------------------- */
1774 /*  Operators                                                            */
1775 /* --------------------------------------------------------------------- */
1776 
/* Emit a zero-operand (or implicit-register) operation. The multiply and
 * divide variants operate on the SLJIT_R0/SLJIT_R1 pair; the s390x
 * mlgr/dlr/dlgr/dsgr instructions use an even/odd register pair, so the
 * results are shuffled back into R0/R1 afterwards. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
	sljit_gpr arg0 = gpr(SLJIT_R0);
	sljit_gpr arg1 = gpr(SLJIT_R1);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op) | (op & SLJIT_32);
	switch (op) {
	case SLJIT_BREAKPOINT:
		/* The following invalid instruction is emitted by gdb. */
		return push_inst(compiler, 0x0001 /* 2-byte trap */);
	case SLJIT_NOP:
		return push_inst(compiler, 0x0700 /* 2-byte nop */);
	case SLJIT_LMUL_UW:
		/* 128-bit product into the arg0 register pair */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
		break;
	case SLJIT_LMUL_SW:
		/* signed multiplication from: */
		/* Hacker's Delight, Second Edition: Chapter 8-3. */
		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));

		/* unsigned multiplication */
		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));

		/* adjust the unsigned high word to the signed high word */
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
		break;
	case SLJIT_DIV_U32:
	case SLJIT_DIVMOD_U32:
		/* dlr divides the tmp0:tmp1 pair by arg1 */
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_U32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_S32:
	case SLJIT_DIVMOD_S32:
		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_S32)
			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_UW:
	case SLJIT_DIVMOD_UW:
		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_UW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_DIV_SW:
	case SLJIT_DIVMOD_SW:
		/* dsgr takes its 64-bit dividend from the odd register (tmp1),
		   so tmp0 needs no zeroing here */
		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
		if (op == SLJIT_DIVMOD_SW)
			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */

		return SLJIT_SUCCESS;
	case SLJIT_ENDBR:
		return SLJIT_SUCCESS;
	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
		return SLJIT_SUCCESS;
	default:
		SLJIT_UNREACHABLE();
	}
	/* swap result registers */
	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
	return push_inst(compiler, lgr(arg1, tmp0));
}
1860 
/* LEVAL will be defined later with different parameters as needed */
#define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)

/* Emit a unary operation (the MOV* family, NOT, CLZ). Moves are handled
   first and specially: the register-register, load-immediate, load,
   store/store-immediate and memory-to-memory cases each select the
   shortest s390x encoding for the requested operand width. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
        sljit_s32 dst, sljit_sw dstw,
        sljit_s32 src, sljit_sw srcw)
{
	sljit_ins ins;
	struct addr mem;
	sljit_gpr dst_r;
	sljit_gpr src_r;
	sljit_s32 opcode = GET_OPCODE(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
		/* LOAD REGISTER: register-to-register move with optional
		   sign/zero extension selected by the opcode suffix. */
		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
			dst_r = gpr(dst);
			src_r = gpr(src);
			switch (opcode | (op & SLJIT_32)) {
			/* 32-bit */
			case SLJIT_MOV32_U8:
				ins = llcr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S8:
				ins = lbr(dst_r, src_r);
				break;
			case SLJIT_MOV32_U16:
				ins = llhr(dst_r, src_r);
				break;
			case SLJIT_MOV32_S16:
				ins = lhr(dst_r, src_r);
				break;
			case SLJIT_MOV32:
				if (dst_r == src_r)
					return SLJIT_SUCCESS; /* move to self: nothing to emit */
				ins = lr(dst_r, src_r);
				break;
			/* 64-bit */
			case SLJIT_MOV_U8:
				ins = llgcr(dst_r, src_r);
				break;
			case SLJIT_MOV_S8:
				ins = lgbr(dst_r, src_r);
				break;
			case SLJIT_MOV_U16:
				ins = llghr(dst_r, src_r);
				break;
			case SLJIT_MOV_S16:
				ins = lghr(dst_r, src_r);
				break;
			case SLJIT_MOV_U32:
				ins = llgfr(dst_r, src_r);
				break;
			case SLJIT_MOV_S32:
				ins = lgfr(dst_r, src_r);
				break;
			case SLJIT_MOV:
			case SLJIT_MOV_P:
				if (dst_r == src_r)
					return SLJIT_SUCCESS; /* move to self: nothing to emit */
				ins = lgr(dst_r, src_r);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* LOAD IMMEDIATE: normalize the immediate to the operand
		   width first, then materialize it into the register. */
		if (FAST_IS_REG(dst) && (src & SLJIT_IMM)) {
			switch (opcode) {
			case SLJIT_MOV_U8:
				srcw = (sljit_sw)((sljit_u8)(srcw));
				break;
			case SLJIT_MOV_S8:
				srcw = (sljit_sw)((sljit_s8)(srcw));
				break;
			case SLJIT_MOV_U16:
				srcw = (sljit_sw)((sljit_u16)(srcw));
				break;
			case SLJIT_MOV_S16:
				srcw = (sljit_sw)((sljit_s16)(srcw));
				break;
			case SLJIT_MOV_U32:
				srcw = (sljit_sw)((sljit_u32)(srcw));
				break;
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				srcw = (sljit_sw)((sljit_s32)(srcw));
				break;
			}
			return push_load_imm_inst(compiler, gpr(dst), srcw);
		}
		/* LOAD: memory-to-register move; WHEN2 picks the short
		   base+12-bit-displacement form when the offset allows it. */
		/* TODO(carenas): avoid reg being defined later */
		#define LEVAL(i) EVAL(i, reg, mem)
		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
			sljit_gpr reg = gpr(dst);

			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			/* TODO(carenas): convert all calls below to LEVAL */
			switch (opcode | (op & SLJIT_32)) {
			case SLJIT_MOV32_U8:
				ins = llc(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S8:
				ins = lb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_U16:
				ins = llh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV32_S16:
				ins = WHEN2(is_u12(mem.offset), lh, lhy);
				break;
			case SLJIT_MOV32:
				ins = WHEN2(is_u12(mem.offset), l, ly);
				break;
			case SLJIT_MOV_U8:
				ins = LEVAL(llgc);
				break;
			case SLJIT_MOV_S8:
				ins = lgb(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U16:
				ins = LEVAL(llgh);
				break;
			case SLJIT_MOV_S16:
				ins = lgh(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_U32:
				ins = LEVAL(llgf);
				break;
			case SLJIT_MOV_S32:
				ins = lgf(reg, mem.offset, mem.index, mem.base);
				break;
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				ins = lg(reg, mem.offset, mem.index, mem.base);
				break;
			default:
				ins = 0;
				SLJIT_UNREACHABLE();
				break;
			}
			FAIL_IF(push_inst(compiler, ins));
			return SLJIT_SUCCESS;
		}
		/* STORE and STORE IMMEDIATE: immediates are first loaded
		   into tmp0, then stored like a register source. */
		if ((dst & SLJIT_MEM)
			&& (FAST_IS_REG(src) || (src & SLJIT_IMM))) {
			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
			if (src & SLJIT_IMM) {
				/* TODO(mundaym): MOVE IMMEDIATE? */
				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
			}
			struct addr mem; /* shadows the outer 'mem' */
			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), stc, stcy));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), sth, sthy));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				return push_inst(compiler,
					WHEN2(is_u12(mem.offset), st, sty));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler, LEVAL(stg)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		#undef LEVAL
		/* MOVE CHARACTERS: memory-to-memory move staged through tmp0
		   (load with the source addressing, then store to dst). */
		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
			struct addr mem; /* shadows the outer 'mem' */
			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
			switch (opcode) {
			case SLJIT_MOV_U8:
			case SLJIT_MOV_S8:
				FAIL_IF(push_inst(compiler,
					EVAL(llgc, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(stcy, tmp0, mem));
			case SLJIT_MOV_U16:
			case SLJIT_MOV_S16:
				FAIL_IF(push_inst(compiler,
					EVAL(llgh, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sthy, tmp0, mem));
			case SLJIT_MOV_U32:
			case SLJIT_MOV_S32:
			case SLJIT_MOV32:
				FAIL_IF(push_inst(compiler,
					EVAL(ly, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				return push_inst(compiler,
					EVAL(sty, tmp0, mem));
			case SLJIT_MOV_P:
			case SLJIT_MOV:
				FAIL_IF(push_inst(compiler,
					EVAL(lg, tmp0, mem)));
				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
				FAIL_IF(push_inst(compiler,
					EVAL(stg, tmp0, mem)));
				return SLJIT_SUCCESS;
			default:
				SLJIT_UNREACHABLE();
			}
		}
		SLJIT_UNREACHABLE();
	}

	/* Non-move unary operations (NOT, CLZ) from here on. */
	SLJIT_ASSERT((src & SLJIT_IMM) == 0); /* no immediates */

	dst_r = FAST_IS_REG(dst) ? gpr(REG_MASK & dst) : tmp0;
	src_r = FAST_IS_REG(src) ? gpr(REG_MASK & src) : tmp0;
	if (src & SLJIT_MEM)
		/* NOTE(review): 'src & SLJIT_32' looks suspicious here - the
		   32-bit flag is normally carried in 'op' (as in the
		   store_word call below), not in the operand word; confirm
		   against the other back-ends / upstream. */
		FAIL_IF(load_word(compiler, src_r, src, srcw, src & SLJIT_32));

	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);

	/* TODO(mundaym): optimize loads and stores */
	switch (opcode | (op & SLJIT_32)) {
	case SLJIT_NOT:
		/* emulate ~x with x^-1 */
		FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
		if (src_r != dst_r)
			FAIL_IF(push_inst(compiler, lgr(dst_r, src_r)));

		FAIL_IF(push_inst(compiler, xgr(dst_r, tmp1)));
		break;
	case SLJIT_NOT32:
		/* emulate ~x with x^-1 */
		if (have_eimm())
			FAIL_IF(push_inst(compiler, xilf(dst_r, 0xffffffff)));
		else {
			FAIL_IF(push_load_imm_inst(compiler, tmp1, -1));
			if (src_r != dst_r)
				FAIL_IF(push_inst(compiler, lr(dst_r, src_r)));

			FAIL_IF(push_inst(compiler, xr(dst_r, tmp1)));
		}
		break;
	case SLJIT_CLZ:
		if (have_eimm()) {
			FAIL_IF(push_inst(compiler, flogr(tmp0, src_r))); /* clobbers tmp1 */
			if (dst_r != tmp0)
				FAIL_IF(push_inst(compiler, lgr(dst_r, tmp0)));
		} else {
			abort(); /* TODO(mundaym): no eimm (?) */
		}
		break;
	case SLJIT_CLZ32:
		/* Shift the 32-bit value into the high half and fill the low
		   half with ones so the 64-bit flogr yields the 32-bit count. */
		if (have_eimm()) {
			FAIL_IF(push_inst(compiler, sllg(tmp1, src_r, 32, 0)));
			FAIL_IF(push_inst(compiler, iilf(tmp1, 0xffffffff)));
			FAIL_IF(push_inst(compiler, flogr(tmp0, tmp1))); /* clobbers tmp1 */
			if (dst_r != tmp0)
				FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
		} else {
			abort(); /* TODO(mundaym): no eimm (?) */
		}
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	if ((op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW))
		FAIL_IF(update_zero_overflow(compiler, op, dst_r));

	/* TODO(carenas): doesn't need FAIL_IF */
	if (dst & SLJIT_MEM)
		FAIL_IF(store_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

	return SLJIT_SUCCESS;
}
2154 
is_commutative(sljit_s32 op)2155 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2156 {
2157 	switch (GET_OPCODE(op)) {
2158 	case SLJIT_ADD:
2159 	case SLJIT_ADDC:
2160 	case SLJIT_MUL:
2161 	case SLJIT_AND:
2162 	case SLJIT_OR:
2163 	case SLJIT_XOR:
2164 		return 1;
2165 	}
2166 	return 0;
2167 }
2168 
is_shift(sljit_s32 op)2169 static SLJIT_INLINE int is_shift(sljit_s32 op) {
2170 	sljit_s32 v = GET_OPCODE(op);
2171 	return (v == SLJIT_SHL || v == SLJIT_ASHR || v == SLJIT_LSHR) ? 1 : 0;
2172 }
2173 
/* Signed add encodings (set the overflow-capable condition code).
   Field order must match struct ins_forms (declared earlier in the file):
   32-bit RR, 64-bit RRE, 32/64-bit three-operand, then memory forms. */
static const struct ins_forms add_forms = {
	0x1a00, /* ar */
	0xb9080000, /* agr */
	0xb9f80000, /* ark */
	0xb9e80000, /* agrk */
	0x5a000000, /* a */
	0xe3000000005a, /* ay */
	0xe30000000008, /* ag */
};
2183 
/* Logical (unsigned/carry) add encodings; same field layout as add_forms. */
static const struct ins_forms logical_add_forms = {
	0x1e00, /* alr */
	0xb90a0000, /* algr */
	0xb9fa0000, /* alrk */
	0xb9ea0000, /* algrk */
	0x5e000000, /* al */
	0xe3000000005e, /* aly */
	0xe3000000000a, /* alg */
};
2193 
sljit_emit_add(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2194 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2195 	sljit_s32 dst, sljit_sw dstw,
2196 	sljit_s32 src1, sljit_sw src1w,
2197 	sljit_s32 src2, sljit_sw src2w)
2198 {
2199 	int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2200 	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2201 	const struct ins_forms *forms;
2202 	sljit_ins ins;
2203 
2204 	if (src2 & SLJIT_IMM) {
2205 		if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2206 			if (sets_overflow)
2207 				ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2208 			else
2209 				ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2210 			return emit_siy(compiler, ins, dst, dstw, src2w);
2211 		}
2212 
2213 		if (is_s16(src2w)) {
2214 			if (sets_overflow)
2215 				ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2216 			else
2217 				ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2218 			FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2219 			goto done;
2220 		}
2221 
2222 		if (!sets_overflow) {
2223 			if ((op & SLJIT_32) || is_u32(src2w)) {
2224 				ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2225 				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2226 				goto done;
2227 			}
2228 			if (is_u32(-src2w)) {
2229 				FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2230 				goto done;
2231 			}
2232 		}
2233 		else if ((op & SLJIT_32) || is_s32(src2w)) {
2234 			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2235 			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2236 			goto done;
2237 		}
2238 	}
2239 
2240 	forms = sets_overflow ? &add_forms : &logical_add_forms;
2241 	FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2242 
2243 done:
2244 	if (sets_zero_overflow)
2245 		FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2246 
2247 	if (dst & SLJIT_MEM)
2248 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2249 
2250 	return SLJIT_SUCCESS;
2251 }
2252 
/* Signed subtract encodings; same field layout as add_forms. */
static const struct ins_forms sub_forms = {
	0x1b00, /* sr */
	0xb9090000, /* sgr */
	0xb9f90000, /* srk */
	0xb9e90000, /* sgrk */
	0x5b000000, /* s */
	0xe3000000005b, /* sy */
	0xe30000000009, /* sg */
};
2262 
/* Logical (unsigned/borrow) subtract encodings; same field layout as add_forms. */
static const struct ins_forms logical_sub_forms = {
	0x1f00, /* slr */
	0xb90b0000, /* slgr */
	0xb9fb0000, /* slrk */
	0xb9eb0000, /* slgrk */
	0x5f000000, /* sl */
	0xe3000000005f, /* sly */
	0xe3000000000b, /* slg */
};
2272 
sljit_emit_sub(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2273 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2274 	sljit_s32 dst, sljit_sw dstw,
2275 	sljit_s32 src1, sljit_sw src1w,
2276 	sljit_s32 src2, sljit_sw src2w)
2277 {
2278 	sljit_s32 flag_type = GET_FLAG_TYPE(op);
2279 	int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2280 	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2281 	const struct ins_forms *forms;
2282 	sljit_ins ins;
2283 
2284 	if (dst == (sljit_s32)tmp0 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2285 		int compare_signed = flag_type >= SLJIT_SIG_LESS;
2286 
2287 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2288 
2289 		if (src2 & SLJIT_IMM) {
2290 			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w)))
2291 			{
2292 				if ((op & SLJIT_32) || is_s32(src2w)) {
2293 					ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2294 					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2295 				}
2296 			}
2297 			else {
2298 				if ((op & SLJIT_32) || is_u32(src2w)) {
2299 					ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2300 					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2301 				}
2302 				if (is_s16(src2w))
2303 					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2304 			}
2305 		}
2306 		else if (src2 & SLJIT_MEM) {
2307 			if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2308 				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2309 				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2310 			}
2311 
2312 			if (compare_signed)
2313 				ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2314 			else
2315 				ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2316 			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2317 		}
2318 
2319 		if (compare_signed)
2320 			ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2321 		else
2322 			ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2323 		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2324 	}
2325 
2326 	if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2327 		ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2328 		FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2329 		goto done;
2330 	}
2331 
2332 	if (src2 & SLJIT_IMM) {
2333 		sljit_sw neg_src2w = -src2w;
2334 
2335 		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2336 			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2337 				if (sets_signed)
2338 					ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2339 				else
2340 					ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2341 				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2342 			}
2343 
2344 			if (is_s16(neg_src2w)) {
2345 				if (sets_signed)
2346 					ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2347 				else
2348 					ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2349 				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2350 				goto done;
2351 			}
2352 		}
2353 
2354 		if (!sets_signed) {
2355 			if ((op & SLJIT_32) || is_u32(src2w)) {
2356 				ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2357 				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2358 				goto done;
2359 			}
2360 			if (is_u32(neg_src2w)) {
2361 				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2362 				goto done;
2363 			}
2364 		}
2365 		else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2366 			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2367 			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2368 			goto done;
2369 		}
2370 	}
2371 
2372 	forms = sets_signed ? &sub_forms : &logical_sub_forms;
2373 	FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2374 
2375 done:
2376 	if (sets_signed) {
2377 		sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2378 
2379 		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2380 			/* In case of overflow, the sign bit of the two source operands must be different, and
2381 			     - the first operand is greater if the sign bit of the result is set
2382 			     - the first operand is less if the sign bit of the result is not set
2383 			   The -result operation sets the corrent sign, because the result cannot be zero.
2384 			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2385 			FAIL_IF(push_inst(compiler, brc(0xe, 2 + 2)));
2386 			FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2387 		}
2388 		else if (op & SLJIT_SET_Z)
2389 			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2390 	}
2391 
2392 	if (dst & SLJIT_MEM)
2393 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2394 
2395 	return SLJIT_SUCCESS;
2396 }
2397 
/* Multiply encodings (no flags needed); same field layout as add_forms. */
static const struct ins_forms multiply_forms = {
	0xb2520000, /* msr */
	0xb90c0000, /* msgr */
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0x71000000, /* ms */
	0xe30000000051, /* msy */
	0xe3000000000c, /* msg */
};
2407 
/* Overflow-setting multiply encodings (misc-instruction-extensions-2
   facility); entries without such a form are zero. Layout as add_forms. */
static const struct ins_forms multiply_overflow_forms = {
	0,
	0,
	0xb9fd0000, /* msrkc */
	0xb9ed0000, /* msgrkc */
	0,
	0xe30000000053, /* msc */
	0xe30000000083, /* msgc */
};
2417 
sljit_emit_multiply(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2418 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2419 	sljit_s32 dst,
2420 	sljit_s32 src1, sljit_sw src1w,
2421 	sljit_s32 src2, sljit_sw src2w)
2422 {
2423 	sljit_ins ins;
2424 
2425 	if (HAS_FLAGS(op)) {
2426 		/* if have_misc2 fails, this operation should be emulated. 32 bit emulation:
2427 		FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2428 		FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2429 		if (dst_r != tmp0) {
2430 			FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2431 		}
2432 		FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2433 		FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2434 		FAIL_IF(push_inst(compiler, ipm(tmp1)));
2435 		FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2436 
2437 		return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2438 	}
2439 
2440 	if (src2 & SLJIT_IMM) {
2441 		if (is_s16(src2w)) {
2442 			ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2443 			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2444 		}
2445 
2446 		if (is_s32(src2w)) {
2447 			ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2448 			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2449 		}
2450 	}
2451 
2452 	return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2453 }
2454 
sljit_emit_bitwise_imm(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_uw imm,sljit_s32 count16)2455 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2456 	sljit_s32 dst,
2457 	sljit_s32 src1, sljit_sw src1w,
2458 	sljit_uw imm, sljit_s32 count16)
2459 {
2460 	sljit_s32 mode = compiler->mode;
2461 	sljit_gpr dst_r = tmp0;
2462 	sljit_s32 needs_move = 1;
2463 
2464 	if (IS_GPR_REG(dst)) {
2465 		dst_r = gpr(dst & REG_MASK);
2466 		if (dst == src1)
2467 			needs_move = 0;
2468 	}
2469 
2470 	if (needs_move)
2471 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2472 
2473 	if (type == SLJIT_AND) {
2474 		if (!(mode & SLJIT_32))
2475 			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2476 		return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2477 	}
2478 	else if (type == SLJIT_OR) {
2479 		if (count16 >= 3) {
2480 			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2481 			return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2482 		}
2483 
2484 		if (count16 >= 2) {
2485 			if ((imm & 0x00000000ffffffffull) == 0)
2486 				return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2487 			if ((imm & 0xffffffff00000000ull) == 0)
2488 				return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2489 		}
2490 
2491 		if ((imm & 0xffff000000000000ull) != 0)
2492 			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2493 		if ((imm & 0x0000ffff00000000ull) != 0)
2494 			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2495 		if ((imm & 0x00000000ffff0000ull) != 0)
2496 			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2497 		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2498 			return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2499 		return SLJIT_SUCCESS;
2500 	}
2501 
2502 	if ((imm & 0xffffffff00000000ull) != 0)
2503 		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2504 	if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2505 		return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2506 	return SLJIT_SUCCESS;
2507 }
2508 
/* AND encodings; same field layout as add_forms. */
static const struct ins_forms bitwise_and_forms = {
	0x1400, /* nr */
	0xb9800000, /* ngr */
	0xb9f40000, /* nrk */
	0xb9e40000, /* ngrk */
	0x54000000, /* n */
	0xe30000000054, /* ny */
	0xe30000000080, /* ng */
};
2518 
/* OR encodings; same field layout as add_forms. */
static const struct ins_forms bitwise_or_forms = {
	0x1600, /* or */
	0xb9810000, /* ogr */
	0xb9f60000, /* ork */
	0xb9e60000, /* ogrk */
	0x56000000, /* o */
	0xe30000000056, /* oy */
	0xe30000000081, /* og */
};
2528 
/* XOR encodings; same field layout as add_forms. */
static const struct ins_forms bitwise_xor_forms = {
	0x1700, /* xr */
	0xb9820000, /* xgr */
	0xb9f70000, /* xrk */
	0xb9e70000, /* xgrk */
	0x57000000, /* x */
	0xe30000000057, /* xy */
	0xe30000000082, /* xg */
};
2538 
sljit_emit_bitwise(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2539 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2540 	sljit_s32 dst,
2541 	sljit_s32 src1, sljit_sw src1w,
2542 	sljit_s32 src2, sljit_sw src2w)
2543 {
2544 	sljit_s32 type = GET_OPCODE(op);
2545 	const struct ins_forms *forms;
2546 
2547 	if ((src2 & SLJIT_IMM) && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == (sljit_s32)tmp0))) {
2548 		sljit_s32 count16 = 0;
2549 		sljit_uw imm = (sljit_uw)src2w;
2550 
2551 		if (op & SLJIT_32)
2552 			imm &= 0xffffffffull;
2553 
2554 		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2555 			count16++;
2556 		if ((imm & 0x00000000ffff0000ull) != 0)
2557 			count16++;
2558 		if ((imm & 0x0000ffff00000000ull) != 0)
2559 			count16++;
2560 		if ((imm & 0xffff000000000000ull) != 0)
2561 			count16++;
2562 
2563 		if (type == SLJIT_AND && dst == (sljit_s32)tmp0 && count16 == 1) {
2564 			sljit_gpr src_r = tmp0;
2565 
2566 			if (FAST_IS_REG(src1))
2567 				src_r = gpr(src1 & REG_MASK);
2568 			else
2569 				FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2570 
2571 			if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2572 				return push_inst(compiler, 0xa7010000 | R20A(src_r) | imm);
2573 			if ((imm & 0x00000000ffff0000ull) != 0)
2574 				return push_inst(compiler, 0xa7000000 | R20A(src_r) | (imm >> 16));
2575 			if ((imm & 0x0000ffff00000000ull) != 0)
2576 				return push_inst(compiler, 0xa7030000 | R20A(src_r) | (imm >> 32));
2577 			return push_inst(compiler, 0xa7020000 | R20A(src_r) | (imm >> 48));
2578 		}
2579 
2580 		if (!(op & SLJIT_SET_Z))
2581 			return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2582 	}
2583 
2584 	if (type == SLJIT_AND)
2585 		forms = &bitwise_and_forms;
2586 	else if (type == SLJIT_OR)
2587 		forms = &bitwise_or_forms;
2588 	else
2589 		forms = &bitwise_xor_forms;
2590 
2591 	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2592 }
2593 
sljit_emit_shift(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2594 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2595 	sljit_s32 dst,
2596 	sljit_s32 src1, sljit_sw src1w,
2597 	sljit_s32 src2, sljit_sw src2w)
2598 {
2599 	sljit_s32 type = GET_OPCODE(op);
2600 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2601 	sljit_gpr src_r = tmp0;
2602 	sljit_gpr base_r = tmp0;
2603 	sljit_ins imm = 0;
2604 	sljit_ins ins;
2605 
2606 	if (FAST_IS_REG(src1))
2607 		src_r = gpr(src1 & REG_MASK);
2608 	else
2609 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2610 
2611 	if (src2 & SLJIT_IMM)
2612 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2613 	else if (FAST_IS_REG(src2))
2614 		base_r = gpr(src2 & REG_MASK);
2615 	else {
2616 		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2617 		base_r = tmp1;
2618 	}
2619 
2620 	if ((op & SLJIT_32) && dst_r == src_r) {
2621 		if (type == SLJIT_SHL)
2622 			ins = 0x89000000 /* sll */;
2623 		else if (type == SLJIT_LSHR)
2624 			ins = 0x88000000 /* srl */;
2625 		else
2626 			ins = 0x8a000000 /* sra */;
2627 
2628 		FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2629 	}
2630 	else {
2631 		if (type == SLJIT_SHL)
2632 			ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2633 		else if (type == SLJIT_LSHR)
2634 			ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2635 		else
2636 			ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2637 
2638 		FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2639 	}
2640 
2641 	if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2642 		return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2643 
2644 	return SLJIT_SUCCESS;
2645 }
2646 
/* Add-with-carry encodings; entries without such a form are zero.
   Same field layout as add_forms. */
static const struct ins_forms addc_forms = {
	0xb9980000, /* alcr */
	0xb9880000, /* alcgr */
	0,
	0,
	0,
	0xe30000000098, /* alc */
	0xe30000000088, /* alcg */
};
2656 
/* Subtract-with-borrow encodings; entries without such a form are zero.
   Same field layout as add_forms. */
static const struct ins_forms subc_forms = {
	0xb9990000, /* slbr */
	0xb9890000, /* slbgr */
	0,
	0,
	0,
	0xe30000000099, /* slb */
	0xe30000000089, /* slbg */
};
2666 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2667 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2668 	sljit_s32 dst, sljit_sw dstw,
2669 	sljit_s32 src1, sljit_sw src1w,
2670 	sljit_s32 src2, sljit_sw src2w)
2671 {
2672 	CHECK_ERROR();
2673 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2674 	ADJUST_LOCAL_OFFSET(dst, dstw);
2675 	ADJUST_LOCAL_OFFSET(src1, src1w);
2676 	ADJUST_LOCAL_OFFSET(src2, src2w);
2677 
2678 	compiler->mode = op & SLJIT_32;
2679 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2680 
2681 	if (is_commutative(op) && (src1 & SLJIT_IMM) && !(src2 & SLJIT_IMM)) {
2682 		src1 ^= src2;
2683 		src2 ^= src1;
2684 		src1 ^= src2;
2685 
2686 		src1w ^= src2w;
2687 		src2w ^= src1w;
2688 		src1w ^= src2w;
2689 	}
2690 
2691 	switch (GET_OPCODE(op)) {
2692 	case SLJIT_ADD:
2693 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2694 		return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2695 	case SLJIT_ADDC:
2696 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2697 		FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2698 		if (dst & SLJIT_MEM)
2699 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2700 		return SLJIT_SUCCESS;
2701 	case SLJIT_SUB:
2702 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2703 		return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2704 	case SLJIT_SUBC:
2705 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2706 		FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2707 		if (dst & SLJIT_MEM)
2708 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2709 		return SLJIT_SUCCESS;
2710 	case SLJIT_MUL:
2711 		FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2712 		break;
2713 	case SLJIT_AND:
2714 	case SLJIT_OR:
2715 	case SLJIT_XOR:
2716 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2717 		break;
2718 	case SLJIT_SHL:
2719 	case SLJIT_LSHR:
2720 	case SLJIT_ASHR:
2721 		FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2722 		break;
2723 	}
2724 
2725 	if (dst & SLJIT_MEM)
2726 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2727 	return SLJIT_SUCCESS;
2728 }
2729 
/* Flag-only variant of sljit_emit_op2: the numeric result is discarded,
   so the scratch register tmp0 is used as the destination. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* Arguments were validated above; avoid re-checking (and re-logging)
	   them inside sljit_emit_op2. */
	compiler->skip_checks = 1;
#endif
	return sljit_emit_op2(compiler, op, (sljit_s32)tmp0, 0, src1, src1w, src2, src2w);
}
2743 
sljit_emit_op_src(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src,sljit_sw srcw)2744 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(
2745 	struct sljit_compiler *compiler,
2746 	sljit_s32 op, sljit_s32 src, sljit_sw srcw)
2747 {
2748 	sljit_gpr src_r;
2749 
2750 	CHECK_ERROR();
2751 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
2752 	ADJUST_LOCAL_OFFSET(src, srcw);
2753 
2754 	switch (op) {
2755 	case SLJIT_FAST_RETURN:
2756 		src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
2757 		if (src & SLJIT_MEM)
2758 			FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
2759 
2760 		return push_inst(compiler, br(src_r));
2761 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
2762 		/* TODO(carenas): implement? */
2763 		return SLJIT_SUCCESS;
2764 	case SLJIT_PREFETCH_L1:
2765 	case SLJIT_PREFETCH_L2:
2766 	case SLJIT_PREFETCH_L3:
2767 	case SLJIT_PREFETCH_ONCE:
2768 		/* TODO(carenas): implement */
2769 		return SLJIT_SUCCESS;
2770 	default:
2771                 /* TODO(carenas): probably should not success by default */
2772 		return SLJIT_SUCCESS;
2773 	}
2774 
2775 	return SLJIT_SUCCESS;
2776 }
2777 
sljit_get_register_index(sljit_s32 reg)2778 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
2779 {
2780 	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
2781 	return (sljit_s32)gpr(reg);
2782 }
2783 
sljit_get_float_register_index(sljit_s32 reg)2784 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
2785 {
2786 	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
2787 	return (sljit_s32)fgpr(reg);
2788 }
2789 
/* Emit a raw, caller-encoded instruction of `size` bytes. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
	void *instruction, sljit_u32 size)
{
	sljit_ins ins = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));

	/* Copy the instruction bytes into the low-order end of the 64-bit
	   instruction word; s390x is big-endian, so the least significant
	   bytes of `ins` are the last bytes in memory. */
	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
	return push_inst(compiler, ins);
}
2801 
2802 /* --------------------------------------------------------------------- */
2803 /*  Floating point operators                                             */
2804 /* --------------------------------------------------------------------- */
2805 
2806 #define FLOAT_LOAD 0
2807 #define FLOAT_STORE 1
2808 
/* Emit a floating-point load or store between FPR `reg` and the memory
   operand mem/memw. `op` carries FLOAT_LOAD/FLOAT_STORE plus SLJIT_32
   (single precision when set, double otherwise). */
static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 reg,
	sljit_s32 mem, sljit_sw memw)
{
	struct addr addr;
	sljit_ins ins;

	SLJIT_ASSERT(mem & SLJIT_MEM);

	/* Use the short forms with a 12-bit unsigned displacement when the
	   offset fits (or an index register is present, or the offset does
	   not fit the 20-bit signed field of the long forms either, in which
	   case make_addr_bx materializes it in tmp1). */
	if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));

		if (op & FLOAT_STORE)
			ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
		else
			ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;

		return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
	}

	/* Long-displacement forms with a signed 20-bit offset. */
	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));

	if (op & FLOAT_STORE)
		ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
	else
		ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;

	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
}
2838 
/* Emit a floating-point operation whose second operand may be a register
   (use opcode `ins_r`) or memory (use opcode `ins`): reg = reg OP src. */
static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
	sljit_s32 reg,
	sljit_s32 src, sljit_sw srcw)
{
	struct addr addr;

	/* Register operand: register-register form. */
	if (!(src & SLJIT_MEM))
		return push_inst(compiler, ins_r | F4(reg) | F0(src));

	/* Memory operand: base+index form with a 12-bit displacement. */
	FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
}
2851 
/* Convert a floating-point value to a signed 32- or 64-bit integer. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
	sljit_ins ins;

	/* The convert instructions require a register source. */
	if (src & SLJIT_MEM) {
		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
		src = TMP_FREG1;
	}

	/* M3 is set to 5 (rounding mode encoded in the opcode constants). */
	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;

	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));

	/* Spill to memory; the *_S32 conversions store only 32 bits. */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);

	return SLJIT_SUCCESS;
}
2877 
/* Convert a signed 32- or 64-bit integer to a floating-point value. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	sljit_ins ins;

	/* Materialize immediate or memory sources in the scratch GPR tmp0;
	   the convert instructions take a register operand. */
	if (src & SLJIT_IMM) {
		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
		src = (sljit_s32)tmp0;
	}
	else if (src & SLJIT_MEM) {
		/* The *_S32 variants load only 32 bits. */
		FAIL_IF(load_word(compiler, tmp0, src, srcw, GET_OPCODE(op) >= SLJIT_CONV_F64_FROM_S32));
		src = (sljit_s32)tmp0;
	}

	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
		ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
	else
		ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;

	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));

	/* For memory destinations dst_r is TMP_FREG1; spill the result. */
	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	return SLJIT_SUCCESS;
}
2906 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)2907 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
2908 	sljit_s32 src1, sljit_sw src1w,
2909 	sljit_s32 src2, sljit_sw src2w)
2910 {
2911 	sljit_ins ins_r, ins;
2912 
2913 	if (src1 & SLJIT_MEM) {
2914 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
2915 		src1 = TMP_FREG1;
2916 	}
2917 
2918 	if (op & SLJIT_32) {
2919 		ins_r = 0xb3090000 /* cebr */;
2920 		ins = 0xed0000000009 /* ceb */;
2921 	} else {
2922 		ins_r = 0xb3190000 /* cdbr */;
2923 		ins = 0xed0000000019 /* cdb */;
2924 	}
2925 
2926 	return emit_float(compiler, ins_r, ins, src1, src2, src2w);
2927 }
2928 
/* Single-operand floating-point operations: move, precision conversion,
   negate and absolute value. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;
	sljit_ins ins;

	CHECK_ERROR();

	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	/* Widening float->double conversion has memory and register forms,
	   so it needs no separate load. */
	if (op == SLJIT_CONV_F64_FROM_F32)
		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
	else {
		if (src & SLJIT_MEM) {
			/* For F32_FROM_F64 the source is still double precision,
			   so force a double load despite the SLJIT_32 bit. */
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
			src = dst_r;
		}

		switch (GET_OPCODE(op)) {
		case SLJIT_MOV_F64:
			if (FAST_IS_REG(dst)) {
				if (dst == src)
					return SLJIT_SUCCESS;

				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
				break;
			}
			/* Register-to-memory move: store directly. */
			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
		case SLJIT_CONV_F64_FROM_F32:
			/* Only SLJIT_CONV_F32_FROM_F64 reaches here (same base
			   opcode); the other direction was handled above. */
			ins = 0xb3440000 /* ledbr */;
			break;
		case SLJIT_NEG_F64:
			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
			break;
		default:
			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
			break;
		}

		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
	}

	if (!(dst & SLJIT_MEM))
		return SLJIT_SUCCESS;

	/* Memory destination: the result was computed in TMP_FREG1. */
	SLJIT_ASSERT(dst_r == TMP_FREG1);

	return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
}
2983 
2984 #define FLOAT_MOV(op, dst_r, src_r) \
2985 	(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
2986 
/* Two-operand floating-point arithmetic (add, sub, mul, div). The target
   instructions are two-operand (dst_r = dst_r OP src2), so src1 must first
   be placed in dst_r, taking care when dst aliases src1 or src2. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r = TMP_FREG1;
	sljit_ins ins_r, ins;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	do {
		if (FAST_IS_REG(dst)) {
			dst_r = dst;

			/* dst already holds src1. */
			if (dst == src1)
				break;

			if (dst == src2) {
				/* Commutative case: swap the operands instead of
				   copying src2 aside. */
				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
					src2 = src1;
					src2w = src1w;
					src1 = dst;
					break;
				}

				/* Non-commutative: save src2 before dst is
				   overwritten by src1. */
				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
				src2 = TMP_FREG1;
			}
		}

		/* Bring src1 into the working register. */
		if (src1 & SLJIT_MEM)
			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
		else
			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
	} while (0);

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
		break;
	case SLJIT_SUB_F64:
		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
		break;
	case SLJIT_MUL_F64:
		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
		break;
	default:
		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
		break;
	}

	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));

	if (dst & SLJIT_MEM)
		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);

	/* Register destination: the result must already be in place. */
	SLJIT_ASSERT(dst_r != TMP_FREG1);
	return SLJIT_SUCCESS;
}
3055 
3056 /* --------------------------------------------------------------------- */
3057 /*  Other instructions                                                   */
3058 /* --------------------------------------------------------------------- */
3059 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)3060 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
3061 {
3062 	CHECK_ERROR();
3063 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
3064 	ADJUST_LOCAL_OFFSET(dst, dstw);
3065 
3066 	if (FAST_IS_REG(dst))
3067 		return push_inst(compiler, lgr(gpr(dst), fast_link_r));
3068 
3069 	/* memory */
3070 	return store_word(compiler, fast_link_r, dst, dstw, 0);
3071 }
3072 
3073 /* --------------------------------------------------------------------- */
3074 /*  Conditional instructions                                             */
3075 /* --------------------------------------------------------------------- */
3076 
sljit_emit_label(struct sljit_compiler * compiler)3077 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3078 {
3079 	struct sljit_label *label;
3080 
3081 	CHECK_ERROR_PTR();
3082 	CHECK_PTR(check_sljit_emit_label(compiler));
3083 
3084 	if (compiler->last_label && compiler->last_label->size == compiler->size)
3085 		return compiler->last_label;
3086 
3087 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3088 	PTR_FAIL_IF(!label);
3089 	set_label(label, compiler);
3090 	return label;
3091 }
3092 
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	/* Condition-code mask for conditional jumps; 0xf = unconditional. */
	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	/* record jump */
	struct sljit_jump *jump = (struct sljit_jump *)
		ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	/* The target is unknown here; remember the instruction index so the
	   offset can be patched when addresses are resolved. */
	jump->addr = compiler->size;

	/* emit jump instruction */
	type &= 0xff;
	if (type >= SLJIT_FAST_CALL)
		/* Calls use branch-relative-and-save-long with the proper link
		   register; the 0 offset is filled in later. */
		PTR_FAIL_IF(push_inst(compiler, brasl(type == SLJIT_FAST_CALL ? fast_link_r : link_r, 0)));
	else
		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));

	return jump;
}
3116 
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types)
{
	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));

	/* Tail call: release the current stack frame first, then emit a
	   plain jump instead of a call. */
	if (type & SLJIT_CALL_RETURN) {
		PTR_FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* Arguments were validated above; skip the checks in sljit_emit_jump. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_jump(compiler, type);
}
3135 
/* Indirect jump or call through a register, immediate or memory operand. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));

	if (src & SLJIT_IMM) {
		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
	}
	else if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
	}

	/* emit jump instruction */
	if (type >= SLJIT_FAST_CALL)
		/* Calls save the return address in the proper link register. */
		return push_inst(compiler, basr(type == SLJIT_FAST_CALL ? fast_link_r : link_r, src_r));

	return push_inst(compiler, br(src_r));
}
3158 
/* Indirect call; handles the tail-call (SLJIT_CALL_RETURN) case by
   releasing the frame and degrading to a plain indirect jump. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 arg_types,
	sljit_s32 src, sljit_sw srcw)
{
	CHECK_ERROR();
	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));

	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);

	if (src & SLJIT_MEM) {
		ADJUST_LOCAL_OFFSET(src, srcw);
		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
		src = TMP_REG2;
	}

	if (type & SLJIT_CALL_RETURN) {
		/* Copy targets held in saved registers to tmp1 first —
		   presumably because emit_stack_frame_release restores those
		   registers and would clobber the target (TODO: confirm). */
		if (src >= SLJIT_FIRST_SAVED_REG && src <= SLJIT_S0) {
			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
			src = TMP_REG2;
		}

		FAIL_IF(emit_stack_frame_release(compiler));
		type = SLJIT_JUMP;
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	/* Arguments were validated above; skip the checks in sljit_emit_ijump. */
	compiler->skip_checks = 1;
#endif

	return sljit_emit_ijump(compiler, type, src, srcw);
}
3191 
/* Materialize the current condition flag as 0/1, optionally combining it
   with dst via AND/OR/XOR. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_u8 mask = get_cc(compiler, type & 0xff);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));

	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	sljit_gpr loc_r = tmp1;	/* where the 0/1 flag value is built */
	switch (GET_OPCODE(op)) {
	case SLJIT_AND:
	case SLJIT_OR:
	case SLJIT_XOR:
		compiler->status_flags_state = op & SLJIT_SET_Z;

		/* dst is also source operand */
		if (dst & SLJIT_MEM)
			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));

		break;
	case SLJIT_MOV32:
		op |= SLJIT_32;
		/* fallthrough */
	case SLJIT_MOV:
		/* can write straight into destination */
		loc_r = dst_r;
		break;
	default:
		SLJIT_UNREACHABLE();
	}

	/* Build the 0/1 value: clear loc_r, then conditionally load 1 with a
	   load-on-condition immediate when the mask matches. */
	/* TODO(mundaym): fold into cmov helper function? */
	#define LEVAL(i) i(loc_r, 1, mask)
	if (have_lscond2()) {
		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, lochi, locghi)));
	} else {
		/* TODO(mundaym): no load/store-on-condition 2 facility (ipm? branch-and-set?) */
		abort();
	}
	#undef LEVAL

	/* apply bitwise op and set condition codes */
	switch (GET_OPCODE(op)) {
	#define LEVAL(i) i(dst_r, loc_r)
	case SLJIT_AND:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, nr, ngr)));
		break;
	case SLJIT_OR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, or, ogr)));
		break;
	case SLJIT_XOR:
		FAIL_IF(push_inst(compiler,
			WHEN2(op & SLJIT_32, xr, xgr)));
		break;
	#undef LEVAL
	}

	/* store result to memory if required */
	if (dst & SLJIT_MEM)
		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));

	return SLJIT_SUCCESS;
}
3261 
/* Conditional move: dst_reg = src when the condition `type` holds. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
	sljit_s32 dst_reg,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_u8 mask = get_cc(compiler, type & 0xff);
	sljit_gpr dst_r = gpr(dst_reg & ~SLJIT_32);
	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));

	if (src & SLJIT_IMM) {
		/* TODO(mundaym): fast path with lscond2 */
		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
	}

	/* Load-on-condition register form (load/store-on-condition facility). */
	#define LEVAL(i) i(dst_r, src_r, mask)
	if (have_lscond1())
		return push_inst(compiler,
			WHEN2(dst_reg & SLJIT_32, locr, locgr));

	#undef LEVAL

	/* TODO(mundaym): implement */
	return SLJIT_ERR_UNSUPPORTED;
}
3288 
3289 /* --------------------------------------------------------------------- */
3290 /*  Other instructions                                                   */
3291 /* --------------------------------------------------------------------- */
3292 
3293 /* On s390x we build a literal pool to hold constants. This has two main
3294    advantages:
3295 
3296      1. we only need one instruction in the instruction stream (LGRL)
3297      2. we can store 64 bit addresses and use 32 bit offsets
3298 
3299    To retrofit the extra information needed to build the literal pool we
3300    add a new sljit_s390x_const struct that contains the initial value but
3301    can still be cast to a sljit_const. */
3302 
/* Emit a patchable 64-bit constant load; the value itself lives in the
   literal pool (see the comment above) and is referenced PC-relatively. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
{
	struct sljit_s390x_const *const_;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));

	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
					sizeof(struct sljit_s390x_const));
	PTR_FAIL_IF(!const_);
	set_const((struct sljit_const*)const_, compiler);
	const_->init_value = init_value;

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
	/* sljit_ins_const tags the instruction so the pool offset can be
	   filled in during code generation. */
	if (have_genext())
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | lgrl(dst_r, 0)));
	else {
		/* Without general-instructions-extension: compute the pool
		   address with larl, then load through it. */
		PTR_FAIL_IF(push_inst(compiler, sljit_ins_const | larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));

	return (struct sljit_const*)const_;
}
3330 
/* Patch a jump target: `addr` is the literal-pool slot holding the target
   address, so only the pool entry needs rewriting, not the instruction. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
{
	/* Update the constant pool. */
	sljit_uw *ptr = (sljit_uw *)addr;
	SLJIT_UNUSED_ARG(executable_offset);

	/* Temporarily make the pool writable on W^X configurations. */
	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
	*ptr = new_target;
	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
}
3342 
/* Patch a constant: constants live in the same literal pool as jump
   targets, so this is identical to patching a jump address. */
SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
{
	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
}
3347 
/* Load the (later-resolved) address of a label into dst; the address is
   stored in the literal pool like other constants. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_put_label *sljit_emit_put_label(
	struct sljit_compiler *compiler,
	sljit_s32 dst, sljit_sw dstw)
{
	struct sljit_put_label *put_label;
	sljit_gpr dst_r;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_put_label(compiler, dst, dstw));
	ADJUST_LOCAL_OFFSET(dst, dstw);

	put_label = (struct sljit_put_label*)ensure_abuf(compiler, sizeof(struct sljit_put_label));
	PTR_FAIL_IF(!put_label);
	set_put_label(put_label, compiler, 0);

	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;

	/* The 0 offsets are placeholders patched during code generation. */
	if (have_genext())
		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
	else {
		/* Without general-instructions-extension: compute the pool
		   address with larl, then load through it. */
		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
	}

	if (dst & SLJIT_MEM)
		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));

	return put_label;
}
3377 
3378 /* TODO(carenas): EVAL probably should move up or be refactored */
3379 #undef WHEN2
3380 #undef EVAL
3381 
3382 #undef tmp1
3383 #undef tmp0
3384 
3385 /* TODO(carenas): undef other macros that spill like is_u12? */
3386