1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
27 #include <sys/auxv.h>
28 
29 #ifdef __ARCH__
30 #define ENABLE_STATIC_FACILITY_DETECTION 1
31 #else
32 #define ENABLE_STATIC_FACILITY_DETECTION 0
33 #endif
34 #define ENABLE_DYNAMIC_FACILITY_DETECTION 1
35 
36 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
37 {
38 	return "s390x" SLJIT_CPUINFO;
39 }
40 
41 /* Instructions are stored as 64 bit values regardless their size. */
42 typedef sljit_uw sljit_ins;
43 
44 #define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
45 #define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
46 
47 static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 5] = {
48 	0, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 15, 0, 1, 14
49 };
50 
51 /* there are also a[2-15] available, but they are slower to access and
52  * their use is limited as mundaym explained:
53  *   https://github.com/zherczeg/sljit/pull/91#discussion_r486895689
54  */
55 
56 /* General Purpose Registers [0-15]. */
57 typedef sljit_uw sljit_gpr;
58 
59 /*
60  * WARNING
61  * The following code is non-standard and should be improved for
62  * consistency, but it does not use SLJIT_NUMBER_OF_REGISTERS based
63  * registers because r0 and r1 are the ABI recommended volatiles.
64  * There is a gpr() function that maps sljit to physical register numbers;
65  * it should be used instead of the usual index into reg_map[] and
66  * will be retired ASAP (TODO: carenas).
67  */
68 
69 static const sljit_gpr r0 = 0;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 2]: 0 in address calculations; reserved */
70 static const sljit_gpr r1 = 1;		/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 3]: reserved */
71 static const sljit_gpr r2 = 2;		/* reg_map[1]: 1st argument */
72 static const sljit_gpr r3 = 3;		/* reg_map[2]: 2nd argument */
73 static const sljit_gpr r4 = 4;		/* reg_map[3]: 3rd argument */
74 static const sljit_gpr r5 = 5;		/* reg_map[4]: 4th argument */
75 static const sljit_gpr r6 = 6;		/* reg_map[5]: 5th argument; 1st saved register */
76 static const sljit_gpr r7 = 7;		/* reg_map[6] */
77 static const sljit_gpr r8 = 8;		/* reg_map[7] */
78 static const sljit_gpr r9 = 9;		/* reg_map[8] */
79 static const sljit_gpr r10 = 10;	/* reg_map[9] */
80 static const sljit_gpr r11 = 11;	/* reg_map[10] */
81 static const sljit_gpr r12 = 12;	/* reg_map[11]: GOT */
82 static const sljit_gpr r13 = 13;	/* reg_map[12]: Literal Pool pointer */
83 static const sljit_gpr r14 = 14;	/* reg_map[0]: return address */
84 static const sljit_gpr r15 = 15;	/* reg_map[SLJIT_NUMBER_OF_REGISTERS + 1]: stack pointer */
85 
86 /* WARNING: r12 and r13 shouldn't be used as per ABI recommendation */
87 /* TODO(carenas): r12 might conflict in PIC code, reserve? */
88 /* TODO(carenas): r13 usually points to the literal "pool" per the ABI; using
89  *                a tmp like we do now might be faster though, reserve?
90  */
91 
92 /* TODO(carenas): should be named TMP_REG[1-2] for consistency */
93 #define tmp0	r0
94 #define tmp1	r1
95 
96 /* When reg cannot be unused. */
97 #define IS_GPR_REG(reg)		((reg > 0) && (reg) <= SLJIT_SP)
98 
99 /* Link register. */
100 static const sljit_gpr link_r = 14;     /* r14 */
101 
102 #define TMP_FREG1	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)
103 
104 static const sljit_u8 freg_map[SLJIT_NUMBER_OF_FLOAT_REGISTERS + 2] = {
105 	0, 0, 2, 4, 6, 3, 5, 7, 15, 14, 13, 12, 11, 10, 9, 8, 1
106 };
107 
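/* R<n>A(r) places a 4-bit register field n bits above the least significant
   bit of the (up to 48-bit) instruction word; e.g. R36A() fills the first
   operand field (bits 8-11) of the 6-byte RIL/RXY/RSY encodings, while
   R4A()/R0A() fill the two register fields of the 4-byte RRE forms. */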
108 #define R0A(r) (r)
109 #define R4A(r) ((r) << 4)
110 #define R8A(r) ((r) << 8)
111 #define R12A(r) ((r) << 12)
112 #define R16A(r) ((r) << 16)
113 #define R20A(r) ((r) << 20)
114 #define R28A(r) ((r) << 28)
115 #define R32A(r) ((r) << 32)
116 #define R36A(r) ((r) << 36)
117 
118 #define R0(r) ((sljit_ins)reg_map[r])
119 
120 #define F0(r) ((sljit_ins)freg_map[r])
121 #define F4(r) (R4A((sljit_ins)freg_map[r]))
122 #define F12(r) (R12A((sljit_ins)freg_map[r]))
123 #define F20(r) (R20A((sljit_ins)freg_map[r]))
124 #define F28(r) (R28A((sljit_ins)freg_map[r]))
125 #define F32(r) (R32A((sljit_ins)freg_map[r]))
126 #define F36(r) (R36A((sljit_ins)freg_map[r]))
127 
128 struct sljit_s390x_const {
129 	struct sljit_const const_; /* must be first */
130 	sljit_sw init_value;       /* required to build literal pool */
131 };
132 
133 /* Convert SLJIT register to hardware register. */
134 static SLJIT_INLINE sljit_gpr gpr(sljit_s32 r)
135 {
136 	SLJIT_ASSERT(r >= 0 && r < (sljit_s32)(sizeof(reg_map) / sizeof(reg_map[0])));
137 	return reg_map[r];
138 }
139 
140 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
141 {
142 	sljit_ins *ibuf = (sljit_ins *)ensure_buf(compiler, sizeof(sljit_ins));
143 	FAIL_IF(!ibuf);
144 	*ibuf = ins;
145 
146 	SLJIT_ASSERT(ins <= 0xffffffffffffL);
147 
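	/* compiler->size counts 2-byte halfwords: every instruction uses at
	   least one, 4-byte forms add a second and 6-byte forms a third. */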
148 	compiler->size++;
149 	if (ins & 0xffff00000000L)
150 		compiler->size++;
151 
152 	if (ins & 0xffffffff0000L)
153 		compiler->size++;
154 
155 	return SLJIT_SUCCESS;
156 }
157 
158 #define SLJIT_ADD_SUB_NO_COMPARE(status_flags_state) \
159 	(((status_flags_state) & (SLJIT_CURRENT_FLAGS_ADD | SLJIT_CURRENT_FLAGS_SUB)) \
160 		&& !((status_flags_state) & SLJIT_CURRENT_FLAGS_COMPARE))
161 
162 /* Map the given type to a 4-bit condition code mask. */
163 static SLJIT_INLINE sljit_u8 get_cc(struct sljit_compiler *compiler, sljit_s32 type) {
164 	const sljit_u8 cc0 = 1 << 3; /* equal {,to zero} */
165 	const sljit_u8 cc1 = 1 << 2; /* less than {,zero} */
166 	const sljit_u8 cc2 = 1 << 1; /* greater than {,zero} */
167 	const sljit_u8 cc3 = 1 << 0; /* {overflow,NaN} */
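	/* The result is a BRC/LOCR style mask: bit value 8 selects condition
	   code 0, 4 selects CC 1, 2 selects CC 2 and 1 selects CC 3. */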
168 
169 	switch (type) {
170 	case SLJIT_EQUAL:
171 		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
172 			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
173 			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
174 				return cc0;
175 			if (type == SLJIT_OVERFLOW)
176 				return (cc0 | cc3);
177 			return (cc0 | cc2);
178 		}
179 		/* fallthrough */
180 
181 	case SLJIT_ATOMIC_STORED:
182 	case SLJIT_F_EQUAL:
183 	case SLJIT_ORDERED_EQUAL:
184 		return cc0;
185 
186 	case SLJIT_NOT_EQUAL:
187 		if (SLJIT_ADD_SUB_NO_COMPARE(compiler->status_flags_state)) {
188 			sljit_s32 type = GET_FLAG_TYPE(compiler->status_flags_state);
189 			if (type >= SLJIT_SIG_LESS && type <= SLJIT_SIG_LESS_EQUAL)
190 				return (cc1 | cc2 | cc3);
191 			if (type == SLJIT_OVERFLOW)
192 				return (cc1 | cc2);
193 			return (cc1 | cc3);
194 		}
195 		/* fallthrough */
196 
197 	case SLJIT_UNORDERED_OR_NOT_EQUAL:
198 		return (cc1 | cc2 | cc3);
199 
200 	case SLJIT_LESS:
201 	case SLJIT_ATOMIC_NOT_STORED:
202 		return cc1;
203 
204 	case SLJIT_GREATER_EQUAL:
205 	case SLJIT_UNORDERED_OR_GREATER_EQUAL:
206 		return (cc0 | cc2 | cc3);
207 
208 	case SLJIT_GREATER:
209 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
210 			return cc2;
211 		return cc3;
212 
213 	case SLJIT_LESS_EQUAL:
214 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_COMPARE)
215 			return (cc0 | cc1);
216 		return (cc0 | cc1 | cc2);
217 
218 	case SLJIT_SIG_LESS:
219 	case SLJIT_F_LESS:
220 	case SLJIT_ORDERED_LESS:
221 		return cc1;
222 
223 	case SLJIT_NOT_CARRY:
224 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
225 			return (cc2 | cc3);
226 		/* fallthrough */
227 
228 	case SLJIT_SIG_LESS_EQUAL:
229 	case SLJIT_F_LESS_EQUAL:
230 	case SLJIT_ORDERED_LESS_EQUAL:
231 		return (cc0 | cc1);
232 
233 	case SLJIT_CARRY:
234 		if (compiler->status_flags_state & SLJIT_CURRENT_FLAGS_SUB)
235 			return (cc0 | cc1);
236 		/* fallthrough */
237 
238 	case SLJIT_SIG_GREATER:
239 	case SLJIT_UNORDERED_OR_GREATER:
240 		/* Overflow is considered greater, see SLJIT_SUB. */
241 		return cc2 | cc3;
242 
243 	case SLJIT_SIG_GREATER_EQUAL:
244 		return (cc0 | cc2 | cc3);
245 
246 	case SLJIT_OVERFLOW:
247 		if (compiler->status_flags_state & SLJIT_SET_Z)
248 			return (cc2 | cc3);
249 		/* fallthrough */
250 
251 	case SLJIT_UNORDERED:
252 		return cc3;
253 
254 	case SLJIT_NOT_OVERFLOW:
255 		if (compiler->status_flags_state & SLJIT_SET_Z)
256 			return (cc0 | cc1);
257 		/* fallthrough */
258 
259 	case SLJIT_ORDERED:
260 		return (cc0 | cc1 | cc2);
261 
262 	case SLJIT_F_NOT_EQUAL:
263 	case SLJIT_ORDERED_NOT_EQUAL:
264 		return (cc1 | cc2);
265 
266 	case SLJIT_F_GREATER:
267 	case SLJIT_ORDERED_GREATER:
268 		return cc2;
269 
270 	case SLJIT_F_GREATER_EQUAL:
271 	case SLJIT_ORDERED_GREATER_EQUAL:
272 		return (cc0 | cc2);
273 
274 	case SLJIT_UNORDERED_OR_LESS_EQUAL:
275 		return (cc0 | cc1 | cc3);
276 
277 	case SLJIT_UNORDERED_OR_EQUAL:
278 		return (cc0 | cc3);
279 
280 	case SLJIT_UNORDERED_OR_LESS:
281 		return (cc1 | cc3);
282 	}
283 
284 	SLJIT_UNREACHABLE();
285 	return (sljit_u8)-1;
286 }
287 
288 /* Facility to bit index mappings.
289    Note: some facilities share the same bit index. */
290 typedef sljit_uw facility_bit;
291 #define STORE_FACILITY_LIST_EXTENDED_FACILITY 7
292 #define FAST_LONG_DISPLACEMENT_FACILITY 19
293 #define EXTENDED_IMMEDIATE_FACILITY 21
294 #define GENERAL_INSTRUCTION_EXTENSION_FACILITY 34
295 #define DISTINCT_OPERAND_FACILITY 45
296 #define HIGH_WORD_FACILITY 45
297 #define POPULATION_COUNT_FACILITY 45
298 #define LOAD_STORE_ON_CONDITION_1_FACILITY 45
299 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY 49
300 #define LOAD_STORE_ON_CONDITION_2_FACILITY 53
301 #define MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY 58
302 #define VECTOR_FACILITY 129
303 #define VECTOR_ENHANCEMENTS_1_FACILITY 135
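/* STFLE stores facility bit b in doubleword b / 64 at bit 63 - (b % 64)
   counting from the least significant bit, i.e. facility bits are numbered
   from the leftmost bit of each doubleword. */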
304 
305 /* Report whether a facility is known to be present due to the compiler
306    settings. This function should always be compiled to a constant
307    value given a constant argument. */
308 static SLJIT_INLINE int have_facility_static(facility_bit x)
309 {
310 #if ENABLE_STATIC_FACILITY_DETECTION
311 	switch (x) {
312 	case FAST_LONG_DISPLACEMENT_FACILITY:
313 		return (__ARCH__ >=  6 /* z990 */);
314 	case EXTENDED_IMMEDIATE_FACILITY:
315 	case STORE_FACILITY_LIST_EXTENDED_FACILITY:
316 		return (__ARCH__ >=  7 /* z9-109 */);
317 	case GENERAL_INSTRUCTION_EXTENSION_FACILITY:
318 		return (__ARCH__ >=  8 /* z10 */);
319 	case DISTINCT_OPERAND_FACILITY:
320 		return (__ARCH__ >=  9 /* z196 */);
321 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_1_FACILITY:
322 		return (__ARCH__ >= 10 /* zEC12 */);
323 	case LOAD_STORE_ON_CONDITION_2_FACILITY:
324 	case VECTOR_FACILITY:
325 		return (__ARCH__ >= 11 /* z13 */);
326 	case MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY:
327 	case VECTOR_ENHANCEMENTS_1_FACILITY:
328 		return (__ARCH__ >= 12 /* z14 */);
329 	default:
330 		SLJIT_UNREACHABLE();
331 	}
332 #endif
333 	return 0;
334 }
335 
336 static SLJIT_INLINE unsigned long get_hwcap()
337 {
338 	static unsigned long hwcap = 0;
339 	if (SLJIT_UNLIKELY(!hwcap)) {
340 		hwcap = getauxval(AT_HWCAP);
341 		SLJIT_ASSERT(hwcap != 0);
342 	}
343 	return hwcap;
344 }
345 
346 static SLJIT_INLINE int have_stfle()
347 {
348 	if (have_facility_static(STORE_FACILITY_LIST_EXTENDED_FACILITY))
349 		return 1;
350 
351 	return (get_hwcap() & HWCAP_S390_STFLE);
352 }
353 
354 /* Report whether the given facility is available. This function always
355    performs a runtime check. */
356 static int have_facility_dynamic(facility_bit x)
357 {
358 #if ENABLE_DYNAMIC_FACILITY_DETECTION
359 	static struct {
360 		sljit_uw bits[4];
361 	} cpu_features;
362 	size_t size = sizeof(cpu_features);
363 	const sljit_uw word_index = x >> 6;
364 	const sljit_uw bit_index = ((1UL << 63) >> (x & 63));
365 
366 	SLJIT_ASSERT(x < size * 8);
367 	if (SLJIT_UNLIKELY(!have_stfle()))
368 		return 0;
369 
370 	if (SLJIT_UNLIKELY(cpu_features.bits[0] == 0)) {
371 		__asm__ __volatile__ (
372 			"lgr   %%r0, %0;"
373 			"stfle 0(%1);"
374 			/* outputs  */:
375 			/* inputs   */: "d" ((size / 8) - 1), "a" (&cpu_features)
376 			/* clobbers */: "r0", "cc", "memory"
377 		);
378 		SLJIT_ASSERT(cpu_features.bits[0] != 0);
379 	}
380 	return (cpu_features.bits[word_index] & bit_index) != 0;
381 #else
382 	return 0;
383 #endif
384 }
385 
386 #define HAVE_FACILITY(name, bit) \
387 static SLJIT_INLINE int name() \
388 { \
389 	static int have = -1; \
390 	/* Static check first. May allow the function to be optimized away. */ \
391 	if (have_facility_static(bit)) \
392 		have = 1; \
393 	else if (SLJIT_UNLIKELY(have < 0)) \
394 		have = have_facility_dynamic(bit) ? 1 : 0; \
395 \
396 	return have; \
397 }
398 
399 HAVE_FACILITY(have_eimm,    EXTENDED_IMMEDIATE_FACILITY)
400 HAVE_FACILITY(have_ldisp,   FAST_LONG_DISPLACEMENT_FACILITY)
401 HAVE_FACILITY(have_genext,  GENERAL_INSTRUCTION_EXTENSION_FACILITY)
402 HAVE_FACILITY(have_lscond1, LOAD_STORE_ON_CONDITION_1_FACILITY)
403 HAVE_FACILITY(have_lscond2, LOAD_STORE_ON_CONDITION_2_FACILITY)
404 HAVE_FACILITY(have_misc2,   MISCELLANEOUS_INSTRUCTION_EXTENSIONS_2_FACILITY)
405 #undef HAVE_FACILITY
406 
407 #define is_u12(d)	(0 <= (d) && (d) <= 0x00000fffL)
408 #define is_u32(d)	(0 <= (d) && (d) <= 0xffffffffL)
409 
410 #define CHECK_SIGNED(v, bitlen) \
411 	((v) >= -(1 << ((bitlen) - 1)) && (v) < (1 << ((bitlen) - 1)))
412 
413 #define is_s8(d)	CHECK_SIGNED((d), 8)
414 #define is_s16(d)	CHECK_SIGNED((d), 16)
415 #define is_s20(d)	CHECK_SIGNED((d), 20)
416 #define is_s32(d)	((d) == (sljit_s32)(d))
417 
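/* Encode a signed 20-bit displacement as used by the RXY/RSY formats: the
   low 12 bits (DL) end up in instruction bits 20-31 and the high 8 bits (DH)
   in bits 32-39 of the 48-bit encoding. */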
418 static SLJIT_INLINE sljit_ins disp_s20(sljit_s32 d)
419 {
420 	sljit_uw dh, dl;
421 
422 	SLJIT_ASSERT(is_s20(d));
423 
424 	dh = (d >> 12) & 0xff;
425 	dl = ((sljit_uw)d << 8) & 0xfff00;
426 	return (dh | dl) << 8;
427 }
428 
429 /* TODO(carenas): variadic macro is not strictly needed */
430 #define SLJIT_S390X_INSTRUCTION(op, ...) \
431 static SLJIT_INLINE sljit_ins op(__VA_ARGS__)
432 
433 /* RR form instructions. */
434 #define SLJIT_S390X_RR(name, pattern) \
435 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
436 { \
437 	return (pattern) | ((dst & 0xf) << 4) | (src & 0xf); \
438 }
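/* Example: nr(r2, r3) defined below expands to 0x1423, i.e. NR %r2,%r3. */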
439 
440 /* AND */
441 SLJIT_S390X_RR(nr,   0x1400)
442 
443 /* BRANCH AND SAVE */
444 SLJIT_S390X_RR(basr, 0x0d00)
445 
446 /* BRANCH ON CONDITION */
447 SLJIT_S390X_RR(bcr,  0x0700) /* TODO(mundaym): type for mask? */
448 
449 /* DIVIDE */
450 SLJIT_S390X_RR(dr,   0x1d00)
451 
452 /* EXCLUSIVE OR */
453 SLJIT_S390X_RR(xr,   0x1700)
454 
455 /* LOAD */
456 SLJIT_S390X_RR(lr,   0x1800)
457 
458 /* LOAD COMPLEMENT */
459 SLJIT_S390X_RR(lcr,  0x1300)
460 
461 /* OR */
462 SLJIT_S390X_RR(or,   0x1600)
463 
464 #undef SLJIT_S390X_RR
465 
466 /* RRE form instructions */
467 #define SLJIT_S390X_RRE(name, pattern) \
468 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src) \
469 { \
470 	return (pattern) | R4A(dst) | R0A(src); \
471 }
472 
473 /* AND */
474 SLJIT_S390X_RRE(ngr,   0xb9800000)
475 
476 /* DIVIDE LOGICAL */
477 SLJIT_S390X_RRE(dlr,   0xb9970000)
478 SLJIT_S390X_RRE(dlgr,  0xb9870000)
479 
480 /* DIVIDE SINGLE */
481 SLJIT_S390X_RRE(dsgr,  0xb90d0000)
482 
483 /* EXCLUSIVE OR */
484 SLJIT_S390X_RRE(xgr,   0xb9820000)
485 
486 /* LOAD */
487 SLJIT_S390X_RRE(lgr,   0xb9040000)
488 SLJIT_S390X_RRE(lgfr,  0xb9140000)
489 
490 /* LOAD BYTE */
491 SLJIT_S390X_RRE(lbr,   0xb9260000)
492 SLJIT_S390X_RRE(lgbr,  0xb9060000)
493 
494 /* LOAD COMPLEMENT */
495 SLJIT_S390X_RRE(lcgr,  0xb9030000)
496 
497 /* LOAD HALFWORD */
498 SLJIT_S390X_RRE(lhr,   0xb9270000)
499 SLJIT_S390X_RRE(lghr,  0xb9070000)
500 
501 /* LOAD LOGICAL */
502 SLJIT_S390X_RRE(llgfr, 0xb9160000)
503 
504 /* LOAD LOGICAL CHARACTER */
505 SLJIT_S390X_RRE(llcr,  0xb9940000)
506 SLJIT_S390X_RRE(llgcr, 0xb9840000)
507 
508 /* LOAD LOGICAL HALFWORD */
509 SLJIT_S390X_RRE(llhr,  0xb9950000)
510 SLJIT_S390X_RRE(llghr, 0xb9850000)
511 
512 /* MULTIPLY LOGICAL */
513 SLJIT_S390X_RRE(mlgr,  0xb9860000)
514 
515 /* MULTIPLY SINGLE */
516 SLJIT_S390X_RRE(msgfr, 0xb91c0000)
517 
518 /* OR */
519 SLJIT_S390X_RRE(ogr,   0xb9810000)
520 
521 /* SUBTRACT */
522 SLJIT_S390X_RRE(sgr,   0xb9090000)
523 
524 #undef SLJIT_S390X_RRE
525 
526 /* RI-a form instructions */
527 #define SLJIT_S390X_RIA(name, pattern, imm_type) \
528 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
529 { \
530 	return (pattern) | R20A(reg) | (imm & 0xffff); \
531 }
532 
533 /* ADD HALFWORD IMMEDIATE */
534 SLJIT_S390X_RIA(aghi,  0xa70b0000, sljit_s16)
535 
536 /* LOAD HALFWORD IMMEDIATE */
537 SLJIT_S390X_RIA(lhi,   0xa7080000, sljit_s16)
538 SLJIT_S390X_RIA(lghi,  0xa7090000, sljit_s16)
539 
540 /* LOAD LOGICAL IMMEDIATE */
541 SLJIT_S390X_RIA(llihh, 0xa50c0000, sljit_u16)
542 SLJIT_S390X_RIA(llihl, 0xa50d0000, sljit_u16)
543 SLJIT_S390X_RIA(llilh, 0xa50e0000, sljit_u16)
544 SLJIT_S390X_RIA(llill, 0xa50f0000, sljit_u16)
545 
546 /* MULTIPLY HALFWORD IMMEDIATE */
547 SLJIT_S390X_RIA(mhi,   0xa70c0000, sljit_s16)
548 SLJIT_S390X_RIA(mghi,  0xa70d0000, sljit_s16)
549 
550 /* OR IMMEDIATE */
551 SLJIT_S390X_RIA(oilh,  0xa50a0000, sljit_u16)
552 
553 #undef SLJIT_S390X_RIA
554 
555 /* RIL-a form instructions (requires extended immediate facility) */
556 #define SLJIT_S390X_RILA(name, pattern, imm_type) \
557 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, imm_type imm) \
558 { \
559 	SLJIT_ASSERT(have_eimm()); \
560 	return (pattern) | R36A(reg) | ((sljit_ins)imm & 0xffffffffu); \
561 }
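/* Example: lgfi(r2, -1) defined below expands to 0xc021ffffffff, i.e. LGFI %r2,-1. */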
562 
563 /* ADD IMMEDIATE */
564 SLJIT_S390X_RILA(agfi,  0xc20800000000, sljit_s32)
565 
566 /* ADD IMMEDIATE HIGH */
567 SLJIT_S390X_RILA(aih,   0xcc0800000000, sljit_s32) /* TODO(mundaym): high-word facility? */
568 
569 /* AND IMMEDIATE */
570 SLJIT_S390X_RILA(nihf,  0xc00a00000000, sljit_u32)
571 
572 /* EXCLUSIVE OR IMMEDIATE */
573 SLJIT_S390X_RILA(xilf,  0xc00700000000, sljit_u32)
574 
575 /* INSERT IMMEDIATE */
576 SLJIT_S390X_RILA(iihf,  0xc00800000000, sljit_u32)
577 SLJIT_S390X_RILA(iilf,  0xc00900000000, sljit_u32)
578 
579 /* LOAD IMMEDIATE */
580 SLJIT_S390X_RILA(lgfi,  0xc00100000000, sljit_s32)
581 
582 /* LOAD LOGICAL IMMEDIATE */
583 SLJIT_S390X_RILA(llihf, 0xc00e00000000, sljit_u32)
584 SLJIT_S390X_RILA(llilf, 0xc00f00000000, sljit_u32)
585 
586 /* SUBTRACT LOGICAL IMMEDIATE */
587 SLJIT_S390X_RILA(slfi,  0xc20500000000, sljit_u32)
588 
589 #undef SLJIT_S390X_RILA
590 
591 /* RX-a form instructions */
592 #define SLJIT_S390X_RXA(name, pattern) \
593 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
594 { \
595 	SLJIT_ASSERT((d & 0xfff) == d); \
596 \
597 	return (pattern) | R20A(r) | R16A(x) | R12A(b) | (sljit_ins)(d & 0xfff); \
598 }
599 
600 /* LOAD */
601 SLJIT_S390X_RXA(l,   0x58000000)
602 
603 /* LOAD ADDRESS */
604 SLJIT_S390X_RXA(la,  0x41000000)
605 
606 /* LOAD HALFWORD */
607 SLJIT_S390X_RXA(lh,  0x48000000)
608 
609 /* MULTIPLY SINGLE */
610 SLJIT_S390X_RXA(ms,  0x71000000)
611 
612 /* STORE */
613 SLJIT_S390X_RXA(st,  0x50000000)
614 
615 /* STORE CHARACTER */
616 SLJIT_S390X_RXA(stc, 0x42000000)
617 
618 /* STORE HALFWORD */
619 SLJIT_S390X_RXA(sth, 0x40000000)
620 
621 #undef SLJIT_S390X_RXA
622 
623 /* RXY-a instructions */
624 #define SLJIT_S390X_RXYA(name, pattern, cond) \
625 SLJIT_S390X_INSTRUCTION(name, sljit_gpr r, sljit_s32 d, sljit_gpr x, sljit_gpr b) \
626 { \
627 	SLJIT_ASSERT(cond); \
628 \
629 	return (pattern) | R36A(r) | R32A(x) | R28A(b) | disp_s20(d); \
630 }
631 
632 /* LOAD */
633 SLJIT_S390X_RXYA(ly,    0xe30000000058, have_ldisp())
634 SLJIT_S390X_RXYA(lg,    0xe30000000004, 1)
635 SLJIT_S390X_RXYA(lgf,   0xe30000000014, 1)
636 
637 /* LOAD BYTE */
638 SLJIT_S390X_RXYA(lb,    0xe30000000076, have_ldisp())
639 SLJIT_S390X_RXYA(lgb,   0xe30000000077, have_ldisp())
640 
641 /* LOAD HALFWORD */
642 SLJIT_S390X_RXYA(lhy,   0xe30000000078, have_ldisp())
643 SLJIT_S390X_RXYA(lgh,   0xe30000000015, 1)
644 
645 /* LOAD LOGICAL */
646 SLJIT_S390X_RXYA(llgf,  0xe30000000016, 1)
647 
648 /* LOAD LOGICAL CHARACTER */
649 SLJIT_S390X_RXYA(llc,   0xe30000000094, have_eimm())
650 SLJIT_S390X_RXYA(llgc,  0xe30000000090, 1)
651 
652 /* LOAD LOGICAL HALFWORD */
653 SLJIT_S390X_RXYA(llh,   0xe30000000095, have_eimm())
654 SLJIT_S390X_RXYA(llgh,  0xe30000000091, 1)
655 
656 /* MULTIPLY SINGLE */
657 SLJIT_S390X_RXYA(msy,   0xe30000000051, have_ldisp())
658 SLJIT_S390X_RXYA(msg,   0xe3000000000c, 1)
659 
660 /* STORE */
661 SLJIT_S390X_RXYA(sty,   0xe30000000050, have_ldisp())
662 SLJIT_S390X_RXYA(stg,   0xe30000000024, 1)
663 
664 /* STORE CHARACTER */
665 SLJIT_S390X_RXYA(stcy,  0xe30000000072, have_ldisp())
666 
667 /* STORE HALFWORD */
668 SLJIT_S390X_RXYA(sthy,  0xe30000000070, have_ldisp())
669 
670 #undef SLJIT_S390X_RXYA
671 
672 /* RSY-a instructions */
673 #define SLJIT_S390X_RSYA(name, pattern, cond) \
674 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_s32 d, sljit_gpr b) \
675 { \
676 	SLJIT_ASSERT(cond); \
677 \
678 	return (pattern) | R36A(dst) | R32A(src) | R28A(b) | disp_s20(d); \
679 }
680 
681 /* LOAD MULTIPLE */
682 SLJIT_S390X_RSYA(lmg,   0xeb0000000004, 1)
683 
684 /* SHIFT LEFT LOGICAL */
685 SLJIT_S390X_RSYA(sllg,  0xeb000000000d, 1)
686 
687 /* SHIFT RIGHT SINGLE */
688 SLJIT_S390X_RSYA(srag,  0xeb000000000a, 1)
689 
690 /* STORE MULTIPLE */
691 SLJIT_S390X_RSYA(stmg,  0xeb0000000024, 1)
692 
693 #undef SLJIT_S390X_RSYA
694 
695 /* RIE-f instructions (require general-instructions-extension facility) */
696 #define SLJIT_S390X_RIEF(name, pattern) \
697 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot) \
698 { \
699 	sljit_ins i3, i4, i5; \
700 \
701 	SLJIT_ASSERT(have_genext()); \
702 	i3 = (sljit_ins)start << 24; \
703 	i4 = (sljit_ins)end << 16; \
704 	i5 = (sljit_ins)rot << 8; \
705 \
706 	return (pattern) | R36A(dst & 0xf) | R32A(src & 0xf) | i3 | i4 | i5; \
707 }
708 
709 /* ROTATE THEN AND SELECTED BITS */
710 /* SLJIT_S390X_RIEF(rnsbg,  0xec0000000054) */
711 
712 /* ROTATE THEN EXCLUSIVE OR SELECTED BITS */
713 /* SLJIT_S390X_RIEF(rxsbg,  0xec0000000057) */
714 
715 /* ROTATE THEN OR SELECTED BITS */
716 SLJIT_S390X_RIEF(rosbg,  0xec0000000056)
717 
718 /* ROTATE THEN INSERT SELECTED BITS */
719 /* SLJIT_S390X_RIEF(risbg,  0xec0000000055) */
720 /* SLJIT_S390X_RIEF(risbgn, 0xec0000000059) */
721 
722 /* ROTATE THEN INSERT SELECTED BITS HIGH */
723 SLJIT_S390X_RIEF(risbhg, 0xec000000005d)
724 
725 /* ROTATE THEN INSERT SELECTED BITS LOW */
726 /* SLJIT_S390X_RIEF(risblg, 0xec0000000051) */
727 
728 #undef SLJIT_S390X_RIEF
729 
730 /* RRF-c instructions (require load/store-on-condition 1 facility) */
731 #define SLJIT_S390X_RRFC(name, pattern) \
732 SLJIT_S390X_INSTRUCTION(name, sljit_gpr dst, sljit_gpr src, sljit_uw mask) \
733 { \
734 	sljit_ins m3; \
735 \
736 	SLJIT_ASSERT(have_lscond1()); \
737 	m3 = (sljit_ins)(mask & 0xf) << 12; \
738 \
739 	return (pattern) | m3 | R4A(dst) | R0A(src); \
740 }
741 
742 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
743 SLJIT_S390X_RRFC(locr,  0xb9f20000)
744 SLJIT_S390X_RRFC(locgr, 0xb9e20000)
745 
746 #undef SLJIT_S390X_RRFC
747 
748 /* RIE-g instructions (require load/store-on-condition 2 facility) */
749 #define SLJIT_S390X_RIEG(name, pattern) \
750 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw imm, sljit_uw mask) \
751 { \
752 	sljit_ins m3, i2; \
753 \
754 	SLJIT_ASSERT(have_lscond2()); \
755 	m3 = (sljit_ins)(mask & 0xf) << 32; \
756 	i2 = (sljit_ins)(imm & 0xffffL) << 16; \
757 \
758 	return (pattern) | R36A(reg) | m3 | i2; \
759 }
760 
761 /* LOAD HALFWORD IMMEDIATE ON CONDITION */
762 SLJIT_S390X_RIEG(lochi,  0xec0000000042)
763 SLJIT_S390X_RIEG(locghi, 0xec0000000046)
764 
765 #undef SLJIT_S390X_RIEG
766 
767 #define SLJIT_S390X_RILB(name, pattern, cond) \
768 SLJIT_S390X_INSTRUCTION(name, sljit_gpr reg, sljit_sw ri) \
769 { \
770 	SLJIT_ASSERT(cond); \
771 \
772 	return (pattern) | R36A(reg) | (sljit_ins)(ri & 0xffffffff); \
773 }
774 
775 /* BRANCH RELATIVE AND SAVE LONG */
776 SLJIT_S390X_RILB(brasl, 0xc00500000000, 1)
777 
778 /* LOAD ADDRESS RELATIVE LONG */
779 SLJIT_S390X_RILB(larl,  0xc00000000000, 1)
780 
781 /* LOAD RELATIVE LONG */
782 SLJIT_S390X_RILB(lgrl,  0xc40800000000, have_genext())
783 
784 #undef SLJIT_S390X_RILB
785 
786 SLJIT_S390X_INSTRUCTION(br, sljit_gpr target)
787 {
788 	return 0x07f0 | target;
789 }
790 
791 SLJIT_S390X_INSTRUCTION(brc, sljit_uw mask, sljit_sw target)
792 {
793 	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 20;
794 	sljit_ins ri2 = (sljit_ins)target & 0xffff;
795 	return 0xa7040000L | m1 | ri2;
796 }
797 
798 SLJIT_S390X_INSTRUCTION(brcl, sljit_uw mask, sljit_sw target)
799 {
800 	sljit_ins m1 = (sljit_ins)(mask & 0xf) << 36;
801 	sljit_ins ri2 = (sljit_ins)target & 0xffffffff;
802 	return 0xc00400000000L | m1 | ri2;
803 }
804 
805 SLJIT_S390X_INSTRUCTION(flogr, sljit_gpr dst, sljit_gpr src)
806 {
807 	SLJIT_ASSERT(have_eimm());
808 	return 0xb9830000 | R8A(dst) | R0A(src);
809 }
810 
811 /* INSERT PROGRAM MASK */
812 SLJIT_S390X_INSTRUCTION(ipm, sljit_gpr dst)
813 {
814 	return 0xb2220000 | R4A(dst);
815 }
816 
817 /* SET PROGRAM MASK */
818 SLJIT_S390X_INSTRUCTION(spm, sljit_gpr dst)
819 {
820 	return 0x0400 | R4A(dst);
821 }
822 
823 /* ROTATE THEN INSERT SELECTED BITS HIGH (ZERO) */
824 SLJIT_S390X_INSTRUCTION(risbhgz, sljit_gpr dst, sljit_gpr src, sljit_u8 start, sljit_u8 end, sljit_u8 rot)
825 {
826 	return risbhg(dst, src, start, 0x8 | end, rot);
827 }
828 
829 #undef SLJIT_S390X_INSTRUCTION
830 
831 static sljit_s32 update_zero_overflow(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r)
832 {
833 	/* Condition codes: bits 18 and 19.
834 	   Transformation:
835 	     0 (zero and no overflow) : unchanged
836 	     1 (non-zero and no overflow) : unchanged
837 	     2 (zero and overflow) : decreased by 1
838 	     3 (non-zero and overflow) : decreased by 1 if non-zero */
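	/* The branch targets are halfword counts measured from the brc itself:
	   2 for the brc, 2 for ipm, 1 or 2 for or/ogr, 2 for the inner brc,
	   3 for slfi and 1 for spm. */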
839 	FAIL_IF(push_inst(compiler, brc(0xc, 2 + 2 + ((op & SLJIT_32) ? 1 : 2) + 2 + 3 + 1)));
840 	FAIL_IF(push_inst(compiler, ipm(tmp1)));
841 	FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r)));
842 	FAIL_IF(push_inst(compiler, brc(0x8, 2 + 3)));
843 	FAIL_IF(push_inst(compiler, slfi(tmp1, 0x10000000)));
844 	FAIL_IF(push_inst(compiler, spm(tmp1)));
845 	return SLJIT_SUCCESS;
846 }
847 
848 /* load 64-bit immediate into register without clobbering flags */
849 static sljit_s32 push_load_imm_inst(struct sljit_compiler *compiler, sljit_gpr target, sljit_sw v)
850 {
851 	/* 4 byte instructions */
852 	if (is_s16(v))
853 		return push_inst(compiler, lghi(target, (sljit_s16)v));
854 
855 	if (((sljit_uw)v & ~(sljit_uw)0x000000000000ffff) == 0)
856 		return push_inst(compiler, llill(target, (sljit_u16)v));
857 
858 	if (((sljit_uw)v & ~(sljit_uw)0x00000000ffff0000) == 0)
859 		return push_inst(compiler, llilh(target, (sljit_u16)(v >> 16)));
860 
861 	if (((sljit_uw)v & ~(sljit_uw)0x0000ffff00000000) == 0)
862 		return push_inst(compiler, llihl(target, (sljit_u16)(v >> 32)));
863 
864 	if (((sljit_uw)v & ~(sljit_uw)0xffff000000000000) == 0)
865 		return push_inst(compiler, llihh(target, (sljit_u16)(v >> 48)));
866 
867 	if (is_s32(v))
868 		return push_inst(compiler, lgfi(target, (sljit_s32)v));
869 
870 	if (((sljit_uw)v >> 32) == 0)
871 		return push_inst(compiler, llilf(target, (sljit_u32)v));
872 
873 	if (((sljit_uw)v << 32) == 0)
874 		return push_inst(compiler, llihf(target, (sljit_u32)((sljit_uw)v >> 32)));
875 
876 	FAIL_IF(push_inst(compiler, llilf(target, (sljit_u32)v)));
877 	return push_inst(compiler, iihf(target, (sljit_u32)(v >> 32)));
878 }
879 
880 struct addr {
881 	sljit_gpr base;
882 	sljit_gpr index;
883 	sljit_s32 offset;
884 };
885 
886 /* transform memory operand into D(X,B) form with a signed 20-bit offset */
887 static sljit_s32 make_addr_bxy(struct sljit_compiler *compiler,
888 	struct addr *addr, sljit_s32 mem, sljit_sw off,
889 	sljit_gpr tmp /* clobbered, must not be r0 */)
890 {
891 	sljit_gpr base = r0;
892 	sljit_gpr index = r0;
893 
894 	SLJIT_ASSERT(tmp != r0);
895 	if (mem & REG_MASK)
896 		base = gpr(mem & REG_MASK);
897 
898 	if (mem & OFFS_REG_MASK) {
899 		index = gpr(OFFS_REG(mem));
900 		if (off != 0) {
901 			/* shift and put the result into tmp */
902 			SLJIT_ASSERT(0 <= off && off < 64);
903 			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
904 			index = tmp;
905 			off = 0; /* clear offset */
906 		}
907 	}
908 	else if (!is_s20(off)) {
909 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
910 		index = tmp;
911 		off = 0; /* clear offset */
912 	}
913 	addr->base = base;
914 	addr->index = index;
915 	addr->offset = (sljit_s32)off;
916 	return SLJIT_SUCCESS;
917 }
918 
919 /* transform memory operand into D(X,B) form with an unsigned 12-bit offset */
920 static sljit_s32 make_addr_bx(struct sljit_compiler *compiler,
921 	struct addr *addr, sljit_s32 mem, sljit_sw off,
922 	sljit_gpr tmp /* clobbered, must not be r0 */)
923 {
924 	sljit_gpr base = r0;
925 	sljit_gpr index = r0;
926 
927 	SLJIT_ASSERT(tmp != r0);
928 	if (mem & REG_MASK)
929 		base = gpr(mem & REG_MASK);
930 
931 	if (mem & OFFS_REG_MASK) {
932 		index = gpr(OFFS_REG(mem));
933 		if (off != 0) {
934 			/* shift and put the result into tmp */
935 			SLJIT_ASSERT(0 <= off && off < 64);
936 			FAIL_IF(push_inst(compiler, sllg(tmp, index, (sljit_s32)off, 0)));
937 			index = tmp;
938 			off = 0; /* clear offset */
939 		}
940 	}
941 	else if (!is_u12(off)) {
942 		FAIL_IF(push_load_imm_inst(compiler, tmp, off));
943 		index = tmp;
944 		off = 0; /* clear offset */
945 	}
946 	addr->base = base;
947 	addr->index = index;
948 	addr->offset = (sljit_s32)off;
949 	return SLJIT_SUCCESS;
950 }
951 
952 #define EVAL(op, r, addr) op(r, addr.offset, addr.index, addr.base)
953 #define WHEN(cond, r, i1, i2, addr) \
954 	(cond) ? EVAL(i1, r, addr) : EVAL(i2, r, addr)
955 
956 /* May clobber tmp1. */
957 static sljit_s32 load_store_op(struct sljit_compiler *compiler, sljit_gpr reg,
958 		sljit_s32 mem, sljit_sw memw,
959 		sljit_s32 is_32bit, const sljit_ins* forms)
960 {
961 	struct addr addr;
962 
963 	SLJIT_ASSERT(mem & SLJIT_MEM);
964 
965 	if (is_32bit && ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw))) {
966 		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
967 		return push_inst(compiler, forms[0] | R20A(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
968 	}
969 
970 	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
971 	return push_inst(compiler, (is_32bit ? forms[1] : forms[2]) | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
972 }
973 
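/* Each forms[] triple below holds the { 32-bit RX, 32-bit RXY, 64-bit RXY }
   opcodes; load_store_op() selects an entry based on the operand size and on
   whether the displacement fits the 12-bit RX field. */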
974 static const sljit_ins load_forms[3] = {
975 	0x58000000 /* l */,
976 	0xe30000000058 /* ly */,
977 	0xe30000000004 /* lg */
978 };
979 
980 static const sljit_ins store_forms[3] = {
981 	0x50000000 /* st */,
982 	0xe30000000050 /* sty */,
983 	0xe30000000024 /* stg */
984 };
985 
986 static const sljit_ins load_halfword_forms[3] = {
987 	0x48000000 /* lh */,
988 	0xe30000000078 /* lhy */,
989 	0xe30000000015 /* lgh */
990 };
991 
992 /* May clobber tmp1. */
993 static SLJIT_INLINE sljit_s32 load_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
994 		sljit_s32 src, sljit_sw srcw,
995 		sljit_s32 is_32bit)
996 {
997 	return load_store_op(compiler, dst_r, src, srcw, is_32bit, load_forms);
998 }
999 
1000 /* May clobber tmp1. */
1001 static sljit_s32 load_unsigned_word(struct sljit_compiler *compiler, sljit_gpr dst_r,
1002 		sljit_s32 src, sljit_sw srcw,
1003 		sljit_s32 is_32bit)
1004 {
1005 	struct addr addr;
1006 	sljit_ins ins;
1007 
1008 	SLJIT_ASSERT(src & SLJIT_MEM);
1009 
1010 	FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
1011 
1012 	ins = is_32bit ? 0xe30000000016 /* llgf */ : 0xe30000000004 /* lg */;
1013 	return push_inst(compiler, ins | R36A(dst_r) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
1014 }
1015 
1016 /* May clobber tmp1. */
1017 static SLJIT_INLINE sljit_s32 store_word(struct sljit_compiler *compiler, sljit_gpr src_r,
1018 		sljit_s32 dst, sljit_sw dstw,
1019 		sljit_s32 is_32bit)
1020 {
1021 	return load_store_op(compiler, src_r, dst, dstw, is_32bit, store_forms);
1022 }
1023 
1024 #undef WHEN
1025 
1026 static sljit_s32 emit_move(struct sljit_compiler *compiler,
1027 	sljit_gpr dst_r,
1028 	sljit_s32 src, sljit_sw srcw)
1029 {
1030 	sljit_gpr src_r;
1031 
1032 	SLJIT_ASSERT(!IS_GPR_REG(src) || dst_r != gpr(src & REG_MASK));
1033 
1034 	if (src == SLJIT_IMM)
1035 		return push_load_imm_inst(compiler, dst_r, srcw);
1036 
1037 	if (src & SLJIT_MEM)
1038 		return load_word(compiler, dst_r, src, srcw, (compiler->mode & SLJIT_32) != 0);
1039 
1040 	src_r = gpr(src & REG_MASK);
1041 	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, src_r) : lgr(dst_r, src_r));
1042 }
1043 
1044 static sljit_s32 emit_rr(struct sljit_compiler *compiler, sljit_ins ins,
1045 	sljit_s32 dst,
1046 	sljit_s32 src1, sljit_sw src1w,
1047 	sljit_s32 src2, sljit_sw src2w)
1048 {
1049 	sljit_gpr dst_r = tmp0;
1050 	sljit_gpr src_r = tmp1;
1051 	sljit_s32 needs_move = 1;
1052 
1053 	if (FAST_IS_REG(dst)) {
1054 		dst_r = gpr(dst);
1055 
1056 		if (dst == src1)
1057 			needs_move = 0;
1058 		else if (dst == src2) {
1059 			dst_r = tmp0;
1060 			needs_move = 2;
1061 		}
1062 	}
1063 
1064 	if (needs_move)
1065 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1066 
1067 	if (FAST_IS_REG(src2))
1068 		src_r = gpr(src2);
1069 	else
1070 		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1071 
1072 	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | R0A(src_r)));
1073 
1074 	if (needs_move != 2)
1075 		return SLJIT_SUCCESS;
1076 
1077 	dst_r = gpr(dst & REG_MASK);
1078 	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1079 }
1080 
1081 static sljit_s32 emit_rr1(struct sljit_compiler *compiler, sljit_ins ins,
1082 	sljit_s32 dst,
1083 	sljit_s32 src1, sljit_sw src1w)
1084 {
1085 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1086 	sljit_gpr src_r = tmp1;
1087 
1088 	if (FAST_IS_REG(src1))
1089 		src_r = gpr(src1);
1090 	else
1091 		FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
1092 
1093 	return push_inst(compiler, ins | R4A(dst_r) | R0A(src_r));
1094 }
1095 
1096 static sljit_s32 emit_rrf(struct sljit_compiler *compiler, sljit_ins ins,
1097 	sljit_s32 dst,
1098 	sljit_s32 src1, sljit_sw src1w,
1099 	sljit_s32 src2, sljit_sw src2w)
1100 {
1101 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
1102 	sljit_gpr src1_r = tmp0;
1103 	sljit_gpr src2_r = tmp1;
1104 
1105 	if (FAST_IS_REG(src1))
1106 		src1_r = gpr(src1);
1107 	else
1108 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1109 
1110 	if (FAST_IS_REG(src2))
1111 		src2_r = gpr(src2);
1112 	else
1113 		FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
1114 
1115 	return push_inst(compiler, ins | R4A(dst_r) | R0A(src1_r) | R12A(src2_r));
1116 }
1117 
1118 typedef enum {
1119 	RI_A,
1120 	RIL_A,
1121 } emit_ril_type;
1122 
1123 static sljit_s32 emit_ri(struct sljit_compiler *compiler, sljit_ins ins,
1124 	sljit_s32 dst,
1125 	sljit_s32 src1, sljit_sw src1w,
1126 	sljit_sw src2w,
1127 	emit_ril_type type)
1128 {
1129 	sljit_gpr dst_r = tmp0;
1130 	sljit_s32 needs_move = 1;
1131 
1132 	if (FAST_IS_REG(dst)) {
1133 		dst_r = gpr(dst);
1134 
1135 		if (dst == src1)
1136 			needs_move = 0;
1137 	}
1138 
1139 	if (needs_move)
1140 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1141 
1142 	if (type == RIL_A)
1143 		return push_inst(compiler, ins | R36A(dst_r) | (src2w & 0xffffffff));
1144 	return push_inst(compiler, ins | R20A(dst_r) | (src2w & 0xffff));
1145 }
1146 
1147 static sljit_s32 emit_rie_d(struct sljit_compiler *compiler, sljit_ins ins,
1148 	sljit_s32 dst,
1149 	sljit_s32 src1, sljit_sw src1w,
1150 	sljit_sw src2w)
1151 {
1152 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
1153 	sljit_gpr src_r = tmp0;
1154 
1155 	if (!FAST_IS_REG(src1))
1156 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
1157 	else
1158 		src_r = gpr(src1 & REG_MASK);
1159 
1160 	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | (sljit_ins)(src2w & 0xffff) << 16);
1161 }
1162 
1163 typedef enum {
1164 	RX_A,
1165 	RXY_A,
1166 } emit_rx_type;
1167 
1168 static sljit_s32 emit_rx(struct sljit_compiler *compiler, sljit_ins ins,
1169 	sljit_s32 dst,
1170 	sljit_s32 src1, sljit_sw src1w,
1171 	sljit_s32 src2, sljit_sw src2w,
1172 	emit_rx_type type)
1173 {
1174 	sljit_gpr dst_r = tmp0;
1175 	sljit_s32 needs_move = 1;
1176 	sljit_gpr base, index;
1177 
1178 	SLJIT_ASSERT(src2 & SLJIT_MEM);
1179 
1180 	if (FAST_IS_REG(dst)) {
1181 		dst_r = gpr(dst);
1182 
1183 		if (dst == src1)
1184 			needs_move = 0;
1185 		else if (dst == (src2 & REG_MASK) || (dst == OFFS_REG(src2))) {
1186 			dst_r = tmp0;
1187 			needs_move = 2;
1188 		}
1189 	}
1190 
1191 	if (needs_move)
1192 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
1193 
1194 	base = gpr(src2 & REG_MASK);
1195 	index = tmp0;
1196 
1197 	if (src2 & OFFS_REG_MASK) {
1198 		index = gpr(OFFS_REG(src2));
1199 
1200 		if (src2w != 0) {
1201 			FAIL_IF(push_inst(compiler, sllg(tmp1, index, src2w & 0x3, 0)));
1202 			src2w = 0;
1203 			index = tmp1;
1204 		}
1205 	} else if ((type == RX_A && !is_u12(src2w)) || (type == RXY_A && !is_s20(src2w))) {
1206 		FAIL_IF(push_load_imm_inst(compiler, tmp1, src2w));
1207 
1208 		if (src2 & REG_MASK)
1209 			index = tmp1;
1210 		else
1211 			base = tmp1;
1212 		src2w = 0;
1213 	}
1214 
1215 	if (type == RX_A)
1216 		ins |= R20A(dst_r) | R16A(index) | R12A(base) | (sljit_ins)src2w;
1217 	else
1218 		ins |= R36A(dst_r) | R32A(index) | R28A(base) | disp_s20((sljit_s32)src2w);
1219 
1220 	FAIL_IF(push_inst(compiler, ins));
1221 
1222 	if (needs_move != 2)
1223 		return SLJIT_SUCCESS;
1224 
1225 	dst_r = gpr(dst);
1226 	return push_inst(compiler, (compiler->mode & SLJIT_32) ? lr(dst_r, tmp0) : lgr(dst_r, tmp0));
1227 }
1228 
1229 static sljit_s32 emit_siy(struct sljit_compiler *compiler, sljit_ins ins,
1230 	sljit_s32 dst, sljit_sw dstw,
1231 	sljit_sw srcw)
1232 {
1233 	sljit_gpr dst_r = tmp1;
1234 
1235 	SLJIT_ASSERT(dst & SLJIT_MEM);
1236 
1237 	if (dst & OFFS_REG_MASK) {
1238 		sljit_gpr index = tmp1;
1239 
1240 		if ((dstw & 0x3) == 0)
1241 			index = gpr(OFFS_REG(dst));
1242 		else
1243 			FAIL_IF(push_inst(compiler, sllg(tmp1, index, dstw & 0x3, 0)));
1244 
1245 		FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, index)));
1246 		dstw = 0;
1247 	}
1248 	else if (!is_s20(dstw)) {
1249 		FAIL_IF(push_load_imm_inst(compiler, tmp1, dstw));
1250 
1251 		if (dst & REG_MASK)
1252 			FAIL_IF(push_inst(compiler, la(tmp1, 0, dst_r, tmp1)));
1253 
1254 		dstw = 0;
1255 	}
1256 	else
1257 		dst_r = gpr(dst & REG_MASK);
1258 
1259 	return push_inst(compiler, ins | ((sljit_ins)(srcw & 0xff) << 32) | R28A(dst_r) | disp_s20((sljit_s32)dstw));
1260 }
1261 
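/* op_r/op_gr are the 32/64-bit two-operand register forms, op_rk/op_grk the
   corresponding three-operand forms, and op/op_y/op_g the 12-bit, 20-bit and
   64-bit memory forms; a zero entry means that form is unavailable. */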
1262 struct ins_forms {
1263 	sljit_ins op_r;
1264 	sljit_ins op_gr;
1265 	sljit_ins op_rk;
1266 	sljit_ins op_grk;
1267 	sljit_ins op;
1268 	sljit_ins op_y;
1269 	sljit_ins op_g;
1270 };
1271 
1272 static sljit_s32 emit_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1273 	sljit_s32 dst,
1274 	sljit_s32 src1, sljit_sw src1w,
1275 	sljit_s32 src2, sljit_sw src2w)
1276 {
1277 	sljit_s32 mode = compiler->mode;
1278 	sljit_ins ins, ins_k;
1279 
1280 	if ((src1 | src2) & SLJIT_MEM) {
1281 		sljit_ins ins12, ins20;
1282 
1283 		if (mode & SLJIT_32) {
1284 			ins12 = forms->op;
1285 			ins20 = forms->op_y;
1286 		}
1287 		else {
1288 			ins12 = 0;
1289 			ins20 = forms->op_g;
1290 		}
1291 
1292 		if (ins12 && ins20) {
1293 			/* Extra instructions needed for address computation can be executed independently. */
1294 			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1295 					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : is_s20(src1w)))) {
1296 				if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1297 					return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1298 
1299 				return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1300 			}
1301 
1302 			if (src1 & SLJIT_MEM) {
1303 				if ((src1 & OFFS_REG_MASK) || is_u12(src1w) || !is_s20(src1w))
1304 					return emit_rx(compiler, ins12, dst, src2, src2w, src1, src1w, RX_A);
1305 
1306 				return emit_rx(compiler, ins20, dst, src2, src2w, src1, src1w, RXY_A);
1307 			}
1308 		}
1309 		else if (ins12 || ins20) {
1310 			emit_rx_type rx_type;
1311 
1312 			if (ins12) {
1313 				rx_type = RX_A;
1314 				ins = ins12;
1315 			}
1316 			else {
1317 				rx_type = RXY_A;
1318 				ins = ins20;
1319 			}
1320 
1321 			if ((src2 & SLJIT_MEM) && (!(src1 & SLJIT_MEM)
1322 					|| ((src1 & OFFS_REG_MASK) ? (src1w & 0x3) == 0 : (rx_type == RX_A ? is_u12(src1w) : is_s20(src1w)))))
1323 				return emit_rx(compiler, ins, dst, src1, src1w, src2, src2w, rx_type);
1324 
1325 			if (src1 & SLJIT_MEM)
1326 				return emit_rx(compiler, ins, dst, src2, src2w, src1, src1w, rx_type);
1327 		}
1328 	}
1329 
1330 	if (mode & SLJIT_32) {
1331 		ins = forms->op_r;
1332 		ins_k = forms->op_rk;
1333 	}
1334 	else {
1335 		ins = forms->op_gr;
1336 		ins_k = forms->op_grk;
1337 	}
1338 
1339 	SLJIT_ASSERT(ins != 0 || ins_k != 0);
1340 
1341 	if (ins && FAST_IS_REG(dst)) {
1342 		if (dst == src1)
1343 			return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1344 
1345 		if (dst == src2)
1346 			return emit_rr(compiler, ins, dst, src2, src2w, src1, src1w);
1347 	}
1348 
1349 	if (ins_k == 0)
1350 		return emit_rr(compiler, ins, dst, src1, src1w, src2, src2w);
1351 
1352 	return emit_rrf(compiler, ins_k, dst, src1, src1w, src2, src2w);
1353 }
1354 
1355 static sljit_s32 emit_non_commutative(struct sljit_compiler *compiler, const struct ins_forms *forms,
1356 	sljit_s32 dst,
1357 	sljit_s32 src1, sljit_sw src1w,
1358 	sljit_s32 src2, sljit_sw src2w)
1359 {
1360 	sljit_s32 mode = compiler->mode;
1361 	sljit_ins ins;
1362 
1363 	if (src2 & SLJIT_MEM) {
1364 		sljit_ins ins12, ins20;
1365 
1366 		if (mode & SLJIT_32) {
1367 			ins12 = forms->op;
1368 			ins20 = forms->op_y;
1369 		}
1370 		else {
1371 			ins12 = 0;
1372 			ins20 = forms->op_g;
1373 		}
1374 
1375 		if (ins12 && ins20) {
1376 			if ((src2 & OFFS_REG_MASK) || is_u12(src2w) || !is_s20(src2w))
1377 				return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1378 
1379 			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1380 		}
1381 		else if (ins12)
1382 			return emit_rx(compiler, ins12, dst, src1, src1w, src2, src2w, RX_A);
1383 		else if (ins20)
1384 			return emit_rx(compiler, ins20, dst, src1, src1w, src2, src2w, RXY_A);
1385 	}
1386 
1387 	ins = (mode & SLJIT_32) ? forms->op_rk : forms->op_grk;
1388 
1389 	if (ins == 0 || (FAST_IS_REG(dst) && dst == src1))
1390 		return emit_rr(compiler, (mode & SLJIT_32) ? forms->op_r : forms->op_gr, dst, src1, src1w, src2, src2w);
1391 
1392 	return emit_rrf(compiler, ins, dst, src1, src1w, src2, src2w);
1393 }
1394 
1395 SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler, sljit_s32 options, void *exec_allocator_data)
1396 {
1397 	struct sljit_label *label;
1398 	struct sljit_jump *jump;
1399 	struct sljit_const *const_;
1400 	sljit_sw executable_offset;
1401 	sljit_uw ins_size = compiler->size << 1;
1402 	sljit_uw pool_size = 0; /* literal pool */
1403 	sljit_uw pad_size;
1404 	sljit_uw half_count;
1405 	SLJIT_NEXT_DEFINE_TYPES;
1406 	struct sljit_memory_fragment *buf;
1407 	sljit_ins *buf_ptr;
1408 	sljit_ins *buf_end;
1409 	sljit_u16 *code;
1410 	sljit_u16 *code_ptr;
1411 	sljit_uw *pool, *pool_ptr;
1412 	sljit_ins ins;
1413 	sljit_sw source, offset;
1414 
1415 	CHECK_ERROR_PTR();
1416 	CHECK_PTR(check_sljit_generate_code(compiler));
1417 	reverse_buf(compiler);
1418 
1419 	jump = compiler->jumps;
1420 	while (jump != NULL) {
1421 		if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
1422 			/* encoded: */
1423 			/*   brasl %r14, <rel_addr> (or brcl <mask>, <rel_addr>) */
1424 			/* replace with: */
1425 			/*   lgrl %r1, <pool_addr> */
1426 			/*   bras %r14, %r1 (or bcr <mask>, %r1) */
1427 			pool_size += sizeof(*pool);
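			/* brasl/brcl (6 bytes) becomes lgrl + basr/bcr (8 bytes), hence
			   the 2 extra bytes; the JUMP_MOV_ADDR form keeps its 6-byte slot
			   and only needs the pool entry. */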
1428 			if (!(jump->flags & JUMP_MOV_ADDR))
1429 				ins_size += 2;
1430 		}
1431 		jump = jump->next;
1432 	}
1433 
1434 	const_ = compiler->consts;
1435 	while (const_) {
1436 		pool_size += sizeof(*pool);
1437 		const_ = const_->next;
1438 	}
1439 
1440 	/* pad code size to 8 bytes so it is accessible with halfword offsets */
1441 	/* the literal pool needs to be doubleword aligned */
1442 	pad_size = ((ins_size + 7UL) & ~7UL) - ins_size;
1443 	SLJIT_ASSERT(pad_size < 8UL);
1444 
1445 	/* allocate target buffer */
1446 	code = (sljit_u16*)allocate_executable_memory(ins_size + pad_size + pool_size, options, exec_allocator_data, &executable_offset);
1447 	PTR_FAIL_WITH_EXEC_IF(code);
1448 	code_ptr = code;
1449 
1450 	/* TODO(carenas): pool is optional, and the ABI recommends it to
1451 	 *                be created before the function code, instead of
1452 	 *                globally; if the generated code is too big it could
1453 	 *                need offsets bigger than 32-bit words and assert()
1454 	 */
1455 	pool = (sljit_uw *)((sljit_uw)code + ins_size + pad_size);
1456 	pool_ptr = pool;
1457 	buf = compiler->buf;
1458 	half_count = 0;
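	/* half_count tracks the output position in 2-byte halfwords; label, jump
	   and const addresses were recorded in the same units during emission. */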
1459 
1460 	label = compiler->labels;
1461 	jump = compiler->jumps;
1462 	const_ = compiler->consts;
1463 	SLJIT_NEXT_INIT_TYPES();
1464 	SLJIT_GET_NEXT_MIN();
1465 
1466 	do {
1467 		buf_ptr = (sljit_ins*)buf->memory;
1468 		buf_end = buf_ptr + (buf->used_size >> 3);
1469 		do {
1470 			ins = *buf_ptr++;
1471 
1472 			if (next_min_addr == half_count) {
1473 				SLJIT_ASSERT(!label || label->size >= half_count);
1474 				SLJIT_ASSERT(!jump || jump->addr >= half_count);
1475 				SLJIT_ASSERT(!const_ || const_->addr >= half_count);
1476 
1477 				if (next_min_addr == next_label_size) {
1478 					label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1479 					label = label->next;
1480 					next_label_size = SLJIT_GET_NEXT_SIZE(label);
1481 				}
1482 
1483 				if (next_min_addr == next_jump_addr) {
1484 					if (SLJIT_UNLIKELY(jump->flags & JUMP_MOV_ADDR)) {
1485 						source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1486 
1487 						jump->addr = (sljit_uw)pool_ptr;
1488 
1489 						/* store target into pool */
1490 						offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1491 						pool_ptr++;
1492 
1493 						SLJIT_ASSERT(!(offset & 1));
1494 						offset >>= 1;
1495 						SLJIT_ASSERT(is_s32(offset));
1496 						ins |= (sljit_ins)offset & 0xffffffff;
1497 					} else if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR)) {
1498 						sljit_ins arg;
1499 
1500 						jump->addr = (sljit_uw)pool_ptr;
1501 
1502 						/* load address into tmp1 */
1503 						source = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1504 						offset = (sljit_sw)SLJIT_ADD_EXEC_OFFSET(pool_ptr, executable_offset) - source;
1505 
1506 						SLJIT_ASSERT(!(offset & 1));
1507 						offset >>= 1;
1508 						SLJIT_ASSERT(is_s32(offset));
1509 
1510 						code_ptr[0] = (sljit_u16)(0xc408 | R4A(tmp1) /* lgrl */);
1511 						code_ptr[1] = (sljit_u16)(offset >> 16);
1512 						code_ptr[2] = (sljit_u16)offset;
1513 						code_ptr += 3;
1514 						pool_ptr++;
1515 
1516 						/* branch to tmp1 */
1517 						arg = (ins >> 36) & 0xf;
1518 						if (((ins >> 32) & 0xf) == 4) {
1519 							/* brcl -> bcr */
1520 							ins = bcr(arg, tmp1);
1521 						} else {
1522 							SLJIT_ASSERT(((ins >> 32) & 0xf) == 5);
1523 							/* brasl -> basr */
1524 							ins = basr(arg, tmp1);
1525 						}
1526 
1527 						/* Adjust half_count. */
1528 						half_count += 2;
1529 					} else
1530 						jump->addr = (sljit_uw)code_ptr;
1531 
1532 					jump = jump->next;
1533 					next_jump_addr = SLJIT_GET_NEXT_ADDRESS(jump);
1534 				} else  if (next_min_addr == next_const_addr) {
1535 					/* update instruction with relative address of constant */
1536 					source = (sljit_sw)code_ptr;
1537 					offset = (sljit_sw)pool_ptr - source;
1538 
1539 					SLJIT_ASSERT(!(offset & 0x1));
1540 					offset >>= 1; /* halfword (not byte) offset */
1541 					SLJIT_ASSERT(is_s32(offset));
1542 
1543 					ins |= (sljit_ins)offset & 0xffffffff;
1544 
1545 					/* update address */
1546 					const_->addr = (sljit_uw)pool_ptr;
1547 
1548 					/* store initial value into pool and update pool address */
1549 					*(pool_ptr++) = (sljit_uw)(((struct sljit_s390x_const*)const_)->init_value);
1550 
1551 					/* move to next constant */
1552 					const_ = const_->next;
1553 					next_const_addr = SLJIT_GET_NEXT_ADDRESS(const_);
1554 				}
1555 
1556 				SLJIT_GET_NEXT_MIN();
1557 			}
1558 
1559 			if (ins & 0xffff00000000L) {
1560 				*code_ptr++ = (sljit_u16)(ins >> 32);
1561 				half_count++;
1562 			}
1563 
1564 			if (ins & 0xffffffff0000L) {
1565 				*code_ptr++ = (sljit_u16)(ins >> 16);
1566 				half_count++;
1567 			}
1568 
1569 			*code_ptr++ = (sljit_u16)ins;
1570 			half_count++;
1571 		} while (buf_ptr < buf_end);
1572 
1573 		buf = buf->next;
1574 	} while (buf);
1575 
1576 	if (next_label_size == half_count) {
1577 		label->u.addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1578 		label = label->next;
1579 	}
1580 
1581 	SLJIT_ASSERT(!label);
1582 	SLJIT_ASSERT(!jump);
1583 	SLJIT_ASSERT(!const_);
1584 	SLJIT_ASSERT(code + (ins_size >> 1) == code_ptr);
1585 	SLJIT_ASSERT((sljit_u8 *)pool + pool_size == (sljit_u8 *)pool_ptr);
1586 
1587 	jump = compiler->jumps;
1588 	while (jump != NULL) {
1589 		offset = (sljit_sw)((jump->flags & JUMP_ADDR) ? jump->u.target : jump->u.label->u.addr);
1590 
1591 		if (jump->flags & (SLJIT_REWRITABLE_JUMP | JUMP_ADDR | JUMP_MOV_ADDR)) {
1592 			/* Store jump target into pool. */
1593 			*(sljit_uw*)(jump->addr) = (sljit_uw)offset;
1594 		} else {
1595 			code_ptr = (sljit_u16*)jump->addr;
1596 			offset -= (sljit_sw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1597 
1598 			/* offset must be halfword aligned */
1599 			SLJIT_ASSERT(!(offset & 1));
1600 			offset >>= 1;
1601 			SLJIT_ASSERT(is_s32(offset)); /* TODO(mundaym): handle arbitrary offsets */
1602 
1603 			code_ptr[1] = (sljit_u16)(offset >> 16);
1604 			code_ptr[2] = (sljit_u16)offset;
1605 		}
1606 		jump = jump->next;
1607 	}
1608 
1609 	compiler->error = SLJIT_ERR_COMPILED;
1610 	compiler->executable_offset = executable_offset;
1611 	compiler->executable_size = ins_size;
1612 	if (pool_size)
1613 		compiler->executable_size += (pad_size + pool_size);
1614 
1615 	code = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);
1616 	code_ptr = (sljit_u16 *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
1617 	SLJIT_CACHE_FLUSH(code, code_ptr);
1618 	SLJIT_UPDATE_WX_FLAGS(code, code_ptr, 1);
1619 	return code;
1620 }
1621 
1622 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
1623 {
1624 	/* TODO(mundaym): implement all */
1625 	switch (feature_type) {
1626 	case SLJIT_HAS_FPU:
1627 #ifdef SLJIT_IS_FPU_AVAILABLE
1628 		return (SLJIT_IS_FPU_AVAILABLE) != 0;
1629 #else
1630 		return 1;
1631 #endif /* SLJIT_IS_FPU_AVAILABLE */
1632 
1633 	case SLJIT_HAS_CLZ:
1634 	case SLJIT_HAS_REV:
1635 	case SLJIT_HAS_ROT:
1636 	case SLJIT_HAS_PREFETCH:
1637 	case SLJIT_HAS_COPY_F32:
1638 	case SLJIT_HAS_COPY_F64:
1639 	case SLJIT_HAS_SIMD:
1640 	case SLJIT_HAS_ATOMIC:
1641 		return 1;
1642 
1643 	case SLJIT_HAS_CTZ:
1644 		return 2;
1645 
1646 	case SLJIT_HAS_CMOV:
1647 		return have_lscond1() ? 1 : 0;
1648 	}
1649 	return 0;
1650 }
1651 
1652 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_cmp_info(sljit_s32 type)
1653 {
1654 	SLJIT_UNUSED_ARG(type);
1655 	return 0;
1656 }
1657 
1658 /* --------------------------------------------------------------------- */
1659 /*  Entry, exit                                                          */
1660 /* --------------------------------------------------------------------- */
1661 
1662 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
1663 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1664 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1665 {
1666 	sljit_s32 saved_arg_count = SLJIT_KEPT_SAVEDS_COUNT(options);
1667 	sljit_s32 offset, i, tmp;
1668 
1669 	CHECK_ERROR();
1670 	CHECK(check_sljit_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1671 	set_emit_enter(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1672 
1673 	/* Saved registers are stored in callee allocated save area. */
1674 	SLJIT_ASSERT(gpr(SLJIT_FIRST_SAVED_REG) == r6 && gpr(SLJIT_S0) == r13);
1675 
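	/* Frame layout sketch (offsets relative to %r15 on entry): the first two
	   doublewords are skipped, callee-saved GPRs (r6 and up, including r14) are
	   stored next, then the callee-saved FPRs and any saved FP scratch
	   registers; the local frame itself is allocated afterwards with lay/slgfi. */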
1676 	offset = 2 * SSIZE_OF(sw);
1677 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1678 		if (saved_arg_count == 0) {
1679 			FAIL_IF(push_inst(compiler, stmg(r6, r14, offset, r15)));
1680 			offset += 9 * SSIZE_OF(sw);
1681 		} else {
1682 			FAIL_IF(push_inst(compiler, stmg(r6, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1683 			offset += (8 - saved_arg_count) * SSIZE_OF(sw);
1684 		}
1685 	} else {
1686 		if (scratches == SLJIT_FIRST_SAVED_REG) {
1687 			FAIL_IF(push_inst(compiler, stg(r6, offset, 0, r15)));
1688 			offset += SSIZE_OF(sw);
1689 		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1690 			FAIL_IF(push_inst(compiler, stmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1691 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1692 		}
1693 
1694 		if (saved_arg_count == 0) {
1695 			if (saveds == 0) {
1696 				FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1697 				offset += SSIZE_OF(sw);
1698 			} else {
1699 				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r14, offset, r15)));
1700 				offset += (saveds + 1) * SSIZE_OF(sw);
1701 			}
1702 		} else if (saveds > saved_arg_count) {
1703 			if (saveds == saved_arg_count + 1) {
1704 				FAIL_IF(push_inst(compiler, stg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1705 				offset += SSIZE_OF(sw);
1706 			} else {
1707 				FAIL_IF(push_inst(compiler, stmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)saved_arg_count, offset, r15)));
1708 				offset += (saveds - saved_arg_count) * SSIZE_OF(sw);
1709 			}
1710 		}
1711 	}
1712 
1713 	if (saved_arg_count > 0) {
1714 		FAIL_IF(push_inst(compiler, stg(r14, offset, 0, r15)));
1715 		offset += SSIZE_OF(sw);
1716 	}
1717 
1718 	tmp = SLJIT_FS0 - fsaveds;
1719 	for (i = SLJIT_FS0; i > tmp; i--) {
1720 		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1721 		offset += SSIZE_OF(sw);
1722 	}
1723 
1724 	for (i = fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1725 		FAIL_IF(push_inst(compiler, 0x60000000 /* std */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1726 		offset += SSIZE_OF(sw);
1727 	}
1728 
1729 	local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1730 	compiler->local_size = local_size;
1731 
1732 	if (is_s20(-local_size))
1733 		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(-local_size)));
1734 	else
1735 		FAIL_IF(push_inst(compiler, 0xc20400000000 /* slgfi */ | R36A(r15) | (sljit_ins)local_size));
1736 
1737 	if (options & SLJIT_ENTER_REG_ARG)
1738 		return SLJIT_SUCCESS;
1739 
1740 	arg_types >>= SLJIT_ARG_SHIFT;
1741 	saved_arg_count = 0;
1742 	tmp = 0;
1743 	while (arg_types > 0) {
1744 		if ((arg_types & SLJIT_ARG_MASK) < SLJIT_ARG_TYPE_F64) {
1745 			if (!(arg_types & SLJIT_ARG_TYPE_SCRATCH_REG)) {
1746 				FAIL_IF(push_inst(compiler, lgr(gpr(SLJIT_S0 - saved_arg_count), gpr(SLJIT_R0 + tmp))));
1747 				saved_arg_count++;
1748 			}
1749 			tmp++;
1750 		}
1751 
1752 		arg_types >>= SLJIT_ARG_SHIFT;
1753 	}
1754 
1755 	return SLJIT_SUCCESS;
1756 }
1757 
1758 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
1759 	sljit_s32 options, sljit_s32 arg_types, sljit_s32 scratches, sljit_s32 saveds,
1760 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
1761 {
1762 	CHECK_ERROR();
1763 	CHECK(check_sljit_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size));
1764 	set_set_context(compiler, options, arg_types, scratches, saveds, fscratches, fsaveds, local_size);
1765 
1766 	compiler->local_size = (local_size + SLJIT_S390X_DEFAULT_STACK_FRAME_SIZE + 0xf) & ~0xf;
1767 	return SLJIT_SUCCESS;
1768 }
1769 
1770 static sljit_s32 emit_stack_frame_release(struct sljit_compiler *compiler, sljit_gpr last_reg)
1771 {
1772 	sljit_s32 offset, i, tmp;
1773 	sljit_s32 local_size = compiler->local_size;
1774 	sljit_s32 saveds = compiler->saveds;
1775 	sljit_s32 scratches = compiler->scratches;
1776 	sljit_s32 kept_saveds_count = SLJIT_KEPT_SAVEDS_COUNT(compiler->options);
1777 
1778 	if (is_u12(local_size))
1779 		FAIL_IF(push_inst(compiler, 0x41000000 /* la */ | R20A(r15) | R12A(r15) | (sljit_ins)local_size));
1780 	else if (is_s20(local_size))
1781 		FAIL_IF(push_inst(compiler, 0xe30000000071 /* lay */ | R36A(r15) | R28A(r15) | disp_s20(local_size)));
1782 	else
1783 		FAIL_IF(push_inst(compiler, 0xc20a00000000 /* algfi */ | R36A(r15) | (sljit_ins)local_size));
1784 
1785 	offset = 2 * SSIZE_OF(sw);
1786 	if (saveds + scratches >= SLJIT_NUMBER_OF_REGISTERS) {
1787 		if (kept_saveds_count == 0) {
1788 			FAIL_IF(push_inst(compiler, lmg(r6, last_reg, offset, r15)));
1789 			offset += 9 * SSIZE_OF(sw);
1790 		} else {
1791 			FAIL_IF(push_inst(compiler, lmg(r6, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1792 			offset += (8 - kept_saveds_count) * SSIZE_OF(sw);
1793 		}
1794 	} else {
1795 		if (scratches == SLJIT_FIRST_SAVED_REG) {
1796 			FAIL_IF(push_inst(compiler, lg(r6, offset, 0, r15)));
1797 			offset += SSIZE_OF(sw);
1798 		} else if (scratches > SLJIT_FIRST_SAVED_REG) {
1799 			FAIL_IF(push_inst(compiler, lmg(r6, r6 + (sljit_gpr)(scratches - SLJIT_FIRST_SAVED_REG), offset, r15)));
1800 			offset += (scratches - (SLJIT_FIRST_SAVED_REG - 1)) * SSIZE_OF(sw);
1801 		}
1802 
1803 		if (kept_saveds_count == 0) {
1804 			if (saveds == 0) {
1805 				if (last_reg == r14)
1806 					FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1807 				offset += SSIZE_OF(sw);
1808 			} else if (saveds == 1 && last_reg == r13) {
1809 				FAIL_IF(push_inst(compiler, lg(r13, offset, 0, r15)));
1810 				offset += 2 * SSIZE_OF(sw);
1811 			} else {
1812 				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, last_reg, offset, r15)));
1813 				offset += (saveds + 1) * SSIZE_OF(sw);
1814 			}
1815 		} else if (saveds > kept_saveds_count) {
1816 			if (saveds == kept_saveds_count + 1) {
1817 				FAIL_IF(push_inst(compiler, lg(r14 - (sljit_gpr)saveds, offset, 0, r15)));
1818 				offset += SSIZE_OF(sw);
1819 			} else {
1820 				FAIL_IF(push_inst(compiler, lmg(r14 - (sljit_gpr)saveds, r13 - (sljit_gpr)kept_saveds_count, offset, r15)));
1821 				offset += (saveds - kept_saveds_count) * SSIZE_OF(sw);
1822 			}
1823 		}
1824 	}
1825 
1826 	if (kept_saveds_count > 0) {
1827 		if (last_reg == r14)
1828 			FAIL_IF(push_inst(compiler, lg(r14, offset, 0, r15)));
1829 		offset += SSIZE_OF(sw);
1830 	}
1831 
1832 	tmp = SLJIT_FS0 - compiler->fsaveds;
1833 	for (i = SLJIT_FS0; i > tmp; i--) {
1834 		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1835 		offset += SSIZE_OF(sw);
1836 	}
1837 
1838 	for (i = compiler->fscratches; i >= SLJIT_FIRST_SAVED_FLOAT_REG; i--) {
1839 		FAIL_IF(push_inst(compiler, 0x68000000 /* ld */ | F20(i) | R12A(r15) | (sljit_ins)offset));
1840 		offset += SSIZE_OF(sw);
1841 	}
1842 
1843 	return SLJIT_SUCCESS;
1844 }
1845 
1846 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_void(struct sljit_compiler *compiler)
1847 {
1848 	CHECK_ERROR();
1849 	CHECK(check_sljit_emit_return_void(compiler));
1850 
1851 	FAIL_IF(emit_stack_frame_release(compiler, r14));
1852 	return push_inst(compiler, br(r14)); /* return */
1853 }
1854 
1855 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return_to(struct sljit_compiler *compiler,
1856 	sljit_s32 src, sljit_sw srcw)
1857 {
1858 	CHECK_ERROR();
1859 	CHECK(check_sljit_emit_return_to(compiler, src, srcw));
1860 
1861 	if (src & SLJIT_MEM) {
1862 		ADJUST_LOCAL_OFFSET(src, srcw);
1863 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
1864 		src = TMP_REG2;
1865 		srcw = 0;
1866 	} else if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
1867 		FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
1868 		src = TMP_REG2;
1869 		srcw = 0;
1870 	}
1871 
1872 	FAIL_IF(emit_stack_frame_release(compiler, r13));
1873 
1874 	SLJIT_SKIP_CHECKS(compiler);
1875 	return sljit_emit_ijump(compiler, SLJIT_JUMP, src, srcw);
1876 }
1877 
1878 /* --------------------------------------------------------------------- */
1879 /*  Operators                                                            */
1880 /* --------------------------------------------------------------------- */
1881 
1882 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
1883 {
1884 	sljit_gpr arg0 = gpr(SLJIT_R0);
1885 	sljit_gpr arg1 = gpr(SLJIT_R1);
1886 
1887 	CHECK_ERROR();
1888 	CHECK(check_sljit_emit_op0(compiler, op));
1889 
1890 	op = GET_OPCODE(op) | (op & SLJIT_32);
1891 	switch (op) {
1892 	case SLJIT_BREAKPOINT:
1893 		/* The following invalid instruction is emitted by gdb. */
1894 		return push_inst(compiler, 0x0001 /* 2-byte trap */);
1895 	case SLJIT_NOP:
1896 		return push_inst(compiler, 0x0700 /* 2-byte nop */);
1897 	case SLJIT_LMUL_UW:
1898 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1899 		break;
1900 	case SLJIT_LMUL_SW:
1901 		/* signed multiplication from: */
1902 		/* Hacker's Delight, Second Edition: Chapter 8-3. */
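		/* Sketch of the identity used here: for 64-bit a and b,
		     hi_signed(a * b) == hi_unsigned(a * b) - (a < 0 ? b : 0) - (b < 0 ? a : 0)
		   tmp0/tmp1 hold the two correction terms (built from arithmetic shifts
		   and ANDs) that are subtracted from the unsigned high word below. */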
1903 		FAIL_IF(push_inst(compiler, srag(tmp0, arg0, 63, 0)));
1904 		FAIL_IF(push_inst(compiler, srag(tmp1, arg1, 63, 0)));
1905 		FAIL_IF(push_inst(compiler, ngr(tmp0, arg1)));
1906 		FAIL_IF(push_inst(compiler, ngr(tmp1, arg0)));
1907 
1908 		/* unsigned multiplication */
1909 		FAIL_IF(push_inst(compiler, mlgr(arg0, arg0)));
1910 
1911 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp0)));
1912 		FAIL_IF(push_inst(compiler, sgr(arg0, tmp1)));
1913 		break;
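	/* The divide instructions below operate on the even/odd register pair
	   r0:r1 (tmp0:tmp1): the dividend is placed in the pair, the quotient is
	   returned in the odd register and the remainder in the even one. */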
1914 	case SLJIT_DIV_U32:
1915 	case SLJIT_DIVMOD_U32:
1916 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1917 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1918 		FAIL_IF(push_inst(compiler, dlr(tmp0, arg1)));
1919 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1920 		if (op == SLJIT_DIVMOD_U32)
1921 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1922 
1923 		return SLJIT_SUCCESS;
1924 	case SLJIT_DIV_S32:
1925 	case SLJIT_DIVMOD_S32:
1926 		FAIL_IF(push_inst(compiler, lhi(tmp0, 0)));
1927 		FAIL_IF(push_inst(compiler, lr(tmp1, arg0)));
1928 		FAIL_IF(push_inst(compiler, dr(tmp0, arg1)));
1929 		FAIL_IF(push_inst(compiler, lr(arg0, tmp1))); /* quotient */
1930 		if (op == SLJIT_DIVMOD_S32)
1931 			return push_inst(compiler, lr(arg1, tmp0)); /* remainder */
1932 
1933 		return SLJIT_SUCCESS;
1934 	case SLJIT_DIV_UW:
1935 	case SLJIT_DIVMOD_UW:
1936 		FAIL_IF(push_inst(compiler, lghi(tmp0, 0)));
1937 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1938 		FAIL_IF(push_inst(compiler, dlgr(tmp0, arg1)));
1939 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1940 		if (op == SLJIT_DIVMOD_UW)
1941 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1942 
1943 		return SLJIT_SUCCESS;
1944 	case SLJIT_DIV_SW:
1945 	case SLJIT_DIVMOD_SW:
1946 		FAIL_IF(push_inst(compiler, lgr(tmp1, arg0)));
1947 		FAIL_IF(push_inst(compiler, dsgr(tmp0, arg1)));
1948 		FAIL_IF(push_inst(compiler, lgr(arg0, tmp1))); /* quotient */
1949 		if (op == SLJIT_DIVMOD_SW)
1950 			return push_inst(compiler, lgr(arg1, tmp0)); /* remainder */
1951 
1952 		return SLJIT_SUCCESS;
1953 	case SLJIT_ENDBR:
1954 		return SLJIT_SUCCESS;
1955 	case SLJIT_SKIP_FRAMES_BEFORE_RETURN:
1956 		return SLJIT_SUCCESS;
1957 	default:
1958 		SLJIT_UNREACHABLE();
1959 	}
1960 	/* swap result registers */
1961 	FAIL_IF(push_inst(compiler, lgr(tmp0, arg0)));
1962 	FAIL_IF(push_inst(compiler, lgr(arg0, arg1)));
1963 	return push_inst(compiler, lgr(arg1, tmp0));
1964 }
1965 
1966 static sljit_s32 sljit_emit_clz_ctz(struct sljit_compiler *compiler, sljit_s32 op, sljit_gpr dst_r, sljit_gpr src_r)
1967 {
1968 	sljit_s32 is_ctz = (GET_OPCODE(op) == SLJIT_CTZ);
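	/* CTZ is built on FLOGR (find leftmost one, i.e. count leading zeros):
	   for x != 0, ctz(x) == 63 - clz(x & -x), so the lowest set bit is isolated
	   first with a negate-and-AND sequence; the extra arithmetic below also
	   folds in the x == 0 case (flogr then yields 64) and the 32-bit variant. */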
1969 
1970 	if ((op & SLJIT_32) && src_r != tmp0) {
1971 		FAIL_IF(push_inst(compiler, 0xb9160000 /* llgfr */ | R4A(tmp0) | R0A(src_r)));
1972 		src_r = tmp0;
1973 	}
1974 
1975 	if (is_ctz) {
1976 		FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */) | R4A(tmp1) | R0A(src_r)));
1977 
1978 		if (src_r == tmp0)
1979 			FAIL_IF(push_inst(compiler, ((op & SLJIT_32) ? 0x1400 /* nr */ : 0xb9800000 /* ngr */) | R4A(tmp0) | R0A(tmp1)));
1980 		else
1981 			FAIL_IF(push_inst(compiler, 0xb9e40000 /* ngrk */ | R12A(tmp1) | R4A(tmp0) | R0A(src_r)));
1982 
1983 		src_r = tmp0;
1984 	}
1985 
1986 	FAIL_IF(push_inst(compiler, 0xb9830000 /* flogr */ | R4A(tmp0) | R0A(src_r)));
1987 
1988 	if (is_ctz)
1989 		FAIL_IF(push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(tmp1) | R32A(tmp0) | ((sljit_ins)(-64 & 0xffff) << 16)));
1990 
1991 	if (op & SLJIT_32) {
1992 		if (!is_ctz && dst_r != tmp0)
1993 			return push_inst(compiler, 0xec00000000d9 /* aghik */ | R36A(dst_r) | R32A(tmp0) | ((sljit_ins)(-32 & 0xffff) << 16));
1994 
1995 		FAIL_IF(push_inst(compiler, 0xc20800000000 /* agfi */ | R36A(tmp0) | (sljit_u32)-32));
1996 	}
1997 
1998 	if (is_ctz)
1999 		FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp0) | R32A(tmp1) | ((sljit_ins)((op & SLJIT_32) ? 59 : 58) << 24) | (63 << 16) | ((sljit_ins)((op & SLJIT_32) ? 5 : 6) << 8)));
2000 
2001 	if (dst_r == tmp0)
2002 		return SLJIT_SUCCESS;
2003 
2004 	return push_inst(compiler, ((op & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(tmp0));
2005 }
2006 
2007 static sljit_s32 sljit_emit_rev(struct sljit_compiler *compiler, sljit_s32 op,
2008 	sljit_s32 dst, sljit_sw dstw,
2009 	sljit_s32 src, sljit_sw srcw)
2010 {
2011 	struct addr addr;
2012 	sljit_gpr reg;
2013 	sljit_ins ins;
2014 	sljit_s32 opcode = GET_OPCODE(op);
2015 	sljit_s32 is_16bit = (opcode == SLJIT_REV_U16 || opcode == SLJIT_REV_S16);
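	/* Memory operands use the byte-reversing loads/stores (lrvh/strvh, lrv/strv,
	   lrvg/strvg) directly. Register operands are reversed with lrvr/lrvgr, which
	   for the 16-bit variants leaves the reversed halfword in the upper bits, so
	   a final right shift (logical or arithmetic) moves it back down with the
	   requested zero/sign extension. */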
2016 
2017 	if (dst & SLJIT_MEM) {
2018 		if (src & SLJIT_MEM) {
2019 			FAIL_IF(load_store_op(compiler, tmp0, src, srcw, op & SLJIT_32, is_16bit ? load_halfword_forms : load_forms));
2020 			reg = tmp0;
2021 		} else
2022 			reg = gpr(src);
2023 
2024 		FAIL_IF(make_addr_bxy(compiler, &addr, dst, dstw, tmp1));
2025 
2026 		if (is_16bit)
2027 			ins = 0xe3000000003f /* strvh */;
2028 		else
2029 			ins = (op & SLJIT_32) ? 0xe3000000003e /* strv */ : 0xe3000000002f /* strvg */;
2030 
2031 		return push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
2032 	}
2033 
2034 	reg = gpr(dst);
2035 
2036 	if (src & SLJIT_MEM) {
2037 		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
2038 
2039 		if (is_16bit)
2040 			ins = 0xe3000000001f /* lrvh */;
2041 		else
2042 			ins = (op & SLJIT_32) ? 0xe3000000001e /* lrv */ : 0xe3000000000f /* lrvg */;
2043 
2044 		FAIL_IF(push_inst(compiler, ins | R36A(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset)));
2045 
2046 		if (opcode == SLJIT_REV)
2047 			return SLJIT_SUCCESS;
2048 
2049 		if (is_16bit) {
2050 			if (op & SLJIT_32)
2051 				ins = (opcode == SLJIT_REV_U16) ? 0xb9950000 /* llhr */ : 0xb9270000 /* lhr */;
2052 			else
2053 				ins = (opcode == SLJIT_REV_U16) ? 0xb9850000 /* llghr */ : 0xb9070000 /* lghr */;
2054 		} else
2055 			ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2056 
2057 		return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2058 	}
2059 
2060 	ins = (op & SLJIT_32) ? 0xb91f0000 /* lrvr */ : 0xb90f0000 /* lrvgr */;
2061 	FAIL_IF(push_inst(compiler, ins | R4A(reg) | R0A(gpr(src))));
2062 
2063 	if (opcode == SLJIT_REV)
2064 		return SLJIT_SUCCESS;
2065 
2066 	if (!is_16bit) {
2067 		ins = (opcode == SLJIT_REV_U32) ? 0xb9160000 /* llgfr */ : 0xb9140000 /* lgfr */;
2068 		return push_inst(compiler, ins | R4A(reg) | R0A(reg));
2069 	}
2070 
2071 	if (op & SLJIT_32) {
2072 		ins = (opcode == SLJIT_REV_U16) ? 0x88000000 /* srl */ : 0x8a000000 /* sra */;
2073 		return push_inst(compiler, ins | R20A(reg) | 16);
2074 	}
2075 
2076 	ins = (opcode == SLJIT_REV_U16) ? 0xeb000000000c /* srlg */ : 0xeb000000000a /* srag */;
2077 	return push_inst(compiler, ins | R36A(reg) | R32A(reg) | (48 << 16));
2078 }
2079 
2080 /* LEVAL will be defined later with different parameters as needed */
2081 #define WHEN2(cond, i1, i2) (cond) ? LEVAL(i1) : LEVAL(i2)
2082 
2083 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
2084 	sljit_s32 dst, sljit_sw dstw,
2085 	sljit_s32 src, sljit_sw srcw)
2086 {
2087 	sljit_ins ins;
2088 	struct addr mem;
2089 	sljit_gpr dst_r;
2090 	sljit_gpr src_r;
2091 	sljit_s32 opcode = GET_OPCODE(op);
2092 
2093 	CHECK_ERROR();
2094 	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
2095 	ADJUST_LOCAL_OFFSET(dst, dstw);
2096 	ADJUST_LOCAL_OFFSET(src, srcw);
2097 
2098 	if (opcode >= SLJIT_MOV && opcode <= SLJIT_MOV_P) {
2099 		/* LOAD REGISTER */
2100 		if (FAST_IS_REG(dst) && FAST_IS_REG(src)) {
2101 			dst_r = gpr(dst);
2102 			src_r = gpr(src);
2103 			switch (opcode | (op & SLJIT_32)) {
2104 			/* 32-bit */
2105 			case SLJIT_MOV32_U8:
2106 				ins = llcr(dst_r, src_r);
2107 				break;
2108 			case SLJIT_MOV32_S8:
2109 				ins = lbr(dst_r, src_r);
2110 				break;
2111 			case SLJIT_MOV32_U16:
2112 				ins = llhr(dst_r, src_r);
2113 				break;
2114 			case SLJIT_MOV32_S16:
2115 				ins = lhr(dst_r, src_r);
2116 				break;
2117 			case SLJIT_MOV32:
2118 				if (dst_r == src_r)
2119 					return SLJIT_SUCCESS;
2120 				ins = lr(dst_r, src_r);
2121 				break;
2122 			/* 64-bit */
2123 			case SLJIT_MOV_U8:
2124 				ins = llgcr(dst_r, src_r);
2125 				break;
2126 			case SLJIT_MOV_S8:
2127 				ins = lgbr(dst_r, src_r);
2128 				break;
2129 			case SLJIT_MOV_U16:
2130 				ins = llghr(dst_r, src_r);
2131 				break;
2132 			case SLJIT_MOV_S16:
2133 				ins = lghr(dst_r, src_r);
2134 				break;
2135 			case SLJIT_MOV_U32:
2136 				ins = llgfr(dst_r, src_r);
2137 				break;
2138 			case SLJIT_MOV_S32:
2139 				ins = lgfr(dst_r, src_r);
2140 				break;
2141 			case SLJIT_MOV:
2142 			case SLJIT_MOV_P:
2143 				if (dst_r == src_r)
2144 					return SLJIT_SUCCESS;
2145 				ins = lgr(dst_r, src_r);
2146 				break;
2147 			default:
2148 				ins = 0;
2149 				SLJIT_UNREACHABLE();
2150 				break;
2151 			}
2152 			FAIL_IF(push_inst(compiler, ins));
2153 			return SLJIT_SUCCESS;
2154 		}
2155 		/* LOAD IMMEDIATE */
2156 		if (FAST_IS_REG(dst) && src == SLJIT_IMM) {
2157 			switch (opcode) {
2158 			case SLJIT_MOV_U8:
2159 				srcw = (sljit_sw)((sljit_u8)(srcw));
2160 				break;
2161 			case SLJIT_MOV_S8:
2162 				srcw = (sljit_sw)((sljit_s8)(srcw));
2163 				break;
2164 			case SLJIT_MOV_U16:
2165 				srcw = (sljit_sw)((sljit_u16)(srcw));
2166 				break;
2167 			case SLJIT_MOV_S16:
2168 				srcw = (sljit_sw)((sljit_s16)(srcw));
2169 				break;
2170 			case SLJIT_MOV_U32:
2171 				srcw = (sljit_sw)((sljit_u32)(srcw));
2172 				break;
2173 			case SLJIT_MOV_S32:
2174 			case SLJIT_MOV32:
2175 				srcw = (sljit_sw)((sljit_s32)(srcw));
2176 				break;
2177 			}
2178 			return push_load_imm_inst(compiler, gpr(dst), srcw);
2179 		}
2180 		/* LOAD */
2181 		/* TODO(carenas): avoid reg being defined later */
2182 		#define LEVAL(i) EVAL(i, reg, mem)
2183 		if (FAST_IS_REG(dst) && (src & SLJIT_MEM)) {
2184 			sljit_gpr reg = gpr(dst);
2185 
2186 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2187 			/* TODO(carenas): convert all calls below to LEVAL */
2188 			switch (opcode | (op & SLJIT_32)) {
2189 			case SLJIT_MOV32_U8:
2190 				ins = llc(reg, mem.offset, mem.index, mem.base);
2191 				break;
2192 			case SLJIT_MOV32_S8:
2193 				ins = lb(reg, mem.offset, mem.index, mem.base);
2194 				break;
2195 			case SLJIT_MOV32_U16:
2196 				ins = llh(reg, mem.offset, mem.index, mem.base);
2197 				break;
2198 			case SLJIT_MOV32_S16:
2199 				ins = WHEN2(is_u12(mem.offset), lh, lhy);
2200 				break;
2201 			case SLJIT_MOV32:
2202 				ins = WHEN2(is_u12(mem.offset), l, ly);
2203 				break;
2204 			case SLJIT_MOV_U8:
2205 				ins = LEVAL(llgc);
2206 				break;
2207 			case SLJIT_MOV_S8:
2208 				ins = lgb(reg, mem.offset, mem.index, mem.base);
2209 				break;
2210 			case SLJIT_MOV_U16:
2211 				ins = LEVAL(llgh);
2212 				break;
2213 			case SLJIT_MOV_S16:
2214 				ins = lgh(reg, mem.offset, mem.index, mem.base);
2215 				break;
2216 			case SLJIT_MOV_U32:
2217 				ins = LEVAL(llgf);
2218 				break;
2219 			case SLJIT_MOV_S32:
2220 				ins = lgf(reg, mem.offset, mem.index, mem.base);
2221 				break;
2222 			case SLJIT_MOV_P:
2223 			case SLJIT_MOV:
2224 				ins = lg(reg, mem.offset, mem.index, mem.base);
2225 				break;
2226 			default:
2227 				ins = 0;
2228 				SLJIT_UNREACHABLE();
2229 				break;
2230 			}
2231 			FAIL_IF(push_inst(compiler, ins));
2232 			return SLJIT_SUCCESS;
2233 		}
2234 		/* STORE and STORE IMMEDIATE */
2235 		if ((dst & SLJIT_MEM) && (FAST_IS_REG(src) || src == SLJIT_IMM)) {
2236 			struct addr mem;
2237 			sljit_gpr reg = FAST_IS_REG(src) ? gpr(src) : tmp0;
2238 
2239 			if (src == SLJIT_IMM) {
2240 				/* TODO(mundaym): MOVE IMMEDIATE? */
2241 				FAIL_IF(push_load_imm_inst(compiler, reg, srcw));
2242 			}
2243 			FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2244 			switch (opcode) {
2245 			case SLJIT_MOV_U8:
2246 			case SLJIT_MOV_S8:
2247 				return push_inst(compiler,
2248 					WHEN2(is_u12(mem.offset), stc, stcy));
2249 			case SLJIT_MOV_U16:
2250 			case SLJIT_MOV_S16:
2251 				return push_inst(compiler,
2252 					WHEN2(is_u12(mem.offset), sth, sthy));
2253 			case SLJIT_MOV_U32:
2254 			case SLJIT_MOV_S32:
2255 			case SLJIT_MOV32:
2256 				return push_inst(compiler,
2257 					WHEN2(is_u12(mem.offset), st, sty));
2258 			case SLJIT_MOV_P:
2259 			case SLJIT_MOV:
2260 				FAIL_IF(push_inst(compiler, LEVAL(stg)));
2261 				return SLJIT_SUCCESS;
2262 			default:
2263 				SLJIT_UNREACHABLE();
2264 			}
2265 		}
2266 		#undef LEVAL
2267 		/* MOVE CHARACTERS */
2268 		if ((dst & SLJIT_MEM) && (src & SLJIT_MEM)) {
2269 			struct addr mem;
2270 			FAIL_IF(make_addr_bxy(compiler, &mem, src, srcw, tmp1));
2271 			switch (opcode) {
2272 			case SLJIT_MOV_U8:
2273 			case SLJIT_MOV_S8:
2274 				FAIL_IF(push_inst(compiler,
2275 					EVAL(llgc, tmp0, mem)));
2276 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2277 				return push_inst(compiler,
2278 					EVAL(stcy, tmp0, mem));
2279 			case SLJIT_MOV_U16:
2280 			case SLJIT_MOV_S16:
2281 				FAIL_IF(push_inst(compiler,
2282 					EVAL(llgh, tmp0, mem)));
2283 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2284 				return push_inst(compiler,
2285 					EVAL(sthy, tmp0, mem));
2286 			case SLJIT_MOV_U32:
2287 			case SLJIT_MOV_S32:
2288 			case SLJIT_MOV32:
2289 				FAIL_IF(push_inst(compiler,
2290 					EVAL(ly, tmp0, mem)));
2291 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2292 				return push_inst(compiler,
2293 					EVAL(sty, tmp0, mem));
2294 			case SLJIT_MOV_P:
2295 			case SLJIT_MOV:
2296 				FAIL_IF(push_inst(compiler,
2297 					EVAL(lg, tmp0, mem)));
2298 				FAIL_IF(make_addr_bxy(compiler, &mem, dst, dstw, tmp1));
2299 				FAIL_IF(push_inst(compiler,
2300 					EVAL(stg, tmp0, mem)));
2301 				return SLJIT_SUCCESS;
2302 			default:
2303 				SLJIT_UNREACHABLE();
2304 			}
2305 		}
2306 		SLJIT_UNREACHABLE();
2307 	}
2308 
2309 	SLJIT_ASSERT(src != SLJIT_IMM);
2310 
2311 	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
2312 	src_r = FAST_IS_REG(src) ? gpr(src) : tmp0;
2313 
2314 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2315 
2316 	/* TODO(mundaym): optimize loads and stores */
2317 	switch (opcode) {
2318 	case SLJIT_CLZ:
2319 	case SLJIT_CTZ:
2320 		if (src & SLJIT_MEM)
2321 			FAIL_IF(load_unsigned_word(compiler, src_r, src, srcw, op & SLJIT_32));
2322 
2323 		FAIL_IF(sljit_emit_clz_ctz(compiler, op, dst_r, src_r));
2324 		break;
2325 	case SLJIT_REV_U32:
2326 	case SLJIT_REV_S32:
2327 		op |= SLJIT_32;
2328 		/* fallthrough */
2329 	case SLJIT_REV:
2330 	case SLJIT_REV_U16:
2331 	case SLJIT_REV_S16:
2332 		return sljit_emit_rev(compiler, op, dst, dstw, src, srcw);
2333 	default:
2334 		SLJIT_UNREACHABLE();
2335 	}
2336 
2337 	if (dst & SLJIT_MEM)
2338 		return store_word(compiler, dst_r, dst, dstw, op & SLJIT_32);
2339 
2340 	return SLJIT_SUCCESS;
2341 }
2342 
2343 static SLJIT_INLINE int is_commutative(sljit_s32 op)
2344 {
2345 	switch (GET_OPCODE(op)) {
2346 	case SLJIT_ADD:
2347 	case SLJIT_ADDC:
2348 	case SLJIT_MUL:
2349 	case SLJIT_AND:
2350 	case SLJIT_OR:
2351 	case SLJIT_XOR:
2352 		return 1;
2353 	}
2354 	return 0;
2355 }
2356 
2357 static const struct ins_forms add_forms = {
2358 	0x1a00, /* ar */
2359 	0xb9080000, /* agr */
2360 	0xb9f80000, /* ark */
2361 	0xb9e80000, /* agrk */
2362 	0x5a000000, /* a */
2363 	0xe3000000005a, /* ay */
2364 	0xe30000000008, /* ag */
2365 };
2366 
2367 static const struct ins_forms logical_add_forms = {
2368 	0x1e00, /* alr */
2369 	0xb90a0000, /* algr */
2370 	0xb9fa0000, /* alrk */
2371 	0xb9ea0000, /* algrk */
2372 	0x5e000000, /* al */
2373 	0xe3000000005e, /* aly */
2374 	0xe3000000000a, /* alg */
2375 };
2376 
2377 static sljit_s32 sljit_emit_add(struct sljit_compiler *compiler, sljit_s32 op,
2378 	sljit_s32 dst, sljit_sw dstw,
2379 	sljit_s32 src1, sljit_sw src1w,
2380 	sljit_s32 src2, sljit_sw src2w)
2381 {
2382 	int sets_overflow = (op & VARIABLE_FLAG_MASK) == SLJIT_SET_OVERFLOW;
2383 	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2384 	const struct ins_forms *forms;
2385 	sljit_ins ins;
2386 
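	/* Immediate operands, roughly in order of preference: add-to-memory with an
	   8-bit immediate (asi/agsi, alsi/algsi) when dst aliases a memory src1, then
	   the 16-bit immediate register forms (ahik/aghik, alhsik/alghsik), then the
	   32-bit immediate forms (afi/agfi, alfi/algfi/slgfi). */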
2387 	if (src2 == SLJIT_IMM) {
2388 		if (!sets_zero_overflow && is_s8(src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2389 			if (sets_overflow)
2390 				ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2391 			else
2392 				ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2393 			return emit_siy(compiler, ins, dst, dstw, src2w);
2394 		}
2395 
2396 		if (is_s16(src2w)) {
2397 			if (sets_overflow)
2398 				ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2399 			else
2400 				ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2401 			FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, src2w));
2402 			goto done;
2403 		}
2404 
2405 		if (!sets_overflow) {
2406 			if ((op & SLJIT_32) || is_u32(src2w)) {
2407 				ins = (op & SLJIT_32) ? 0xc20b00000000 /* alfi */ : 0xc20a00000000 /* algfi */;
2408 				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2409 				goto done;
2410 			}
2411 			if (is_u32(-src2w)) {
2412 				FAIL_IF(emit_ri(compiler, 0xc20400000000 /* slgfi */, dst, src1, src1w, -src2w, RIL_A));
2413 				goto done;
2414 			}
2415 		}
2416 		else if ((op & SLJIT_32) || is_s32(src2w)) {
2417 			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2418 			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2419 			goto done;
2420 		}
2421 	}
2422 
2423 	forms = sets_overflow ? &add_forms : &logical_add_forms;
2424 	FAIL_IF(emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2425 
2426 done:
2427 	if (sets_zero_overflow)
2428 		FAIL_IF(update_zero_overflow(compiler, op, FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0));
2429 
2430 	if (dst & SLJIT_MEM)
2431 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2432 
2433 	return SLJIT_SUCCESS;
2434 }
2435 
2436 static const struct ins_forms sub_forms = {
2437 	0x1b00, /* sr */
2438 	0xb9090000, /* sgr */
2439 	0xb9f90000, /* srk */
2440 	0xb9e90000, /* sgrk */
2441 	0x5b000000, /* s */
2442 	0xe3000000005b, /* sy */
2443 	0xe30000000009, /* sg */
2444 };
2445 
2446 static const struct ins_forms logical_sub_forms = {
2447 	0x1f00, /* slr */
2448 	0xb90b0000, /* slgr */
2449 	0xb9fb0000, /* slrk */
2450 	0xb9eb0000, /* slgrk */
2451 	0x5f000000, /* sl */
2452 	0xe3000000005f, /* sly */
2453 	0xe3000000000b, /* slg */
2454 };
2455 
2456 static sljit_s32 sljit_emit_sub(struct sljit_compiler *compiler, sljit_s32 op,
2457 	sljit_s32 dst, sljit_sw dstw,
2458 	sljit_s32 src1, sljit_sw src1w,
2459 	sljit_s32 src2, sljit_sw src2w)
2460 {
2461 	sljit_s32 flag_type = GET_FLAG_TYPE(op);
2462 	int sets_signed = (flag_type >= SLJIT_SIG_LESS && flag_type <= SLJIT_NOT_OVERFLOW);
2463 	int sets_zero_overflow = (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == (SLJIT_SET_Z | SLJIT_SET_OVERFLOW);
2464 	const struct ins_forms *forms;
2465 	sljit_ins ins;
2466 
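	/* When the result is discarded (dst == TMP_REG2) and only an ordering or
	   equality flag is needed, a compare instruction is emitted instead of a
	   real subtraction. */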
2467 	if (dst == TMP_REG2 && flag_type <= SLJIT_SIG_LESS_EQUAL) {
2468 		int compare_signed = flag_type >= SLJIT_SIG_LESS;
2469 
2470 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_COMPARE;
2471 
2472 		if (src2 == SLJIT_IMM) {
2473 			if (compare_signed || ((op & VARIABLE_FLAG_MASK) == 0 && is_s32(src2w))) {
2474 				if ((op & SLJIT_32) || is_s32(src2w)) {
2475 					ins = (op & SLJIT_32) ? 0xc20d00000000 /* cfi */ : 0xc20c00000000 /* cgfi */;
2476 					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2477 				}
2478 			}
2479 			else {
2480 				if ((op & SLJIT_32) || is_u32(src2w)) {
2481 					ins = (op & SLJIT_32) ? 0xc20f00000000 /* clfi */ : 0xc20e00000000 /* clgfi */;
2482 					return emit_ri(compiler, ins, src1, src1, src1w, src2w, RIL_A);
2483 				}
2484 				if (is_s16(src2w))
2485 					return emit_rie_d(compiler, 0xec00000000db /* alghsik */, (sljit_s32)tmp0, src1, src1w, src2w);
2486 			}
2487 		}
2488 		else if (src2 & SLJIT_MEM) {
2489 			if ((op & SLJIT_32) && ((src2 & OFFS_REG_MASK) || is_u12(src2w))) {
2490 				ins = compare_signed ? 0x59000000 /* c */ : 0x55000000 /* cl */;
2491 				return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RX_A);
2492 			}
2493 
2494 			if (compare_signed)
2495 				ins = (op & SLJIT_32) ? 0xe30000000059 /* cy */ : 0xe30000000020 /* cg */;
2496 			else
2497 				ins = (op & SLJIT_32) ? 0xe30000000055 /* cly */ : 0xe30000000021 /* clg */;
2498 			return emit_rx(compiler, ins, src1, src1, src1w, src2, src2w, RXY_A);
2499 		}
2500 
2501 		if (compare_signed)
2502 			ins = (op & SLJIT_32) ? 0x1900 /* cr */ : 0xb9200000 /* cgr */;
2503 		else
2504 			ins = (op & SLJIT_32) ? 0x1500 /* clr */ : 0xb9210000 /* clgr */;
2505 		return emit_rr(compiler, ins, src1, src1, src1w, src2, src2w);
2506 	}
2507 
2508 	if (src1 == SLJIT_IMM && src1w == 0 && (flag_type == 0 || sets_signed)) {
2509 		ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2510 		FAIL_IF(emit_rr1(compiler, ins, dst, src2, src2w));
2511 		goto done;
2512 	}
2513 
2514 	if (src2 == SLJIT_IMM) {
2515 		sljit_sw neg_src2w = -src2w;
2516 
2517 		if (sets_signed || neg_src2w != 0 || (op & (SLJIT_SET_Z | VARIABLE_FLAG_MASK)) == 0) {
2518 			if (!sets_zero_overflow && is_s8(neg_src2w) && (src1 & SLJIT_MEM) && (dst == src1 && dstw == src1w)) {
2519 				if (sets_signed)
2520 					ins = (op & SLJIT_32) ? 0xeb000000006a /* asi */ : 0xeb000000007a /* agsi */;
2521 				else
2522 					ins = (op & SLJIT_32) ? 0xeb000000006e /* alsi */ : 0xeb000000007e /* algsi */;
2523 				return emit_siy(compiler, ins, dst, dstw, neg_src2w);
2524 			}
2525 
2526 			if (is_s16(neg_src2w)) {
2527 				if (sets_signed)
2528 					ins = (op & SLJIT_32) ? 0xec00000000d8 /* ahik */ : 0xec00000000d9 /* aghik */;
2529 				else
2530 					ins = (op & SLJIT_32) ? 0xec00000000da /* alhsik */ : 0xec00000000db /* alghsik */;
2531 				FAIL_IF(emit_rie_d(compiler, ins, dst, src1, src1w, neg_src2w));
2532 				goto done;
2533 			}
2534 		}
2535 
2536 		if (!sets_signed) {
2537 			if ((op & SLJIT_32) || is_u32(src2w)) {
2538 				ins = (op & SLJIT_32) ? 0xc20500000000 /* slfi */ : 0xc20400000000 /* slgfi */;
2539 				FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A));
2540 				goto done;
2541 			}
2542 			if (is_u32(neg_src2w)) {
2543 				FAIL_IF(emit_ri(compiler, 0xc20a00000000 /* algfi */, dst, src1, src1w, neg_src2w, RIL_A));
2544 				goto done;
2545 			}
2546 		}
2547 		else if ((op & SLJIT_32) || is_s32(neg_src2w)) {
2548 			ins = (op & SLJIT_32) ? 0xc20900000000 /* afi */ : 0xc20800000000 /* agfi */;
2549 			FAIL_IF(emit_ri(compiler, ins, dst, src1, src1w, neg_src2w, RIL_A));
2550 			goto done;
2551 		}
2552 	}
2553 
2554 	forms = sets_signed ? &sub_forms : &logical_sub_forms;
2555 	FAIL_IF(emit_non_commutative(compiler, forms, dst, src1, src1w, src2, src2w));
2556 
2557 done:
2558 	if (sets_signed) {
2559 		sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2560 
2561 		if ((op & VARIABLE_FLAG_MASK) != SLJIT_SET_OVERFLOW) {
2562 			/* In case of overflow, the sign bit of the two source operands must be different, and
2563 			     - the first operand is greater if the sign bit of the result is set
2564 			     - the first operand is less if the sign bit of the result is not set
2565 			   The -result operation sets the correct sign, because the result cannot be zero.
2566 			   The overflow is considered greater, since the result must be equal to INT_MIN so its sign bit is set. */
2567 			FAIL_IF(push_inst(compiler, brc(0xe, (op & SLJIT_32) ? (2 + 1) : (2 + 2))));
2568 			FAIL_IF(push_inst(compiler, (op & SLJIT_32) ? lcr(tmp1, dst_r) : lcgr(tmp1, dst_r)));
2569 		}
2570 		else if (op & SLJIT_SET_Z)
2571 			FAIL_IF(update_zero_overflow(compiler, op, dst_r));
2572 	}
2573 
2574 	if (dst & SLJIT_MEM)
2575 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2576 
2577 	return SLJIT_SUCCESS;
2578 }
2579 
2580 static const struct ins_forms multiply_forms = {
2581 	0xb2520000, /* msr */
2582 	0xb90c0000, /* msgr */
2583 	0xb9fd0000, /* msrkc */
2584 	0xb9ed0000, /* msgrkc */
2585 	0x71000000, /* ms */
2586 	0xe30000000051, /* msy */
2587 	0xe3000000000c, /* msg */
2588 };
2589 
2590 static const struct ins_forms multiply_overflow_forms = {
2591 	0,
2592 	0,
2593 	0xb9fd0000, /* msrkc */
2594 	0xb9ed0000, /* msgrkc */
2595 	0,
2596 	0xe30000000053, /* msc */
2597 	0xe30000000083, /* msgc */
2598 };
2599 
2600 static sljit_s32 sljit_emit_multiply(struct sljit_compiler *compiler, sljit_s32 op,
2601 	sljit_s32 dst,
2602 	sljit_s32 src1, sljit_sw src1w,
2603 	sljit_s32 src2, sljit_sw src2w)
2604 {
2605 	sljit_ins ins;
2606 
2607 	if (HAS_FLAGS(op)) {
2608 		/* If have_misc2() is not available, this operation should be emulated. 32 bit emulation:
2609 		FAIL_IF(push_inst(compiler, lgfr(tmp0, src1_r)));
2610 		FAIL_IF(push_inst(compiler, msgfr(tmp0, src2_r)));
2611 		if (dst_r != tmp0) {
2612 			FAIL_IF(push_inst(compiler, lr(dst_r, tmp0)));
2613 		}
2614 		FAIL_IF(push_inst(compiler, aih(tmp0, 1)));
2615 		FAIL_IF(push_inst(compiler, nihf(tmp0, ~1U)));
2616 		FAIL_IF(push_inst(compiler, ipm(tmp1)));
2617 		FAIL_IF(push_inst(compiler, oilh(tmp1, 0x2000))); */
2618 
2619 		return emit_commutative(compiler, &multiply_overflow_forms, dst, src1, src1w, src2, src2w);
2620 	}
2621 
2622 	if (src2 == SLJIT_IMM) {
2623 		if (is_s16(src2w)) {
2624 			ins = (op & SLJIT_32) ? 0xa70c0000 /* mhi */ : 0xa70d0000 /* mghi */;
2625 			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RI_A);
2626 		}
2627 
2628 		if (is_s32(src2w)) {
2629 			ins = (op & SLJIT_32) ? 0xc20100000000 /* msfi */ : 0xc20000000000 /* msgfi */;
2630 			return emit_ri(compiler, ins, dst, src1, src1w, src2w, RIL_A);
2631 		}
2632 	}
2633 
2634 	return emit_commutative(compiler, &multiply_forms, dst, src1, src1w, src2, src2w);
2635 }
2636 
2637 static sljit_s32 sljit_emit_bitwise_imm(struct sljit_compiler *compiler, sljit_s32 type,
2638 	sljit_s32 dst,
2639 	sljit_s32 src1, sljit_sw src1w,
2640 	sljit_uw imm, sljit_s32 count16)
2641 {
2642 	sljit_s32 mode = compiler->mode;
2643 	sljit_gpr dst_r = tmp0;
2644 	sljit_s32 needs_move = 1;
2645 
2646 	if (IS_GPR_REG(dst)) {
2647 		dst_r = gpr(dst & REG_MASK);
2648 		if (dst == src1)
2649 			needs_move = 0;
2650 	}
2651 
2652 	if (needs_move)
2653 		FAIL_IF(emit_move(compiler, dst_r, src1, src1w));
2654 
2655 	if (type == SLJIT_AND) {
2656 		if (!(mode & SLJIT_32))
2657 			FAIL_IF(push_inst(compiler, 0xc00a00000000 /* nihf */ | R36A(dst_r) | (imm >> 32)));
2658 		return push_inst(compiler, 0xc00b00000000 /* nilf */ | R36A(dst_r) | (imm & 0xffffffff));
2659 	}
2660 	else if (type == SLJIT_OR) {
2661 		if (count16 >= 3) {
2662 			FAIL_IF(push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32)));
2663 			return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2664 		}
2665 
2666 		if (count16 >= 2) {
2667 			if ((imm & 0x00000000ffffffffull) == 0)
2668 				return push_inst(compiler, 0xc00c00000000 /* oihf */ | R36A(dst_r) | (imm >> 32));
2669 			if ((imm & 0xffffffff00000000ull) == 0)
2670 				return push_inst(compiler, 0xc00d00000000 /* oilf */ | R36A(dst_r) | (imm & 0xffffffff));
2671 		}
2672 
2673 		if ((imm & 0xffff000000000000ull) != 0)
2674 			FAIL_IF(push_inst(compiler, 0xa5080000 /* oihh */ | R20A(dst_r) | (imm >> 48)));
2675 		if ((imm & 0x0000ffff00000000ull) != 0)
2676 			FAIL_IF(push_inst(compiler, 0xa5090000 /* oihl */ | R20A(dst_r) | ((imm >> 32) & 0xffff)));
2677 		if ((imm & 0x00000000ffff0000ull) != 0)
2678 			FAIL_IF(push_inst(compiler, 0xa50a0000 /* oilh */ | R20A(dst_r) | ((imm >> 16) & 0xffff)));
2679 		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2680 			return push_inst(compiler, 0xa50b0000 /* oill */ | R20A(dst_r) | (imm & 0xffff));
2681 		return SLJIT_SUCCESS;
2682 	}
2683 
2684 	if ((imm & 0xffffffff00000000ull) != 0)
2685 		FAIL_IF(push_inst(compiler, 0xc00600000000 /* xihf */ | R36A(dst_r) | (imm >> 32)));
2686 	if ((imm & 0x00000000ffffffffull) != 0 || imm == 0)
2687 		return push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(dst_r) | (imm & 0xffffffff));
2688 	return SLJIT_SUCCESS;
2689 }
2690 
2691 static const struct ins_forms bitwise_and_forms = {
2692 	0x1400, /* nr */
2693 	0xb9800000, /* ngr */
2694 	0xb9f40000, /* nrk */
2695 	0xb9e40000, /* ngrk */
2696 	0x54000000, /* n */
2697 	0xe30000000054, /* ny */
2698 	0xe30000000080, /* ng */
2699 };
2700 
2701 static const struct ins_forms bitwise_or_forms = {
2702 	0x1600, /* or */
2703 	0xb9810000, /* ogr */
2704 	0xb9f60000, /* ork */
2705 	0xb9e60000, /* ogrk */
2706 	0x56000000, /* o */
2707 	0xe30000000056, /* oy */
2708 	0xe30000000081, /* og */
2709 };
2710 
2711 static const struct ins_forms bitwise_xor_forms = {
2712 	0x1700, /* xr */
2713 	0xb9820000, /* xgr */
2714 	0xb9f70000, /* xrk */
2715 	0xb9e70000, /* xgrk */
2716 	0x57000000, /* x */
2717 	0xe30000000057, /* xy */
2718 	0xe30000000082, /* xg */
2719 };
2720 
2721 static sljit_s32 sljit_emit_bitwise(struct sljit_compiler *compiler, sljit_s32 op,
2722 	sljit_s32 dst,
2723 	sljit_s32 src1, sljit_sw src1w,
2724 	sljit_s32 src2, sljit_sw src2w)
2725 {
2726 	sljit_s32 type = GET_OPCODE(op);
2727 	const struct ins_forms *forms;
2728 
2729 	if (src2 == SLJIT_IMM && (!(op & SLJIT_SET_Z) || (type == SLJIT_AND && dst == TMP_REG2))) {
2730 		sljit_s32 count16 = 0;
2731 		sljit_uw imm = (sljit_uw)src2w;
2732 
2733 		if (op & SLJIT_32)
2734 			imm &= 0xffffffffull;
2735 
2736 		if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2737 			count16++;
2738 		if ((imm & 0x00000000ffff0000ull) != 0)
2739 			count16++;
2740 		if ((imm & 0x0000ffff00000000ull) != 0)
2741 			count16++;
2742 		if ((imm & 0xffff000000000000ull) != 0)
2743 			count16++;
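		/* count16 is the number of 16-bit halves of the immediate that must be
		   set (an all-zero immediate still counts as one); it selects between the
		   halfword-immediate forms (oill/oilh/..., tmll/...) and the 32-bit
		   immediate forms (nilf/nihf, oilf/oihf, xilf/xihf). */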
2744 
2745 		if (type == SLJIT_AND && dst == TMP_REG2 && count16 == 1) {
2746 			sljit_gpr src_r = tmp1;
2747 
2748 			if (FAST_IS_REG(src1))
2749 				src_r = gpr(src1 & REG_MASK);
2750 			else
2751 				FAIL_IF(emit_move(compiler, tmp1, src1, src1w));
2752 
2753 			if ((imm & 0x000000000000ffffull) != 0 || imm == 0)
2754 				return push_inst(compiler, 0xa7010000 /* tmll */ | R20A(src_r) | imm);
2755 			if ((imm & 0x00000000ffff0000ull) != 0)
2756 				return push_inst(compiler, 0xa7000000 /* tmlh */ | R20A(src_r) | (imm >> 16));
2757 			if ((imm & 0x0000ffff00000000ull) != 0)
2758 				return push_inst(compiler, 0xa7030000 /* tmhl */ | R20A(src_r) | (imm >> 32));
2759 			return push_inst(compiler, 0xa7020000 /* tmhh */ | R20A(src_r) | (imm >> 48));
2760 		}
2761 
2762 		if (!(op & SLJIT_SET_Z))
2763 			return sljit_emit_bitwise_imm(compiler, type, dst, src1, src1w, imm, count16);
2764 	}
2765 
2766 	if (type == SLJIT_AND)
2767 		forms = &bitwise_and_forms;
2768 	else if (type == SLJIT_OR)
2769 		forms = &bitwise_or_forms;
2770 	else
2771 		forms = &bitwise_xor_forms;
2772 
2773 	return emit_commutative(compiler, forms, dst, src1, src1w, src2, src2w);
2774 }
2775 
2776 static sljit_s32 sljit_emit_shift(struct sljit_compiler *compiler, sljit_s32 op,
2777 	sljit_s32 dst,
2778 	sljit_s32 src1, sljit_sw src1w,
2779 	sljit_s32 src2, sljit_sw src2w)
2780 {
2781 	sljit_s32 type = GET_OPCODE(op);
2782 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2783 	sljit_gpr src_r = tmp0;
2784 	sljit_gpr base_r = tmp0;
2785 	sljit_ins imm = 0;
2786 	sljit_ins ins;
2787 
2788 	if (FAST_IS_REG(src1))
2789 		src_r = gpr(src1);
2790 	else
2791 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2792 
2793 	if (src2 != SLJIT_IMM) {
2794 		if (FAST_IS_REG(src2))
2795 			base_r = gpr(src2);
2796 		else {
2797 			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2798 			base_r = tmp1;
2799 		}
2800 
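		/* The hardware shifts take the low 6 bits of the amount, so only the
		   32-bit modulo variants (MSHL/MLSHR/MASHR) need an explicit reduction
		   mod 32: the amount is masked to 5 bits (risbg keeps bits 59..63). */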
2801 		if ((op & SLJIT_32) && (type == SLJIT_MSHL || type == SLJIT_MLSHR || type == SLJIT_MASHR)) {
2802 			if (base_r != tmp1) {
2803 				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(base_r) | (59 << 24) | (1 << 23) | (63 << 16)));
2804 				base_r = tmp1;
2805 			} else
2806 				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
2807 		}
2808 	} else
2809 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2810 
2811 	if ((op & SLJIT_32) && dst_r == src_r) {
2812 		if (type == SLJIT_SHL || type == SLJIT_MSHL)
2813 			ins = 0x89000000 /* sll */;
2814 		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2815 			ins = 0x88000000 /* srl */;
2816 		else
2817 			ins = 0x8a000000 /* sra */;
2818 
2819 		FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(base_r) | imm));
2820 	} else {
2821 		if (type == SLJIT_SHL || type == SLJIT_MSHL)
2822 			ins = (op & SLJIT_32) ? 0xeb00000000df /* sllk */ : 0xeb000000000d /* sllg */;
2823 		else if (type == SLJIT_LSHR || type == SLJIT_MLSHR)
2824 			ins = (op & SLJIT_32) ? 0xeb00000000de /* srlk */ : 0xeb000000000c /* srlg */;
2825 		else
2826 			ins = (op & SLJIT_32) ? 0xeb00000000dc /* srak */ : 0xeb000000000a /* srag */;
2827 
2828 		FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16)));
2829 	}
2830 
2831 	if ((op & SLJIT_SET_Z) && type != SLJIT_ASHR)
2832 		return push_inst(compiler, (op & SLJIT_32) ? or(dst_r, dst_r) : ogr(dst_r, dst_r));
2833 
2834 	return SLJIT_SUCCESS;
2835 }
2836 
2837 static sljit_s32 sljit_emit_rotate(struct sljit_compiler *compiler, sljit_s32 op,
2838 	sljit_s32 dst,
2839 	sljit_s32 src1, sljit_sw src1w,
2840 	sljit_s32 src2, sljit_sw src2w)
2841 {
2842 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
2843 	sljit_gpr src_r = tmp0;
2844 	sljit_gpr base_r = tmp0;
2845 	sljit_ins imm = 0;
2846 	sljit_ins ins;
2847 
2848 	if (FAST_IS_REG(src1))
2849 		src_r = gpr(src1);
2850 	else
2851 		FAIL_IF(emit_move(compiler, tmp0, src1, src1w));
2852 
2853 	if (src2 != SLJIT_IMM) {
2854 		if (FAST_IS_REG(src2))
2855 			base_r = gpr(src2);
2856 		else {
2857 			FAIL_IF(emit_move(compiler, tmp1, src2, src2w));
2858 			base_r = tmp1;
2859 		}
2860 	}
2861 
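	/* Only left rotates exist (rll/rllg); a right rotate by n is emitted as a
	   left rotate by -n, which is equivalent modulo the bit width. */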
2862 	if (GET_OPCODE(op) == SLJIT_ROTR) {
2863 		if (src2 != SLJIT_IMM) {
2864 			ins = (op & SLJIT_32) ? 0x1300 /* lcr */ : 0xb9030000 /* lcgr */;
2865 			FAIL_IF(push_inst(compiler, ins | R4A(tmp1) | R0A(base_r)));
2866 			base_r = tmp1;
2867 		} else
2868 			src2w = -src2w;
2869 	}
2870 
2871 	if (src2 == SLJIT_IMM)
2872 		imm = (sljit_ins)(src2w & ((op & SLJIT_32) ? 0x1f : 0x3f));
2873 
2874 	ins = (op & SLJIT_32) ? 0xeb000000001d /* rll */ : 0xeb000000001c /* rllg */;
2875 	return push_inst(compiler, ins | R36A(dst_r) | R32A(src_r) | R28A(base_r) | (imm << 16));
2876 }
2877 
2878 static const struct ins_forms addc_forms = {
2879 	0xb9980000, /* alcr */
2880 	0xb9880000, /* alcgr */
2881 	0,
2882 	0,
2883 	0,
2884 	0xe30000000098, /* alc */
2885 	0xe30000000088, /* alcg */
2886 };
2887 
2888 static const struct ins_forms subc_forms = {
2889 	0xb9990000, /* slbr */
2890 	0xb9890000, /* slbgr */
2891 	0,
2892 	0,
2893 	0,
2894 	0xe30000000099, /* slb */
2895 	0xe30000000089, /* slbg */
2896 };
2897 
2898 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
2899 	sljit_s32 dst, sljit_sw dstw,
2900 	sljit_s32 src1, sljit_sw src1w,
2901 	sljit_s32 src2, sljit_sw src2w)
2902 {
2903 	CHECK_ERROR();
2904 	CHECK(check_sljit_emit_op2(compiler, op, 0, dst, dstw, src1, src1w, src2, src2w));
2905 	ADJUST_LOCAL_OFFSET(dst, dstw);
2906 	ADJUST_LOCAL_OFFSET(src1, src1w);
2907 	ADJUST_LOCAL_OFFSET(src2, src2w);
2908 
2909 	compiler->mode = op & SLJIT_32;
2910 	compiler->status_flags_state = op & (VARIABLE_FLAG_MASK | SLJIT_SET_Z);
2911 
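	/* For commutative operations, canonicalize the operand order so that an
	   immediate ends up as src2 (swapped with the XOR exchanges below). */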
2912 	if (is_commutative(op) && src1 == SLJIT_IMM && src2 != SLJIT_IMM) {
2913 		src1 ^= src2;
2914 		src2 ^= src1;
2915 		src1 ^= src2;
2916 
2917 		src1w ^= src2w;
2918 		src2w ^= src1w;
2919 		src1w ^= src2w;
2920 	}
2921 
2922 	switch (GET_OPCODE(op)) {
2923 	case SLJIT_ADD:
2924 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2925 		return sljit_emit_add(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2926 	case SLJIT_ADDC:
2927 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_ADD;
2928 		FAIL_IF(emit_commutative(compiler, &addc_forms, dst, src1, src1w, src2, src2w));
2929 		if (dst & SLJIT_MEM)
2930 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2931 		return SLJIT_SUCCESS;
2932 	case SLJIT_SUB:
2933 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2934 		return sljit_emit_sub(compiler, op, dst, dstw, src1, src1w, src2, src2w);
2935 	case SLJIT_SUBC:
2936 		compiler->status_flags_state |= SLJIT_CURRENT_FLAGS_SUB;
2937 		FAIL_IF(emit_non_commutative(compiler, &subc_forms, dst, src1, src1w, src2, src2w));
2938 		if (dst & SLJIT_MEM)
2939 			return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2940 		return SLJIT_SUCCESS;
2941 	case SLJIT_MUL:
2942 		FAIL_IF(sljit_emit_multiply(compiler, op, dst, src1, src1w, src2, src2w));
2943 		break;
2944 	case SLJIT_AND:
2945 	case SLJIT_OR:
2946 	case SLJIT_XOR:
2947 		FAIL_IF(sljit_emit_bitwise(compiler, op, dst, src1, src1w, src2, src2w));
2948 		break;
2949 	case SLJIT_SHL:
2950 	case SLJIT_MSHL:
2951 	case SLJIT_LSHR:
2952 	case SLJIT_MLSHR:
2953 	case SLJIT_ASHR:
2954 	case SLJIT_MASHR:
2955 		FAIL_IF(sljit_emit_shift(compiler, op, dst, src1, src1w, src2, src2w));
2956 		break;
2957 	case SLJIT_ROTL:
2958 	case SLJIT_ROTR:
2959 		FAIL_IF(sljit_emit_rotate(compiler, op, dst, src1, src1w, src2, src2w));
2960 		break;
2961 	}
2962 
2963 	if (dst & SLJIT_MEM)
2964 		return store_word(compiler, tmp0, dst, dstw, op & SLJIT_32);
2965 	return SLJIT_SUCCESS;
2966 }
2967 
2968 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2u(struct sljit_compiler *compiler, sljit_s32 op,
2969 	sljit_s32 src1, sljit_sw src1w,
2970 	sljit_s32 src2, sljit_sw src2w)
2971 {
2972 	sljit_s32 dst_reg = (GET_OPCODE(op) == SLJIT_SUB || GET_OPCODE(op) == SLJIT_AND) ? TMP_REG2 : TMP_REG1;
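	/* Flag-only SUB and AND results are routed to TMP_REG2 so that the compare
	   and test-under-mask shortcuts in sljit_emit_sub/sljit_emit_bitwise apply. */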
2973 
2974 	CHECK_ERROR();
2975 	CHECK(check_sljit_emit_op2(compiler, op, 1, 0, 0, src1, src1w, src2, src2w));
2976 
2977 	SLJIT_SKIP_CHECKS(compiler);
2978 	return sljit_emit_op2(compiler, op, dst_reg, 0, src1, src1w, src2, src2w);
2979 }
2980 
2981 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2r(struct sljit_compiler *compiler, sljit_s32 op,
2982 	sljit_s32 dst_reg,
2983 	sljit_s32 src1, sljit_sw src1w,
2984 	sljit_s32 src2, sljit_sw src2w)
2985 {
2986 	CHECK_ERROR();
2987 	CHECK(check_sljit_emit_op2r(compiler, op, dst_reg, src1, src1w, src2, src2w));
2988 
2989 	switch (GET_OPCODE(op)) {
2990 	case SLJIT_MULADD:
2991 		SLJIT_SKIP_CHECKS(compiler);
2992 		FAIL_IF(sljit_emit_op2(compiler, SLJIT_MUL | (op & SLJIT_32), 0 /* tmp0 */, 0, src1, src1w, src2, src2w));
2993 		return push_inst(compiler, ((op & SLJIT_32) ? 0x1a00 /* ar */ : 0xb9080000 /* agr */) | R4A(gpr(dst_reg)) | R0A(tmp0));
2994 	}
2995 
2996 	return SLJIT_SUCCESS;
2997 }
2998 
2999 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_shift_into(struct sljit_compiler *compiler, sljit_s32 op,
3000 	sljit_s32 dst_reg,
3001 	sljit_s32 src1_reg,
3002 	sljit_s32 src2_reg,
3003 	sljit_s32 src3, sljit_sw src3w)
3004 {
3005 	sljit_s32 is_right;
3006 	sljit_sw bit_length = (op & SLJIT_32) ? 32 : 64;
3007 	sljit_gpr dst_r = gpr(dst_reg);
3008 	sljit_gpr src1_r = gpr(src1_reg);
3009 	sljit_gpr src2_r = gpr(src2_reg);
3010 	sljit_gpr src3_r = tmp1;
3011 	sljit_ins ins;
3012 
3013 	CHECK_ERROR();
3014 	CHECK(check_sljit_emit_shift_into(compiler, op, dst_reg, src1_reg, src2_reg, src3, src3w));
3015 
3016 	is_right = (GET_OPCODE(op) == SLJIT_LSHR || GET_OPCODE(op) == SLJIT_MLSHR);
3017 
3018 	if (src1_reg == src2_reg) {
3019 		SLJIT_SKIP_CHECKS(compiler);
3020 		return sljit_emit_op2(compiler, (is_right ? SLJIT_ROTR : SLJIT_ROTL) | (op & SLJIT_32), dst_reg, 0, src1_reg, 0, src3, src3w);
3021 	}
3022 
3023 	ADJUST_LOCAL_OFFSET(src3, src3w);
3024 
3025 	if (src3 == SLJIT_IMM) {
3026 		src3w &= bit_length - 1;
3027 
3028 		if (src3w == 0)
3029 			return SLJIT_SUCCESS;
3030 
3031 		if (op & SLJIT_32) {
3032 			if (dst_r == src1_r) {
3033 				ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3034 				FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | (sljit_ins)src3w));
3035 			} else {
3036 				ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3037 				FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3038 			}
3039 		} else {
3040 			ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3041 			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | ((sljit_ins)src3w << 16)));
3042 		}
3043 
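		/* src1 has already been shifted into dst; risbg now rotates src2 and
		   inserts only the complementary bit range into dst, merging the two
		   halves without a separate shift + or sequence. */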
3044 		ins = 0xec0000000055 /* risbg */;
3045 
3046 		if (is_right) {
3047 			src3w = bit_length - src3w;
3048 			ins |= ((sljit_ins)(64 - bit_length) << 24) | ((sljit_ins)(63 - src3w) << 16) | ((sljit_ins)src3w << 8);
3049 		} else
3050 			ins |= ((sljit_ins)(64 - src3w) << 24) | ((sljit_ins)63 << 16) | ((sljit_ins)(src3w + 64 - bit_length) << 8);
3051 
3052 		return push_inst(compiler, ins | R36A(dst_r) | R32A(src2_r));
3053 	}
3054 
3055 	if (!(src3 & SLJIT_MEM)) {
3056 		src3_r = gpr(src3);
3057 
3058 		if (dst_r == src3_r) {
3059 			FAIL_IF(push_inst(compiler, 0x1800 /* lr */ | R4A(tmp1) | R0A(src3_r)));
3060 			src3_r = tmp1;
3061 		}
3062 	} else
3063 		FAIL_IF(load_word(compiler, tmp1, src3, src3w, op & SLJIT_32));
3064 
3065 	if (op & SLJIT_32) {
3066 		if (GET_OPCODE(op) == SLJIT_MSHL || GET_OPCODE(op) == SLJIT_MLSHR) {
3067 			if (src3_r != tmp1) {
3068 				FAIL_IF(push_inst(compiler, 0xec0000000055 /* risbg */ | R36A(tmp1) | R32A(src3_r) | (59 << 24) | (1 << 23) | (63 << 16)));
3069 				src3_r = tmp1;
3070 			} else
3071 				FAIL_IF(push_inst(compiler, 0xa5070000 /* nill */ | R20A(tmp1) | 0x1f));
3072 		}
3073 
3074 		if (dst_r == src1_r) {
3075 			ins = is_right ? 0x88000000 /* srl */ : 0x89000000 /* sll */;
3076 			FAIL_IF(push_inst(compiler, ins | R20A(dst_r) | R12A(src3_r)));
3077 		} else {
3078 			ins = is_right ? 0xeb00000000de /* srlk */ : 0xeb00000000df /* sllk */;
3079 			FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3080 		}
3081 
3082 		if (src3_r != tmp1) {
3083 			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x1f));
3084 			FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp1) | R0A(src3_r)));
3085 		} else
3086 			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x1f));
3087 
3088 		ins = is_right ? 0xeb00000000df /* sllk */ : 0xeb00000000de /* srlk */;
3089 		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1) | (0x1 << 16)));
3090 
3091 		return push_inst(compiler, 0x1600 /* or */ | R4A(dst_r) | R0A(tmp0));
3092 	}
3093 
3094 	ins = is_right ? 0xeb000000000c /* srlg */ : 0xeb000000000d /* sllg */;
3095 	FAIL_IF(push_inst(compiler, ins | R36A(dst_r) | R32A(src1_r) | R28A(src3_r)));
3096 
3097 	ins = is_right ? 0xeb000000000d /* sllg */ : 0xeb000000000c /* srlg */;
3098 
3099 	if (!(op & SLJIT_SHIFT_INTO_NON_ZERO)) {
3100 		if (src3_r != tmp1)
3101 			FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | 0x3f));
3102 
3103 		FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | (0x1 << 16)));
3104 		src2_r = tmp0;
3105 
3106 		if (src3_r != tmp1)
3107 			FAIL_IF(push_inst(compiler, 0xb9820000 /* xgr */ | R4A(tmp1) | R0A(src3_r)));
3108 		else
3109 			FAIL_IF(push_inst(compiler, 0xc00700000000 /* xilf */ | R36A(tmp1) | 0x3f));
3110 	} else
3111 		FAIL_IF(push_inst(compiler, 0xb9030000 /* lcgr */ | R4A(tmp1) | R0A(src3_r)));
3112 
3113 	FAIL_IF(push_inst(compiler, ins | R36A(tmp0) | R32A(src2_r) | R28A(tmp1)));
3114 	return push_inst(compiler, 0xb9810000 /* ogr */ | R4A(dst_r) | R0A(tmp0));
3115 }
3116 
3117 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_src(struct sljit_compiler *compiler, sljit_s32 op,
3118 	sljit_s32 src, sljit_sw srcw)
3119 {
3120 	sljit_gpr src_r;
3121 	struct addr addr;
3122 
3123 	CHECK_ERROR();
3124 	CHECK(check_sljit_emit_op_src(compiler, op, src, srcw));
3125 	ADJUST_LOCAL_OFFSET(src, srcw);
3126 
3127 	switch (op) {
3128 	case SLJIT_FAST_RETURN:
3129 		src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3130 		if (src & SLJIT_MEM)
3131 			FAIL_IF(load_word(compiler, tmp1, src, srcw, 0));
3132 
3133 		return push_inst(compiler, br(src_r));
3134 	case SLJIT_SKIP_FRAMES_BEFORE_FAST_RETURN:
3135 		return SLJIT_SUCCESS;
3136 	case SLJIT_PREFETCH_L1:
3137 	case SLJIT_PREFETCH_L2:
3138 	case SLJIT_PREFETCH_L3:
3139 	case SLJIT_PREFETCH_ONCE:
3140 		FAIL_IF(make_addr_bxy(compiler, &addr, src, srcw, tmp1));
3141 		return push_inst(compiler, 0xe31000000036 /* pfd */ | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3142 	default:
3143 		return SLJIT_SUCCESS;
3144 	}
3145 
3146 	return SLJIT_SUCCESS;
3147 }
3148 
3149 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_dst(struct sljit_compiler *compiler, sljit_s32 op,
3150 	sljit_s32 dst, sljit_sw dstw)
3151 {
3152 	sljit_gpr dst_r = link_r;
3153 	sljit_s32 size;
3154 
3155 	CHECK_ERROR();
3156 	CHECK(check_sljit_emit_op_dst(compiler, op, dst, dstw));
3157 	ADJUST_LOCAL_OFFSET(dst, dstw);
3158 
3159 	switch (op) {
3160 	case SLJIT_FAST_ENTER:
3161 		if (FAST_IS_REG(dst))
3162 			return push_inst(compiler, lgr(gpr(dst), link_r));
3163 		break;
3164 	case SLJIT_GET_RETURN_ADDRESS:
3165 		dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3166 
3167 		size = GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds - SLJIT_KEPT_SAVEDS_COUNT(compiler->options), 2);
3168 		FAIL_IF(load_word(compiler, dst_r, SLJIT_MEM1(SLJIT_SP), compiler->local_size + size, 0));
3169 		break;
3170 	}
3171 
3172 	if (dst & SLJIT_MEM)
3173 		return store_word(compiler, dst_r, dst, dstw, 0);
3174 
3175 	return SLJIT_SUCCESS;
3176 }
3177 
3178 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 type, sljit_s32 reg)
3179 {
3180 	CHECK_REG_INDEX(check_sljit_get_register_index(type, reg));
3181 
3182 	if (type == SLJIT_GP_REGISTER)
3183 		return (sljit_s32)gpr(reg);
3184 
3185 	if (type != SLJIT_FLOAT_REGISTER)
3186 		return -1;
3187 
3188 	return (sljit_s32)freg_map[reg];
3189 }
3190 
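/* sljit_emit_op_custom copies the raw instruction into the low (rightmost)
   bytes of the 64 bit container: s390x is big-endian, so
   (sljit_u8 *)&ins + sizeof(ins) - size right-aligns the 2, 4 or 6 byte
   instruction, matching the way opcodes are written as integer constants
   elsewhere in this file. */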
3191 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
3192 	void *instruction, sljit_u32 size)
3193 {
3194 	sljit_ins ins = 0;
3195 
3196 	CHECK_ERROR();
3197 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
3198 
3199 	memcpy((sljit_u8 *)&ins + sizeof(ins) - size, instruction, size);
3200 	return push_inst(compiler, ins);
3201 }
3202 
3203 /* --------------------------------------------------------------------- */
3204 /*  Floating point operators                                             */
3205 /* --------------------------------------------------------------------- */
3206 
3207 #define FLOAT_LOAD 0
3208 #define FLOAT_STORE 1
3209 
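/* float_mem moves a single floating point value between a register and
   memory. The op argument combines FLOAT_LOAD/FLOAT_STORE with SLJIT_32
   (single vs. double precision). When the access uses an offset register,
   fits an unsigned 12 bit displacement or does not fit a signed 20 bit
   one, the short RX forms (ste/std/le/ld) are emitted; otherwise the
   long-displacement RXY forms (stey/stdy/ley/ldy) are used. */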
3210 static sljit_s32 float_mem(struct sljit_compiler *compiler, sljit_s32 op,
3211 	sljit_s32 reg,
3212 	sljit_s32 mem, sljit_sw memw)
3213 {
3214 	struct addr addr;
3215 	sljit_ins ins;
3216 
3217 	SLJIT_ASSERT(mem & SLJIT_MEM);
3218 
3219 	if ((mem & OFFS_REG_MASK) || is_u12(memw) || !is_s20(memw)) {
3220 		FAIL_IF(make_addr_bx(compiler, &addr, mem, memw, tmp1));
3221 
3222 		if (op & FLOAT_STORE)
3223 			ins = (op & SLJIT_32) ? 0x70000000 /* ste */ : 0x60000000 /* std */;
3224 		else
3225 			ins = (op & SLJIT_32) ? 0x78000000 /* le */ : 0x68000000 /* ld */;
3226 
3227 		return push_inst(compiler, ins | F20(reg) | R16A(addr.index) | R12A(addr.base) | (sljit_ins)addr.offset);
3228 	}
3229 
3230 	FAIL_IF(make_addr_bxy(compiler, &addr, mem, memw, tmp1));
3231 
3232 	if (op & FLOAT_STORE)
3233 		ins = (op & SLJIT_32) ? 0xed0000000066 /* stey */ : 0xed0000000067 /* stdy */;
3234 	else
3235 		ins = (op & SLJIT_32) ? 0xed0000000064 /* ley */ : 0xed0000000065 /* ldy */;
3236 
3237 	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset));
3238 }
3239 
3240 static sljit_s32 emit_float(struct sljit_compiler *compiler, sljit_ins ins_r, sljit_ins ins,
3241 	sljit_s32 reg,
3242 	sljit_s32 src, sljit_sw srcw)
3243 {
3244 	struct addr addr;
3245 
3246 	if (!(src & SLJIT_MEM))
3247 		return push_inst(compiler, ins_r | F4(reg) | F0(src));
3248 
3249 	FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
3250 	return push_inst(compiler, ins | F36(reg) | R32A(addr.index) | R28A(addr.base) | ((sljit_ins)addr.offset << 16));
3251 }
3252 
3253 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
3254 	sljit_s32 dst, sljit_sw dstw,
3255 	sljit_s32 src, sljit_sw srcw)
3256 {
3257 	sljit_ins dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
3258 	sljit_ins ins;
3259 
3260 	if (src & SLJIT_MEM) {
3261 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src, srcw));
3262 		src = TMP_FREG1;
3263 	}
3264 
3265 	/* M3 (rounding mode) is set to 5: round toward zero. */
3266 	if (GET_OPCODE(op) == SLJIT_CONV_SW_FROM_F64)
3267 		ins = (op & SLJIT_32) ? 0xb3a85000 /* cgebr */ : 0xb3a95000 /* cgdbr */;
3268 	else
3269 		ins = (op & SLJIT_32) ? 0xb3985000 /* cfebr */ : 0xb3995000 /* cfdbr */;
3270 
3271 	FAIL_IF(push_inst(compiler, ins | R4A(dst_r) | F0(src)));
3272 
3273 	if (dst & SLJIT_MEM)
3274 		return store_word(compiler, dst_r, dst, dstw, GET_OPCODE(op) >= SLJIT_CONV_S32_FROM_F64);
3275 
3276 	return SLJIT_SUCCESS;
3277 }
3278 
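/* sljit_emit_fop1_conv_f64_from_w receives one of the convert-from-fixed
   opcodes (cegbr/cdgbr/cefbr/cdfbr and their logical variants) from the
   callers below and reuses two opcode bits as flags: bit 0x100000 is set
   only in the ...fbr forms (32 bit integer source), so it selects a 32 bit
   load_word(); bit 0x10000 is set only in the cd... forms (double
   precision result), so a clear bit means the result is stored back as a
   32 bit float. */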
3279 static sljit_s32 sljit_emit_fop1_conv_f64_from_w(struct sljit_compiler *compiler, sljit_ins ins,
3280 	sljit_s32 dst, sljit_sw dstw,
3281 	sljit_s32 src, sljit_sw srcw)
3282 {
3283 	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3284 
3285 	if (src == SLJIT_IMM) {
3286 		FAIL_IF(push_load_imm_inst(compiler, tmp0, srcw));
3287 		src = (sljit_s32)tmp0;
3288 	}
3289 	else if (src & SLJIT_MEM) {
3290 		FAIL_IF(load_word(compiler, tmp0, src, srcw, ins & 0x100000));
3291 		src = (sljit_s32)tmp0;
3292 	}
3293 
3294 	FAIL_IF(push_inst(compiler, ins | F4(dst_r) | R0(src)));
3295 
3296 	if (dst & SLJIT_MEM)
3297 		return float_mem(compiler, FLOAT_STORE | ((ins & 0x10000) ? 0 : SLJIT_32), TMP_FREG1, dst, dstw);
3298 
3299 	return SLJIT_SUCCESS;
3300 }
3301 
3302 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
3303 	sljit_s32 dst, sljit_sw dstw,
3304 	sljit_s32 src, sljit_sw srcw)
3305 {
3306 	sljit_ins ins;
3307 
3308 	if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
3309 		srcw = (sljit_s32)srcw;
3310 
3311 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_SW)
3312 		ins = (op & SLJIT_32) ? 0xb3a40000 /* cegbr */ : 0xb3a50000 /* cdgbr */;
3313 	else
3314 		ins = (op & SLJIT_32) ? 0xb3940000 /* cefbr */ : 0xb3950000 /* cdfbr */;
3315 
3316 	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3317 }
3318 
3319 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_uw(struct sljit_compiler *compiler, sljit_s32 op,
3320 	sljit_s32 dst, sljit_sw dstw,
3321 	sljit_s32 src, sljit_sw srcw)
3322 {
3323 	sljit_ins ins;
3324 
3325 	if (src == SLJIT_IMM && GET_OPCODE(op) == SLJIT_CONV_F64_FROM_U32)
3326 		srcw = (sljit_u32)srcw;
3327 
3328 	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_UW)
3329 		ins = (op & SLJIT_32) ? 0xb3a00000 /* celgbr */ : 0xb3a10000 /* cdlgbr */;
3330 	else
3331 		ins = (op & SLJIT_32) ? 0xb3900000 /* celfbr */ : 0xb3910000 /* cdlfbr */;
3332 
3333 	return sljit_emit_fop1_conv_f64_from_w(compiler, ins, dst, dstw, src, srcw);
3334 }
3335 
3336 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
3337 	sljit_s32 src1, sljit_sw src1w,
3338 	sljit_s32 src2, sljit_sw src2w)
3339 {
3340 	sljit_ins ins_r, ins;
3341 
3342 	if (src1 & SLJIT_MEM) {
3343 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src1, src1w));
3344 		src1 = TMP_FREG1;
3345 	}
3346 
3347 	if (op & SLJIT_32) {
3348 		ins_r = 0xb3090000 /* cebr */;
3349 		ins = 0xed0000000009 /* ceb */;
3350 	} else {
3351 		ins_r = 0xb3190000 /* cdbr */;
3352 		ins = 0xed0000000019 /* cdb */;
3353 	}
3354 
3355 	return emit_float(compiler, ins_r, ins, src1, src2, src2w);
3356 }
3357 
3358 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
3359 	sljit_s32 dst, sljit_sw dstw,
3360 	sljit_s32 src, sljit_sw srcw)
3361 {
3362 	sljit_s32 dst_r;
3363 	sljit_ins ins;
3364 
3365 	CHECK_ERROR();
3366 
3367 	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);
3368 
3369 	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
3370 
3371 	if (op == SLJIT_CONV_F64_FROM_F32)
3372 		FAIL_IF(emit_float(compiler, 0xb3040000 /* ldebr */, 0xed0000000004 /* ldeb */, dst_r, src, srcw));
3373 	else {
3374 		if (src & SLJIT_MEM) {
3375 			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op == SLJIT_CONV_F32_FROM_F64 ? 0 : (op & SLJIT_32)), dst_r, src, srcw));
3376 			src = dst_r;
3377 		}
3378 
3379 		switch (GET_OPCODE(op)) {
3380 		case SLJIT_MOV_F64:
3381 			if (FAST_IS_REG(dst)) {
3382 				if (dst == src)
3383 					return SLJIT_SUCCESS;
3384 
3385 				ins = (op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3386 				break;
3387 			}
3388 			return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), src, dst, dstw);
3389 		case SLJIT_CONV_F64_FROM_F32:
3390 			/* Only SLJIT_CONV_F32_FROM_F64. */
3391 			ins = 0xb3440000 /* ledbr */;
3392 			break;
3393 		case SLJIT_NEG_F64:
3394 			ins = (op & SLJIT_32) ? 0xb3030000 /* lcebr */ : 0xb3130000 /* lcdbr */;
3395 			break;
3396 		default:
3397 			SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_ABS_F64);
3398 			ins = (op & SLJIT_32) ? 0xb3000000 /* lpebr */ : 0xb3100000 /* lpdbr */;
3399 			break;
3400 		}
3401 
3402 		FAIL_IF(push_inst(compiler, ins | F4(dst_r) | F0(src)));
3403 	}
3404 
3405 	if (dst & SLJIT_MEM)
3406 		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3407 
3408 	return SLJIT_SUCCESS;
3409 }
3410 
3411 #define FLOAT_MOV(op, dst_r, src_r) \
3412 	(((op & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */) | F4(dst_r) | F0(src_r))
3413 
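/* The do { ... } while (0) block in sljit_emit_fop2 loads src1 into the
   result register while coping with operand aliasing: if dst already
   equals src1 nothing needs to be loaded; if dst equals src2, the
   commutative operations (add, mul) simply swap the operands, while sub
   and div first save src2 into TMP_FREG1 so it is not overwritten. */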
3414 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
3415 	sljit_s32 dst, sljit_sw dstw,
3416 	sljit_s32 src1, sljit_sw src1w,
3417 	sljit_s32 src2, sljit_sw src2w)
3418 {
3419 	sljit_s32 dst_r = TMP_FREG1;
3420 	sljit_ins ins_r, ins;
3421 
3422 	CHECK_ERROR();
3423 	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
3424 	ADJUST_LOCAL_OFFSET(dst, dstw);
3425 	ADJUST_LOCAL_OFFSET(src1, src1w);
3426 	ADJUST_LOCAL_OFFSET(src2, src2w);
3427 
3428 	do {
3429 		if (FAST_IS_REG(dst)) {
3430 			dst_r = dst;
3431 
3432 			if (dst == src1)
3433 				break;
3434 
3435 			if (dst == src2) {
3436 				if (GET_OPCODE(op) == SLJIT_ADD_F64 || GET_OPCODE(op) == SLJIT_MUL_F64) {
3437 					src2 = src1;
3438 					src2w = src1w;
3439 					src1 = dst;
3440 					break;
3441 				}
3442 
3443 				FAIL_IF(push_inst(compiler, FLOAT_MOV(op, TMP_FREG1, src2)));
3444 				src2 = TMP_FREG1;
3445 			}
3446 		}
3447 
3448 		if (src1 & SLJIT_MEM)
3449 			FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), dst_r, src1, src1w));
3450 		else
3451 			FAIL_IF(push_inst(compiler, FLOAT_MOV(op, dst_r, src1)));
3452 	} while (0);
3453 
3454 	switch (GET_OPCODE(op)) {
3455 	case SLJIT_ADD_F64:
3456 		ins_r = (op & SLJIT_32) ? 0xb30a0000 /* aebr */ : 0xb31a0000 /* adbr */;
3457 		ins = (op & SLJIT_32) ? 0xed000000000a /* aeb */ : 0xed000000001a /* adb */;
3458 		break;
3459 	case SLJIT_SUB_F64:
3460 		ins_r = (op & SLJIT_32) ? 0xb30b0000 /* sebr */ : 0xb31b0000 /* sdbr */;
3461 		ins = (op & SLJIT_32) ? 0xed000000000b /* seb */ : 0xed000000001b /* sdb */;
3462 		break;
3463 	case SLJIT_MUL_F64:
3464 		ins_r = (op & SLJIT_32) ? 0xb3170000 /* meebr */ : 0xb31c0000 /* mdbr */;
3465 		ins = (op & SLJIT_32) ? 0xed0000000017 /* meeb */ : 0xed000000001c /* mdb */;
3466 		break;
3467 	default:
3468 		SLJIT_ASSERT(GET_OPCODE(op) == SLJIT_DIV_F64);
3469 		ins_r = (op & SLJIT_32) ? 0xb30d0000 /* debr */ : 0xb31d0000 /* ddbr */;
3470 		ins = (op & SLJIT_32) ? 0xed000000000d /* deb */ : 0xed000000001d /* ddb */;
3471 		break;
3472 	}
3473 
3474 	FAIL_IF(emit_float(compiler, ins_r, ins, dst_r, src2, src2w));
3475 
3476 	if (dst & SLJIT_MEM)
3477 		return float_mem(compiler, FLOAT_STORE | (op & SLJIT_32), TMP_FREG1, dst, dstw);
3478 
3479 	return SLJIT_SUCCESS;
3480 }
3481 
3482 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2r(struct sljit_compiler *compiler, sljit_s32 op,
3483 	sljit_s32 dst_freg,
3484 	sljit_s32 src1, sljit_sw src1w,
3485 	sljit_s32 src2, sljit_sw src2w)
3486 {
3487 	sljit_s32 reg;
3488 
3489 	CHECK_ERROR();
3490 	CHECK(check_sljit_emit_fop2r(compiler, op, dst_freg, src1, src1w, src2, src2w));
3491 	ADJUST_LOCAL_OFFSET(src1, src1w);
3492 	ADJUST_LOCAL_OFFSET(src2, src2w);
3493 
3494 	if (src2 & SLJIT_MEM) {
3495 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), TMP_FREG1, src2, src2w));
3496 		src2 = TMP_FREG1;
3497 	}
3498 
3499 	if (src1 & SLJIT_MEM) {
3500 		reg = (dst_freg == src2) ? TMP_FREG1 : dst_freg;
3501 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (op & SLJIT_32), reg, src1, src1w));
3502 		src1 = reg;
3503 	}
3504 
3505 	return push_inst(compiler, 0xb3720000 /* cpsdr */ | F12(src2) | F4(dst_freg) | F0(src1));
3506 }
3507 
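/* sljit_emit_fset32 builds the 32 bit image of the constant in the upper
   half of a general register and moves it with ldgr: short (32 bit) BFP
   operands live in the leftmost 32 bits of a floating point register,
   which is why the immediate is shifted left by 32 first. */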
3508 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset32(struct sljit_compiler *compiler,
3509 	sljit_s32 freg, sljit_f32 value)
3510 {
3511 	union {
3512 		sljit_s32 imm;
3513 		sljit_f32 value;
3514 	} u;
3515 
3516 	CHECK_ERROR();
3517 	CHECK(check_sljit_emit_fset32(compiler, freg, value));
3518 
3519 	u.value = value;
3520 
3521 	FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)(((sljit_uw)u.imm << 32))));
3522 	return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3523 }
3524 
3525 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fset64(struct sljit_compiler *compiler,
3526 	sljit_s32 freg, sljit_f64 value)
3527 {
3528 	union {
3529 		sljit_sw imm;
3530 		sljit_f64 value;
3531 	} u;
3532 
3533 	CHECK_ERROR();
3534 	CHECK(check_sljit_emit_fset64(compiler, freg, value));
3535 
3536 	u.value = value;
3537 
3538 	FAIL_IF(push_load_imm_inst(compiler, tmp1, (sljit_sw)u.imm));
3539 	return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(tmp1));
3540 }
3541 
3542 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fcopy(struct sljit_compiler *compiler, sljit_s32 op,
3543 	sljit_s32 freg, sljit_s32 reg)
3544 {
3545 	sljit_gpr gen_r;
3546 
3547 	CHECK_ERROR();
3548 	CHECK(check_sljit_emit_fcopy(compiler, op, freg, reg));
3549 
3550 	gen_r = gpr(reg);
3551 
3552 	if (GET_OPCODE(op) == SLJIT_COPY_TO_F64) {
3553 		if (op & SLJIT_32) {
3554 			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(gen_r) | (32 << 16)));
3555 			gen_r = tmp0;
3556 		}
3557 
3558 		return push_inst(compiler, 0xb3c10000 /* ldgr */ | F4(freg) | R0A(gen_r));
3559 	}
3560 
3561 	FAIL_IF(push_inst(compiler, 0xb3cd0000 /* lgdr */ | R4A(gen_r) | F0(freg)));
3562 
3563 	if (!(op & SLJIT_32))
3564 		return SLJIT_SUCCESS;
3565 
3566 	return push_inst(compiler, 0xeb000000000c /* srlg */ | R36A(gen_r) | R32A(gen_r) | (32 << 16));
3567 }
3568 
3569 /* --------------------------------------------------------------------- */
3570 /*  Conditional instructions                                             */
3571 /* --------------------------------------------------------------------- */
3572 
3573 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
3574 {
3575 	struct sljit_label *label;
3576 
3577 	CHECK_ERROR_PTR();
3578 	CHECK_PTR(check_sljit_emit_label(compiler));
3579 
3580 	if (compiler->last_label && compiler->last_label->size == compiler->size)
3581 		return compiler->last_label;
3582 
3583 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
3584 	PTR_FAIL_IF(!label);
3585 	set_label(label, compiler);
3586 	return label;
3587 }
3588 
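/* sljit_emit_jump records the current instruction index in jump->addr and
   emits brasl (for calls) or brcl (for conditional and unconditional
   branches) with a zero displacement; the real relative offset is filled
   in later when the code is generated or the jump target is updated. */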
3589 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
3590 {
3591 	struct sljit_jump *jump;
3592 	sljit_u8 mask = ((type & 0xff) < SLJIT_JUMP) ? get_cc(compiler, type & 0xff) : 0xf;
3593 
3594 	CHECK_ERROR_PTR();
3595 	CHECK_PTR(check_sljit_emit_jump(compiler, type));
3596 
3597 	/* record jump */
3598 	jump = (struct sljit_jump *)ensure_abuf(compiler, sizeof(struct sljit_jump));
3599 	PTR_FAIL_IF(!jump);
3600 	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
3601 	jump->addr = compiler->size;
3602 
3603 	/* emit jump instruction */
3604 	type &= 0xff;
3605 	if (type >= SLJIT_FAST_CALL)
3606 		PTR_FAIL_IF(push_inst(compiler, brasl(link_r, 0)));
3607 	else
3608 		PTR_FAIL_IF(push_inst(compiler, brcl(mask, 0)));
3609 
3610 	return jump;
3611 }
3612 
3613 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_call(struct sljit_compiler *compiler, sljit_s32 type,
3614 	sljit_s32 arg_types)
3615 {
3616 	SLJIT_UNUSED_ARG(arg_types);
3617 	CHECK_ERROR_PTR();
3618 	CHECK_PTR(check_sljit_emit_call(compiler, type, arg_types));
3619 
3620 	if (type & SLJIT_CALL_RETURN) {
3621 		PTR_FAIL_IF(emit_stack_frame_release(compiler, r14));
3622 		type = SLJIT_JUMP | (type & SLJIT_REWRITABLE_JUMP);
3623 	}
3624 
3625 	SLJIT_SKIP_CHECKS(compiler);
3626 	return sljit_emit_jump(compiler, type);
3627 }
3628 
3629 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
3630 {
3631 	sljit_gpr src_r = FAST_IS_REG(src) ? gpr(src) : tmp1;
3632 
3633 	CHECK_ERROR();
3634 	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
3635 
3636 	if (src == SLJIT_IMM) {
3637 		SLJIT_ASSERT(!(srcw & 1)); /* target address must be even */
3638 		FAIL_IF(push_load_imm_inst(compiler, src_r, srcw));
3639 	}
3640 	else if (src & SLJIT_MEM) {
3641 		ADJUST_LOCAL_OFFSET(src, srcw);
3642 		FAIL_IF(load_word(compiler, src_r, src, srcw, 0 /* 64-bit */));
3643 	}
3644 
3645 	/* emit jump instruction */
3646 	if (type >= SLJIT_FAST_CALL)
3647 		return push_inst(compiler, basr(link_r, src_r));
3648 
3649 	return push_inst(compiler, br(src_r));
3650 }
3651 
3652 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_icall(struct sljit_compiler *compiler, sljit_s32 type,
3653 	sljit_s32 arg_types,
3654 	sljit_s32 src, sljit_sw srcw)
3655 {
3656 	SLJIT_UNUSED_ARG(arg_types);
3657 
3658 	CHECK_ERROR();
3659 	CHECK(check_sljit_emit_icall(compiler, type, arg_types, src, srcw));
3660 
3661 	SLJIT_ASSERT(gpr(TMP_REG2) == tmp1);
3662 
3663 	if (src & SLJIT_MEM) {
3664 		ADJUST_LOCAL_OFFSET(src, srcw);
3665 		FAIL_IF(load_word(compiler, tmp1, src, srcw, 0 /* 64-bit */));
3666 		src = TMP_REG2;
3667 		srcw = 0;
3668 	}
3669 
3670 	if (type & SLJIT_CALL_RETURN) {
3671 		if (src >= SLJIT_FIRST_SAVED_REG && src <= (SLJIT_S0 - SLJIT_KEPT_SAVEDS_COUNT(compiler->options))) {
3672 			FAIL_IF(push_inst(compiler, lgr(tmp1, gpr(src))));
3673 			src = TMP_REG2;
3674 			srcw = 0;
3675 		}
3676 
3677 		FAIL_IF(emit_stack_frame_release(compiler, r14));
3678 		type = SLJIT_JUMP;
3679 	}
3680 
3681 	SLJIT_SKIP_CHECKS(compiler);
3682 	return sljit_emit_ijump(compiler, type, src, srcw);
3683 }
3684 
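/* sljit_emit_op_flags materializes the current condition code as 0 or 1.
   With the load/store-on-condition 2 facility this is branch-free via
   lochi/locghi; otherwise 1 is loaded first and a brc skips the
   instruction that reloads 0 whenever the condition holds. For AND, OR
   and XOR the flag value is then combined with the existing destination. */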
3685 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
3686 	sljit_s32 dst, sljit_sw dstw,
3687 	sljit_s32 type)
3688 {
3689 	sljit_gpr dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
3690 	sljit_gpr loc_r = tmp1;
3691 	sljit_u8 mask = get_cc(compiler, type);
3692 
3693 	CHECK_ERROR();
3694 	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
3695 
3696 	switch (GET_OPCODE(op)) {
3697 	case SLJIT_AND:
3698 	case SLJIT_OR:
3699 	case SLJIT_XOR:
3700 		compiler->status_flags_state = op & SLJIT_SET_Z;
3701 
3702 		/* dst is also source operand */
3703 		if (dst & SLJIT_MEM)
3704 			FAIL_IF(load_word(compiler, dst_r, dst, dstw, op & SLJIT_32));
3705 
3706 		break;
3707 	case SLJIT_MOV32:
3708 		op |= SLJIT_32;
3709 		/* fallthrough */
3710 	case SLJIT_MOV:
3711 		/* can write straight into destination */
3712 		loc_r = dst_r;
3713 		break;
3714 	default:
3715 		SLJIT_UNREACHABLE();
3716 	}
3717 
3718 	/* TODO(mundaym): fold into cmov helper function? */
3719 	#define LEVAL(i) i(loc_r, 1, mask)
3720 	if (have_lscond2()) {
3721 		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3722 		FAIL_IF(push_inst(compiler,
3723 			WHEN2(op & SLJIT_32, lochi, locghi)));
3724 	} else {
3725 		FAIL_IF(push_load_imm_inst(compiler, loc_r, 1));
3726 		FAIL_IF(push_inst(compiler, brc(mask, 2 + 2)));
3727 		FAIL_IF(push_load_imm_inst(compiler, loc_r, 0));
3728 	}
3729 	#undef LEVAL
3730 
3731 	/* apply bitwise op and set condition codes */
3732 	switch (GET_OPCODE(op)) {
3733 	#define LEVAL(i) i(dst_r, loc_r)
3734 	case SLJIT_AND:
3735 		FAIL_IF(push_inst(compiler,
3736 			WHEN2(op & SLJIT_32, nr, ngr)));
3737 		break;
3738 	case SLJIT_OR:
3739 		FAIL_IF(push_inst(compiler,
3740 			WHEN2(op & SLJIT_32, or, ogr)));
3741 		break;
3742 	case SLJIT_XOR:
3743 		FAIL_IF(push_inst(compiler,
3744 			WHEN2(op & SLJIT_32, xr, xgr)));
3745 		break;
3746 	#undef LEVAL
3747 	}
3748 
3749 	/* store result to memory if required */
3750 	if (dst & SLJIT_MEM)
3751 		return store_word(compiler, dst_r, dst, dstw, (op & SLJIT_32));
3752 
3753 	return SLJIT_SUCCESS;
3754 }
3755 
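/* sljit_emit_select implements dst_reg = condition ? src1 : src2_reg with
   the load-on-condition family: loc/locg for memory operands, lochi/locghi
   for small immediates (when available) and locr/locgr for registers. If
   dst_reg aliases src1, or src1's address depends on dst_reg, the operands
   are swapped and the condition is inverted (type ^= 0x1) instead of
   copying src2_reg into dst_reg first. */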
3756 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_select(struct sljit_compiler *compiler, sljit_s32 type,
3757 	sljit_s32 dst_reg,
3758 	sljit_s32 src1, sljit_sw src1w,
3759 	sljit_s32 src2_reg)
3760 {
3761 	sljit_ins mask;
3762 	sljit_gpr src_r;
3763 	sljit_gpr dst_r = gpr(dst_reg);
3764 	sljit_ins ins;
3765 
3766 	CHECK_ERROR();
3767 	CHECK(check_sljit_emit_select(compiler, type, dst_reg, src1, src1w, src2_reg));
3768 
3769 	ADJUST_LOCAL_OFFSET(src1, src1w);
3770 
3771 	if (dst_reg != src2_reg) {
3772 		if (src1 == dst_reg) {
3773 			src1 = src2_reg;
3774 			src1w = 0;
3775 			type ^= 0x1;
3776 		} else {
3777 			if (ADDRESSING_DEPENDS_ON(src1, dst_reg)) {
3778 				FAIL_IF(load_word(compiler, dst_r, src1, src1w, type & SLJIT_32));
3779 				src1 = src2_reg;
3780 				src1w = 0;
3781 				type ^= 0x1;
3782 			} else
3783 				FAIL_IF(push_inst(compiler, ((type & SLJIT_32) ? 0x1800 /* lr */ : 0xb9040000 /* lgr */) | R4A(dst_r) | R0A(gpr(src2_reg))));
3784 		}
3785 	}
3786 
3787 	mask = get_cc(compiler, type & ~SLJIT_32);
3788 
3789 	if (src1 & SLJIT_MEM) {
3790 		if (src1 & OFFS_REG_MASK) {
3791 			src_r = gpr(OFFS_REG(src1));
3792 
3793 			if (src1w != 0) {
3794 				FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(src_r) | ((sljit_ins)(src1w & 0x3) << 16)));
3795 				src_r = tmp1;
3796 			}
3797 
3798 			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(src_r) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
3799 			src_r = tmp1;
3800 			src1w = 0;
3801 		} else if (!is_s20(src1w)) {
3802 			FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
3803 
3804 			if (src1 & REG_MASK)
3805 				FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(tmp1) | R0A(gpr(src1 & REG_MASK))));
3806 
3807 			src_r = tmp1;
3808 			src1w = 0;
3809 		} else
3810 			src_r = gpr(src1 & REG_MASK);
3811 
3812 		ins = (type & SLJIT_32) ? 0xeb00000000f2 /* loc */ : 0xeb00000000e2 /* locg */;
3813 		return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | R28A(src_r) | disp_s20((sljit_s32)src1w));
3814 	}
3815 
3816 	if (src1 == SLJIT_IMM) {
3817 		if (type & SLJIT_32)
3818 			src1w = (sljit_s32)src1w;
3819 
3820 		if (have_lscond2() && is_s16(src1w)) {
3821 			ins = (type & SLJIT_32) ? 0xec0000000042 /* lochi */ : 0xec0000000046 /* locghi */;
3822 			return push_inst(compiler, ins | R36A(dst_r) | (mask << 32) | (sljit_ins)(src1w & 0xffff) << 16);
3823 		}
3824 
3825 		FAIL_IF(push_load_imm_inst(compiler, tmp1, src1w));
3826 		src_r = tmp1;
3827 	} else
3828 		src_r = gpr(src1);
3829 
3830 	ins = (type & SLJIT_32) ? 0xb9f20000 /* locr */ : 0xb9e20000 /* locgr */;
3831 	return push_inst(compiler, ins | (mask << 12) | R4A(dst_r) | R0A(src_r));
3832 }
3833 
3834 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fselect(struct sljit_compiler *compiler, sljit_s32 type,
3835 	sljit_s32 dst_freg,
3836 	sljit_s32 src1, sljit_sw src1w,
3837 	sljit_s32 src2_freg)
3838 {
3839 	sljit_ins ins;
3840 	struct sljit_label *label;
3841 	struct sljit_jump *jump;
3842 
3843 	CHECK_ERROR();
3844 	CHECK(check_sljit_emit_fselect(compiler, type, dst_freg, src1, src1w, src2_freg));
3845 
3846 	ADJUST_LOCAL_OFFSET(src1, src1w);
3847 
3848 	if (dst_freg != src2_freg) {
3849 		if (dst_freg == src1) {
3850 			src1 = src2_freg;
3851 			src1w = 0;
3852 			type ^= 0x1;
3853 		} else {
3854 			ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3855 			FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src2_freg)));
3856 		}
3857 	}
3858 
3859 	SLJIT_SKIP_CHECKS(compiler);
3860 	jump = sljit_emit_jump(compiler, (type & ~SLJIT_32) ^ 0x1);
3861 	FAIL_IF(!jump);
3862 
3863 	if (!(src1 & SLJIT_MEM)) {
3864 		ins = (type & SLJIT_32) ? 0x3800 /* ler */ : 0x2800 /* ldr */;
3865 		FAIL_IF(push_inst(compiler, ins | F4(dst_freg) | F0(src1)));
3866 	} else
3867 		FAIL_IF(float_mem(compiler, FLOAT_LOAD | (type & SLJIT_32), dst_freg, src1, src1w));
3868 
3869 	SLJIT_SKIP_CHECKS(compiler);
3870 	label = sljit_emit_label(compiler);
3871 	FAIL_IF(!label);
3872 
3873 	sljit_set_label(jump, label);
3874 	return SLJIT_SUCCESS;
3875 }
3876 
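/* For register pairs sljit_emit_mem prefers a single lmg/stmg when the two
   registers map to consecutive GPRs and no index register is needed;
   otherwise it falls back to two lg/stg instructions. On a load whose base
   register is also the first destination, the second register is loaded
   first so the base is only clobbered by the last instruction. */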
3877 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_mem(struct sljit_compiler *compiler, sljit_s32 type,
3878 	sljit_s32 reg,
3879 	sljit_s32 mem, sljit_sw memw)
3880 {
3881 	sljit_ins ins, reg1, reg2, base, offs = 0;
3882 
3883 	CHECK_ERROR();
3884 	CHECK(check_sljit_emit_mem(compiler, type, reg, mem, memw));
3885 
3886 	if (!(reg & REG_PAIR_MASK))
3887 		return sljit_emit_mem_unaligned(compiler, type, reg, mem, memw);
3888 
3889 	ADJUST_LOCAL_OFFSET(mem, memw);
3890 
3891 	base = gpr(mem & REG_MASK);
3892 	reg1 = gpr(REG_PAIR_FIRST(reg));
3893 	reg2 = gpr(REG_PAIR_SECOND(reg));
3894 
3895 	if (mem & OFFS_REG_MASK) {
3896 		memw &= 0x3;
3897 		offs = gpr(OFFS_REG(mem));
3898 
3899 		if (memw != 0) {
3900 			FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp1) | R32A(offs) | ((sljit_ins)memw << 16)));
3901 			offs = tmp1;
3902 		} else if (!(type & SLJIT_MEM_STORE) && (base == reg1 || base == reg2) && (offs == reg1 || offs == reg2)) {
3903 			FAIL_IF(push_inst(compiler, 0xb9e80000 /* agrk */ | R12A(tmp1) | R4A(base) | R0A(offs)));
3904 			base = tmp1;
3905 			offs = 0;
3906 		}
3907 
3908 		memw = 0;
3909 	} else if (memw < -0x80000 || memw > 0x7ffff - ((reg2 == reg1 + 1) ? 0 : SSIZE_OF(sw))) {
3910 		FAIL_IF(push_load_imm_inst(compiler, tmp1, memw));
3911 
3912 		if (base == 0)
3913 			base = tmp1;
3914 		else
3915 			offs = tmp1;
3916 
3917 		memw = 0;
3918 	}
3919 
3920 	if (offs == 0 && reg2 == (reg1 + 1)) {
3921 		ins = (type & SLJIT_MEM_STORE) ? 0xeb0000000024 /* stmg */ : 0xeb0000000004 /* lmg */;
3922 		return push_inst(compiler, ins | R36A(reg1) | R32A(reg2) | R28A(base) | disp_s20((sljit_s32)memw));
3923 	}
3924 
3925 	ins = ((type & SLJIT_MEM_STORE) ? 0xe30000000024 /* stg */ : 0xe30000000004 /* lg */) | R32A(offs) | R28A(base);
3926 
3927 	if (!(type & SLJIT_MEM_STORE) && base == reg1) {
3928 		FAIL_IF(push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw))));
3929 		return push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw));
3930 	}
3931 
3932 	FAIL_IF(push_inst(compiler, ins | R36A(reg1) | disp_s20((sljit_s32)memw)));
3933 	return push_inst(compiler, ins | R36A(reg2) | disp_s20((sljit_s32)memw + SSIZE_OF(sw)));
3934 }
3935 
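/* sljit_emit_simd_mov uses vlr for register-to-register moves and vl/vst
   for memory operands; only 128 bit vector registers (reg_size == 4) are
   supported. The value shifted into bits 12-15 below is the alignment
   hint: 4 for operands known to be 16 byte aligned, 3 for 8 byte
   alignment, otherwise no hint is given. */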
3936 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_mov(struct sljit_compiler *compiler, sljit_s32 type,
3937 	sljit_s32 freg,
3938 	sljit_s32 srcdst, sljit_sw srcdstw)
3939 {
3940 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3941 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3942 	sljit_s32 alignment = SLJIT_SIMD_GET_ELEM2_SIZE(type);
3943 	struct addr addr;
3944 	sljit_ins ins;
3945 
3946 	CHECK_ERROR();
3947 	CHECK(check_sljit_emit_simd_mov(compiler, type, freg, srcdst, srcdstw));
3948 
3949 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
3950 
3951 	if (reg_size != 4)
3952 		return SLJIT_ERR_UNSUPPORTED;
3953 
3954 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
3955 		return SLJIT_ERR_UNSUPPORTED;
3956 
3957 	if (type & SLJIT_SIMD_TEST)
3958 		return SLJIT_SUCCESS;
3959 
3960 	if (!(srcdst & SLJIT_MEM)) {
3961 		if (type & SLJIT_SIMD_STORE)
3962 			ins = F36(srcdst) | F32(freg);
3963 		else
3964 			ins = F36(freg) | F32(srcdst);
3965 
3966 		return push_inst(compiler, 0xe70000000056 /* vlr */ | ins);
3967 	}
3968 
3969 	FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
3970 	ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
3971 
3972 	if (alignment >= 4)
3973 		ins |= 4 << 12;
3974 	else if (alignment == 3)
3975 		ins |= 3 << 12;
3976 
3977 	return push_inst(compiler, ((type & SLJIT_SIMD_STORE) ? 0xe7000000000e /* vst */ : 0xe70000000006 /* vl */) | ins);
3978 }
3979 
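/* sljit_emit_simd_replicate broadcasts a single value to every lane: vlrep
   for memory sources, vrep for a lane of a vector register, vgbm when the
   immediate is 0 or -1 and vrepi when it fits in 16 bits. Any other
   immediate is loaded into a GPR first and inserted with vlvg + vrep. */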
3980 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_replicate(struct sljit_compiler *compiler, sljit_s32 type,
3981 	sljit_s32 freg,
3982 	sljit_s32 src, sljit_sw srcw)
3983 {
3984 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
3985 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
3986 	struct addr addr;
3987 	sljit_gpr reg;
3988 	sljit_sw sign_ext;
3989 
3990 	CHECK_ERROR();
3991 	CHECK(check_sljit_emit_simd_replicate(compiler, type, freg, src, srcw));
3992 
3993 	ADJUST_LOCAL_OFFSET(src, srcw);
3994 
3995 	if (reg_size != 4)
3996 		return SLJIT_ERR_UNSUPPORTED;
3997 
3998 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
3999 		return SLJIT_ERR_UNSUPPORTED;
4000 
4001 	if (type & SLJIT_SIMD_TEST)
4002 		return SLJIT_SUCCESS;
4003 
4004 	if (src & SLJIT_MEM) {
4005 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4006 		return push_inst(compiler, 0xe70000000005 /* vlrep */ | F36(freg)
4007 			| R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset) | ((sljit_ins)elem_size << 12));
4008 	}
4009 
4010 	if (type & SLJIT_SIMD_FLOAT) {
4011 		if (src == SLJIT_IMM)
4012 			return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg));
4013 
4014 		return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src) | ((sljit_ins)elem_size << 12));
4015 	}
4016 
4017 	if (src == SLJIT_IMM) {
4018 		sign_ext = 0x10000;
4019 
4020 		switch (elem_size) {
4021 		case 0:
4022 			srcw &= 0xff;
4023 			sign_ext = (sljit_s8)srcw;
4024 			break;
4025 		case 1:
4026 			srcw &= 0xffff;
4027 			sign_ext = (sljit_s16)srcw;
4028 			break;
4029 		case 2:
4030 			if ((sljit_s32)srcw == (sljit_s16)srcw) {
4031 				srcw &= 0xffff;
4032 				sign_ext = (sljit_s16)srcw;
4033 			} else
4034 				srcw &= 0xffffffff;
4035 			break;
4036 		default:
4037 			if (srcw == (sljit_s16)srcw) {
4038 				srcw &= 0xffff;
4039 				sign_ext = (sljit_s16)srcw;
4040 			}
4041 			break;
4042 		}
4043 
4044 		if (sign_ext != 0x10000) {
4045 			if (sign_ext == 0 || sign_ext == -1)
4046 				return push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)
4047 					| (sign_ext == 0 ? 0 : ((sljit_ins)0xffff << 16)));
4048 
4049 			return push_inst(compiler, 0xe70000000045 /* vrepi */ | F36(freg)
4050 				| ((sljit_ins)srcw << 16) | ((sljit_ins)elem_size << 12));
4051 		}
4052 
4053 		push_load_imm_inst(compiler, tmp0, srcw);
4054 		reg = tmp0;
4055 	} else
4056 		reg = gpr(src);
4057 
4058 	FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ((sljit_ins)elem_size << 12)));
4059 	return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(freg) | ((sljit_ins)elem_size << 12));
4060 }
4061 
4062 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_mov(struct sljit_compiler *compiler, sljit_s32 type,
4063 	sljit_s32 freg, sljit_s32 lane_index,
4064 	sljit_s32 srcdst, sljit_sw srcdstw)
4065 {
4066 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4067 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4068 	struct addr addr;
4069 	sljit_gpr reg;
4070 	sljit_ins ins = 0;
4071 
4072 	CHECK_ERROR();
4073 	CHECK(check_sljit_emit_simd_lane_mov(compiler, type, freg, lane_index, srcdst, srcdstw));
4074 
4075 	ADJUST_LOCAL_OFFSET(srcdst, srcdstw);
4076 
4077 	if (reg_size != 4)
4078 		return SLJIT_ERR_UNSUPPORTED;
4079 
4080 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4081 		return SLJIT_ERR_UNSUPPORTED;
4082 
4083 	if (type & SLJIT_SIMD_TEST)
4084 		return SLJIT_SUCCESS;
4085 
4086 	if (srcdst & SLJIT_MEM) {
4087 		FAIL_IF(make_addr_bx(compiler, &addr, srcdst, srcdstw, tmp1));
4088 		ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4089 	}
4090 
4091 	if (type & SLJIT_SIMD_LANE_ZERO) {
4092 		if ((srcdst & SLJIT_MEM) && lane_index == ((1 << (3 - elem_size)) - 1))
4093 			return push_inst(compiler, 0xe70000000004 /* vllez */ | ins | ((sljit_ins)elem_size << 12));
4094 
4095 		if ((type & SLJIT_SIMD_FLOAT) && freg == srcdst) {
4096 			FAIL_IF(push_inst(compiler, 0xe70000000056 /* vlr */ | F36(TMP_FREG1) | F32(freg)));
4097 			srcdst = TMP_FREG1;
4098 			srcdstw = 0;
4099 		}
4100 
4101 		FAIL_IF(push_inst(compiler, 0xe70000000044 /* vgbm */ | F36(freg)));
4102 	}
4103 
4104 	if (srcdst & SLJIT_MEM) {
4105 		switch (elem_size) {
4106 		case 0:
4107 			ins |= 0xe70000000000 /* vleb */;
4108 			break;
4109 		case 1:
4110 			ins |= 0xe70000000001 /* vleh */;
4111 			break;
4112 		case 2:
4113 			ins |= 0xe70000000003 /* vlef */;
4114 			break;
4115 		default:
4116 			ins |= 0xe70000000002 /* vleg */;
4117 			break;
4118 		}
4119 
4120 		/* Convert vleb - vleg to their store forms (vsteb - vsteg). */
4121 		if (type & SLJIT_SIMD_STORE)
4122 			ins |= 0x8;
4123 
4124 		return push_inst(compiler, ins | ((sljit_ins)lane_index << 12));
4125 	}
4126 
4127 	if (type & SLJIT_SIMD_FLOAT) {
4128 		if (type & SLJIT_SIMD_STORE)
4129 			return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(srcdst) | F32(freg) | ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12));
4130 
4131 		if (elem_size == 3) {
4132 			if (lane_index == 0)
4133 				ins = F32(srcdst) | F28(freg) | (1 << 12);
4134 			else
4135 				ins = F32(freg) | F28(srcdst);
4136 
4137 			return push_inst(compiler, 0xe70000000084 /* vpdi */ | F36(freg) | ins);
4138 		}
4139 
4140 		FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(tmp0) | F32(srcdst) | ((sljit_ins)2 << 12)));
4141 		return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(tmp0) | ((sljit_ins)lane_index << 16) | ((sljit_ins)2 << 12));
4142 	}
4143 
4144 	if (srcdst == SLJIT_IMM) {
4145 		switch (elem_size) {
4146 		case 0:
4147 			ins = 0xe70000000040 /* vleib */;
4148 			srcdstw &= 0xff;
4149 			break;
4150 		case 1:
4151 			ins = 0xe70000000041 /* vleih */;
4152 			srcdstw &= 0xffff;
4153 			break;
4154 		case 2:
4155 			if ((sljit_s32)srcdstw == (sljit_s16)srcdstw) {
4156 				srcdstw &= 0xffff;
4157 				ins = 0xe70000000043 /* vleif */;
4158 			} else
4159 				srcdstw &= 0xffffffff;
4160 			break;
4161 		default:
4162 			if (srcdstw == (sljit_s16)srcdstw) {
4163 				srcdstw &= 0xffff;
4164 				ins = 0xe70000000042 /* vleig */;
4165 			}
4166 			break;
4167 		}
4168 
4169 		if (ins != 0)
4170 			return push_inst(compiler, ins | F36(freg) | ((sljit_ins)srcdstw << 16) | ((sljit_ins)lane_index << 12));
4171 
4172 		push_load_imm_inst(compiler, tmp0, srcdstw);
4173 		reg = tmp0;
4174 	} else
4175 		reg = gpr(srcdst);
4176 
4177 	ins = ((sljit_ins)lane_index << 16) | ((sljit_ins)elem_size << 12);
4178 
4179 	if (!(type & SLJIT_SIMD_STORE))
4180 		return push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(freg) | R32A(reg) | ins);
4181 
4182 	FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(reg) | F32(freg) | ins));
4183 
4184 	if (!(type & SLJIT_SIMD_LANE_SIGNED) || elem_size >= 3)
4185 		return SLJIT_SUCCESS;
4186 
4187 	switch (elem_size) {
4188 	case 0:
4189 		ins = 0xb9060000 /* lgbr */;
4190 		break;
4191 	case 1:
4192 		ins = 0xb9070000 /* lghr */;
4193 		break;
4194 	default:
4195 		ins = 0xb9140000 /* lgfr */;
4196 		break;
4197 	}
4198 
4199 	return push_inst(compiler, ins | R4A(reg) | R0A(reg));
4200 }
4201 
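/* sljit_emit_simd_lane_replicate is a single vrep: the source lane index
   goes into the immediate field and the element size into the M field. */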
4202 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_lane_replicate(struct sljit_compiler *compiler, sljit_s32 type,
4203 	sljit_s32 freg,
4204 	sljit_s32 src, sljit_s32 src_lane_index)
4205 {
4206 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4207 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4208 
4209 	CHECK_ERROR();
4210 	CHECK(check_sljit_emit_simd_lane_replicate(compiler, type, freg, src, src_lane_index));
4211 
4212 	if (reg_size != 4)
4213 		return SLJIT_ERR_UNSUPPORTED;
4214 
4215 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4216 		return SLJIT_ERR_UNSUPPORTED;
4217 
4218 	if (type & SLJIT_SIMD_TEST)
4219 		return SLJIT_SUCCESS;
4220 
4221 	return push_inst(compiler, 0xe7000000004d /* vrep */ | F36(freg) | F32(src)
4222 		| ((sljit_ins)src_lane_index << 16) | ((sljit_ins)elem_size << 12));
4223 }
4224 
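/* sljit_emit_simd_extend widens packed elements. A memory source is first
   loaded as one element whose width matches the amount of source data
   needed (vleg/vlef/vleh for 2x/4x/8x widening). Integer elements are then
   widened step by step with vuph (signed) or vuplh (unsigned); the float
   path converts f32 to f64 using vuplh, vesl and vfll. */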
4225 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_extend(struct sljit_compiler *compiler, sljit_s32 type,
4226 	sljit_s32 freg,
4227 	sljit_s32 src, sljit_sw srcw)
4228 {
4229 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4230 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4231 	sljit_s32 elem2_size = SLJIT_SIMD_GET_ELEM2_SIZE(type);
4232 	struct addr addr;
4233 	sljit_ins ins;
4234 
4235 	CHECK_ERROR();
4236 	CHECK(check_sljit_emit_simd_extend(compiler, type, freg, src, srcw));
4237 
4238 	ADJUST_LOCAL_OFFSET(src, srcw);
4239 
4240 	if (reg_size != 4)
4241 		return SLJIT_ERR_UNSUPPORTED;
4242 
4243 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4244 		return SLJIT_ERR_UNSUPPORTED;
4245 
4246 	if (type & SLJIT_SIMD_TEST)
4247 		return SLJIT_SUCCESS;
4248 
4249 	if (src & SLJIT_MEM) {
4250 		FAIL_IF(make_addr_bx(compiler, &addr, src, srcw, tmp1));
4251 		ins = F36(freg) | R32A(addr.index) | R28A(addr.base) | disp_s20(addr.offset);
4252 
4253 		switch (elem2_size - elem_size) {
4254 		case 1:
4255 			ins |= 0xe70000000002 /* vleg */;
4256 			break;
4257 		case 2:
4258 			ins |= 0xe70000000003 /* vlef */;
4259 			break;
4260 		default:
4261 			ins |= 0xe70000000001 /* vleh */;
4262 			break;
4263 		}
4264 
4265 		FAIL_IF(push_inst(compiler, ins));
4266 		src = freg;
4267 	}
4268 
4269 	if (type & SLJIT_SIMD_FLOAT) {
4270 		FAIL_IF(push_inst(compiler, 0xe700000000d5 /* vuplh */ | F36(freg) | F32(src) | (2 << 12)));
4271 		FAIL_IF(push_inst(compiler, 0xe70000000030 /* vesl */ | F36(freg) | F32(freg) | (32 << 16) | (3 << 12)));
4272 		return push_inst(compiler, 0xe700000000c4 /* vfll */ | F36(freg) | F32(freg) | (2 << 12));
4273 	}
4274 
4275 	ins = ((type & SLJIT_SIMD_EXTEND_SIGNED) ? 0xe700000000d7 /* vuph */ : 0xe700000000d5 /* vuplh */) | F36(freg);
4276 
4277 	do {
4278 		FAIL_IF(push_inst(compiler, ins | F32(src) | ((sljit_ins)elem_size << 12)));
4279 		src = freg;
4280 	} while (++elem_size < elem2_size);
4281 
4282 	return SLJIT_SUCCESS;
4283 }
4284 
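/* sljit_emit_simd_sign gathers the sign bit of every lane into a general
   register. The 64 bit constants loaded below are bit indexes for vbperm,
   which collects the selected bits of freg into a small integer inside
   TMP_FREG1; vlgv then moves that integer to the destination GPR. */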
4285 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_sign(struct sljit_compiler *compiler, sljit_s32 type,
4286 	sljit_s32 freg,
4287 	sljit_s32 dst, sljit_sw dstw)
4288 {
4289 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4290 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4291 	sljit_gpr dst_r;
4292 
4293 	CHECK_ERROR();
4294 	CHECK(check_sljit_emit_simd_sign(compiler, type, freg, dst, dstw));
4295 
4296 	ADJUST_LOCAL_OFFSET(dst, dstw);
4297 
4298 	if (reg_size != 4)
4299 		return SLJIT_ERR_UNSUPPORTED;
4300 
4301 	if ((type & SLJIT_SIMD_FLOAT) && elem_size < 2)
4302 		return SLJIT_ERR_UNSUPPORTED;
4303 
4304 	if (type & SLJIT_SIMD_TEST)
4305 		return SLJIT_SUCCESS;
4306 
4307 	switch (elem_size) {
4308 	case 0:
4309 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x4048505860687078);
4310 		push_load_imm_inst(compiler, tmp1, (sljit_sw)0x0008101820283038);
4311 		FAIL_IF(push_inst(compiler, 0xe70000000062 /* vlvgp */ | F36(TMP_FREG1) | R32A(tmp1) | R28A(tmp0)));
4312 		break;
4313 	case 1:
4314 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x0010203040506070);
4315 		break;
4316 	case 2:
4317 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808000204060);
4318 		break;
4319 	default:
4320 		push_load_imm_inst(compiler, tmp0, (sljit_sw)0x8080808080800040);
4321 		break;
4322 	}
4323 
4324 	if (elem_size != 0)
4325 		FAIL_IF(push_inst(compiler, 0xe70000000022 /* vlvg */ | F36(TMP_FREG1) | R32A(tmp0) | (1 << 16) | (3 << 12)));
4326 
4327 	FAIL_IF(push_inst(compiler, 0xe70000000085 /* vbperm */ | F36(TMP_FREG1) | F32(freg) | F28(TMP_FREG1)));
4328 
4329 	dst_r = FAST_IS_REG(dst) ? gpr(dst) : tmp0;
4330 	FAIL_IF(push_inst(compiler, 0xe70000000021 /* vlgv */ | R36A(dst_r) | F32(TMP_FREG1)
4331 		| (elem_size == 0 ? ((3 << 16) | (1 << 12)) : (7 << 16))));
4332 
4333 	if (dst_r == tmp0)
4334 		return store_word(compiler, tmp0, dst, dstw, type & SLJIT_32);
4335 
4336 	return SLJIT_SUCCESS;
4337 }
4338 
4339 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_simd_op2(struct sljit_compiler *compiler, sljit_s32 type,
4340 	sljit_s32 dst_freg, sljit_s32 src1_freg, sljit_s32 src2_freg)
4341 {
4342 	sljit_s32 reg_size = SLJIT_SIMD_GET_REG_SIZE(type);
4343 	sljit_s32 elem_size = SLJIT_SIMD_GET_ELEM_SIZE(type);
4344 	sljit_ins ins = 0;
4345 
4346 	CHECK_ERROR();
4347 	CHECK(check_sljit_emit_simd_op2(compiler, type, dst_freg, src1_freg, src2_freg));
4348 
4349 	if (reg_size != 4)
4350 		return SLJIT_ERR_UNSUPPORTED;
4351 
4352 	if ((type & SLJIT_SIMD_FLOAT) && (elem_size < 2 || elem_size > 3))
4353 		return SLJIT_ERR_UNSUPPORTED;
4354 
4355 	if (type & SLJIT_SIMD_TEST)
4356 		return SLJIT_SUCCESS;
4357 
4358 	switch (SLJIT_SIMD_GET_OPCODE(type)) {
4359 	case SLJIT_SIMD_OP2_AND:
4360 		ins = 0xe70000000068 /* vn */;
4361 		break;
4362 	case SLJIT_SIMD_OP2_OR:
4363 		ins = 0xe7000000006a /* vo */;
4364 		break;
4365 	case SLJIT_SIMD_OP2_XOR:
4366 		ins = 0xe7000000006d /* vx */;
4367 		break;
4368 	}
4369 
4370 	if (type & SLJIT_SIMD_TEST)
4371 		return SLJIT_SUCCESS;
4372 
4373 	return push_inst(compiler, ins | F36(dst_freg) | F32(src1_freg) | F28(src2_freg));
4374 }
4375 
4376 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_load(struct sljit_compiler *compiler, sljit_s32 op,
4377 	sljit_s32 dst_reg,
4378 	sljit_s32 mem_reg)
4379 {
4380 	CHECK_ERROR();
4381 	CHECK(check_sljit_emit_atomic_load(compiler, op, dst_reg, mem_reg));
4382 
4383 	SLJIT_SKIP_CHECKS(compiler);
4384 	return sljit_emit_op1(compiler, op, dst_reg, 0, SLJIT_MEM1(mem_reg), 0);
4385 }
4386 
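/* sljit_emit_atomic_store is the compare-and-swap half of the atomic pair:
   32 and 64 bit values map directly to cs/csg, with temp_reg holding the
   expected old value. 8 and 16 bit values are emulated by a word sized cs
   on the containing aligned word; the instructions below shift and mask
   the affected bytes into place. */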
4387 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_atomic_store(struct sljit_compiler *compiler, sljit_s32 op,
4388 	sljit_s32 src_reg,
4389 	sljit_s32 mem_reg,
4390 	sljit_s32 temp_reg)
4391 {
4392 	sljit_ins mask;
4393 	sljit_gpr tmp_r = gpr(temp_reg);
4394 	sljit_gpr mem_r = gpr(mem_reg);
4395 
4396 	CHECK_ERROR();
4397 	CHECK(check_sljit_emit_atomic_store(compiler, op, src_reg, mem_reg, temp_reg));
4398 
4399 	switch (GET_OPCODE(op)) {
4400 	case SLJIT_MOV32:
4401 	case SLJIT_MOV_U32:
4402 		return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp_r) | R16A(gpr(src_reg)) | R12A(mem_r));
4403 	case SLJIT_MOV_U8:
4404 		mask = 0xff;
4405 		break;
4406 	case SLJIT_MOV_U16:
4407 		mask = 0xffff;
4408 		break;
4409 	default:
4410 		return push_inst(compiler, 0xeb0000000030 /* csg */ | R36A(tmp_r) | R32A(gpr(src_reg)) | R28A(mem_r));
4411 	}
4412 
4413 	/* tmp0 = (src_reg ^ tmp_r) & mask */
4414 	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | mask));
4415 	FAIL_IF(push_inst(compiler, 0xb9e70000 /* xgrk */ | R4A(tmp0) | R0A(gpr(src_reg)) | R12A(tmp_r)));
4416 	FAIL_IF(push_inst(compiler, 0xa7090000 /* lghi */ | R20A(tmp_r) | 0xfffc));
4417 	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp0) | R0A(tmp1)));
4418 
4419 	/* tmp0 = tmp0 << (((mem_r ^ 0x3) & 0x3) << 3) */
4420 	FAIL_IF(push_inst(compiler, 0xa50f0000 /* llill */ | R20A(tmp1) | (sljit_ins)((mask == 0xff) ? 0x18 : 0x10)));
4421 	FAIL_IF(push_inst(compiler, 0xb9800000 /* ngr */ | R4A(tmp_r) | R0A(mem_r)));
4422 	FAIL_IF(push_inst(compiler, 0xec0000000057 /* rxsbg */ | R36A(tmp1) | R32A(mem_r) | (59 << 24) | (60 << 16) | (3 << 8)));
4423 	FAIL_IF(push_inst(compiler, 0xeb000000000d /* sllg */ | R36A(tmp0) | R32A(tmp0) | R28A(tmp1)));
4424 
4425 	/* Already computed: tmp_r = mem_r & ~0x3 */
4426 
4427 	FAIL_IF(push_inst(compiler, 0x58000000 /* l */ | R20A(tmp1) | R12A(tmp_r)));
4428 	FAIL_IF(push_inst(compiler, 0x1700 /* xr */ | R4A(tmp0) | R0A(tmp1)));
4429 	return push_inst(compiler, 0xba000000 /* cs */ | R20A(tmp1) | R16A(tmp0) | R12A(tmp_r));
4430 }
4431 
4432 /* --------------------------------------------------------------------- */
4433 /*  Other instructions                                                   */
4434 /* --------------------------------------------------------------------- */
4435 
4436 /* On s390x we build a literal pool to hold constants. This has two main
4437    advantages:
4438 
4439      1. we only need one instruction in the instruction stream (LGRL)
4440      2. we can store 64 bit addresses and use 32 bit offsets
4441 
4442    To retrofit the extra information needed to build the literal pool we
4443    add a new sljit_s390x_const struct that contains the initial value but
4444    can still be cast to a sljit_const. */
4445 
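/* Illustrative use of this interface (the functions are the public sljit
   API, the values are made up):

       struct sljit_const *c = sljit_emit_const(compiler, SLJIT_R0, 0, 42);
       ...   generate the code, then later:
       sljit_set_const(sljit_get_const_addr(c), 1234,
           sljit_get_executable_offset(compiler));

   sljit_set_const() simply rewrites the 64 bit literal in the pool (see
   sljit_set_jump_addr below); the LGRL emitted by sljit_emit_const keeps
   loading from the same pool slot, so no instruction bytes change. */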
4446 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
4447 {
4448 	struct sljit_s390x_const *const_;
4449 	sljit_gpr dst_r;
4450 
4451 	CHECK_ERROR_PTR();
4452 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
4453 
4454 	const_ = (struct sljit_s390x_const*)ensure_abuf(compiler,
4455 					sizeof(struct sljit_s390x_const));
4456 	PTR_FAIL_IF(!const_);
4457 	set_const((struct sljit_const*)const_, compiler);
4458 	const_->init_value = init_value;
4459 
4460 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4461 	if (have_genext())
4462 		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4463 	else {
4464 		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4465 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4466 	}
4467 
4468 	if (dst & SLJIT_MEM)
4469 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0 /* always 64-bit */));
4470 
4471 	return (struct sljit_const*)const_;
4472 }
4473 
4474 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_jump_addr(sljit_uw addr, sljit_uw new_target, sljit_sw executable_offset)
4475 {
4476 	/* Update the constant pool. */
4477 	sljit_uw *ptr = (sljit_uw *)addr;
4478 	SLJIT_UNUSED_ARG(executable_offset);
4479 
4480 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 0);
4481 	*ptr = new_target;
4482 	SLJIT_UPDATE_WX_FLAGS(ptr, ptr + 1, 1);
4483 	SLJIT_CACHE_FLUSH(ptr, ptr + 1);
4484 }
4485 
4486 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_const(sljit_uw addr, sljit_sw new_constant, sljit_sw executable_offset)
4487 {
4488 	sljit_set_jump_addr(addr, (sljit_uw)new_constant, executable_offset);
4489 }
4490 
4491 SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_mov_addr(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
4492 {
4493 	struct sljit_jump *jump;
4494 	sljit_gpr dst_r;
4495 
4496 	CHECK_ERROR_PTR();
4497 	CHECK_PTR(check_sljit_emit_mov_addr(compiler, dst, dstw));
4498 	ADJUST_LOCAL_OFFSET(dst, dstw);
4499 
4500 	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
4501 	PTR_FAIL_IF(!jump);
4502 	set_mov_addr(jump, compiler, 0);
4503 
4504 	dst_r = FAST_IS_REG(dst) ? gpr(dst & REG_MASK) : tmp0;
4505 
4506 	if (have_genext())
4507 		PTR_FAIL_IF(push_inst(compiler, lgrl(dst_r, 0)));
4508 	else {
4509 		PTR_FAIL_IF(push_inst(compiler, larl(tmp1, 0)));
4510 		PTR_FAIL_IF(push_inst(compiler, lg(dst_r, 0, r0, tmp1)));
4511 	}
4512 
4513 	if (dst & SLJIT_MEM)
4514 		PTR_FAIL_IF(store_word(compiler, dst_r, dst, dstw, 0));
4515 
4516 	return jump;
4517 }
4518 
4519 /* TODO(carenas): EVAL probably should move up or be refactored */
4520 #undef WHEN2
4521 #undef EVAL
4522 
4523 #undef tmp1
4524 #undef tmp0
4525 
4526 /* TODO(carenas): undef other macros that spill like is_u12? */
4527