1 /*
2  *    Stack-less Just-In-Time compiler
3  *
4  *    Copyright Zoltan Herczeg (hzmester@freemail.hu). All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without modification, are
7  * permitted provided that the following conditions are met:
8  *
9  *   1. Redistributions of source code must retain the above copyright notice, this list of
10  *      conditions and the following disclaimer.
11  *
12  *   2. Redistributions in binary form must reproduce the above copyright notice, this list
13  *      of conditions and the following disclaimer in the documentation and/or other materials
14  *      provided with the distribution.
15  *
16  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER(S) AND CONTRIBUTORS ``AS IS'' AND ANY
17  * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
18  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT
19  * SHALL THE COPYRIGHT HOLDER(S) OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
20  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
21  * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
22  * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
23  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
24  * ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25  */
26 
sljit_get_platform_name(void)27 SLJIT_API_FUNC_ATTRIBUTE const char* sljit_get_platform_name(void)
28 {
29 	return "PowerPC" SLJIT_CPUINFO;
30 }
31 
32 /* Length of an instruction word.
33    Both for ppc-32 and ppc-64. */
34 typedef sljit_u32 sljit_ins;
35 
36 #if ((defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32) && (defined _AIX)) \
37 	|| (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
38 #define SLJIT_PPC_STACK_FRAME_V2 1
39 #endif
40 
41 #ifdef _AIX
42 #include <sys/cache.h>
43 #endif
44 
45 #if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
46 #define SLJIT_PASS_ENTRY_ADDR_TO_CALL 1
47 #endif
48 
49 #if (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL)
50 
/* Flushes the data cache and invalidates the instruction cache over the
   word range [from, to), so freshly written machine code becomes visible
   to instruction fetch. Compiled only when SLJIT supplies its own cache
   flush implementation (SLJIT_CACHE_FLUSH_OWN_IMPL). */
static void ppc_cache_flush(sljit_ins *from, sljit_ins *to)
{
#ifdef _AIX
	/* AIX system service declared in <sys/cache.h>. */
	_sync_cache_range((caddr_t)from, (int)((size_t)to - (size_t)from));
#elif defined(__GNUC__) || (defined(__IBM_GCC_ASM) && __IBM_GCC_ASM)
#	if defined(_ARCH_PWR) || defined(_ARCH_PWR2)
	/* Cache flush for POWER architecture. */
	while (from < to) {
		__asm__ volatile (
			"clf 0, %0\n"
			"dcs\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "ics" );
#	elif defined(_ARCH_COM) && !defined(_ARCH_PPC)
#	error "Cache flush is not implemented for PowerPC/POWER common mode."
#	else
	/* Cache flush for PowerPC architecture: flush the data cache block,
	   make the store visible, then invalidate the matching icache block. */
	while (from < to) {
		__asm__ volatile (
			"dcbf 0, %0\n"
			"sync\n"
			"icbi 0, %0\n"
			: : "r"(from)
		);
		from++;
	}
	__asm__ volatile ( "isync" );
#	endif
#	ifdef __xlc__
#	warning "This file may fail to compile if -qfuncsect is used"
#	endif
#elif defined(__xlc__)
#error "Please enable GCC syntax for inline assembly statements with -qasm=gcc"
#else
#error "This platform requires a cache flush implementation."
#endif /* _AIX */
}
91 
92 #endif /* (defined SLJIT_CACHE_FLUSH_OWN_IMPL && SLJIT_CACHE_FLUSH_OWN_IMPL) */
93 
/* Internal temporary registers, placed after the visible register set. */
#define TMP_REG1	(SLJIT_NUMBER_OF_REGISTERS + 2)
#define TMP_REG2	(SLJIT_NUMBER_OF_REGISTERS + 3)
#define TMP_REG3	(SLJIT_NUMBER_OF_REGISTERS + 4)
/* Register kept at zero (cleared in sljit_emit_enter, restored on return). */
#define TMP_ZERO	(SLJIT_NUMBER_OF_REGISTERS + 5)

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
/* Dedicated register for passing the entry address to calls. */
#define TMP_CALL_REG	(SLJIT_NUMBER_OF_REGISTERS + 6)
#else
#define TMP_CALL_REG	TMP_REG2
#endif

#define TMP_FREG1	(0)
#define TMP_FREG2	(SLJIT_NUMBER_OF_FLOAT_REGISTERS + 1)

/* Maps sljit register indices to PowerPC GPR numbers; index 0 is unused.
   The trailing entries cover SLJIT_SP and the TMP_* registers above
   (the value 1 is presumably r1, the PowerPC stack pointer — consistent
   with its use via A(SLJIT_SP) in the prologue/epilogue below). */
static const sljit_u8 reg_map[SLJIT_NUMBER_OF_REGISTERS + 7] = {
	0, 3, 4, 5, 6, 7, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 1, 8, 9, 10, 31, 12
};
111 
112 /* --------------------------------------------------------------------- */
/*  Instruction forms                                                    */
114 /* --------------------------------------------------------------------- */
115 #define D(d)		(reg_map[d] << 21)
116 #define S(s)		(reg_map[s] << 21)
117 #define A(a)		(reg_map[a] << 16)
118 #define B(b)		(reg_map[b] << 11)
119 #define C(c)		(reg_map[c] << 6)
120 #define FD(fd)		((fd) << 21)
121 #define FS(fs)		((fs) << 21)
122 #define FA(fa)		((fa) << 16)
123 #define FB(fb)		((fb) << 11)
124 #define FC(fc)		((fc) << 6)
125 #define IMM(imm)	((imm) & 0xffff)
126 #define CRD(d)		((d) << 21)
127 
128 /* Instruction bit sections.
129    OE and Rc flag (see ALT_SET_FLAGS). */
130 #define OE(flags)	((flags) & ALT_SET_FLAGS)
131 /* Rc flag (see ALT_SET_FLAGS). */
132 #define RC(flags)	(((flags) & ALT_SET_FLAGS) >> 10)
133 #define HI(opcode)	((opcode) << 26)
134 #define LO(opcode)	((opcode) << 1)
135 
136 #define ADD		(HI(31) | LO(266))
137 #define ADDC		(HI(31) | LO(10))
138 #define ADDE		(HI(31) | LO(138))
139 #define ADDI		(HI(14))
140 #define ADDIC		(HI(13))
141 #define ADDIS		(HI(15))
142 #define ADDME		(HI(31) | LO(234))
143 #define AND		(HI(31) | LO(28))
144 #define ANDI		(HI(28))
145 #define ANDIS		(HI(29))
146 #define Bx		(HI(18))
147 #define BCx		(HI(16))
148 #define BCCTR		(HI(19) | LO(528) | (3 << 11))
149 #define BLR		(HI(19) | LO(16) | (0x14 << 21))
150 #define CNTLZD		(HI(31) | LO(58))
151 #define CNTLZW		(HI(31) | LO(26))
152 #define CMP		(HI(31) | LO(0))
153 #define CMPI		(HI(11))
154 #define CMPL		(HI(31) | LO(32))
155 #define CMPLI		(HI(10))
156 #define CROR		(HI(19) | LO(449))
157 #define DCBT		(HI(31) | LO(278))
158 #define DIVD		(HI(31) | LO(489))
159 #define DIVDU		(HI(31) | LO(457))
160 #define DIVW		(HI(31) | LO(491))
161 #define DIVWU		(HI(31) | LO(459))
162 #define EXTSB		(HI(31) | LO(954))
163 #define EXTSH		(HI(31) | LO(922))
164 #define EXTSW		(HI(31) | LO(986))
165 #define FABS		(HI(63) | LO(264))
166 #define FADD		(HI(63) | LO(21))
167 #define FADDS		(HI(59) | LO(21))
168 #define FCFID		(HI(63) | LO(846))
169 #define FCMPU		(HI(63) | LO(0))
170 #define FCTIDZ		(HI(63) | LO(815))
171 #define FCTIWZ		(HI(63) | LO(15))
172 #define FDIV		(HI(63) | LO(18))
173 #define FDIVS		(HI(59) | LO(18))
174 #define FMR		(HI(63) | LO(72))
175 #define FMUL		(HI(63) | LO(25))
176 #define FMULS		(HI(59) | LO(25))
177 #define FNEG		(HI(63) | LO(40))
178 #define FRSP		(HI(63) | LO(12))
179 #define FSUB		(HI(63) | LO(20))
180 #define FSUBS		(HI(59) | LO(20))
181 #define LD		(HI(58) | 0)
182 #define LWZ		(HI(32))
183 #define MFCR		(HI(31) | LO(19))
184 #define MFLR		(HI(31) | LO(339) | 0x80000)
185 #define MFXER		(HI(31) | LO(339) | 0x10000)
186 #define MTCTR		(HI(31) | LO(467) | 0x90000)
187 #define MTLR		(HI(31) | LO(467) | 0x80000)
188 #define MTXER		(HI(31) | LO(467) | 0x10000)
189 #define MULHD		(HI(31) | LO(73))
190 #define MULHDU		(HI(31) | LO(9))
191 #define MULHW		(HI(31) | LO(75))
192 #define MULHWU		(HI(31) | LO(11))
193 #define MULLD		(HI(31) | LO(233))
194 #define MULLI		(HI(7))
195 #define MULLW		(HI(31) | LO(235))
196 #define NEG		(HI(31) | LO(104))
197 #define NOP		(HI(24))
198 #define NOR		(HI(31) | LO(124))
199 #define OR		(HI(31) | LO(444))
200 #define ORI		(HI(24))
201 #define ORIS		(HI(25))
202 #define RLDICL		(HI(30))
203 #define RLWINM		(HI(21))
204 #define SLD		(HI(31) | LO(27))
205 #define SLW		(HI(31) | LO(24))
206 #define SRAD		(HI(31) | LO(794))
207 #define SRADI		(HI(31) | LO(413 << 1))
208 #define SRAW		(HI(31) | LO(792))
209 #define SRAWI		(HI(31) | LO(824))
210 #define SRD		(HI(31) | LO(539))
211 #define SRW		(HI(31) | LO(536))
212 #define STD		(HI(62) | 0)
213 #define STDU		(HI(62) | 1)
214 #define STDUX		(HI(31) | LO(181))
215 #define STFIWX		(HI(31) | LO(983))
216 #define STW		(HI(36))
217 #define STWU		(HI(37))
218 #define STWUX		(HI(31) | LO(183))
219 #define SUBF		(HI(31) | LO(40))
220 #define SUBFC		(HI(31) | LO(8))
221 #define SUBFE		(HI(31) | LO(136))
222 #define SUBFIC		(HI(8))
223 #define XOR		(HI(31) | LO(316))
224 #define XORI		(HI(26))
225 #define XORIS		(HI(27))
226 
227 #define SIMM_MAX	(0x7fff)
228 #define SIMM_MIN	(-0x8000)
229 #define UIMM_MAX	(0xffff)
230 
231 #if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
sljit_set_function_context(void ** func_ptr,struct sljit_function_context * context,sljit_sw addr,void * func)232 SLJIT_API_FUNC_ATTRIBUTE void sljit_set_function_context(void** func_ptr, struct sljit_function_context* context, sljit_sw addr, void* func)
233 {
234 	sljit_sw* ptrs;
235 	if (func_ptr)
236 		*func_ptr = (void*)context;
237 	ptrs = (sljit_sw*)func;
238 	context->addr = addr ? addr : ptrs[0];
239 	context->r2 = ptrs[1];
240 	context->r11 = ptrs[2];
241 }
242 #endif
243 
push_inst(struct sljit_compiler * compiler,sljit_ins ins)244 static sljit_s32 push_inst(struct sljit_compiler *compiler, sljit_ins ins)
245 {
246 	sljit_ins *ptr = (sljit_ins*)ensure_buf(compiler, sizeof(sljit_ins));
247 	FAIL_IF(!ptr);
248 	*ptr = ins;
249 	compiler->size++;
250 	return SLJIT_SUCCESS;
251 }
252 
/* Decides how the jump ending at code_ptr can be encoded and records the
   chosen patching method in jump->flags (PATCH_B / PATCH_ABS_B /
   PATCH_ABS32 / PATCH_ABS48 / REMOVE_COND). Returns 1 when the reserved
   absolute-address-load sequence can be shortened, 0 when the full form
   must be kept. */
static SLJIT_INLINE sljit_s32 detect_jump_type(struct sljit_jump *jump, sljit_ins *code_ptr, sljit_ins *code, sljit_sw executable_offset)
{
	sljit_sw diff;
	sljit_uw target_addr;
	sljit_sw extra_jump_flags;

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	if (jump->flags & (SLJIT_REWRITABLE_JUMP | IS_CALL))
		return 0;
#else
	/* Rewritable jumps keep the long form so they can be re-targeted later. */
	if (jump->flags & SLJIT_REWRITABLE_JUMP)
		return 0;
#endif

	if (jump->flags & JUMP_ADDR)
		target_addr = jump->u.target;
	else {
		SLJIT_ASSERT(jump->flags & JUMP_LABEL);
		target_addr = (sljit_uw)(code + jump->u.label->size) + (sljit_uw)executable_offset;
	}

#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL) && (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* Calls pass the entry address in a register, so the address must
	   still be materialized; only the immediate width can shrink. */
	if (jump->flags & IS_CALL)
		goto keep_address;
#endif

	/* Word-aligned distance from the current code position to the target. */
	diff = ((sljit_sw)target_addr - (sljit_sw)(code_ptr) - executable_offset) & ~0x3l;

	extra_jump_flags = 0;
	if (jump->flags & IS_COND) {
		/* A conditional branch (bc) holds a 16 bit displacement. */
		if (diff <= 0x7fff && diff >= -0x8000) {
			jump->flags |= PATCH_B;
			return 1;
		}
		if (target_addr <= 0xffff) {
			jump->flags |= PATCH_B | PATCH_ABS_B;
			return 1;
		}
		/* Out of bc range: invert the condition and use an unconditional
		   branch after it (one extra instruction, see REMOVE_COND below). */
		extra_jump_flags = REMOVE_COND;

		diff -= sizeof(sljit_ins);
	}

	/* An unconditional branch (b) holds a 26 bit displacement. */
	if (diff <= 0x01ffffff && diff >= -0x02000000) {
		jump->flags |= PATCH_B | extra_jump_flags;
		return 1;
	}

	if (target_addr <= 0x03ffffff) {
		jump->flags |= PATCH_B | PATCH_ABS_B | extra_jump_flags;
		return 1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
keep_address:
#endif
	/* Shorter absolute immediate loads for 32 / 48 bit addresses. */
	if (target_addr <= 0x7fffffff) {
		jump->flags |= PATCH_ABS32;
		return 1;
	}

	if (target_addr <= 0x7fffffffffffl) {
		jump->flags |= PATCH_ABS48;
		return 1;
	}
#endif

	return 0;
}
323 
/* Assembles the instruction words collected in the compiler's memory
   fragments into one executable buffer, resolving labels, jumps and
   constants in two passes. Returns the callable entry point (a function
   descriptor on SLJIT_INDIRECT_CALL ABIs) or NULL on failure. */
SLJIT_API_FUNC_ATTRIBUTE void* sljit_generate_code(struct sljit_compiler *compiler)
{
	struct sljit_memory_fragment *buf;
	sljit_ins *code;
	sljit_ins *code_ptr;
	sljit_ins *buf_ptr;
	sljit_ins *buf_end;
	sljit_uw word_count;
	sljit_sw executable_offset;
	sljit_uw addr;

	struct sljit_label *label;
	struct sljit_jump *jump;
	struct sljit_const *const_;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_generate_code(compiler));
	reverse_buf(compiler);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* Reserve room after the code for a function descriptor; on 64 bit
	   an extra padding word keeps it doubleword aligned. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	compiler->size += (compiler->size & 0x1) + (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#else
	compiler->size += (sizeof(struct sljit_function_context) / sizeof(sljit_ins));
#endif
#endif
	code = (sljit_ins*)SLJIT_MALLOC_EXEC(compiler->size * sizeof(sljit_ins));
	PTR_FAIL_WITH_EXEC_IF(code);
	buf = compiler->buf;

	code_ptr = code;
	word_count = 0;
	executable_offset = SLJIT_EXEC_OFFSET(code);

	label = compiler->labels;
	jump = compiler->jumps;
	const_ = compiler->consts;

	/* First pass: copy the words into the executable buffer, shrinking
	   each jump's reserved address-load sequence when detect_jump_type
	   finds a shorter encoding. */
	do {
		buf_ptr = (sljit_ins*)buf->memory;
		buf_end = buf_ptr + (buf->used_size >> 2);
		do {
			*code_ptr = *buf_ptr++;
			SLJIT_ASSERT(!label || label->size >= word_count);
			SLJIT_ASSERT(!jump || jump->addr >= word_count);
			SLJIT_ASSERT(!const_ || const_->addr >= word_count);
			/* These structures are ordered by their address. */
			if (label && label->size == word_count) {
				/* Just recording the address. */
				label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
				label->size = code_ptr - code;
				label = label->next;
			}
			if (jump && jump->addr == word_count) {
				/* The jump was emitted as a full absolute address load
				   (3 words on 32 bit, 6 on 64 bit) before the branch. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
				jump->addr = (sljit_uw)(code_ptr - 3);
#else
				jump->addr = (sljit_uw)(code_ptr - 6);
#endif
				if (detect_jump_type(jump, code_ptr, code, executable_offset)) {
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
					code_ptr[-3] = code_ptr[0];
					code_ptr -= 3;
#else
					if (jump->flags & PATCH_ABS32) {
						/* Keep only two of the six address-load words. */
						code_ptr -= 3;
						code_ptr[-1] = code_ptr[2];
						code_ptr[0] = code_ptr[3];
					}
					else if (jump->flags & PATCH_ABS48) {
						/* Keep five words; rewrite the shift/oris pair
						   for a 48 bit immediate. */
						code_ptr--;
						code_ptr[-1] = code_ptr[0];
						code_ptr[0] = code_ptr[1];
						/* rldicr rX,rX,32,31 -> rX,rX,16,47 */
						SLJIT_ASSERT((code_ptr[-3] & 0xfc00ffff) == 0x780007c6);
						code_ptr[-3] ^= 0x8422;
						/* oris -> ori */
						code_ptr[-2] ^= 0x4000000;
					}
					else {
						/* Branch-reachable target: drop the whole load. */
						code_ptr[-6] = code_ptr[0];
						code_ptr -= 6;
					}
#endif
					if (jump->flags & REMOVE_COND) {
						/* Replace the conditional branch by an inverted
						   bc over an unconditional b (patched below). */
						code_ptr[0] = BCx | (2 << 2) | ((code_ptr[0] ^ (8 << 21)) & 0x03ff0001);
						code_ptr++;
						jump->addr += sizeof(sljit_ins);
						code_ptr[0] = Bx;
						jump->flags -= IS_COND;
					}
				}
				jump = jump->next;
			}
			if (const_ && const_->addr == word_count) {
				const_->addr = (sljit_uw)code_ptr;
				const_ = const_->next;
			}
			code_ptr ++;
			word_count ++;
		} while (buf_ptr < buf_end);

		buf = buf->next;
	} while (buf);

	/* A label may point exactly at the end of the generated code. */
	if (label && label->size == word_count) {
		label->addr = (sljit_uw)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);
		label->size = code_ptr - code;
		label = label->next;
	}

	SLJIT_ASSERT(!label);
	SLJIT_ASSERT(!jump);
	SLJIT_ASSERT(!const_);
#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size - (sizeof(struct sljit_function_context) / sizeof(sljit_ins)));
#else
	SLJIT_ASSERT(code_ptr - code <= (sljit_sw)compiler->size);
#endif

	/* Second pass: patch branch displacements and address-load
	   immediates now that every target address is known. */
	jump = compiler->jumps;
	while (jump) {
		do {
			addr = (jump->flags & JUMP_LABEL) ? jump->u.label->addr : jump->u.target;
			buf_ptr = (sljit_ins *)jump->addr;

			if (jump->flags & PATCH_B) {
				if (jump->flags & IS_COND) {
					if (!(jump->flags & PATCH_ABS_B)) {
						addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
						SLJIT_ASSERT((sljit_sw)addr <= 0x7fff && (sljit_sw)addr >= -0x8000);
						*buf_ptr = BCx | (addr & 0xfffc) | ((*buf_ptr) & 0x03ff0001);
					}
					else {
						/* 0x2 is the absolute-address (AA) bit. */
						SLJIT_ASSERT(addr <= 0xffff);
						*buf_ptr = BCx | (addr & 0xfffc) | 0x2 | ((*buf_ptr) & 0x03ff0001);
					}
				}
				else {
					if (!(jump->flags & PATCH_ABS_B)) {
						addr -= (sljit_uw)SLJIT_ADD_EXEC_OFFSET(buf_ptr, executable_offset);
						SLJIT_ASSERT((sljit_sw)addr <= 0x01ffffff && (sljit_sw)addr >= -0x02000000);
						*buf_ptr = Bx | (addr & 0x03fffffc) | ((*buf_ptr) & 0x1);
					}
					else {
						SLJIT_ASSERT(addr <= 0x03ffffff);
						*buf_ptr = Bx | (addr & 0x03fffffc) | 0x2 | ((*buf_ptr) & 0x1);
					}
				}
				break;
			}

			/* Set the fields of immediate loads. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
#else
			if (jump->flags & PATCH_ABS32) {
				SLJIT_ASSERT(addr <= 0x7fffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			if (jump->flags & PATCH_ABS48) {
				/* buf_ptr[2] is the shift instruction; it stays as is. */
				SLJIT_ASSERT(addr <= 0x7fffffffffff);
				buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 32) & 0xffff);
				buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 16) & 0xffff);
				buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | (addr & 0xffff);
				break;
			}
			buf_ptr[0] = (buf_ptr[0] & 0xffff0000) | ((addr >> 48) & 0xffff);
			buf_ptr[1] = (buf_ptr[1] & 0xffff0000) | ((addr >> 32) & 0xffff);
			buf_ptr[3] = (buf_ptr[3] & 0xffff0000) | ((addr >> 16) & 0xffff);
			buf_ptr[4] = (buf_ptr[4] & 0xffff0000) | (addr & 0xffff);
#endif
		} while (0);
		jump = jump->next;
	}

	compiler->error = SLJIT_ERR_COMPILED;
	compiler->executable_offset = executable_offset;
	compiler->executable_size = (code_ptr - code) * sizeof(sljit_ins);

	code = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code, executable_offset);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* Doubleword-align the function descriptor. */
	if (((sljit_sw)code_ptr) & 0x4)
		code_ptr++;
#endif
	sljit_set_function_context(NULL, (struct sljit_function_context*)code_ptr, (sljit_sw)code, (void*)sljit_generate_code);
#endif

	code_ptr = (sljit_ins *)SLJIT_ADD_EXEC_OFFSET(code_ptr, executable_offset);

	SLJIT_CACHE_FLUSH(code, code_ptr);

#if (defined SLJIT_INDIRECT_CALL && SLJIT_INDIRECT_CALL)
	/* On descriptor ABIs the descriptor is the callable entry point. */
	return code_ptr;
#else
	return code;
#endif
}
527 
sljit_has_cpu_feature(sljit_s32 feature_type)528 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_has_cpu_feature(sljit_s32 feature_type)
529 {
530 	switch (feature_type) {
531 	case SLJIT_HAS_FPU:
532 #ifdef SLJIT_IS_FPU_AVAILABLE
533 		return SLJIT_IS_FPU_AVAILABLE;
534 #else
535 		/* Available by default. */
536 		return 1;
537 #endif
538 
539 	case SLJIT_HAS_PRE_UPDATE:
540 	case SLJIT_HAS_CLZ:
541 		return 1;
542 
543 	default:
544 		return 0;
545 	}
546 }
547 
548 /* --------------------------------------------------------------------- */
549 /*  Entry, exit                                                          */
550 /* --------------------------------------------------------------------- */
551 
552 /* inp_flags: */
553 
554 /* Creates an index in data_transfer_insts array. */
555 #define LOAD_DATA	0x01
556 #define INDEXED		0x02
557 #define WRITE_BACK	0x04
558 #define WORD_DATA	0x00
559 #define BYTE_DATA	0x08
560 #define HALF_DATA	0x10
561 #define INT_DATA	0x18
562 #define SIGNED_DATA	0x20
563 /* Separates integer and floating point registers */
564 #define GPR_REG		0x3f
565 #define DOUBLE_DATA	0x40
566 
567 #define MEM_MASK	0x7f
568 
569 /* Other inp_flags. */
570 
571 #define ARG_TEST	0x000100
/* Integer operation and set flags -> requires exts on 64 bit systems. */
573 #define ALT_SIGN_EXT	0x000200
574 /* This flag affects the RC() and OERC() macros. */
575 #define ALT_SET_FLAGS	0x000400
576 #define ALT_KEEP_CACHE	0x000800
577 #define ALT_FORM1	0x010000
578 #define ALT_FORM2	0x020000
579 #define ALT_FORM3	0x040000
580 #define ALT_FORM4	0x080000
581 #define ALT_FORM5	0x100000
582 
583 /* Source and destination is register. */
584 #define REG_DEST	0x000001
585 #define REG1_SOURCE	0x000002
586 #define REG2_SOURCE	0x000004
587 /* getput_arg_fast returned true. */
588 #define FAST_DEST	0x000008
589 /* Multiple instructions are required. */
590 #define SLOW_DEST	0x000010
591 /*
592 ALT_SIGN_EXT		0x000200
593 ALT_SET_FLAGS		0x000400
594 ALT_FORM1		0x010000
595 ...
596 ALT_FORM5		0x100000 */
597 
598 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
599 #include "sljitNativePPC_32.c"
600 #else
601 #include "sljitNativePPC_64.c"
602 #endif
603 
604 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
605 #define STACK_STORE	STW
606 #define STACK_LOAD	LWZ
607 #else
608 #define STACK_STORE	STD
609 #define STACK_LOAD	LD
610 #endif
611 
/* Emits the function prologue: saves the link register and TMP_ZERO,
   spills the saved/scratch registers below the stack pointer, stores LR
   in the caller's frame, zeroes TMP_ZERO, moves incoming arguments into
   saved registers and finally allocates the stack frame with a
   store-with-update instruction (which also links the back chain). */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_enter(struct sljit_compiler *compiler,
	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
{
	sljit_s32 i, tmp, offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
	set_emit_enter(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);

	/* Save the link register into r0, then TMP_ZERO below the SP. */
	FAIL_IF(push_inst(compiler, MFLR | D(0)));
	offs = -(sljit_s32)(sizeof(sljit_sw));
	FAIL_IF(push_inst(compiler, STACK_STORE | S(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));

	/* Spill the requested saved registers (S0 downwards). */
	tmp = saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = SLJIT_S0; i >= tmp; i--) {
		offs -= (sljit_s32)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	/* Spill the scratch registers from SLJIT_FIRST_SAVED_REG upwards. */
	for (i = scratches; i >= SLJIT_FIRST_SAVED_REG; i--) {
		offs -= (sljit_s32)(sizeof(sljit_sw));
		FAIL_IF(push_inst(compiler, STACK_STORE | S(i) | A(SLJIT_SP) | IMM(offs)));
	}

	SLJIT_ASSERT(offs == -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1));

	/* Store LR (held in r0) in the caller's frame; the slot offset
	   depends on the stack frame layout (V2 vs classic). */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_STORE | S(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	/* TMP_ZERO = 0 (addi TMP_ZERO, r0-as-zero, 0). */
	FAIL_IF(push_inst(compiler, ADDI | D(TMP_ZERO) | A(0) | 0));
	/* Copy incoming arguments (R0..R2) into the saved registers. */
	if (args >= 1)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(SLJIT_S0) | B(SLJIT_R0)));
	if (args >= 2)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R1) | A(SLJIT_S1) | B(SLJIT_R1)));
	if (args >= 3)
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R2) | A(SLJIT_S2) | B(SLJIT_R2)));

	/* Frame size: locals + register save area, 16 byte aligned. */
	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
	local_size = (local_size + 15) & ~0xf;
	compiler->local_size = local_size;

	/* Allocate the frame. When the size does not fit in the 16 bit
	   immediate field, load it into r0 and use the indexed form. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STWU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STWUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#else
	if (local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, STDU | S(SLJIT_SP) | A(SLJIT_SP) | IMM(-local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, -local_size));
		FAIL_IF(push_inst(compiler, STDUX | S(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}
#endif

	return SLJIT_SUCCESS;
}
675 
sljit_set_context(struct sljit_compiler * compiler,sljit_s32 options,sljit_s32 args,sljit_s32 scratches,sljit_s32 saveds,sljit_s32 fscratches,sljit_s32 fsaveds,sljit_s32 local_size)676 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_set_context(struct sljit_compiler *compiler,
677 	sljit_s32 options, sljit_s32 args, sljit_s32 scratches, sljit_s32 saveds,
678 	sljit_s32 fscratches, sljit_s32 fsaveds, sljit_s32 local_size)
679 {
680 	CHECK_ERROR();
681 	CHECK(check_sljit_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size));
682 	set_set_context(compiler, options, args, scratches, saveds, fscratches, fsaveds, local_size);
683 
684 	local_size += GET_SAVED_REGISTERS_SIZE(scratches, saveds, 1) + SLJIT_LOCALS_OFFSET;
685 	compiler->local_size = (local_size + 15) & ~0xf;
686 	return SLJIT_SUCCESS;
687 }
688 
/* Emits the function epilogue: moves the return value into place,
   releases the stack frame, restores the link register, the scratch and
   saved registers and TMP_ZERO (mirroring sljit_emit_enter), then
   returns with blr. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_return(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 i, tmp, offs;

	CHECK_ERROR();
	CHECK(check_sljit_emit_return(compiler, op, src, srcw));

	FAIL_IF(emit_mov_before_return(compiler, op, src, srcw));

	/* Pop the frame allocated by sljit_emit_enter. */
	if (compiler->local_size <= SIMM_MAX)
		FAIL_IF(push_inst(compiler, ADDI | D(SLJIT_SP) | A(SLJIT_SP) | IMM(compiler->local_size)));
	else {
		FAIL_IF(load_immediate(compiler, 0, compiler->local_size));
		FAIL_IF(push_inst(compiler, ADD | D(SLJIT_SP) | A(SLJIT_SP) | B(0)));
	}

	/* Reload LR into r0; the slot offset mirrors the prologue store. */
#if (defined SLJIT_PPC_STACK_FRAME_V2 && SLJIT_PPC_STACK_FRAME_V2)
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(2 * sizeof(sljit_sw))));
#else
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(0) | A(SLJIT_SP) | IMM(sizeof(sljit_sw))));
#endif

	offs = -(sljit_s32)GET_SAVED_REGISTERS_SIZE(compiler->scratches, compiler->saveds, 1);

	/* Restore registers in the reverse order of the prologue spills. */
	tmp = compiler->scratches;
	for (i = SLJIT_FIRST_SAVED_REG; i <= tmp; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_s32)(sizeof(sljit_sw));
	}

	tmp = compiler->saveds < SLJIT_NUMBER_OF_SAVED_REGISTERS ? (SLJIT_S0 + 1 - compiler->saveds) : SLJIT_FIRST_SAVED_REG;
	for (i = tmp; i <= SLJIT_S0; i++) {
		FAIL_IF(push_inst(compiler, STACK_LOAD | D(i) | A(SLJIT_SP) | IMM(offs)));
		offs += (sljit_s32)(sizeof(sljit_sw));
	}

	/* TMP_ZERO was stored at the topmost slot (-sizeof(sljit_sw)). */
	FAIL_IF(push_inst(compiler, STACK_LOAD | D(TMP_ZERO) | A(SLJIT_SP) | IMM(offs)));
	SLJIT_ASSERT(offs == -(sljit_sw)(sizeof(sljit_sw)));

	FAIL_IF(push_inst(compiler, MTLR | S(0)));
	FAIL_IF(push_inst(compiler, BLR));

	return SLJIT_SUCCESS;
}
733 
734 #undef STACK_STORE
735 #undef STACK_LOAD
736 
737 /* --------------------------------------------------------------------- */
738 /*  Operators                                                            */
739 /* --------------------------------------------------------------------- */
740 
741 /* i/x - immediate/indexed form
742    n/w - no write-back / write-back (1 bit)
743    s/l - store/load (1 bit)
744    u/s - signed/unsigned (1 bit)
745    w/b/h/i - word/byte/half/int allowed (2 bit)
   It contains 32 items, but not all are different. */
747 
748 /* 64 bit only: [reg+imm] must be aligned to 4 bytes. */
749 #define INT_ALIGNED	0x10000
750 /* 64-bit only: there is no lwau instruction. */
751 #define UPDATE_REQ	0x20000
752 
753 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
754 #define ARCH_32_64(a, b)	a
755 #define INST_CODE_AND_DST(inst, flags, reg) \
756 	((inst) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
757 #else
758 #define ARCH_32_64(a, b)	b
759 #define INST_CODE_AND_DST(inst, flags, reg) \
760 	(((inst) & ~(INT_ALIGNED | UPDATE_REQ)) | (((flags) & MEM_MASK) <= GPR_REG ? D(reg) : FD(reg)))
761 #endif
762 
/* Instruction encodings for memory accesses, indexed by the inp_flags
   bits under MEM_MASK: LOAD_DATA | INDEXED | WRITE_BACK | data size
   (WORD/BYTE/HALF/INT) | SIGNED_DATA, with the DOUBLE_DATA entries
   (floating point forms) at the end. */
static const sljit_ins data_transfer_insts[64 + 8] = {

/* -------- Unsigned -------- */

/* Word. */

/* u w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
/* u w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
/* u w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
/* u w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

/* u w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
/* u w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
/* u w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
/* u w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

/* Byte. */

/* u b n i s */ HI(38) /* stb */,
/* u b n i l */ HI(34) /* lbz */,
/* u b n x s */ HI(31) | LO(215) /* stbx */,
/* u b n x l */ HI(31) | LO(87) /* lbzx */,

/* u b w i s */ HI(39) /* stbu */,
/* u b w i l */ HI(35) /* lbzu */,
/* u b w x s */ HI(31) | LO(247) /* stbux */,
/* u b w x l */ HI(31) | LO(119) /* lbzux */,

/* Half. */

/* u h n i s */ HI(44) /* sth */,
/* u h n i l */ HI(40) /* lhz */,
/* u h n x s */ HI(31) | LO(407) /* sthx */,
/* u h n x l */ HI(31) | LO(279) /* lhzx */,

/* u h w i s */ HI(45) /* sthu */,
/* u h w i l */ HI(41) /* lhzu */,
/* u h w x s */ HI(31) | LO(439) /* sthux */,
/* u h w x l */ HI(31) | LO(311) /* lhzux */,

/* Int. */

/* u i n i s */ HI(36) /* stw */,
/* u i n i l */ HI(32) /* lwz */,
/* u i n x s */ HI(31) | LO(151) /* stwx */,
/* u i n x l */ HI(31) | LO(23) /* lwzx */,

/* u i w i s */ HI(37) /* stwu */,
/* u i w i l */ HI(33) /* lwzu */,
/* u i w x s */ HI(31) | LO(183) /* stwux */,
/* u i w x l */ HI(31) | LO(55) /* lwzux */,

/* -------- Signed -------- */

/* Word. */

/* s w n i s */ ARCH_32_64(HI(36) /* stw */, HI(62) | INT_ALIGNED | 0x0 /* std */),
/* s w n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x0 /* ld */),
/* s w n x s */ ARCH_32_64(HI(31) | LO(151) /* stwx */, HI(31) | LO(149) /* stdx */),
/* s w n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(21) /* ldx */),

/* s w w i s */ ARCH_32_64(HI(37) /* stwu */, HI(62) | INT_ALIGNED | 0x1 /* stdu */),
/* s w w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | 0x1 /* ldu */),
/* s w w x s */ ARCH_32_64(HI(31) | LO(183) /* stwux */, HI(31) | LO(181) /* stdux */),
/* s w w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(53) /* ldux */),

/* Byte. */

/* s b n i s */ HI(38) /* stb */,
/* s b n i l */ HI(34) /* lbz */ /* EXTS_REQ */,
/* s b n x s */ HI(31) | LO(215) /* stbx */,
/* s b n x l */ HI(31) | LO(87) /* lbzx */ /* EXTS_REQ */,

/* s b w i s */ HI(39) /* stbu */,
/* s b w i l */ HI(35) /* lbzu */ /* EXTS_REQ */,
/* s b w x s */ HI(31) | LO(247) /* stbux */,
/* s b w x l */ HI(31) | LO(119) /* lbzux */ /* EXTS_REQ */,

/* Half. */

/* s h n i s */ HI(44) /* sth */,
/* s h n i l */ HI(42) /* lha */,
/* s h n x s */ HI(31) | LO(407) /* sthx */,
/* s h n x l */ HI(31) | LO(343) /* lhax */,

/* s h w i s */ HI(45) /* sthu */,
/* s h w i l */ HI(43) /* lhau */,
/* s h w x s */ HI(31) | LO(439) /* sthux */,
/* s h w x l */ HI(31) | LO(375) /* lhaux */,

/* Int. */

/* s i n i s */ HI(36) /* stw */,
/* s i n i l */ ARCH_32_64(HI(32) /* lwz */, HI(58) | INT_ALIGNED | 0x2 /* lwa */),
/* s i n x s */ HI(31) | LO(151) /* stwx */,
/* s i n x l */ ARCH_32_64(HI(31) | LO(23) /* lwzx */, HI(31) | LO(341) /* lwax */),

/* s i w i s */ HI(37) /* stwu */,
/* s i w i l */ ARCH_32_64(HI(33) /* lwzu */, HI(58) | INT_ALIGNED | UPDATE_REQ | 0x2 /* lwa */),
/* s i w x s */ HI(31) | LO(183) /* stwux */,
/* s i w x l */ ARCH_32_64(HI(31) | LO(55) /* lwzux */, HI(31) | LO(373) /* lwaux */),

/* -------- Double -------- */

/* d   n i s */ HI(54) /* stfd */,
/* d   n i l */ HI(50) /* lfd */,
/* d   n x s */ HI(31) | LO(727) /* stfdx */,
/* d   n x l */ HI(31) | LO(599) /* lfdx */,

/* s   n i s */ HI(52) /* stfs */,
/* s   n i l */ HI(48) /* lfs */,
/* s   n x s */ HI(31) | LO(663) /* stfsx */,
/* s   n x l */ HI(31) | LO(535) /* lfsx */,

};
878 
879 #undef ARCH_32_64
880 
/* Simple cases, (no caching is required). */
/* Tries to emit the memory access described by (arg, argw) with a single
   instruction. Returns -1 when the access was emitted, 1 when ARG_TEST is
   set and a single instruction would suffice, and 0 when the general
   (slow) path in getput_arg is required. */
static sljit_s32 getput_arg_fast(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw)
{
	sljit_ins inst;

	/* Should work when (arg & REG_MASK) == 0. */
	SLJIT_ASSERT(A(0) == 0);
	SLJIT_ASSERT(arg & SLJIT_MEM);

	if (arg & OFFS_REG_MASK) {
		/* [base + (index << shift)] form: only a zero shift can be
		   encoded by one indexed instruction. */
		if (argw & 0x3)
			return 0;
		if (inp_flags & ARG_TEST)
			return 1;

		/* Use the indexed (register + register) opcode variant. */
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(OFFS_REG(arg))));
		return -1;
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

	/* INT_ALIGNED forms (ld/std family) require a 4-byte aligned
	   displacement; UPDATE_REQ forms cannot be emitted directly. */
	if (argw > SIMM_MAX || argw < SIMM_MIN || ((inst & INT_ALIGNED) && (argw & 0x3)) || (inst & UPDATE_REQ))
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;
#endif

#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	/* The displacement must fit into the signed 16 bit immediate field. */
	if (argw > SIMM_MAX || argw < SIMM_MIN)
		return 0;
	if (inp_flags & ARG_TEST)
		return 1;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

	FAIL_IF(push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | IMM(argw)));
	return -1;
}
928 
929 /* See getput_arg below.
930    Note: can_cache is called only for binary operators. Those operator always
931    uses word arguments without write back. */
can_cache(sljit_s32 arg,sljit_sw argw,sljit_s32 next_arg,sljit_sw next_argw)932 static sljit_s32 can_cache(sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
933 {
934 	sljit_sw high_short, next_high_short;
935 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
936 	sljit_sw diff;
937 #endif
938 
939 	SLJIT_ASSERT((arg & SLJIT_MEM) && (next_arg & SLJIT_MEM));
940 
941 	if (arg & OFFS_REG_MASK)
942 		return ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && (argw & 0x3) == (next_argw & 0x3));
943 
944 	if (next_arg & OFFS_REG_MASK)
945 		return 0;
946 
947 #if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
948 	high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
949 	next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
950 	return high_short == next_high_short;
951 #else
952 	if (argw <= 0x7fffffffl && argw >= -0x80000000l) {
953 		high_short = (argw + ((argw & 0x8000) << 1)) & ~0xffff;
954 		next_high_short = (next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
955 		if (high_short == next_high_short)
956 			return 1;
957 	}
958 
959 	diff = argw - next_argw;
960 	if (!(arg & REG_MASK))
961 		return diff <= SIMM_MAX && diff >= SIMM_MIN;
962 
963 	if (arg == next_arg && diff <= SIMM_MAX && diff >= SIMM_MIN)
964 		return 1;
965 
966 	return 0;
967 #endif
968 }
969 
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* INT_ALIGNED instructions (ld/std family) need a 4-byte aligned
   displacement. When the displacement relative to the cached base in
   TMP_REG3 is misaligned, fold the low two bits into the cached base so
   the remaining displacement becomes aligned. Uses the local 'inst' and
   may return via FAIL_IF, so it is only usable inside getput_arg. */
#define ADJUST_CACHED_IMM(imm) \
	if ((inst & INT_ALIGNED) && (imm & 0x3)) { \
		/* Adjust cached value. Fortunately this is really a rare case */ \
		compiler->cache_argw += imm & 0x3; \
		FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | (imm & 0x3))); \
		imm &= ~0x3; \
	}
#endif
979 
/* Emit the necessary instructions. See can_cache above. */
/* General (slow path) memory access: emits the address computation plus
   the transfer, using TMP_REG3 as an address cache shared with the next
   access (next_arg, next_argw) when profitable. */
static sljit_s32 getput_arg(struct sljit_compiler *compiler, sljit_s32 inp_flags, sljit_s32 reg, sljit_s32 arg, sljit_sw argw, sljit_s32 next_arg, sljit_sw next_argw)
{
	sljit_s32 tmp_r;
	sljit_ins inst;
	sljit_sw high_short, next_high_short;
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	sljit_sw diff;
#endif

	SLJIT_ASSERT(arg & SLJIT_MEM);

	/* A GPR load may use its own destination register as scratch. */
	tmp_r = ((inp_flags & LOAD_DATA) && ((inp_flags) & MEM_MASK) <= GPR_REG) ? reg : TMP_REG1;
	/* Special case for "mov reg, [reg, ... ]". */
	if ((arg & REG_MASK) == tmp_r)
		tmp_r = TMP_REG1;

	if (SLJIT_UNLIKELY(arg & OFFS_REG_MASK)) {
		argw &= 0x3;
		/* Otherwise getput_arg_fast would capture it. */
		SLJIT_ASSERT(argw);

		/* Reuse the previously shifted index register when cached. */
		if ((SLJIT_MEM | (arg & OFFS_REG_MASK)) == compiler->cache_arg && argw == compiler->cache_argw)
			tmp_r = TMP_REG3;
		else {
			if ((arg & OFFS_REG_MASK) == (next_arg & OFFS_REG_MASK) && argw == (next_argw & 0x3)) {
				/* The next access uses the same shifted index: cache it. */
				compiler->cache_arg = SLJIT_MEM | (arg & OFFS_REG_MASK);
				compiler->cache_argw = argw;
				tmp_r = TMP_REG3;
			}
			/* tmp_r = index register shifted left by argw. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(arg)) | A(tmp_r) | (argw << 11) | ((31 - argw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(tmp_r, OFFS_REG(arg), argw, 63 - argw, 1)));
#endif
		}
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
	}

	/* Write-back is meaningless without a base register. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK)))
		inp_flags &= ~WRITE_BACK;

	inst = data_transfer_insts[inp_flags & MEM_MASK];
	SLJIT_ASSERT((arg & REG_MASK) || !(inst & UPDATE_REQ));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	if (argw <= 0x7fff7fffl && argw >= -0x80000000l
			&& (!(inst & INT_ALIGNED) || !(argw & 0x3)) && !(inst & UPDATE_REQ)) {
#endif

		/* addis (upper half) + 16 bit displacement reaches the address. */
		arg &= REG_MASK;
		high_short = (sljit_s32)(argw + ((argw & 0x8000) << 1)) & ~0xffff;
		/* The getput_arg_fast should handle this otherwise. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		SLJIT_ASSERT(high_short && high_short <= 0x7fffffffl && high_short >= -0x80000000l);
#else
		SLJIT_ASSERT(high_short && !(inst & (INT_ALIGNED | UPDATE_REQ)));
#endif

		if (inp_flags & WRITE_BACK) {
			/* Write-back: update the base register in place. */
			tmp_r = arg;
			FAIL_IF(push_inst(compiler, ADDIS | D(arg) | A(arg) | IMM(high_short >> 16)));
		}
		else if (compiler->cache_arg != (SLJIT_MEM | arg) || high_short != compiler->cache_argw) {
			if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK)) {
				next_high_short = (sljit_s32)(next_argw + ((next_argw & 0x8000) << 1)) & ~0xffff;
				if (high_short == next_high_short) {
					/* The next access shares the upper half: cache it. */
					compiler->cache_arg = SLJIT_MEM | arg;
					compiler->cache_argw = high_short;
					tmp_r = TMP_REG3;
				}
			}
			FAIL_IF(push_inst(compiler, ADDIS | D(tmp_r) | A(arg & REG_MASK) | IMM(high_short >> 16)));
		}
		else
			tmp_r = TMP_REG3;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r) | IMM(argw));

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	}

	/* Everything else is PPC-64 only. */
	if (SLJIT_UNLIKELY(!(arg & REG_MASK))) {
		/* Absolute address: try to reach it from the cached immediate. */
		diff = argw - compiler->cache_argw;
		if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			ADJUST_CACHED_IMM(diff);
			return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
		}

		diff = argw - next_argw;
		if ((next_arg & SLJIT_MEM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
			SLJIT_ASSERT(inp_flags & LOAD_DATA);

			/* Cache the absolute address for the next access. */
			compiler->cache_arg = SLJIT_IMM;
			compiler->cache_argw = argw;
			tmp_r = TMP_REG3;
		}

		FAIL_IF(load_immediate(compiler, tmp_r, argw));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(tmp_r));
	}

	/* Cached (base + offset) address within simm16 reach. */
	diff = argw - compiler->cache_argw;
	if (compiler->cache_arg == arg && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(!(inp_flags & WRITE_BACK) && !(inst & UPDATE_REQ));
		ADJUST_CACHED_IMM(diff);
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3) | IMM(diff));
	}

	if ((compiler->cache_arg & SLJIT_IMM) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		/* Use the cached immediate as the index register of an
		   indexed access, nudging it to argw when necessary. */
		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		if (compiler->cache_argw != argw) {
			FAIL_IF(push_inst(compiler, ADDI | D(TMP_REG3) | A(TMP_REG3) | IMM(diff)));
			compiler->cache_argw = argw;
		}
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	if (argw == next_argw && (next_arg & SLJIT_MEM)) {
		/* Load the offset once and cache it for the next access. */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;

		inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
		SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(TMP_REG3));
	}

	diff = argw - next_argw;
	if (arg == next_arg && !(inp_flags & WRITE_BACK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		/* Cache the fully resolved address (base + offset). */
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));
		FAIL_IF(push_inst(compiler, ADD | D(TMP_REG3) | A(TMP_REG3) | B(arg & REG_MASK)));

		compiler->cache_arg = arg;
		compiler->cache_argw = argw;

		return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(TMP_REG3));
	}

	if ((next_arg & SLJIT_MEM) && !(next_arg & OFFS_REG_MASK) && diff <= SIMM_MAX && diff >= SIMM_MIN) {
		SLJIT_ASSERT(inp_flags & LOAD_DATA);
		FAIL_IF(load_immediate(compiler, TMP_REG3, argw));

		/* Cache only the offset; the base stays in the access itself. */
		compiler->cache_arg = SLJIT_IMM;
		compiler->cache_argw = argw;
		tmp_r = TMP_REG3;
	}
	else
		FAIL_IF(load_immediate(compiler, tmp_r, argw));

	/* Get the indexed version instead of the normal one. */
	inst = data_transfer_insts[(inp_flags | INDEXED) & MEM_MASK];
	SLJIT_ASSERT(!(inst & (INT_ALIGNED | UPDATE_REQ)));
	return push_inst(compiler, INST_CODE_AND_DST(inst, inp_flags, reg) | A(arg & REG_MASK) | B(tmp_r));
#endif
}
1143 
emit_op_mem2(struct sljit_compiler * compiler,sljit_s32 flags,sljit_s32 reg,sljit_s32 arg1,sljit_sw arg1w,sljit_s32 arg2,sljit_sw arg2w)1144 static SLJIT_INLINE sljit_s32 emit_op_mem2(struct sljit_compiler *compiler, sljit_s32 flags, sljit_s32 reg, sljit_s32 arg1, sljit_sw arg1w, sljit_s32 arg2, sljit_sw arg2w)
1145 {
1146 	if (getput_arg_fast(compiler, flags, reg, arg1, arg1w))
1147 		return compiler->error;
1148 	return getput_arg(compiler, flags, reg, arg1, arg1w, arg2, arg2w);
1149 }
1150 
/* Central operand marshaller: loads src1/src2 into registers (resolving
   complex addressing modes via getput_arg, cooperating through the
   TMP_REG3 cache), runs emit_single_op, then stores the result to a
   memory destination when needed. */
static sljit_s32 emit_op(struct sljit_compiler *compiler, sljit_s32 op, sljit_s32 input_flags,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	/* arg1 goes to TMP_REG1 or src reg
	   arg2 goes to TMP_REG2, imm or src reg
	   TMP_REG3 can be used for caching
	   result goes to TMP_REG2, so put result can use TMP_REG1 and TMP_REG3. */
	sljit_s32 dst_r;
	sljit_s32 src1_r;
	sljit_s32 src2_r;
	sljit_s32 sugg_src2_r = TMP_REG2;
	/* Only the ALT_* modifier bits are forwarded to emit_single_op. */
	sljit_s32 flags = input_flags & (ALT_FORM1 | ALT_FORM2 | ALT_FORM3 | ALT_FORM4 | ALT_FORM5 | ALT_SIGN_EXT | ALT_SET_FLAGS);

	/* Invalidate the argument cache unless the caller keeps it alive. */
	if (!(input_flags & ALT_KEEP_CACHE)) {
		compiler->cache_arg = 0;
		compiler->cache_argw = 0;
	}

	/* Destination check. */
	if (SLJIT_UNLIKELY(dst == SLJIT_UNUSED)) {
		dst_r = TMP_REG2;
	}
	else if (FAST_IS_REG(dst)) {
		dst_r = dst;
		flags |= REG_DEST;
		/* Move operations may compute directly into the destination. */
		if (op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			sugg_src2_r = dst_r;
	}
	else {
		SLJIT_ASSERT(dst & SLJIT_MEM);
		if (getput_arg_fast(compiler, input_flags | ARG_TEST, TMP_REG2, dst, dstw)) {
			/* Store can be emitted with a single instruction later. */
			flags |= FAST_DEST;
			dst_r = TMP_REG2;
		}
		else {
			/* dst_r == 0 marks an unresolved (slow) destination. */
			flags |= SLOW_DEST;
			dst_r = 0;
		}
	}

	/* Source 1. */
	if (FAST_IS_REG(src1)) {
		src1_r = src1;
		flags |= REG1_SOURCE;
	}
	else if (src1 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, TMP_REG1, src1w));
		src1_r = TMP_REG1;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w)) {
		FAIL_IF(compiler->error);
		src1_r = TMP_REG1;
	}
	else
		src1_r = 0;

	/* Source 2. */
	if (FAST_IS_REG(src2)) {
		src2_r = src2;
		flags |= REG2_SOURCE;
		/* Register-to-register move: forward the source as result. */
		if (!(flags & REG_DEST) && op >= SLJIT_MOV && op <= SLJIT_MOVU_S32)
			dst_r = src2_r;
	}
	else if (src2 & SLJIT_IMM) {
		FAIL_IF(load_immediate(compiler, sugg_src2_r, src2w));
		src2_r = sugg_src2_r;
	}
	else if (getput_arg_fast(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w)) {
		FAIL_IF(compiler->error);
		src2_r = sugg_src2_r;
	}
	else
		src2_r = 0;

	/* src1_r, src2_r and dst_r can be zero (=unprocessed).
	   All arguments are complex addressing modes, and it is a binary operator. */
	if (src1_r == 0 && src2_r == 0 && dst_r == 0) {
		/* Order the two loads so that the cached address helps the
		   following access (see can_cache). */
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG2, src2, src2w, dst, dstw));
		}
		src1_r = TMP_REG1;
		src2_r = TMP_REG2;
	}
	else if (src1_r == 0 && src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, src2, src2w));
		src1_r = TMP_REG1;
	}
	else if (src1_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, dst, dstw));
		src1_r = TMP_REG1;
	}
	else if (src2_r == 0 && dst_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, dst, dstw));
		src2_r = sugg_src2_r;
	}

	if (dst_r == 0)
		dst_r = TMP_REG2;

	if (src1_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, TMP_REG1, src1, src1w, 0, 0));
		src1_r = TMP_REG1;
	}

	if (src2_r == 0) {
		FAIL_IF(getput_arg(compiler, input_flags | LOAD_DATA, sugg_src2_r, src2, src2w, 0, 0));
		src2_r = sugg_src2_r;
	}

	FAIL_IF(emit_single_op(compiler, op, flags, dst_r, src1_r, src2_r));

	/* Store the computed value to a memory destination. */
	if (flags & (FAST_DEST | SLOW_DEST)) {
		if (flags & FAST_DEST)
			FAIL_IF(getput_arg_fast(compiler, input_flags, dst_r, dst, dstw));
		else
			FAIL_IF(getput_arg(compiler, input_flags, dst_r, dst, dstw, 0, 0));
	}
	return SLJIT_SUCCESS;
}
1277 
/* Emits zero-operand operations: breakpoint/nop and the fixed-register
   long-multiply and divide/modulo sequences on SLJIT_R0/SLJIT_R1. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op0(struct sljit_compiler *compiler, sljit_s32 op)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	/* 32 bit variants of mul/div are selected by SLJIT_I32_OP. */
	sljit_s32 int_op = op & SLJIT_I32_OP;
#endif

	CHECK_ERROR();
	CHECK(check_sljit_emit_op0(compiler, op));

	op = GET_OPCODE(op);
	switch (op) {
	case SLJIT_BREAKPOINT:
	case SLJIT_NOP:
		return push_inst(compiler, NOP);
	case SLJIT_LMUL_UW:
	case SLJIT_LMUL_SW:
		/* Copy R0 to TMP_REG1 (OR rA,rS,rS acts as a register move):
		   the low product overwrites R0 before the high half is made. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, MULLD | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHDU : MULHD) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#else
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R1)));
		return push_inst(compiler, (op == SLJIT_LMUL_UW ? MULHWU : MULHW) | D(SLJIT_R1) | A(TMP_REG1) | B(SLJIT_R1));
#endif
	case SLJIT_DIVMOD_UW:
	case SLJIT_DIVMOD_SW:
		/* Preserve the dividend (R0) in TMP_REG1 for the remainder. */
		FAIL_IF(push_inst(compiler, OR | S(SLJIT_R0) | A(TMP_REG1) | B(SLJIT_R0)));
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		FAIL_IF(push_inst(compiler, (int_op ? (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) : (op == SLJIT_DIVMOD_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, (int_op ? MULLW : MULLD) | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#else
		FAIL_IF(push_inst(compiler, (op == SLJIT_DIVMOD_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1)));
		FAIL_IF(push_inst(compiler, MULLW | D(SLJIT_R1) | A(SLJIT_R0) | B(SLJIT_R1)));
#endif
		/* remainder = dividend - quotient * divisor. */
		return push_inst(compiler, SUBF | D(SLJIT_R1) | A(SLJIT_R1) | B(TMP_REG1));
	case SLJIT_DIV_UW:
	case SLJIT_DIV_SW:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		return push_inst(compiler, (int_op ? (op == SLJIT_DIV_UW ? DIVWU : DIVW) : (op == SLJIT_DIV_UW ? DIVDU : DIVD)) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#else
		return push_inst(compiler, (op == SLJIT_DIV_UW ? DIVWU : DIVW) | D(SLJIT_R0) | A(SLJIT_R0) | B(SLJIT_R1));
#endif
	}

	return SLJIT_SUCCESS;
}
1324 
/* Emits a dcbt (data cache block touch) prefetch hint for the address
   described by src/srcw. */
static sljit_s32 emit_prefetch(struct sljit_compiler *compiler,
        sljit_s32 src, sljit_sw srcw)
{
	if (!(src & OFFS_REG_MASK)) {
		/* [base] with zero displacement: dcbt 0, base. */
		if (srcw == 0 && (src & REG_MASK) != SLJIT_UNUSED)
			return push_inst(compiler, DCBT | A(0) | B(src & REG_MASK));

		/* Materialize the displacement in TMP_REG1. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		/* Works with SLJIT_MEM0() case as well. */
		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
	}

	/* Indexed form: only the shift amount is taken from srcw. */
	srcw &= 0x3;

	if (srcw == 0)
		return push_inst(compiler, DCBT | A(src & REG_MASK) | B(OFFS_REG(src)));

	/* Shift the index register into TMP_REG1 first. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(src)) | A(TMP_REG1) | (srcw << 11) | ((31 - srcw) << 1)));
#else
	FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(src), srcw, 63 - srcw, 1)));
#endif
	return push_inst(compiler, DCBT | A(src & REG_MASK) | B(TMP_REG1));
}
1349 
/* Helper for sljit_emit_op1: immediate operands are truncated with
   type_cast and emitted as a plain SLJIT_MOV; all other operands keep
   the typed move so the load performs the conversion. */
#define EMIT_MOV(type, type_flags, type_cast) \
	emit_op(compiler, (src & SLJIT_IMM) ? SLJIT_MOV : type, flags | (type_flags), dst, dstw, TMP_REG1, 0, src, (src & SLJIT_IMM) ? type_cast srcw : srcw)
1352 
/* Emits single-operand operations (moves of every width, NOT, NEG, CLZ),
   normalizing the opcode and flags before delegating to emit_op. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
	sljit_s32 op_flags = GET_ALL_FLAGS(op);

	CHECK_ERROR();
	CHECK(check_sljit_emit_op1(compiler, op, dst, dstw, src, srcw));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src, srcw);

	/* An unused destination without flag requests degenerates into a
	   prefetch hint (for memory moves) or a no-op. */
	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op)) {
		if (op <= SLJIT_MOV_P && (src & SLJIT_MEM))
			return emit_prefetch(compiler, src, srcw);

		return SLJIT_SUCCESS;
	}

	op = GET_OPCODE(op);
	/* Immediate zero is sourced from the dedicated zero register. */
	if ((src & SLJIT_IMM) && srcw == 0)
		src = TMP_ZERO;

	/* Clear XER before operations that track overflow. */
	if (GET_FLAG_TYPE(op_flags) == SLJIT_OVERFLOW)
		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));

	if (op_flags & SLJIT_I32_OP) {
		if (op < SLJIT_NOT) {
			/* Same-register move with no conversion needed: no-op. */
			if (FAST_IS_REG(src) && src == dst) {
				if (!TYPE_CAST_NEEDED(op))
					return SLJIT_SUCCESS;
			}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
			/* Loads use lwz (zero extend) regardless of signedness;
			   immediates are sign extended by the load_immediate path. */
			if (op == SLJIT_MOV_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOV_U32;
			if (op == SLJIT_MOVU_S32 && (src & SLJIT_MEM))
				op = SLJIT_MOVU_U32;
			if (op == SLJIT_MOV_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOV_S32;
			if (op == SLJIT_MOVU_U32 && (src & SLJIT_IMM))
				op = SLJIT_MOVU_S32;
#endif
		}
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		else {
			/* Most operations expect sign extended arguments. */
			flags |= INT_DATA | SIGNED_DATA;
			if (src & SLJIT_IMM)
				srcw = (sljit_s32)srcw;
			if (HAS_FLAGS(op_flags))
				flags |= ALT_SIGN_EXT;
		}
#endif
	}

	switch (op) {
	case SLJIT_MOV:
	case SLJIT_MOV_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	case SLJIT_MOV_U32:
	case SLJIT_MOV_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOV_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA, (sljit_u32));

	case SLJIT_MOV_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA, (sljit_s32));
#endif

	case SLJIT_MOV_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA, (sljit_u8));

	case SLJIT_MOV_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA, (sljit_s8));

	case SLJIT_MOV_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA, (sljit_u16));

	case SLJIT_MOV_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA, (sljit_s16));

	/* MOVU variants request base register write back. */
	case SLJIT_MOVU:
	case SLJIT_MOVU_P:
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
	case SLJIT_MOVU_U32:
	case SLJIT_MOVU_S32:
#endif
		return emit_op(compiler, SLJIT_MOV, flags | WORD_DATA | WRITE_BACK, dst, dstw, TMP_REG1, 0, src, srcw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	case SLJIT_MOVU_U32:
		return EMIT_MOV(SLJIT_MOV_U32, INT_DATA | WRITE_BACK, (sljit_u32));

	case SLJIT_MOVU_S32:
		return EMIT_MOV(SLJIT_MOV_S32, INT_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s32));
#endif

	case SLJIT_MOVU_U8:
		return EMIT_MOV(SLJIT_MOV_U8, BYTE_DATA | WRITE_BACK, (sljit_u8));

	case SLJIT_MOVU_S8:
		return EMIT_MOV(SLJIT_MOV_S8, BYTE_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s8));

	case SLJIT_MOVU_U16:
		return EMIT_MOV(SLJIT_MOV_U16, HALF_DATA | WRITE_BACK, (sljit_u16));

	case SLJIT_MOVU_S16:
		return EMIT_MOV(SLJIT_MOV_S16, HALF_DATA | SIGNED_DATA | WRITE_BACK, (sljit_s16));

	case SLJIT_NOT:
		return emit_op(compiler, SLJIT_NOT, flags, dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_NEG:
		return emit_op(compiler, SLJIT_NEG, flags | (GET_FLAG_TYPE(op_flags) ? ALT_FORM1 : 0), dst, dstw, TMP_REG1, 0, src, srcw);

	case SLJIT_CLZ:
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
		/* ALT_FORM1 selects the 32 bit count (cntlzw vs cntlzd). */
		return emit_op(compiler, SLJIT_CLZ, flags | (!(op_flags & SLJIT_I32_OP) ? 0 : ALT_FORM1), dst, dstw, TMP_REG1, 0, src, srcw);
#else
		return emit_op(compiler, SLJIT_CLZ, flags, dst, dstw, TMP_REG1, 0, src, srcw);
#endif
	}

	return SLJIT_SUCCESS;
}
1481 
#undef EMIT_MOV

/* Immediate operand classifiers for sljit_emit_op2: each tests whether
   (src, srcw) is an immediate fitting the named encoding class. */

/* Signed low 16 bits (addi-style immediate). */
#define TEST_SL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= SIMM_MAX && (srcw) >= SIMM_MIN)

/* Unsigned low 16 bits (logical immediate). */
#define TEST_UL_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Signed high 16 bits (addis-style); must also fit in 32 bits on ppc-64. */
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff) && (srcw) <= 0x7fffffffl && (srcw) >= -0x80000000l)
#else
#define TEST_SH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & 0xffff))
#endif

/* Unsigned high 16 bits. */
#define TEST_UH_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffff0000))

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Reachable by an addis + addi pair on ppc-64; any immediate on ppc-32. */
#define TEST_ADD_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && (srcw) <= 0x7fff7fffl && (srcw) >= -0x80000000l)
#else
#define TEST_ADD_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
/* Any unsigned 32 bit value on ppc-64; any immediate on ppc-32. */
#define TEST_UI_IMM(src, srcw) \
	(((src) & SLJIT_IMM) && !((srcw) & ~0xffffffff))
#else
#define TEST_UI_IMM(src, srcw) \
	((src) & SLJIT_IMM)
#endif
1516 
sljit_emit_op2(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 dst,sljit_sw dstw,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1517 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op2(struct sljit_compiler *compiler, sljit_s32 op,
1518 	sljit_s32 dst, sljit_sw dstw,
1519 	sljit_s32 src1, sljit_sw src1w,
1520 	sljit_s32 src2, sljit_sw src2w)
1521 {
1522 	sljit_s32 flags = HAS_FLAGS(op) ? ALT_SET_FLAGS : 0;
1523 
1524 	CHECK_ERROR();
1525 	CHECK(check_sljit_emit_op2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
1526 	ADJUST_LOCAL_OFFSET(dst, dstw);
1527 	ADJUST_LOCAL_OFFSET(src1, src1w);
1528 	ADJUST_LOCAL_OFFSET(src2, src2w);
1529 
1530 	if (dst == SLJIT_UNUSED && !HAS_FLAGS(op))
1531 		return SLJIT_SUCCESS;
1532 
1533 	if ((src1 & SLJIT_IMM) && src1w == 0)
1534 		src1 = TMP_ZERO;
1535 	if ((src2 & SLJIT_IMM) && src2w == 0)
1536 		src2 = TMP_ZERO;
1537 
1538 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1539 	if (op & SLJIT_I32_OP) {
1540 		/* Most operations expect sign extended arguments. */
1541 		flags |= INT_DATA | SIGNED_DATA;
1542 		if (src1 & SLJIT_IMM)
1543 			src1w = (sljit_s32)(src1w);
1544 		if (src2 & SLJIT_IMM)
1545 			src2w = (sljit_s32)(src2w);
1546 		if (HAS_FLAGS(op))
1547 			flags |= ALT_SIGN_EXT;
1548 	}
1549 #endif
1550 	if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
1551 		FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1552 	if (src2 == TMP_REG2)
1553 		flags |= ALT_KEEP_CACHE;
1554 
1555 	switch (GET_OPCODE(op)) {
1556 	case SLJIT_ADD:
1557 		if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
1558 			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
1559 
1560 		if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1561 			if (TEST_SL_IMM(src2, src2w)) {
1562 				compiler->imm = src2w & 0xffff;
1563 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1564 			}
1565 			if (TEST_SL_IMM(src1, src1w)) {
1566 				compiler->imm = src1w & 0xffff;
1567 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1568 			}
1569 			if (TEST_SH_IMM(src2, src2w)) {
1570 				compiler->imm = (src2w >> 16) & 0xffff;
1571 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1572 			}
1573 			if (TEST_SH_IMM(src1, src1w)) {
1574 				compiler->imm = (src1w >> 16) & 0xffff;
1575 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1576 			}
1577 			/* Range between -1 and -32768 is covered above. */
1578 			if (TEST_ADD_IMM(src2, src2w)) {
1579 				compiler->imm = src2w & 0xffffffff;
1580 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1581 			}
1582 			if (TEST_ADD_IMM(src1, src1w)) {
1583 				compiler->imm = src1w & 0xffffffff;
1584 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src2, src2w, TMP_REG2, 0);
1585 			}
1586 		}
1587 		if (HAS_FLAGS(op)) {
1588 			if (TEST_SL_IMM(src2, src2w)) {
1589 				compiler->imm = src2w & 0xffff;
1590 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1591 			}
1592 			if (TEST_SL_IMM(src1, src1w)) {
1593 				compiler->imm = src1w & 0xffff;
1594 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1595 			}
1596 		}
1597 		return emit_op(compiler, SLJIT_ADD, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM4 : 0), dst, dstw, src1, src1w, src2, src2w);
1598 
1599 	case SLJIT_ADDC:
1600 		return emit_op(compiler, SLJIT_ADDC, flags, dst, dstw, src1, src1w, src2, src2w);
1601 
1602 	case SLJIT_SUB:
1603 		if (GET_FLAG_TYPE(op) >= SLJIT_LESS && GET_FLAG_TYPE(op) <= SLJIT_LESS_EQUAL) {
1604 			if (dst == SLJIT_UNUSED) {
1605 				if (TEST_UL_IMM(src2, src2w)) {
1606 					compiler->imm = src2w & 0xffff;
1607 					return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1608 				}
1609 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1, dst, dstw, src1, src1w, src2, src2w);
1610 			}
1611 
1612 			if ((src2 & SLJIT_IMM) && src2w >= 0 && src2w <= (SIMM_MAX + 1)) {
1613 				compiler->imm = src2w;
1614 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1615 			}
1616 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM1 | ALT_FORM3, dst, dstw, src1, src1w, src2, src2w);
1617 		}
1618 
1619 		if (GET_FLAG_TYPE(op) == SLJIT_OVERFLOW)
1620 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM2, dst, dstw, src1, src1w, src2, src2w);
1621 
1622 		if (!HAS_FLAGS(op) && ((src1 | src2) & SLJIT_IMM)) {
1623 			if (TEST_SL_IMM(src2, -src2w)) {
1624 				compiler->imm = (-src2w) & 0xffff;
1625 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1626 			}
1627 			if (TEST_SL_IMM(src1, src1w)) {
1628 				compiler->imm = src1w & 0xffff;
1629 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1630 			}
1631 			if (TEST_SH_IMM(src2, -src2w)) {
1632 				compiler->imm = ((-src2w) >> 16) & 0xffff;
1633 				return emit_op(compiler, SLJIT_ADD, flags |  ALT_FORM2 | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1634 			}
1635 			/* Range between -1 and -32768 is covered above. */
1636 			if (TEST_ADD_IMM(src2, -src2w)) {
1637 				compiler->imm = -src2w & 0xffffffff;
1638 				return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM2 | ALT_FORM4, dst, dstw, src1, src1w, TMP_REG2, 0);
1639 			}
1640 		}
1641 
1642 		if (dst == SLJIT_UNUSED && GET_FLAG_TYPE(op) != GET_FLAG_TYPE(SLJIT_SET_CARRY)) {
1643 			if (TEST_SL_IMM(src2, src2w)) {
1644 				compiler->imm = src2w & 0xffff;
1645 				return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4 | ALT_FORM5, dst, dstw, src1, src1w, TMP_REG2, 0);
1646 			}
1647 			return emit_op(compiler, SLJIT_SUB, flags | ALT_FORM4, dst, dstw, src1, src1w, src2, src2w);
1648 		}
1649 
1650 		if (TEST_SL_IMM(src2, -src2w)) {
1651 			compiler->imm = (-src2w) & 0xffff;
1652 			return emit_op(compiler, SLJIT_ADD, flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1653 		}
1654 		/* We know ALT_SIGN_EXT is set if it is an SLJIT_I32_OP on 64 bit systems. */
1655 		return emit_op(compiler, SLJIT_SUB, flags | ((GET_FLAG_TYPE(op) == GET_FLAG_TYPE(SLJIT_SET_CARRY)) ? ALT_FORM5 : 0), dst, dstw, src1, src1w, src2, src2w);
1656 
1657 	case SLJIT_SUBC:
1658 		return emit_op(compiler, SLJIT_SUBC, flags, dst, dstw, src1, src1w, src2, src2w);
1659 
1660 	case SLJIT_MUL:
1661 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1662 		if (op & SLJIT_I32_OP)
1663 			flags |= ALT_FORM2;
1664 #endif
1665 		if (!HAS_FLAGS(op)) {
1666 			if (TEST_SL_IMM(src2, src2w)) {
1667 				compiler->imm = src2w & 0xffff;
1668 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1669 			}
1670 			if (TEST_SL_IMM(src1, src1w)) {
1671 				compiler->imm = src1w & 0xffff;
1672 				return emit_op(compiler, SLJIT_MUL, flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1673 			}
1674 		}
1675 		else
1676 			FAIL_IF(push_inst(compiler, MTXER | S(TMP_ZERO)));
1677 		return emit_op(compiler, SLJIT_MUL, flags, dst, dstw, src1, src1w, src2, src2w);
1678 
1679 	case SLJIT_AND:
1680 	case SLJIT_OR:
1681 	case SLJIT_XOR:
1682 		/* Commutative unsigned operations. */
1683 		if (!HAS_FLAGS(op) || GET_OPCODE(op) == SLJIT_AND) {
1684 			if (TEST_UL_IMM(src2, src2w)) {
1685 				compiler->imm = src2w;
1686 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1687 			}
1688 			if (TEST_UL_IMM(src1, src1w)) {
1689 				compiler->imm = src1w;
1690 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src2, src2w, TMP_REG2, 0);
1691 			}
1692 			if (TEST_UH_IMM(src2, src2w)) {
1693 				compiler->imm = (src2w >> 16) & 0xffff;
1694 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src1, src1w, TMP_REG2, 0);
1695 			}
1696 			if (TEST_UH_IMM(src1, src1w)) {
1697 				compiler->imm = (src1w >> 16) & 0xffff;
1698 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM2, dst, dstw, src2, src2w, TMP_REG2, 0);
1699 			}
1700 		}
1701 		if (GET_OPCODE(op) != SLJIT_AND && GET_OPCODE(op) != SLJIT_AND) {
1702 			/* Unlike or and xor, and resets unwanted bits as well. */
1703 			if (TEST_UI_IMM(src2, src2w)) {
1704 				compiler->imm = src2w;
1705 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src1, src1w, TMP_REG2, 0);
1706 			}
1707 			if (TEST_UI_IMM(src1, src1w)) {
1708 				compiler->imm = src1w;
1709 				return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM3, dst, dstw, src2, src2w, TMP_REG2, 0);
1710 			}
1711 		}
1712 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1713 
1714 	case SLJIT_SHL:
1715 	case SLJIT_LSHR:
1716 	case SLJIT_ASHR:
1717 #if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
1718 		if (op & SLJIT_I32_OP)
1719 			flags |= ALT_FORM2;
1720 #endif
1721 		if (src2 & SLJIT_IMM) {
1722 			compiler->imm = src2w;
1723 			return emit_op(compiler, GET_OPCODE(op), flags | ALT_FORM1, dst, dstw, src1, src1w, TMP_REG2, 0);
1724 		}
1725 		return emit_op(compiler, GET_OPCODE(op), flags, dst, dstw, src1, src1w, src2, src2w);
1726 	}
1727 
1728 	return SLJIT_SUCCESS;
1729 }
1730 
/* Returns the hardware register number backing the abstract sljit register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_register_index(reg));
	return reg_map[reg];
}
1736 
/* Returns the hardware FP register number; on PPC the mapping is the identity. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_get_float_register_index(sljit_s32 reg)
{
	CHECK_REG_INDEX(check_sljit_get_float_register_index(reg));
	return reg;
}
1742 
sljit_emit_op_custom(struct sljit_compiler * compiler,void * instruction,sljit_s32 size)1743 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_custom(struct sljit_compiler *compiler,
1744 	void *instruction, sljit_s32 size)
1745 {
1746 	CHECK_ERROR();
1747 	CHECK(check_sljit_emit_op_custom(compiler, instruction, size));
1748 
1749 	return push_inst(compiler, *(sljit_ins*)instruction);
1750 }
1751 
1752 /* --------------------------------------------------------------------- */
1753 /*  Floating point operators                                             */
1754 /* --------------------------------------------------------------------- */
1755 
/* Shifts SLJIT_F32_OP (0x100) down by 6 (to 0x4) and ors it onto DOUBLE_DATA;
   see the compile-time assert in sljit_emit_fop1 for the bit layout. */
#define FLOAT_DATA(op) (DOUBLE_DATA | ((op & SLJIT_F32_OP) >> 6))
/* Picks the single or double precision opcode based on SLJIT_F32_OP. */
#define SELECT_FOP(op, single, double) ((op & SLJIT_F32_OP) ? single : double)

/* Offset of the stack scratch slot used to move data between the general
   purpose and floating point register files. */
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
#define FLOAT_TMP_MEM_OFFSET (6 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET (2 * sizeof(sljit_sw))

/* Offsets of the low/high word of the 8 byte scratch slot,
   depending on the target byte order. */
#if (defined SLJIT_LITTLE_ENDIAN && SLJIT_LITTLE_ENDIAN)
#define FLOAT_TMP_MEM_OFFSET_LOW (2 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (3 * sizeof(sljit_sw))
#else
#define FLOAT_TMP_MEM_OFFSET_LOW (3 * sizeof(sljit_sw))
#define FLOAT_TMP_MEM_OFFSET_HI (2 * sizeof(sljit_sw))
#endif

#endif /* SLJIT_CONFIG_PPC_64 */
1773 
/* Converts a floating point value to a signed integer, truncating toward zero,
   and stores the result in dst (register or memory). */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_sw_from_f64(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	if (src & SLJIT_MEM) {
		/* We can ignore the temporary data store on the stack from caching point of view. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));
		src = TMP_FREG1;
	}

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	op = GET_OPCODE(op);
	/* FCTIWZ / FCTIDZ: convert to a 32 / 64 bit integer, rounding toward zero. */
	FAIL_IF(push_inst(compiler, (op == SLJIT_CONV_S32_FROM_F64 ? FCTIWZ : FCTIDZ) | FD(TMP_FREG1) | FB(src)));

	if (op == SLJIT_CONV_SW_FROM_F64) {
		if (FAST_IS_REG(dst)) {
			/* The integer bits live in an FP register; bounce them through
			   the stack scratch slot to reach a general purpose register. */
			FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0));
			return emit_op_mem2(compiler, WORD_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
		}
		return emit_op_mem2(compiler, DOUBLE_DATA, TMP_FREG1, dst, dstw, 0, 0);
	}
#else
	FAIL_IF(push_inst(compiler, FCTIWZ | FD(TMP_FREG1) | FB(src)));
#endif

	if (FAST_IS_REG(dst)) {
		/* STFIWX stores the low 32 bits of the FP register and only supports
		   register + register addressing, hence the offset goes via TMP_REG1. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(SLJIT_SP) | B(TMP_REG1)));
		return emit_op_mem2(compiler, INT_DATA | LOAD_DATA, dst, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, 0, 0);
	}

	SLJIT_ASSERT(dst & SLJIT_MEM);

	/* Build the (base, index) register pair required by STFIWX;
	   from here on dstw is reused to hold the index register. */
	if (dst & OFFS_REG_MASK) {
		dstw &= 0x3;
		if (dstw) {
			/* Shift the index register left by the scale stored in dstw. */
#if (defined SLJIT_CONFIG_PPC_32 && SLJIT_CONFIG_PPC_32)
			FAIL_IF(push_inst(compiler, RLWINM | S(OFFS_REG(dst)) | A(TMP_REG1) | (dstw << 11) | ((31 - dstw) << 1)));
#else
			FAIL_IF(push_inst(compiler, RLDI(TMP_REG1, OFFS_REG(dst), dstw, 63 - dstw, 1)));
#endif
			dstw = TMP_REG1;
		}
		else
			dstw = OFFS_REG(dst);
	}
	else {
		if ((dst & REG_MASK) && !dstw) {
			/* Use the base register as the index; base field becomes 0. */
			dstw = dst & REG_MASK;
			dst = 0;
		}
		else {
			/* This works regardless we have SLJIT_MEM1 or SLJIT_MEM0. */
			FAIL_IF(load_immediate(compiler, TMP_REG1, dstw));
			dstw = TMP_REG1;
		}
	}

	return push_inst(compiler, STFIWX | FS(TMP_FREG1) | A(dst & REG_MASK) | B(dstw));
}
1834 
/* Converts a signed machine word or 32 bit signed integer (immediate,
   register or memory) to a floating point value stored in dst. */
static SLJIT_INLINE sljit_s32 sljit_emit_fop1_conv_f64_from_sw(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_IMM) {
		if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32)
			/* Only the low 32 bits of the immediate are significant. */
			srcw = (sljit_s32)srcw;
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw));
		src = TMP_REG1;
	}
	else if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_S32) {
		/* Sign extend the 32 bit input to a full word first. */
		if (FAST_IS_REG(src))
			FAIL_IF(push_inst(compiler, EXTSW | S(src) | A(TMP_REG1)));
		else
			FAIL_IF(emit_op_mem2(compiler, INT_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		src = TMP_REG1;
	}

	/* Move the integer bits into an FP register through the stack scratch slot. */
	if (FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA, src, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, dst, dstw));
	}
	else
		FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, src, srcw, dst, dstw));

	/* FCFID: convert the 64 bit integer held in the FP register to double. */
	FAIL_IF(push_inst(compiler, FCFID | FD(dst_r) | FB(TMP_FREG1)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		/* Round the double result down to single precision. */
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#else

	sljit_s32 dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;
	sljit_s32 invert_sign = 1;

	if (src & SLJIT_IMM) {
		/* For immediates the sign bit can be flipped at compile time. */
		FAIL_IF(load_immediate(compiler, TMP_REG1, srcw ^ 0x80000000));
		src = TMP_REG1;
		invert_sign = 0;
	}
	else if (!FAST_IS_REG(src)) {
		FAIL_IF(emit_op_mem2(compiler, WORD_DATA | SIGNED_DATA | LOAD_DATA, TMP_REG1, src, srcw, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
		src = TMP_REG1;
	}

	/* First, a special double floating point value is constructed: (2^53 + (input xor (2^31)))
	   The double precision format has exactly 53 bit precision, so the lower 32 bit represents
	   the lower 32 bit of such value. The result of xor 2^31 is the same as adding 0x80000000
	   to the input, which shifts it into the 0 - 0xffffffff range. To get the converted floating
	   point value, we need to subtract 2^53 + 2^31 from the constructed value. */
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG2) | A(0) | 0x4330));
	if (invert_sign)
		FAIL_IF(push_inst(compiler, XORIS | S(src) | A(TMP_REG1) | 0x8000));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_HI));
	FAIL_IF(push_inst(compiler, ADDIS | D(TMP_REG1) | A(0) | 0x8000));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));
	FAIL_IF(emit_op_mem2(compiler, WORD_DATA, TMP_REG1, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET));
	FAIL_IF(emit_op_mem2(compiler, DOUBLE_DATA | LOAD_DATA, TMP_FREG2, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET, SLJIT_MEM1(SLJIT_SP), FLOAT_TMP_MEM_OFFSET_LOW));

	/* Subtract the bias (held in TMP_FREG2) to obtain the converted value. */
	FAIL_IF(push_inst(compiler, FSUB | FD(dst_r) | FA(TMP_FREG1) | FB(TMP_FREG2)));

	if (dst & SLJIT_MEM)
		return emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG1, dst, dstw, 0, 0);
	if (op & SLJIT_F32_OP)
		return push_inst(compiler, FRSP | FD(dst_r) | FB(dst_r));
	return SLJIT_SUCCESS;

#endif
}
1912 
sljit_emit_fop1_cmp(struct sljit_compiler * compiler,sljit_s32 op,sljit_s32 src1,sljit_sw src1w,sljit_s32 src2,sljit_sw src2w)1913 static SLJIT_INLINE sljit_s32 sljit_emit_fop1_cmp(struct sljit_compiler *compiler, sljit_s32 op,
1914 	sljit_s32 src1, sljit_sw src1w,
1915 	sljit_s32 src2, sljit_sw src2w)
1916 {
1917 	if (src1 & SLJIT_MEM) {
1918 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
1919 		src1 = TMP_FREG1;
1920 	}
1921 
1922 	if (src2 & SLJIT_MEM) {
1923 		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, 0, 0));
1924 		src2 = TMP_FREG2;
1925 	}
1926 
1927 	return push_inst(compiler, FCMPU | CRD(4) | FA(src1) | FB(src2));
1928 }
1929 
/* Single-operand floating point operations: move, negate, absolute value and
   the F32 <-> F64 conversions. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop1(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src, sljit_sw srcw)
{
	sljit_s32 dst_r;

	CHECK_ERROR();
	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* FLOAT_DATA relies on SLJIT_F32_OP (0x100) mapping onto the 0x4 data flag. */
	SLJIT_COMPILE_ASSERT((SLJIT_F32_OP == 0x100) && !(DOUBLE_DATA & 0x4), float_transfer_bit_error);
	SELECT_FOP1_OPERATION_WITH_CHECKS(compiler, op, dst, dstw, src, srcw);

	/* For precision conversions the source and destination formats differ.
	   Flip the F32 bit so the memory load below uses the source precision;
	   it is flipped back inside the switch. */
	if (GET_OPCODE(op) == SLJIT_CONV_F64_FROM_F32)
		op ^= SLJIT_F32_OP;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG1;

	if (src & SLJIT_MEM) {
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op) | LOAD_DATA, dst_r, src, srcw, dst, dstw));
		src = dst_r;
	}

	switch (GET_OPCODE(op)) {
	case SLJIT_CONV_F64_FROM_F32:
		/* Restore the original F32 bit. */
		op ^= SLJIT_F32_OP;
		if (op & SLJIT_F32_OP) {
			/* Narrowing: round the double value to single precision. */
			FAIL_IF(push_inst(compiler, FRSP | FD(dst_r) | FB(src)));
			break;
		}
		/* Fall through. */
	case SLJIT_MOV_F64:
		if (src != dst_r) {
			if (dst_r != TMP_FREG1)
				FAIL_IF(push_inst(compiler, FMR | FD(dst_r) | FB(src)));
			else
				/* Destination is memory; store directly from the source register. */
				dst_r = src;
		}
		break;
	case SLJIT_NEG_F64:
		FAIL_IF(push_inst(compiler, FNEG | FD(dst_r) | FB(src)));
		break;
	case SLJIT_ABS_F64:
		FAIL_IF(push_inst(compiler, FABS | FD(dst_r) | FB(src)));
		break;
	}

	if (dst & SLJIT_MEM)
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), dst_r, dst, dstw, 0, 0));
	return SLJIT_SUCCESS;
}
1981 
/* Two-operand floating point arithmetic (add, sub, mul, div). Memory operands
   are loaded into TMP_FREG1/TMP_FREG2; slow (multi-instruction) addresses are
   deferred and ordered so the address cache can be reused. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fop2(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 src1, sljit_sw src1w,
	sljit_s32 src2, sljit_sw src2w)
{
	sljit_s32 dst_r, flags = 0;

	CHECK_ERROR();
	CHECK(check_sljit_emit_fop2(compiler, op, dst, dstw, src1, src1w, src2, src2w));
	ADJUST_LOCAL_OFFSET(dst, dstw);
	ADJUST_LOCAL_OFFSET(src1, src1w);
	ADJUST_LOCAL_OFFSET(src2, src2w);

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	dst_r = FAST_IS_REG(dst) ? dst : TMP_FREG2;

	/* Try the single-instruction load first; if the address needs extra
	   instructions, defer the load and mark it with ALT_FORM1/ALT_FORM2. */
	if (src1 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w)) {
			FAIL_IF(compiler->error);
			src1 = TMP_FREG1;
		} else
			flags |= ALT_FORM1;
	}

	if (src2 & SLJIT_MEM) {
		if (getput_arg_fast(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w)) {
			FAIL_IF(compiler->error);
			src2 = TMP_FREG2;
		} else
			flags |= ALT_FORM2;
	}

	/* Emit the deferred loads; the next-access hint passed to getput_arg
	   decides which base address stays cached. */
	if ((flags & (ALT_FORM1 | ALT_FORM2)) == (ALT_FORM1 | ALT_FORM2)) {
		if (!can_cache(src1, src1w, src2, src2w) && can_cache(src1, src1w, dst, dstw)) {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, src1, src1w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
		}
		else {
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, src2, src2w));
			FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));
		}
	}
	else if (flags & ALT_FORM1)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG1, src1, src1w, dst, dstw));
	else if (flags & ALT_FORM2)
		FAIL_IF(getput_arg(compiler, FLOAT_DATA(op) | LOAD_DATA, TMP_FREG2, src2, src2w, dst, dstw));

	if (flags & ALT_FORM1)
		src1 = TMP_FREG1;
	if (flags & ALT_FORM2)
		src2 = TMP_FREG2;

	switch (GET_OPCODE(op)) {
	case SLJIT_ADD_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FADDS, FADD) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_SUB_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FSUBS, FSUB) | FD(dst_r) | FA(src1) | FB(src2)));
		break;

	case SLJIT_MUL_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FMULS, FMUL) | FD(dst_r) | FA(src1) | FC(src2) /* FMUL use FC as src2 */));
		break;

	case SLJIT_DIV_F64:
		FAIL_IF(push_inst(compiler, SELECT_FOP(op, FDIVS, FDIV) | FD(dst_r) | FA(src1) | FB(src2)));
		break;
	}

	if (dst_r == TMP_FREG2)
		/* Destination is memory; store the result computed in the temporary. */
		FAIL_IF(emit_op_mem2(compiler, FLOAT_DATA(op), TMP_FREG2, dst, dstw, 0, 0));

	return SLJIT_SUCCESS;
}
2059 
2060 #undef FLOAT_DATA
2061 #undef SELECT_FOP
2062 
2063 /* --------------------------------------------------------------------- */
2064 /*  Other instructions                                                   */
2065 /* --------------------------------------------------------------------- */
2066 
sljit_emit_fast_enter(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw)2067 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_enter(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw)
2068 {
2069 	CHECK_ERROR();
2070 	CHECK(check_sljit_emit_fast_enter(compiler, dst, dstw));
2071 	ADJUST_LOCAL_OFFSET(dst, dstw);
2072 
2073 	if (FAST_IS_REG(dst))
2074 		return push_inst(compiler, MFLR | D(dst));
2075 
2076 	/* Memory. */
2077 	FAIL_IF(push_inst(compiler, MFLR | D(TMP_REG2)));
2078 	return emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0);
2079 }
2080 
sljit_emit_fast_return(struct sljit_compiler * compiler,sljit_s32 src,sljit_sw srcw)2081 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_fast_return(struct sljit_compiler *compiler, sljit_s32 src, sljit_sw srcw)
2082 {
2083 	CHECK_ERROR();
2084 	CHECK(check_sljit_emit_fast_return(compiler, src, srcw));
2085 	ADJUST_LOCAL_OFFSET(src, srcw);
2086 
2087 	if (FAST_IS_REG(src))
2088 		FAIL_IF(push_inst(compiler, MTLR | S(src)));
2089 	else {
2090 		if (src & SLJIT_MEM)
2091 			FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_REG2, 0, TMP_REG1, 0, src, srcw));
2092 		else if (src & SLJIT_IMM)
2093 			FAIL_IF(load_immediate(compiler, TMP_REG2, srcw));
2094 		FAIL_IF(push_inst(compiler, MTLR | S(TMP_REG2)));
2095 	}
2096 	return push_inst(compiler, BLR);
2097 }
2098 
2099 /* --------------------------------------------------------------------- */
2100 /*  Conditional instructions                                             */
2101 /* --------------------------------------------------------------------- */
2102 
sljit_emit_label(struct sljit_compiler * compiler)2103 SLJIT_API_FUNC_ATTRIBUTE struct sljit_label* sljit_emit_label(struct sljit_compiler *compiler)
2104 {
2105 	struct sljit_label *label;
2106 
2107 	CHECK_ERROR_PTR();
2108 	CHECK_PTR(check_sljit_emit_label(compiler));
2109 
2110 	if (compiler->last_label && compiler->last_label->size == compiler->size)
2111 		return compiler->last_label;
2112 
2113 	label = (struct sljit_label*)ensure_abuf(compiler, sizeof(struct sljit_label));
2114 	PTR_FAIL_IF(!label);
2115 	set_label(label, compiler);
2116 	return label;
2117 }
2118 
get_bo_bi_flags(sljit_s32 type)2119 static sljit_ins get_bo_bi_flags(sljit_s32 type)
2120 {
2121 	switch (type) {
2122 	case SLJIT_EQUAL:
2123 		return (12 << 21) | (2 << 16);
2124 
2125 	case SLJIT_NOT_EQUAL:
2126 		return (4 << 21) | (2 << 16);
2127 
2128 	case SLJIT_LESS:
2129 	case SLJIT_SIG_LESS:
2130 		return (12 << 21) | (0 << 16);
2131 
2132 	case SLJIT_GREATER_EQUAL:
2133 	case SLJIT_SIG_GREATER_EQUAL:
2134 		return (4 << 21) | (0 << 16);
2135 
2136 	case SLJIT_GREATER:
2137 	case SLJIT_SIG_GREATER:
2138 		return (12 << 21) | (1 << 16);
2139 
2140 	case SLJIT_LESS_EQUAL:
2141 	case SLJIT_SIG_LESS_EQUAL:
2142 		return (4 << 21) | (1 << 16);
2143 
2144 	case SLJIT_LESS_F64:
2145 		return (12 << 21) | ((4 + 0) << 16);
2146 
2147 	case SLJIT_GREATER_EQUAL_F64:
2148 		return (4 << 21) | ((4 + 0) << 16);
2149 
2150 	case SLJIT_GREATER_F64:
2151 		return (12 << 21) | ((4 + 1) << 16);
2152 
2153 	case SLJIT_LESS_EQUAL_F64:
2154 		return (4 << 21) | ((4 + 1) << 16);
2155 
2156 	case SLJIT_OVERFLOW:
2157 	case SLJIT_MUL_OVERFLOW:
2158 		return (12 << 21) | (3 << 16);
2159 
2160 	case SLJIT_NOT_OVERFLOW:
2161 	case SLJIT_MUL_NOT_OVERFLOW:
2162 		return (4 << 21) | (3 << 16);
2163 
2164 	case SLJIT_EQUAL_F64:
2165 		return (12 << 21) | ((4 + 2) << 16);
2166 
2167 	case SLJIT_NOT_EQUAL_F64:
2168 		return (4 << 21) | ((4 + 2) << 16);
2169 
2170 	case SLJIT_UNORDERED_F64:
2171 		return (12 << 21) | ((4 + 3) << 16);
2172 
2173 	case SLJIT_ORDERED_F64:
2174 		return (4 << 21) | ((4 + 3) << 16);
2175 
2176 	default:
2177 		SLJIT_ASSERT(type >= SLJIT_JUMP && type <= SLJIT_CALL3);
2178 		return (20 << 21);
2179 	}
2180 }
2181 
/* Emits a (possibly conditional) jump. The target address is materialized
   with a patchable constant load into TMP_CALL_REG, then the branch goes
   through the CTR register. */
SLJIT_API_FUNC_ATTRIBUTE struct sljit_jump* sljit_emit_jump(struct sljit_compiler *compiler, sljit_s32 type)
{
	struct sljit_jump *jump;
	sljit_ins bo_bi_flags;

	CHECK_ERROR_PTR();
	CHECK_PTR(check_sljit_emit_jump(compiler, type));

	/* NOTE(review): defensive check; get_bo_bi_flags never returns 0 today. */
	bo_bi_flags = get_bo_bi_flags(type & 0xff);
	if (!bo_bi_flags)
		return NULL;

	jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
	PTR_FAIL_IF(!jump);
	set_jump(jump, compiler, type & SLJIT_REWRITABLE_JUMP);
	type &= 0xff;

	/* In PPC, we don't need to touch the arguments. */
	if (type < SLJIT_JUMP)
		jump->flags |= IS_COND;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
	if (type >= SLJIT_CALL0)
		jump->flags |= IS_CALL;
#endif

	/* Placeholder constant; patched once the jump target is resolved. */
	PTR_FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
	PTR_FAIL_IF(push_inst(compiler, MTCTR | S(TMP_CALL_REG)));
	jump->addr = compiler->size;
	/* The low (LK) bit makes fast calls record the return address. */
	PTR_FAIL_IF(push_inst(compiler, BCCTR | bo_bi_flags | (type >= SLJIT_FAST_CALL ? 1 : 0)));
	return jump;
}
2213 
/* Emits an indirect jump or call to the address given in src (register,
   immediate or memory); the branch always goes through the CTR register. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_ijump(struct sljit_compiler *compiler, sljit_s32 type, sljit_s32 src, sljit_sw srcw)
{
	struct sljit_jump *jump = NULL;
	sljit_s32 src_r;

	CHECK_ERROR();
	CHECK(check_sljit_emit_ijump(compiler, type, src, srcw));
	ADJUST_LOCAL_OFFSET(src, srcw);

	if (FAST_IS_REG(src)) {
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0) {
			/* Calls expect the entry address in TMP_CALL_REG
			   (OR rA,rS,rS acts as a register move). */
			FAIL_IF(push_inst(compiler, OR | S(src) | A(TMP_CALL_REG) | B(src)));
			src_r = TMP_CALL_REG;
		}
		else
			src_r = src;
#else
		src_r = src;
#endif
	} else if (src & SLJIT_IMM) {
		/* Record a patchable constant load so the target can be rewritten later. */
		jump = (struct sljit_jump*)ensure_abuf(compiler, sizeof(struct sljit_jump));
		FAIL_IF(!jump);
		set_jump(jump, compiler, JUMP_ADDR);
		jump->u.target = srcw;
#if (defined SLJIT_PASS_ENTRY_ADDR_TO_CALL && SLJIT_PASS_ENTRY_ADDR_TO_CALL)
		if (type >= SLJIT_CALL0)
			jump->flags |= IS_CALL;
#endif
		FAIL_IF(emit_const(compiler, TMP_CALL_REG, 0));
		src_r = TMP_CALL_REG;
	}
	else {
		/* Load the target address from memory. */
		FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, TMP_CALL_REG, 0, TMP_REG1, 0, src, srcw));
		src_r = TMP_CALL_REG;
	}

	FAIL_IF(push_inst(compiler, MTCTR | S(src_r)));
	if (jump)
		jump->addr = compiler->size;
	/* BO = 20: branch always; the low (LK) bit is set for fast calls. */
	return push_inst(compiler, BCCTR | (20 << 21) | (type >= SLJIT_FAST_CALL ? 1 : 0));
}
2256 
/* Materializes a condition flag as 0/1 in dst, or — when op is an
   arithmetic/logical opcode — combines the flag with dst via sljit_emit_op2.
   The flag is read by copying the whole CR register into a GPR and rotating
   the selected bit into the least significant position. */
SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_op_flags(struct sljit_compiler *compiler, sljit_s32 op,
	sljit_s32 dst, sljit_sw dstw,
	sljit_s32 type)
{
	sljit_s32 reg, input_flags, cr_bit, invert;
	sljit_s32 saved_op = op;
	sljit_sw saved_dstw = dstw;

	CHECK_ERROR();
	CHECK(check_sljit_emit_op_flags(compiler, op, dst, dstw, type));
	ADJUST_LOCAL_OFFSET(dst, dstw);

#if (defined SLJIT_CONFIG_PPC_64 && SLJIT_CONFIG_PPC_64)
	input_flags = (op & SLJIT_I32_OP) ? INT_DATA : WORD_DATA;
#else
	input_flags = WORD_DATA;
#endif

	op = GET_OPCODE(op);
	reg = (op < SLJIT_ADD && FAST_IS_REG(dst)) ? dst : TMP_REG2;

	compiler->cache_arg = 0;
	compiler->cache_argw = 0;

	/* For the combining case, preload the old destination value into TMP_REG1. */
	if (op >= SLJIT_ADD && (dst & SLJIT_MEM))
		FAIL_IF(emit_op_mem2(compiler, input_flags | LOAD_DATA, TMP_REG1, dst, dstw, dst, dstw));

	invert = 0;

	/* Map the condition code to a CR bit index; 'invert' is set when the
	   condition corresponds to the cleared state of that bit. */
	switch (type & 0xff) {
	case SLJIT_LESS:
	case SLJIT_SIG_LESS:
		cr_bit = 0;
		break;

	case SLJIT_GREATER_EQUAL:
	case SLJIT_SIG_GREATER_EQUAL:
		cr_bit = 0;
		invert = 1;
		break;

	case SLJIT_GREATER:
	case SLJIT_SIG_GREATER:
		cr_bit = 1;
		break;

	case SLJIT_LESS_EQUAL:
	case SLJIT_SIG_LESS_EQUAL:
		cr_bit = 1;
		invert = 1;
		break;

	case SLJIT_EQUAL:
		cr_bit = 2;
		break;

	case SLJIT_NOT_EQUAL:
		cr_bit = 2;
		invert = 1;
		break;

	case SLJIT_OVERFLOW:
	case SLJIT_MUL_OVERFLOW:
		cr_bit = 3;
		break;

	case SLJIT_NOT_OVERFLOW:
	case SLJIT_MUL_NOT_OVERFLOW:
		cr_bit = 3;
		invert = 1;
		break;

	case SLJIT_LESS_F64:
		cr_bit = 4 + 0;
		break;

	case SLJIT_GREATER_EQUAL_F64:
		cr_bit = 4 + 0;
		invert = 1;
		break;

	case SLJIT_GREATER_F64:
		cr_bit = 4 + 1;
		break;

	case SLJIT_LESS_EQUAL_F64:
		cr_bit = 4 + 1;
		invert = 1;
		break;

	case SLJIT_EQUAL_F64:
		cr_bit = 4 + 2;
		break;

	case SLJIT_NOT_EQUAL_F64:
		cr_bit = 4 + 2;
		invert = 1;
		break;

	case SLJIT_UNORDERED_F64:
		cr_bit = 4 + 3;
		break;

	case SLJIT_ORDERED_F64:
		cr_bit = 4 + 3;
		invert = 1;
		break;

	default:
		SLJIT_UNREACHABLE();
		break;
	}

	/* MFCR copies the whole CR into 'reg'; RLWINM rotates left by cr_bit + 1
	   and keeps only the low bit (MB = ME = 31), isolating the flag as 0/1. */
	FAIL_IF(push_inst(compiler, MFCR | D(reg)));
	FAIL_IF(push_inst(compiler, RLWINM | S(reg) | A(reg) | ((1 + (cr_bit)) << 11) | (31 << 6) | (31 << 1)));

	if (invert)
		FAIL_IF(push_inst(compiler, XORI | S(reg) | A(reg) | 0x1));

	if (op < SLJIT_ADD) {
		/* Plain move: register destinations are already done. */
		if (!(dst & SLJIT_MEM))
			return SLJIT_SUCCESS;
		return emit_op_mem2(compiler, input_flags, reg, dst, dstw, reg, 0);
	}

#if (defined SLJIT_VERBOSE && SLJIT_VERBOSE) \
		|| (defined SLJIT_ARGUMENT_CHECKS && SLJIT_ARGUMENT_CHECKS)
	compiler->skip_checks = 1;
#endif
	/* Combine the extracted flag (in TMP_REG2) with the destination value. */
	if (dst & SLJIT_MEM)
		return sljit_emit_op2(compiler, saved_op, dst, saved_dstw, TMP_REG1, 0, TMP_REG2, 0);
	return sljit_emit_op2(compiler, saved_op, dst, 0, dst, 0, TMP_REG2, 0);
}
2390 
sljit_emit_cmov(struct sljit_compiler * compiler,sljit_s32 type,sljit_s32 dst_reg,sljit_s32 src,sljit_sw srcw)2391 SLJIT_API_FUNC_ATTRIBUTE sljit_s32 sljit_emit_cmov(struct sljit_compiler *compiler, sljit_s32 type,
2392 	sljit_s32 dst_reg,
2393 	sljit_s32 src, sljit_sw srcw)
2394 {
2395 	CHECK_ERROR();
2396 	CHECK(check_sljit_emit_cmov(compiler, type, dst_reg, src, srcw));
2397 
2398 	return sljit_emit_cmov_generic(compiler, type, dst_reg, src, srcw);;
2399 }
2400 
sljit_emit_const(struct sljit_compiler * compiler,sljit_s32 dst,sljit_sw dstw,sljit_sw init_value)2401 SLJIT_API_FUNC_ATTRIBUTE struct sljit_const* sljit_emit_const(struct sljit_compiler *compiler, sljit_s32 dst, sljit_sw dstw, sljit_sw init_value)
2402 {
2403 	struct sljit_const *const_;
2404 	sljit_s32 reg;
2405 
2406 	CHECK_ERROR_PTR();
2407 	CHECK_PTR(check_sljit_emit_const(compiler, dst, dstw, init_value));
2408 	ADJUST_LOCAL_OFFSET(dst, dstw);
2409 
2410 	const_ = (struct sljit_const*)ensure_abuf(compiler, sizeof(struct sljit_const));
2411 	PTR_FAIL_IF(!const_);
2412 	set_const(const_, compiler);
2413 
2414 	reg = FAST_IS_REG(dst) ? dst : TMP_REG2;
2415 
2416 	PTR_FAIL_IF(emit_const(compiler, reg, init_value));
2417 
2418 	if (dst & SLJIT_MEM)
2419 		PTR_FAIL_IF(emit_op(compiler, SLJIT_MOV, WORD_DATA, dst, dstw, TMP_REG1, 0, TMP_REG2, 0));
2420 	return const_;
2421 }
2422