// xref: /openssl/crypto/ia64cpuid.S (revision 0e9725bc)
// Copyright 2004-2017 The OpenSSL Project Authors. All Rights Reserved.
//
// Licensed under the Apache License 2.0 (the "License").  You may not use
// this file except in compliance with the License.  You can obtain a copy
// in the file LICENSE in the source distribution or at
// https://www.openssl.org/source/license.html
// Works on all IA-64 platforms: Linux, HP-UX, Win64i...
// On Win64i compile with ias.exe.
.text

// ADDP is the instruction applied to pointer arguments before they
// are dereferenced. It is a plain add everywhere except when
// _HPUX_SOURCE is defined without _LP64, where it becomes addp4
// (presumably the 32-bit HP-UX data model -- confirm against the
// platform ABI).
#if !defined(_HPUX_SOURCE) || defined(_LP64)
#define	ADDP	add
#else
#define	ADDP	addp4
#endif
16
// OPENSSL_cpuid_setup: nothing to set up here; the routine consists
// of a single bundle that returns to the caller straight away.
// The two leading slots are spelled out as explicit nops.
.global	OPENSSL_cpuid_setup#
.proc	OPENSSL_cpuid_setup#
OPENSSL_cpuid_setup:
{ .mib
	nop.m			0
	nop.i			0
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_cpuid_setup#
22
// OPENSSL_rdtsc: return the current value of the interval time
// counter.
//   Out: r8 = ar.itc
.global	OPENSSL_rdtsc#
.proc	OPENSSL_rdtsc#
OPENSSL_rdtsc:
{ .mib
	mov			r8=ar.itc	// result = tick counter
	nop.i			0
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_rdtsc#
29
// OPENSSL_atomic_add: atomically add an amount to a 32-bit word.
//   In:  r32 = address of the 32-bit word
//        r33 = amount to add
//   Out: r8  = the value that was stored, sign-extended from 32 bits
//
// The word is read once, then a cmpxchg4 loop tries to replace the
// value it last saw with value+amount; if memory held something
// else, the loop retries with the value cmpxchg4 returned.
//
// The pointer is run through ADDP before the first access, exactly
// as the other routines in this file do for their pointer
// arguments, so the routine is also usable when ADDP is addp4
// (_HPUX_SOURCE without _LP64). Elsewhere ADDP is an add of zero.
.global	OPENSSL_atomic_add#
.proc	OPENSSL_atomic_add#
.align	32
OPENSSL_atomic_add:
{ .mmi;	ADDP		r32=0,r32;;		// make pointer dereferenceable
	ld4		r2=[r32]		// r2 = current value
	nop.i		0		};;
.Lspin:
{ .mii;	mov		ar.ccv=r2		// value cmpxchg4 must still find
	add		r8=r2,r33		// r8 = proposed new value
	mov		r3=r2		};;	// r3 = value we expected
{ .mmi;	mf;;
	cmpxchg4.acq	r2=[r32],r8,ar.ccv	// r2 = value actually found
	nop.i		0		};;
{ .mib;	cmp.ne		p6,p0=r2,r3		// found != expected: lost the race
	nop.i		0
(p6)	br.dpnt		.Lspin		};;	// retry with the fresh value
{ .mib;	nop.m		0
	sxt4		r8=r8			// return value as signed 32-bit
	br.ret.sptk.many	b0	};;
.endp	OPENSSL_atomic_add#
51
// OPENSSL_wipe_cpu: zero scratch registers and the register backing
// store of this procedure's own frame.
//
// A frame of 96 stacked registers, all of them rotating, is
// allocated. The loop zeroes r127 and f127 once per rotation and
// stores zero words downwards through memory, starting at
// ar.bsp+96*8-8. After the loop r11, r14-r31 and f6-f31 are cleared
// one by one, the predicates selected by mask 0x1ffff are restored
// from the copy taken on entry, and ar.lc is restored.
//
// The result is a two-word structure: r8 holds the caller's stack
// pointer and r9 the pointer into the backing store computed at the
// end. The latter is handed back because wiping the backing store
// of the current frame (which is all this procedure does) may not
// be enough: one might have to continue toward higher addresses to
// cover context that was saved "retroactively".
//
// NOTE(review): with ar.lc=96 and ar.ec=1 the br.ctop loop body
// appears to run 97 times, which would put the lowest store at
// ar.bsp-8 and leave r9 equal to ar.bsp+96*8-8 on return. Confirm
// that this is intended.
.global	OPENSSL_wipe_cpu#
.proc	OPENSSL_wipe_cpu#
.align	32
OPENSSL_wipe_cpu:
	.prologue
	.fframe	0
	.save	ar.pfs,r2
	.save	ar.lc,r3
{ .mib
	alloc		r2=ar.pfs,0,96,0,96
	mov		r3=ar.lc		// saved, restored before return
	brp.loop.imp	.Lwipe_loop,.Lwipe_done-16
};;
{ .mii
	mov		r9=ar.bsp		// base of this frame in backing store
	mov		r8=pr			// saved predicates
	mov		ar.lc=96
};;
	.body
{ .mii
	add		r9=96*8-8,r9		// first slot to be zeroed
	mov		ar.ec=1
};;

// One can sweep double as fast, but then there is no guarantee
// that the backing storage is wiped...
.Lwipe_loop:
{ .mfi
	st8		[r9]=r0,-8		// zero one slot, step downwards
	mov		f127=f0
	mov		r127=r0
}
{ .mfb
	nop.m		0
	nop.f		0
	br.ctop.sptk	.Lwipe_loop		// rotate and repeat
};;
.Lwipe_done:

// Static scratch registers: r11, r14-r31 paired with f6-f15.
{ .mfi
	mov		r11=r0
	mov		f6=f0
	mov		r14=r0
}
{ .mfi
	mov		r15=r0
	mov		f7=f0
	mov		r16=r0
}
{ .mfi
	mov		r17=r0
	mov		f8=f0
	mov		r18=r0
}
{ .mfi
	mov		r19=r0
	mov		f9=f0
	mov		r20=r0
}
{ .mfi
	mov		r21=r0
	mov		f10=f0
	mov		r22=r0
}
{ .mfi
	mov		r23=r0
	mov		f11=f0
	mov		r24=r0
}
{ .mfi
	mov		r25=r0
	mov		f12=f0
	mov		r26=r0
}
{ .mfi
	mov		r27=r0
	mov		f13=f0
	mov		r28=r0
}
{ .mfi
	mov		r29=r0
	mov		f14=f0
	mov		r30=r0
}
{ .mfi
	mov		r31=r0
	mov		f15=f0
	nop.i		0
}
// Remaining floating-point registers f16-f31, one per bundle.
{ .mfi
	mov		f16=f0
}
{ .mfi
	mov		f17=f0
}
{ .mfi
	mov		f18=f0
}
{ .mfi
	mov		f19=f0
}
{ .mfi
	mov		f20=f0
}
{ .mfi
	mov		f21=f0
}
{ .mfi
	mov		f22=f0
}
{ .mfi
	mov		f23=f0
}
{ .mfi
	mov		f24=f0
}
{ .mfi
	mov		f25=f0
}
{ .mfi
	mov		f26=f0
}
{ .mfi
	mov		f27=f0
}
{ .mfi
	mov		f28=f0
}
{ .mfi
	mov		f29=f0
}
{ .mfi
	mov		f30=f0
}
{ .mfi
	add		r9=96*8+8,r9		// second word of the result
	mov		f31=f0
	mov		pr=r8,0x1ffff		// put saved predicates back
}
{ .mib
	mov		r8=sp			// first word of the result
	mov		ar.lc=r3
	br.ret.sptk	b0
};;
.endp	OPENSSL_wipe_cpu#
141
// OPENSSL_cleanse: overwrite a buffer with zero bytes.
//   In:  r32 = buffer address (passed through ADDP before use)
//        r33 = length in bytes
//
// A zero length returns immediately. Lengths below 15 are cleared
// one byte at a time. Longer buffers are cleared bytewise until the
// address is a multiple of 8, then eight bytes per store, and any
// remaining bytes go through the bytewise loop again.
.global	OPENSSL_cleanse#
.proc	OPENSSL_cleanse#
OPENSSL_cleanse:
{ .mib
	cmp.eq		p6,p0=0,r33		// len==0
	ADDP		r32=0,r32
(p6)	br.ret.spnt	b0
};;
{ .mib
	and		r2=7,r32		// r2 = address modulo 8
	cmp.leu		p6,p0=15,r33		// len>=15
(p6)	br.cond.dptk	.Lcleanse_head
};;

// Bytewise loop, entered with len>0.
.Lcleanse_tail:
{ .mib
	st1		[r32]=r0,1
	cmp.ltu		p6,p7=1,r33		// p6: len>1, p7: that was the last byte
}
{ .mbb
	add		r33=-1,r33		// len--
(p6)	br.cond.dptk	.Lcleanse_tail
(p7)	br.ret.sptk.many	b0
};;

// Clear single bytes until the address is 8-byte aligned.
.Lcleanse_head:
{ .mib
	cmp.eq		p6,p0=0,r2		// aligned yet?
(p6)	br.cond.dptk	.Lcleanse_words
};;
{ .mmi
	st1		[r32]=r0,1;;
	and		r2=7,r32		// recompute address modulo 8
}
{ .mib
	add		r33=-1,r33		// len--
	br		.Lcleanse_head
};;

// Eight bytes per store while at least 16 bytes were left before
// the store just issued.
.Lcleanse_words:
{ .mmi
	st8		[r32]=r0,8
	and		r2=-8,r33		// len&~7, taken before the decrement
	add		r33=-8,r33		// len-=8
};;
{ .mib
	cmp.ltu		p6,p0=8,r2		// ((len+8)&~7)>8
(p6)	br.cond.dptk	.Lcleanse_words
};;

{ .mbb
	cmp.eq		p6,p7=r0,r33		// anything left over?
(p7)	br.cond.dpnt	.Lcleanse_tail
(p6)	br.ret.sptk.many	b0
};;
.endp	OPENSSL_cleanse#
178
// CRYPTO_memcmp: compare two byte strings of equal length.
//   In:  r32 = first buffer, r33 = second buffer (both passed
//        through ADDP), r34 = length in bytes
//   Out: r8  = 0 if all bytes are equal (or length is 0), else 1
//
// The loop count is derived from the length alone (ar.lc = len-1);
// byte differences are OR-ed into r8 and only examined after the
// loop. The loop is software-pipelined over rotating registers:
//   stage p16  load one byte from each buffer
//   stage p18  xor the two bytes loaded two rotations earlier
//   stage p19  or the xor result into r8
.global	CRYPTO_memcmp#
.proc	CRYPTO_memcmp#
.align	32
.skip	16
CRYPTO_memcmp:
	.prologue
{ .mib
	mov		r8=0			// accumulated difference
	cmp.eq		p6,p0=0,r34		// len==0?
(p6)	br.ret.spnt	b0
};;
	.save		ar.pfs,r2
{ .mib
	alloc		r2=ar.pfs,3,5,0,8
	.save		ar.lc,r3
	mov		r3=ar.lc
	brp.loop.imp	.Lcmp_loop,.Lcmp_loop_end-16
}
{ .mib
	sub		r10=r34,r0,1		// len-1, becomes the loop count
	.save		pr,r9
	mov		r9=pr
};;
{ .mii
	ADDP		r16=0,r32		// r16 walks the first buffer
	mov		ar.lc=r10
	mov		ar.ec=4			// four stages: p16..p19
}
{ .mib
	ADDP		r17=0,r33		// r17 walks the second buffer
	mov		pr.rot=1<<16		// only p16 set on entry
};;

.Lcmp_loop:
{ .mib
	(p16)	ld1	r32=[r16],1
	(p18)	xor	r34=r34,r38
}
{ .mib
	(p16)	ld1	r36=[r17],1
	(p19)	or	r8=r8,r35
	br.ctop.sptk	.Lcmp_loop
};;
.Lcmp_loop_end:

{ .mib
	cmp.ne		p6,p0=0,r8		// any bit differed?
	mov		ar.lc=r3
};;
{ .mib
(p6)	mov		r8=1			// collapse nonzero to 1
	mov		pr=r9,0x1ffff
	br.ret.sptk.many	b0
};;
.endp	CRYPTO_memcmp#
218
// OPENSSL_instrument_bus: fold tick-counter deltas into an array of
// 32-bit words.
//   In:  r32 = address of the array (passed through ADDP)
//        r33 = cnt, number of words to process
//   Out: r8  = original cnt minus the count left when the loop ends
//
// Every probe flushes the cache line of the current word (fc), loads
// the word, and uses cmpxchg4 to replace it with word+diff, where
// diff is the ar.itc difference between this probe and the previous
// one. The value cmpxchg4 returns in r3 is not examined. One probe
// with diff=0 is made on the first word before the loop starts.
.global	OPENSSL_instrument_bus#
.proc	OPENSSL_instrument_bus#
OPENSSL_instrument_bus:
{ .mmi
	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32
}
{ .mmi
	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8			// lasttick=tick
};;

{ .mmi
	fc		r32;;
	ld4		r8=[r32]
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10
};;
{ .mmi
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
};;
.Lbus_loop:
{ .mmi
	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8			// lasttick=tick
};;
{ .mmi
	fc		r32;;
	ld4		r8=[r32]
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10		// word+diff
};;
{ .mmi
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
	add		r33=-1,r33		// --cnt
	add		r32=4,r32		// next word
};;
{ .mib
	cmp4.ne		p6,p0=0,r33
(p6)	br.cond.dptk	.Lbus_loop
};;

{ .mib
	sub		r8=r2,r33
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_instrument_bus#
253
// OPENSSL_instrument_bus2: like OPENSSL_instrument_bus, but the
// output position only advances when the tick delta changes, and
// the number of probes is bounded.
//   In:  r32 = address of the array of 32-bit words (through ADDP)
//        r33 = cnt, number of words to fill
//        r34 = max, upper bound on probes made by the loop
//   Out: r8  = original cnt minus the count left on exit
//
// Each probe flushes the current word (fc), loads it and cmpxchg4-s
// in word+diff; the cmpxchg4 result in r3 is not examined. After a
// probe the new diff is compared with the previous one: only when
// they differ is cnt decremented and the pointer moved on. The loop
// stops when cnt reaches zero or when max has been counted down to
// zero.
.global	OPENSSL_instrument_bus2#
.proc	OPENSSL_instrument_bus2#
OPENSSL_instrument_bus2:
{ .mmi
	mov		r2=r33			// put aside cnt
	ADDP		r32=0,r32
}
{ .mmi
	mov		r8=ar.itc;;
	mov		r10=r0			// diff=0
	mov		r9=r8			// lasttick=tick
};;

{ .mmi
	fc		r32;;
	ld4		r8=[r32]
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10
};;
{ .mmi
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
};;

{ .mmi
	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8			// lasttick=tick
};;
.Lbus2_loop:
{ .mmi
	mov		r11=r10			// lastdiff=diff
	add		r34=-1,r34		// --max
};;
{ .mmi
	fc		r32;;
	ld4		r8=[r32]
	cmp4.eq		p6,p0=0,r34		// max exhausted?
};;
{ .mmi
	mf
	mov		ar.ccv=r8
	add		r8=r8,r10		// word+diff
};;
{ .mmb
	cmpxchg4.acq	r3=[r32],r8,ar.ccv
(p6)	br.cond.spnt	.Lbus2_done
};;

{ .mmi
	mov		r8=ar.itc;;
	sub		r10=r8,r9		// diff=tick-lasttick
	mov		r9=r8			// lasttick=tick
};;
{ .mmi
	cmp.ne		p6,p0=r10,r11;;		// diff!=lastdiff
(p6)	add		r33=-1,r33		// conditional --cnt
};;
{ .mib
	cmp4.ne		p7,p0=0,r33
(p6)	add		r32=4,r32		// conditional ++out
(p7)	br.cond.dptk	.Lbus2_loop
};;
.Lbus2_done:
{ .mib
	sub		r8=r2,r33
	br.ret.sptk.many	b0
};;
.endp	OPENSSL_instrument_bus2#
298