#! /usr/bin/env perl
# Copyright 2004-2020 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# xref: /openssl/crypto/x86cpuid.pl (revision acc26552)

# Locate the perlasm support directory relative to this script and load the
# x86 assembler front end, which provides the &function_begin, &mov, &label,
# ... generators used throughout this file.
$0 =~ m/(.*[\/\\])[^\/\\]+$/; $dir=$1;
push(@INC, "${dir}perlasm", "perlasm");
require "x86asm.pl";

# The last command-line argument, if present, names the output file.  The
# generated assembly is printed to STDOUT, so redirect STDOUT there and fail
# immediately (rather than at the final close) if the file cannot be created.
if ($output = pop) {
	open STDOUT, '>', $output or die "can't open $output: $!";
}

# The first remaining argument is handed to asm_init().
&asm_init($ARGV[0]);

# SSE2-dependent code paths are generated only when the command line
# carries -DOPENSSL_IA32_SSE2.
for (@ARGV) { $sse2=1 if (/-DOPENSSL_IA32_SSE2/); }

# OPENSSL_ia32_cpuid(cap)
#
# Generates the CPU capability probe.  The single argument is a pointer to
# the capability vector; this routine writes the dwords at byte offsets
# 8..36 of it (CPUID leaf 7 sub-leaves 0 and 1, and leaf 0x24) and returns
# the adjusted CPUID leaf 1 feature words: EDX in %eax and ECX in %edx.
# If the CPUID instruction is not available both return registers are zero.
#
# Register roles after the vendor check:
#	%edi - maximum standard CPUID leaf, later the capability pointer
#	%ebp - zero on Intel, non-zero otherwise; later the copy of leaf 1 ECX
#	%esi - core count estimate, later the copy of leaf 1 EDX
&function_begin("OPENSSL_ia32_cpuid");
	&xor	("edx","edx");
	&pushf	();
	&pop	("eax");
	&mov	("ecx","eax");
	&xor	("eax",1<<21);		# try to toggle EFLAGS bit 21
	&push	("eax");
	&popf	();
	&pushf	();
	&pop	("eax");
	&xor	("ecx","eax");		# bit 21 is set only if the toggle stuck
	&xor	("eax","eax");
	&mov	("esi",&wparam(0));
	&mov	(&DWP(8,"esi"),"eax");	# clear extended feature flags
	&bt	("ecx",21);
	&jnc	(&label("nocpuid"));	# flag did not change: no CPUID
	&cpuid	();
	&mov	("edi","eax");		# max value for standard query level

	&xor	("eax","eax");
	&cmp	("ebx",0x756e6547);	# "Genu"
	&setne	(&LB("eax"));
	&mov	("ebp","eax");
	&cmp	("edx",0x49656e69);	# "ineI"
	&setne	(&LB("eax"));
	&or	("ebp","eax");
	&cmp	("ecx",0x6c65746e);	# "ntel"
	&setne	(&LB("eax"));
	&or	("ebp","eax");		# 0 indicates Intel CPU
	&jz	(&label("intel"));

	&cmp	("ebx",0x68747541);	# "Auth"
	&setne	(&LB("eax"));
	&mov	("esi","eax");
	&cmp	("edx",0x69746E65);	# "enti"
	&setne	(&LB("eax"));
	&or	("esi","eax");
	&cmp	("ecx",0x444D4163);	# "cAMD"
	&setne	(&LB("eax"));
	&or	("esi","eax");		# 0 indicates AMD CPU
	&jnz	(&label("intel"));	# neither vendor: share the generic part
					# of the Intel path (%ebp stays non-zero)

	# AMD specific
	&mov	("eax",0x80000000);
	&cpuid	();
	&cmp	("eax",0x80000001);
	&jb	(&label("intel"));
	&mov	("esi","eax");		# max value for extended query level
	&mov	("eax",0x80000001);
	&cpuid	();
	&or	("ebp","ecx");
	&and	("ebp",1<<11|1);	# keep XOP bit plus the non-Intel
					# marker in bit 0
	&cmp	("esi",0x80000008);
	&jb	(&label("intel"));

	&mov	("eax",0x80000008);
	&cpuid	();
	&movz	("esi",&LB("ecx"));	# number of cores - 1
	&inc	("esi");		# number of cores

	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&bt	("edx",28);
	&jnc	(&label("generic"));
	&shr	("ebx",16);
	&and	("ebx",0xff);
	&cmp	("ebx","esi");
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit
	&jmp	(&label("generic"));

&set_label("intel");
	&cmp	("edi",4);
	&mov	("esi",-1);
	&jb	(&label("nocacheinfo"));

	&mov	("eax",4);
	&mov	("ecx",0);		# query L1D
	&cpuid	();
	&mov	("esi","eax");
	&shr	("esi",14);
	&and	("esi",0xfff);		# number of cores -1 per L1D

&set_label("nocacheinfo");
	&mov	("eax",1);
	&xor	("ecx","ecx");
	&cpuid	();
	&and	("edx",0xbfefffff);	# force reserved bits #20, #30 to 0
	&cmp	("ebp",0);
	&jne	(&label("notintel"));
	&or	("edx",1<<30);		# set reserved bit#30 on Intel CPUs
	&and	(&HB("eax"),15);	# family ID
	&cmp	(&HB("eax"),15);	# P4?
	&jne	(&label("notintel"));
	&or	("edx",1<<20);		# set reserved bit#20 to engage RC4_CHAR
&set_label("notintel");
	&bt	("edx",28);		# test hyper-threading bit
	&jnc	(&label("generic"));
	&and	("edx",0xefffffff);
	&cmp	("esi",0);
	&je	(&label("generic"));

	&or	("edx",0x10000000);
	&shr	("ebx",16);
	&cmp	(&LB("ebx"),1);
	&ja	(&label("generic"));
	&and	("edx",0xefffffff);	# clear hyper-threading bit if
					# EBX[23:16] is not above 1

&set_label("generic");
	&and	("ebp",1<<11);		# isolate AMD XOP flag
	&and	("ecx",0xfffff7ff);	# force 11th bit to 0
	&mov	("esi","edx");		# %ebp:%esi is copy of %ecx:%edx
	&or	("ebp","ecx");		# merge AMD XOP flag

	&cmp	("edi",7);
	&mov	("edi",&wparam(0));	# mov leaves the flags of the cmp intact
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&xor	("ecx","ecx");
	&cpuid	();
	&mov	(&DWP(8,"edi"),"ebx");	# save cpuid(EAX=0x7, ECX=0x0).EBX to OPENSSL_ia32cap_P[2]
	&mov	(&DWP(12,"edi"),"ecx");	# save cpuid(EAX=0x7, ECX=0x0).ECX to OPENSSL_ia32cap_P[3]
	&mov	(&DWP(16,"edi"),"edx");	# save cpuid(EAX=0x7, ECX=0x0).EDX to OPENSSL_ia32cap_P[4]
	&cmp	("eax",1);		# Do we have cpuid(EAX=0x7, ECX=0x1)?
	&jb	(&label("no_extended_info"));
	&mov	("eax",7);
	&mov	("ecx",1);
	&cpuid	();			# cpuid(EAX=0x7, ECX=0x1)
	&mov	(&DWP(20,"edi"),"eax");	# save cpuid(EAX=0x7, ECX=0x1).EAX to OPENSSL_ia32cap_P[5]
	&mov	(&DWP(24,"edi"),"edx");	# save cpuid(EAX=0x7, ECX=0x1).EDX to OPENSSL_ia32cap_P[6]
	&mov	(&DWP(28,"edi"),"ebx");	# save cpuid(EAX=0x7, ECX=0x1).EBX to OPENSSL_ia32cap_P[7]
	&mov	(&DWP(32,"edi"),"ecx");	# save cpuid(EAX=0x7, ECX=0x1).ECX to OPENSSL_ia32cap_P[8]

	&and	("edx",0x80000);	# Mask cpuid(EAX=0x7, ECX=0x1).EDX bit 19 to detect AVX10 support
	&cmp	("edx",0x0);
	&je	(&label("no_extended_info"));

	&mov	("eax",0x24);		# Have AVX10 Support, query for details
	&mov	("ecx",0x0);
	&cpuid	();			# cpuid(EAX=0x24, ECX=0x0) AVX10 Leaf
	&mov	(&DWP(36,"edi"),"ebx");	# save cpuid(EAX=0x24, ECX=0x0).EBX to OPENSSL_ia32cap_P[9]

&set_label("no_extended_info");

	&bt	("ebp",27);		# check OSXSAVE bit
	&jnc	(&label("clear_avx"));
	&xor	("ecx","ecx");
	&data_byte(0x0f,0x01,0xd0);	# xgetbv
	&and	("eax",6);
	&cmp	("eax",6);		# both bits 1 and 2 enabled?
	&je	(&label("done"));
	&cmp	("eax",2);		# only bit 1 enabled?
	&je	(&label("clear_avx"));
&set_label("clear_xmm");
	&and	("ebp",0xfdfffffd);	# clear AESNI and PCLMULQDQ bits
	&and	("esi",0xfeffffff);	# clear FXSR
&set_label("clear_avx");
	&and	("ebp",0xefffe7ff);	# clear AVX, FMA and AMD XOP bits
	&and	(&DWP(20,"edi"),0xff7fffff);	# ~(1<<23) clear AVXIFMA,
						# which is VEX-encoded
						# and requires YMM state support
	&and	(&DWP(8,"edi"),0xffffffdf);	# clear AVX2
&set_label("done");
	&mov	("eax","esi");
	&mov	("edx","ebp");
&set_label("nocpuid");
&function_end("OPENSSL_ia32_cpuid");

# The routines below read the capability vector, which is defined elsewhere.
&external_label("OPENSSL_ia32cap_P");

# OPENSSL_rdtsc()
#
# Returns the time-stamp counter in %edx:%eax, or zero in both registers
# when bit 4 of the first capability word is clear.
&function_begin_B("OPENSSL_rdtsc","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("notsc"));	# keep the zero result
	&rdtsc	();
&set_label("notsc");
	&ret	();
&function_end_B("OPENSSL_rdtsc");

# OPENSSL_instrument_halt()
#
# This works in Ring 0 only [read DJGPP+MS-DOS+privileged DPMI host],
# but it's safe to call it on any [supported] 32-bit platform...
# Just check for [non-]zero return value...
#
# Returns in %edx:%eax the difference between two time-stamp counter
# readings taken immediately before and after a halt instruction.  Returns
# zero when there is no TSC, when the low two bits of %cs are non-zero, or
# when EFLAGS bit 9 is clear.
&function_begin_B("OPENSSL_instrument_halt","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"ecx"),4);
	&jnc	(&label("nohalt"));	# no TSC

	&data_word(0x9058900e);		# push %cs; pop %eax
	&and	("eax",3);
	&jnz	(&label("nohalt"));	# not enough privileges

	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nohalt"));	# interrupts are disabled

	&rdtsc	();
	&push	("edx");		# save the starting tick on the stack
	&push	("eax");
	&halt	();
	&rdtsc	();

	&sub	("eax",&DWP(0,"esp"));	# 64-bit subtract: end - start
	&sbb	("edx",&DWP(4,"esp"));
	&add	("esp",8);		# drop the saved tick
	&ret	();

&set_label("nohalt");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_instrument_halt");

# OPENSSL_far_spin(selector, offset)
#
# Essentially there is only one use for this function. Under DJGPP:
#
#	#include <go32.h>
#	...
#	i=OPENSSL_far_spin(_dos_ds,0x46c);
#	...
# to obtain the number of spins till closest timer interrupt.
#
# The first argument is loaded into %ds and the second is used as the
# address of a dword within that segment.  The routine counts loop
# iterations in %eax until that dword changes.  It returns zero (in both
# %eax and %edx) without spinning when EFLAGS bit 9 is clear, since the
# watched value could then never change.

&function_begin_B("OPENSSL_far_spin");
	&pushf	();
	&pop	("eax");
	&bt	("eax",9);
	&jnc	(&label("nospin"));	# interrupts are disabled

	&mov	("eax",&DWP(4,"esp"));
	&mov	("ecx",&DWP(8,"esp"));
	&data_word (0x90d88e1e);	# push %ds, mov %eax,%ds
	&xor	("eax","eax");		# spin counter
	&mov	("edx",&DWP(0,"ecx"));	# initial value to compare against
	&jmp	(&label("spin"));

	&align	(16);
&set_label("spin");
	&inc	("eax");
	&cmp	("edx",&DWP(0,"ecx"));
	&je	(&label("spin"));

	&data_word (0x1f909090);	# pop	%ds
	&ret	();

&set_label("nospin");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&ret	();
&function_end_B("OPENSSL_far_spin");

# OPENSSL_wipe_cpu()
#
# Zeroes %eax, %edx, the x87 register stack and, when the script is run with
# -DOPENSSL_IA32_SSE2 and the capability word reports both SSE2 and FXSR,
# %xmm0-%xmm7.  %ecx is overwritten with capability bits.  Returns the
# address just above the return address (%esp+4) in %eax.
#
# The capability word is loaded into %ecx and then tested as a value.  The
# bit test must therefore use the register form: a memory operand would
# dereference the capability bits as if they were an address.  Bit 0 of the
# word is the on-chip x87 FPU flag of CPUID leaf 1 EDX.
&function_begin_B("OPENSSL_wipe_cpu","EXTRN\t_OPENSSL_ia32cap_P:DWORD");
	&xor	("eax","eax");
	&xor	("edx","edx");
	&picmeup("ecx","OPENSSL_ia32cap_P");
	&mov	("ecx",&DWP(0,"ecx"));	# %ecx = first capability word
	&bt	("ecx",0);		# x87 FPU present?
	&jnc	(&label("no_x87"));
	if ($sse2) {
		&and	("ecx",1<<26|1<<24);	# check SSE2 and FXSR bits
		&cmp	("ecx",1<<26|1<<24);
		&jne	(&label("no_sse2"));
		&pxor	("xmm0","xmm0");
		&pxor	("xmm1","xmm1");
		&pxor	("xmm2","xmm2");
		&pxor	("xmm3","xmm3");
		&pxor	("xmm4","xmm4");
		&pxor	("xmm5","xmm5");
		&pxor	("xmm6","xmm6");
		&pxor	("xmm7","xmm7");
	&set_label("no_sse2");
	}
	# just a bunch of fldz to zap the fp/mm bank followed by finit...
	&data_word(0xeed9eed9,0xeed9eed9,0xeed9eed9,0xeed9eed9,0x90e3db9b);
&set_label("no_x87");
	&lea	("eax",&DWP(4,"esp"));
	&ret	();
&function_end_B("OPENSSL_wipe_cpu");

# OPENSSL_atomic_add(ptr, increment)
#
# Atomically adds the second argument to the dword addressed by the first
# and returns the new value in %eax.  Implemented as a compare-and-exchange
# retry loop; %ebx is preserved across the call.
&function_begin_B("OPENSSL_atomic_add");
	&mov	("edx",&DWP(4,"esp"));	# fetch the pointer, 1st arg
	&mov	("ecx",&DWP(8,"esp"));	# fetch the increment, 2nd arg
	&push	("ebx");
	&nop	();
	&mov	("eax",&DWP(0,"edx"));	# current value, the expected operand
&set_label("spin");
	&lea	("ebx",&DWP(0,"eax","ecx"));	# candidate = current + increment
	&nop	();
	&data_word(0x1ab10ff0);	# lock;	cmpxchg	%ebx,(%edx)	# %eax is involved and is always reloaded
	&jne	(&label("spin"));	# value changed underneath us, retry
	&mov	("eax","ebx");	# OpenSSL expects the new value
	&pop	("ebx");
	&ret	();
&function_end_B("OPENSSL_atomic_add");

# OPENSSL_cleanse(ptr, len)
#
# Writes len zero bytes starting at ptr.  Lengths below 7 are cleared one
# byte at a time.  Longer buffers are first advanced byte-wise to a 4-byte
# boundary, then cleared a dword at a time, and any remaining 1..3 bytes are
# handled by the byte loop.
&function_begin_B("OPENSSL_cleanse");
	&mov	("edx",&wparam(0));
	&mov	("ecx",&wparam(1));
	&xor	("eax","eax");		# the zero that gets stored
	&cmp	("ecx",7);
	&jae	(&label("lot"));
	&cmp	("ecx",0);
	&je	(&label("ret"));
&set_label("little");
	&mov	(&BP(0,"edx"),"al");
	&sub	("ecx",1);
	&lea	("edx",&DWP(1,"edx"));	# lea keeps the flags set by sub
	&jnz	(&label("little"));
&set_label("ret");
	&ret	();

&set_label("lot",16);
	&test	("edx",3);
	&jz	(&label("aligned"));
	&mov	(&BP(0,"edx"),"al");
	&lea	("ecx",&DWP(-1,"ecx"));
	&lea	("edx",&DWP(1,"edx"));
	&jmp	(&label("lot"));
&set_label("aligned");
	&mov	(&DWP(0,"edx"),"eax");
	&lea	("ecx",&DWP(-4,"ecx"));
	&test	("ecx",-4);		# at least 4 bytes left?
	&lea	("edx",&DWP(4,"edx"));	# lea keeps the flags set by test
	&jnz	(&label("aligned"));
	&cmp	("ecx",0);
	&jne	(&label("little"));	# clear the 1..3 byte tail
	&ret	();
&function_end_B("OPENSSL_cleanse");

# CRYPTO_memcmp(a, b, len)
#
# Compares len bytes of the two buffers and returns 0 in %eax when they are
# equal (or len is 0) and 1 otherwise.  Every byte is always visited: the
# XOR of each byte pair is OR-ed into an accumulator and there is no early
# exit on the first difference.
&function_begin_B("CRYPTO_memcmp");
	&push	("esi");
	&push	("edi");
	&mov	("esi",&wparam(0));
	&mov	("edi",&wparam(1));
	&mov	("ecx",&wparam(2));
	&xor	("eax","eax");		# accumulator of differences
	&xor	("edx","edx");
	&cmp	("ecx",0);
	&je	(&label("no_data"));
&set_label("loop");
	&mov	("dl",&BP(0,"esi"));
	&lea	("esi",&DWP(1,"esi"));
	&xor	("dl",&BP(0,"edi"));
	&lea	("edi",&DWP(1,"edi"));
	&or	("al","dl");
	&dec	("ecx");
	&jnz	(&label("loop"));
	&neg	("eax");		# non-zero accumulator sets the sign bit
	&shr	("eax",31);		# reduce to 0 or 1
&set_label("no_data");
	&pop	("edi");
	&pop	("esi");
	&ret	();
&function_end_B("CRYPTO_memcmp");
# OPENSSL_instrument_bus(out, cnt) and OPENSSL_instrument_bus2(out, cnt, max)
#
# Both routines sample the low 32 bits of the time-stamp counter around a
# cache-line flush plus locked add on the output array and accumulate the
# tick differences into that array of dwords.  Both return 0 when the script
# is run without -DOPENSSL_IA32_SSE2, or when the capability word lacks the
# TSC (bit 4) or CLFLUSH (bit 19) flag.
#
# The register assignments below are shared by the two functions.
{
my $lasttick = "esi";
my $lastdiff = "ebx";
my $out = "edi";
my $cnt = "ecx";
my $max = "ebp";

# OPENSSL_instrument_bus: adds one tick difference to each of the cnt
# elements of out in turn and returns cnt.
&function_begin("OPENSSL_instrument_bus");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));

	# collect 1st tick
	&rdtsc	();
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);
	&jmp	(&label("loop"));

&set_label("loop",16);
	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff
	&lea	($out,&DWP(4,$out));	# ++$out
	&sub	($cnt,1);		# --$cnt
	&jnz	(&label("loop"));

	&mov	("eax",&wparam(1));	# return cnt
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus");

# OPENSSL_instrument_bus2: as above, but advances to the next element of out
# only when the tick difference differs from the previous one, and stops
# after max samples at the latest.  Returns the number of elements advanced
# past (cnt minus the remaining count).
&function_begin("OPENSSL_instrument_bus2");
    &mov	("eax",0);
    if ($sse2) {
	&picmeup("edx","OPENSSL_ia32cap_P");
	&bt	(&DWP(0,"edx"),4);
	&jnc	(&label("nogo"));	# no TSC
	&bt	(&DWP(0,"edx"),19);
	&jnc	(&label("nogo"));	# no CLFLUSH

	&mov	($out,&wparam(0));	# load arguments
	&mov	($cnt,&wparam(1));
	&mov	($max,&wparam(2));

	&rdtsc	();			# collect 1st tick
	&mov	($lasttick,"eax");	# lasttick = tick
	&mov	($lastdiff,0);		# lastdiff = 0

	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),$lastdiff);

	&rdtsc	();			# collect 1st diff
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&jmp	(&label("loop2"));

&set_label("loop2",16);
	&clflush(&DWP(0,$out));
	&data_byte(0xf0);		# lock
	&add	(&DWP(0,$out),"eax");	# accumulate diff

	&sub	($max,1);
	&jz	(&label("done2"));	# sample budget exhausted

	&rdtsc	();
	&mov	("edx","eax");		# put aside tick (yes, I neglect edx)
	&sub	("eax",$lasttick);	# diff
	&mov	($lasttick,"edx");	# lasttick = tick
	&cmp	("eax",$lastdiff);
	&mov	($lastdiff,"eax");	# lastdiff = diff
	&mov	("edx",0);		# mov keeps the flags set by cmp
	&setne	("dl");			# edx = 1 if diff changed, else 0
	&sub	($cnt,"edx");		# conditional --$cnt
	&lea	($out,&DWP(0,$out,"edx",4));	# conditional ++$out
	&jnz	(&label("loop2"));	# flags still from the sub above

&set_label("done2");
	&mov	("eax",&wparam(1));
	&sub	("eax",$cnt);
&set_label("nogo");
    }
&function_end("OPENSSL_instrument_bus2");
}

# gen_random($rdop)
#
# Emits the function OPENSSL_ia32_<rdop>_bytes(buf, len), where $rdop is the
# name of the instruction generator to call ("rdrand" and "rdseed" are the
# two used in this file).  The generated function fills buf with up to len
# bytes, 32 bits per instruction, and returns the number of bytes actually
# written in %eax.  Each 32-bit value is attempted up to 8 times; when all
# 8 attempts leave the carry flag clear the function stops early and returns
# the count written so far.
sub gen_random {
my $rdop = shift;
&function_begin_B("OPENSSL_ia32_${rdop}_bytes");
	&push	("edi");
	&push	("ebx");
	&xor	("eax","eax");		# return value
	&mov	("edi",&wparam(0));
	&mov	("ebx",&wparam(1));

	&cmp	("ebx",0);
	&je	(&label("done"));

	&mov	("ecx",8);		# retry budget
&set_label("loop");
	&${rdop}("edx");		# call the generator named by $rdop
	&jc	(&label("break"));	# carry set: %edx holds a valid value
	&loop	(&label("loop"));
	&jmp	(&label("done"));	# retries exhausted

&set_label("break",16);
	&cmp	("ebx",4);
	&jb	(&label("tail"));
	&mov	(&DWP(0,"edi"),"edx");
	&lea	("edi",&DWP(4,"edi"));
	&add	("eax",4);
	&sub	("ebx",4);
	&jz	(&label("done"));
	&mov	("ecx",8);		# fresh retry budget for the next word
	&jmp	(&label("loop"));

&set_label("tail",16);			# fewer than 4 bytes left: store bytewise
	&mov	(&BP(0,"edi"),"dl");
	&lea	("edi",&DWP(1,"edi"));
	&inc	("eax");
	&shr	("edx",8);
	&dec	("ebx");
	&jnz	(&label("tail"));

&set_label("done");
	&xor	("edx","edx");		# Clear random value from registers
	&pop	("ebx");
	&pop	("edi");
	&ret	();
&function_end_B("OPENSSL_ia32_${rdop}_bytes");
}
# Instantiate the byte-filling routine once per instruction.
&gen_random("rdrand");
&gen_random("rdseed");

# Register OPENSSL_cpuid_setup through the assembler front end's initseg().
&initseg("OPENSSL_cpuid_setup");

&hidden("OPENSSL_cpuid_setup");
&hidden("OPENSSL_ia32cap_P");

&asm_finish();

# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";
