#! /usr/bin/env perl
# xref: /openssl/crypto/bn/asm/sparcv9-gf2m.pl (revision 54b40531)
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements the bn_GF2m_mul_2x2 polynomial multiplication
# used in bn_gf2m.c. It's kind of a low-hanging mechanical port from C
# for the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. The former
# delivers ~25-45% more performance (more for longer keys) in the
# heaviest DH and DSA verify operations on the venerable UltraSPARC II.
# On T4 the VIS3 code is ~100-230% faster than gcc-generated code and
# ~35-90% faster than the pure SPARCv9 code path.

# Output selection: the last command-line argument, when present, names
# the file that receives the generated assembly; without it the text goes
# to the inherited STDOUT.  Failing to open the file is fatal, so that a
# build never continues with the assembly silently sent elsewhere.
$output = pop @ARGV;
if ($output) {
    open STDOUT,">$output" or die "can't open $output: $!";
}

# Bytes reserved in the stack frame of the software code path:
# a 16-entry table of 64-bit values.
$locals=16*8;

# Register holding the base address of that table.
$tab="%l0";

# Two register pairs used by the nibble loop: @T for shifted table
# entries, @i for table offsets and the entries loaded through them.
@T=("%g2","%g3");
@i=("%g4","%g5");

# %o0-%o5 carry the multiples of a that are combined into the table.
# Note the deliberate register sharing: $hi reuses the register of $a8
# and $a reuses the register of $lo.
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
($lo,$hi,$b)=("%g1",$a8,"%o7"); $a=$lo;

# Emit the function entry, the VIS3 fast path and the first part of the
# software path of bn_GF2m_mul_2x2.
#
# As used by the code below, %o0 is the address the result is stored to,
# and %o1:%o2 and %o3:%o4 are the two 64-bit operands a and b, each passed
# as a pair of 32-bit halves with the high half first.  The 128-bit
# product is stored as four 32-bit words, least significant word first.
#
# Dispatch: OPENSSL_sparcv9cap_P[0] is loaded (through the
# SPARC_LOAD_ADDRESS_LEAF macro from crypto/sparc_arch.h) and tested
# against SPARCV9_VIS3; when the bit is clear control goes to .Lsoftware.
#
# VIS3 path: xmulx/xmulxhi, hand-encoded with .word, produce the low and
# high 64 bits of the product, which are then stored and the function
# returns with retl.
#
# Software path (.Lsoftware):
#  - save opens a new register window with $locals extra bytes of stack;
#    $tab points at that area;
#  - the three top bits of a are handled apart from the table: each one
#    is broadcast to a full-width mask with srax and ANDed with b, and
#    the masked b values shifted left by 63/62/61 and right by 1/2/3 are
#    XORed into $lo and $hi respectively;
#  - with those bits masked off, a1, a2, a4 and a8 (a shifted left by
#    0..3 bits) are XOR-combined so that tab[k] holds the XOR of the
#    terms selected by the set bits of k, for k = 0..15;
#  - the table entries for the two lowest nibbles of b are loaded, the
#    lowest one is XORed into $lo, and the table offset for the third
#    nibble is prepared.  The offsets are the nibble value times 8, which
#    is why b is shifted by 3 bits less than the nibble position and
#    masked with 0xf<<3.
# The remaining nibbles are processed by the code generated after this
# block.  Backtick-quoted expressions are evaluated by the substitution
# at the end of the script before the text is printed.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
        SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
        ld	[%g1+0],%g1             	! OPENSSL_sparcv9cap_P[0]

        andcc	%g1, SPARCV9_VIS3, %g0
        bz,pn	%icc,.Lsoftware
        nop

	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab			! xmulx   %o1, %o3, %o2
	.word	0x99b262cb			! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1			! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]

.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-$locals,%sp

	sllx	%i1,32,$a
	mov	-1,$a12
	sllx	%i3,32,$b
	or	%i2,$a,$a
	srlx	$a12,1,$a48			! 0x7fff...
	or	%i4,$b,$b
	srlx	$a12,2,$a12			! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,$tab

	sllx	$a,2,$a4
	mov	$a,$a1
	sllx	$a,1,$a2

	srax	$a4,63,@i[1]			! broadcast 61st bit
	and	$a48,$a4,$a4			! (a<<2)&0x7fff...
	srlx	$a48,2,$a48
	srax	$a2,63,@i[0]			! broadcast 62nd bit
	and	$a12,$a2,$a2			! (a<<1)&0x3fff...
	srax	$a1,63,$lo			! broadcast 63rd bit
	and	$a48,$a1,$a1			! (a<<0)&0x1fff...

	sllx	$a1,3,$a8
	and	$b,$lo,$lo
	and	$b,@i[0],@i[0]
	and	$b,@i[1],@i[1]

	stx	%g0,[$tab+0*8]			! tab[0]=0
	xor	$a1,$a2,$a12
	stx	$a1,[$tab+1*8]			! tab[1]=a1
	stx	$a2,[$tab+2*8]			! tab[2]=a2
	 xor	$a4,$a8,$a48
	stx	$a12,[$tab+3*8]			! tab[3]=a1^a2
	 xor	$a4,$a1,$a1

	stx	$a4,[$tab+4*8]			! tab[4]=a4
	xor	$a4,$a2,$a2
	stx	$a1,[$tab+5*8]			! tab[5]=a1^a4
	xor	$a4,$a12,$a12
	stx	$a2,[$tab+6*8]			! tab[6]=a2^a4
	 xor	$a48,$a1,$a1
	stx	$a12,[$tab+7*8]			! tab[7]=a1^a2^a4
	 xor	$a48,$a2,$a2

	stx	$a8,[$tab+8*8]			! tab[8]=a8
	xor	$a48,$a12,$a12
	stx	$a1,[$tab+9*8]			! tab[9]=a1^a8
	 xor	$a4,$a1,$a1
	stx	$a2,[$tab+10*8]			! tab[10]=a2^a8
	 xor	$a4,$a2,$a2
	stx	$a12,[$tab+11*8]		! tab[11]=a1^a2^a8

	xor	$a4,$a12,$a12
	stx	$a48,[$tab+12*8]		! tab[12]=a4^a8
	 srlx	$lo,1,$hi
	stx	$a1,[$tab+13*8]			! tab[13]=a1^a4^a8
	 sllx	$lo,63,$lo
	stx	$a2,[$tab+14*8]			! tab[14]=a2^a4^a8
	 srlx	@i[0],2,@T[0]
	stx	$a12,[$tab+15*8]		! tab[15]=a1^a2^a4^a8

	sllx	@i[0],62,$a1
	 sllx	$b,3,@i[0]
	srlx	@i[1],3,@T[1]
	 and	@i[0],`0xf<<3`,@i[0]
	sllx	@i[1],61,$a2
	 ldx	[$tab+@i[0]],@i[0]
	 srlx	$b,4-3,@i[1]
	xor	@T[0],$hi,$hi
	 and	@i[1],`0xf<<3`,@i[1]
	xor	$a1,$lo,$lo
	 ldx	[$tab+@i[1]],@i[1]
	xor	@T[1],$hi,$hi

	xor	@i[0],$lo,$lo
	srlx	$b,8-3,@i[0]
	 xor	$a2,$lo,$lo
	and	@i[0],`0xf<<3`,@i[0]
___
# Middle part of the nibble processing, unrolled at generation time for
# n = 1..13.  Each step:
#  - XORs the table entry already loaded into @i[1], shifted left by 4*n
#    bits, into $lo, and the bits shifted out (entry >> (64-4*n)) into
#    $hi;
#  - loads the table entry whose offset is currently in @i[0];
#  - computes the table offset for nibble n+2 of b into @i[1].  The
#    shift amount is emitted as "<4*(n+2)>-3": the backtick part is
#    evaluated by the substitution at the end of the script, the "-3"
#    stays in the operand text, and the 0xf<<3 mask keeps the offset a
#    multiple of 8.
# After every step the two registers of @i and of @T trade places, so the
# entry just loaded is the one consumed by the next step.
for($n=1;$n<14;$n++) {
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo
	srlx	$b,`($n+2)*4`-3,@i[1]
	xor	@T[1],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
___
	push(@i,shift(@i)); push(@T,shift(@T));
}
# Tail of the software path.  $n is 14 when this text is generated, so
# the two remaining table entries are folded in with shifts of 56 and 60
# bits (and the complementary right shifts for $hi); the second of them
# is loaded here, no further offset needs to be computed.
# The 128-bit result is then written as four 32-bit words at
# [%i0+0], [%i0+4], [%i0+8] and [%i0+12], least significant word first,
# and the function returns with ret/restore, which leaves the register
# window opened at .Lsoftware.
# The trailing directives give the symbol its type and size and embed an
# identification string.
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo

	sllx	@i[0],`($n+1)*4`,@T[0]
	 xor	@T[1],$hi,$hi
	srlx	@i[0],`64-($n+1)*4`,@T[1]
	xor	@T[0],$lo,$lo
	xor	@T[1],$hi,$hi

	srlx	$lo,32,%i1
	st	$lo,[%i0+0]
	st	%i1,[%i0+4]
	srlx	$hi,32,%i2
	st	$hi,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

# Replace every backtick-quoted expression in the accumulated text with
# the value Perl computes for it (e.g. the shift counts and the 0xf<<3
# mask), then write the finished assembly out.  A failure to flush/close
# the output is fatal so that a truncated file is not mistaken for a
# successful run.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";