#! /usr/bin/env perl
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html

#
# ====================================================================
# Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
# project. The module is, however, dual licensed under OpenSSL and
# CRYPTOGAMS licenses depending on where you obtain it. For further
# details see http://www.openssl.org/~appro/cryptogams/.
# ====================================================================
#
# October 2012
#
# The module implements bn_GF2m_mul_2x2 polynomial multiplication used
# in bn_gf2m.c. It's kind of low-hanging mechanical port from C for
# the time being... Except that it has two code paths: one suitable
# for all SPARCv9 processors and one for VIS3-capable ones. Former
# delivers ~25-45% more, more for longer keys, heaviest DH and DSA
# verify operations on venerable UltraSPARC II. On T4 VIS3 code is
# ~100-230% faster than gcc-generated code and ~35-90% faster than
# the pure SPARCv9 code path.

# The last command-line argument, when present (and true), names the
# output file; otherwise the generated assembly goes to the inherited
# STDOUT. Use three-argument open so the name is never interpreted as a
# mode, and fail right away with the file name if it cannot be created
# instead of only noticing at the final close.
$output = pop;
if ($output) {
	open STDOUT, '>', $output
	    or die "can't open $output: $!";
}

# Bytes reserved on the stack for the lookup table: 16 entries of
# 8 bytes each (stored with stx at [$tab+0*8] .. [$tab+15*8] below).
$locals=16*8;

# Register holding the base address of the on-stack table.
$tab="%l0";

# Two-element register pools for shifted temporaries (@T) and table
# indices/values (@i). They are rotated after every generated round
# further down, so consecutive rounds alternate between the registers.
@T=("%g2","%g3");
@i=("%g4","%g5");

# $a1..$a48 map to %o0..%o5. $hi shares its register with $a8, and $a
# shares its register with $lo.
($a1,$a2,$a4,$a8,$a12,$a48)=map("%o$_",(0..5));
($lo,$hi,$b)=("%g1",$a8,"%o7");	$a=$lo;

# Prologue: capability test, the VIS3 (xmulx/xmulxhi) path, and for the
# software path the construction of the 16-entry table plus the first
# table lookups. Expressions in `backticks` are evaluated as Perl by the
# substitution at the very end of this script.
$code.=<<___;
#ifndef __ASSEMBLER__
# define __ASSEMBLER__ 1
#endif
#include "crypto/sparc_arch.h"

#ifdef __arch64__
.register	%g2,#scratch
.register	%g3,#scratch
#endif

#ifdef __PIC__
SPARC_PIC_THUNK(%g1)
#endif

.globl	bn_GF2m_mul_2x2
.align	16
bn_GF2m_mul_2x2:
	SPARC_LOAD_ADDRESS_LEAF(OPENSSL_sparcv9cap_P,%g1,%g5)
	ld	[%g1+0],%g1		! OPENSSL_sparcv9cap_P[0]

	andcc	%g1, SPARCV9_VIS3, %g0
	bz,pn	%icc,.Lsoftware
	nop

	sllx	%o1, 32, %o1
	sllx	%o3, 32, %o3
	or	%o2, %o1, %o1
	or	%o4, %o3, %o3
	.word	0x95b262ab		! xmulx %o1, %o3, %o2
	.word	0x99b262cb		! xmulxhi %o1, %o3, %o4
	srlx	%o2, 32, %o1		! 13 cycles later
	st	%o2, [%o0+0]
	st	%o1, [%o0+4]
	srlx	%o4, 32, %o3
	st	%o4, [%o0+8]
	retl
	st	%o3, [%o0+12]

.align	16
.Lsoftware:
	save	%sp,-STACK_FRAME-$locals,%sp

	sllx	%i1,32,$a
	mov	-1,$a12
	sllx	%i3,32,$b
	or	%i2,$a,$a
	srlx	$a12,1,$a48		! 0x7fff...
	or	%i4,$b,$b
	srlx	$a12,2,$a12		! 0x3fff...
	add	%sp,STACK_BIAS+STACK_FRAME,$tab

	sllx	$a,2,$a4
	mov	$a,$a1
	sllx	$a,1,$a2

	srax	$a4,63,@i[1]		! broadcast 61st bit
	and	$a48,$a4,$a4		! (a<<2)&0x7fff...
	srlx	$a48,2,$a48
	srax	$a2,63,@i[0]		! broadcast 62nd bit
	and	$a12,$a2,$a2		! (a<<1)&0x3fff...
	srax	$a1,63,$lo		! broadcast 63rd bit
	and	$a48,$a1,$a1		! (a<<0)&0x1fff...

	sllx	$a1,3,$a8
	and	$b,$lo,$lo
	and	$b,@i[0],@i[0]
	and	$b,@i[1],@i[1]

	stx	%g0,[$tab+0*8]		! tab[0]=0
	xor	$a1,$a2,$a12
	stx	$a1,[$tab+1*8]		! tab[1]=a1
	stx	$a2,[$tab+2*8]		! tab[2]=a2
	xor	$a4,$a8,$a48
	stx	$a12,[$tab+3*8]		! tab[3]=a1^a2
	xor	$a4,$a1,$a1

	stx	$a4,[$tab+4*8]		! tab[4]=a4
	xor	$a4,$a2,$a2
	stx	$a1,[$tab+5*8]		! tab[5]=a1^a4
	xor	$a4,$a12,$a12
	stx	$a2,[$tab+6*8]		! tab[6]=a2^a4
	xor	$a48,$a1,$a1
	stx	$a12,[$tab+7*8]		! tab[7]=a1^a2^a4
	xor	$a48,$a2,$a2

	stx	$a8,[$tab+8*8]		! tab[8]=a8
	xor	$a48,$a12,$a12
	stx	$a1,[$tab+9*8]		! tab[9]=a1^a8
	xor	$a4,$a1,$a1
	stx	$a2,[$tab+10*8]		! tab[10]=a2^a8
	xor	$a4,$a2,$a2
	stx	$a12,[$tab+11*8]	! tab[11]=a1^a2^a8

	xor	$a4,$a12,$a12
	stx	$a48,[$tab+12*8]	! tab[12]=a4^a8
	srlx	$lo,1,$hi
	stx	$a1,[$tab+13*8]		! tab[13]=a1^a4^a8
	sllx	$lo,63,$lo
	stx	$a2,[$tab+14*8]		! tab[14]=a2^a4^a8
	srlx	@i[0],2,@T[0]
	stx	$a12,[$tab+15*8]	! tab[15]=a1^a2^a4^a8

	sllx	@i[0],62,$a1
	sllx	$b,3,@i[0]
	srlx	@i[1],3,@T[1]
	and	@i[0],`0xf<<3`,@i[0]
	sllx	@i[1],61,$a2
	ldx	[$tab+@i[0]],@i[0]
	srlx	$b,4-3,@i[1]
	xor	@T[0],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
	xor	$a1,$lo,$lo
	ldx	[$tab+@i[1]],@i[1]
	xor	@T[1],$hi,$hi

	xor	@i[0],$lo,$lo
	srlx	$b,8-3,@i[0]
	xor	$a2,$lo,$lo
	and	@i[0],`0xf<<3`,@i[0]
___

# Emit 13 unrolled rounds. Each round shifts a previously loaded table
# value left by 4*$n bits into lo and right by 64-4*$n bits into hi,
# loads the next table entry, and extracts the following 4-bit group of
# b as a byte offset (shift count reduced by 3 and mask 0xf<<3, i.e. the
# index already multiplied by the 8-byte entry size).
# Keep the C-style loop: $n must still be visible, with value 14, after
# the loop because the epilogue below uses it.
for($n=1;$n<14;$n++) {
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo
	srlx	$b,`($n+2)*4`-3,@i[1]
	xor	@T[1],$hi,$hi
	and	@i[1],`0xf<<3`,@i[1]
___
	# Swap the roles of the two registers in each pool for the next round.
	push(@i,shift(@i));	push(@T,shift(@T));
}

# Epilogue: fold in the last two table values (shift counts derived from
# $n == 14, i.e. 56 and 60 bits), store the 128-bit result as four
# 32-bit words at [%i0+0..12], and return.
$code.=<<___;
	sllx	@i[1],`$n*4`,@T[0]
	ldx	[$tab+@i[0]],@i[0]
	srlx	@i[1],`64-$n*4`,@T[1]
	xor	@T[0],$lo,$lo

	sllx	@i[0],`($n+1)*4`,@T[0]
	xor	@T[1],$hi,$hi
	srlx	@i[0],`64-($n+1)*4`,@T[1]
	xor	@T[0],$lo,$lo
	xor	@T[1],$hi,$hi

	srlx	$lo,32,%i1
	st	$lo,[%i0+0]
	st	%i1,[%i0+4]
	srlx	$hi,32,%i2
	st	$hi,[%i0+8]
	st	%i2,[%i0+12]

	ret
	restore
.type	bn_GF2m_mul_2x2,#function
.size	bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
.asciz	"GF(2^m) Multiplication for SPARCv9, CRYPTOGAMS by <appro\@openssl.org>"
.align	4
___

# Replace every `expr` in the generated text with the value of expr
# evaluated as Perl, so the assembler only sees plain constants.
$code =~ s/\`([^\`]*)\`/eval($1)/gem;
print $code;
# Buffered write errors only surface at close, so check it.
close STDOUT or die "error closing STDOUT: $!";