#! /usr/bin/env perl
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License").  You may not use
# this file except in compliance with the License.  You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# ====================================================================
# Written by Rohan McLure <rmclure@linux.ibm.com> for the OpenSSL
# project.
# ====================================================================
#
# p384 lower-level primitives for PPC64 using vector instructions.
#
# This script builds an assembly listing in $code and pipes it through
# ppc-xlate.pl, which is invoked with the flavour and output file taken
# from the command line.
#

use strict;
use warnings;

# First argument is the flavour; the output file is the first following
# argument that looks like "name.ext".  Fall back to "-" when none is given.
my $flavour = shift;
my $output = "";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
    $output = "-";
}

# Locate ppc-xlate.pl relative to the directory this script lives in.
# When $0 carries no directory component the match fails; use an empty
# prefix in that case instead of interpolating an undefined value.
my ($xlate, $dir);
$dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Everything printed to STDOUT from here on is fed to the translator.
# The open is checked so that a failure to start the pipe is reported,
# and the output name is quoted so paths containing spaces survive the shell.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

my $code = "";

my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

my $vzero = "v32";

# startproc(NAME)
#
# Append the prologue of a global routine to $code: the .globl
# directive, a ".align 5" directive and the entry label.
sub startproc($)
{
    my ($name) = @_;

    $code.=<<___;
    .globl ${name}
    .align 5
${name}:

___
}

# endproc(NAME)
#
# Append the epilogue of a routine to $code: the return instruction
# and the .size directive for NAME.
sub endproc($)
{
    my ($name) = @_;

    $code.=<<___;
    blr
    .size ${name},.-${name}

___
}

# load_vrs(POINTER, REG_LIST)
#
# Append seven lxsd loads to $code.  Load i reads from byte offset 8*i
# off the POINTER register into the register named by REG_LIST->[i].
# REG_LIST is an array reference holding at least seven register names.
sub load_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    for my $i (0 .. 6) {
        my $offset = $i * 8;
        $code.=<<___;
    lxsd        $reg_list->[$i],$offset($pointer)
___
    }

    $code.=<<___;

___
}

# store_vrs(POINTER, REG_LIST)
#
# Append thirteen stxv stores to $code.  Store i writes the register
# named by REG_LIST->[i] to byte offset 16*i off the POINTER register.
# REG_LIST is an array reference holding at least thirteen register names.
sub store_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    for my $i (0 .. 12) {
        my $offset = $i * 16;
        $code.=<<___;
    stxv        $reg_list->[$i],$offset($pointer)
___
    }

    $code.=<<___;

___
}

$code.=<<___;
.machine    "any"
.text

___

{
    # mul/square common
    #
    # $t1..$t4 are scratch vector registers used to pair up two limbs,
    # $zero/$one are scratch GPRs and $out is the accumulator that is
    # stored to the result after each output limb has been summed.
    my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43");
    my ($zero, $one) = ("r8", "r9");
    my $out = "v51";

    {
        #
        # p384_felem_mul
        #
        # Inputs are two arrays of seven 64-bit limbs addressed by r4 and
        # r5; the result is thirteen 16-byte values written at r3.
        # Output k is the sum of in1[i]*in2[j] over all i+j == k: each
        # vmsumudm adds two such products to its accumulator operand, with
        # xxpermdi pairing up the limbs and with the second operand's
        # indices reversed so the products line up.
        #

        my ($in1p, $in2p) = ("r4", "r5");
        my @in1 = map("v$_",(44..50));
        my @in2 = map("v$_",(35..41));

        startproc("p384_felem_mul");

        $code.=<<___;
    vspltisw    $vzero,0

___

        load_vrs($in1p, \@in1);
        load_vrs($in2p, \@in2);

        $code.=<<___;
    vmsumudm    $out,$in1[0],$in2[0],$vzero
    stxv        $out,0($outp)

    xxpermdi    $t1,$in1[0],$in1[1],0b00
    xxpermdi    $t2,$in2[1],$in2[0],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,16($outp)

    xxpermdi    $t2,$in2[2],$in2[1],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in1[2],$in2[0],$out
    stxv        $out,32($outp)

    xxpermdi    $t2,$in2[1],$in2[0],0b00
    xxpermdi    $t3,$in1[2],$in1[3],0b00
    xxpermdi    $t4,$in2[3],$in2[2],0b00
    vmsumudm    $out,$t1,$t4,$vzero
    vmsumudm    $out,$t3,$t2,$out
    stxv        $out,48($outp)

    xxpermdi    $t2,$in2[4],$in2[3],0b00
    xxpermdi    $t4,$in2[2],$in2[1],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    vmsumudm    $out,$in1[4],$in2[0],$out
    stxv        $out,64($outp)

    xxpermdi    $t2,$in2[5],$in2[4],0b00
    xxpermdi    $t4,$in2[3],$in2[2],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    xxpermdi    $t4,$in2[1],$in2[0],0b00
    xxpermdi    $t1,$in1[4],$in1[5],0b00
    vmsumudm    $out,$t1,$t4,$out
    stxv        $out,80($outp)

    xxpermdi    $t1,$in1[0],$in1[1],0b00
    xxpermdi    $t2,$in2[6],$in2[5],0b00
    xxpermdi    $t4,$in2[4],$in2[3],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    xxpermdi    $t2,$in2[2],$in2[1],0b00
    xxpermdi    $t1,$in1[4],$in1[5],0b00
    vmsumudm    $out,$t1,$t2,$out
    vmsumudm    $out,$in1[6],$in2[0],$out
    stxv        $out,96($outp)

    xxpermdi    $t1,$in1[1],$in1[2],0b00
    xxpermdi    $t2,$in2[6],$in2[5],0b00
    xxpermdi    $t3,$in1[3],$in1[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    xxpermdi    $t3,$in2[2],$in2[1],0b00
    xxpermdi    $t1,$in1[5],$in1[6],0b00
    vmsumudm    $out,$t1,$t3,$out
    stxv        $out,112($outp)

    xxpermdi    $t1,$in1[2],$in1[3],0b00
    xxpermdi    $t3,$in1[4],$in1[5],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    vmsumudm    $out,$in1[6],$in2[2],$out
    stxv        $out,128($outp)

    xxpermdi    $t1,$in1[3],$in1[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    xxpermdi    $t1,$in1[5],$in1[6],0b00
    vmsumudm    $out,$t1,$t4,$out
    stxv        $out,144($outp)

    vmsumudm    $out,$t3,$t2,$vzero
    vmsumudm    $out,$in1[6],$in2[4],$out
    stxv        $out,160($outp)

    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,176($outp)

    vmsumudm    $out,$in1[6],$in2[6],$vzero
    stxv        $out,192($outp)
___

        endproc("p384_felem_mul");
    }

    {
        #
        # p384_felem_square
        #
        # Input is one array of seven 64-bit limbs addressed by r4; the
        # result is thirteen 16-byte values written at r3.  @inx2 holds
        # each limb shifted left by the count built in $t1 (1 in the high
        # doubleword), so every cross term in[i]*in[j] with i != j is
        # computed once against the doubled limb while the diagonal terms
        # in[i]*in[i] use the undoubled limb.
        #

        my ($inp) = ("r4");
        my @in = map("v$_",(44..50));
        my @inx2 = map("v$_",(35..41));

        startproc("p384_felem_square");

        $code.=<<___;
    vspltisw    $vzero,0

___

        load_vrs($inp, \@in);

        $code.=<<___;
    li          $zero,0
    li          $one,1
    mtvsrdd     $t1,$one,$zero
___

        for my $i (0 .. 6) {
            $code.=<<___;
    vsld        $inx2[$i],$in[$i],$t1
___
        }

        $code.=<<___;
    vmsumudm    $out,$in[0],$in[0],$vzero
    stxv        $out,0($outp)

    vmsumudm    $out,$in[0],$inx2[1],$vzero
    stxv        $out,16($outp)

    vmsumudm    $out,$in[0],$inx2[2],$vzero
    vmsumudm    $out,$in[1],$in[1],$out
    stxv        $out,32($outp)

    xxpermdi    $t1,$in[0],$in[1],0b00
    xxpermdi    $t2,$inx2[3],$inx2[2],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,48($outp)

    xxpermdi    $t4,$inx2[4],$inx2[3],0b00
    vmsumudm    $out,$t1,$t4,$vzero
    vmsumudm    $out,$in[2],$in[2],$out
    stxv        $out,64($outp)

    xxpermdi    $t2,$inx2[5],$inx2[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in[2],$inx2[3],$out
    stxv        $out,80($outp)

    xxpermdi    $t2,$inx2[6],$inx2[5],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in[2],$inx2[4],$out
    vmsumudm    $out,$in[3],$in[3],$out
    stxv        $out,96($outp)

    xxpermdi    $t3,$in[1],$in[2],0b00
    vmsumudm    $out,$t3,$t2,$vzero
    vmsumudm    $out,$in[3],$inx2[4],$out
    stxv        $out,112($outp)

    xxpermdi    $t1,$in[2],$in[3],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in[4],$in[4],$out
    stxv        $out,128($outp)

    xxpermdi    $t1,$in[3],$in[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,144($outp)

    vmsumudm    $out,$in[4],$inx2[6],$vzero
    vmsumudm    $out,$in[5],$in[5],$out
    stxv        $out,160($outp)

    vmsumudm    $out,$in[5],$inx2[6],$vzero
    stxv        $out,176($outp)

    vmsumudm    $out,$in[6],$in[6],$vzero
    stxv        $out,192($outp)
___

        endproc("p384_felem_square");
    }
}

# Evaluate any `...` spans embedded in the listing, then hand the result
# to the translator.  Errors from the pipe surface when STDOUT is closed.
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT or die "error closing STDOUT: $!";