xref: /openssl/crypto/ec/asm/ecp_nistp384-ppc64.pl (revision 50f8b936)
1#! /usr/bin/env perl
2# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
3#
4# Licensed under the Apache License 2.0 (the "License").  You may not use
5# this file except in compliance with the License.  You can obtain a copy
6# in the file LICENSE in the source distribution or at
7# https://www.openssl.org/source/license.html
8#
9# ====================================================================
10# Written by Rohan McLure <rmclure@linux.ibm.com> for the OpenSSL
11# project.
12# ====================================================================
13#
14# p384 lower-level primitives for PPC64 using vector instructions.
15#
16
17use strict;
18use warnings;
19
# Command line: the first argument is the perlasm flavour handed through to
# ppc-xlate.pl; of the remaining arguments, the first one that looks like a
# file name (word characters, a dot, an extension) is the output path.
# When no such argument is present the output goes to "-".
my $flavour = shift;
my $output = "";
while (($output=shift) && ($output!~/\w[\w\-]*\.\w+$/)) {}
if (!$output) {
    $output = "-";
}

# Directory part of this script's own path, including the trailing separator.
# Only take $1 when the match succeeded: when $0 has no directory component
# the capture is not set by this match, so fall back to the empty string
# (i.e. look relative to the current directory) instead of using undef.
my $dir = ($0 =~ m/(.*[\/\\])[^\/\\]+$/) ? $1 : "";

# Locate the translator next to this script first, then in ../../perlasm/.
my $xlate;
( $xlate="${dir}ppc-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/ppc-xlate.pl" and -f $xlate) or
die "can't locate ppc-xlate.pl";

# Pipe everything printed to STDOUT through ppc-xlate.pl. The output path is
# quoted so that a path containing spaces survives the shell, and a failure
# to start the translator is fatal rather than silently producing nothing.
open OUT,"| \"$^X\" $xlate $flavour \"$output\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Accumulates the generated assembly text; printed once at the end.
my $code = "";

# General-purpose register names. Only $outp (the result pointer) is
# referenced by the generator code in this file; the others are kept so that
# the declarations stay available to any code added later.
my ($sp, $outp, $savelr, $savesp) = ("r1", "r3", "r10", "r12");

# Vector register that each generated routine clears with vspltisw and then
# uses as the zero addend for the first vmsumudm of every output limb.
my $vzero = "v32";
41
# startproc(NAME)
#
# Appends the opening of a procedure to $code: a .globl directive for NAME,
# a .align 5 directive, the NAME: label, and a trailing blank line.
sub startproc($)
{
    my $symbol = shift;

    $code .= "    .globl ${symbol}\n"
           . "    .align 5\n"
           . "${symbol}:\n"
           . "\n";
}
53
# endproc(NAME)
#
# Appends the close of a procedure to $code: the blr return instruction, a
# .size directive computing NAME's size as (current location - NAME), and a
# trailing blank line.
sub endproc($)
{
    my $symbol = shift;

    $code .= "    blr\n"
           . "        .size ${symbol},.-${symbol}\n"
           . "\n";
}
64
# load_vrs(POINTER, REG_LIST)
#
# Appends seven lxsd instructions to $code, loading the doublewords at byte
# offsets 0, 8, ..., 48 from the address in register POINTER into the first
# seven registers named in the array referenced by REG_LIST, followed by a
# blank line.
sub load_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    foreach my $limb (0 .. 6) {
        my $disp = 8 * $limb;
        $code .= "    lxsd        $reg_list->[$limb],${disp}($pointer)\n";
    }

    $code .= "\n";
}
80
# store_vrs(POINTER, REG_LIST)
#
# Appends thirteen stxv instructions to $code, storing the first thirteen
# registers named in the array referenced by REG_LIST to byte offsets
# 0, 16, ..., 192 from the address in register POINTER, followed by a blank
# line.
sub store_vrs($$)
{
    my ($pointer, $reg_list) = @_;

    foreach my $slot (0 .. 12) {
        my $disp = 16 * $slot;
        $code .= "    stxv        $reg_list->[$slot],${disp}($pointer)\n";
    }

    $code .= "\n";
}
96
# Preamble of the generated assembly: a .machine directive followed by the
# switch to the .text section. The procedures appended to $code afterwards
# are therefore emitted into that section.
$code.=<<___;
.machine    "any"
.text

___
102
103{
104    # mul/square common
105    my ($t1, $t2, $t3, $t4) = ("v33", "v34", "v42", "v43");
106    my ($zero, $one) = ("r8", "r9");
107    my $out = "v51";
108
    {
        #
        # p384_felem_mul
        #
        # Appends the body of p384_felem_mul to $code. The generated routine:
        #   - clears $vzero,
        #   - loads seven doublewords from each operand pointer (byte
        #     offsets 0..48) into @in1 and @in2 via load_vrs,
        #   - for each k = 0..12 builds one accumulator in $out with a chain
        #     of vmsumudm instructions and stores it with stxv at byte
        #     offset 16*k from $outp.
        #
        # The operand pairings are those of a schoolbook product:
        #   out[k] = sum of in1[i] * in2[k-i]   (0 <= i, k-i <= 6)
        # NOTE(review): this reading relies on the Power ISA semantics of the
        # instructions (vmsumudm multiplying matching doublewords of its two
        # sources and adding both products to the addend; xxpermdi with
        # 0b00 pairing the first doubleword of each source; lxsd leaving the
        # other doubleword zero) -- confirm against the ISA when changing.
        #
        # Statement order matters: the scratch registers $t1..$t4 are reused
        # between limbs, and several xxpermdi results are deliberately
        # carried over (for example $t4 built for limb 6 is consumed again
        # for limbs 7, 8 and 9, and $t2 built for limb 7 is consumed through
        # limb 11).
        #

        # Registers holding the pointers to the two operands; the result
        # pointer is the file-level $outp.
        my ($in1p, $in2p) = ("r4", "r5");
        # One vector register per loaded limb of each operand.
        my @in1 = map("v$_",(44..50));
        my @in2 = map("v$_",(35..41));

        startproc("p384_felem_mul");

        # Zero addend used to start each limb's accumulation.
        $code.=<<___;
    vspltisw    $vzero,0

___

        load_vrs($in1p, \@in1);
        load_vrs($in2p, \@in2);

        # Thirteen groups, one per output limb, separated by blank lines;
        # each group ends with the stxv that stores that limb.
        $code.=<<___;
    vmsumudm    $out,$in1[0],$in2[0],$vzero
    stxv        $out,0($outp)

    xxpermdi    $t1,$in1[0],$in1[1],0b00
    xxpermdi    $t2,$in2[1],$in2[0],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,16($outp)

    xxpermdi    $t2,$in2[2],$in2[1],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in1[2],$in2[0],$out
    stxv        $out,32($outp)

    xxpermdi    $t2,$in2[1],$in2[0],0b00
    xxpermdi    $t3,$in1[2],$in1[3],0b00
    xxpermdi    $t4,$in2[3],$in2[2],0b00
    vmsumudm    $out,$t1,$t4,$vzero
    vmsumudm    $out,$t3,$t2,$out
    stxv        $out,48($outp)

    xxpermdi    $t2,$in2[4],$in2[3],0b00
    xxpermdi    $t4,$in2[2],$in2[1],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    vmsumudm    $out,$in1[4],$in2[0],$out
    stxv        $out,64($outp)

    xxpermdi    $t2,$in2[5],$in2[4],0b00
    xxpermdi    $t4,$in2[3],$in2[2],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    xxpermdi    $t4,$in2[1],$in2[0],0b00
    xxpermdi    $t1,$in1[4],$in1[5],0b00
    vmsumudm    $out,$t1,$t4,$out
    stxv        $out,80($outp)

    xxpermdi    $t1,$in1[0],$in1[1],0b00
    xxpermdi    $t2,$in2[6],$in2[5],0b00
    xxpermdi    $t4,$in2[4],$in2[3],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    xxpermdi    $t2,$in2[2],$in2[1],0b00
    xxpermdi    $t1,$in1[4],$in1[5],0b00
    vmsumudm    $out,$t1,$t2,$out
    vmsumudm    $out,$in1[6],$in2[0],$out
    stxv        $out,96($outp)

    xxpermdi    $t1,$in1[1],$in1[2],0b00
    xxpermdi    $t2,$in2[6],$in2[5],0b00
    xxpermdi    $t3,$in1[3],$in1[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    xxpermdi    $t3,$in2[2],$in2[1],0b00
    xxpermdi    $t1,$in1[5],$in1[6],0b00
    vmsumudm    $out,$t1,$t3,$out
    stxv        $out,112($outp)

    xxpermdi    $t1,$in1[2],$in1[3],0b00
    xxpermdi    $t3,$in1[4],$in1[5],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$t3,$t4,$out
    vmsumudm    $out,$in1[6],$in2[2],$out
    stxv        $out,128($outp)

    xxpermdi    $t1,$in1[3],$in1[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    xxpermdi    $t1,$in1[5],$in1[6],0b00
    vmsumudm    $out,$t1,$t4,$out
    stxv        $out,144($outp)

    vmsumudm    $out,$t3,$t2,$vzero
    vmsumudm    $out,$in1[6],$in2[4],$out
    stxv        $out,160($outp)

    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,176($outp)

    vmsumudm    $out,$in1[6],$in2[6],$vzero
    stxv        $out,192($outp)
___

        endproc("p384_felem_mul");
    }
212
    {
        #
        # p384_felem_square
        #
        # Appends the body of p384_felem_square to $code. The generated
        # routine:
        #   - clears $vzero and loads seven doublewords from the operand
        #     pointer (byte offsets 0..48) into @in via load_vrs,
        #   - builds @inx2 by shifting each @in register left with vsld,
        #     using a shift-count vector assembled from $one (1) and
        #     $zero (0) with mtvsrdd,
        #   - for each k = 0..12 accumulates into $out with vmsumudm and
        #     stores the result with stxv at byte offset 16*k from $outp.
        #
        # Each cross term in[i]*in[j] with i < j appears once, using the
        # shifted (@inx2) copy of one factor, while the square terms
        # in[i]*in[i] use the unshifted registers, i.e.
        #   out[k] = sum over i+j=k, i<j of 2*in[i]*in[j]  (+ in[k/2]^2, k even)
        # NOTE(review): this reading relies on the Power ISA semantics of
        # vsld/mtvsrdd/vmsumudm/xxpermdi/lxsd, and the one-bit shift assumes
        # the top bit of every input limb is clear -- confirm against the ISA
        # and the C caller.
        #
        # Statement order matters: $t1 first holds the shift count and is
        # then reused as a product operand, and $t2 built for limb 6 is
        # consumed again for limbs 7, 8 and 9.
        #

        # Register holding the pointer to the operand; the result pointer is
        # the file-level $outp.
        my ($inp) = ("r4");
        # Loaded limbs, and the registers receiving their shifted copies.
        my @in = map("v$_",(44..50));
        my @inx2 = map("v$_",(35..41));

        startproc("p384_felem_square");

        # Zero addend used to start each limb's accumulation.
        $code.=<<___;
    vspltisw    $vzero,0

___

        load_vrs($inp, \@in);

        # Build the vsld shift-count vector in $t1 from the constants 1 and 0.
        $code.=<<___;
    li        $zero,0
    li        $one,1
    mtvsrdd        $t1,$one,$zero
___

        # One vsld per limb: $inx2[i] receives $in[i] shifted by $t1.
        for (my $i = 0; $i <= 6; $i++) {
            $code.=<<___;
    vsld        $inx2[$i],$in[$i],$t1
___
        }

        # Thirteen groups, one per output limb, separated by blank lines;
        # each group ends with the stxv that stores that limb.
        $code.=<<___;
    vmsumudm    $out,$in[0],$in[0],$vzero
    stxv        $out,0($outp)

    vmsumudm    $out,$in[0],$inx2[1],$vzero
    stxv        $out,16($outp)

    vmsumudm    $out,$in[0],$inx2[2],$vzero
    vmsumudm    $out,$in[1],$in[1],$out
    stxv        $out,32($outp)

    xxpermdi    $t1,$in[0],$in[1],0b00
    xxpermdi    $t2,$inx2[3],$inx2[2],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,48($outp)

    xxpermdi    $t4,$inx2[4],$inx2[3],0b00
    vmsumudm    $out,$t1,$t4,$vzero
    vmsumudm    $out,$in[2],$in[2],$out
    stxv        $out,64($outp)

    xxpermdi    $t2,$inx2[5],$inx2[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in[2],$inx2[3],$out
    stxv        $out,80($outp)

    xxpermdi    $t2,$inx2[6],$inx2[5],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in[2],$inx2[4],$out
    vmsumudm    $out,$in[3],$in[3],$out
    stxv        $out,96($outp)

    xxpermdi    $t3,$in[1],$in[2],0b00
    vmsumudm    $out,$t3,$t2,$vzero
    vmsumudm    $out,$in[3],$inx2[4],$out
    stxv        $out,112($outp)

    xxpermdi    $t1,$in[2],$in[3],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    vmsumudm    $out,$in[4],$in[4],$out
    stxv        $out,128($outp)

    xxpermdi    $t1,$in[3],$in[4],0b00
    vmsumudm    $out,$t1,$t2,$vzero
    stxv        $out,144($outp)

    vmsumudm    $out,$in[4],$inx2[6],$vzero
    vmsumudm    $out,$in[5],$in[5],$out
    stxv        $out,160($outp)

    vmsumudm    $out,$in[5],$inx2[6],$vzero
    stxv        $out,176($outp)

    vmsumudm    $out,$in[6],$in[6],$vzero
    stxv        $out,192($outp)
___

        endproc("p384_felem_square");
    }
302}
303
# Replace every `...` span in the generated text with the result of
# evaluating its contents as Perl (/e), for all occurrences (/g).
$code =~ s/\`([^\`]*)\`/eval $1/gem;
# STDOUT was aliased to the pipe into ppc-xlate.pl earlier in this script,
# so printing here hands the assembly to the translator.
print $code;
# A failed close on the output handle is fatal so that a broken translation
# step does not pass unnoticed.
close STDOUT or die "error closing STDOUT: $!";
307