1#! /usr/bin/env perl
2# This file is dual-licensed, meaning that you can use it under your
3# choice of either of the following two licenses:
4#
5# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
6#
7# Licensed under the Apache License 2.0 (the "License"). You can obtain
8# a copy in the file LICENSE in the source distribution or at
9# https://www.openssl.org/source/license.html
10#
11# or
12#
13# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
14# Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
15# All rights reserved.
16#
17# Redistribution and use in source and binary forms, with or without
18# modification, are permitted provided that the following conditions
19# are met:
20# 1. Redistributions of source code must retain the above copyright
21#    notice, this list of conditions and the following disclaimer.
22# 2. Redistributions in binary form must reproduce the above copyright
23#    notice, this list of conditions and the following disclaimer in the
24#    documentation and/or other materials provided with the distribution.
25#
26# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
27# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
28# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
29# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
30# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
31# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
32# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
33# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
34# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
35# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
36# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
37
38# The generated code of this file depends on the following RISC-V extensions:
39# - RV64I
40# - RISC-V Vector ('V') with VLEN >= 128
41# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
42# - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')
43
44use strict;
45use warnings;
46
47use FindBin qw($Bin);
48use lib "$Bin";
49use lib "$Bin/../../perlasm";
50use riscv;
51
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested output file, if one was given.
# Use the 3-arg form of open (the filename cannot inject an open mode)
# and check the result so a bad path fails loudly instead of silently
# discarding the generated assembly.
if (defined $output) {
    open STDOUT, '>', $output or die "can't open $output: $!";
}
58
# Accumulator for the generated assembly; begins with the .text directive.
my $code = ".text\n";

# Symbolic names for all 32 RISC-V vector registers (v0 .. v31).
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map { "v$_" } 0 .. 31;

# Label of the SHA-256 round-constant table emitted at the end of the file.
my $K256 = "K256";

# Function arguments (a0-a2 per the RISC-V calling convention), the
# round-constant walking pointer, plus two scratch registers holding the
# pointer to the second state half and the index-load pattern.
my ($H, $INP, $LEN, $KT, $H2, $INDEX_PATTERN) = ("a0", "a1", "a2", "a3", "t3", "t4");
73
# Emit code that preloads all 64 SHA-256 round constants from the K256
# table into vector registers v10..v25, four 32-bit words (16 bytes) per
# register, using $KT as a walking pointer into the table.
sub sha_256_load_constant {
    my @insns = ("la $KT, $K256 # Load round constants K256");
    my @kregs = map { "v$_" } 10 .. 25;
    for my $i (0 .. $#kregs) {
        push @insns, vle32_v($kregs[$i], $KT);
        # Step to the next group of four constants — except after the
        # final load, matching the original unrolled sequence.
        push @insns, "addi $KT, $KT, 16" if $i < $#kregs;
    }
    return join("", map { "    $_\n" } @insns);
}
112
113################################################################################
114# void sha256_block_data_order_zvkb_zvknha_or_zvknhb(void *c, const void *p, size_t len)
# sha256_block_data_order_zvkb_zvknha_or_zvknhb(H, input, len):
#   H     (a0): pointer to the 8-word SHA-256 state {a,b,c,d},{e,f,g,h}
#   input (a1): pointer to the message data
#   len   (a2): block count — the loop consumes 64 bytes and decrements
#               len by 1 per iteration
# The vector SHA-2 instructions (vsha2ch/vsha2cl/vsha2ms) consume the
# state packed as {f,e,b,a},{h,g,d,c}, hence the indexed loads/stores
# through the byte-index pattern kept in v26.
$code .= <<___;
.p2align 2
.globl sha256_block_data_order_zvkb_zvknha_or_zvknhb
.type   sha256_block_data_order_zvkb_zvknha_or_zvknhb,\@function
sha256_block_data_order_zvkb_zvknha_or_zvknhb:
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}

    @{[sha_256_load_constant]}

    # H is stored as {a,b,c,d},{e,f,g,h}, but we need {f,e,b,a},{h,g,d,c}
    # The dst vtype is e32m1 and the index vtype is e8mf4.
    # We use index-load with the following index pattern at v26.
    #   i8 index:
    #     20, 16, 4, 0
    # Instead of setting the i8 index, we could use a single 32bit
    # little-endian value to cover the 4xi8 index.
    #   i32 value:
    #     0x 00 04 10 14
    li $INDEX_PATTERN, 0x00041014
    @{[vsetivli "zero", 1, "e32", "m1", "ta", "ma"]}
    @{[vmv_v_x $V26, $INDEX_PATTERN]}

    addi $H2, $H, 8

    # Use index-load to get {f,e,b,a},{h,g,d,c}
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vluxei8_v $V6, $H, $V26]}
    @{[vluxei8_v $V7, $H2, $V26]}

    # Setup v0 mask for the vmerge to replace the first word (idx==0) in key-scheduling.
    # The AVL is 4 in SHA, so we could use a single e8(8 element masking) for masking.
    @{[vsetivli "zero", 1, "e8", "m1", "ta", "ma"]}
    @{[vmv_v_i $V0, 0x01]}

    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}

L_round_loop:
    # Decrement length by 1
    add $LEN, $LEN, -1

    # Keep the current state as we need it later: H' = H+{a',b',c',...,h'}.
    @{[vmv_v_v $V30, $V6]}
    @{[vmv_v_v $V31, $V7]}

    # Load the 512-bits of the message block in v1-v4 and perform
    # an endian swap on each 4 bytes element.
    @{[vle32_v $V1, $INP]}
    @{[vrev8_v $V1, $V1]}
    add $INP, $INP, 16
    @{[vle32_v $V2, $INP]}
    @{[vrev8_v $V2, $V2]}
    add $INP, $INP, 16
    @{[vle32_v $V3, $INP]}
    @{[vrev8_v $V3, $V3]}
    add $INP, $INP, 16
    @{[vle32_v $V4, $INP]}
    @{[vrev8_v $V4, $V4]}
    add $INP, $INP, 16

    # Quad-round 0 (+0, Wt from oldest to newest in v1->v2->v3->v4)
    @{[vadd_vv $V5, $V10, $V1]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V3, $V2, $V0]}
    @{[vsha2ms_vv $V1, $V5, $V4]}  # Generate W[19:16]

    # Quad-round 1 (+1, v2->v3->v4->v1)
    @{[vadd_vv $V5, $V11, $V2]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V4, $V3, $V0]}
    @{[vsha2ms_vv $V2, $V5, $V1]}  # Generate W[23:20]

    # Quad-round 2 (+2, v3->v4->v1->v2)
    @{[vadd_vv $V5, $V12, $V3]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V1, $V4, $V0]}
    @{[vsha2ms_vv $V3, $V5, $V2]}  # Generate W[27:24]

    # Quad-round 3 (+3, v4->v1->v2->v3)
    @{[vadd_vv $V5, $V13, $V4]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V2, $V1, $V0]}
    @{[vsha2ms_vv $V4, $V5, $V3]}  # Generate W[31:28]

    # Quad-round 4 (+0, v1->v2->v3->v4)
    @{[vadd_vv $V5, $V14, $V1]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V3, $V2, $V0]}
    @{[vsha2ms_vv $V1, $V5, $V4]}  # Generate W[35:32]

    # Quad-round 5 (+1, v2->v3->v4->v1)
    @{[vadd_vv $V5, $V15, $V2]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V4, $V3, $V0]}
    @{[vsha2ms_vv $V2, $V5, $V1]}  # Generate W[39:36]

    # Quad-round 6 (+2, v3->v4->v1->v2)
    @{[vadd_vv $V5, $V16, $V3]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V1, $V4, $V0]}
    @{[vsha2ms_vv $V3, $V5, $V2]}  # Generate W[43:40]

    # Quad-round 7 (+3, v4->v1->v2->v3)
    @{[vadd_vv $V5, $V17, $V4]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V2, $V1, $V0]}
    @{[vsha2ms_vv $V4, $V5, $V3]}  # Generate W[47:44]

    # Quad-round 8 (+0, v1->v2->v3->v4)
    @{[vadd_vv $V5, $V18, $V1]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V3, $V2, $V0]}
    @{[vsha2ms_vv $V1, $V5, $V4]}  # Generate W[51:48]

    # Quad-round 9 (+1, v2->v3->v4->v1)
    @{[vadd_vv $V5, $V19, $V2]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V4, $V3, $V0]}
    @{[vsha2ms_vv $V2, $V5, $V1]}  # Generate W[55:52]

    # Quad-round 10 (+2, v3->v4->v1->v2)
    @{[vadd_vv $V5, $V20, $V3]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V1, $V4, $V0]}
    @{[vsha2ms_vv $V3, $V5, $V2]}  # Generate W[59:56]

    # Quad-round 11 (+3, v4->v1->v2->v3)
    @{[vadd_vv $V5, $V21, $V4]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}
    @{[vmerge_vvm $V5, $V2, $V1, $V0]}
    @{[vsha2ms_vv $V4, $V5, $V3]}  # Generate W[63:60]

    # Quad-round 12 (+0, v1->v2->v3->v4)
    # Note that we stop generating new message schedule words (Wt, v1-13)
    # as we already generated all the words we end up consuming (i.e., W[63:60]).
    @{[vadd_vv $V5, $V22, $V1]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}

    # Quad-round 13 (+1, v2->v3->v4->v1)
    @{[vadd_vv $V5, $V23, $V2]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}

    # Quad-round 14 (+2, v3->v4->v1->v2)
    @{[vadd_vv $V5, $V24, $V3]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}

    # Quad-round 15 (+3, v4->v1->v2->v3)
    @{[vadd_vv $V5, $V25, $V4]}
    @{[vsha2cl_vv $V7, $V6, $V5]}
    @{[vsha2ch_vv $V6, $V7, $V5]}

    # H' = H+{a',b',c',...,h'}
    @{[vadd_vv $V6, $V30, $V6]}
    @{[vadd_vv $V7, $V31, $V7]}
    bnez $LEN, L_round_loop

    # Store {f,e,b,a},{h,g,d,c} back to {a,b,c,d},{e,f,g,h}.
    @{[vsuxei8_v $V6, $H, $V26]}
    @{[vsuxei8_v $V7, $H2, $V26]}

    ret
.size sha256_block_data_order_zvkb_zvknha_or_zvknhb,.-sha256_block_data_order_zvkb_zvknha_or_zvknhb

.p2align 2
.type $K256,\@object
$K256:
    .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
    .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
    .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
    .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
    .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
    .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
    .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
    .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
    .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
    .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
    .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
    .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
    .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
    .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
    .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
    .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size $K256,.-$K256
___
313
# Emit the accumulated assembly to STDOUT (which may have been
# redirected to the output file above).
print $code;

# Check close explicitly: buffered write errors on a redirected STDOUT
# only surface at close time.
close STDOUT or die "error closing STDOUT: $!";
317