#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___

################################################################################
# void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in,
#                                             unsigned char *out, size_t blocks,
#                                             const void *key,
#                                             const unsigned char ivec[16]);
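#
# Arguments (as used by the code below):
#   in, out - source and destination buffers
#   blocks  - number of 16-byte AES blocks to process
#   key     - expanded AES key schedule; the number of rounds is read from
#             byte offset 240 (OpenSSL AES_KEY layout)
#   ivec    - 16-byte counter block whose last four bytes hold a 32-bit
#             big-endian block counter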
{
my ($INP, $OUTP, $BLOCK_NUM, $KEYP, $IVP) = ("a0", "a1", "a2", "a3", "a4");
my ($T0, $T1, $T2, $T3) = ("t0", "t1", "t2", "t3");
my ($VL) = ("t4");
my ($LEN32) = ("t5");
my ($CTR) = ("t6");
my ($MASK) = ("v0");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));

# Prepare the AES ctr input data into v16.
sub init_aes_ctr_input {
    my $code=<<___;
    # Set up the mask in v0.
    # The mask selects every fourth element (the counter word of each block).
    # mask v0: [000100010001....]
    # Note:
    #   We could set up the mask just for the maximum element length instead
    #   of VLMAX.
    li $T0, 0b10001000
    @{[vsetvli $T2, "zero", "e8", "m1", "ta", "ma"]}
    @{[vmv_v_x $MASK, $T0]}
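    # Each byte element of the mask register is set to 0b10001000. Read as a
    # per-element mask bitmap, this sets mask bits 3, 7, 11, ..., so with
    # SEW=32 only the fourth word of every 128-bit group (the counter word)
    # is active.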
    # Load the IV.
    # v31:[IV0, IV1, IV2, big-endian count]
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V31, $IVP]}
    # Convert the big-endian counter into little-endian.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "mu"]}
    @{[vrev8_v $V31, $V31, $MASK]}
    # Splat the IV into v16.
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vmv_v_i $V16, 0]}
    @{[vaesz_vs $V16, $V31]}
    # Prepare the ctr pattern in v20.
    # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
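    # viota.m, under the mask, writes the running count of preceding set mask
    # bits into each active element, i.e. the block index 0, 1, 2, ... into
    # the counter word of each block.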
    @{[viota_m $V20, $MASK, $MASK]}
    # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...]
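    # The add below is masked and uses the mask-undisturbed (mu) policy, so
    # only the counter words are updated; the IV words are left untouched.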
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    @{[vadd_vv $V16, $V16, $V20, $MASK]}
___

    return $code;
}

$code .= <<___;
.p2align 3
.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks
.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,\@function
rv64i_zvkb_zvkned_ctr32_encrypt_blocks:
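    # Return immediately if there are no blocks to process.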
    beqz $BLOCK_NUM, 1f

    # Load the number of rounds.
    lwu $T0, 240($KEYP)
    li $T1, 14
    li $T2, 12
    li $T3, 10
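    # 14, 12 and 10 rounds correspond to AES-256, AES-192 and AES-128.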

    slli $LEN32, $BLOCK_NUM, 2
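    # t5 now holds the length to process in 32-bit words (4 per block); it is
    # decremented in the main loop.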

    beq $T0, $T1, ctr32_encrypt_blocks_256
    beq $T0, $T2, ctr32_encrypt_blocks_192
    beq $T0, $T3, ctr32_encrypt_blocks_128

1:
    ret

.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_128:
    # Load all 11 round keys into registers v1-v11.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increment the counter in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load the plaintext into v20.
    @{[vle32_v $V20, $INP]}
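    # vl is the number of 32-bit elements handled this pass: t0 is the byte
    # count (vl * 4) and t6 the block count (vl / 4); t6 advances the counter
    # on the next iteration.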
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0
    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
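    # v16 keeps the counters little-endian for arithmetic; the masked byte
    # reversal below converts only the counter words back to big-endian.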
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

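    # Encrypt the counter blocks: vaesz.vs is the initial AddRoundKey,
    # vaesem.vs a middle round and vaesef.vs the final round. The .vs forms
    # broadcast the single round key in element group 0 of the key register
    # to every element group of v24.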
    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesef_vs $V24, $V11]}

    # XOR the keystream with the plaintext to produce the ciphertext.
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_192:
    # Load all 13 round keys into registers v1-v13.
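    # The body below mirrors ctr32_encrypt_blocks_128; only the number of
    # round-key applications differs.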
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increment the counter in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load the plaintext into v20.
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0
    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesef_vs $V24, $V13]}

    # XOR the keystream with the plaintext to produce the ciphertext.
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_256:
    # Load all 15 round keys into registers v1-v15.
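    # As above, this mirrors ctr32_encrypt_blocks_128 but applies 15 round
    # keys.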
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V14, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V15, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increment the counter in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load the plaintext into v20.
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0
    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesem_vs $V24, $V13]}
    @{[vaesem_vs $V24, $V14]}
    @{[vaesef_vs $V24, $V15]}

    # XOR the keystream with the plaintext to produce the ciphertext.
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256
___
}

print $code;

close STDOUT or die "error closing STDOUT: $!";