#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (main memory supports misaligned loads/stores)

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___

################################################################################
# void rv64i_zvkb_zvkned_ctr32_encrypt_blocks(const unsigned char *in,
#                                             unsigned char *out, size_t blocks,
#                                             const void *key,
#                                             const unsigned char ivec[16]);
{
my ($INP, $OUTP, $BLOCK_NUM, $KEYP, $IVP) = ("a0", "a1", "a2", "a3", "a4");
my ($T0, $T1, $T2, $T3) = ("t0", "t1", "t2", "t3");
my ($VL) = ("t4");
my ($LEN32) = ("t5");
my ($CTR) = ("t6");
my ($MASK) = ("v0");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));

# Prepare the AES ctr input data into v16.
sub init_aes_ctr_input {
    my $code=<<___;
    # Set up the mask in v0.
    # The mask pattern for the 4*N-th elements:
    #   mask v0: [000100010001....]
    # Note:
    #   We could set up the mask just for the maximum element length instead
    #   of the VLMAX.
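    # The splatted byte 0b10001000 sets mask bits 3 and 7 in every byte, so
    # when v0 is used as a mask for e32 elements the active lanes are
    # 3, 7, 11, ..., i.e. the fourth 32-bit word (the big-endian counter word)
    # of each 128-bit block.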
    li $T0, 0b10001000
    @{[vsetvli $T2, "zero", "e8", "m1", "ta", "ma"]}
    @{[vmv_v_x $MASK, $T0]}
    # Load IV.
    # v31:[IV0, IV1, IV2, big-endian count]
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V31, $IVP]}
    # Convert the big-endian counter into little-endian.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "mu"]}
    @{[vrev8_v $V31, $V31, $MASK]}
    # Splat the IV to v16
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vmv_v_i $V16, 0]}
    @{[vaesz_vs $V16, $V31]}
    # Prepare the ctr pattern into v20
    # v20: [x, x, x, 0, x, x, x, 1, x, x, x, 2, ...]
    @{[viota_m $V20, $MASK, $MASK]}
    # v16:[IV0, IV1, IV2, count+0, IV0, IV1, IV2, count+1, ...]
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    @{[vadd_vv $V16, $V16, $V20, $MASK]}
___

    return $code;
}

$code .= <<___;
.p2align 3
.globl rv64i_zvkb_zvkned_ctr32_encrypt_blocks
.type rv64i_zvkb_zvkned_ctr32_encrypt_blocks,\@function
rv64i_zvkb_zvkned_ctr32_encrypt_blocks:
    beqz $BLOCK_NUM, 1f

    # Load number of rounds
    lwu $T0, 240($KEYP)
    li $T1, 14
    li $T2, 12
    li $T3, 10

    slli $LEN32, $BLOCK_NUM, 2

    beq $T0, $T1, ctr32_encrypt_blocks_256
    beq $T0, $T2, ctr32_encrypt_blocks_192
    beq $T0, $T3, ctr32_encrypt_blocks_128

1:
    ret

.size rv64i_zvkb_zvkned_ctr32_encrypt_blocks,.-rv64i_zvkb_zvkned_ctr32_encrypt_blocks
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_128:
    # Load all 11 round keys to v1-v11 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0
    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesef_vs $V24, $V11]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_128,.-ctr32_encrypt_blocks_128
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_192:
    # Load all 13 round keys to v1-v13 registers.
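    # AES-192 performs 12 rounds, so the expanded key provides
    # rounds + 1 = 13 round keys of 16 bytes each.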
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0
    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesef_vs $V24, $V13]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_192,.-ctr32_encrypt_blocks_192
___

$code .= <<___;
.p2align 3
ctr32_encrypt_blocks_256:
    # Load all 15 round keys to v1-v15 registers.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V2, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V3, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V4, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V5, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V6, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V7, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V8, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V9, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V10, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V11, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V12, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V13, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V14, $KEYP]}
    addi $KEYP, $KEYP, 16
    @{[vle32_v $V15, $KEYP]}

    @{[init_aes_ctr_input]}

    ##### AES body
    j 2f
1:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "mu"]}
    # Increase ctr in v16.
    @{[vadd_vx $V16, $V16, $CTR, $MASK]}
2:
    # Load plaintext into v20
    @{[vle32_v $V20, $INP]}
    slli $T0, $VL, 2
    srli $CTR, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INP, $INP, $T0
    # Prepare the AES ctr input into v24.
    # The ctr data uses big-endian form.
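    # v16 keeps the counter words in little-endian form so vadd can increment
    # them; copy to v24 and byte-swap the counter words back to big-endian
    # before encryption.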
    @{[vmv_v_v $V24, $V16]}
    @{[vrev8_v $V24, $V24, $MASK]}

    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesem_vs $V24, $V13]}
    @{[vaesem_vs $V24, $V14]}
    @{[vaesef_vs $V24, $V15]}

    # ciphertext
    @{[vxor_vv $V24, $V24, $V20]}

    # Store the ciphertext.
    @{[vse32_v $V24, $OUTP]}
    add $OUTP, $OUTP, $T0

    bnez $LEN32, 1b

    ret
.size ctr32_encrypt_blocks_256,.-ctr32_encrypt_blocks_256
___
}

print $code;

close STDOUT or die "error closing STDOUT: $!";