#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Bit-manipulation extension ('Zvbb')
# - RISC-V Vector GCM/GMAC extension ('Zvkg')
# - RISC-V Vector AES block cipher extension ('Zvkned')
# - RISC-V Zicclsm (Main memory supports misaligned loads/stores)

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;
$output and open STDOUT,">$output";

my $code=<<___;
.text
___

{
################################################################################
# void rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt(const unsigned char *in,
#                                             unsigned char *out, size_t length,
#                                             const AES_KEY *key1,
#                                             const AES_KEY *key2,
#                                             const unsigned char iv[16])
my ($INPUT, $OUTPUT, $LENGTH, $KEY1, $KEY2, $IV) = ("a0", "a1", "a2", "a3", "a4", "a5");
my ($TAIL_LENGTH) = ("a6");
my ($VL) = ("a7");
my ($T0, $T1, $T2) = ("t0", "t1", "t2");
my ($STORE_LEN32) = ("t3");
my ($LEN32) = ("t4");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));

sub compute_xts_iv0 {
    my $code=<<___;
    # Load number of rounds
    lwu $T0, 240($KEY2)
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V28, $IV]}
    @{[vle32_v $V29, $KEY2]}
    @{[vaesz_vs $V28, $V29]}
    addi $T0, $T0, -1
    addi $KEY2, $KEY2, 16
1:
    @{[vle32_v $V29, $KEY2]}
    @{[vaesem_vs $V28, $V29]}
    addi $T0, $T0, -1
    addi $KEY2, $KEY2, 16
    bnez $T0, 1b
    @{[vle32_v $V29, $KEY2]}
    @{[vaesef_vs $V28, $V29]}
___

    return $code;
}

# prepare input data (v24), iv (v28), bit-reversed iv (v16) and
# bit-reversed iv multiplier (v20)
sub init_first_round {
    my $code=<<___;
    # load input
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vle32_v $V24, $INPUT]}

    li $T0, 5
    # We can use a simpler initialization if there is only a single block
    # (`block <= 1`).
    blt $LEN32, $T0, 1f

    # Note: We use `vgmul` for GF(2^128) multiplication. The `vgmul` uses a
    # different order of coefficients. We should use `vbrev8` to reverse the
    # data when we use `vgmul`.
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vbrev8_v $V0, $V28]}
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vmv_v_i $V16, 0]}
    # v16: [r-IV0, r-IV0, ...]
    @{[vaesz_vs $V16, $V0]}

    # Prepare GF(2^128) multiplier [1, x, x^2, x^3, ...] in v8.
    slli $T0, $LEN32, 2
    @{[vsetvli "zero", $T0, "e32", "m1", "ta", "ma"]}
    # v2: [`1`, `1`, `1`, `1`, ...]
    @{[vmv_v_i $V2, 1]}
    # v3: [`0`, `1`, `2`, `3`, ...]
    @{[vid_v $V3]}
    @{[vsetvli "zero", $T0, "e64", "m2", "ta", "ma"]}
    # v4: [`1`, 0, `1`, 0, `1`, 0, `1`, 0, ...]
    @{[vzext_vf2 $V4, $V2]}
    # v6: [`0`, 0, `1`, 0, `2`, 0, `3`, 0, ...]
    @{[vzext_vf2 $V6, $V3]}
    slli $T0, $LEN32, 1
    @{[vsetvli "zero", $T0, "e32", "m2", "ta", "ma"]}
    # v8: [1<<0=1, 0, 0, 0, 1<<1=x, 0, 0, 0, 1<<2=x^2, 0, 0, 0, ...]
    @{[vwsll_vv $V8, $V4, $V6]}

    # Compute [r-IV0*1, r-IV0*x, r-IV0*x^2, r-IV0*x^3, ...] in v16
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vbrev8_v $V8, $V8]}
    @{[vgmul_vv $V16, $V8]}

    # Compute [IV0*1, IV0*x, IV0*x^2, IV0*x^3, ...] in v28.
    # Reverse the bits order back.
    @{[vbrev8_v $V28, $V16]}

    # Prepare the x^n multiplier in v20, where `n` is the number of aes-xts
    # blocks in a LMUL=4 register group.
    #   n = ((VLEN*LMUL)/(32*4)) = ((VLEN*4)/(32*4))
    #     = (VLEN/32)
    # We could use vsetvli with `e32, m1` to compute the `n` number.
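    # (With `e32, m1`, a vsetvli that requests the maximum vector length
    # returns VLMAX = VLEN/32, which is exactly `n`. Shifting 1 left by `n`
    # builds the polynomial x^n, and the `vbrev8` below converts it to the
    # bit order expected by `vgmul`.)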
    @{[vsetvli $T0, "zero", "e32", "m1", "ta", "ma"]}
    li $T1, 1
    sll $T0, $T1, $T0
    @{[vsetivli "zero", 2, "e64", "m1", "ta", "ma"]}
    @{[vmv_v_i $V0, 0]}
    @{[vsetivli "zero", 1, "e64", "m1", "tu", "ma"]}
    @{[vmv_v_x $V0, $T0]}
    @{[vsetivli "zero", 2, "e64", "m1", "ta", "ma"]}
    @{[vbrev8_v $V0, $V0]}
    @{[vsetvli "zero", $LEN32, "e32", "m4", "ta", "ma"]}
    @{[vmv_v_i $V20, 0]}
    @{[vaesz_vs $V20, $V0]}

    j 2f
1:
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vbrev8_v $V16, $V28]}
2:
___

    return $code;
}

# prepare xts enc last block's input (v24) and iv (v28)
sub handle_xts_enc_last_block {
    my $code=<<___;
    bnez $TAIL_LENGTH, 1f
    ret
1:
    # slidedown second to last block
    addi $VL, $VL, -4
    @{[vsetivli "zero", 4, "e32", "m4", "ta", "ma"]}
    # ciphertext
    @{[vslidedown_vx $V24, $V24, $VL]}
    # multiplier
    @{[vslidedown_vx $V16, $V16, $VL]}

    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vmv_v_v $V25, $V24]}

    # load last block into v24
    # note: We should load the last block before storing the second to last
    # block, for in-place operation.
    @{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "tu", "ma"]}
    @{[vle8_v $V24, $INPUT]}

    # setup `x` multiplier with byte-reversed order
    # 0b00000010 => 0b01000000 (0x40)
    li $T0, 0x40
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vmv_v_i $V28, 0]}
    @{[vsetivli "zero", 1, "e8", "m1", "tu", "ma"]}
    @{[vmv_v_x $V28, $T0]}

    # compute IV for last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vgmul_vv $V16, $V28]}
    @{[vbrev8_v $V28, $V16]}

    # store second to last block
    @{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "ta", "ma"]}
    @{[vse8_v $V25, $OUTPUT]}
___

    return $code;
}

# prepare xts dec second to last block's input (v24) and iv (v29), and the
# last block's iv (v28)
sub handle_xts_dec_last_block {
    my $code=<<___;
    bnez $TAIL_LENGTH, 1f
    ret
1:
    # load second to last block's ciphertext
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V24, $INPUT]}
    addi $INPUT, $INPUT, 16

    # setup `x` multiplier with byte-reversed order
    # 0b00000010 => 0b01000000 (0x40)
    li $T0, 0x40
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vmv_v_i $V20, 0]}
    @{[vsetivli "zero", 1, "e8", "m1", "tu", "ma"]}
    @{[vmv_v_x $V20, $T0]}

    beqz $LENGTH, 1f
    # slidedown third to last block
    addi $VL, $VL, -4
    @{[vsetivli "zero", 4, "e32", "m4", "ta", "ma"]}
    # multiplier
    @{[vslidedown_vx $V16, $V16, $VL]}

    # compute IV for last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vgmul_vv $V16, $V20]}
    @{[vbrev8_v $V28, $V16]}

    # compute IV for second to last block
    @{[vgmul_vv $V16, $V20]}
    @{[vbrev8_v $V29, $V16]}
    j 2f
1:
    # compute IV for second to last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vgmul_vv $V16, $V20]}
    @{[vbrev8_v $V29, $V16]}
2:
___

    return $code;
}

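# The two handlers above implement the ciphertext-stealing step of XTS: the
# partial final block borrows the trailing bytes of the preceding full block's
# output, so the routines accept any `length` that is not a multiple of 16
# bytes, provided at least one full block is present.
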
# Load all 11 round keys to v1-v11 registers.
sub aes_128_load_key {
    my $code=<<___;
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V2, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V3, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V4, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V5, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V6, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V7, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V8, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V9, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V10, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V11, $KEY1]}
___

    return $code;
}

# Load all 15 round keys to v1-v15 registers.
sub aes_256_load_key {
    my $code=<<___;
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vle32_v $V1, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V2, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V3, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V4, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V5, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V6, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V7, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V8, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V9, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V10, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V11, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V12, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V13, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V14, $KEY1]}
    addi $KEY1, $KEY1, 16
    @{[vle32_v $V15, $KEY1]}
___

    return $code;
}

# aes-128 enc with round keys v1-v11
sub aes_128_enc {
    my $code=<<___;
    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesef_vs $V24, $V11]}
___

    return $code;
}

# aes-128 dec with round keys v1-v11
sub aes_128_dec {
    my $code=<<___;
    @{[vaesz_vs $V24, $V11]}
    @{[vaesdm_vs $V24, $V10]}
    @{[vaesdm_vs $V24, $V9]}
    @{[vaesdm_vs $V24, $V8]}
    @{[vaesdm_vs $V24, $V7]}
    @{[vaesdm_vs $V24, $V6]}
    @{[vaesdm_vs $V24, $V5]}
    @{[vaesdm_vs $V24, $V4]}
    @{[vaesdm_vs $V24, $V3]}
    @{[vaesdm_vs $V24, $V2]}
    @{[vaesdf_vs $V24, $V1]}
___

    return $code;
}

# aes-256 enc with round keys v1-v15
sub aes_256_enc {
    my $code=<<___;
    @{[vaesz_vs $V24, $V1]}
    @{[vaesem_vs $V24, $V2]}
    @{[vaesem_vs $V24, $V3]}
    @{[vaesem_vs $V24, $V4]}
    @{[vaesem_vs $V24, $V5]}
    @{[vaesem_vs $V24, $V6]}
    @{[vaesem_vs $V24, $V7]}
    @{[vaesem_vs $V24, $V8]}
    @{[vaesem_vs $V24, $V9]}
    @{[vaesem_vs $V24, $V10]}
    @{[vaesem_vs $V24, $V11]}
    @{[vaesem_vs $V24, $V12]}
    @{[vaesem_vs $V24, $V13]}
    @{[vaesem_vs $V24, $V14]}
    @{[vaesef_vs $V24, $V15]}
___

    return $code;
}

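# Like aes_128_dec above, aes_256_dec below applies the same round keys in
# reverse order using the Zvkned decryption-round instructions
# (vaesz/vaesdm/vaesdf).
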
# aes-256 dec with round keys v1-v15
sub aes_256_dec {
    my $code=<<___;
    @{[vaesz_vs $V24, $V15]}
    @{[vaesdm_vs $V24, $V14]}
    @{[vaesdm_vs $V24, $V13]}
    @{[vaesdm_vs $V24, $V12]}
    @{[vaesdm_vs $V24, $V11]}
    @{[vaesdm_vs $V24, $V10]}
    @{[vaesdm_vs $V24, $V9]}
    @{[vaesdm_vs $V24, $V8]}
    @{[vaesdm_vs $V24, $V7]}
    @{[vaesdm_vs $V24, $V6]}
    @{[vaesdm_vs $V24, $V5]}
    @{[vaesdm_vs $V24, $V4]}
    @{[vaesdm_vs $V24, $V3]}
    @{[vaesdm_vs $V24, $V2]}
    @{[vaesdf_vs $V24, $V1]}
___

    return $code;
}

$code .= <<___;
.p2align 3
.globl rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt
.type rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt,\@function
rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt:
    @{[compute_xts_iv0]}

    # aes block size is 16 bytes
    andi $TAIL_LENGTH, $LENGTH, 15
    mv $STORE_LEN32, $LENGTH
    beqz $TAIL_LENGTH, 1f
    sub $LENGTH, $LENGTH, $TAIL_LENGTH
    addi $STORE_LEN32, $LENGTH, -16
1:
    # Convert `LENGTH` from a byte count into an e32 element count.
    srli $LEN32, $LENGTH, 2
    srli $STORE_LEN32, $STORE_LEN32, 2

    # Load number of rounds
    lwu $T0, 240($KEY1)
    li $T1, 14
    li $T2, 10
    beq $T0, $T1, aes_xts_enc_256
    beq $T0, $T2, aes_xts_enc_128
.size rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt,.-rv64i_zvbb_zvkg_zvkned_aes_xts_encrypt
___

$code .= <<___;
.p2align 3
aes_xts_enc_128:
    @{[init_first_round]}
    @{[aes_128_load_key]}

    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    j 1f

.Lenc_blocks_128:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    # load plaintext into v24
    @{[vle32_v $V24, $INPUT]}
    # update iv
    @{[vgmul_vv $V16, $V20]}
    # reverse the iv's bits order back
    @{[vbrev8_v $V28, $V16]}
1:
    @{[vxor_vv $V24, $V24, $V28]}
    slli $T0, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INPUT, $INPUT, $T0
    @{[aes_128_enc]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store ciphertext
    @{[vsetvli "zero", $STORE_LEN32, "e32", "m4", "ta", "ma"]}
    @{[vse32_v $V24, $OUTPUT]}
    add $OUTPUT, $OUTPUT, $T0
    sub $STORE_LEN32, $STORE_LEN32, $VL

    bnez $LEN32, .Lenc_blocks_128

    @{[handle_xts_enc_last_block]}

    # xts last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vxor_vv $V24, $V24, $V28]}
    @{[aes_128_enc]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store last block ciphertext
    addi $OUTPUT, $OUTPUT, -16
    @{[vse32_v $V24, $OUTPUT]}

    ret
.size aes_xts_enc_128,.-aes_xts_enc_128
___

$code .= <<___;
.p2align 3
aes_xts_enc_256:
    @{[init_first_round]}
    @{[aes_256_load_key]}

    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    j 1f

.Lenc_blocks_256:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    # load plaintext into v24
    @{[vle32_v $V24, $INPUT]}
    # update iv
    @{[vgmul_vv $V16, $V20]}
    # reverse the iv's bits order back
    @{[vbrev8_v $V28, $V16]}
1:
    @{[vxor_vv $V24, $V24, $V28]}
    slli $T0, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INPUT, $INPUT, $T0
    @{[aes_256_enc]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store ciphertext
    @{[vsetvli "zero", $STORE_LEN32, "e32", "m4", "ta", "ma"]}
    @{[vse32_v $V24, $OUTPUT]}
    add $OUTPUT, $OUTPUT, $T0
    sub $STORE_LEN32, $STORE_LEN32, $VL

    bnez $LEN32, .Lenc_blocks_256

    @{[handle_xts_enc_last_block]}

    # xts last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vxor_vv $V24, $V24, $V28]}
    @{[aes_256_enc]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store last block ciphertext
    addi $OUTPUT, $OUTPUT, -16
    @{[vse32_v $V24, $OUTPUT]}

    ret
.size aes_xts_enc_256,.-aes_xts_enc_256
___

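# The decrypt entry points below mirror the encrypt ones: key2 still encrypts
# the initial tweak, key1 drives the AES decryption rounds, and the tail
# handler keeps two tweaks (v28/v29) because ciphertext stealing applies the
# last two tweaks to the final two blocks in swapped order.
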
################################################################################
# void rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt(const unsigned char *in,
#                                             unsigned char *out, size_t length,
#                                             const AES_KEY *key1,
#                                             const AES_KEY *key2,
#                                             const unsigned char iv[16])
$code .= <<___;
.p2align 3
.globl rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt
.type rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt,\@function
rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt:
    @{[compute_xts_iv0]}

    # aes block size is 16 bytes
    andi $TAIL_LENGTH, $LENGTH, 15
    beqz $TAIL_LENGTH, 1f
    sub $LENGTH, $LENGTH, $TAIL_LENGTH
    addi $LENGTH, $LENGTH, -16
1:
    # Convert `LENGTH` from a byte count into an e32 element count.
    srli $LEN32, $LENGTH, 2

    # Load number of rounds
    lwu $T0, 240($KEY1)
    li $T1, 14
    li $T2, 10
    beq $T0, $T1, aes_xts_dec_256
    beq $T0, $T2, aes_xts_dec_128
.size rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt,.-rv64i_zvbb_zvkg_zvkned_aes_xts_decrypt
___

$code .= <<___;
.p2align 3
aes_xts_dec_128:
    @{[init_first_round]}
    @{[aes_128_load_key]}

    beqz $LEN32, 2f

    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    j 1f

.Ldec_blocks_128:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    # load ciphertext into v24
    @{[vle32_v $V24, $INPUT]}
    # update iv
    @{[vgmul_vv $V16, $V20]}
    # reverse the iv's bits order back
    @{[vbrev8_v $V28, $V16]}
1:
    @{[vxor_vv $V24, $V24, $V28]}
    slli $T0, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INPUT, $INPUT, $T0
    @{[aes_128_dec]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store plaintext
    @{[vse32_v $V24, $OUTPUT]}
    add $OUTPUT, $OUTPUT, $T0

    bnez $LEN32, .Ldec_blocks_128

2:
    @{[handle_xts_dec_last_block]}

    ## xts second to last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vxor_vv $V24, $V24, $V29]}
    @{[aes_128_dec]}
    @{[vxor_vv $V24, $V24, $V29]}
    @{[vmv_v_v $V25, $V24]}

    # load last block ciphertext
    @{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "tu", "ma"]}
    @{[vle8_v $V24, $INPUT]}

    # store second to last block plaintext
    addi $T0, $OUTPUT, 16
    @{[vse8_v $V25, $T0]}

    ## xts last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vxor_vv $V24, $V24, $V28]}
    @{[aes_128_dec]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store last block plaintext
    @{[vse32_v $V24, $OUTPUT]}

    ret
.size aes_xts_dec_128,.-aes_xts_dec_128
___

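# aes_xts_dec_256 below is identical to aes_xts_dec_128 except that it loads
# the 15-entry AES-256 key schedule and uses the aes_256_dec round sequence.
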
$code .= <<___;
.p2align 3
aes_xts_dec_256:
    @{[init_first_round]}
    @{[aes_256_load_key]}

    beqz $LEN32, 2f

    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    j 1f

.Ldec_blocks_256:
    @{[vsetvli $VL, $LEN32, "e32", "m4", "ta", "ma"]}
    # load ciphertext into v24
    @{[vle32_v $V24, $INPUT]}
    # update iv
    @{[vgmul_vv $V16, $V20]}
    # reverse the iv's bits order back
    @{[vbrev8_v $V28, $V16]}
1:
    @{[vxor_vv $V24, $V24, $V28]}
    slli $T0, $VL, 2
    sub $LEN32, $LEN32, $VL
    add $INPUT, $INPUT, $T0
    @{[aes_256_dec]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store plaintext
    @{[vse32_v $V24, $OUTPUT]}
    add $OUTPUT, $OUTPUT, $T0

    bnez $LEN32, .Ldec_blocks_256

2:
    @{[handle_xts_dec_last_block]}

    ## xts second to last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vxor_vv $V24, $V24, $V29]}
    @{[aes_256_dec]}
    @{[vxor_vv $V24, $V24, $V29]}
    @{[vmv_v_v $V25, $V24]}

    # load last block ciphertext
    @{[vsetvli "zero", $TAIL_LENGTH, "e8", "m1", "tu", "ma"]}
    @{[vle8_v $V24, $INPUT]}

    # store second to last block plaintext
    addi $T0, $OUTPUT, 16
    @{[vse8_v $V25, $T0]}

    ## xts last block
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vxor_vv $V24, $V24, $V28]}
    @{[aes_256_dec]}
    @{[vxor_vv $V24, $V24, $V28]}

    # store last block plaintext
    @{[vse32_v $V24, $OUTPUT]}

    ret
.size aes_xts_dec_256,.-aes_xts_dec_256
___
}

print $code;

close STDOUT or die "error closing STDOUT: $!";