#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector SM3 Secure Hash extension ('Zvksh')

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

$output and open STDOUT,">$output";

my $code=<<___;
.text
___

################################################################################
# ossl_hwsm3_block_data_order_zvksh(SM3_CTX *c, const void *p, size_t num);
{
my ($CTX, $INPUT, $NUM) = ("a0", "a1", "a2");
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));

$code .= <<___;
.text
.p2align 3
.globl ossl_hwsm3_block_data_order_zvksh
.type ossl_hwsm3_block_data_order_zvksh,\@function
ossl_hwsm3_block_data_order_zvksh:
    @{[vsetivli "zero", 8, "e32", "m2", "ta", "ma"]}

    # Load initial state of hash context (c->A-H).
    @{[vle32_v $V0, $CTX]}
    @{[vrev8_v $V0, $V0]}

L_sm3_loop:
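    # Each iteration of this loop compresses one 64-byte message block:
    # vsm3me expands the message schedule eight words at a time, and the
    # 32 vsm3c instructions below (round-group immediates 0 through 31)
    # each perform two rounds of the SM3 compression function.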
    # Copy the previous state to v2.
    # It will be XOR'ed with the current state at the end of the round.
    @{[vmv_v_v $V2, $V0]}

    # Load the 64B block in 2x32B chunks.
    @{[vle32_v $V6, $INPUT]} # v6 := {w7, ..., w0}
    addi $INPUT, $INPUT, 32

    @{[vle32_v $V8, $INPUT]} # v8 := {w15, ..., w8}
    addi $INPUT, $INPUT, 32

    addi $NUM, $NUM, -1

    # As vsm3c consumes only w0, w1, w4, w5 we need to slide the input
    # 2 elements down so we process elements w2, w3, w6, w7.
    # This will be repeated for each odd round.
    @{[vslidedown_vi $V4, $V6, 2]} # v4 := {X, X, w7, ..., w2}

    @{[vsm3c_vi $V0, $V6, 0]}
    @{[vsm3c_vi $V0, $V4, 1]}

    # Prepare a vector with {w11, ..., w4}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w7, ..., w4}
    @{[vslideup_vi $V4, $V8, 4]} # v4 := {w11, w10, w9, w8, w7, w6, w5, w4}

    @{[vsm3c_vi $V0, $V4, 2]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w11, w10, w9, w8, w7, w6}
    @{[vsm3c_vi $V0, $V4, 3]}

    @{[vsm3c_vi $V0, $V8, 4]}
    @{[vslidedown_vi $V4, $V8, 2]} # v4 := {X, X, w15, w14, w13, w12, w11, w10}
    @{[vsm3c_vi $V0, $V4, 5]}

    @{[vsm3me_vv $V6, $V8, $V6]} # v6 := {w23, w22, w21, w20, w19, w18, w17, w16}

    # Prepare a register with {w19, w18, w17, w16, w15, w14, w13, w12}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w15, w14, w13, w12}
    @{[vslideup_vi $V4, $V6, 4]} # v4 := {w19, w18, w17, w16, w15, w14, w13, w12}

    @{[vsm3c_vi $V0, $V4, 6]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w19, w18, w17, w16, w15, w14}
    @{[vsm3c_vi $V0, $V4, 7]}

    @{[vsm3c_vi $V0, $V6, 8]}
    @{[vslidedown_vi $V4, $V6, 2]} # v4 := {X, X, w23, w22, w21, w20, w19, w18}
    @{[vsm3c_vi $V0, $V4, 9]}

    @{[vsm3me_vv $V8, $V6, $V8]} # v8 := {w31, w30, w29, w28, w27, w26, w25, w24}

    # Prepare a register with {w27, w26, w25, w24, w23, w22, w21, w20}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w23, w22, w21, w20}
    @{[vslideup_vi $V4, $V8, 4]} # v4 := {w27, w26, w25, w24, w23, w22, w21, w20}

    @{[vsm3c_vi $V0, $V4, 10]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w27, w26, w25, w24, w23, w22}
    @{[vsm3c_vi $V0, $V4, 11]}

    @{[vsm3c_vi $V0, $V8, 12]}
    @{[vslidedown_vi $V4, $V8, 2]} # v4 := {X, X, w31, w30, w29, w28, w27, w26}
    @{[vsm3c_vi $V0, $V4, 13]}

    @{[vsm3me_vv $V6, $V8, $V6]} # v6 := {w39, w38, w37, w36, w35, w34, w33, w32}

    # Prepare a register with {w35, w34, w33, w32, w31, w30, w29, w28}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w31, w30, w29, w28}
    @{[vslideup_vi $V4, $V6, 4]} # v4 := {w35, w34, w33, w32, w31, w30, w29, w28}

    @{[vsm3c_vi $V0, $V4, 14]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w35, w34, w33, w32, w31, w30}
    @{[vsm3c_vi $V0, $V4, 15]}

    @{[vsm3c_vi $V0, $V6, 16]}
    @{[vslidedown_vi $V4, $V6, 2]} # v4 := {X, X, w39, w38, w37, w36, w35, w34}
    @{[vsm3c_vi $V0, $V4, 17]}

    @{[vsm3me_vv $V8, $V6, $V8]} # v8 := {w47, w46, w45, w44, w43, w42, w41, w40}

    # Prepare a register with {w43, w42, w41, w40, w39, w38, w37, w36}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w39, w38, w37, w36}
    @{[vslideup_vi $V4, $V8, 4]} # v4 := {w43, w42, w41, w40, w39, w38, w37, w36}

    @{[vsm3c_vi $V0, $V4, 18]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w43, w42, w41, w40, w39, w38}
    @{[vsm3c_vi $V0, $V4, 19]}

    @{[vsm3c_vi $V0, $V8, 20]}
    @{[vslidedown_vi $V4, $V8, 2]} # v4 := {X, X, w47, w46, w45, w44, w43, w42}
    @{[vsm3c_vi $V0, $V4, 21]}

    @{[vsm3me_vv $V6, $V8, $V6]} # v6 := {w55, w54, w53, w52, w51, w50, w49, w48}

    # Prepare a register with {w51, w50, w49, w48, w47, w46, w45, w44}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w47, w46, w45, w44}
    @{[vslideup_vi $V4, $V6, 4]} # v4 := {w51, w50, w49, w48, w47, w46, w45, w44}

    @{[vsm3c_vi $V0, $V4, 22]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w51, w50, w49, w48, w47, w46}
    @{[vsm3c_vi $V0, $V4, 23]}

    @{[vsm3c_vi $V0, $V6, 24]}
    @{[vslidedown_vi $V4, $V6, 2]} # v4 := {X, X, w55, w54, w53, w52, w51, w50}
    @{[vsm3c_vi $V0, $V4, 25]}

    @{[vsm3me_vv $V8, $V6, $V8]} # v8 := {w63, w62, w61, w60, w59, w58, w57, w56}

    # Prepare a register with {w59, w58, w57, w56, w55, w54, w53, w52}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w55, w54, w53, w52}
    @{[vslideup_vi $V4, $V8, 4]} # v4 := {w59, w58, w57, w56, w55, w54, w53, w52}

    @{[vsm3c_vi $V0, $V4, 26]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w59, w58, w57, w56, w55, w54}
    @{[vsm3c_vi $V0, $V4, 27]}

    @{[vsm3c_vi $V0, $V8, 28]}
    @{[vslidedown_vi $V4, $V8, 2]} # v4 := {X, X, w63, w62, w61, w60, w59, w58}
    @{[vsm3c_vi $V0, $V4, 29]}

    @{[vsm3me_vv $V6, $V8, $V6]} # v6 := {w71, w70, w69, w68, w67, w66, w65, w64}

    # Prepare a register with {w67, w66, w65, w64, w63, w62, w61, w60}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, X, X, w63, w62, w61, w60}
    @{[vslideup_vi $V4, $V6, 4]} # v4 := {w67, w66, w65, w64, w63, w62, w61, w60}

    @{[vsm3c_vi $V0, $V4, 30]}
    @{[vslidedown_vi $V4, $V4, 2]} # v4 := {X, X, w67, w66, w65, w64, w63, w62}
    @{[vsm3c_vi $V0, $V4, 31]}

    # XOR in the previous state.
    @{[vxor_vv $V0, $V0, $V2]}

    bnez $NUM, L_sm3_loop # Check if there are any more blocks to process
L_sm3_end:
    @{[vrev8_v $V0, $V0]}
    @{[vse32_v $V0, $CTX]}
    ret

.size ossl_hwsm3_block_data_order_zvksh,.-ossl_hwsm3_block_data_order_zvksh
___
}

print $code;

close STDOUT or die "error closing STDOUT: $!";