#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

# The generated code of this file depends on the following RISC-V extensions:
# - RV64I
# - RISC-V Vector ('V') with VLEN >= 128
# - RISC-V Vector Cryptography Bit-manipulation extension ('Zvkb')
# - RISC-V Vector SHA-2 Secure Hash extension ('Zvknha' or 'Zvknhb')

use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect the generated assembly to $output when one was given.  The
# three-argument form keeps the mode separate from the file name, and a
# failed open aborts instead of being silently ignored.
if ($output) {
    open(STDOUT, '>', $output)
        or die "can't open $output for writing: $!";
}

# Accumulates the assembly text that is printed at the end of the script.
my $code=<<___;
.text
___

# Symbolic names for the 32 vector registers v0..v31.
my ($V0, $V1, $V2, $V3, $V4, $V5, $V6, $V7,
    $V8, $V9, $V10, $V11, $V12, $V13, $V14, $V15,
    $V16, $V17, $V18, $V19, $V20, $V21, $V22, $V23,
    $V24, $V25, $V26, $V27, $V28, $V29, $V30, $V31,
) = map("v$_",(0..31));

# Label of the round-constant table emitted after the function body.
my $K256 = "K256";

# Function arguments
my ($H, $INP, $LEN, $KT, $H2, $INDEX_PATTERN) = ("a0", "a1", "a2", "a3", "t3", "t4");

# Returns the assembly that loads the K256 round-constant table into
# v10..v25, one vle32.v per register.
#
# $KT is pointed at the table with `la` and advanced by 16 bytes between
# consecutive loads (there is no advance after the last one).  The number
# of elements read per load is whatever vtype/vl the caller has
# configured; the sequence itself does not issue a vsetvli.
#
# Takes no arguments; returns the text with a trailing newline.
sub sha_256_load_constant {
    my @k_regs = ($V10, $V11, $V12, $V13, $V14, $V15, $V16, $V17,
                  $V18, $V19, $V20, $V21, $V22, $V23, $V24, $V25);

    my @loads;
    for my $vreg (@k_regs) {
        push @loads, "    " . vle32_v($vreg, $KT) . "\n";
    }

    my $code = "    la $KT, $K256 # Load round constants K256\n";
    # Joining (rather than appending) puts the pointer bump only
    # *between* loads, matching the hand-unrolled sequence.
    $code .= join("    addi $KT, $KT, 16\n", @loads);

    return $code;
}

################################################################################
# void sha256_block_data_order_zvkb_zvknha_or_zvknhb(void *c, const void *p, size_t len)
#
# Register usage in the generated function:
#   $H   - pointer to the eight 32-bit state words {a,b,c,d,e,f,g,h}
#   $INP - pointer to the message; advanced by 64 bytes per iteration
#   $LEN - number of 64-byte blocks to process
#
# $LEN must be non-zero: the loop decrements it first and only tests it
# at the bottom, so a zero count would wrap around instead of returning.

# Entry-point symbol, named once so the directives below cannot drift apart.
my $FUNC = "sha256_block_data_order_zvkb_zvknha_or_zvknhb";

# Returns the assembly that loads one 64-byte message block into v1-v4
# (16 bytes per register), byte-swapping every 32-bit element and
# advancing $INP past the block.  No trailing newline.
sub sha_256_load_message_block {
    my @lines;
    for my $wreg ($V1, $V2, $V3, $V4) {
        push @lines,
            vle32_v($wreg, $INP),
            vrev8_v($wreg, $wreg),
            "add $INP, $INP, 16";
    }
    return join("\n", map { "    $_" } @lines);
}

# Returns the assembly for the 16 quad-rounds of one block.  No trailing
# newline.
#
# Quad-round i adds round-constant register v(10+i) to the oldest message
# register and feeds the sum (v5) to vsha2cl/vsha2ch, which update the
# working state in v6/v7.  The four message registers v1-v4 are used in
# rotation: the register consumed in quad-round i is overwritten by
# vsha2ms with the words needed four quad-rounds later.  Only the first
# twelve quad-rounds extend the schedule; the last four just consume the
# words already generated.
sub sha_256_quad_rounds {
    my @w_regs = ($V1, $V2, $V3, $V4);
    my @k_regs = ($V10, $V11, $V12, $V13, $V14, $V15, $V16, $V17,
                  $V18, $V19, $V20, $V21, $V22, $V23, $V24, $V25);
    my $last_schedule_round = 11;    # produces W[63:60]
    my @rounds;

    for my $round (0 .. $#k_regs) {
        # Message registers for this quad-round, oldest words first.
        my @window = map { $w_regs[($round + $_) % 4] } (0 .. 3);
        my ($oldest, $second, $third, $newest) = @window;
        my $flow = join("->", @window);
        my @lines;

        if ($round == 0) {
            push @lines,
                "# Quad-round 0 (+0, Wt from oldest to newest in $flow)";
        }
        else {
            push @lines,
                sprintf("# Quad-round %d (+%d, %s)", $round, $round % 4, $flow);
        }
        if ($round == $last_schedule_round + 1) {
            push @lines,
                '# Note that we stop generating new message schedule words (Wt, v1-v4)',
                '# as we already generated all the words we end up consuming (i.e., W[63:60]).';
        }

        push @lines,
            vadd_vv($V5, $k_regs[$round], $oldest),
            vsha2cl_vv($V7, $V6, $V5),
            vsha2ch_vv($V6, $V7, $V5);

        if ($round <= $last_schedule_round) {
            my $lo = 16 + 4 * $round;
            # v0 masks element 0 only, so v5 takes its first word from
            # $second and the remaining three from $third.
            push @lines,
                vmerge_vvm($V5, $third, $second, $V0),
                vsha2ms_vv($oldest, $V5, $newest)
                    . sprintf(" # Generate W[%d:%d]", $lo + 3, $lo);
        }

        push @rounds, join("\n", map { "    $_" } @lines);
    }

    return join("\n\n", @rounds);
}

$code .= <<___;
.p2align 2
.globl $FUNC
.type $FUNC,\@function
$FUNC:
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}

    @{[sha_256_load_constant]}

    # H is stored as {a,b,c,d},{e,f,g,h}, but we need {f,e,b,a},{h,g,d,c}
    # The dst vtype is e32m1 and the index vtype is e8mf4.
    # We use index-load with the following index pattern at v26.
    #   i8 index:
    #     20, 16, 4, 0
    # Instead of setting the i8 index, we could use a single 32bit
    # little-endian value to cover the 4xi8 index.
    #   i32 value:
    #     0x 00 04 10 14
    li $INDEX_PATTERN, 0x00041014
    @{[vsetivli "zero", 1, "e32", "m1", "ta", "ma"]}
    @{[vmv_v_x $V26, $INDEX_PATTERN]}

    addi $H2, $H, 8

    # Use index-load to get {f,e,b,a},{h,g,d,c}
    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}
    @{[vluxei8_v $V6, $H, $V26]}
    @{[vluxei8_v $V7, $H2, $V26]}

    # Setup v0 mask for the vmerge to replace the first word (idx==0) in key-scheduling.
    # The AVL is 4 in SHA, so we could use a single e8(8 element masking) for masking.
    @{[vsetivli "zero", 1, "e8", "m1", "ta", "ma"]}
    @{[vmv_v_i $V0, 0x01]}

    @{[vsetivli "zero", 4, "e32", "m1", "ta", "ma"]}

L_round_loop:
    # Decrement length by 1
    add $LEN, $LEN, -1

    # Keep the current state as we need it later: H' = H+{a',b',c',...,h'}.
    @{[vmv_v_v $V30, $V6]}
    @{[vmv_v_v $V31, $V7]}

    # Load the 512-bits of the message block in v1-v4 and perform
    # an endian swap on each 4 bytes element.
@{[sha_256_load_message_block()]}

@{[sha_256_quad_rounds()]}

    # H' = H+{a',b',c',...,h'}
    @{[vadd_vv $V6, $V30, $V6]}
    @{[vadd_vv $V7, $V31, $V7]}
    bnez $LEN, L_round_loop

    # Store {f,e,b,a},{h,g,d,c} back to {a,b,c,d},{e,f,g,h}.
    @{[vsuxei8_v $V6, $H, $V26]}
    @{[vsuxei8_v $V7, $H2, $V26]}

    ret
.size $FUNC,.-$FUNC

.p2align 2
.type $K256,\@object
$K256:
    .word 0x428a2f98, 0x71374491, 0xb5c0fbcf, 0xe9b5dba5
    .word 0x3956c25b, 0x59f111f1, 0x923f82a4, 0xab1c5ed5
    .word 0xd807aa98, 0x12835b01, 0x243185be, 0x550c7dc3
    .word 0x72be5d74, 0x80deb1fe, 0x9bdc06a7, 0xc19bf174
    .word 0xe49b69c1, 0xefbe4786, 0x0fc19dc6, 0x240ca1cc
    .word 0x2de92c6f, 0x4a7484aa, 0x5cb0a9dc, 0x76f988da
    .word 0x983e5152, 0xa831c66d, 0xb00327c8, 0xbf597fc7
    .word 0xc6e00bf3, 0xd5a79147, 0x06ca6351, 0x14292967
    .word 0x27b70a85, 0x2e1b2138, 0x4d2c6dfc, 0x53380d13
    .word 0x650a7354, 0x766a0abb, 0x81c2c92e, 0x92722c85
    .word 0xa2bfe8a1, 0xa81a664b, 0xc24b8b70, 0xc76c51a3
    .word 0xd192e819, 0xd6990624, 0xf40e3585, 0x106aa070
    .word 0x19a4c116, 0x1e376c08, 0x2748774c, 0x34b0bcb5
    .word 0x391c0cb3, 0x4ed8aa4a, 0x5b9cca4f, 0x682e6ff3
    .word 0x748f82ee, 0x78a5636f, 0x84c87814, 0x8cc70208
    .word 0x90befffa, 0xa4506ceb, 0xbef9a3f7, 0xc67178f2
.size $K256,.-$K256
___

print $code;

# Buffered write errors only surface when the handle is closed.
close STDOUT or die "error closing STDOUT: $!";