#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
use strict;
use warnings;

use FindBin qw($Bin);
use lib "$Bin";
use lib "$Bin/../../perlasm";
use riscv;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect STDOUT to the requested output file.  Use the three-argument
# form of open (the two-argument ">$output" form is susceptible to mode
# injection through the filename) and die on failure instead of silently
# emitting the assembly to the terminal.
if (defined $output) {
    open STDOUT, '>', $output
        or die "can't open $output: $!";
}

# The generated assembly is accumulated in $code and printed at the end.
my $code=<<___;
.text
___

################################################################################
# void gcm_init_rv64i_zbc(u128 Htable[16], const u64 H[2]);
# void gcm_init_rv64i_zbc__zbb(u128 Htable[16], const u64 H[2]);
# void gcm_init_rv64i_zbc__zbkb(u128 Htable[16], const u64 H[2]);
#
# input: H: 128-bit H - secret parameter E(K, 0^128)
# output: Htable: Preprocessed key data for gcm_gmult_rv64i_zbc* and
#                 gcm_ghash_rv64i_zbc*
#
# All callers of this function revert the byte-order unconditionally
# on little-endian machines. So we need to revert the byte-order back.
# Additionally we reverse the bits of each byte.
# Key setup, base variant: both the bit reverse and the byte reverse are
# emulated with plain RV64I sequences (the *_rv64i helpers from riscv.pm).
# The Zbb variant below shares the same register plan, so both generators
# live in one lexical scope.
{
my ($htable,$h,$w0,$w1,$s0,$s1,$s2) = ("a0","a1","a2","a3","t0","t1","t2");

$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc
.type gcm_init_rv64i_zbc,\@function
gcm_init_rv64i_zbc:
    ld $w0,0($h)
    ld $w1,8($h)
    @{[brev8_rv64i $w0, $s0, $s1, $s2]}
    @{[brev8_rv64i $w1, $s0, $s1, $s2]}
    @{[sd_rev8_rv64i $w0, $htable, 0, $s0]}
    @{[sd_rev8_rv64i $w1, $htable, 8, $s0]}
    ret
.size gcm_init_rv64i_zbc,.-gcm_init_rv64i_zbc
___

# Zbb variant: identical preprocessing, but the byte reverse uses the
# rev8 instruction rather than the store-time emulation.
$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc__zbb
.type gcm_init_rv64i_zbc__zbb,\@function
gcm_init_rv64i_zbc__zbb:
    ld $w0,0($h)
    ld $w1,8($h)
    @{[brev8_rv64i $w0, $s0, $s1, $s2]}
    @{[brev8_rv64i $w1, $s0, $s1, $s2]}
    @{[rev8 $w0, $w0]}
    @{[rev8 $w1, $w1]}
    sd $w0,0($htable)
    sd $w1,8($htable)
    ret
.size gcm_init_rv64i_zbc__zbb,.-gcm_init_rv64i_zbc__zbb
___
}

# Zbkb variant: brev8 and rev8 are single instructions here, so no
# scratch registers are needed and the loads can double as work registers.
{
my ($htable,$h,$lo,$hi) = ("a0","a1","t0","t1");

$code .= <<___;
.p2align 3
.globl gcm_init_rv64i_zbc__zbkb
.type gcm_init_rv64i_zbc__zbkb,\@function
gcm_init_rv64i_zbc__zbkb:
    ld $lo,0($h)
    ld $hi,8($h)
    @{[brev8 $lo, $lo]}
    @{[brev8 $hi, $hi]}
    @{[rev8 $lo, $lo]}
    @{[rev8 $hi, $hi]}
    sd $lo,0($htable)
    sd $hi,8($htable)
    ret
.size gcm_init_rv64i_zbc__zbkb,.-gcm_init_rv64i_zbc__zbkb
___
}

################################################################################
# void gcm_gmult_rv64i_zbc(u64 Xi[2], const u128 Htable[16]);
# void gcm_gmult_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16]);
#
# input: Xi: current hash value
#        Htable: copy of H
# output: Xi: next hash value Xi
#
# Compute GMULT (Xi*H mod f) using the Zbc (clmul) and Zbb (basic bit manip)
# extensions.
# Using the no-Karatsuba approach and clmul for the final reduction.
# This results in an implementation with minimized number of instructions.
# HW with clmul latencies higher than 2 cycles might observe a performance
# improvement with Karatsuba. HW with clmul latencies higher than 6 cycles
# might observe a performance improvement with additionally converting the
# reduction to shift&xor. For a full discussion of this estimates see
# https://github.com/riscv/riscv-crypto/blob/master/doc/supp/gcm-mode-cmul.adoc

# Register plan: $x0/$x1 = Xi, $y0/$y1 = H, $z0..$z3 accumulate the 256-bit
# carry-less product, $t0/$t1 = scratch, $polymod holds the reduction byte
# (0x87, loaded from Lpolymod).  Note $z0..$z2 are also the scratch
# registers for brev8_rv64i; they are dead before the multiply overwrites
# them, so the reuse is safe.
{
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_gmult_rv64i_zbc
.type gcm_gmult_rv64i_zbc,\@function
gcm_gmult_rv64i_zbc:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}

    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)

    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0

    # Bit-reverse Xi back and store it
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_gmult_rv64i_zbc,.-gcm_gmult_rv64i_zbc
___
}

# Zbkb variant: identical multiply/reduce sequence; only the bit-reversal
# differs (single-instruction brev8 instead of the brev8_rv64i emulation).
{
my ($Xi,$Htable,$x0,$x1,$y0,$y1) = ("a0","a1","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_gmult_rv64i_zbc__zbkb
.type gcm_gmult_rv64i_zbc__zbkb,\@function
gcm_gmult_rv64i_zbc__zbkb:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}

    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)

    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0

    # Bit-reverse Xi back and store it
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_gmult_rv64i_zbc__zbkb,.-gcm_gmult_rv64i_zbc__zbkb
___
}

################################################################################
# void gcm_ghash_rv64i_zbc(u64 Xi[2], const u128 Htable[16],
#                          const u8 *inp, size_t len);
# void gcm_ghash_rv64i_zbc__zbkb(u64 Xi[2], const u128 Htable[16],
#                                const u8 *inp, size_t len);
#
# input: Xi: current hash value
#        Htable: copy of H
#        inp: pointer to input data
#        len: length of input data in bytes (multiple of block size)
# output: Xi: Xi+1 (next hash value Xi)
# GHASH over a whole buffer.  Per 16-byte block: bit-reverse the input,
# XOR it into Xi, then run the same clmul multiply and reduction as in
# gcm_gmult_rv64i_zbc.  The loop advances $inp and decrements $len by 16
# each iteration and exits when $len hits zero, so the documented caller
# contract (len is a multiple of the block size) must hold.
{
my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_ghash_rv64i_zbc
.type gcm_ghash_rv64i_zbc,\@function
gcm_ghash_rv64i_zbc:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}

    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)

    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)

Lstep:
    # Load the input data, bit-reverse them, and XOR them with Xi
    ld $t0, 0($inp)
    ld $t1, 8($inp)
    add $inp, $inp, 16
    add $len, $len, -16
    @{[brev8_rv64i $t0, $z0, $z1, $z2]}
    @{[brev8_rv64i $t1, $z0, $z1, $z2]}
    xor $x0, $x0, $t0
    xor $x1, $x1, $t1

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0

    # Iterate over all blocks
    bnez $len, Lstep

    # Bit-reverse final Xi back and store it
    @{[brev8_rv64i $x0, $z0, $z1, $z2]}
    @{[brev8_rv64i $x1, $z0, $z1, $z2]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_ghash_rv64i_zbc,.-gcm_ghash_rv64i_zbc
___
}

# Zbkb variant of the above: same loop and multiply/reduce sequence, with
# single-instruction brev8 bit reversal.
# NOTE(review): the loop label is spelled "Lstep_zkbk" (letters transposed
# from "zbkb").  Harmless since it is used consistently within this
# function, but a rename would aid grepping.
{
my ($Xi,$Htable,$inp,$len,$x0,$x1,$y0,$y1) = ("a0","a1","a2","a3","a4","a5","a6","a7");
my ($z0,$z1,$z2,$z3,$t0,$t1,$polymod) = ("t0","t1","t2","t3","t4","t5","t6");

$code .= <<___;
.p2align 3
.globl gcm_ghash_rv64i_zbc__zbkb
.type gcm_ghash_rv64i_zbc__zbkb,\@function
gcm_ghash_rv64i_zbc__zbkb:
    # Load Xi and bit-reverse it
    ld $x0, 0($Xi)
    ld $x1, 8($Xi)
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}

    # Load the key (already bit-reversed)
    ld $y0, 0($Htable)
    ld $y1, 8($Htable)

    # Load the reduction constant
    la $polymod, Lpolymod
    lbu $polymod, 0($polymod)

Lstep_zkbk:
    # Load the input data, bit-reverse them, and XOR them with Xi
    ld $t0, 0($inp)
    ld $t1, 8($inp)
    add $inp, $inp, 16
    add $len, $len, -16
    @{[brev8 $t0, $t0]}
    @{[brev8 $t1, $t1]}
    xor $x0, $x0, $t0
    xor $x1, $x1, $t1

    # Multiplication (without Karatsuba)
    @{[clmulh $z3, $x1, $y1]}
    @{[clmul $z2, $x1, $y1]}
    @{[clmulh $t1, $x0, $y1]}
    @{[clmul $z1, $x0, $y1]}
    xor $z2, $z2, $t1
    @{[clmulh $t1, $x1, $y0]}
    @{[clmul $t0, $x1, $y0]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $x0, $y0]}
    @{[clmul $z0, $x0, $y0]}
    xor $z1, $z1, $t1

    # Reduction with clmul
    @{[clmulh $t1, $z3, $polymod]}
    @{[clmul $t0, $z3, $polymod]}
    xor $z2, $z2, $t1
    xor $z1, $z1, $t0
    @{[clmulh $t1, $z2, $polymod]}
    @{[clmul $t0, $z2, $polymod]}
    xor $x1, $z1, $t1
    xor $x0, $z0, $t0

    # Iterate over all blocks
    bnez $len, Lstep_zkbk

    # Bit-reverse final Xi back and store it
    @{[brev8 $x0, $x0]}
    @{[brev8 $x1, $x1]}
    sd $x0, 0($Xi)
    sd $x1, 8($Xi)
    ret
.size gcm_ghash_rv64i_zbc__zbkb,.-gcm_ghash_rv64i_zbc__zbkb
___
}

# Constant pool appended to the generated .s file:
# Lbrev8_const: alternating bit/pair/nibble masks, presumably consumed by
#               the brev8_rv64i emulation helper in riscv.pm (helper source
#               not visible here -- TODO confirm).
# Lpolymod:     0x87, the low byte of the GCM reduction polynomial
#               x^128 + x^7 + x^2 + x + 1; loaded via la/lbu above.
$code .= <<___;
.p2align 3
Lbrev8_const:
    .dword 0xAAAAAAAAAAAAAAAA
    .dword 0xCCCCCCCCCCCCCCCC
    .dword 0xF0F0F0F0F0F0F0F0
.size Lbrev8_const,.-Lbrev8_const

Lpolymod:
    .byte 0x87
.size Lpolymod,.-Lpolymod
___

# Emit the accumulated assembly.  A failed close means truncated output
# (buffered write errors surface at close), so it is fatal.
print $code;

close STDOUT or die "error closing STDOUT: $!";