#! /usr/bin/env perl
# This file is dual-licensed, meaning that you can use it under your
# choice of either of the following two licenses:
#
# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You can obtain
# a copy in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# or
#
# Copyright (c) 2022, Hongren (Zenithal) Zheng <i@zenithal.me>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

use strict;
use warnings;

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
my $output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect the generated assembly to the requested file; fail loudly rather
# than silently writing to the terminal when the file cannot be created.
if (defined $output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}

################################################################################
# Utility functions to help with keeping track of which registers to stack/
# unstack when entering / exiting routines.
################################################################################
{
    # Callee-saved registers
    my @callee_saved = map("x$_",(2,8,9,18..27));
    # Caller-saved registers
    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
    # Callee-saved registers handed out since the last clear_regs()
    my @must_save;

    # Registers a register (given as "xN") as in use and returns its name.
    # Callee-saved registers are remembered so that save_regs()/load_regs()
    # spill and restore them; registers in neither list are rejected.
    sub use_reg {
        my $reg = shift;
        if (grep(/^$reg$/, @callee_saved)) {
            push(@must_save, $reg);
        } elsif (!grep(/^$reg$/, @caller_saved)) {
            # Register is not usable!
            die("Unusable register ".$reg);
        }
        return $reg;
    }

    # Takes a list of register numbers and returns the list of "xN" names,
    # registering each one through use_reg().
    sub use_regs {
        return map(use_reg("x$_"), @_);
    }

    # Returns the assembly that reserves stack space and stores every
    # register recorded in @must_save.  Each register gets an 8-byte slot and
    # the reservation is rounded up to a multiple of 16 bytes.
    sub save_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        $ret.="    addi    sp,sp,-$stack_reservation\n";
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret.="    sw      $_,$stack_offset(sp)\n";
        }
        return $ret;
    }

    # Returns the assembly that reloads the registers stored by save_regs()
    # (same slot layout) and releases the stack reservation.
    sub load_regs {
        my $ret = '';
        my $stack_reservation = ($#must_save + 1) * 8;
        my $stack_offset = $stack_reservation;
        if ($stack_reservation % 16) {
            $stack_reservation += 8;
        }
        foreach (@must_save) {
            $stack_offset -= 8;
            $ret.="    lw      $_,$stack_offset(sp)\n";
        }
        $ret.="    addi    sp,sp,$stack_reservation\n";
        return $ret;
    }

    # Forgets all recorded callee-saved registers (start of a new routine
    # group with a different register assignment).
    sub clear_regs {
        @must_save = ();
    }
}

################################################################################
# util for encoding scalar crypto extension instructions
################################################################################

my @regs = map("x$_",(0..31));
my %reglookup;
@reglookup{@regs} = @regs;

# Takes a register name, possibly an alias, and converts it to a register index
# from 0 to 31
sub read_reg {
    my $reg = lc shift;
    if (!exists($reglookup{$reg})) {
        die("Unknown register ".$reg);
    }
    my $regstr = $reglookup{$reg};
    if (!($regstr =~ /^x([0-9]+)$/)) {
        die("Could not process register ".$reg);
    }
    return $1;
}

# Shared encoder for the aes32* instructions, which all use the layout
# bs_XXXXX_ rs2 _ rs1 _XXX_ rd _XXXXXXX
# Takes the opcode template followed by rd, rs1, rs2 (register names) and the
# byte-select value bs (0..3); returns a ".word <decimal>" directive.
sub _encode_aes32 {
    my ($template, $rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);
    # bs is a 2-bit field; anything larger would corrupt the encoding
    if (!defined($bs) || $bs !~ /^[0-3]$/) {
        die("Invalid byte select ".(defined($bs) ? $bs : 'undef'));
    }

    return ".word ".($template | ($bs << 30) | ($rs2 << 20) | ($rs1 << 15) | ($rd << 7));
}

# Encoding for aes32dsi rd, rs1, rs2, bs instruction on RV32
sub aes32dsi {
    return _encode_aes32(0b00_10101_00000_00000_000_00000_0110011, @_);
}

# Encoding for aes32dsmi rd, rs1, rs2, bs instruction on RV32
sub aes32dsmi {
    return _encode_aes32(0b00_10111_00000_00000_000_00000_0110011, @_);
}

# Encoding for aes32esi rd, rs1, rs2, bs instruction on RV32
sub aes32esi {
    return _encode_aes32(0b00_10001_00000_00000_000_00000_0110011, @_);
}

# Encoding for aes32esmi rd, rs1, rs2, bs instruction on RV32
sub aes32esmi {
    return _encode_aes32(0b00_10011_00000_00000_000_00000_0110011, @_);
}

# Encoding for rori rd, rs1, shamt instruction on RV32
# XXXXXXX_shamt_ rs1 _XXX_ rd _XXXXXXX
sub rori {
    my ($rd_name, $rs1_name, $shamt) = @_;
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    # shamt is a 5-bit field on RV32
    if (!defined($shamt) || $shamt !~ /^[0-9]+$/ || $shamt > 31) {
        die("Invalid shift amount ".(defined($shamt) ? $shamt : 'undef'));
    }

    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}

################################################################################
# Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# Registers initially to hold AES state (called s0-s3 or y0-y3 elsewhere)
my ($Q0,$Q1,$Q2,$Q3) = use_regs(6..9);

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = use_regs(10..12);

# Registers initially to hold Key
my ($T0,$T1,$T2,$T3) = use_regs(13..16);

# Loop counter
my ($loopcntr) = use_regs(30);

################################################################################
# Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
################################################################################

# outer product of whole state into one column of key
# Arguments: encoder sub reference, key column register, then the four state
# registers whose bytes 0..3 (in that order) are folded into the key column.
sub outer {
    my $inst = shift;
    my $key = shift;
    # state 0 to 3
    my $s0 = shift;
    my $s1 = shift;
    my $s2 = shift;
    my $s3 = shift;
    my $ret = '';
$ret .= <<___;
    @{[$inst->($key,$key,$s0,0)]}
    @{[$inst->($key,$key,$s1,1)]}
    @{[$inst->($key,$key,$s2,2)]}
    @{[$inst->($key,$key,$s3,3)]}
___
    return $ret;
}

sub aes32esmi4 {
    return outer(\&aes32esmi, @_)
}

sub aes32esi4 {
    return outer(\&aes32esi, @_)
}

sub aes32dsmi4 {
    return outer(\&aes32dsmi, @_)
}

sub aes32dsi4 {
    return outer(\&aes32dsi, @_)
}

################################################################################
# void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
#   const AES_KEY *key);
################################################################################
my $code = <<___;
.text
.balign 16
.globl rv32i_zkne_encrypt
.type   rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    @{[aes32esmi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esmi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esmi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esmi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

# final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
    @{[aes32esi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

################################################################################
# void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
#   const AES_KEY *key);
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_decrypt
.type   rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___

$code .= save_regs();

$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # Load the last key
    # use T0 as temporary now
    slli    $T0,$loopcntr,4
    add     $KEYP,$KEYP,$T0
    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    @{[aes32dsmi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsmi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsmi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsmi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

# final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
    @{[aes32dsi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

$code .= load_regs();

$code .= <<___;
    ret
___

clear_regs();

################################################################################
# Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
################################################################################

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Pointer to user key, number of bits in key, key pointer
# ($KEYP is re-assigned rather than re-declared so that the earlier lexical
# is not masked; it maps to the same register as before)
my ($UKEY,$BITS);
($UKEY,$BITS,$KEYP) = use_regs(10..12);

# Temporaries ($T0-$T3 are re-assigned, $T4-$T8 are new)
my ($T4,$T5,$T6,$T7,$T8);
($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = use_regs(13..17,28..31);

################################################################################
# utility functions for rv32i_zkne_set_encrypt_key
################################################################################

my @rcon = (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36);

# do 4 sbox on 4 bytes of rs, (possibly mix), then xor with rd
# Arguments: encoder sub reference, destination register, source register.
sub sbox4 {
    my $inst = shift;
    my $rd = shift;
    my $rs = shift;
    my $ret = <<___;
    @{[$inst->($rd,$rd,$rs,0)]}
    @{[$inst->($rd,$rd,$rs,1)]}
    @{[$inst->($rd,$rd,$rs,2)]}
    @{[$inst->($rd,$rd,$rs,3)]}
___
    return $ret;
}

sub fwdsbox4 {
    return sbox4(\&aes32esi, @_);
}

# Returns the assembly expanding a 128-bit user key into the encryption key
# schedule.  $zbkb selects the rori encoding for the word rotation; otherwise
# the rotation is built from srli/slli/or.
sub ke128enc {
    my $zbkb = shift;
    my $ret = '';
$ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    for my $rnum (0..9) {
$ret .= <<___;
    # use T4 to store rcon
    li      $T4,$rcon[$rnum]
    # as xor is associative and commutative
    # we first xor T0 with RCON, then use T0 to
    # xor the result of each SBOX result of T3
    xor     $T0,$T0,$T4
    # use T4 to store rotated T3
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
$ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
$ret .= <<___;
    # update T0
    @{[fwdsbox4 $T0,$T4]}

    # update new T1~T3
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,16
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    }
    return $ret;
}

# Returns the assembly expanding a 192-bit user key into the encryption key
# schedule.  $zbkb has the same meaning as in ke128enc.
sub ke192enc {
    my $zbkb = shift;
    my $ret = '';
$ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
    for my $rnum (0..7) {
$ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
$ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
___
        if ($rnum != 7) {
            # note that (8+1)*24 = 216, (12+1)*16 = 208
            # thus the last 8 bytes can be dropped
$ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
___
        }
$ret .= <<___;
    add     $KEYP,$KEYP,24
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($rnum != 7) {
$ret .= <<___;
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
        }
    }
    return $ret;
}

# Returns the assembly expanding a 256-bit user key into the encryption key
# schedule.  $zbkb has the same meaning as in ke128enc.  $BITS is reused as a
# scratch register because the key size is no longer needed at this point.
sub ke256enc {
    my $zbkb = shift;
    my $ret = '';
$ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
    for my $rnum (0..6) {
$ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
$ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,32
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        if ($rnum != 6) {
            # note that (7+1)*32 = 256, (14+1)*16 = 240
            # thus the last 16 bytes can be dropped
$ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
        }
    }
    return $ret;
}

################################################################################
# int rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
#   AES_KEY *key)
################################################################################

# Returns the shared body of every set-key routine.  The three arguments are
# the already-generated key expansion sequences for 128, 192 and 256 bit keys.
# The emitted code leaves the status in a0 (0 on success, -1 if userKey or key
# is NULL, -2 for an unsupported key size) and falls through at label 5 so
# that the caller can append load_regs() and ret.
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
$ret .= <<___;
    beqz    $UKEY,6f        # if (!userKey || !key) return -1;
    bnez    $KEYP,1f
6:
    li      a0,-1
    j       5f
1:
    # Determine number of rounds from key size in bits
    li      $T0,128
    bne     $BITS,$T0,1f
    li      $T1,10          # key->rounds = 10 if bits == 128
    sw      $T1,240($KEYP)  # store key->rounds
$ke128
    j       4f
1:
    li      $T0,192
    bne     $BITS,$T0,2f
    li      $T1,12          # key->rounds = 12 if bits == 192
    sw      $T1,240($KEYP)  # store key->rounds
$ke192
    j       4f
2:
    li      $T1,14          # key->rounds = 14 if bits == 256
    li      $T0,256
    beq     $BITS,$T0,3f
    li      a0,-2           # If bits != 128, 192, or 256, return -2
    j       5f
3:
    sw      $T1,240($KEYP)  # store key->rounds
$ke256
4:  # return 0
    li      a0,0
5:  # return a0
___
    return $ret;
}

$code .= <<___;
.text
.balign 16
.globl rv32i_zkne_set_encrypt_key
.type   rv32i_zkne_set_encrypt_key,\@function
rv32i_zkne_set_encrypt_key:
___

$code .= save_regs();
$code .= AES_set_common(ke128enc(0), ke192enc(0),ke256enc(0));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# int rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
#   const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zkne_set_encrypt_key
.type   rv32i_zbkb_zkne_set_encrypt_key,\@function
rv32i_zbkb_zkne_set_encrypt_key:
___

$code .= save_regs();
$code .= AES_set_common(ke128enc(1), ke192enc(1),ke256enc(1));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# utility functions for rv32i_zknd_zkne_set_decrypt_key
################################################################################

# Arguments: destination register, scratch register, source register.
sub invm4 {
    # fwd sbox then inv sbox then mix column
    # the result is only mix column
    # this simulates aes64im T0
    my $rd = shift;
    my $tmp = shift;
    my $rs = shift;
    my $ret = <<___;
    li      $tmp,0
    li      $rd,0
    @{[fwdsbox4 $tmp,$rs]}
    @{[sbox4(\&aes32dsmi, $rd,$tmp)]}
___
    return $ret;
}

# Returns the assembly expanding a 128-bit user key into the decryption key
# schedule.  $zbkb has the same meaning as in ke128enc.
sub ke128dec {
    my $zbkb = shift;
    my $ret = '';
$ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    for my $rnum (0..9) {
$ret .= <<___;
    # see comments in ke128enc
    li      $T4,$rcon[$rnum]
    xor     $T0,$T0,$T4
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T4,$T3,8]}
___
        } else {
$ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T4]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
___
        # need to mixcolumn only for [1:N-1] round keys
        # this is from the fact that aes32dsmi subwords first then mix column
        # intuitively decryption needs to first mix column then subwords
        # however, for merging datapaths (encryption first subwords then mix column)
        # aes32dsmi chooses to inverse the order of them, thus
        # transform should then be done on the round key
        if ($rnum < 9) {
$ret .= <<___;
    # T4 and T5 are temp variables
    @{[invm4 $T5,$T4,$T0]}
    sw      $T5,0($KEYP)
    @{[invm4 $T5,$T4,$T1]}
    sw      $T5,4($KEYP)
    @{[invm4 $T5,$T4,$T2]}
    sw      $T5,8($KEYP)
    @{[invm4 $T5,$T4,$T3]}
    sw      $T5,12($KEYP)
___
        } else {
$ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
    }
    return $ret;
}

# Returns the assembly expanding a 192-bit user key into the decryption key
# schedule.  $zbkb has the same meaning as in ke128enc.
sub ke192dec {
    my $zbkb = shift;
    my $ret = '';
$ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T4]}
    sw      $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw      $T7,20($KEYP)
___
    for my $rnum (0..7) {
$ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T6,$T5,8]}
___
        } else {
$ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T6]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,24
___
        if ($rnum < 7) {
$ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4

    # see the comment in ke128dec
    # T7 and T6 are temp variables
    @{[invm4 $T7,$T6,$T0]}
    sw      $T7,0($KEYP)
    @{[invm4 $T7,$T6,$T1]}
    sw      $T7,4($KEYP)
    @{[invm4 $T7,$T6,$T2]}
    sw      $T7,8($KEYP)
    @{[invm4 $T7,$T6,$T3]}
    sw      $T7,12($KEYP)
    @{[invm4 $T7,$T6,$T4]}
    sw      $T7,16($KEYP)
    @{[invm4 $T7,$T6,$T5]}
    sw      $T7,20($KEYP)
___
        } else { # rnum == 7
$ret .= <<___;
    # the reason for dropping T4/T5 is in ke192enc
    # the reason for not invm4 is in ke128dec
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        }
    }
    return $ret;
}

# Returns the assembly expanding a 256-bit user key into the decryption key
# schedule.  $zbkb has the same meaning as in ke128enc.
sub ke256dec {
    my $zbkb = shift;
    my $ret = '';
$ret .= <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # BITS and T8 are temp variables
    # BITS are not used anymore
    @{[invm4 $T8,$BITS,$T4]}
    sw      $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw      $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw      $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw      $T8,28($KEYP)
___
    for my $rnum (0..6) {
$ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
$ret .= <<___;
    @{[rori $T8,$T7,8]}
___
        } else {
$ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
$ret .= <<___;
    @{[fwdsbox4 $T0,$T8]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,32
___
        if ($rnum < 6) {
$ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4 $T4,$T3]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6

    # see the comment in ke128dec
    # T8 and BITS are temp variables
    @{[invm4 $T8,$BITS,$T0]}
    sw      $T8,0($KEYP)
    @{[invm4 $T8,$BITS,$T1]}
    sw      $T8,4($KEYP)
    @{[invm4 $T8,$BITS,$T2]}
    sw      $T8,8($KEYP)
    @{[invm4 $T8,$BITS,$T3]}
    sw      $T8,12($KEYP)
    @{[invm4 $T8,$BITS,$T4]}
    sw      $T8,16($KEYP)
    @{[invm4 $T8,$BITS,$T5]}
    sw      $T8,20($KEYP)
    @{[invm4 $T8,$BITS,$T6]}
    sw      $T8,24($KEYP)
    @{[invm4 $T8,$BITS,$T7]}
    sw      $T8,28($KEYP)
___
        } else {
$ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # last 16 bytes are dropped
    # see the comment in ke256enc
___
        }
    }
    return $ret;
}

################################################################################
# int rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey, const int bits,
#   AES_KEY *key)
################################################################################
# a note on naming: set_decrypt_key needs aes32esi thus add zkne on name
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_zkne_set_decrypt_key
.type   rv32i_zknd_zkne_set_decrypt_key,\@function
rv32i_zknd_zkne_set_decrypt_key:
___
$code .= save_regs();
$code .= AES_set_common(ke128dec(0), ke192dec(0),ke256dec(0));
$code .= load_regs();
$code .= <<___;
    ret
___

################################################################################
# int rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
#   const int bits, AES_KEY *key)
################################################################################
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zknd_zkne_set_decrypt_key
.type   rv32i_zbkb_zknd_zkne_set_decrypt_key,\@function
rv32i_zbkb_zknd_zkne_set_decrypt_key:
___

$code .= save_regs();
$code .= AES_set_common(ke128dec(1), ke192dec(1),ke256dec(1));
$code .= load_regs();
$code .= <<___;
    ret
___

print $code;
close STDOUT or die "error closing STDOUT: $!";