xref: /openssl/crypto/aes/asm/aes-riscv32-zkn.pl (revision da1c088f)
1#! /usr/bin/env perl
2# This file is dual-licensed, meaning that you can use it under your
3# choice of either of the following two licenses:
4#
5# Copyright 2022-2023 The OpenSSL Project Authors. All Rights Reserved.
6#
7# Licensed under the Apache License 2.0 (the "License"). You can obtain
8# a copy in the file LICENSE in the source distribution or at
9# https://www.openssl.org/source/license.html
10#
11# or
12#
13# Copyright (c) 2022, Hongren (Zenithal) Zheng <i@zenithal.me>
14# All rights reserved.
15#
16# Redistribution and use in source and binary forms, with or without
17# modification, are permitted provided that the following conditions
18# are met:
19# 1. Redistributions of source code must retain the above copyright
20#    notice, this list of conditions and the following disclaimer.
21# 2. Redistributions in binary form must reproduce the above copyright
22#    notice, this list of conditions and the following disclaimer in the
23#    documentation and/or other materials provided with the distribution.
24#
25# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
26# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
27# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
28# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
29# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
30# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
31# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
32# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
33# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
34# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
35# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
36
# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
$output = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
$flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Redirect the generated assembly into $output when one was given.  The
# three-argument form keeps the file name from being parsed as an open mode,
# and a failure to open is reported instead of silently writing to the
# original STDOUT.
if ($output) {
    open(STDOUT, '>', $output) or die "can't open $output: $!";
}
43
44################################################################################
45# Utility functions to help with keeping track of which registers to stack/
46# unstack when entering / exiting routines.
47################################################################################
{
    # Callee-saved registers
    my @callee_saved = map("x$_",(2,8,9,18..27));
    # Caller-saved registers
    my @caller_saved = map("x$_",(1,5..7,10..17,28..31));
    # Exact-match membership tables for the two lists above
    my %is_callee_saved = map { ($_ => 1) } @callee_saved;
    my %is_caller_saved = map { ($_ => 1) } @caller_saved;
    # Callee-saved registers claimed since the last clear_regs(), in the
    # order they were first claimed; each register appears at most once.
    my @must_save;

    # Claims register $reg ("xN") for use by the routine being generated.
    # A callee-saved register is remembered so save_regs()/load_regs() stack
    # it; a register in neither list is rejected with die().
    # Returns the register name unchanged.
    sub use_reg {
        my $reg = shift;
        if ($is_callee_saved{$reg}) {
            # Claiming the same register twice must not create a second
            # stack slot for it.
            push(@must_save, $reg) unless grep { $_ eq $reg } @must_save;
        } elsif (!$is_caller_saved{$reg}) {
            # Register is not usable!
            die("Unusable register ".$reg);
        }
        return $reg;
    }

    # Claims registers by number: use_regs(6..9) returns ("x6".."x9").
    sub use_regs {
        return map(use_reg("x$_"), @_);
    }

    # Computes the stack frame shared by save_regs() and load_regs().
    # Returns (frame size in bytes, [register, sp offset] pairs).  Every
    # register gets an 8-byte slot, assigned from the top of the reserved
    # area downwards, and the frame size is rounded up to a multiple of 16.
    sub _saved_reg_frame {
        my $frame_size = scalar(@must_save) * 8;
        my $offset = $frame_size;
        my @slots = map { $offset -= 8; [$_, $offset] } @must_save;
        $frame_size += 8 if $frame_size % 16;
        return ($frame_size, @slots);
    }

    # Returns the prologue text: reserve the frame, then store each claimed
    # callee-saved register.
    sub save_regs {
        my ($frame_size, @slots) = _saved_reg_frame();
        my $ret = "    addi    sp,sp,-$frame_size\n";
        foreach my $slot (@slots) {
            my ($reg, $offset) = @$slot;
            $ret .= "    sw      $reg,$offset(sp)\n";
        }
        return $ret;
    }

    # Returns the epilogue text: reload each claimed callee-saved register,
    # then release the frame.
    sub load_regs {
        my ($frame_size, @slots) = _saved_reg_frame();
        my $ret = '';
        foreach my $slot (@slots) {
            my ($reg, $offset) = @$slot;
            $ret .= "    lw      $reg,$offset(sp)\n";
        }
        $ret .= "    addi    sp,sp,$frame_size\n";
        return $ret;
    }

    # Forgets all claimed registers; call between unrelated routines.
    sub clear_regs {
        @must_save = ();
    }
}
99
100################################################################################
101# util for encoding scalar crypto extension instructions
102################################################################################
103
# Register names x0..x31, plus a lookup table keyed by every accepted
# spelling.  Only the xN names themselves are entered here.
my @regs = map { 'x' . $_ } 0 .. 31;
my %reglookup = map { ($_ => $_) } @regs;

# Converts a register name (matched case-insensitively against %reglookup)
# to its register index from 0 to 31.  Dies on a name that is not in the
# table, or whose table entry is not of the form xN.
sub read_reg {
    my ($name) = @_;
    my $reg = lc $name;
    die("Unknown register ".$reg) unless exists($reglookup{$reg});
    my ($index) = $reglookup{$reg} =~ /^x([0-9]+)$/
        or die("Could not process register ".$reg);
    return $index;
}
121
sub aes32dsi {
    # Returns a ".word" directive encoding aes32dsi rd, rs1, rs2, bs (RV32).
    # Field layout:  bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    # Start from the fixed opcode bits and OR in each operand field.
    my $insn = 0b00_10101_00000_00000_000_00000_0110011;
    $insn |= $bs  << 30;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd  << 7;
    return ".word ".$insn;
}
133
sub aes32dsmi {
    # Returns a ".word" directive encoding aes32dsmi rd, rs1, rs2, bs (RV32).
    # Field layout:  bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    # Start from the fixed opcode bits and OR in each operand field.
    my $insn = 0b00_10111_00000_00000_000_00000_0110011;
    $insn |= $bs  << 30;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd  << 7;
    return ".word ".$insn;
}
145
sub aes32esi {
    # Returns a ".word" directive encoding aes32esi rd, rs1, rs2, bs (RV32).
    # Field layout:  bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    # Start from the fixed opcode bits and OR in each operand field.
    my $insn = 0b00_10001_00000_00000_000_00000_0110011;
    $insn |= $bs  << 30;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd  << 7;
    return ".word ".$insn;
}
157
sub aes32esmi {
    # Returns a ".word" directive encoding aes32esmi rd, rs1, rs2, bs (RV32).
    # Field layout:  bs_XXXXX_ rs2 _ rs1 _XXX_ rd  _XXXXXXX
    my ($rd_name, $rs1_name, $rs2_name, $bs) = @_;
    my $rd  = read_reg($rd_name);
    my $rs1 = read_reg($rs1_name);
    my $rs2 = read_reg($rs2_name);

    # Start from the fixed opcode bits and OR in each operand field.
    my $insn = 0b00_10011_00000_00000_000_00000_0110011;
    $insn |= $bs  << 30;
    $insn |= $rs2 << 20;
    $insn |= $rs1 << 15;
    $insn |= $rd  << 7;
    return ".word ".$insn;
}
169
sub rori {
    # Encoding for rori rd, rs1, shamt instruction on RV32
    #                XXXXXXX_shamt_ rs1 _XXX_ rd  _XXXXXXX
    # Returns the instruction as a ".word" directive.  Dies when shamt is
    # not an integer in 0..31.
    my $template = 0b0110000_00000_00000_101_00000_0010011;
    my $rd = read_reg(shift);
    my $rs1 = read_reg(shift);
    my $shamt = shift;

    # shamt occupies a 5-bit field; anything larger would spill into the
    # fixed bits above it and silently encode a different instruction.
    die("Invalid rori shift amount ".(defined($shamt) ? $shamt : 'undef'))
        unless defined($shamt) && $shamt =~ /^[0-9]+$/ && $shamt < 32;

    return ".word ".($template | ($shamt << 20) | ($rs1 << 15) | ($rd << 7));
}
180
181################################################################################
182# Register assignment for rv32i_zkne_encrypt and rv32i_zknd_decrypt
183################################################################################
184
# Registers initially to hold AES state (called s0-s3 or y0-y3 elsewhere)
# x8 and x9 are in the callee-saved list, so claiming them here makes
# save_regs()/load_regs() stack and restore them around each routine.
my ($Q0,$Q1,$Q2,$Q3) = use_regs(6..9);

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Input block pointer, output block pointer, key pointer
my ($INP,$OUTP,$KEYP) = use_regs(10..12);

# Registers initially to hold Key
# (the round code below alternates state and key between $Q0-$Q3 and $T0-$T3)
my ($T0,$T1,$T2,$T3) = use_regs(13..16);

# Loop counter
my ($loopcntr) = use_regs(30);
197
198################################################################################
199# Utility for rv32i_zkne_encrypt and rv32i_zknd_decrypt
200################################################################################
201
202# outer product of whole state into one column of key
sub outer {
    # Applies $inst four times with byte selector 0..3, each time using
    # $key as both destination and first source and the matching state word
    # as second source.  Returns the four results, one per indented line.
    my ($inst, $key, @state) = @_;
    my $ret = '';
    foreach my $bs (0 .. 3) {
        $ret .= "    @{[$inst->($key,$key,$state[$bs],$bs)]}\n";
    }
    return $ret;
}
220
sub aes32esmi4 {
    # (key, s0, s1, s2, s3): four aes32esmi steps folding the state words
    # into the key register, via outer().
    my @operands = @_;
    return outer(\&aes32esmi, @operands);
}
224
sub aes32esi4 {
    # (key, s0, s1, s2, s3): four aes32esi steps folding the state words
    # into the key register, via outer().
    my @operands = @_;
    return outer(\&aes32esi, @operands);
}
228
sub aes32dsmi4 {
    # (key, s0, s1, s2, s3): four aes32dsmi steps folding the state words
    # into the key register, via outer().
    my @operands = @_;
    return outer(\&aes32dsmi, @operands);
}
232
sub aes32dsi4 {
    # (key, s0, s1, s2, s3): four aes32dsi steps folding the state words
    # into the key register, via outer().
    my @operands = @_;
    return outer(\&aes32dsi, @operands);
}
236
237################################################################################
238# void rv32i_zkne_encrypt(const unsigned char *in, unsigned char *out,
239#   const AES_KEY *key);
240################################################################################
# $code accumulates the whole generated assembly file; this first chunk is
# the section/alignment/symbol header of rv32i_zkne_encrypt.  It is a plain
# initialisation: appending to a variable in its own declaration only worked
# because the fresh variable was empty.
my $code = <<___;
.text
.balign 16
.globl rv32i_zkne_encrypt
.type   rv32i_zkne_encrypt,\@function
rv32i_zkne_encrypt:
___
248
# Prologue: stack the callee-saved registers claimed above.
$code .= save_regs();

# Body of rv32i_zkne_encrypt.  The state and the round key swap roles
# between $Q0-$Q3 and $T0-$T3 every round: a round key is loaded into one
# set and the aes32es[m]i4 steps fold the other set into it, leaving the
# new state where the key was.  The loop handles two rounds per iteration;
# the last two rounds follow it, the final one without MixColumns.
$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    @{[aes32esmi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esmi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esmi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esmi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

# final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32esmi4 $T0,$Q0,$Q1,$Q2,$Q3]}
    @{[aes32esmi4 $T1,$Q1,$Q2,$Q3,$Q0]}
    @{[aes32esmi4 $T2,$Q2,$Q3,$Q0,$Q1]}
    @{[aes32esmi4 $T3,$Q3,$Q0,$Q1,$Q2]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
    @{[aes32esi4 $Q0,$T0,$T1,$T2,$T3]}
    @{[aes32esi4 $Q1,$T1,$T2,$T3,$T0]}
    @{[aes32esi4 $Q2,$T2,$T3,$T0,$T1]}
    @{[aes32esi4 $Q3,$T3,$T0,$T1,$T2]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

# Epilogue: restore the stacked registers, then return to the caller.
$code .= load_regs();

$code .= <<___;
    ret
___
347
348################################################################################
349# void rv32i_zknd_decrypt(const unsigned char *in, unsigned char *out,
350#   const AES_KEY *key);
351################################################################################
# Section/alignment/symbol header of rv32i_zknd_decrypt.
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_decrypt
.type   rv32i_zknd_decrypt,\@function
rv32i_zknd_decrypt:
___

# Prologue: stack the callee-saved registers claimed above.
$code .= save_regs();

# Body of rv32i_zknd_decrypt.  Same register ping-pong as the encrypt
# routine, but the key pointer is first advanced by rounds*16 bytes and then
# walks backwards through the schedule, and the state words are passed to
# the aes32ds[m]i4 steps in the opposite rotation order.
$code .= <<___;
    # Load input to block cipher
    lw      $Q0,0($INP)
    lw      $Q1,4($INP)
    lw      $Q2,8($INP)
    lw      $Q3,12($INP)

    # Load number of rounds
    lw      $loopcntr,240($KEYP)

    # Load the last key
    # use T0 as temporary now
    slli    $T0,$loopcntr,4
    add     $KEYP,$KEYP,$T0
    # Load key
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    # initial transformation
    xor     $Q0,$Q0,$T0
    xor     $Q1,$Q1,$T1
    xor     $Q2,$Q2,$T2
    xor     $Q3,$Q3,$T3

    # The main loop only executes the first N-2 rounds, each loop consumes two rounds
    add     $loopcntr,$loopcntr,-2
    srli    $loopcntr,$loopcntr,1
1:
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    @{[aes32dsmi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsmi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsmi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsmi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    add     $loopcntr,$loopcntr,-1
    bgtz    $loopcntr,1b

# final two rounds
    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $T0,0($KEYP)
    lw      $T1,4($KEYP)
    lw      $T2,8($KEYP)
    lw      $T3,12($KEYP)

    @{[aes32dsmi4 $T0,$Q0,$Q3,$Q2,$Q1]}
    @{[aes32dsmi4 $T1,$Q1,$Q0,$Q3,$Q2]}
    @{[aes32dsmi4 $T2,$Q2,$Q1,$Q0,$Q3]}
    @{[aes32dsmi4 $T3,$Q3,$Q2,$Q1,$Q0]}
    # now T0~T3 hold the new state

    # Grab next key in schedule
    add     $KEYP,$KEYP,-16
    lw      $Q0,0($KEYP)
    lw      $Q1,4($KEYP)
    lw      $Q2,8($KEYP)
    lw      $Q3,12($KEYP)

    # no mix column now
    @{[aes32dsi4 $Q0,$T0,$T3,$T2,$T1]}
    @{[aes32dsi4 $Q1,$T1,$T0,$T3,$T2]}
    @{[aes32dsi4 $Q2,$T2,$T1,$T0,$T3]}
    @{[aes32dsi4 $Q3,$T3,$T2,$T1,$T0]}
    # now Q0~Q3 hold the new state

    sw      $Q0,0($OUTP)
    sw      $Q1,4($OUTP)
    sw      $Q2,8($OUTP)
    sw      $Q3,12($OUTP)

    # Pop registers and return
___

# Epilogue: restore the stacked registers, then return to the caller.
$code .= load_regs();

$code .= <<___;
    ret
___
462
# Start a fresh register set for the key-schedule routines; the registers
# claimed for the cipher routines no longer need to be stacked.
clear_regs();

################################################################################
# Register assignment for rv32i_zkn[e/d]_set_[en/de]crypt
################################################################################

# Function arguments (x10-x12 are a0-a2 in the ABI)
# Pointer to user key, number of bits in key, key pointer
# NOTE(review): $KEYP (and $T0-$T3 below) are declared a second time in the
# same scope; code from here on binds to these new declarations.
my ($UKEY,$BITS,$KEYP) = use_regs(10..12);

# Temporaries
# None of these (nor x10-x12) is in the callee-saved list, so the key
# routines have nothing to stack.
my ($T0,$T1,$T2,$T3,$T4,$T5,$T6,$T7,$T8) = use_regs(13..17,28..31);

################################################################################
# utility functions for rv32i_zkne_set_encrypt_key
################################################################################

# Round constants, indexed by round number in the ke*enc/ke*dec generators.
my @rcon = (0x01, 0x02, 0x04, 0x08, 0x10, 0x20, 0x40, 0x80, 0x1b, 0x36);
481
482# do 4 sbox on 4 bytes of rs, (possibly mix), then xor with rd
sub sbox4 {
    # Applies $inst to $rs once per byte selector 0..3, accumulating into
    # $rd ($rd is both destination and first source).  Returns the four
    # results, one per indented line.
    my ($inst, $rd, $rs) = @_;
    return join('', map {
        my $bs = $_;
        "    @{[$inst->($rd,$rd,$rs,$bs)]}\n";
    } 0 .. 3);
}
495
sub fwdsbox4 {
    # (rd, rs): sbox4() specialised to the aes32esi instruction.
    my @operands = @_;
    return sbox4(\&aes32esi, @operands);
}
499
sub ke128enc {
    # Returns the fully unrolled 128-bit encryption key expansion: the user
    # key is copied out as round key 0, followed by ten derived round keys
    # of 16 bytes each.
    # $zbkb: true to rotate with rori, false to use srli/slli/or.
    my ($zbkb) = @_;

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    foreach my $rnum (0 .. 9) {
        $ret .= <<___;
    # use T4 to store rcon
    li      $T4,$rcon[$rnum]
    # as xor is associative and commutative
    # we fist xor T0 with RCON, then use T0 to
    # xor the result of each SBOX result of T3
    xor     $T0,$T0,$T4
    # use T4 to store rotated T3
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= "    ".rori($T4,$T3,8)."\n";
        } else {
            $ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
        $ret .= <<___;
    # update T0
    @{[fwdsbox4($T0,$T4)]}

    # update new T1~T3
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,16
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    }
    return $ret;
}
556
sub ke192enc {
    # Returns the fully unrolled 192-bit encryption key expansion: the
    # 24-byte user key is copied out, then eight steps each derive the next
    # 24 bytes (the final step only its first 16).
    # $zbkb: true to rotate with rori, false to use srli/slli/or.
    my ($zbkb) = @_;
    my $last_step = 7;

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
    foreach my $rnum (0 .. $last_step) {
        my $is_last = ($rnum == $last_step);
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= "    ".rori($T6,$T5,8)."\n";
        } else {
            $ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
        $ret .= <<___;
    @{[fwdsbox4($T0,$T6)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
___
        # note that (8+1)*24 = 216, (12+1)*16 = 208
        # thus the last 8 bytes can be dropped
        $ret .= <<___ unless $is_last;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4
___
        $ret .= <<___;
    add     $KEYP,$KEYP,24
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        $ret .= <<___ unless $is_last;
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
___
    }
    return $ret;
}
625
sub ke256enc {
    # Returns the fully unrolled 256-bit encryption key expansion: the
    # 32-byte user key is copied out, then seven steps each derive the next
    # 32 bytes (the final step only its first 16).
    # $zbkb: true to rotate with rori, false to use srli/slli/or (with
    # $BITS as scratch register).
    my ($zbkb) = @_;
    my $last_step = 6;

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
    foreach my $rnum (0 .. $last_step) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= "    ".rori($T8,$T7,8)."\n";
        } else {
            $ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
        $ret .= <<___;
    @{[fwdsbox4($T0,$T8)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,32
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
        # note that (7+1)*32 = 256, (14+1)*16 = 240
        # thus the last 16 bytes can be dropped
        next if $rnum == $last_step;
        $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4($T4,$T3)]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6
    sw      $T4,16($KEYP)
    sw      $T5,20($KEYP)
    sw      $T6,24($KEYP)
    sw      $T7,28($KEYP)
___
    }
    return $ret;
}
698
699################################################################################
700# void rv32i_zkne_set_encrypt_key(const unsigned char *userKey, const int bits,
701#   AES_KEY *key)
702################################################################################
# Returns the argument checking and key-size dispatch shared by all
# set_*_key routines.
#   $ke128, $ke192, $ke256: assembly text of the key expansion to run for a
#   128-, 192- or 256-bit key respectively.
# The generated code leaves the result in a0 and falls through at label 5,
# so the caller appends the epilogue and "ret":
#   -1 if userKey or key is NULL, -2 for an unsupported bit count, else 0.
sub AES_set_common {
    my ($ke128, $ke192, $ke256) = @_;
    my $ret = '';
    # Either pointer being NULL must fail: the first test branches to the
    # failure code as soon as userKey is NULL, and only a non-NULL key skips
    # over it.  The failure path joins the common exit at 5 rather than
    # returning directly, so the caller's epilogue always runs.
$ret .= <<___;
    beqz    $UKEY,6f        # if (!userKey || !key) return -1;
    bnez    $KEYP,1f
6:
    li      a0,-1
    j       5f
1:
    # Determine number of rounds from key size in bits
    li      $T0,128
    bne     $BITS,$T0,1f
    li      $T1,10          # key->rounds = 10 if bits == 128
    sw      $T1,240($KEYP)  # store key->rounds
$ke128
    j       4f
1:
    li      $T0,192
    bne     $BITS,$T0,2f
    li      $T1,12          # key->rounds = 12 if bits == 192
    sw      $T1,240($KEYP)  # store key->rounds
$ke192
    j       4f
2:
    li      $T1,14          # key->rounds = 14 if bits == 256
    li      $T0,256
    beq     $BITS,$T0,3f
    li      a0,-2           # If bits != 128, 192, or 256, return -2
    j       5f
3:
    sw      $T1,240($KEYP)  # store key->rounds
$ke256
4:  # return 0
    li      a0,0
5:  # return a0
___
    return $ret;
}
# rv32i_zkne_set_encrypt_key: symbol header, then prologue, shared dispatch
# around the shift-based key expansions, epilogue and return.
$code .= <<___;
.text
.balign 16
.globl rv32i_zkne_set_encrypt_key
.type rv32i_zkne_set_encrypt_key,\@function
rv32i_zkne_set_encrypt_key:
___

$code .= join('',
    save_regs(),
    AES_set_common(ke128enc(0), ke192enc(0), ke256enc(0)),
    load_regs(),
    "    ret\n",
);
755
756################################################################################
757# void rv32i_zbkb_zkne_set_encrypt_key(const unsigned char *userKey,
758#   const int bits, AES_KEY *key)
759################################################################################
# rv32i_zbkb_zkne_set_encrypt_key: symbol header, then prologue, shared
# dispatch around the rori-based key expansions, epilogue and return.
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zkne_set_encrypt_key
.type rv32i_zbkb_zkne_set_encrypt_key,\@function
rv32i_zbkb_zkne_set_encrypt_key:
___

$code .= join('',
    save_regs(),
    AES_set_common(ke128enc(1), ke192enc(1), ke256enc(1)),
    load_regs(),
    "    ret\n",
);
774
775################################################################################
776# utility functions for rv32i_zknd_zkne_set_decrypt_key
777################################################################################
778
sub invm4 {
    # (rd, tmp, rs): forward S-box of $rs into $tmp, then aes32dsmi (inverse
    # S-box followed by mix column) of $tmp into $rd.  The two S-boxes
    # cancel, so only the mix column remains; this simulates aes64im.
    # Both $rd and $tmp are zeroed first and are overwritten.
    my ($rd, $tmp, $rs) = @_;
    my $forward = fwdsbox4($tmp,$rs);
    my $inverse_mix = sbox4(\&aes32dsmi, $rd,$tmp);
    return "    li      $tmp,0\n"
         . "    li      $rd,0\n"
         . "    $forward\n"
         . "    $inverse_mix\n";
}
794
sub ke128dec {
    # Returns the fully unrolled 128-bit decryption key expansion.  The
    # round keys are derived as in ke128enc, but round keys 1..9 are passed
    # through invm4 before being stored; round keys 0 and 10 are stored
    # unchanged.
    # $zbkb: true to rotate with rori, false to use srli/slli/or.
    my ($zbkb) = @_;

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
    foreach my $rnum (0 .. 9) {
        $ret .= <<___;
    # see comments in ke128enc
    li      $T4,$rcon[$rnum]
    xor     $T0,$T0,$T4
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= "    ".rori($T4,$T3,8)."\n";
        } else {
            $ret .= <<___;
    srli    $T4,$T3,8
    slli    $T5,$T3,24
    or      $T4,$T4,$T5
___
        }
        $ret .= <<___;
    @{[fwdsbox4($T0,$T4)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2
    add     $KEYP,$KEYP,16
___
        # need to mixcolumn only for [1:N-1] round keys
        # this is from the fact that aes32dsmi subwords first then mix column
        # intuitively decryption needs to first mix column then subwords
        # however, for merging datapaths (encryption first subwords then mix column)
        # aes32dsmi chooses to inverse the order of them, thus
        # transform should then be done on the round key
        if ($rnum >= 9) {
            # last round key: stored as is
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
            next;
        }
        $ret .= "    # T4 and T5 are temp variables\n";
        my $offset = 0;
        foreach my $word ($T0, $T1, $T2, $T3) {
            $ret .= "    ".invm4($T5,$T4,$word)."\n";
            $ret .= "    sw      $T5,${offset}($KEYP)\n";
            $offset += 4;
        }
    }
    return $ret;
}
865
sub ke192dec {
    # Returns the fully unrolled 192-bit decryption key expansion.  Words
    # are derived as in ke192enc; every stored word except the first four
    # and the last four goes through invm4 first.
    # $zbkb: true to rotate with rori, false to use srli/slli/or.
    my ($zbkb) = @_;
    my $last_step = 7;

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # T7 and T6 are temp variables
___
    # The upper two words of the user key already belong to round key 1.
    my $offset = 16;
    foreach my $word ($T4, $T5) {
        $ret .= "    ".invm4($T7,$T6,$word)."\n";
        $ret .= "    sw      $T7,${offset}($KEYP)\n";
        $offset += 4;
    }

    foreach my $rnum (0 .. $last_step) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T6,$rcon[$rnum]
    xor     $T0,$T0,$T6
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= "    ".rori($T6,$T5,8)."\n";
        } else {
            $ret .= <<___;
    srli    $T6,$T5,8
    slli    $T7,$T5,24
    or      $T6,$T6,$T7
___
        }
        $ret .= <<___;
    @{[fwdsbox4($T0,$T6)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,24
___
        if ($rnum == $last_step) {
            $ret .= <<___;
    # the reason for dropping T4/T5 is in ke192enc
    # the reason for not invm4 is in ke128dec
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
___
            next;
        }
        $ret .= <<___;
    xor     $T4,$T4,$T3
    xor     $T5,$T5,$T4

    # see the comment in ke128dec
    # T7 and T6 are temp variables
___
        $offset = 0;
        foreach my $word ($T0, $T1, $T2, $T3, $T4, $T5) {
            $ret .= "    ".invm4($T7,$T6,$word)."\n";
            $ret .= "    sw      $T7,${offset}($KEYP)\n";
            $offset += 4;
        }
    }
    return $ret;
}
949
sub ke256dec {
    # Returns the fully unrolled 256-bit decryption key expansion.  Words
    # are derived as in ke256enc; every stored word except the first four
    # and the last four goes through invm4 first.  $BITS serves as a scratch
    # register throughout.
    # $zbkb: true to rotate with rori, false to use srli/slli/or.
    my ($zbkb) = @_;
    my $last_step = 6;

    my $ret = <<___;
    lw      $T0,0($UKEY)
    lw      $T1,4($UKEY)
    lw      $T2,8($UKEY)
    lw      $T3,12($UKEY)
    lw      $T4,16($UKEY)
    lw      $T5,20($UKEY)
    lw      $T6,24($UKEY)
    lw      $T7,28($UKEY)

    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # see the comment in ke128dec
    # BITS and T8 are temp variables
    # BITS are not used anymore
___
    # The upper half of the user key is round key 1.
    my $offset = 16;
    foreach my $word ($T4, $T5, $T6, $T7) {
        $ret .= "    ".invm4($T8,$BITS,$word)."\n";
        $ret .= "    sw      $T8,${offset}($KEYP)\n";
        $offset += 4;
    }

    foreach my $rnum (0 .. $last_step) {
        $ret .= <<___;
    # see the comment in ke128enc
    li      $T8,$rcon[$rnum]
    xor     $T0,$T0,$T8
___
        # right rotate by 8
        if ($zbkb) {
            $ret .= "    ".rori($T8,$T7,8)."\n";
        } else {
            $ret .= <<___;
    srli    $T8,$T7,8
    slli    $BITS,$T7,24
    or      $T8,$T8,$BITS
___
        }
        $ret .= <<___;
    @{[fwdsbox4($T0,$T8)]}
    xor     $T1,$T1,$T0
    xor     $T2,$T2,$T1
    xor     $T3,$T3,$T2

    add     $KEYP,$KEYP,32
___
        if ($rnum >= $last_step) {
            $ret .= <<___;
    sw      $T0,0($KEYP)
    sw      $T1,4($KEYP)
    sw      $T2,8($KEYP)
    sw      $T3,12($KEYP)
    # last 16 bytes are dropped
    # see the comment in ke256enc
___
            next;
        }
        $ret .= <<___;
    # for aes256, T3->T4 needs 4sbox but no rotate/rcon
    @{[fwdsbox4($T4,$T3)]}
    xor     $T5,$T5,$T4
    xor     $T6,$T6,$T5
    xor     $T7,$T7,$T6

    # see the comment in ke128dec
    # T8 and BITS are temp variables
___
        $offset = 0;
        foreach my $word ($T0, $T1, $T2, $T3, $T4, $T5, $T6, $T7) {
            $ret .= "    ".invm4($T8,$BITS,$word)."\n";
            $ret .= "    sw      $T8,${offset}($KEYP)\n";
            $offset += 4;
        }
    }
    return $ret;
}
1047
1048################################################################################
1049# void rv32i_zknd_zkne_set_decrypt_key(const unsigned char *userKey, const int bits,
1050#   AES_KEY *key)
1051################################################################################
1052# a note on naming: set_decrypt_key needs aes32esi thus add zkne on name
# rv32i_zknd_zkne_set_decrypt_key: symbol header, then prologue, shared
# dispatch around the shift-based decryption key expansions, epilogue and
# return.
$code .= <<___;
.text
.balign 16
.globl rv32i_zknd_zkne_set_decrypt_key
.type   rv32i_zknd_zkne_set_decrypt_key,\@function
rv32i_zknd_zkne_set_decrypt_key:
___
$code .= join('',
    save_regs(),
    AES_set_common(ke128dec(0), ke192dec(0), ke256dec(0)),
    load_regs(),
    "    ret\n",
);
1066
1067################################################################################
1068# void rv32i_zbkb_zknd_zkne_set_decrypt_key(const unsigned char *userKey,
1069#   const int bits, AES_KEY *key)
1070################################################################################
# rv32i_zbkb_zknd_zkne_set_decrypt_key: symbol header, then prologue, shared
# dispatch around the rori-based decryption key expansions, epilogue and
# return.
$code .= <<___;
.text
.balign 16
.globl rv32i_zbkb_zknd_zkne_set_decrypt_key
.type rv32i_zbkb_zknd_zkne_set_decrypt_key,\@function
rv32i_zbkb_zknd_zkne_set_decrypt_key:
___

$code .= join('',
    save_regs(),
    AES_set_common(ke128dec(1), ke192dec(1), ke256dec(1)),
    load_regs(),
    "    ret\n",
);
1085
1086
1087
# Write out everything accumulated in $code in one go.
print $code;
# Closing explicitly makes a failed flush of the buffered output fatal
# instead of leaving a truncated file behind unnoticed.
close STDOUT or die "error closing STDOUT: $!";
1090