#! /usr/bin/env perl
# Copyright 2021-2023 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html
#
# This module implements support for Armv8 SM3 instructions

# $output is the last argument if it looks like a file (it has an extension)
# $flavour is the first argument if it doesn't look like a file
use strict;
use warnings;

# NOTE: the header-copy loop near the end of this script echoes every leading
# comment/blank line of this file (up to the first code line, i.e. the
# "use strict;" above) into the generated assembly.  Do not insert commentary
# above that line unless it is meant to appear in the output.

my $output  = $#ARGV >= 0 && $ARGV[$#ARGV] =~ m|\.\w+$| ? pop : undef;
my $flavour = $#ARGV >= 0 && $ARGV[0] !~ m|\.| ? shift : undef;

# Directory part of this script's path, including the trailing separator.
# Taken from the match result in list context so a failed match can never
# pick up a stale $1; falls back to the empty string (current directory).
my ($dir) = $0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = "" unless defined $dir;

my $xlate;
( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or
( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or
die "can't locate arm-xlate.pl";

# Everything printed to STDOUT is piped through arm-xlate.pl.  All path
# arguments are quoted so that directories containing spaces work; an absent
# flavour/output is passed as an empty string.
my $flavour_arg = defined $flavour ? $flavour : "";
my $output_arg  = defined $output  ? $output  : "";
open OUT, '|-', "\"$^X\" \"$xlate\" $flavour_arg \"$output_arg\""
    or die "can't call $xlate: $!";
*STDOUT=*OUT;

# Accumulates the assembly text; the subs below append to it.
my $code;

# Message expanding:
#	Wj <- P1(W[j-16]^W[j-9]^(W[j-3]<<<15))^(W[j-13]<<<7)^W[j-6]
# Input: s0, s1, s2, s3
#	s0 = w0  | w1  | w2  | w3
#	s1 = w4  | w5  | w6  | w7
#	s2 = w8  | w9  | w10 | w11
#	s3 = w12 | w13 | w14 | w15
# Output: s4
# vtmp1, vtmp2 are scratch vector registers clobbered by the sequence.
sub msg_exp {
    my ($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2) = @_;

    $code .= <<___;
    // s4 = w7 | w8 | w9 | w10
    ext         $s4.16b, $s1.16b, $s2.16b, #12
    // vtmp1 = w3 | w4 | w5 | w6
    ext         $vtmp1.16b, $s0.16b, $s1.16b, #12
    // vtmp2 = w10 | w11 | w12 | w13
    ext         $vtmp2.16b, $s2.16b, $s3.16b, #8
    sm3partw1   $s4.4s, $s0.4s, $s3.4s
    sm3partw2   $s4.4s, $vtmp2.4s, $vtmp1.4s
___
    return;
}

# A round of the compression function
# Input:
#	ab                - selects the instruction variant: sm3tt1a/sm3tt2a
#	                    or sm3tt1b/sm3tt2b
#	vstate0 - vstate1 - hold the digest state (A - H)
#	vconst0 - vconst1 - used alternately to hold Tj <<< j; vconst1 receives
#	                    vconst0 rotated left by one bit for the next round
#	vtmp              - temporary register
#	vw                - for sm3tt1ab, vw = s0 eor s1
#	s0                - for sm3tt2ab, simply s0
#	i                 - lane index choosing wj' (from vw) and wj (from s0)
sub round {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp, $vw, $s0, $i) = @_;

    $code .= <<___;
    sm3ss1      $vtmp.4s, $vstate0.4s, $vconst0.4s, $vstate1.4s
    shl         $vconst1.4s, $vconst0.4s, #1
    sri         $vconst1.4s, $vconst0.4s, #31
    sm3tt1$ab     $vstate0.4s, $vtmp.4s, $vw.4s[$i]
    sm3tt2$ab     $vstate1.4s, $vtmp.4s, $s0.4s[$i]
___
    return;
}

# Four consecutive rounds consuming the four words held in s0.
#	ab                - instruction variant, passed through to round()
#	vstate0, vstate1  - digest state registers
#	vconst0, vconst1  - constant registers; they swap roles on every round
#	vtmp1             - receives s0 eor s1 (also scratch for msg_exp)
#	vtmp2             - scratch for round() (and for msg_exp)
#	s0 .. s3          - message registers; s4, when given, receives the next
#	                    expanded message words via msg_exp() before the
#	                    rounds are emitted.  When s4 is omitted no message
#	                    expansion is generated and s2/s3 are unused.
sub qround {
    my ($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp1, $vtmp2,
        $s0, $s1, $s2, $s3, $s4) = @_;

    msg_exp($s0, $s1, $s2, $s3, $s4, $vtmp1, $vtmp2) if ($s4);

    $code .= <<___;
    eor         $vtmp1.16b, $s0.16b, $s1.16b
___

    for my $lane (0 .. 3) {
        round($ab, $vstate0, $vstate1, $vconst0, $vconst1, $vtmp2,
              $vtmp1, $s0, $lane);
        # The freshly rotated constant becomes the input of the next round.
        ($vconst0, $vconst1) = ($vconst1, $vconst0);
    }
    return;
}

$code=<<___;
#include "arm_arch.h"
.text
___

{
my ($pstate,$pdata,$num)=("x0","x1","w2");
my ($state1,$state2)=("v5","v6");
my ($sconst1, $sconst2)=("s16","s17");
my ($vconst1, $vconst2)=("v16","v17");
my ($s0,$s1,$s2,$s3,$s4)=map("v$_",(0..4));
my ($bkstate1,$bkstate2)=("v18","v19");
my ($vconst_tmp1,$vconst_tmp2)=("v20","v21");
my ($vtmp1,$vtmp2)=("v22","v23");
my $constaddr="x8";

# NOTE(review): $num is the 32-bit view (w2) of the third argument although
# the C prototype below declares it as size_t; only the low 32 bits of the
# block count take part in the loop counter.  Confirm this is acceptable for
# all callers/ABIs before changing it.

# Emits four rounds with the fixed state/constant/scratch registers bound;
# only the instruction variant and the message registers vary per call.
my $quad = sub {
    my ($ab, @msg) = @_;
    qround($ab, $state1, $state2, $vconst_tmp1, $vconst_tmp2,
           $vtmp1, $vtmp2, @msg);
};

# void ossl_hwsm3_block_data_order(SM3_CTX *c, const void *p, size_t num)
$code.=<<___;
.globl  ossl_hwsm3_block_data_order
.type   ossl_hwsm3_block_data_order,%function
.align  5
ossl_hwsm3_block_data_order:
    AARCH64_VALID_CALL_TARGET
    // load state
    ld1         {$state1.4s-$state2.4s}, [$pstate]
    rev64       $state1.4s, $state1.4s
    rev64       $state2.4s, $state2.4s
    ext         $state1.16b, $state1.16b, $state1.16b, #8
    ext         $state2.16b, $state2.16b, $state2.16b, #8

    adr         $constaddr, .Tj
    ldp         $sconst1, $sconst2, [$constaddr]

.Loop:
    // load input
    ld1         {$s0.16b-$s3.16b}, [$pdata], #64
    sub         $num, $num, #1

    mov         $bkstate1.16b, $state1.16b
    mov         $bkstate2.16b, $state2.16b

#ifndef __ARMEB__
    rev32       $s0.16b, $s0.16b
    rev32       $s1.16b, $s1.16b
    rev32       $s2.16b, $s2.16b
    rev32       $s3.16b, $s3.16b
#endif

    ext         $vconst_tmp1.16b, $vconst1.16b, $vconst1.16b, #4
___

# 4 x 4 rounds with the "a" instruction variants, seeded from the first
# .Tj word.  The five message registers rotate by one position per call.
$quad->("a", $s0, $s1, $s2, $s3, $s4);
$quad->("a", $s1, $s2, $s3, $s4, $s0);
$quad->("a", $s2, $s3, $s4, $s0, $s1);
$quad->("a", $s3, $s4, $s0, $s1, $s2);

# Switch to the second .Tj word for the remaining rounds.
$code.=<<___;
    ext         $vconst_tmp1.16b, $vconst2.16b, $vconst2.16b, #4
___

# 12 x 4 rounds with the "b" instruction variants.
$quad->("b", $s4, $s0, $s1, $s2, $s3);
$quad->("b", $s0, $s1, $s2, $s3, $s4);
$quad->("b", $s1, $s2, $s3, $s4, $s0);
$quad->("b", $s2, $s3, $s4, $s0, $s1);
$quad->("b", $s3, $s4, $s0, $s1, $s2);
$quad->("b", $s4, $s0, $s1, $s2, $s3);
$quad->("b", $s0, $s1, $s2, $s3, $s4);
$quad->("b", $s1, $s2, $s3, $s4, $s0);
$quad->("b", $s2, $s3, $s4, $s0, $s1);
# The last three groups only pass two message registers, so qround() emits
# no further message expansion for them.
$quad->("b", $s3, $s4);
$quad->("b", $s4, $s0);
$quad->("b", $s0, $s1);

$code.=<<___;
    eor         $state1.16b, $state1.16b, $bkstate1.16b
    eor         $state2.16b, $state2.16b, $bkstate2.16b

    // any remained blocks?
    cbnz        $num, .Loop

    // save state
    rev64       $state1.4s, $state1.4s
    rev64       $state2.4s, $state2.4s
    ext         $state1.16b, $state1.16b, $state1.16b, #8
    ext         $state2.16b, $state2.16b, $state2.16b, #8
    st1         {$state1.4s-$state2.4s}, [$pstate]
    ret
.size   ossl_hwsm3_block_data_order,.-ossl_hwsm3_block_data_order

.align  3
.Tj:
.word   0x79cc4519, 0x9d8a7a87
___
}

#########################################
# Base opcodes of the SM3 instructions.  The un*() helpers below OR the
# operand register numbers (and lane index) into these and emit the result
# as a raw .inst word, so the output assembles without SM3 mnemonic support.
my %sm3partopcode = (
    "sm3partw1"   =>   0xce60C000,
    "sm3partw2"   =>   0xce60C400);

my %sm3ss1opcode = (
    "sm3ss1"      =>   0xce400000);

my %sm3ttopcode = (
    "sm3tt1a"     =>   0xce408000,
    "sm3tt1b"     =>   0xce408400,
    "sm3tt2a"     =>   0xce408800,
    "sm3tt2b"     =>   0xce408C00);

# Encode a three-register sm3partw1/sm3partw2 instruction.
#	mnemonic - key into %sm3partopcode
#	arg      - operand text, e.g. "v4.4s, v0.4s, v3.4s"
# The first register number goes to bits [4:0], the second is shifted to
# bit 5 and the third to bit 16.  Returns the ".inst" line with the original
# instruction kept as a trailing comment; dies if the operands cannot be
# parsed (instead of silently dropping the instruction from the output).
sub unsm3part {
    my ($mnemonic,$arg)=@_;

    $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/
        or die "can't encode '$mnemonic $arg'";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3partopcode{$mnemonic}|$1|($2<<5)|($3<<16),
                   $mnemonic,$arg;
}

# Encode the four-register sm3ss1 instruction.
#	mnemonic - key into %sm3ss1opcode
#	arg      - operand text with four vector registers
# Register numbers are placed at bits 0, 5, 16 and 10 respectively.
# Returns the ".inst" line; dies if the operands cannot be parsed.
sub unsm3ss1 {
    my ($mnemonic,$arg)=@_;

    $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)/
        or die "can't encode '$mnemonic $arg'";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3ss1opcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<10),
                   $mnemonic,$arg;
}

# Encode an sm3tt1a/sm3tt1b/sm3tt2a/sm3tt2b instruction.
#	mnemonic - key into %sm3ttopcode
#	arg      - operand text: three vector registers, the last one followed
#	           by a lane index [0-3]
# Register numbers are placed at bits 0, 5 and 16, the lane index at bit 12.
# Returns the ".inst" line; dies if the operands cannot be parsed (this
# includes a lane index outside 0..3).
sub unsm3tt {
    my ($mnemonic,$arg)=@_;

    $arg =~ m/[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*,\s*[qv](\d+)[^,]*\[([0-3])\]/
        or die "can't encode '$mnemonic $arg'";

    return sprintf ".inst\t0x%08x\t//%s %s",
                   $sm3ttopcode{$mnemonic}|$1|($2<<5)|($3<<16)|($4<<12),
                   $mnemonic,$arg;
}

# Copy this script's leading comment block (the licence header) into the
# output, turning '#' comments into '//' comments.  The shebang line is
# skipped and copying stops at the first line that is neither a comment nor
# empty.  Failure to re-open the script is reported but is not fatal: the
# generated code is still complete, only the header is missing.
if (open my $self, '<', $0) {
    while (my $line = <$self>) {
        next if ($line =~ /^#!/);
        last if (!($line =~ s/^#/\/\//) and $line !~ /^$/);
        print $line;
    }
    close $self;
} else {
    warn "can't open $0 to copy its header: $!\n";
}

# Post-process the accumulated code line by line: evaluate `...` expressions
# and replace SM3 mnemonics with their .inst encodings.
foreach my $line (split("\n",$code)) {
    $line =~ s/\`([^\`]*)\`/eval($1)/ge;

    $line =~ s/\b(sm3partw[1-2])\s+([qv].*)/unsm3part($1,$2)/ge;
    $line =~ s/\b(sm3ss1)\s+([qv].*)/unsm3ss1($1,$2)/ge;
    $line =~ s/\b(sm3tt[1-2][a-b])\s+([qv].*)/unsm3tt($1,$2)/ge;
    print $line,"\n";
}

close STDOUT or die "error closing STDOUT: $!";