#! /usr/bin/env perl
# Copyright 2012-2021 The OpenSSL Project Authors. All Rights Reserved.
#
# Licensed under the Apache License 2.0 (the "License"). You may not use
# this file except in compliance with the License. You can obtain a copy
# in the file LICENSE in the source distribution or at
# https://www.openssl.org/source/license.html


# ====================================================================
# Written by David S. Miller and Andy Polyakov.
# The module is licensed under 2-clause BSD license. October 2012.
# All rights reserved.
# ====================================================================

######################################################################
# AES for SPARC T4.
#
# AES round instructions complete in 3 cycles and can be issued every
# cycle. It means that round calculations should take 4*rounds cycles,
# because any given round instruction depends on result of *both*
# previous instructions:
#
#	|0 |1 |2 |3 |4
#	|01|01|01|
#	   |23|23|23|
#	      |01|01|...
#	         |23|...
#
# Provided that fxor [with IV] takes 3 cycles to complete, critical
# path length for CBC encrypt would be 3+4*rounds, or in other words
# it should process one byte in at least (3+4*rounds)/16 cycles. This
# estimate doesn't account for "collateral" instructions, such as
# fetching input from memory, xor-ing it with zero-round key and
# storing the result. Yet, *measured* performance [for data aligned
# at 64-bit boundary!] deviates from this equation by less than 0.5%:
#
#			128-bit key	192-		256-
# CBC encrypt		2.70/2.90(*)	3.20/3.40	3.70/3.90
#			 (*) numbers after slash are for
#			     misaligned data.
#
# Out-of-order execution logic managed to fully overlap "collateral"
# instructions with those on critical path. Amazing!
#
# As with Intel AES-NI, the question is whether it's possible to
# improve performance of parallelizable modes by interleaving round
# instructions.
# Provided round instruction latency and throughput, the optimal
# interleave factor is 2. But can we expect 2x performance
# improvement? Well, as round instructions can be issued one per
# cycle, they don't saturate the 2-way issue pipeline and therefore
# there is room for "collateral" calculations... Yet, 2x speed-up
# over CBC encrypt remains unattainable:
#
#			128-bit key	192-		256-
# CBC decrypt		1.64/2.11	1.89/2.37	2.23/2.61
# CTR			1.64/2.08(*)	1.89/2.33	2.23/2.61
#			 (*) numbers after slash are for
#			     misaligned data.
#
# Estimates based on amount of instructions under assumption that
# round instructions are not pairable with any other instruction
# suggest that latter is the actual case and pipeline runs
# underutilized. It should be noted that T4 out-of-order execution
# logic is so capable that performance gain from 2x interleave is
# not even impressive, ~7-13% over non-interleaved code, largest
# for 256-bit keys.

# To anchor to something else, software implementation processes
# one byte in 29 cycles with 128-bit key on same processor. Intel
# Sandy Bridge encrypts byte in 5.07 cycles in CBC mode and decrypts
# in 0.93, naturally with AES-NI.

# Locate the directory this script lives in so the shared perlasm
# helpers can be pulled in relative to it; fall back to the current
# directory when $0 carries no path component (the original used $1
# unconditionally, which warns and mis-builds @INC in that case).
$0 =~ m/(.*[\/\\])[^\/\\]+$/;
$dir = defined($1) ? $1 : "./";
push(@INC,"${dir}","${dir}../../perlasm");
require "sparcv9_modes.pl";

# Last command-line argument, if present, names the output file;
# otherwise the generated assembly goes to stdout. Use three-argument
# open (two-argument open would interpret characters in the filename)
# and fail loudly instead of silently producing no output.
$output = pop;
if ($output) {
	open STDOUT, '>', $output
	    or die "Can't open $output for writing: $!";
}

$::evp=1;	# if $evp is set to 0, script generates module with
# AES_[en|de]crypt, AES_set_[en|de]crypt_key and AES_cbc_encrypt entry
# points. These however are not fully compatible with openssl/aes.h,
# because they expect AES_KEY to be aligned at 64-bit boundary. When
# used through EVP, alignment is arranged at EVP layer. Second thing
# that is arranged by EVP is at least 32-bit alignment of IV.
86 87###################################################################### 88# single-round subroutines 89# 90{ 91my ($inp,$out,$key,$rounds,$tmp,$mask)=map("%o$_",(0..5)); 92 93$code.=<<___; 94#ifndef __ASSEMBLER__ 95# define __ASSEMBLER__ 1 96#endif 97#include "crypto/sparc_arch.h" 98 99#ifdef __arch64__ 100.register %g2,#scratch 101.register %g3,#scratch 102#endif 103 104.text 105 106.globl aes_t4_encrypt 107.align 32 108aes_t4_encrypt: 109 andcc $inp, 7, %g1 ! is input aligned? 110 andn $inp, 7, $inp 111 112 ldx [$key + 0], %g4 113 ldx [$key + 8], %g5 114 115 ldx [$inp + 0], %o4 116 bz,pt %icc, 1f 117 ldx [$inp + 8], %o5 118 ldx [$inp + 16], $inp 119 sll %g1, 3, %g1 120 sub %g0, %g1, %o3 121 sllx %o4, %g1, %o4 122 sllx %o5, %g1, %g1 123 srlx %o5, %o3, %o5 124 srlx $inp, %o3, %o3 125 or %o5, %o4, %o4 126 or %o3, %g1, %o5 1271: 128 ld [$key + 240], $rounds 129 ldd [$key + 16], %f12 130 ldd [$key + 24], %f14 131 xor %g4, %o4, %o4 132 xor %g5, %o5, %o5 133 movxtod %o4, %f0 134 movxtod %o5, %f2 135 srl $rounds, 1, $rounds 136 ldd [$key + 32], %f16 137 sub $rounds, 1, $rounds 138 ldd [$key + 40], %f18 139 add $key, 48, $key 140 141.Lenc: 142 aes_eround01 %f12, %f0, %f2, %f4 143 aes_eround23 %f14, %f0, %f2, %f2 144 ldd [$key + 0], %f12 145 ldd [$key + 8], %f14 146 sub $rounds,1,$rounds 147 aes_eround01 %f16, %f4, %f2, %f0 148 aes_eround23 %f18, %f4, %f2, %f2 149 ldd [$key + 16], %f16 150 ldd [$key + 24], %f18 151 brnz,pt $rounds, .Lenc 152 add $key, 32, $key 153 154 andcc $out, 7, $tmp ! is output aligned? 155 aes_eround01 %f12, %f0, %f2, %f4 156 aes_eround23 %f14, %f0, %f2, %f2 157 aes_eround01_l %f16, %f4, %f2, %f0 158 aes_eround23_l %f18, %f4, %f2, %f2 159 160 bnz,pn %icc, 2f 161 nop 162 163 std %f0, [$out + 0] 164 retl 165 std %f2, [$out + 8] 166 1672: alignaddrl $out, %g0, $out 168 mov 0xff, $mask 169 srl $mask, $tmp, $mask 170 171 faligndata %f0, %f0, %f4 172 faligndata %f0, %f2, %f6 173 faligndata %f2, %f2, %f8 174 175 stda %f4, [$out + $mask]0xc0 ! 
partial store 176 std %f6, [$out + 8] 177 add $out, 16, $out 178 orn %g0, $mask, $mask 179 retl 180 stda %f8, [$out + $mask]0xc0 ! partial store 181.type aes_t4_encrypt,#function 182.size aes_t4_encrypt,.-aes_t4_encrypt 183 184.globl aes_t4_decrypt 185.align 32 186aes_t4_decrypt: 187 andcc $inp, 7, %g1 ! is input aligned? 188 andn $inp, 7, $inp 189 190 ldx [$key + 0], %g4 191 ldx [$key + 8], %g5 192 193 ldx [$inp + 0], %o4 194 bz,pt %icc, 1f 195 ldx [$inp + 8], %o5 196 ldx [$inp + 16], $inp 197 sll %g1, 3, %g1 198 sub %g0, %g1, %o3 199 sllx %o4, %g1, %o4 200 sllx %o5, %g1, %g1 201 srlx %o5, %o3, %o5 202 srlx $inp, %o3, %o3 203 or %o5, %o4, %o4 204 or %o3, %g1, %o5 2051: 206 ld [$key + 240], $rounds 207 ldd [$key + 16], %f12 208 ldd [$key + 24], %f14 209 xor %g4, %o4, %o4 210 xor %g5, %o5, %o5 211 movxtod %o4, %f0 212 movxtod %o5, %f2 213 srl $rounds, 1, $rounds 214 ldd [$key + 32], %f16 215 sub $rounds, 1, $rounds 216 ldd [$key + 40], %f18 217 add $key, 48, $key 218 219.Ldec: 220 aes_dround01 %f12, %f0, %f2, %f4 221 aes_dround23 %f14, %f0, %f2, %f2 222 ldd [$key + 0], %f12 223 ldd [$key + 8], %f14 224 sub $rounds,1,$rounds 225 aes_dround01 %f16, %f4, %f2, %f0 226 aes_dround23 %f18, %f4, %f2, %f2 227 ldd [$key + 16], %f16 228 ldd [$key + 24], %f18 229 brnz,pt $rounds, .Ldec 230 add $key, 32, $key 231 232 andcc $out, 7, $tmp ! is output aligned? 233 aes_dround01 %f12, %f0, %f2, %f4 234 aes_dround23 %f14, %f0, %f2, %f2 235 aes_dround01_l %f16, %f4, %f2, %f0 236 aes_dround23_l %f18, %f4, %f2, %f2 237 238 bnz,pn %icc, 2f 239 nop 240 241 std %f0, [$out + 0] 242 retl 243 std %f2, [$out + 8] 244 2452: alignaddrl $out, %g0, $out 246 mov 0xff, $mask 247 srl $mask, $tmp, $mask 248 249 faligndata %f0, %f0, %f4 250 faligndata %f0, %f2, %f6 251 faligndata %f2, %f2, %f8 252 253 stda %f4, [$out + $mask]0xc0 ! partial store 254 std %f6, [$out + 8] 255 add $out, 16, $out 256 orn %g0, $mask, $mask 257 retl 258 stda %f8, [$out + $mask]0xc0 ! 
partial store 259.type aes_t4_decrypt,#function 260.size aes_t4_decrypt,.-aes_t4_decrypt 261___ 262} 263 264###################################################################### 265# key setup subroutines 266# 267{ 268my ($inp,$bits,$out,$tmp)=map("%o$_",(0..5)); 269$code.=<<___; 270.globl aes_t4_set_encrypt_key 271.align 32 272aes_t4_set_encrypt_key: 273.Lset_encrypt_key: 274 and $inp, 7, $tmp 275 alignaddr $inp, %g0, $inp 276 cmp $bits, 192 277 ldd [$inp + 0], %f0 278 bl,pt %icc,.L128 279 ldd [$inp + 8], %f2 280 281 be,pt %icc,.L192 282 ldd [$inp + 16], %f4 283 brz,pt $tmp, .L256aligned 284 ldd [$inp + 24], %f6 285 286 ldd [$inp + 32], %f8 287 faligndata %f0, %f2, %f0 288 faligndata %f2, %f4, %f2 289 faligndata %f4, %f6, %f4 290 faligndata %f6, %f8, %f6 291.L256aligned: 292___ 293for ($i=0; $i<6; $i++) { 294 $code.=<<___; 295 std %f0, [$out + `32*$i+0`] 296 aes_kexpand1 %f0, %f6, $i, %f0 297 std %f2, [$out + `32*$i+8`] 298 aes_kexpand2 %f2, %f0, %f2 299 std %f4, [$out + `32*$i+16`] 300 aes_kexpand0 %f4, %f2, %f4 301 std %f6, [$out + `32*$i+24`] 302 aes_kexpand2 %f6, %f4, %f6 303___ 304} 305$code.=<<___; 306 std %f0, [$out + `32*$i+0`] 307 aes_kexpand1 %f0, %f6, $i, %f0 308 std %f2, [$out + `32*$i+8`] 309 aes_kexpand2 %f2, %f0, %f2 310 std %f4, [$out + `32*$i+16`] 311 std %f6, [$out + `32*$i+24`] 312 std %f0, [$out + `32*$i+32`] 313 std %f2, [$out + `32*$i+40`] 314 315 mov 14, $tmp 316 st $tmp, [$out + 240] 317 retl 318 xor %o0, %o0, %o0 319 320.align 16 321.L192: 322 brz,pt $tmp, .L192aligned 323 nop 324 325 ldd [$inp + 24], %f6 326 faligndata %f0, %f2, %f0 327 faligndata %f2, %f4, %f2 328 faligndata %f4, %f6, %f4 329.L192aligned: 330___ 331for ($i=0; $i<7; $i++) { 332 $code.=<<___; 333 std %f0, [$out + `24*$i+0`] 334 aes_kexpand1 %f0, %f4, $i, %f0 335 std %f2, [$out + `24*$i+8`] 336 aes_kexpand2 %f2, %f0, %f2 337 std %f4, [$out + `24*$i+16`] 338 aes_kexpand2 %f4, %f2, %f4 339___ 340} 341$code.=<<___; 342 std %f0, [$out + `24*$i+0`] 343 aes_kexpand1 %f0, %f4, 
$i, %f0 344 std %f2, [$out + `24*$i+8`] 345 aes_kexpand2 %f2, %f0, %f2 346 std %f4, [$out + `24*$i+16`] 347 std %f0, [$out + `24*$i+24`] 348 std %f2, [$out + `24*$i+32`] 349 350 mov 12, $tmp 351 st $tmp, [$out + 240] 352 retl 353 xor %o0, %o0, %o0 354 355.align 16 356.L128: 357 brz,pt $tmp, .L128aligned 358 nop 359 360 ldd [$inp + 16], %f4 361 faligndata %f0, %f2, %f0 362 faligndata %f2, %f4, %f2 363.L128aligned: 364___ 365for ($i=0; $i<10; $i++) { 366 $code.=<<___; 367 std %f0, [$out + `16*$i+0`] 368 aes_kexpand1 %f0, %f2, $i, %f0 369 std %f2, [$out + `16*$i+8`] 370 aes_kexpand2 %f2, %f0, %f2 371___ 372} 373$code.=<<___; 374 std %f0, [$out + `16*$i+0`] 375 std %f2, [$out + `16*$i+8`] 376 377 mov 10, $tmp 378 st $tmp, [$out + 240] 379 retl 380 xor %o0, %o0, %o0 381.type aes_t4_set_encrypt_key,#function 382.size aes_t4_set_encrypt_key,.-aes_t4_set_encrypt_key 383 384.globl aes_t4_set_decrypt_key 385.align 32 386aes_t4_set_decrypt_key: 387 mov %o7, %o5 388 call .Lset_encrypt_key 389 nop 390 391 mov %o5, %o7 392 sll $tmp, 4, $inp ! $tmp is number of rounds 393 add $tmp, 2, $tmp 394 add $out, $inp, $inp ! $inp=$out+16*rounds 395 srl $tmp, 2, $tmp ! 
$tmp=(rounds+2)/4 396 397.Lkey_flip: 398 ldd [$out + 0], %f0 399 ldd [$out + 8], %f2 400 ldd [$out + 16], %f4 401 ldd [$out + 24], %f6 402 ldd [$inp + 0], %f8 403 ldd [$inp + 8], %f10 404 ldd [$inp - 16], %f12 405 ldd [$inp - 8], %f14 406 sub $tmp, 1, $tmp 407 std %f0, [$inp + 0] 408 std %f2, [$inp + 8] 409 std %f4, [$inp - 16] 410 std %f6, [$inp - 8] 411 std %f8, [$out + 0] 412 std %f10, [$out + 8] 413 std %f12, [$out + 16] 414 std %f14, [$out + 24] 415 add $out, 32, $out 416 brnz $tmp, .Lkey_flip 417 sub $inp, 32, $inp 418 419 retl 420 xor %o0, %o0, %o0 421.type aes_t4_set_decrypt_key,#function 422.size aes_t4_set_decrypt_key,.-aes_t4_set_decrypt_key 423___ 424} 425 426{{{ 427my ($inp,$out,$len,$key,$ivec,$enc)=map("%i$_",(0..5)); 428my ($ileft,$iright,$ooff,$omask,$ivoff)=map("%l$_",(1..7)); 429 430$code.=<<___; 431.align 32 432_aes128_encrypt_1x: 433___ 434for ($i=0; $i<4; $i++) { 435 $code.=<<___; 436 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 437 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 438 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 439 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 440___ 441} 442$code.=<<___; 443 aes_eround01 %f48, %f0, %f2, %f4 444 aes_eround23 %f50, %f0, %f2, %f2 445 aes_eround01_l %f52, %f4, %f2, %f0 446 retl 447 aes_eround23_l %f54, %f4, %f2, %f2 448.type _aes128_encrypt_1x,#function 449.size _aes128_encrypt_1x,.-_aes128_encrypt_1x 450 451.align 32 452_aes128_encrypt_2x: 453___ 454for ($i=0; $i<4; $i++) { 455 $code.=<<___; 456 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 457 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 458 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 459 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 460 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 461 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 462 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 463 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 464___ 465} 466$code.=<<___; 467 aes_eround01 %f48, %f0, %f2, %f8 468 aes_eround23 %f50, %f0, %f2, %f2 469 aes_eround01 %f48, %f4, %f6, %f10 470 aes_eround23 
%f50, %f4, %f6, %f6 471 aes_eround01_l %f52, %f8, %f2, %f0 472 aes_eround23_l %f54, %f8, %f2, %f2 473 aes_eround01_l %f52, %f10, %f6, %f4 474 retl 475 aes_eround23_l %f54, %f10, %f6, %f6 476.type _aes128_encrypt_2x,#function 477.size _aes128_encrypt_2x,.-_aes128_encrypt_2x 478 479.align 32 480_aes128_loadkey: 481 ldx [$key + 0], %g4 482 ldx [$key + 8], %g5 483___ 484for ($i=2; $i<22;$i++) { # load key schedule 485 $code.=<<___; 486 ldd [$key + `8*$i`], %f`12+2*$i` 487___ 488} 489$code.=<<___; 490 retl 491 nop 492.type _aes128_loadkey,#function 493.size _aes128_loadkey,.-_aes128_loadkey 494_aes128_load_enckey=_aes128_loadkey 495_aes128_load_deckey=_aes128_loadkey 496 497___ 498 499&alg_cbc_encrypt_implement("aes",128); 500if ($::evp) { 501 &alg_ctr32_implement("aes",128); 502 &alg_xts_implement("aes",128,"en"); 503 &alg_xts_implement("aes",128,"de"); 504} 505&alg_cbc_decrypt_implement("aes",128); 506 507$code.=<<___; 508.align 32 509_aes128_decrypt_1x: 510___ 511for ($i=0; $i<4; $i++) { 512 $code.=<<___; 513 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 514 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 515 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 516 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 517___ 518} 519$code.=<<___; 520 aes_dround01 %f48, %f0, %f2, %f4 521 aes_dround23 %f50, %f0, %f2, %f2 522 aes_dround01_l %f52, %f4, %f2, %f0 523 retl 524 aes_dround23_l %f54, %f4, %f2, %f2 525.type _aes128_decrypt_1x,#function 526.size _aes128_decrypt_1x,.-_aes128_decrypt_1x 527 528.align 32 529_aes128_decrypt_2x: 530___ 531for ($i=0; $i<4; $i++) { 532 $code.=<<___; 533 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 534 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 535 aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 536 aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 537 aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 538 aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 539 aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 540 aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 541___ 542} 543$code.=<<___; 544 aes_dround01 %f48, 
%f0, %f2, %f8 545 aes_dround23 %f50, %f0, %f2, %f2 546 aes_dround01 %f48, %f4, %f6, %f10 547 aes_dround23 %f50, %f4, %f6, %f6 548 aes_dround01_l %f52, %f8, %f2, %f0 549 aes_dround23_l %f54, %f8, %f2, %f2 550 aes_dround01_l %f52, %f10, %f6, %f4 551 retl 552 aes_dround23_l %f54, %f10, %f6, %f6 553.type _aes128_decrypt_2x,#function 554.size _aes128_decrypt_2x,.-_aes128_decrypt_2x 555___ 556 557$code.=<<___; 558.align 32 559_aes192_encrypt_1x: 560___ 561for ($i=0; $i<5; $i++) { 562 $code.=<<___; 563 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 564 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 565 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 566 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 567___ 568} 569$code.=<<___; 570 aes_eround01 %f56, %f0, %f2, %f4 571 aes_eround23 %f58, %f0, %f2, %f2 572 aes_eround01_l %f60, %f4, %f2, %f0 573 retl 574 aes_eround23_l %f62, %f4, %f2, %f2 575.type _aes192_encrypt_1x,#function 576.size _aes192_encrypt_1x,.-_aes192_encrypt_1x 577 578.align 32 579_aes192_encrypt_2x: 580___ 581for ($i=0; $i<5; $i++) { 582 $code.=<<___; 583 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 584 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 585 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 586 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 587 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 588 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 589 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 590 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 591___ 592} 593$code.=<<___; 594 aes_eround01 %f56, %f0, %f2, %f8 595 aes_eround23 %f58, %f0, %f2, %f2 596 aes_eround01 %f56, %f4, %f6, %f10 597 aes_eround23 %f58, %f4, %f6, %f6 598 aes_eround01_l %f60, %f8, %f2, %f0 599 aes_eround23_l %f62, %f8, %f2, %f2 600 aes_eround01_l %f60, %f10, %f6, %f4 601 retl 602 aes_eround23_l %f62, %f10, %f6, %f6 603.type _aes192_encrypt_2x,#function 604.size _aes192_encrypt_2x,.-_aes192_encrypt_2x 605 606.align 32 607_aes256_encrypt_1x: 608 aes_eround01 %f16, %f0, %f2, %f4 609 aes_eround23 %f18, %f0, %f2, %f2 610 ldd [$key + 208], %f16 611 
ldd [$key + 216], %f18 612 aes_eround01 %f20, %f4, %f2, %f0 613 aes_eround23 %f22, %f4, %f2, %f2 614 ldd [$key + 224], %f20 615 ldd [$key + 232], %f22 616___ 617for ($i=1; $i<6; $i++) { 618 $code.=<<___; 619 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f4 620 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 621 aes_eround01 %f`16+8*$i+4`, %f4, %f2, %f0 622 aes_eround23 %f`16+8*$i+6`, %f4, %f2, %f2 623___ 624} 625$code.=<<___; 626 aes_eround01 %f16, %f0, %f2, %f4 627 aes_eround23 %f18, %f0, %f2, %f2 628 ldd [$key + 16], %f16 629 ldd [$key + 24], %f18 630 aes_eround01_l %f20, %f4, %f2, %f0 631 aes_eround23_l %f22, %f4, %f2, %f2 632 ldd [$key + 32], %f20 633 retl 634 ldd [$key + 40], %f22 635.type _aes256_encrypt_1x,#function 636.size _aes256_encrypt_1x,.-_aes256_encrypt_1x 637 638.align 32 639_aes256_encrypt_2x: 640 aes_eround01 %f16, %f0, %f2, %f8 641 aes_eround23 %f18, %f0, %f2, %f2 642 aes_eround01 %f16, %f4, %f6, %f10 643 aes_eround23 %f18, %f4, %f6, %f6 644 ldd [$key + 208], %f16 645 ldd [$key + 216], %f18 646 aes_eround01 %f20, %f8, %f2, %f0 647 aes_eround23 %f22, %f8, %f2, %f2 648 aes_eround01 %f20, %f10, %f6, %f4 649 aes_eround23 %f22, %f10, %f6, %f6 650 ldd [$key + 224], %f20 651 ldd [$key + 232], %f22 652___ 653for ($i=1; $i<6; $i++) { 654 $code.=<<___; 655 aes_eround01 %f`16+8*$i+0`, %f0, %f2, %f8 656 aes_eround23 %f`16+8*$i+2`, %f0, %f2, %f2 657 aes_eround01 %f`16+8*$i+0`, %f4, %f6, %f10 658 aes_eround23 %f`16+8*$i+2`, %f4, %f6, %f6 659 aes_eround01 %f`16+8*$i+4`, %f8, %f2, %f0 660 aes_eround23 %f`16+8*$i+6`, %f8, %f2, %f2 661 aes_eround01 %f`16+8*$i+4`, %f10, %f6, %f4 662 aes_eround23 %f`16+8*$i+6`, %f10, %f6, %f6 663___ 664} 665$code.=<<___; 666 aes_eround01 %f16, %f0, %f2, %f8 667 aes_eround23 %f18, %f0, %f2, %f2 668 aes_eround01 %f16, %f4, %f6, %f10 669 aes_eround23 %f18, %f4, %f6, %f6 670 ldd [$key + 16], %f16 671 ldd [$key + 24], %f18 672 aes_eround01_l %f20, %f8, %f2, %f0 673 aes_eround23_l %f22, %f8, %f2, %f2 674 aes_eround01_l %f20, %f10, %f6, %f4 675 
aes_eround23_l %f22, %f10, %f6, %f6 676 ldd [$key + 32], %f20 677 retl 678 ldd [$key + 40], %f22 679.type _aes256_encrypt_2x,#function 680.size _aes256_encrypt_2x,.-_aes256_encrypt_2x 681 682.align 32 683_aes192_loadkey: 684 ldx [$key + 0], %g4 685 ldx [$key + 8], %g5 686___ 687for ($i=2; $i<26;$i++) { # load key schedule 688 $code.=<<___; 689 ldd [$key + `8*$i`], %f`12+2*$i` 690___ 691} 692$code.=<<___; 693 retl 694 nop 695.type _aes192_loadkey,#function 696.size _aes192_loadkey,.-_aes192_loadkey 697_aes256_loadkey=_aes192_loadkey 698_aes192_load_enckey=_aes192_loadkey 699_aes192_load_deckey=_aes192_loadkey 700_aes256_load_enckey=_aes192_loadkey 701_aes256_load_deckey=_aes192_loadkey 702___ 703 704&alg_cbc_encrypt_implement("aes",256); 705&alg_cbc_encrypt_implement("aes",192); 706if ($::evp) { 707 &alg_ctr32_implement("aes",256); 708 &alg_xts_implement("aes",256,"en"); 709 &alg_xts_implement("aes",256,"de"); 710 &alg_ctr32_implement("aes",192); 711} 712&alg_cbc_decrypt_implement("aes",192); 713&alg_cbc_decrypt_implement("aes",256); 714 715$code.=<<___; 716.align 32 717_aes256_decrypt_1x: 718 aes_dround01 %f16, %f0, %f2, %f4 719 aes_dround23 %f18, %f0, %f2, %f2 720 ldd [$key + 208], %f16 721 ldd [$key + 216], %f18 722 aes_dround01 %f20, %f4, %f2, %f0 723 aes_dround23 %f22, %f4, %f2, %f2 724 ldd [$key + 224], %f20 725 ldd [$key + 232], %f22 726___ 727for ($i=1; $i<6; $i++) { 728 $code.=<<___; 729 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 730 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 731 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 732 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 733___ 734} 735$code.=<<___; 736 aes_dround01 %f16, %f0, %f2, %f4 737 aes_dround23 %f18, %f0, %f2, %f2 738 ldd [$key + 16], %f16 739 ldd [$key + 24], %f18 740 aes_dround01_l %f20, %f4, %f2, %f0 741 aes_dround23_l %f22, %f4, %f2, %f2 742 ldd [$key + 32], %f20 743 retl 744 ldd [$key + 40], %f22 745.type _aes256_decrypt_1x,#function 746.size _aes256_decrypt_1x,.-_aes256_decrypt_1x 747 748.align 32 
749_aes256_decrypt_2x: 750 aes_dround01 %f16, %f0, %f2, %f8 751 aes_dround23 %f18, %f0, %f2, %f2 752 aes_dround01 %f16, %f4, %f6, %f10 753 aes_dround23 %f18, %f4, %f6, %f6 754 ldd [$key + 208], %f16 755 ldd [$key + 216], %f18 756 aes_dround01 %f20, %f8, %f2, %f0 757 aes_dround23 %f22, %f8, %f2, %f2 758 aes_dround01 %f20, %f10, %f6, %f4 759 aes_dround23 %f22, %f10, %f6, %f6 760 ldd [$key + 224], %f20 761 ldd [$key + 232], %f22 762___ 763for ($i=1; $i<6; $i++) { 764 $code.=<<___; 765 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 766 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 767 aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 768 aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 769 aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 770 aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 771 aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 772 aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 773___ 774} 775$code.=<<___; 776 aes_dround01 %f16, %f0, %f2, %f8 777 aes_dround23 %f18, %f0, %f2, %f2 778 aes_dround01 %f16, %f4, %f6, %f10 779 aes_dround23 %f18, %f4, %f6, %f6 780 ldd [$key + 16], %f16 781 ldd [$key + 24], %f18 782 aes_dround01_l %f20, %f8, %f2, %f0 783 aes_dround23_l %f22, %f8, %f2, %f2 784 aes_dround01_l %f20, %f10, %f6, %f4 785 aes_dround23_l %f22, %f10, %f6, %f6 786 ldd [$key + 32], %f20 787 retl 788 ldd [$key + 40], %f22 789.type _aes256_decrypt_2x,#function 790.size _aes256_decrypt_2x,.-_aes256_decrypt_2x 791 792.align 32 793_aes192_decrypt_1x: 794___ 795for ($i=0; $i<5; $i++) { 796 $code.=<<___; 797 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f4 798 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 799 aes_dround01 %f`16+8*$i+4`, %f4, %f2, %f0 800 aes_dround23 %f`16+8*$i+6`, %f4, %f2, %f2 801___ 802} 803$code.=<<___; 804 aes_dround01 %f56, %f0, %f2, %f4 805 aes_dround23 %f58, %f0, %f2, %f2 806 aes_dround01_l %f60, %f4, %f2, %f0 807 retl 808 aes_dround23_l %f62, %f4, %f2, %f2 809.type _aes192_decrypt_1x,#function 810.size _aes192_decrypt_1x,.-_aes192_decrypt_1x 811 812.align 32 813_aes192_decrypt_2x: 814___ 815for 
($i=0; $i<5; $i++) { 816 $code.=<<___; 817 aes_dround01 %f`16+8*$i+0`, %f0, %f2, %f8 818 aes_dround23 %f`16+8*$i+2`, %f0, %f2, %f2 819 aes_dround01 %f`16+8*$i+0`, %f4, %f6, %f10 820 aes_dround23 %f`16+8*$i+2`, %f4, %f6, %f6 821 aes_dround01 %f`16+8*$i+4`, %f8, %f2, %f0 822 aes_dround23 %f`16+8*$i+6`, %f8, %f2, %f2 823 aes_dround01 %f`16+8*$i+4`, %f10, %f6, %f4 824 aes_dround23 %f`16+8*$i+6`, %f10, %f6, %f6 825___ 826} 827$code.=<<___; 828 aes_dround01 %f56, %f0, %f2, %f8 829 aes_dround23 %f58, %f0, %f2, %f2 830 aes_dround01 %f56, %f4, %f6, %f10 831 aes_dround23 %f58, %f4, %f6, %f6 832 aes_dround01_l %f60, %f8, %f2, %f0 833 aes_dround23_l %f62, %f8, %f2, %f2 834 aes_dround01_l %f60, %f10, %f6, %f4 835 retl 836 aes_dround23_l %f62, %f10, %f6, %f6 837.type _aes192_decrypt_2x,#function 838.size _aes192_decrypt_2x,.-_aes192_decrypt_2x 839___ 840}}} 841 842if (!$::evp) { 843$code.=<<___; 844.global AES_encrypt 845AES_encrypt=aes_t4_encrypt 846.global AES_decrypt 847AES_decrypt=aes_t4_decrypt 848.global AES_set_encrypt_key 849.align 32 850AES_set_encrypt_key: 851 andcc %o2, 7, %g0 ! check alignment 852 bnz,a,pn %icc, 1f 853 mov -1, %o0 854 brz,a,pn %o0, 1f 855 mov -1, %o0 856 brz,a,pn %o2, 1f 857 mov -1, %o0 858 andncc %o1, 0x1c0, %g0 859 bnz,a,pn %icc, 1f 860 mov -2, %o0 861 cmp %o1, 128 862 bl,a,pn %icc, 1f 863 mov -2, %o0 864 b aes_t4_set_encrypt_key 865 nop 8661: retl 867 nop 868.type AES_set_encrypt_key,#function 869.size AES_set_encrypt_key,.-AES_set_encrypt_key 870 871.global AES_set_decrypt_key 872.align 32 873AES_set_decrypt_key: 874 andcc %o2, 7, %g0 ! 
check alignment 875 bnz,a,pn %icc, 1f 876 mov -1, %o0 877 brz,a,pn %o0, 1f 878 mov -1, %o0 879 brz,a,pn %o2, 1f 880 mov -1, %o0 881 andncc %o1, 0x1c0, %g0 882 bnz,a,pn %icc, 1f 883 mov -2, %o0 884 cmp %o1, 128 885 bl,a,pn %icc, 1f 886 mov -2, %o0 887 b aes_t4_set_decrypt_key 888 nop 8891: retl 890 nop 891.type AES_set_decrypt_key,#function 892.size AES_set_decrypt_key,.-AES_set_decrypt_key 893___ 894 895my ($inp,$out,$len,$key,$ivec,$enc)=map("%o$_",(0..5)); 896 897$code.=<<___; 898.globl AES_cbc_encrypt 899.align 32 900AES_cbc_encrypt: 901 ld [$key + 240], %g1 902 nop 903 brz $enc, .Lcbc_decrypt 904 cmp %g1, 12 905 906 bl,pt %icc, aes128_t4_cbc_encrypt 907 nop 908 be,pn %icc, aes192_t4_cbc_encrypt 909 nop 910 ba aes256_t4_cbc_encrypt 911 nop 912 913.Lcbc_decrypt: 914 bl,pt %icc, aes128_t4_cbc_decrypt 915 nop 916 be,pn %icc, aes192_t4_cbc_decrypt 917 nop 918 ba aes256_t4_cbc_decrypt 919 nop 920.type AES_cbc_encrypt,#function 921.size AES_cbc_encrypt,.-AES_cbc_encrypt 922___ 923} 924$code.=<<___; 925.asciz "AES for SPARC T4, David S. Miller, Andy Polyakov" 926.align 4 927___ 928 929&emit_assembler(); 930 931close STDOUT or die "error closing STDOUT: $!"; 932